2024-03-19 13:28:42 +00:00
|
|
|
package nu.marginalia.segmentation;
|
2024-03-12 12:12:50 +00:00
|
|
|
|
|
|
|
import org.junit.jupiter.api.BeforeEach;
|
|
|
|
import org.junit.jupiter.api.Test;
|
|
|
|
|
2024-04-12 15:45:06 +00:00
|
|
|
import java.util.List;
|
|
|
|
|
2024-03-12 12:12:50 +00:00
|
|
|
import static org.junit.jupiter.api.Assertions.*;
|
|
|
|
|
|
|
|
class NgramLexiconTest {
|
|
|
|
NgramLexicon lexicon = new NgramLexicon();
|
|
|
|
@BeforeEach
|
|
|
|
public void setUp() {
|
|
|
|
lexicon.clear();
|
|
|
|
}
|
|
|
|
|
|
|
|
void addNgram(String... ngram) {
|
|
|
|
lexicon.incOrdered(HasherGroup.ordered().rollingHash(ngram));
|
|
|
|
}
|
|
|
|
|
|
|
|
@Test
|
|
|
|
void findSegments() {
|
|
|
|
addNgram("hello", "world");
|
|
|
|
addNgram("rye", "bread");
|
|
|
|
addNgram("rye", "world");
|
|
|
|
|
2024-04-12 15:45:06 +00:00
|
|
|
List<String[]> segments = lexicon.findSegmentsStrings(2, 2, "hello", "world", "rye", "bread");
|
2024-03-12 12:12:50 +00:00
|
|
|
|
2024-04-11 16:12:01 +00:00
|
|
|
assertEquals(2, segments.size());
|
2024-03-12 12:12:50 +00:00
|
|
|
|
2024-04-11 16:12:01 +00:00
|
|
|
for (int i = 0; i < 2; i++) {
|
2024-03-12 12:12:50 +00:00
|
|
|
var segment = segments.get(i);
|
|
|
|
switch (i) {
|
|
|
|
case 0 -> {
|
2024-04-11 16:12:01 +00:00
|
|
|
assertArrayEquals(new String[]{"hello", "world"}, segment);
|
2024-03-12 12:12:50 +00:00
|
|
|
}
|
|
|
|
case 1 -> {
|
2024-04-11 16:12:01 +00:00
|
|
|
assertArrayEquals(new String[]{"rye", "bread"}, segment);
|
2024-03-12 12:12:50 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|