MarginaliaSearch/code/libraries/term-frequency-dict/test/nu/marginalia/segmentation/NgramLexiconTest.java

53 lines
1.8 KiB
Java
Raw Normal View History

package nu.marginalia.segmentation;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Tests {@link NgramLexicon} segment lookup against a small, hand-populated lexicon.
 */
class NgramLexiconTest {
    NgramLexicon lexicon = new NgramLexicon();

    /** Clears the lexicon before every test. */
    @BeforeEach
    public void setUp() {
        lexicon.clear();
    }

    /**
     * Registers the given words as a single n-gram: the ordered rolling hash is passed to
     * {@code incOrdered} and the unordered rolling hash to {@code addUnordered}.
     *
     * @param ngram the words making up the n-gram, in order
     */
    void addNgram(String... ngram) {
        var orderedHash = HasherGroup.ordered().rollingHash(ngram);
        lexicon.incOrdered(orderedHash);

        var unorderedHash = HasherGroup.unordered().rollingHash(ngram);
        lexicon.addUnordered(unorderedHash);
    }

    /**
     * Looks up every two-word window of "hello world rye bread" and checks the words,
     * count and position type reported for each of the three expected segments.
     */
    @Test
    void findSegments() {
        addNgram("hello", "world");
        addNgram("rye", "bread");
        addNgram("rye", "world");

        String[] sent = { "hello", "world", "rye", "bread" };
        var segments = lexicon.findSegments(2, "hello", "world", "rye", "bread");

        assertEquals(3, segments.size());

        // "hello world" was registered in exactly this order.
        var helloWorld = segments.get(0);
        assertArrayEquals(new String[]{"hello", "world"}, helloWorld.project(sent));
        assertEquals(1, helloWorld.count());
        assertEquals(NgramLexicon.PositionType.NGRAM, helloWorld.type());

        // Only "rye world" was registered, so "world rye" is expected to come back
        // as a permutation with a zero count rather than as an exact n-gram.
        var worldRye = segments.get(1);
        assertArrayEquals(new String[]{"world", "rye"}, worldRye.project(sent));
        assertEquals(0, worldRye.count());
        assertEquals(NgramLexicon.PositionType.PERMUTATION, worldRye.type());

        // "rye bread" was registered in exactly this order.
        var ryeBread = segments.get(2);
        assertArrayEquals(new String[]{"rye", "bread"}, ryeBread.project(sent));
        assertEquals(1, ryeBread.count());
        assertEquals(NgramLexicon.PositionType.NGRAM, ryeBread.type());
    }
}