mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(span) Correct intersection counting logic, add comprehensive tests
This commit is contained in:
parent
ba47d72bf4
commit
e11ebf18e5
@ -101,13 +101,14 @@ public class DocumentSpan {
|
||||
int start = startsEnds.getInt(sei++);
|
||||
int end = startsEnds.getInt(sei++);
|
||||
|
||||
for (int pi = 0; pi < positions.size(); pi++) {
|
||||
for (int pi = 0; pi < positions.size();) {
|
||||
int position = positions.getInt(pi);
|
||||
if (position >= start && position + len <= end) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (sei + 2 < startsEnds.size()) {
|
||||
else if (position < end) {
|
||||
pi++;
|
||||
} else if (sei + 2 <= startsEnds.size()) {
|
||||
start = startsEnds.getInt(sei++);
|
||||
end = startsEnds.getInt(sei++);
|
||||
}
|
||||
@ -133,14 +134,15 @@ public class DocumentSpan {
|
||||
int start = startsEnds.getInt(sei++);
|
||||
int end = startsEnds.getInt(sei++);
|
||||
|
||||
for (int pi = 0; pi < positions.size(); pi++) {
|
||||
for (int pi = 0; pi < positions.size(); ) {
|
||||
int position = positions.getInt(pi);
|
||||
|
||||
if (position == start && position + len == end) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (sei + 2 < startsEnds.size()) {
|
||||
else if (position < end) {
|
||||
pi++;
|
||||
} else if (sei + 2 <= startsEnds.size()) {
|
||||
start = startsEnds.getInt(sei++);
|
||||
end = startsEnds.getInt(sei++);
|
||||
}
|
||||
@ -152,6 +154,39 @@ public class DocumentSpan {
|
||||
return false;
|
||||
}
|
||||
|
||||
public int countRangeMatches(IntList positions, int len) {
|
||||
if (null == startsEnds || startsEnds.size() < 2 || positions.isEmpty()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int sei = 0;
|
||||
int ret = 0;
|
||||
|
||||
int start = startsEnds.getInt(sei++);
|
||||
int end = startsEnds.getInt(sei++);
|
||||
|
||||
for (int pi = 0; pi < positions.size();) {
|
||||
int position = positions.getInt(pi);
|
||||
if (position >= start && position + len <= end) {
|
||||
ret++;
|
||||
pi++;
|
||||
}
|
||||
else if (position < end) {
|
||||
pi++;
|
||||
}
|
||||
else if (sei + 2 <= startsEnds.size()) {
|
||||
start = startsEnds.getInt(sei++);
|
||||
end = startsEnds.getInt(sei++);
|
||||
}
|
||||
else {
|
||||
return ret;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/** Returns an iterator over each position between the start and end positions of each span in the document of this type */
|
||||
public IntIterator iterator() {
|
||||
if (null == startsEnds) {
|
||||
|
@ -6,6 +6,7 @@ import nu.marginalia.index.forward.spans.ForwardIndexSpansWriter;
|
||||
import nu.marginalia.language.sentence.tag.HtmlTag;
|
||||
import nu.marginalia.sequence.VarintCodedSequence;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
@ -28,7 +29,7 @@ class ForwardIndexSpansReaderTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
void testSunnyDay() throws IOException {
|
||||
void testContainsPosition() throws IOException {
|
||||
ByteBuffer wa = ByteBuffer.allocate(32);
|
||||
|
||||
long offset1;
|
||||
@ -72,4 +73,91 @@ class ForwardIndexSpansReaderTest {
|
||||
assertFalse(spans2.title.containsPosition(8));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testContainsRange() throws IOException {
|
||||
long offset1;
|
||||
try (var writer = new ForwardIndexSpansWriter(testFile)) {
|
||||
writer.beginRecord(1);
|
||||
writer.writeSpan(HtmlTag.HEADING.code, VarintCodedSequence.generate( 1, 2, 10, 15, 20, 25).buffer());
|
||||
offset1 = writer.endRecord();
|
||||
}
|
||||
|
||||
try (var reader = new ForwardIndexSpansReader(testFile);
|
||||
var arena = Arena.ofConfined()
|
||||
) {
|
||||
var spans1 = reader.readSpans(arena, offset1);
|
||||
|
||||
assertTrue(spans1.heading.containsRange(IntList.of(10), 2));
|
||||
assertTrue(spans1.heading.containsRange(IntList.of(8, 10), 2));
|
||||
assertTrue(spans1.heading.containsRange(IntList.of(8, 10, 14), 2));
|
||||
|
||||
assertTrue(spans1.heading.containsRange(IntList.of(10), 5));
|
||||
assertTrue(spans1.heading.containsRange(IntList.of(8, 10), 5));
|
||||
assertTrue(spans1.heading.containsRange(IntList.of(8, 10, 14), 5));
|
||||
|
||||
assertFalse(spans1.heading.containsRange(IntList.of(11), 5));
|
||||
assertFalse(spans1.heading.containsRange(IntList.of(9), 5));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testContainsRangeExact() throws IOException {
|
||||
long offset1;
|
||||
try (var writer = new ForwardIndexSpansWriter(testFile)) {
|
||||
writer.beginRecord(1);
|
||||
writer.writeSpan(HtmlTag.HEADING.code, VarintCodedSequence.generate( 1, 2, 10, 15, 20, 25).buffer());
|
||||
offset1 = writer.endRecord();
|
||||
}
|
||||
|
||||
try (var reader = new ForwardIndexSpansReader(testFile);
|
||||
var arena = Arena.ofConfined()
|
||||
) {
|
||||
var spans1 = reader.readSpans(arena, offset1);
|
||||
|
||||
assertFalse(spans1.heading.containsRangeExact(IntList.of(10), 2));
|
||||
assertFalse(spans1.heading.containsRangeExact(IntList.of(8, 10), 2));
|
||||
assertFalse(spans1.heading.containsRangeExact(IntList.of(8, 10, 14), 2));
|
||||
|
||||
assertTrue(spans1.heading.containsRangeExact(IntList.of(10), 5));
|
||||
assertTrue(spans1.heading.containsRangeExact(IntList.of(8, 10), 5));
|
||||
assertTrue(spans1.heading.containsRangeExact(IntList.of(8, 10, 14), 5));
|
||||
|
||||
assertFalse(spans1.heading.containsRangeExact(IntList.of(11), 5));
|
||||
assertFalse(spans1.heading.containsRangeExact(IntList.of(9), 5));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testCountRangeMatches() throws IOException {
|
||||
long offset1;
|
||||
try (var writer = new ForwardIndexSpansWriter(testFile)) {
|
||||
writer.beginRecord(1);
|
||||
writer.writeSpan(HtmlTag.HEADING.code, VarintCodedSequence.generate( 1, 2, 10, 15, 20, 25).buffer());
|
||||
offset1 = writer.endRecord();
|
||||
}
|
||||
|
||||
try (var reader = new ForwardIndexSpansReader(testFile);
|
||||
var arena = Arena.ofConfined()
|
||||
) {
|
||||
var spans1 = reader.readSpans(arena, offset1);
|
||||
|
||||
Assertions.assertEquals(1, spans1.heading.countRangeMatches(IntList.of(10), 2));
|
||||
Assertions.assertEquals(1, spans1.heading.countRangeMatches(IntList.of(8, 10), 2));
|
||||
Assertions.assertEquals(1, spans1.heading.countRangeMatches(IntList.of(8, 10, 14), 2));
|
||||
|
||||
Assertions.assertEquals(1, spans1.heading.countRangeMatches(IntList.of(10), 5));
|
||||
Assertions.assertEquals(1, spans1.heading.countRangeMatches(IntList.of(8, 10), 5));
|
||||
Assertions.assertEquals(1, spans1.heading.countRangeMatches(IntList.of(8, 10, 14), 5));
|
||||
|
||||
Assertions.assertEquals(2, spans1.heading.countRangeMatches(IntList.of(10, 20), 5));
|
||||
Assertions.assertEquals(2, spans1.heading.countRangeMatches(IntList.of(8, 10, 13, 20), 5));
|
||||
Assertions.assertEquals(2, spans1.heading.countRangeMatches(IntList.of(8, 10, 14, 20, 55), 5));
|
||||
|
||||
Assertions.assertEquals(2, spans1.heading.countRangeMatches(IntList.of(10, 12), 2));
|
||||
|
||||
Assertions.assertEquals(0, spans1.heading.countRangeMatches(IntList.of(11), 5));
|
||||
Assertions.assertEquals(0, spans1.heading.countRangeMatches(IntList.of(9), 5));
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user