mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(span) Correct intersection counting logic, add comprehensive tests
This commit is contained in:
parent
ba47d72bf4
commit
e11ebf18e5
@ -101,13 +101,14 @@ public class DocumentSpan {
|
|||||||
int start = startsEnds.getInt(sei++);
|
int start = startsEnds.getInt(sei++);
|
||||||
int end = startsEnds.getInt(sei++);
|
int end = startsEnds.getInt(sei++);
|
||||||
|
|
||||||
for (int pi = 0; pi < positions.size(); pi++) {
|
for (int pi = 0; pi < positions.size();) {
|
||||||
int position = positions.getInt(pi);
|
int position = positions.getInt(pi);
|
||||||
if (position >= start && position + len <= end) {
|
if (position >= start && position + len <= end) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
else if (position < end) {
|
||||||
if (sei + 2 < startsEnds.size()) {
|
pi++;
|
||||||
|
} else if (sei + 2 <= startsEnds.size()) {
|
||||||
start = startsEnds.getInt(sei++);
|
start = startsEnds.getInt(sei++);
|
||||||
end = startsEnds.getInt(sei++);
|
end = startsEnds.getInt(sei++);
|
||||||
}
|
}
|
||||||
@ -133,14 +134,15 @@ public class DocumentSpan {
|
|||||||
int start = startsEnds.getInt(sei++);
|
int start = startsEnds.getInt(sei++);
|
||||||
int end = startsEnds.getInt(sei++);
|
int end = startsEnds.getInt(sei++);
|
||||||
|
|
||||||
for (int pi = 0; pi < positions.size(); pi++) {
|
for (int pi = 0; pi < positions.size(); ) {
|
||||||
int position = positions.getInt(pi);
|
int position = positions.getInt(pi);
|
||||||
|
|
||||||
if (position == start && position + len == end) {
|
if (position == start && position + len == end) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
else if (position < end) {
|
||||||
if (sei + 2 < startsEnds.size()) {
|
pi++;
|
||||||
|
} else if (sei + 2 <= startsEnds.size()) {
|
||||||
start = startsEnds.getInt(sei++);
|
start = startsEnds.getInt(sei++);
|
||||||
end = startsEnds.getInt(sei++);
|
end = startsEnds.getInt(sei++);
|
||||||
}
|
}
|
||||||
@ -152,6 +154,39 @@ public class DocumentSpan {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int countRangeMatches(IntList positions, int len) {
|
||||||
|
if (null == startsEnds || startsEnds.size() < 2 || positions.isEmpty()) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int sei = 0;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
int start = startsEnds.getInt(sei++);
|
||||||
|
int end = startsEnds.getInt(sei++);
|
||||||
|
|
||||||
|
for (int pi = 0; pi < positions.size();) {
|
||||||
|
int position = positions.getInt(pi);
|
||||||
|
if (position >= start && position + len <= end) {
|
||||||
|
ret++;
|
||||||
|
pi++;
|
||||||
|
}
|
||||||
|
else if (position < end) {
|
||||||
|
pi++;
|
||||||
|
}
|
||||||
|
else if (sei + 2 <= startsEnds.size()) {
|
||||||
|
start = startsEnds.getInt(sei++);
|
||||||
|
end = startsEnds.getInt(sei++);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
/** Returns an iterator over each position between the start and end positions of each span in the document of this type */
|
/** Returns an iterator over each position between the start and end positions of each span in the document of this type */
|
||||||
public IntIterator iterator() {
|
public IntIterator iterator() {
|
||||||
if (null == startsEnds) {
|
if (null == startsEnds) {
|
||||||
|
@ -6,6 +6,7 @@ import nu.marginalia.index.forward.spans.ForwardIndexSpansWriter;
|
|||||||
import nu.marginalia.language.sentence.tag.HtmlTag;
|
import nu.marginalia.language.sentence.tag.HtmlTag;
|
||||||
import nu.marginalia.sequence.VarintCodedSequence;
|
import nu.marginalia.sequence.VarintCodedSequence;
|
||||||
import org.junit.jupiter.api.AfterEach;
|
import org.junit.jupiter.api.AfterEach;
|
||||||
|
import org.junit.jupiter.api.Assertions;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
@ -28,7 +29,7 @@ class ForwardIndexSpansReaderTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testSunnyDay() throws IOException {
|
void testContainsPosition() throws IOException {
|
||||||
ByteBuffer wa = ByteBuffer.allocate(32);
|
ByteBuffer wa = ByteBuffer.allocate(32);
|
||||||
|
|
||||||
long offset1;
|
long offset1;
|
||||||
@ -72,4 +73,91 @@ class ForwardIndexSpansReaderTest {
|
|||||||
assertFalse(spans2.title.containsPosition(8));
|
assertFalse(spans2.title.containsPosition(8));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testContainsRange() throws IOException {
|
||||||
|
long offset1;
|
||||||
|
try (var writer = new ForwardIndexSpansWriter(testFile)) {
|
||||||
|
writer.beginRecord(1);
|
||||||
|
writer.writeSpan(HtmlTag.HEADING.code, VarintCodedSequence.generate( 1, 2, 10, 15, 20, 25).buffer());
|
||||||
|
offset1 = writer.endRecord();
|
||||||
|
}
|
||||||
|
|
||||||
|
try (var reader = new ForwardIndexSpansReader(testFile);
|
||||||
|
var arena = Arena.ofConfined()
|
||||||
|
) {
|
||||||
|
var spans1 = reader.readSpans(arena, offset1);
|
||||||
|
|
||||||
|
assertTrue(spans1.heading.containsRange(IntList.of(10), 2));
|
||||||
|
assertTrue(spans1.heading.containsRange(IntList.of(8, 10), 2));
|
||||||
|
assertTrue(spans1.heading.containsRange(IntList.of(8, 10, 14), 2));
|
||||||
|
|
||||||
|
assertTrue(spans1.heading.containsRange(IntList.of(10), 5));
|
||||||
|
assertTrue(spans1.heading.containsRange(IntList.of(8, 10), 5));
|
||||||
|
assertTrue(spans1.heading.containsRange(IntList.of(8, 10, 14), 5));
|
||||||
|
|
||||||
|
assertFalse(spans1.heading.containsRange(IntList.of(11), 5));
|
||||||
|
assertFalse(spans1.heading.containsRange(IntList.of(9), 5));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testContainsRangeExact() throws IOException {
|
||||||
|
long offset1;
|
||||||
|
try (var writer = new ForwardIndexSpansWriter(testFile)) {
|
||||||
|
writer.beginRecord(1);
|
||||||
|
writer.writeSpan(HtmlTag.HEADING.code, VarintCodedSequence.generate( 1, 2, 10, 15, 20, 25).buffer());
|
||||||
|
offset1 = writer.endRecord();
|
||||||
|
}
|
||||||
|
|
||||||
|
try (var reader = new ForwardIndexSpansReader(testFile);
|
||||||
|
var arena = Arena.ofConfined()
|
||||||
|
) {
|
||||||
|
var spans1 = reader.readSpans(arena, offset1);
|
||||||
|
|
||||||
|
assertFalse(spans1.heading.containsRangeExact(IntList.of(10), 2));
|
||||||
|
assertFalse(spans1.heading.containsRangeExact(IntList.of(8, 10), 2));
|
||||||
|
assertFalse(spans1.heading.containsRangeExact(IntList.of(8, 10, 14), 2));
|
||||||
|
|
||||||
|
assertTrue(spans1.heading.containsRangeExact(IntList.of(10), 5));
|
||||||
|
assertTrue(spans1.heading.containsRangeExact(IntList.of(8, 10), 5));
|
||||||
|
assertTrue(spans1.heading.containsRangeExact(IntList.of(8, 10, 14), 5));
|
||||||
|
|
||||||
|
assertFalse(spans1.heading.containsRangeExact(IntList.of(11), 5));
|
||||||
|
assertFalse(spans1.heading.containsRangeExact(IntList.of(9), 5));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testCountRangeMatches() throws IOException {
|
||||||
|
long offset1;
|
||||||
|
try (var writer = new ForwardIndexSpansWriter(testFile)) {
|
||||||
|
writer.beginRecord(1);
|
||||||
|
writer.writeSpan(HtmlTag.HEADING.code, VarintCodedSequence.generate( 1, 2, 10, 15, 20, 25).buffer());
|
||||||
|
offset1 = writer.endRecord();
|
||||||
|
}
|
||||||
|
|
||||||
|
try (var reader = new ForwardIndexSpansReader(testFile);
|
||||||
|
var arena = Arena.ofConfined()
|
||||||
|
) {
|
||||||
|
var spans1 = reader.readSpans(arena, offset1);
|
||||||
|
|
||||||
|
Assertions.assertEquals(1, spans1.heading.countRangeMatches(IntList.of(10), 2));
|
||||||
|
Assertions.assertEquals(1, spans1.heading.countRangeMatches(IntList.of(8, 10), 2));
|
||||||
|
Assertions.assertEquals(1, spans1.heading.countRangeMatches(IntList.of(8, 10, 14), 2));
|
||||||
|
|
||||||
|
Assertions.assertEquals(1, spans1.heading.countRangeMatches(IntList.of(10), 5));
|
||||||
|
Assertions.assertEquals(1, spans1.heading.countRangeMatches(IntList.of(8, 10), 5));
|
||||||
|
Assertions.assertEquals(1, spans1.heading.countRangeMatches(IntList.of(8, 10, 14), 5));
|
||||||
|
|
||||||
|
Assertions.assertEquals(2, spans1.heading.countRangeMatches(IntList.of(10, 20), 5));
|
||||||
|
Assertions.assertEquals(2, spans1.heading.countRangeMatches(IntList.of(8, 10, 13, 20), 5));
|
||||||
|
Assertions.assertEquals(2, spans1.heading.countRangeMatches(IntList.of(8, 10, 14, 20, 55), 5));
|
||||||
|
|
||||||
|
Assertions.assertEquals(2, spans1.heading.countRangeMatches(IntList.of(10, 12), 2));
|
||||||
|
|
||||||
|
Assertions.assertEquals(0, spans1.heading.countRangeMatches(IntList.of(11), 5));
|
||||||
|
Assertions.assertEquals(0, spans1.heading.countRangeMatches(IntList.of(9), 5));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user