mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(index) Optimize DocumentSpan
This commit is contained in:
parent
982b03382b
commit
965c89798e
@ -68,21 +68,31 @@ public class DocumentSpan {
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean containsRange(int rangeStart, int len) {
|
||||
if (startsEnds == null) {
|
||||
public boolean containsRange(IntIterator positionsIter, int len) {
|
||||
if (null == startsEnds || !positionsIter.hasNext()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
var iter = startsEnds.iterator();
|
||||
while (iter.hasNext()) {
|
||||
int start = iter.nextInt();
|
||||
if (start > rangeStart) {
|
||||
return false;
|
||||
int start = -1;
|
||||
int end = -1;
|
||||
|
||||
while (iter.hasNext() && positionsIter.hasNext()) {
|
||||
if (start < 0) {
|
||||
start = iter.nextInt();
|
||||
end = iter.nextInt();
|
||||
}
|
||||
int end = iter.nextInt();
|
||||
if (end > rangeStart + len) {
|
||||
|
||||
int position = positionsIter.nextInt();
|
||||
if (position < start) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (position + len < end) {
|
||||
return true;
|
||||
}
|
||||
|
||||
start = -1;
|
||||
}
|
||||
|
||||
return false;
|
||||
|
@ -398,8 +398,9 @@ public class IndexResultScoreCalculator {
|
||||
float verbatimMatchScore = 0.f;
|
||||
|
||||
var fullGroup = constraints.getFullGroup();
|
||||
IntList fullGroupIntersections = fullGroup.findIntersections(positions);
|
||||
for (var tag : HtmlTag.includedTags) {
|
||||
if (fullGroup.test(spans.getSpan(tag), positions)) {
|
||||
if (spans.getSpan(tag).containsRange(fullGroupIntersections.iterator(), fullGroup.size)) {
|
||||
verbatimMatchScore += verbatimMatches.getWeightFull(tag) * fullGroup.size;
|
||||
verbatimMatches.set(tag);
|
||||
}
|
||||
@ -410,8 +411,9 @@ public class IndexResultScoreCalculator {
|
||||
int groupSize = optionalGroup.size;
|
||||
float sizeScalingFactor = groupSize / (float) largestOptional;
|
||||
|
||||
IntList intersections = optionalGroup.findIntersections(positions);
|
||||
for (var tag : HtmlTag.includedTags) {
|
||||
if (optionalGroup.test(spans.getSpan(tag), positions)) {
|
||||
if (spans.getSpan(tag).containsRange(intersections.iterator(), groupSize)) {
|
||||
verbatimMatchScore += verbatimMatches.getWeightPartial(tag) * sizeScalingFactor * groupSize;
|
||||
}
|
||||
}
|
||||
|
@ -2,7 +2,6 @@ package nu.marginalia.index.results.model;
|
||||
|
||||
import it.unimi.dsi.fastutil.ints.IntIterator;
|
||||
import it.unimi.dsi.fastutil.ints.IntList;
|
||||
import nu.marginalia.index.forward.spans.DocumentSpan;
|
||||
import nu.marginalia.index.model.SearchTermsUtil;
|
||||
import nu.marginalia.index.results.model.ids.TermIdList;
|
||||
import nu.marginalia.sequence.CodedSequence;
|
||||
@ -114,7 +113,7 @@ public class PhraseConstraintGroupList {
|
||||
}
|
||||
|
||||
|
||||
public boolean test(DocumentSpan span, IntList[] positions) {
|
||||
public IntList findIntersections(IntList[] positions) {
|
||||
IntIterator[] sequences = new IntIterator[present.cardinality()];
|
||||
int[] iterOffsets = new int[sequences.length];
|
||||
|
||||
@ -124,7 +123,7 @@ public class PhraseConstraintGroupList {
|
||||
}
|
||||
int offset = offsets[oi];
|
||||
if (offset < 0)
|
||||
return false;
|
||||
return IntList.of();
|
||||
|
||||
// Create iterators that are offset by their relative position in the
|
||||
// sequence. This is done by subtracting the index from the offset,
|
||||
@ -133,21 +132,13 @@ public class PhraseConstraintGroupList {
|
||||
|
||||
var posForTerm = positions[offset];
|
||||
if (posForTerm == null) {
|
||||
return false;
|
||||
return IntList.of();
|
||||
}
|
||||
sequences[si++] = posForTerm.iterator();
|
||||
iterOffsets[si - 1] = -oi;
|
||||
}
|
||||
|
||||
var intersections = SequenceOperations.findIntersections(iterOffsets, sequences);
|
||||
|
||||
for (int idx = 0; idx < intersections.size(); idx++) {
|
||||
if (span.containsRange(intersections.getInt(idx), sequences.length)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
return SequenceOperations.findIntersections(iterOffsets, sequences);
|
||||
}
|
||||
|
||||
public int minDistance(IntList[] positions) {
|
||||
|
@ -71,7 +71,7 @@ class SequenceOperationsTest {
|
||||
GammaCodedSequence seq2 = GammaCodedSequence.generate(wa, 2, 5, 8, 10, 14);
|
||||
GammaCodedSequence seq3 = GammaCodedSequence.generate(wa, 1, 5, 8, 9, 10);
|
||||
|
||||
assertEquals(IntList.of(8, 10), SequenceOperations.findIntersections(iterOffsets, seq1.iterator(), seq2.iterator(), seq3.iterator()));
|
||||
assertEquals(IntList.of(8, 10), SequenceOperations.findIntersections(seq1.iterator(), seq2.iterator(), seq3.iterator()));
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user