mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
(qdebug) Accurately display positions when intersecting with spans
This commit is contained in:
parent
03d5dec24c
commit
0a383a712d
@ -2,31 +2,5 @@ package nu.marginalia.model.idx;
|
||||
|
||||
import nu.marginalia.sequence.GammaCodedSequence;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public record CodedWordSpan(byte code, GammaCodedSequence spans) {
|
||||
public static SplitSpansList fromSplit(String codes, List<GammaCodedSequence> spans) {
|
||||
return new SplitSpansList(codes, spans);
|
||||
}
|
||||
public static SplitSpansList split(List<CodedWordSpan> spanList) {
|
||||
return new SplitSpansList(
|
||||
spanList.stream()
|
||||
.map(CodedWordSpan::code)
|
||||
.collect(StringBuilder::new, StringBuilder::append, StringBuilder::append).toString(),
|
||||
spanList.stream()
|
||||
.map(CodedWordSpan::spans)
|
||||
.toList()
|
||||
);
|
||||
}
|
||||
|
||||
public record SplitSpansList(String codes, List<GammaCodedSequence> spans) {
|
||||
public List<CodedWordSpan> unite() {
|
||||
if (null == codes) {
|
||||
return List.of();
|
||||
}
|
||||
else {
|
||||
return codes.chars().mapToObj(c -> new CodedWordSpan((byte) c, spans.get(codes.indexOf(c)))).toList();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -3,7 +3,6 @@ package nu.marginalia.index.forward.spans;
|
||||
import it.unimi.dsi.fastutil.ints.IntIterator;
|
||||
import it.unimi.dsi.fastutil.ints.IntList;
|
||||
import nu.marginalia.sequence.CodedSequence;
|
||||
import nu.marginalia.sequence.SequenceOperations;
|
||||
|
||||
public class DocumentSpan {
|
||||
|
||||
@ -58,17 +57,55 @@ public class DocumentSpan {
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean overlapsRange(CodedSequence sequence) {
|
||||
return SequenceOperations.intersectSequences(iterator(), sequence.iterator());
|
||||
}
|
||||
|
||||
/** Returns an iterator over the start and end positions of each span in the document of this type */
|
||||
public IntIterator iterator() {
|
||||
if (null == startsEnds) {
|
||||
return IntList.of().iterator();
|
||||
}
|
||||
|
||||
return startsEnds.iterator();
|
||||
return new DocumentSpanPositionsIterator();
|
||||
}
|
||||
|
||||
/** Iteator over the values between the start and end positions of each span in the document of this type */
|
||||
class DocumentSpanPositionsIterator implements IntIterator {
|
||||
private final IntIterator startStopIterator;
|
||||
|
||||
private int value = -1;
|
||||
private int current = -1;
|
||||
private int end = -1;
|
||||
|
||||
public DocumentSpanPositionsIterator() {
|
||||
this.startStopIterator = startsEnds.iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextInt() {
|
||||
if (hasNext()) {
|
||||
int ret = value;
|
||||
value = -1;
|
||||
return ret;
|
||||
}
|
||||
throw new IllegalStateException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
if (value >= 0) {
|
||||
return true;
|
||||
}
|
||||
else if (current >= 0 && current < end) {
|
||||
value = ++current;
|
||||
return true;
|
||||
}
|
||||
else if (startStopIterator.hasNext()) {
|
||||
current = startStopIterator.nextInt();
|
||||
end = startStopIterator.nextInt();
|
||||
value = current;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public int length() {
|
||||
|
@ -4,7 +4,7 @@ import nu.marginalia.language.sentence.tag.HtmlTag;
|
||||
import nu.marginalia.sequence.CodedSequence;
|
||||
|
||||
public class DocumentSpans {
|
||||
private static DocumentSpan EMPTY_SPAN = new DocumentSpan();
|
||||
private static final DocumentSpan EMPTY_SPAN = new DocumentSpan();
|
||||
|
||||
public DocumentSpan title = EMPTY_SPAN;
|
||||
public DocumentSpan heading = EMPTY_SPAN;
|
||||
|
@ -142,7 +142,19 @@ public class DocumentKeywordsBuilder {
|
||||
StringBuilder sb = new StringBuilder("[ ");
|
||||
|
||||
wordToMeta.forEach((word, meta) -> {
|
||||
sb.append(word).append("->").append(WordFlags.decode(meta)).append(',').append(wordToPos.getOrDefault(word, new IntArrayList())).append(' ');
|
||||
sb.append(word)
|
||||
.append("->")
|
||||
.append(WordFlags.decode(meta))
|
||||
.append(',')
|
||||
.append(wordToPos.getOrDefault(word, new IntArrayList()))
|
||||
.append(' ');
|
||||
});
|
||||
|
||||
wordSpans.forEach((tag, spans) -> {
|
||||
sb.append(tag)
|
||||
.append("->")
|
||||
.append(spans)
|
||||
.append(' ');
|
||||
});
|
||||
return sb.append(']').toString();
|
||||
}
|
||||
|
@ -209,7 +209,9 @@ public class IntegrationTest {
|
||||
|
||||
var params = QueryProtobufCodec.convertRequest(request);
|
||||
|
||||
var query = queryFactory.createQuery(params, ResultRankingParameters.sensibleDefaults());
|
||||
var p = ResultRankingParameters.sensibleDefaults();
|
||||
p.exportDebugData = true;
|
||||
var query = queryFactory.createQuery(params, p);
|
||||
|
||||
|
||||
var indexRequest = QueryProtobufCodec.convertQuery(request, query);
|
||||
|
Loading…
Reference in New Issue
Block a user