(qdebug) Accurately display positions when intersecting with spans

This commit is contained in:
Viktor Lofgren 2024-08-15 11:44:17 +02:00
parent 03d5dec24c
commit 0a383a712d
5 changed files with 60 additions and 35 deletions

View File

@ -2,31 +2,5 @@ package nu.marginalia.model.idx;
import nu.marginalia.sequence.GammaCodedSequence;
import java.util.List;
/**
 * Pairs a single-byte code with the coded sequence stored for it.
 *
 * @param code  byte identifying which span this sequence belongs to
 * @param spans the coded sequence associated with {@code code}
 */
public record CodedWordSpan(byte code, GammaCodedSequence spans) {

    /**
     * Wraps an already-split representation.
     *
     * @param codes one character per span, in the same order as {@code spans}; may be null
     * @param spans the sequences, index-aligned with {@code codes}
     * @return a {@link SplitSpansList} holding the two arguments as given
     */
    public static SplitSpansList fromSplit(String codes, List<GammaCodedSequence> spans) {
        return new SplitSpansList(codes, spans);
    }

    /**
     * Splits a list of spans into a string of codes and an index-aligned list of sequences.
     *
     * @param spanList the spans to split
     * @return a {@link SplitSpansList} whose {@code codes} has exactly one character per entry
     *         of {@code spanList}, so that {@link SplitSpansList#unite()} reverses this call
     */
    public static SplitSpansList split(List<CodedWordSpan> spanList) {
        StringBuilder codes = new StringBuilder(spanList.size());
        for (CodedWordSpan span : spanList) {
            // Append as a char on purpose: appending the boxed Byte would select
            // StringBuilder.append(Object) and write the decimal text of the byte
            // (e.g. "116" instead of "t"), breaking the one-character-per-span alignment.
            // Masking keeps the char in 0..255; unite()'s (byte) cast restores the original byte.
            codes.append((char) (span.code() & 0xFF));
        }

        List<GammaCodedSequence> sequences = spanList.stream()
                .map(CodedWordSpan::spans)
                .toList();

        return new SplitSpansList(codes.toString(), sequences);
    }

    /**
     * Split form of a span list: the i-th character of {@code codes} belongs to the
     * i-th element of {@code spans}.
     *
     * @param codes one character per span, or null when there are no spans
     * @param spans the sequences, index-aligned with {@code codes}
     */
    public record SplitSpansList(String codes, List<GammaCodedSequence> spans) {

        /**
         * Recombines the codes and sequences into a list of {@link CodedWordSpan}.
         *
         * @return an unmodifiable list with one entry per character of {@code codes};
         *         empty when {@code codes} is null
         */
        public List<CodedWordSpan> unite() {
            if (null == codes) {
                return List.of();
            }

            // Pair by position rather than by codes.indexOf(c): indexOf always finds the
            // first occurrence, which returns the wrong sequence when a code repeats.
            CodedWordSpan[] united = new CodedWordSpan[codes.length()];
            for (int i = 0; i < united.length; i++) {
                united[i] = new CodedWordSpan((byte) codes.charAt(i), spans.get(i));
            }
            return List.of(united);
        }
    }
}

View File

@ -3,7 +3,6 @@ package nu.marginalia.index.forward.spans;
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.ints.IntList;
import nu.marginalia.sequence.CodedSequence;
import nu.marginalia.sequence.SequenceOperations;
public class DocumentSpan {
@ -58,17 +57,55 @@ public class DocumentSpan {
return false;
}
/**
 * Hands this span's iterator and the given sequence's iterator to
 * SequenceOperations.intersectSequences and returns its verdict.
 * NOTE(review): presumably true when the two share at least one position — confirm
 * against SequenceOperations.
 */
public boolean overlapsRange(CodedSequence sequence) {
    var ownPositions = iterator();
    var otherPositions = sequence.iterator();

    return SequenceOperations.intersectSequences(ownPositions, otherPositions);
}
/**
 * Returns an iterator over every position covered by the spans of this type in the
 * document, i.e. the values produced by DocumentSpanPositionsIterator rather than the
 * raw start/end pairs.
 *
 * @return an empty iterator when no span data is present, otherwise a fresh
 *         DocumentSpanPositionsIterator
 */
public IntIterator iterator() {
    if (null == startsEnds) {
        return IntList.of().iterator();
    }

    // Single exit for the non-empty case; the former "return startsEnds.iterator()"
    // is dropped because it made this statement unreachable.
    return new DocumentSpanPositionsIterator();
}
/**
 * Iterator over the values from the start position through the end position of each
 * span in the document of this type. The underlying iterator is read two ints at a
 * time: first a start, then an end.
 */
class DocumentSpanPositionsIterator implements IntIterator {
    private final IntIterator startStopIterator;

    // Position computed by hasNext() but not yet handed out; negative means nothing is buffered.
    // NOTE(review): this sentinel assumes positions are never negative — confirm.
    private int value = -1;
    // Last position produced within the span currently being walked.
    private int current = -1;
    // End of the span currently being walked; it is itself produced as the span's last value.
    private int end = -1;

    public DocumentSpanPositionsIterator() {
        this.startStopIterator = startsEnds.iterator();
    }

    @Override
    public int nextInt() {
        if (!hasNext()) {
            throw new IllegalStateException();
        }

        // hasNext() has buffered a value; hand it out and clear the buffer.
        final int result = value;
        value = -1;
        return result;
    }

    @Override
    public boolean hasNext() {
        // A value is already buffered from an earlier call.
        if (value >= 0) {
            return true;
        }

        // Still inside the current span: step to its next position.
        if (current >= 0 && current < end) {
            current += 1;
            value = current;
            return true;
        }

        // Current span is used up and there are no further spans.
        if (!startStopIterator.hasNext()) {
            return false;
        }

        // Move on to the next span and buffer its start position.
        current = startStopIterator.nextInt();
        end = startStopIterator.nextInt();
        value = current;
        return true;
    }
}
public int length() {

View File

@ -4,7 +4,7 @@ import nu.marginalia.language.sentence.tag.HtmlTag;
import nu.marginalia.sequence.CodedSequence;
public class DocumentSpans {
private static DocumentSpan EMPTY_SPAN = new DocumentSpan();
private static final DocumentSpan EMPTY_SPAN = new DocumentSpan();
public DocumentSpan title = EMPTY_SPAN;
public DocumentSpan heading = EMPTY_SPAN;

View File

@ -142,7 +142,19 @@ public class DocumentKeywordsBuilder {
StringBuilder sb = new StringBuilder("[ ");
wordToMeta.forEach((word, meta) -> {
sb.append(word).append("->").append(WordFlags.decode(meta)).append(',').append(wordToPos.getOrDefault(word, new IntArrayList())).append(' ');
sb.append(word)
.append("->")
.append(WordFlags.decode(meta))
.append(',')
.append(wordToPos.getOrDefault(word, new IntArrayList()))
.append(' ');
});
wordSpans.forEach((tag, spans) -> {
sb.append(tag)
.append("->")
.append(spans)
.append(' ');
});
return sb.append(']').toString();
}

View File

@ -209,7 +209,9 @@ public class IntegrationTest {
var params = QueryProtobufCodec.convertRequest(request);
var query = queryFactory.createQuery(params, ResultRankingParameters.sensibleDefaults());
var p = ResultRankingParameters.sensibleDefaults();
p.exportDebugData = true;
var query = queryFactory.createQuery(params, p);
var indexRequest = QueryProtobufCodec.convertQuery(request, query);