mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(qdebug) Accurately display positions when intersecting with spans
This commit is contained in:
parent
03d5dec24c
commit
0a383a712d
@ -2,31 +2,5 @@ package nu.marginalia.model.idx;
|
|||||||
|
|
||||||
import nu.marginalia.sequence.GammaCodedSequence;
|
import nu.marginalia.sequence.GammaCodedSequence;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public record CodedWordSpan(byte code, GammaCodedSequence spans) {
|
public record CodedWordSpan(byte code, GammaCodedSequence spans) {
|
||||||
public static SplitSpansList fromSplit(String codes, List<GammaCodedSequence> spans) {
|
|
||||||
return new SplitSpansList(codes, spans);
|
|
||||||
}
|
|
||||||
public static SplitSpansList split(List<CodedWordSpan> spanList) {
|
|
||||||
return new SplitSpansList(
|
|
||||||
spanList.stream()
|
|
||||||
.map(CodedWordSpan::code)
|
|
||||||
.collect(StringBuilder::new, StringBuilder::append, StringBuilder::append).toString(),
|
|
||||||
spanList.stream()
|
|
||||||
.map(CodedWordSpan::spans)
|
|
||||||
.toList()
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
public record SplitSpansList(String codes, List<GammaCodedSequence> spans) {
|
|
||||||
public List<CodedWordSpan> unite() {
|
|
||||||
if (null == codes) {
|
|
||||||
return List.of();
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return codes.chars().mapToObj(c -> new CodedWordSpan((byte) c, spans.get(codes.indexOf(c)))).toList();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -3,7 +3,6 @@ package nu.marginalia.index.forward.spans;
|
|||||||
import it.unimi.dsi.fastutil.ints.IntIterator;
|
import it.unimi.dsi.fastutil.ints.IntIterator;
|
||||||
import it.unimi.dsi.fastutil.ints.IntList;
|
import it.unimi.dsi.fastutil.ints.IntList;
|
||||||
import nu.marginalia.sequence.CodedSequence;
|
import nu.marginalia.sequence.CodedSequence;
|
||||||
import nu.marginalia.sequence.SequenceOperations;
|
|
||||||
|
|
||||||
public class DocumentSpan {
|
public class DocumentSpan {
|
||||||
|
|
||||||
@ -58,17 +57,55 @@ public class DocumentSpan {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean overlapsRange(CodedSequence sequence) {
|
|
||||||
return SequenceOperations.intersectSequences(iterator(), sequence.iterator());
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns an iterator over the start and end positions of each span in the document of this type */
|
/** Returns an iterator over the positions covered by each span in the document of this type, walking from each span's start position to its end position */
|
||||||
public IntIterator iterator() {
|
public IntIterator iterator() {
|
||||||
if (null == startsEnds) {
|
if (null == startsEnds) {
|
||||||
return IntList.of().iterator();
|
return IntList.of().iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
return startsEnds.iterator();
|
return new DocumentSpanPositionsIterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Iterator over the values between the start and end positions of each span in the document of this type */
|
||||||
|
class DocumentSpanPositionsIterator implements IntIterator {
|
||||||
|
private final IntIterator startStopIterator;
|
||||||
|
|
||||||
|
private int value = -1;
|
||||||
|
private int current = -1;
|
||||||
|
private int end = -1;
|
||||||
|
|
||||||
|
public DocumentSpanPositionsIterator() {
|
||||||
|
this.startStopIterator = startsEnds.iterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int nextInt() {
|
||||||
|
if (hasNext()) {
|
||||||
|
int ret = value;
|
||||||
|
value = -1;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
throw new IllegalStateException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasNext() {
|
||||||
|
if (value >= 0) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else if (current >= 0 && current < end) {
|
||||||
|
value = ++current;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else if (startStopIterator.hasNext()) {
|
||||||
|
current = startStopIterator.nextInt();
|
||||||
|
end = startStopIterator.nextInt();
|
||||||
|
value = current;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public int length() {
|
public int length() {
|
||||||
|
@ -4,7 +4,7 @@ import nu.marginalia.language.sentence.tag.HtmlTag;
|
|||||||
import nu.marginalia.sequence.CodedSequence;
|
import nu.marginalia.sequence.CodedSequence;
|
||||||
|
|
||||||
public class DocumentSpans {
|
public class DocumentSpans {
|
||||||
private static DocumentSpan EMPTY_SPAN = new DocumentSpan();
|
private static final DocumentSpan EMPTY_SPAN = new DocumentSpan();
|
||||||
|
|
||||||
public DocumentSpan title = EMPTY_SPAN;
|
public DocumentSpan title = EMPTY_SPAN;
|
||||||
public DocumentSpan heading = EMPTY_SPAN;
|
public DocumentSpan heading = EMPTY_SPAN;
|
||||||
|
@ -142,7 +142,19 @@ public class DocumentKeywordsBuilder {
|
|||||||
StringBuilder sb = new StringBuilder("[ ");
|
StringBuilder sb = new StringBuilder("[ ");
|
||||||
|
|
||||||
wordToMeta.forEach((word, meta) -> {
|
wordToMeta.forEach((word, meta) -> {
|
||||||
sb.append(word).append("->").append(WordFlags.decode(meta)).append(',').append(wordToPos.getOrDefault(word, new IntArrayList())).append(' ');
|
sb.append(word)
|
||||||
|
.append("->")
|
||||||
|
.append(WordFlags.decode(meta))
|
||||||
|
.append(',')
|
||||||
|
.append(wordToPos.getOrDefault(word, new IntArrayList()))
|
||||||
|
.append(' ');
|
||||||
|
});
|
||||||
|
|
||||||
|
wordSpans.forEach((tag, spans) -> {
|
||||||
|
sb.append(tag)
|
||||||
|
.append("->")
|
||||||
|
.append(spans)
|
||||||
|
.append(' ');
|
||||||
});
|
});
|
||||||
return sb.append(']').toString();
|
return sb.append(']').toString();
|
||||||
}
|
}
|
||||||
|
@ -209,7 +209,9 @@ public class IntegrationTest {
|
|||||||
|
|
||||||
var params = QueryProtobufCodec.convertRequest(request);
|
var params = QueryProtobufCodec.convertRequest(request);
|
||||||
|
|
||||||
var query = queryFactory.createQuery(params, ResultRankingParameters.sensibleDefaults());
|
var p = ResultRankingParameters.sensibleDefaults();
|
||||||
|
p.exportDebugData = true;
|
||||||
|
var query = queryFactory.createQuery(params, p);
|
||||||
|
|
||||||
|
|
||||||
var indexRequest = QueryProtobufCodec.convertQuery(request, query);
|
var indexRequest = QueryProtobufCodec.convertQuery(request, query);
|
||||||
|
Loading…
Reference in New Issue
Block a user