From adf846bfd2789c4d2966a95f1ceb8905d7171491 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Tue, 16 Apr 2024 18:07:43 +0200 Subject: [PATCH] (index) Fix term coherence evaluation The code was incorrectly using the documentId instead of the combined id, resulting in almost all result sets being incorrectly seen as zero. --- .../index/results/IndexResultValuationContext.java | 2 +- .../index/results/model/TermCoherenceGroupList.java | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/code/index/java/nu/marginalia/index/results/IndexResultValuationContext.java b/code/index/java/nu/marginalia/index/results/IndexResultValuationContext.java index 5383cbb9..3a7f157b 100644 --- a/code/index/java/nu/marginalia/index/results/IndexResultValuationContext.java +++ b/code/index/java/nu/marginalia/index/results/IndexResultValuationContext.java @@ -64,7 +64,7 @@ public class IndexResultValuationContext { long docId = UrlIdCodec.removeRank(combinedId); - if (!searchTerms.coherences.test(termMetadataForCombinedDocumentIds, docId)) + if (!searchTerms.coherences.test(termMetadataForCombinedDocumentIds, combinedId)) return null; long docMetadata = statefulIndex.getDocumentMetadata(docId); diff --git a/code/index/java/nu/marginalia/index/results/model/TermCoherenceGroupList.java b/code/index/java/nu/marginalia/index/results/model/TermCoherenceGroupList.java index 2b6c24f5..4b119c60 100644 --- a/code/index/java/nu/marginalia/index/results/model/TermCoherenceGroupList.java +++ b/code/index/java/nu/marginalia/index/results/model/TermCoherenceGroupList.java @@ -15,9 +15,9 @@ public record TermCoherenceGroupList(List words) { this.words = Collections.unmodifiableList(words); } - public boolean test(TermMetadataForCombinedDocumentIds documents, long docId) { + public boolean test(TermMetadataForCombinedDocumentIds documents, long combinedId) { for (var coherenceSet : words()) { - if (!coherenceSet.test(documents, docId)) { + if (!coherenceSet.test(documents, combinedId)) { return false; } } @@ -36,11 +36,11 @@ public record TermCoherenceGroupList(List words) { this(coh.stream().mapToLong(SearchTermsUtil::getWordId).toArray()); } - public boolean test(TermMetadataForCombinedDocumentIds documents, long docId) { + public boolean test(TermMetadataForCombinedDocumentIds documents, long combinedId) { long overlap = 0xFF_FFFF_FFFF_FFFFL; for (var word : words) { - overlap &= documents.getTermMetadata(word, docId); + overlap &= documents.getTermMetadata(word, combinedId); } return WordMetadata.decodePositions(overlap) != 0L;