diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/QueryProtobufCodec.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/QueryProtobufCodec.java index 5a43df1b..2907992d 100644 --- a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/QueryProtobufCodec.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/QueryProtobufCodec.java @@ -138,6 +138,7 @@ public class QueryProtobufCodec { rawItem.getHtmlFeatures(), keywordScores, rawItem.getResultsFromDomain(), + rawItem.getHasPriorityTerms(), Double.NaN // Not set ); } diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/SearchResultItem.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/SearchResultItem.java index 7cd95b96..ad8b8cb1 100644 --- a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/SearchResultItem.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/SearchResultItem.java @@ -28,11 +28,17 @@ public class SearchResultItem implements Comparable { /** How many other potential results existed in the same domain */ public int resultsFromDomain; - public SearchResultItem(long combinedId, long encodedDocMetadata, int htmlFeatures) { + public boolean hasPrioTerm; + + public SearchResultItem(long combinedId, + long encodedDocMetadata, + int htmlFeatures, + boolean hasPrioTerm) { this.combinedId = combinedId; this.encodedDocMetadata = encodedDocMetadata; this.keywordScores = new ArrayList<>(); this.htmlFeatures = htmlFeatures; + this.hasPrioTerm = hasPrioTerm; } @@ -85,4 +91,6 @@ public class SearchResultItem implements Comparable { return Long.compare(this.combinedId, o.combinedId); } + + } diff --git a/code/functions/search-query/api/src/main/protobuf/query-api.proto b/code/functions/search-query/api/src/main/protobuf/query-api.proto index 3094699b..bae06e66 100644 --- 
a/code/functions/search-query/api/src/main/protobuf/query-api.proto +++ b/code/functions/search-query/api/src/main/protobuf/query-api.proto @@ -101,13 +101,13 @@ message RpcRawResultItem { int64 encodedDocMetadata = 3; // bit encoded document metadata int32 htmlFeatures = 4; // bitmask encoding features of the document repeated RpcResultKeywordScore keywordScores = 5; + bool hasPriorityTerms = 6; // true if the document matched at least one priority term of the query } /* Information about how well a keyword matches a query */ message RpcResultKeywordScore { string keyword = 1; // the keyword int64 encodedWordMetadata = 2; // bit encoded word metadata - bool hasPriorityTerms = 3; // true if this word is important to the document } /* Query execution parameters */ diff --git a/code/index/java/nu/marginalia/index/IndexGrpcService.java b/code/index/java/nu/marginalia/index/IndexGrpcService.java index fa0a8343..4810d625 100644 --- a/code/index/java/nu/marginalia/index/IndexGrpcService.java +++ b/code/index/java/nu/marginalia/index/IndexGrpcService.java @@ -138,6 +138,7 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase { rawItem.setResultsFromDomain(rawResult.resultsFromDomain); rawItem.setHtmlFeatures(rawResult.htmlFeatures); rawItem.setEncodedDocMetadata(rawResult.encodedDocMetadata); + rawItem.setHasPriorityTerms(rawResult.hasPrioTerm); for (var score : rawResult.keywordScores) { rawItem.addKeywordScores( diff --git a/code/index/java/nu/marginalia/index/results/IndexMetadataService.java b/code/index/java/nu/marginalia/index/results/IndexMetadataService.java index 977a87e7..ce23c3f2 100644 --- a/code/index/java/nu/marginalia/index/results/IndexMetadataService.java +++ b/code/index/java/nu/marginalia/index/results/IndexMetadataService.java @@ -43,6 +43,7 @@ public class IndexMetadataService { public QuerySearchTerms getSearchTerms(CompiledQuery compiledQuery, SearchQuery searchQuery) { LongArrayList termIdsList = new LongArrayList(); + LongArrayList termIdsPrio = new 
LongArrayList(); TObjectLongHashMap termToId = new TObjectLongHashMap<>(10, 0.75f, -1); @@ -52,8 +53,30 @@ public class IndexMetadataService { termToId.put(word, id); } + for (var term : searchQuery.searchTermsAdvice) { + if (termToId.containsKey(term)) { + continue; + } + + long id = SearchTermsUtil.getWordId(term); + termIdsList.add(id); + termToId.put(term, id); + } + + for (var term : searchQuery.searchTermsPriority) { + if (termToId.containsKey(term)) { + continue; + } + + long id = SearchTermsUtil.getWordId(term); + termIdsList.add(id); + termIdsPrio.add(id); + termToId.put(term, id); + } + return new QuerySearchTerms(termToId, new TermIdList(termIdsList), + new TermIdList(termIdsPrio), new TermCoherenceGroupList( searchQuery.searchTermCoherences.stream().map(TermCoherenceGroup::new).toList() ) diff --git a/code/index/java/nu/marginalia/index/results/IndexResultValuationContext.java b/code/index/java/nu/marginalia/index/results/IndexResultValuationContext.java index 89b4c543..a9d6b4a6 100644 --- a/code/index/java/nu/marginalia/index/results/IndexResultValuationContext.java +++ b/code/index/java/nu/marginalia/index/results/IndexResultValuationContext.java @@ -52,7 +52,8 @@ public class IndexResultValuationContext { this.searchTerms = metadataService.getSearchTerms(params.compiledQuery, params.query); - this.termMetadataForCombinedDocumentIds = metadataService.getTermMetadataForDocuments(ids, searchTerms.termIdsAll); + this.termMetadataForCombinedDocumentIds = metadataService.getTermMetadataForDocuments(ids, + searchTerms.termIdsAll); } private final long flagsFilterMask = @@ -69,7 +70,10 @@ public class IndexResultValuationContext { long docMetadata = statefulIndex.getDocumentMetadata(docId); int htmlFeatures = statefulIndex.getHtmlFeatures(docId); - SearchResultItem searchResult = new SearchResultItem(docId, docMetadata, htmlFeatures); + SearchResultItem searchResult = new SearchResultItem(docId, + docMetadata, + htmlFeatures, + hasPrioTerm(combinedId)); 
long[] wordMetas = new long[compiledQuery.size()]; SearchResultKeywordScore[] scores = new SearchResultKeywordScore[compiledQuery.size()]; @@ -108,11 +112,24 @@ public class IndexResultValuationContext { 5000, // use a dummy value here as it's not present in the index rankingContext); + if (searchResult.hasPrioTerm) { + score = 0.75 * score; + } + searchResult.setScore(score); return searchResult; } + private boolean hasPrioTerm(long combinedId) { + for (var term : searchTerms.termIdsPrio.array()) { + if (termMetadataForCombinedDocumentIds.hasTermMeta(term, combinedId)) { + return true; + } + } + return false; + } + private boolean meetsQueryStrategyRequirements(CompiledQueryLong queryGraphScores, QueryStrategy queryStrategy) { diff --git a/code/index/java/nu/marginalia/index/results/model/QuerySearchTerms.java b/code/index/java/nu/marginalia/index/results/model/QuerySearchTerms.java index d72e0ea9..bbb7cf30 100644 --- a/code/index/java/nu/marginalia/index/results/model/QuerySearchTerms.java +++ b/code/index/java/nu/marginalia/index/results/model/QuerySearchTerms.java @@ -6,14 +6,17 @@ import nu.marginalia.index.results.model.ids.TermIdList; public class QuerySearchTerms { private final TObjectLongHashMap termToId; public final TermIdList termIdsAll; + public final TermIdList termIdsPrio; public final TermCoherenceGroupList coherences; public QuerySearchTerms(TObjectLongHashMap termToId, TermIdList termIdsAll, + TermIdList termIdsPrio, TermCoherenceGroupList coherences) { this.termToId = termToId; this.termIdsAll = termIdsAll; + this.termIdsPrio = termIdsPrio; this.coherences = coherences; } diff --git a/code/index/java/nu/marginalia/index/results/model/TermMetadataForCombinedDocumentIds.java b/code/index/java/nu/marginalia/index/results/model/TermMetadataForCombinedDocumentIds.java index 9068dd69..3ef2f7ab 100644 --- a/code/index/java/nu/marginalia/index/results/model/TermMetadataForCombinedDocumentIds.java +++ 
b/code/index/java/nu/marginalia/index/results/model/TermMetadataForCombinedDocumentIds.java @@ -18,12 +18,21 @@ public class TermMetadataForCombinedDocumentIds { public long getTermMetadata(long termId, long combinedId) { var metaByCombinedId = termdocToMeta.get(termId); if (metaByCombinedId == null) { - logger.warn("Missing meta for term {}", termId); return 0; } return metaByCombinedId.get(combinedId); } + public boolean hasTermMeta(long termId, long combinedId) { + var metaByCombinedId = termdocToMeta.get(termId); + + if (metaByCombinedId == null) { + return false; + } + + return metaByCombinedId.get(combinedId) != 0; + } + public record DocumentsWithMetadata(Long2LongOpenHashMap data) { public DocumentsWithMetadata(CombinedDocIdList combinedDocIdsAll, DocMetadataList metadata) { this(new Long2LongOpenHashMap(combinedDocIdsAll.array(), metadata.array())); diff --git a/code/index/test/nu/marginalia/index/results/IndexResultDomainDeduplicatorTest.java b/code/index/test/nu/marginalia/index/results/IndexResultDomainDeduplicatorTest.java index c605a0a8..21f6312e 100644 --- a/code/index/test/nu/marginalia/index/results/IndexResultDomainDeduplicatorTest.java +++ b/code/index/test/nu/marginalia/index/results/IndexResultDomainDeduplicatorTest.java @@ -25,7 +25,7 @@ class IndexResultDomainDeduplicatorTest { } SearchResultItem forId(int domain, int ordinal) { - return new SearchResultItem(UrlIdCodec.encodeId(domain, ordinal), 0, 0, List.of(), 4, Double.NaN); + return new SearchResultItem(UrlIdCodec.encodeId(domain, ordinal), 0, 0, List.of(), 4, Double.NaN, false); } } \ No newline at end of file