From eb74d08f2a8f6257265956ecc95a6f4dbb931da3 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Fri, 19 Apr 2024 11:46:27 +0200 Subject: [PATCH] (qs) Additional info in query debug UI --- .../api/searchquery/IndexProtobufCodec.java | 6 ++- .../api/searchquery/QueryProtobufCodec.java | 52 ++++++++++++++++++- .../results/DecoratedSearchResultItem.java | 10 +++- .../results/ResultRankingParameters.java | 3 ++ .../results/debug/ResultRankingDetails.java | 7 +++ .../results/debug/ResultRankingInputs.java | 3 ++ .../results/debug/ResultRankingOutputs.java | 17 ++++++ .../api/src/main/protobuf/query-api.proto | 33 ++++++++++++ .../query_parser/QueryExpansion.java | 1 - .../nu/marginalia/index/IndexGrpcService.java | 50 ++++++++++++++++++ .../results/IndexResultValuationContext.java | 3 +- .../results/IndexResultValuatorService.java | 32 +++++++++--- .../ranking/results/ResultValuator.java | 40 +++++++++++++- .../ranking/results/ResultValuatorTest.java | 9 ++-- .../marginalia/query/QueryBasicInterface.java | 1 + .../resources/templates/qdebug.hdb | 1 + 16 files changed, 250 insertions(+), 18 deletions(-) create mode 100644 code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/debug/ResultRankingDetails.java create mode 100644 code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/debug/ResultRankingInputs.java create mode 100644 code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/debug/ResultRankingOutputs.java diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/IndexProtobufCodec.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/IndexProtobufCodec.java index 8a1c5209..d582d7ce 100644 --- a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/IndexProtobufCodec.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/IndexProtobufCodec.java @@ -97,7 +97,8 @@ public class IndexProtobufCodec { params.getTcfJaccardWeight(), params.getTcfOverlapWeight(), ResultRankingParameters.TemporalBias.valueOf(params.getTemporalBias().getBias().name()), - params.getTemporalBiasWeight() + params.getTemporalBiasWeight(), + params.getExportDebugData() ); } @@ -124,7 +125,8 @@ public class IndexProtobufCodec { .setBm25PrioWeight(rankingParams.bm25PrioWeight) .setTcfOverlapWeight(rankingParams.tcfOverlapWeight) .setTcfJaccardWeight(rankingParams.tcfJaccardWeight) - .setTemporalBiasWeight(rankingParams.temporalBiasWeight); + .setTemporalBiasWeight(rankingParams.temporalBiasWeight) + .setExportDebugData(rankingParams.exportDebugData); if (temporalBias != null && temporalBias.getBias() != RpcTemporalBias.Bias.NONE) { builder.setTemporalBias(temporalBias); diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/QueryProtobufCodec.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/QueryProtobufCodec.java index 51d0a4d6..9830e219 100644 --- a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/QueryProtobufCodec.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/QueryProtobufCodec.java @@ -6,6 +6,9 @@ import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem; import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; import nu.marginalia.api.searchquery.model.results.SearchResultItem; import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore; +import nu.marginalia.api.searchquery.model.results.debug.ResultRankingDetails; +import nu.marginalia.api.searchquery.model.results.debug.ResultRankingInputs; +import nu.marginalia.api.searchquery.model.results.debug.ResultRankingOutputs; import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.model.EdgeUrl; import nu.marginalia.api.searchquery.model.query.ProcessedQuery; @@ -126,7 +129,51 @@ public class QueryProtobufCodec { results.getDataHash(), results.getWordsTotal(), results.getBestPositions(), - results.getRankingScore() + results.getRankingScore(), + convertRankingDetails(results.getRankingDetails()) + ); + } + + private static ResultRankingDetails convertRankingDetails(RpcResultRankingDetails rankingDetails) { + if (rankingDetails == null) + return null; + var inputs = rankingDetails.getInputs(); + var outputs = rankingDetails.getOutput(); + + return new ResultRankingDetails( + convertRankingInputs(inputs), + convertRankingOutputs(outputs) + ); + + } + + private static ResultRankingOutputs convertRankingOutputs(RpcResultRankingOutputs outputs) { + return new ResultRankingOutputs( + outputs.getAverageSentenceLengthPenalty(), + outputs.getQualityPenalty(), + outputs.getRankingBonus(), + outputs.getTopologyBonus(), + outputs.getDocumentLengthPenalty(), + outputs.getTemporalBias(), + outputs.getFlagsPenalty(), + outputs.getOverallPart(), + outputs.getTcfOverlap(), + outputs.getTcfJaccard(), + outputs.getBM25F(), + outputs.getBM25N(), + outputs.getBM25P() + ); + } + + private static ResultRankingInputs convertRankingInputs(RpcResultRankingInputs inputs) { + return new ResultRankingInputs( + inputs.getRank(), + inputs.getAsl(), + inputs.getQuality(), + inputs.getSize(), + inputs.getFlagsPenalty(), + inputs.getTopology(), + inputs.getYear() ); } @@ -209,7 +256,8 @@ public class QueryProtobufCodec { rpcDecoratedResultItem.getDataHash(), rpcDecoratedResultItem.getWordsTotal(), rpcDecoratedResultItem.getBestPositions(), - rpcDecoratedResultItem.getRankingScore() + rpcDecoratedResultItem.getRankingScore(), + convertRankingDetails(rpcDecoratedResultItem.getRankingDetails()) ); } diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/DecoratedSearchResultItem.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/DecoratedSearchResultItem.java index df48ea64..0522e7bc 100644 --- a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/DecoratedSearchResultItem.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/DecoratedSearchResultItem.java @@ -2,6 +2,7 @@ package nu.marginalia.api.searchquery.model.results; import lombok.Getter; import lombok.ToString; +import nu.marginalia.api.searchquery.model.results.debug.ResultRankingDetails; import nu.marginalia.model.EdgeUrl; import org.jetbrains.annotations.NotNull; @@ -33,6 +34,9 @@ public class DecoratedSearchResultItem implements Comparable detailConsumer = rankingContext.params.exportDebugData ? detailsExtractor::set : null; + + double score = resultValuator.calculateSearchResultValue(wordMetas, + result.encodedDocMetadata, + result.htmlFeatures, + docData.wordsTotal(), + rankingContext, + detailConsumer); + return new DecoratedSearchResultItem( result, docData.url(), @@ -167,15 +180,22 @@ public class IndexResultValuatorService { docData.dataHash(), docData.wordsTotal(), bestPositions(wordMetas), - - resultValuator.calculateSearchResultValue(wordMetas, - result.encodedDocMetadata, - result.htmlFeatures, - docData.wordsTotal(), - rankingContext) + score, + detailsExtractor.get() ); } + private static class ResultRankingDetailsExtractor { + private ResultRankingDetails value = null; + + public ResultRankingDetails get() { + return value; + } + public void set(ResultRankingDetails value) { + this.value = value; + } + } + private long bestPositions(CompiledQueryLong wordMetas) { LongSet positionsSet = CompiledQueryAggregates.positionsAggregate(wordMetas, WordMetadata::decodePositions); diff --git a/code/index/java/nu/marginalia/ranking/results/ResultValuator.java b/code/index/java/nu/marginalia/ranking/results/ResultValuator.java index 16bfa4a9..4aec3049 100644 --- a/code/index/java/nu/marginalia/ranking/results/ResultValuator.java +++ b/code/index/java/nu/marginalia/ranking/results/ResultValuator.java @@ -3,6 +3,9 @@ package nu.marginalia.ranking.results; import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong; import nu.marginalia.api.searchquery.model.results.ResultRankingContext; import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; +import nu.marginalia.api.searchquery.model.results.debug.ResultRankingDetails; +import nu.marginalia.api.searchquery.model.results.debug.ResultRankingInputs; +import nu.marginalia.api.searchquery.model.results.debug.ResultRankingOutputs; import nu.marginalia.model.crawl.HtmlFeature; import nu.marginalia.model.crawl.PubDate; import nu.marginalia.model.idx.DocumentFlags; @@ -14,6 +17,9 @@ import com.google.inject.Singleton; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.annotation.Nullable; +import java.util.function.Consumer; + @Singleton public class ResultValuator { final static double scalingFactor = 500.; @@ -31,7 +37,9 @@ public class ResultValuator { long documentMetadata, int features, int length, - ResultRankingContext ctx) + ResultRankingContext ctx, + @Nullable Consumer detailsConsumer + ) { if (wordMeta.isEmpty()) return Double.MAX_VALUE; @@ -84,6 +92,36 @@ public class ResultValuator { double overallPartPositive = Math.max(0, overallPart); double overallPartNegative = -Math.min(0, overallPart); + if (null != detailsConsumer) { + var details = new ResultRankingDetails( + new ResultRankingInputs( + rank, + asl, + quality, + size, + flagsPenalty, + topology, + year + ), + new ResultRankingOutputs( + averageSentenceLengthPenalty, + qualityPenalty, + rankingBonus, + topologyBonus, + documentLengthPenalty, + temporalBias, + flagsPenalty, + overallPart, + tcfOverlap, + tcfJaccard, + bM25F, + bM25N, + bM25P) + ); + + detailsConsumer.accept(details); + } + // Renormalize to 0...15, where 0 is the best possible score; // this is a historical artifact of the original ranking function return normalize( diff --git a/code/index/test/nu/marginalia/ranking/results/ResultValuatorTest.java b/code/index/test/nu/marginalia/ranking/results/ResultValuatorTest.java index a1b66b04..de88e699 100644 --- a/code/index/test/nu/marginalia/ranking/results/ResultValuatorTest.java +++ b/code/index/test/nu/marginalia/ranking/results/ResultValuatorTest.java @@ -62,16 +62,17 @@ class ResultValuatorTest { when(dict.getTermFreq("bob")).thenReturn(10); ResultRankingContext context = new ResultRankingContext(100000, ResultRankingParameters.sensibleDefaults(), + new BitSet(), frequencyData, frequencyData); long docMeta = docMetadata(0, 2010, 5, EnumSet.noneOf(DocumentFlags.class)); int features = 0; - double titleOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, docMeta, features, 10_000, context); - double titleLongOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, docMeta, features, 10_000, context); - double highCountNoTitle = valuator.calculateSearchResultValue(highCountNoTitleSet, docMeta, features, 10_000, context); - double highCountSubject = valuator.calculateSearchResultValue(highCountSubjectSet, docMeta, features, 10_000, context); + double titleOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, docMeta, features, 10_000, context, null); + double titleLongOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, docMeta, features, 10_000, context, null); + double highCountNoTitle = valuator.calculateSearchResultValue(highCountNoTitleSet, docMeta, features, 10_000, context, null); + double highCountSubject = valuator.calculateSearchResultValue(highCountSubjectSet, docMeta, features, 10_000, context, null); System.out.println(titleOnlyLowCount); System.out.println(titleLongOnlyLowCount); diff --git a/code/services-core/query-service/java/nu/marginalia/query/QueryBasicInterface.java b/code/services-core/query-service/java/nu/marginalia/query/QueryBasicInterface.java index 916f5176..152f6a78 100644 --- a/code/services-core/query-service/java/nu/marginalia/query/QueryBasicInterface.java +++ b/code/services-core/query-service/java/nu/marginalia/query/QueryBasicInterface.java @@ -125,6 +125,7 @@ public class QueryBasicInterface { .bm25FullWeight(doubleFromRequest(request, "bm25FullWeight", sensibleDefaults.bm25FullWeight)) .bm25NgramWeight(doubleFromRequest(request, "bm25NgramWeight", sensibleDefaults.bm25NgramWeight)) .bm25PrioWeight(doubleFromRequest(request, "bm25PrioWeight", sensibleDefaults.bm25PrioWeight)) + .exportDebugData(true) .build(); } diff --git a/code/services-core/query-service/resources/templates/qdebug.hdb b/code/services-core/query-service/resources/templates/qdebug.hdb index f9c3917f..ca072f75 100644 --- a/code/services-core/query-service/resources/templates/qdebug.hdb +++ b/code/services-core/query-service/resources/templates/qdebug.hdb @@ -112,6 +112,7 @@

{{description}}

dataHash: {{dataHash}} wordsTotal: {{wordsTotal}} bestPositions: {{bestPositions}} rankingScore: {{rankingScore}} urlQuality: {{urlQuality}}
+
{{rankingDetails}}
{{/each}} {{/if}}