From 9b06433b8206ba1b21254d228350580c022a3f0e Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Fri, 19 Apr 2024 12:18:53 +0200 Subject: [PATCH] (qs) Additional info in query debug UI --- .../api/searchquery/IndexProtobufCodec.java | 44 +++++++++++++++++++ .../api/searchquery/QueryProtobufCodec.java | 4 +- .../results/debug/ResultRankingInputs.java | 4 +- .../api/src/main/protobuf/query-api.proto | 6 +-- .../nu/marginalia/index/IndexGrpcService.java | 43 +----------------- .../index/results/IndexMetadataService.java | 14 +++--- .../ranking/results/ResultValuator.java | 4 +- .../resources/templates/qdebug.hdb | 26 ++++++++++- 8 files changed, 88 insertions(+), 57 deletions(-) diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/IndexProtobufCodec.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/IndexProtobufCodec.java index d582d7ce..af783a83 100644 --- a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/IndexProtobufCodec.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/IndexProtobufCodec.java @@ -3,6 +3,9 @@ package nu.marginalia.api.searchquery; import nu.marginalia.api.searchquery.model.query.SearchQuery; import nu.marginalia.api.searchquery.model.results.Bm25Parameters; import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; +import nu.marginalia.api.searchquery.model.results.debug.ResultRankingDetails; +import nu.marginalia.api.searchquery.model.results.debug.ResultRankingInputs; +import nu.marginalia.api.searchquery.model.results.debug.ResultRankingOutputs; import nu.marginalia.index.query.limit.QueryLimits; import nu.marginalia.index.query.limit.SpecificationLimit; import nu.marginalia.index.query.limit.SpecificationLimitType; @@ -139,4 +142,45 @@ public class IndexProtobufCodec { return builder.build(); } + + public static RpcResultRankingDetails convertRankingDetails(ResultRankingDetails rankingDetails) { + if (rankingDetails == null) { + return null; + } + + return RpcResultRankingDetails.newBuilder() + .setInputs(convertRankingInputs(rankingDetails.inputs())) + .setOutput(convertRankingOutput(rankingDetails.outputs())) + .build(); + } + + private static RpcResultRankingOutputs convertRankingOutput(ResultRankingOutputs outputs) { + return RpcResultRankingOutputs.newBuilder() + .setAverageSentenceLengthPenalty(outputs.averageSentenceLengthPenalty()) + .setQualityPenalty(outputs.qualityPenalty()) + .setRankingBonus(outputs.rankingBonus()) + .setTopologyBonus(outputs.topologyBonus()) + .setDocumentLengthPenalty(outputs.documentLengthPenalty()) + .setTemporalBias(outputs.temporalBias()) + .setFlagsPenalty(outputs.flagsPenalty()) + .setOverallPart(outputs.overallPart()) + .setTcfOverlap(outputs.tcfOverlap()) + .setTcfJaccard(outputs.tcfJaccard()) + .setBM25F(outputs.bM25F()) + .setBM25N(outputs.bM25N()) + .setBM25P(outputs.bM25P()) + .build(); + } + + private static RpcResultRankingInputs convertRankingInputs(ResultRankingInputs inputs) { + return RpcResultRankingInputs.newBuilder() + .setRank(inputs.rank()) + .setAsl(inputs.asl()) + .setQuality(inputs.quality()) + .setSize(inputs.size()) + .setTopology(inputs.topology()) + .setYear(inputs.year()) + .addAllFlags(inputs.flags()) + .build(); + } } diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/QueryProtobufCodec.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/QueryProtobufCodec.java index 9830e219..58a20a8a 100644 --- a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/QueryProtobufCodec.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/QueryProtobufCodec.java @@ -171,9 +171,9 @@ public class QueryProtobufCodec { inputs.getAsl(), inputs.getQuality(), inputs.getSize(), - inputs.getFlagsPenalty(), inputs.getTopology(), - inputs.getYear() + inputs.getYear(), + inputs.getFlagsList() ); } diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/debug/ResultRankingInputs.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/debug/ResultRankingInputs.java index d9aa139f..86169416 100644 --- a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/debug/ResultRankingInputs.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/debug/ResultRankingInputs.java @@ -1,3 +1,5 @@ package nu.marginalia.api.searchquery.model.results.debug; -public record ResultRankingInputs(int rank, int asl, int quality, int size, int flagsPenalty, int topology, int year) {} +import java.util.List; + +public record ResultRankingInputs(int rank, int asl, int quality, int size, int topology, int year, List flags) {} diff --git a/code/functions/search-query/api/src/main/protobuf/query-api.proto b/code/functions/search-query/api/src/main/protobuf/query-api.proto index f6890239..eb4e48ba 100644 --- a/code/functions/search-query/api/src/main/protobuf/query-api.proto +++ b/code/functions/search-query/api/src/main/protobuf/query-api.proto @@ -150,9 +150,9 @@ message RpcResultRankingInputs { int32 asl = 2; int32 quality = 3; int32 size = 4; - int32 flagsPenalty = 5; - int32 topology = 6; - int32 year = 7; + int32 topology = 5; + int32 year = 6; + repeated string flags = 7; } message RpcResultRankingOutputs { diff --git a/code/index/java/nu/marginalia/index/IndexGrpcService.java b/code/index/java/nu/marginalia/index/IndexGrpcService.java index e37d2c0f..1e456d31 100644 --- a/code/index/java/nu/marginalia/index/IndexGrpcService.java +++ b/code/index/java/nu/marginalia/index/IndexGrpcService.java @@ -163,9 +163,8 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase { .setBestPositions(result.bestPositions) .setRawItem(rawItem); - var rankingDetails = convertRankingDetails(result.rankingDetails); + var rankingDetails = IndexProtobufCodec.convertRankingDetails(result.rankingDetails); if (rankingDetails != null) { - logger.info(queryMarker, "Ranking details: {}", rankingDetails); decoratedBuilder.setRankingDetails(rankingDetails); } @@ -183,46 +182,6 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase { } } - private RpcResultRankingDetails convertRankingDetails(ResultRankingDetails rankingDetails) { - if (rankingDetails == null) { - return null; - } - - return RpcResultRankingDetails.newBuilder() - .setInputs(convertRankingInputs(rankingDetails.inputs())) - .setOutput(convertRankingOutput(rankingDetails.outputs())) - .build(); - } - - private RpcResultRankingOutputs convertRankingOutput(ResultRankingOutputs outputs) { - return RpcResultRankingOutputs.newBuilder() - .setAverageSentenceLengthPenalty(outputs.averageSentenceLengthPenalty()) - .setQualityPenalty(outputs.qualityPenalty()) - .setRankingBonus(outputs.rankingBonus()) - .setTopologyBonus(outputs.topologyBonus()) - .setDocumentLengthPenalty(outputs.documentLengthPenalty()) - .setTemporalBias(outputs.temporalBias()) - .setFlagsPenalty(outputs.flagsPenalty()) - .setOverallPart(outputs.overallPart()) - .setTcfOverlap(outputs.tcfOverlap()) - .setTcfJaccard(outputs.tcfJaccard()) - .setBM25F(outputs.bM25F()) - .setBM25N(outputs.bM25N()) - .setBM25P(outputs.bM25P()) - .build(); - } - - private RpcResultRankingInputs convertRankingInputs(ResultRankingInputs inputs) { - return RpcResultRankingInputs.newBuilder() - .setRank(inputs.rank()) - .setAsl(inputs.asl()) - .setQuality(inputs.quality()) - .setSize(inputs.size()) - .setFlagsPenalty(inputs.flagsPenalty()) - .setTopology(inputs.topology()) - .setYear(inputs.year()) - .build(); - } // exists for test access @SneakyThrows diff --git a/code/index/java/nu/marginalia/index/results/IndexMetadataService.java b/code/index/java/nu/marginalia/index/results/IndexMetadataService.java index ce23c3f2..a43f9436 100644 --- a/code/index/java/nu/marginalia/index/results/IndexMetadataService.java +++ b/code/index/java/nu/marginalia/index/results/IndexMetadataService.java @@ -65,13 +65,15 @@ public class IndexMetadataService { for (var term : searchQuery.searchTermsPriority) { if (termToId.containsKey(term)) { - continue; + long id = SearchTermsUtil.getWordId(term); + termIdsPrio.add(id); + } + else { + long id = SearchTermsUtil.getWordId(term); + termIdsList.add(id); + termIdsPrio.add(id); + termToId.put(term, id); } - - long id = SearchTermsUtil.getWordId(term); - termIdsList.add(id); - termIdsPrio.add(id); - termToId.put(term, id); } return new QuerySearchTerms(termToId, diff --git a/code/index/java/nu/marginalia/ranking/results/ResultValuator.java b/code/index/java/nu/marginalia/ranking/results/ResultValuator.java index 4aec3049..1a89b80b 100644 --- a/code/index/java/nu/marginalia/ranking/results/ResultValuator.java +++ b/code/index/java/nu/marginalia/ranking/results/ResultValuator.java @@ -99,9 +99,9 @@ public class ResultValuator { asl, quality, size, - flagsPenalty, topology, - year + year, + DocumentFlags.decode(documentMetadata).stream().map(Enum::name).toList() ), new ResultRankingOutputs( averageSentenceLengthPenalty, diff --git a/code/services-core/query-service/resources/templates/qdebug.hdb b/code/services-core/query-service/resources/templates/qdebug.hdb index ca072f75..4081317f 100644 --- a/code/services-core/query-service/resources/templates/qdebug.hdb +++ b/code/services-core/query-service/resources/templates/qdebug.hdb @@ -112,7 +112,31 @@

{{description}}

dataHash: {{dataHash}} wordsTotal: {{wordsTotal}} bestPositions: {{bestPositions}} rankingScore: {{rankingScore}} urlQuality: {{urlQuality}}
-
{{rankingDetails}}
+ {{#with rankingDetails.inputs}} +
Rank: {{rank}}
+
ASL: {{asl}}
+
Quality: {{quality}}
+
Size: {{size}}
+
Topology: {{topology}}
+
Year: {{year}}
+
Flags: {{#each flags}} {{.}} {{/each}}
+ {{/with}} + {{#with rankingDetails.outputs}} +
Average Sentence Length Penalty: {{averageSentenceLengthPenalty}}
+
Quality Penalty: {{qualityPenalty}}
+
Ranking Bonus: {{rankingBonus}}
+
Topology Bonus: {{topologyBonus}}
+
Document Length Penalty: {{documentLengthPenalty}}
+
Temporal Bias: {{temporalBias}}
+
Flags Penalty: {{flagsPenalty}}
+
Overall Part: {{overallPart}}
+
TCF Overlap: {{tcfOverlap}}
+
TCF Jaccard: {{tcfJaccard}}
+
BM25 Full: {{bM25F}}
+
BM25 Ngram: {{bM25N}}
+
BM25 Prio: {{bM25P}}
+ {{/with}} + {{/each}} {{/if}}