From 7641a02f31cc597ce69d928435b37ade0d4d57e5 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Thu, 18 Apr 2024 10:36:15 +0200 Subject: [PATCH] (query) Update ranking parameters with new variables for bm25 ngrams and tcf mutual jaccard The change also makes it so that as long as the values are defaults, they don't need to be sent over the wire and decoded. --- .../api/searchquery/IndexProtobufCodec.java | 12 ++++++++++-- .../model/results/ResultRankingParameters.java | 8 ++++++-- .../api/src/main/protobuf/query-api.proto | 10 ++++++---- .../functions/searchquery/svc/QueryFactory.java | 1 - .../marginalia/ranking/results/ResultValuator.java | 4 ++-- 5 files changed, 24 insertions(+), 11 deletions(-) diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/IndexProtobufCodec.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/IndexProtobufCodec.java index 4d2cf7a6..bf0f4b64 100644 --- a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/IndexProtobufCodec.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/IndexProtobufCodec.java @@ -79,6 +79,9 @@ public class IndexProtobufCodec { } public static ResultRankingParameters convertRankingParameterss(RpcResultRankingParameters params) { + if (params == null) + return ResultRankingParameters.sensibleDefaults(); + return new ResultRankingParameters( new Bm25Parameters(params.getFullK(), params.getFullB()), new Bm25Parameters(params.getPrioK(), params.getPrioB()), @@ -89,8 +92,10 @@ public class IndexProtobufCodec { params.getShortSentenceThreshold(), params.getShortSentencePenalty(), params.getBm25FullWeight(), + params.getBm25NgramWeight(), params.getBm25PrioWeight(), - params.getTcfWeight(), + params.getTcfJaccardWeight(), + params.getTcfOverlapWeight(), ResultRankingParameters.TemporalBias.valueOf(params.getTemporalBias().getBias().name()), params.getTemporalBiasWeight() ); @@ -111,9 +116,12 @@ public class IndexProtobufCodec { .setShortSentenceThreshold(rankingParams.shortSentenceThreshold) .setShortSentencePenalty(rankingParams.shortSentencePenalty) .setBm25FullWeight(rankingParams.bm25FullWeight) + .setBm25NgramWeight(rankingParams.bm25NgramWeight) .setBm25PrioWeight(rankingParams.bm25PrioWeight) - .setTcfWeight(rankingParams.tcfWeight) + .setTcfOverlapWeight(rankingParams.tcfOverlapWeight) + .setTcfJaccardWeight(rankingParams.tcfJaccardWeight) .setTemporalBiasWeight(rankingParams.temporalBiasWeight); + if (temporalBias != null && temporalBias.getBias() != RpcTemporalBias.Bias.NONE) { builder.setTemporalBias(temporalBias); } diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/ResultRankingParameters.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/ResultRankingParameters.java index a16ccf8b..04a5f8e2 100644 --- a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/ResultRankingParameters.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/ResultRankingParameters.java @@ -32,8 +32,10 @@ public class ResultRankingParameters { public double shortSentencePenalty; public double bm25FullWeight; + public double bm25NgramWeight; public double bm25PrioWeight; - public double tcfWeight; + public double tcfJaccardWeight; + public double tcfOverlapWeight; public TemporalBias temporalBias; public double temporalBiasWeight; @@ -49,8 +51,10 @@ public class ResultRankingParameters { .shortSentenceThreshold(2) .shortSentencePenalty(5) .bm25FullWeight(1.) + .bm25NgramWeight(.25) .bm25PrioWeight(1.) - .tcfWeight(2.) + .tcfOverlapWeight(3.) + .tcfJaccardWeight(1) .temporalBias(TemporalBias.NONE) .temporalBiasWeight(1. / (5.)) .build(); diff --git a/code/functions/search-query/api/src/main/protobuf/query-api.proto b/code/functions/search-query/api/src/main/protobuf/query-api.proto index bae06e66..db6d4a35 100644 --- a/code/functions/search-query/api/src/main/protobuf/query-api.proto +++ b/code/functions/search-query/api/src/main/protobuf/query-api.proto @@ -130,10 +130,12 @@ message RpcResultRankingParameters { int32 shortSentenceThreshold = 9; double shortSentencePenalty = 10; double bm25FullWeight = 11; - double bm25PrioWeight = 12; - double tcfWeight = 13; - RpcTemporalBias temporalBias = 14; - double temporalBiasWeight = 15; + double bm25NgramWeight = 12; + double bm25PrioWeight = 13; + double tcfOverlapWeight = 14; + double tcfJaccardWeight = 15; + RpcTemporalBias temporalBias = 16; + double temporalBiasWeight = 17; } /* Defines a single subquery */ diff --git a/code/functions/search-query/java/nu/marginalia/functions/searchquery/svc/QueryFactory.java b/code/functions/search-query/java/nu/marginalia/functions/searchquery/svc/QueryFactory.java index 382f62a8..ab4018ef 100644 --- a/code/functions/search-query/java/nu/marginalia/functions/searchquery/svc/QueryFactory.java +++ b/code/functions/search-query/java/nu/marginalia/functions/searchquery/svc/QueryFactory.java @@ -159,7 +159,6 @@ public class QueryFactory { .domains(domainIds) .queryLimits(limits) .searchSetIdentifier(params.identifier()) - .rankingParams(ResultRankingParameters.sensibleDefaults()) .queryStrategy(queryStrategy); SearchSpecification specs = specsBuilder.build(); diff --git a/code/index/java/nu/marginalia/ranking/results/ResultValuator.java b/code/index/java/nu/marginalia/ranking/results/ResultValuator.java index a8718e3d..16bfa4a9 100644 --- a/code/index/java/nu/marginalia/ranking/results/ResultValuator.java +++ b/code/index/java/nu/marginalia/ranking/results/ResultValuator.java @@ -74,8 +74,8 @@ public class ResultValuator { + temporalBias + flagsPenalty; - double tcfOverlap = 1.5 * rankingParams.tcfWeight * termCoherenceFactor.calculateOverlap(wordMeta); - double tcfJaccard = 0.5 * rankingParams.tcfWeight * termCoherenceFactor.calculateAvgMutualJaccard(wordMeta, ctx); + double tcfOverlap = rankingParams.tcfOverlapWeight * termCoherenceFactor.calculateOverlap(wordMeta); + double tcfJaccard = rankingParams.tcfJaccardWeight * termCoherenceFactor.calculateAvgMutualJaccard(wordMeta, ctx); double bM25F = rankingParams.bm25FullWeight * wordMeta.root.visit(Bm25FullGraphVisitor.forRegular(rankingParams.fullParams, wordMeta.data, length, ctx)); double bM25N = 0.25 * rankingParams.bm25FullWeight * wordMeta.root.visit(Bm25FullGraphVisitor.forNgrams(rankingParams.fullParams, wordMeta.data, length, ctx));