(query) Update ranking parameters with new variables for bm25 ngrams and tcf mutual jaccard

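This change also means that ranking parameters left at their default values no longer need to be sent over the wire and decoded: when no parameters are provided, the codec falls back to ResultRankingParameters.sensibleDefaults(), so QueryFactory no longer sets them explicitly.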
This commit is contained in:
Viktor Lofgren 2024-04-18 10:36:15 +02:00
parent ce16239e34
commit 7641a02f31
5 changed files with 24 additions and 11 deletions

View File

@ -79,6 +79,9 @@ public class IndexProtobufCodec {
} }
public static ResultRankingParameters convertRankingParameterss(RpcResultRankingParameters params) { public static ResultRankingParameters convertRankingParameterss(RpcResultRankingParameters params) {
if (params == null)
return ResultRankingParameters.sensibleDefaults();
return new ResultRankingParameters( return new ResultRankingParameters(
new Bm25Parameters(params.getFullK(), params.getFullB()), new Bm25Parameters(params.getFullK(), params.getFullB()),
new Bm25Parameters(params.getPrioK(), params.getPrioB()), new Bm25Parameters(params.getPrioK(), params.getPrioB()),
@ -89,8 +92,10 @@ public class IndexProtobufCodec {
params.getShortSentenceThreshold(), params.getShortSentenceThreshold(),
params.getShortSentencePenalty(), params.getShortSentencePenalty(),
params.getBm25FullWeight(), params.getBm25FullWeight(),
params.getBm25NgramWeight(),
params.getBm25PrioWeight(), params.getBm25PrioWeight(),
params.getTcfWeight(), params.getTcfJaccardWeight(),
params.getTcfOverlapWeight(),
ResultRankingParameters.TemporalBias.valueOf(params.getTemporalBias().getBias().name()), ResultRankingParameters.TemporalBias.valueOf(params.getTemporalBias().getBias().name()),
params.getTemporalBiasWeight() params.getTemporalBiasWeight()
); );
@ -111,9 +116,12 @@ public class IndexProtobufCodec {
.setShortSentenceThreshold(rankingParams.shortSentenceThreshold) .setShortSentenceThreshold(rankingParams.shortSentenceThreshold)
.setShortSentencePenalty(rankingParams.shortSentencePenalty) .setShortSentencePenalty(rankingParams.shortSentencePenalty)
.setBm25FullWeight(rankingParams.bm25FullWeight) .setBm25FullWeight(rankingParams.bm25FullWeight)
.setBm25NgramWeight(rankingParams.bm25NgramWeight)
.setBm25PrioWeight(rankingParams.bm25PrioWeight) .setBm25PrioWeight(rankingParams.bm25PrioWeight)
.setTcfWeight(rankingParams.tcfWeight) .setTcfOverlapWeight(rankingParams.tcfOverlapWeight)
.setTcfJaccardWeight(rankingParams.tcfJaccardWeight)
.setTemporalBiasWeight(rankingParams.temporalBiasWeight); .setTemporalBiasWeight(rankingParams.temporalBiasWeight);
if (temporalBias != null && temporalBias.getBias() != RpcTemporalBias.Bias.NONE) { if (temporalBias != null && temporalBias.getBias() != RpcTemporalBias.Bias.NONE) {
builder.setTemporalBias(temporalBias); builder.setTemporalBias(temporalBias);
} }

View File

@ -32,8 +32,10 @@ public class ResultRankingParameters {
public double shortSentencePenalty; public double shortSentencePenalty;
public double bm25FullWeight; public double bm25FullWeight;
public double bm25NgramWeight;
public double bm25PrioWeight; public double bm25PrioWeight;
public double tcfWeight; public double tcfJaccardWeight;
public double tcfOverlapWeight;
public TemporalBias temporalBias; public TemporalBias temporalBias;
public double temporalBiasWeight; public double temporalBiasWeight;
@ -49,8 +51,10 @@ public class ResultRankingParameters {
.shortSentenceThreshold(2) .shortSentenceThreshold(2)
.shortSentencePenalty(5) .shortSentencePenalty(5)
.bm25FullWeight(1.) .bm25FullWeight(1.)
.bm25NgramWeight(.25)
.bm25PrioWeight(1.) .bm25PrioWeight(1.)
.tcfWeight(2.) .tcfOverlapWeight(3.)
.tcfJaccardWeight(1)
.temporalBias(TemporalBias.NONE) .temporalBias(TemporalBias.NONE)
.temporalBiasWeight(1. / (5.)) .temporalBiasWeight(1. / (5.))
.build(); .build();

View File

@ -130,10 +130,12 @@ message RpcResultRankingParameters {
int32 shortSentenceThreshold = 9; int32 shortSentenceThreshold = 9;
double shortSentencePenalty = 10; double shortSentencePenalty = 10;
double bm25FullWeight = 11; double bm25FullWeight = 11;
double bm25PrioWeight = 12; double bm25NgramWeight = 12;
double tcfWeight = 13; double bm25PrioWeight = 13;
RpcTemporalBias temporalBias = 14; double tcfOverlapWeight = 14;
double temporalBiasWeight = 15; double tcfJaccardWeight = 15;
RpcTemporalBias temporalBias = 16;
double temporalBiasWeight = 17;
} }
/* Defines a single subquery */ /* Defines a single subquery */

View File

@ -159,7 +159,6 @@ public class QueryFactory {
.domains(domainIds) .domains(domainIds)
.queryLimits(limits) .queryLimits(limits)
.searchSetIdentifier(params.identifier()) .searchSetIdentifier(params.identifier())
.rankingParams(ResultRankingParameters.sensibleDefaults())
.queryStrategy(queryStrategy); .queryStrategy(queryStrategy);
SearchSpecification specs = specsBuilder.build(); SearchSpecification specs = specsBuilder.build();

View File

@ -74,8 +74,8 @@ public class ResultValuator {
+ temporalBias + temporalBias
+ flagsPenalty; + flagsPenalty;
double tcfOverlap = 1.5 * rankingParams.tcfWeight * termCoherenceFactor.calculateOverlap(wordMeta); double tcfOverlap = rankingParams.tcfOverlapWeight * termCoherenceFactor.calculateOverlap(wordMeta);
double tcfJaccard = 0.5 * rankingParams.tcfWeight * termCoherenceFactor.calculateAvgMutualJaccard(wordMeta, ctx); double tcfJaccard = rankingParams.tcfJaccardWeight * termCoherenceFactor.calculateAvgMutualJaccard(wordMeta, ctx);
double bM25F = rankingParams.bm25FullWeight * wordMeta.root.visit(Bm25FullGraphVisitor.forRegular(rankingParams.fullParams, wordMeta.data, length, ctx)); double bM25F = rankingParams.bm25FullWeight * wordMeta.root.visit(Bm25FullGraphVisitor.forRegular(rankingParams.fullParams, wordMeta.data, length, ctx));
double bM25N = 0.25 * rankingParams.bm25FullWeight * wordMeta.root.visit(Bm25FullGraphVisitor.forNgrams(rankingParams.fullParams, wordMeta.data, length, ctx)); double bM25N = 0.25 * rankingParams.bm25FullWeight * wordMeta.root.visit(Bm25FullGraphVisitor.forNgrams(rankingParams.fullParams, wordMeta.data, length, ctx));