(query) Update ranking parameters with new variables for bm25 ngrams and tcf mutual jaccard

The change also means that when the ranking parameters are just the defaults, they no longer need to be sent over the wire and decoded.
This commit is contained in:
Viktor Lofgren 2024-04-18 10:36:15 +02:00
parent ce16239e34
commit 7641a02f31
5 changed files with 24 additions and 11 deletions

View File

@ -79,6 +79,9 @@ public class IndexProtobufCodec {
}
public static ResultRankingParameters convertRankingParameterss(RpcResultRankingParameters params) {
if (params == null)
return ResultRankingParameters.sensibleDefaults();
return new ResultRankingParameters(
new Bm25Parameters(params.getFullK(), params.getFullB()),
new Bm25Parameters(params.getPrioK(), params.getPrioB()),
@ -89,8 +92,10 @@ public class IndexProtobufCodec {
params.getShortSentenceThreshold(),
params.getShortSentencePenalty(),
params.getBm25FullWeight(),
params.getBm25NgramWeight(),
params.getBm25PrioWeight(),
params.getTcfWeight(),
params.getTcfJaccardWeight(),
params.getTcfOverlapWeight(),
ResultRankingParameters.TemporalBias.valueOf(params.getTemporalBias().getBias().name()),
params.getTemporalBiasWeight()
);
@ -111,9 +116,12 @@ public class IndexProtobufCodec {
.setShortSentenceThreshold(rankingParams.shortSentenceThreshold)
.setShortSentencePenalty(rankingParams.shortSentencePenalty)
.setBm25FullWeight(rankingParams.bm25FullWeight)
.setBm25NgramWeight(rankingParams.bm25NgramWeight)
.setBm25PrioWeight(rankingParams.bm25PrioWeight)
.setTcfWeight(rankingParams.tcfWeight)
.setTcfOverlapWeight(rankingParams.tcfOverlapWeight)
.setTcfJaccardWeight(rankingParams.tcfJaccardWeight)
.setTemporalBiasWeight(rankingParams.temporalBiasWeight);
if (temporalBias != null && temporalBias.getBias() != RpcTemporalBias.Bias.NONE) {
builder.setTemporalBias(temporalBias);
}

View File

@ -32,8 +32,10 @@ public class ResultRankingParameters {
public double shortSentencePenalty;
public double bm25FullWeight;
public double bm25NgramWeight;
public double bm25PrioWeight;
public double tcfWeight;
public double tcfJaccardWeight;
public double tcfOverlapWeight;
public TemporalBias temporalBias;
public double temporalBiasWeight;
@ -49,8 +51,10 @@ public class ResultRankingParameters {
.shortSentenceThreshold(2)
.shortSentencePenalty(5)
.bm25FullWeight(1.)
.bm25NgramWeight(.25)
.bm25PrioWeight(1.)
.tcfWeight(2.)
.tcfOverlapWeight(3.)
.tcfJaccardWeight(1)
.temporalBias(TemporalBias.NONE)
.temporalBiasWeight(1. / (5.))
.build();

View File

@ -130,10 +130,12 @@ message RpcResultRankingParameters {
int32 shortSentenceThreshold = 9;
double shortSentencePenalty = 10;
double bm25FullWeight = 11;
double bm25PrioWeight = 12;
double tcfWeight = 13;
RpcTemporalBias temporalBias = 14;
double temporalBiasWeight = 15;
double bm25NgramWeight = 12;
double bm25PrioWeight = 13;
double tcfOverlapWeight = 14;
double tcfJaccardWeight = 15;
RpcTemporalBias temporalBias = 16;
double temporalBiasWeight = 17;
}
/* Defines a single subquery */

View File

@ -159,7 +159,6 @@ public class QueryFactory {
.domains(domainIds)
.queryLimits(limits)
.searchSetIdentifier(params.identifier())
.rankingParams(ResultRankingParameters.sensibleDefaults())
.queryStrategy(queryStrategy);
SearchSpecification specs = specsBuilder.build();

View File

@ -74,8 +74,8 @@ public class ResultValuator {
+ temporalBias
+ flagsPenalty;
double tcfOverlap = 1.5 * rankingParams.tcfWeight * termCoherenceFactor.calculateOverlap(wordMeta);
double tcfJaccard = 0.5 * rankingParams.tcfWeight * termCoherenceFactor.calculateAvgMutualJaccard(wordMeta, ctx);
double tcfOverlap = rankingParams.tcfOverlapWeight * termCoherenceFactor.calculateOverlap(wordMeta);
double tcfJaccard = rankingParams.tcfJaccardWeight * termCoherenceFactor.calculateAvgMutualJaccard(wordMeta, ctx);
double bM25F = rankingParams.bm25FullWeight * wordMeta.root.visit(Bm25FullGraphVisitor.forRegular(rankingParams.fullParams, wordMeta.data, length, ctx));
double bM25N = 0.25 * rankingParams.bm25FullWeight * wordMeta.root.visit(Bm25FullGraphVisitor.forNgrams(rankingParams.fullParams, wordMeta.data, length, ctx));