mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
(query) Update ranking parameters with new variables for BM25 n-grams and TCF mutual Jaccard
The change also ensures that when the ranking parameters are all defaults, they do not need to be sent over the wire and decoded.
This commit is contained in:
parent
ce16239e34
commit
7641a02f31
@ -79,6 +79,9 @@ public class IndexProtobufCodec {
|
||||
}
|
||||
|
||||
public static ResultRankingParameters convertRankingParameterss(RpcResultRankingParameters params) {
|
||||
if (params == null)
|
||||
return ResultRankingParameters.sensibleDefaults();
|
||||
|
||||
return new ResultRankingParameters(
|
||||
new Bm25Parameters(params.getFullK(), params.getFullB()),
|
||||
new Bm25Parameters(params.getPrioK(), params.getPrioB()),
|
||||
@ -89,8 +92,10 @@ public class IndexProtobufCodec {
|
||||
params.getShortSentenceThreshold(),
|
||||
params.getShortSentencePenalty(),
|
||||
params.getBm25FullWeight(),
|
||||
params.getBm25NgramWeight(),
|
||||
params.getBm25PrioWeight(),
|
||||
params.getTcfWeight(),
|
||||
params.getTcfJaccardWeight(),
|
||||
params.getTcfOverlapWeight(),
|
||||
ResultRankingParameters.TemporalBias.valueOf(params.getTemporalBias().getBias().name()),
|
||||
params.getTemporalBiasWeight()
|
||||
);
|
||||
@ -111,9 +116,12 @@ public class IndexProtobufCodec {
|
||||
.setShortSentenceThreshold(rankingParams.shortSentenceThreshold)
|
||||
.setShortSentencePenalty(rankingParams.shortSentencePenalty)
|
||||
.setBm25FullWeight(rankingParams.bm25FullWeight)
|
||||
.setBm25NgramWeight(rankingParams.bm25NgramWeight)
|
||||
.setBm25PrioWeight(rankingParams.bm25PrioWeight)
|
||||
.setTcfWeight(rankingParams.tcfWeight)
|
||||
.setTcfOverlapWeight(rankingParams.tcfOverlapWeight)
|
||||
.setTcfJaccardWeight(rankingParams.tcfJaccardWeight)
|
||||
.setTemporalBiasWeight(rankingParams.temporalBiasWeight);
|
||||
|
||||
if (temporalBias != null && temporalBias.getBias() != RpcTemporalBias.Bias.NONE) {
|
||||
builder.setTemporalBias(temporalBias);
|
||||
}
|
||||
|
@ -32,8 +32,10 @@ public class ResultRankingParameters {
|
||||
public double shortSentencePenalty;
|
||||
|
||||
public double bm25FullWeight;
|
||||
public double bm25NgramWeight;
|
||||
public double bm25PrioWeight;
|
||||
public double tcfWeight;
|
||||
public double tcfJaccardWeight;
|
||||
public double tcfOverlapWeight;
|
||||
|
||||
public TemporalBias temporalBias;
|
||||
public double temporalBiasWeight;
|
||||
@ -49,8 +51,10 @@ public class ResultRankingParameters {
|
||||
.shortSentenceThreshold(2)
|
||||
.shortSentencePenalty(5)
|
||||
.bm25FullWeight(1.)
|
||||
.bm25NgramWeight(.25)
|
||||
.bm25PrioWeight(1.)
|
||||
.tcfWeight(2.)
|
||||
.tcfOverlapWeight(3.)
|
||||
.tcfJaccardWeight(1)
|
||||
.temporalBias(TemporalBias.NONE)
|
||||
.temporalBiasWeight(1. / (5.))
|
||||
.build();
|
||||
|
@ -130,10 +130,12 @@ message RpcResultRankingParameters {
|
||||
int32 shortSentenceThreshold = 9;
|
||||
double shortSentencePenalty = 10;
|
||||
double bm25FullWeight = 11;
|
||||
double bm25PrioWeight = 12;
|
||||
double tcfWeight = 13;
|
||||
RpcTemporalBias temporalBias = 14;
|
||||
double temporalBiasWeight = 15;
|
||||
double bm25NgramWeight = 12;
|
||||
double bm25PrioWeight = 13;
|
||||
double tcfOverlapWeight = 14;
|
||||
double tcfJaccardWeight = 15;
|
||||
RpcTemporalBias temporalBias = 16;
|
||||
double temporalBiasWeight = 17;
|
||||
}
|
||||
|
||||
/* Defines a single subquery */
|
||||
|
@ -159,7 +159,6 @@ public class QueryFactory {
|
||||
.domains(domainIds)
|
||||
.queryLimits(limits)
|
||||
.searchSetIdentifier(params.identifier())
|
||||
.rankingParams(ResultRankingParameters.sensibleDefaults())
|
||||
.queryStrategy(queryStrategy);
|
||||
|
||||
SearchSpecification specs = specsBuilder.build();
|
||||
|
@ -74,8 +74,8 @@ public class ResultValuator {
|
||||
+ temporalBias
|
||||
+ flagsPenalty;
|
||||
|
||||
double tcfOverlap = 1.5 * rankingParams.tcfWeight * termCoherenceFactor.calculateOverlap(wordMeta);
|
||||
double tcfJaccard = 0.5 * rankingParams.tcfWeight * termCoherenceFactor.calculateAvgMutualJaccard(wordMeta, ctx);
|
||||
double tcfOverlap = rankingParams.tcfOverlapWeight * termCoherenceFactor.calculateOverlap(wordMeta);
|
||||
double tcfJaccard = rankingParams.tcfJaccardWeight * termCoherenceFactor.calculateAvgMutualJaccard(wordMeta, ctx);
|
||||
|
||||
double bM25F = rankingParams.bm25FullWeight * wordMeta.root.visit(Bm25FullGraphVisitor.forRegular(rankingParams.fullParams, wordMeta.data, length, ctx));
|
||||
double bM25N = 0.25 * rankingParams.bm25FullWeight * wordMeta.root.visit(Bm25FullGraphVisitor.forNgrams(rankingParams.fullParams, wordMeta.data, length, ctx));
|
||||
|
Loading…
Reference in New Issue
Block a user