mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 13:19:02 +00:00
(query) Update ranking parameters with new variables for BM25 n-grams and TCF mutual Jaccard
The change also means that ranking parameters left at their default values no longer need to be sent over the wire and decoded.
This commit is contained in:
parent
a09c84e1b8
commit
462aa9af26
@ -79,6 +79,9 @@ public class IndexProtobufCodec {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static ResultRankingParameters convertRankingParameterss(RpcResultRankingParameters params) {
|
public static ResultRankingParameters convertRankingParameterss(RpcResultRankingParameters params) {
|
||||||
|
if (params == null)
|
||||||
|
return ResultRankingParameters.sensibleDefaults();
|
||||||
|
|
||||||
return new ResultRankingParameters(
|
return new ResultRankingParameters(
|
||||||
new Bm25Parameters(params.getFullK(), params.getFullB()),
|
new Bm25Parameters(params.getFullK(), params.getFullB()),
|
||||||
new Bm25Parameters(params.getPrioK(), params.getPrioB()),
|
new Bm25Parameters(params.getPrioK(), params.getPrioB()),
|
||||||
@ -89,8 +92,10 @@ public class IndexProtobufCodec {
|
|||||||
params.getShortSentenceThreshold(),
|
params.getShortSentenceThreshold(),
|
||||||
params.getShortSentencePenalty(),
|
params.getShortSentencePenalty(),
|
||||||
params.getBm25FullWeight(),
|
params.getBm25FullWeight(),
|
||||||
|
params.getBm25NgramWeight(),
|
||||||
params.getBm25PrioWeight(),
|
params.getBm25PrioWeight(),
|
||||||
params.getTcfWeight(),
|
params.getTcfJaccardWeight(),
|
||||||
|
params.getTcfOverlapWeight(),
|
||||||
ResultRankingParameters.TemporalBias.valueOf(params.getTemporalBias().getBias().name()),
|
ResultRankingParameters.TemporalBias.valueOf(params.getTemporalBias().getBias().name()),
|
||||||
params.getTemporalBiasWeight()
|
params.getTemporalBiasWeight()
|
||||||
);
|
);
|
||||||
@ -111,9 +116,12 @@ public class IndexProtobufCodec {
|
|||||||
.setShortSentenceThreshold(rankingParams.shortSentenceThreshold)
|
.setShortSentenceThreshold(rankingParams.shortSentenceThreshold)
|
||||||
.setShortSentencePenalty(rankingParams.shortSentencePenalty)
|
.setShortSentencePenalty(rankingParams.shortSentencePenalty)
|
||||||
.setBm25FullWeight(rankingParams.bm25FullWeight)
|
.setBm25FullWeight(rankingParams.bm25FullWeight)
|
||||||
|
.setBm25NgramWeight(rankingParams.bm25NgramWeight)
|
||||||
.setBm25PrioWeight(rankingParams.bm25PrioWeight)
|
.setBm25PrioWeight(rankingParams.bm25PrioWeight)
|
||||||
.setTcfWeight(rankingParams.tcfWeight)
|
.setTcfOverlapWeight(rankingParams.tcfOverlapWeight)
|
||||||
|
.setTcfJaccardWeight(rankingParams.tcfJaccardWeight)
|
||||||
.setTemporalBiasWeight(rankingParams.temporalBiasWeight);
|
.setTemporalBiasWeight(rankingParams.temporalBiasWeight);
|
||||||
|
|
||||||
if (temporalBias != null && temporalBias.getBias() != RpcTemporalBias.Bias.NONE) {
|
if (temporalBias != null && temporalBias.getBias() != RpcTemporalBias.Bias.NONE) {
|
||||||
builder.setTemporalBias(temporalBias);
|
builder.setTemporalBias(temporalBias);
|
||||||
}
|
}
|
||||||
|
@ -32,8 +32,10 @@ public class ResultRankingParameters {
|
|||||||
public double shortSentencePenalty;
|
public double shortSentencePenalty;
|
||||||
|
|
||||||
public double bm25FullWeight;
|
public double bm25FullWeight;
|
||||||
|
public double bm25NgramWeight;
|
||||||
public double bm25PrioWeight;
|
public double bm25PrioWeight;
|
||||||
public double tcfWeight;
|
public double tcfJaccardWeight;
|
||||||
|
public double tcfOverlapWeight;
|
||||||
|
|
||||||
public TemporalBias temporalBias;
|
public TemporalBias temporalBias;
|
||||||
public double temporalBiasWeight;
|
public double temporalBiasWeight;
|
||||||
@ -49,8 +51,10 @@ public class ResultRankingParameters {
|
|||||||
.shortSentenceThreshold(2)
|
.shortSentenceThreshold(2)
|
||||||
.shortSentencePenalty(5)
|
.shortSentencePenalty(5)
|
||||||
.bm25FullWeight(1.)
|
.bm25FullWeight(1.)
|
||||||
|
.bm25NgramWeight(.25)
|
||||||
.bm25PrioWeight(1.)
|
.bm25PrioWeight(1.)
|
||||||
.tcfWeight(2.)
|
.tcfOverlapWeight(3.)
|
||||||
|
.tcfJaccardWeight(1)
|
||||||
.temporalBias(TemporalBias.NONE)
|
.temporalBias(TemporalBias.NONE)
|
||||||
.temporalBiasWeight(1. / (5.))
|
.temporalBiasWeight(1. / (5.))
|
||||||
.build();
|
.build();
|
||||||
|
@ -130,10 +130,12 @@ message RpcResultRankingParameters {
|
|||||||
int32 shortSentenceThreshold = 9;
|
int32 shortSentenceThreshold = 9;
|
||||||
double shortSentencePenalty = 10;
|
double shortSentencePenalty = 10;
|
||||||
double bm25FullWeight = 11;
|
double bm25FullWeight = 11;
|
||||||
double bm25PrioWeight = 12;
|
double bm25NgramWeight = 12;
|
||||||
double tcfWeight = 13;
|
double bm25PrioWeight = 13;
|
||||||
RpcTemporalBias temporalBias = 14;
|
double tcfOverlapWeight = 14;
|
||||||
double temporalBiasWeight = 15;
|
double tcfJaccardWeight = 15;
|
||||||
|
RpcTemporalBias temporalBias = 16;
|
||||||
|
double temporalBiasWeight = 17;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Defines a single subquery */
|
/* Defines a single subquery */
|
||||||
|
@ -159,7 +159,6 @@ public class QueryFactory {
|
|||||||
.domains(domainIds)
|
.domains(domainIds)
|
||||||
.queryLimits(limits)
|
.queryLimits(limits)
|
||||||
.searchSetIdentifier(params.identifier())
|
.searchSetIdentifier(params.identifier())
|
||||||
.rankingParams(ResultRankingParameters.sensibleDefaults())
|
|
||||||
.queryStrategy(queryStrategy);
|
.queryStrategy(queryStrategy);
|
||||||
|
|
||||||
SearchSpecification specs = specsBuilder.build();
|
SearchSpecification specs = specsBuilder.build();
|
||||||
|
@ -74,8 +74,8 @@ public class ResultValuator {
|
|||||||
+ temporalBias
|
+ temporalBias
|
||||||
+ flagsPenalty;
|
+ flagsPenalty;
|
||||||
|
|
||||||
double tcfOverlap = 1.5 * rankingParams.tcfWeight * termCoherenceFactor.calculateOverlap(wordMeta);
|
double tcfOverlap = rankingParams.tcfOverlapWeight * termCoherenceFactor.calculateOverlap(wordMeta);
|
||||||
double tcfJaccard = 0.5 * rankingParams.tcfWeight * termCoherenceFactor.calculateAvgMutualJaccard(wordMeta, ctx);
|
double tcfJaccard = rankingParams.tcfJaccardWeight * termCoherenceFactor.calculateAvgMutualJaccard(wordMeta, ctx);
|
||||||
|
|
||||||
double bM25F = rankingParams.bm25FullWeight * wordMeta.root.visit(Bm25FullGraphVisitor.forRegular(rankingParams.fullParams, wordMeta.data, length, ctx));
|
double bM25F = rankingParams.bm25FullWeight * wordMeta.root.visit(Bm25FullGraphVisitor.forRegular(rankingParams.fullParams, wordMeta.data, length, ctx));
|
||||||
double bM25N = 0.25 * rankingParams.bm25FullWeight * wordMeta.root.visit(Bm25FullGraphVisitor.forNgrams(rankingParams.fullParams, wordMeta.data, length, ctx));
|
double bM25N = 0.25 * rankingParams.bm25FullWeight * wordMeta.root.visit(Bm25FullGraphVisitor.forNgrams(rankingParams.fullParams, wordMeta.data, length, ctx));
|
||||||
|
Loading…
Reference in New Issue
Block a user