Add a ranking parameter for biasing toward recent or old content.

This commit is contained in:
Viktor Lofgren 2023-04-20 16:00:59 +02:00
parent 96bac70b85
commit f12c6fd57e
2 changed files with 25 additions and 5 deletions

View File

@ -33,6 +33,9 @@ public class ResultRankingParameters {
public double bm25PrioWeight;
public double tcfWeight;
/** Which end of the publication-year range ranking should favor, or
 * {@code NONE} for no temporal bias; {@code sensibleDefaults()} sets NONE. */
public TemporalBias temporalBias;
/** Scale factor for the temporal bias term: the valuator multiplies it by
 * the document's distance in years from the favored end of the range.
 * {@code sensibleDefaults()} sets 0.1. */
public double temporalBiasWeight;
public static ResultRankingParameters sensibleDefaults() {
return builder()
.fullParams(new Bm25Parameters(1.2, 0.5))
@ -46,6 +49,12 @@ public class ResultRankingParameters {
.bm25FullWeight(1.)
.bm25PrioWeight(1.)
.tcfWeight(2.)
.temporalBias(TemporalBias.NONE)
.temporalBiasWeight(1. / (10.))
.build();
}
/**
 * Direction in which result ranking may be biased by document year.
 * Declaration order (RECENT, OLD, NONE) is kept stable.
 */
public enum TemporalBias {
    /** Favor documents whose year is close to the newest representable year. */
    RECENT,
    /** Favor documents whose year is close to the oldest representable year. */
    OLD,
    /** Apply no year-based adjustment. */
    NONE
}
}

View File

@ -1,7 +1,9 @@
package nu.marginalia.ranking;
import nu.marginalia.index.client.model.results.ResultRankingContext;
import nu.marginalia.index.client.model.results.ResultRankingParameters;
import nu.marginalia.index.client.model.results.SearchResultKeywordScore;
import nu.marginalia.model.crawl.PubDate;
import nu.marginalia.model.idx.DocumentMetadata;
import nu.marginalia.ranking.factors.*;
@ -52,21 +54,30 @@ public class ResultValuator {
int asl = DocumentMetadata.decodeAvgSentenceLength(documentMetadata);
int quality = DocumentMetadata.decodeQuality(documentMetadata);
int topology = DocumentMetadata.decodeTopology(documentMetadata);
int year = DocumentMetadata.decodeYear(documentMetadata);
double averageSentenceLengthPenalty = (asl >= rankingParams.shortSentenceThreshold ? 0 : -rankingParams.shortSentencePenalty);
double qualityPenalty = -quality * rankingParams.qualityPenalty;
double rankingBonus = (255. - rank) * rankingParams.domainRankBonus;
double topologyBonus = Math.log(1 + topology);
double documentLengthPenalty = length > rankingParams.shortDocumentThreshold ? 0 : -rankingParams.shortDocumentPenalty;
final double qualityPenalty = -quality * rankingParams.qualityPenalty;
final double rankingBonus = (255. - rank) * rankingParams.domainRankBonus;
final double topologyBonus = Math.log(1 + topology);
final double documentLengthPenalty = length > rankingParams.shortDocumentThreshold ? 0 : -rankingParams.shortDocumentPenalty;
final double temporalBias;
if (rankingParams.temporalBias == ResultRankingParameters.TemporalBias.RECENT) {
temporalBias = - Math.abs(year - PubDate.MAX_YEAR) * rankingParams.temporalBiasWeight;
} else if (rankingParams.temporalBias == ResultRankingParameters.TemporalBias.OLD) {
temporalBias = - Math.abs(year - PubDate.MIN_YEAR) * rankingParams.temporalBiasWeight;
} else {
temporalBias = 0;
}
double overallPart = averageSentenceLengthPenalty
+ documentLengthPenalty
+ qualityPenalty
+ rankingBonus
+ topologyBonus
+ temporalBias
+ priorityTermBonus.calculate(scores);
for (int set = 0; set <= sets; set++) {