mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
Add a ranking parameter for biasing toward recent or old content.
This commit is contained in:
parent
96bac70b85
commit
f12c6fd57e
@ -33,6 +33,9 @@ public class ResultRankingParameters {
|
||||
public double bm25PrioWeight;
|
||||
public double tcfWeight;
|
||||
|
||||
public TemporalBias temporalBias;
|
||||
public double temporalBiasWeight;
|
||||
|
||||
public static ResultRankingParameters sensibleDefaults() {
|
||||
return builder()
|
||||
.fullParams(new Bm25Parameters(1.2, 0.5))
|
||||
@ -46,6 +49,12 @@ public class ResultRankingParameters {
|
||||
.bm25FullWeight(1.)
|
||||
.bm25PrioWeight(1.)
|
||||
.tcfWeight(2.)
|
||||
.temporalBias(TemporalBias.NONE)
|
||||
.temporalBiasWeight(1. / (10.))
|
||||
.build();
|
||||
}
|
||||
|
||||
/**
 * Selects the direction of the optional year-based ranking adjustment.
 *
 * As consumed by the result valuator in this change:
 * RECENT applies a penalty of |year - PubDate.MAX_YEAR| * temporalBiasWeight,
 * OLD applies a penalty of |year - PubDate.MIN_YEAR| * temporalBiasWeight,
 * and any other value (NONE) contributes 0 to the score.
 */
public enum TemporalBias {
|
||||
RECENT, OLD, NONE
|
||||
};
|
||||
}
|
||||
|
@ -1,7 +1,9 @@
|
||||
package nu.marginalia.ranking;
|
||||
|
||||
import nu.marginalia.index.client.model.results.ResultRankingContext;
|
||||
import nu.marginalia.index.client.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.index.client.model.results.SearchResultKeywordScore;
|
||||
import nu.marginalia.model.crawl.PubDate;
|
||||
import nu.marginalia.model.idx.DocumentMetadata;
|
||||
import nu.marginalia.ranking.factors.*;
|
||||
|
||||
@ -52,21 +54,30 @@ public class ResultValuator {
|
||||
int asl = DocumentMetadata.decodeAvgSentenceLength(documentMetadata);
|
||||
int quality = DocumentMetadata.decodeQuality(documentMetadata);
|
||||
int topology = DocumentMetadata.decodeTopology(documentMetadata);
|
||||
int year = DocumentMetadata.decodeYear(documentMetadata);
|
||||
|
||||
double averageSentenceLengthPenalty = (asl >= rankingParams.shortSentenceThreshold ? 0 : -rankingParams.shortSentencePenalty);
|
||||
|
||||
double qualityPenalty = -quality * rankingParams.qualityPenalty;
|
||||
double rankingBonus = (255. - rank) * rankingParams.domainRankBonus;
|
||||
double topologyBonus = Math.log(1 + topology);
|
||||
double documentLengthPenalty = length > rankingParams.shortDocumentThreshold ? 0 : -rankingParams.shortDocumentPenalty;
|
||||
|
||||
final double qualityPenalty = -quality * rankingParams.qualityPenalty;
|
||||
final double rankingBonus = (255. - rank) * rankingParams.domainRankBonus;
|
||||
final double topologyBonus = Math.log(1 + topology);
|
||||
final double documentLengthPenalty = length > rankingParams.shortDocumentThreshold ? 0 : -rankingParams.shortDocumentPenalty;
|
||||
final double temporalBias;
|
||||
|
||||
if (rankingParams.temporalBias == ResultRankingParameters.TemporalBias.RECENT) {
|
||||
temporalBias = - Math.abs(year - PubDate.MAX_YEAR) * rankingParams.temporalBiasWeight;
|
||||
} else if (rankingParams.temporalBias == ResultRankingParameters.TemporalBias.OLD) {
|
||||
temporalBias = - Math.abs(year - PubDate.MIN_YEAR) * rankingParams.temporalBiasWeight;
|
||||
} else {
|
||||
temporalBias = 0;
|
||||
}
|
||||
|
||||
double overallPart = averageSentenceLengthPenalty
|
||||
+ documentLengthPenalty
|
||||
+ qualityPenalty
|
||||
+ rankingBonus
|
||||
+ topologyBonus
|
||||
+ temporalBias
|
||||
+ priorityTermBonus.calculate(scores);
|
||||
|
||||
for (int set = 0; set <= sets; set++) {
|
||||
|
Loading…
Reference in New Issue
Block a user