From f12c6fd57e5ce4f5077c0356954e530a69346cc5 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Thu, 20 Apr 2023 16:00:59 +0200 Subject: [PATCH] Add a ranking parameter for biasing toward recent or old content. --- .../results/ResultRankingParameters.java | 9 ++++++++ .../nu/marginalia/ranking/ResultValuator.java | 21 ++++++++++++++----- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/ResultRankingParameters.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/ResultRankingParameters.java index b76d11d8..ff28c5d5 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/ResultRankingParameters.java +++ b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/ResultRankingParameters.java @@ -33,6 +33,9 @@ public class ResultRankingParameters { public double bm25PrioWeight; public double tcfWeight; + public TemporalBias temporalBias; + public double temporalBiasWeight; + public static ResultRankingParameters sensibleDefaults() { return builder() .fullParams(new Bm25Parameters(1.2, 0.5)) @@ -46,6 +49,12 @@ public class ResultRankingParameters { .bm25FullWeight(1.) .bm25PrioWeight(1.) .tcfWeight(2.) + .temporalBias(TemporalBias.NONE) + .temporalBiasWeight(1. / (10.)) .build(); } + + public enum TemporalBias { + RECENT, OLD, NONE + }; } diff --git a/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/ResultValuator.java b/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/ResultValuator.java index 73aa2c0d..69a379fb 100644 --- a/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/ResultValuator.java +++ b/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/ResultValuator.java @@ -1,7 +1,9 @@ package nu.marginalia.ranking; import nu.marginalia.index.client.model.results.ResultRankingContext; +import nu.marginalia.index.client.model.results.ResultRankingParameters; import nu.marginalia.index.client.model.results.SearchResultKeywordScore; +import nu.marginalia.model.crawl.PubDate; import nu.marginalia.model.idx.DocumentMetadata; import nu.marginalia.ranking.factors.*; @@ -52,21 +54,30 @@ public class ResultValuator { int asl = DocumentMetadata.decodeAvgSentenceLength(documentMetadata); int quality = DocumentMetadata.decodeQuality(documentMetadata); int topology = DocumentMetadata.decodeTopology(documentMetadata); + int year = DocumentMetadata.decodeYear(documentMetadata); double averageSentenceLengthPenalty = (asl >= rankingParams.shortSentenceThreshold ? 0 : -rankingParams.shortSentencePenalty); - double qualityPenalty = -quality * rankingParams.qualityPenalty; - double rankingBonus = (255. - rank) * rankingParams.domainRankBonus; - double topologyBonus = Math.log(1 + topology); - double documentLengthPenalty = length > rankingParams.shortDocumentThreshold ? 0 : -rankingParams.shortDocumentPenalty; - + final double qualityPenalty = -quality * rankingParams.qualityPenalty; + final double rankingBonus = (255. - rank) * rankingParams.domainRankBonus; + final double topologyBonus = Math.log(1 + topology); + final double documentLengthPenalty = length > rankingParams.shortDocumentThreshold ? 0 : -rankingParams.shortDocumentPenalty; + final double temporalBias; + if (rankingParams.temporalBias == ResultRankingParameters.TemporalBias.RECENT) { + temporalBias = - Math.abs(year - PubDate.MAX_YEAR) * rankingParams.temporalBiasWeight; + } else if (rankingParams.temporalBias == ResultRankingParameters.TemporalBias.OLD) { + temporalBias = - Math.abs(year - PubDate.MIN_YEAR) * rankingParams.temporalBiasWeight; + } else { + temporalBias = 0; + } double overallPart = averageSentenceLengthPenalty + documentLengthPenalty + qualityPenalty + rankingBonus + topologyBonus + + temporalBias + priorityTermBonus.calculate(scores); for (int set = 0; set <= sets; set++) {