From 1f3b89cf28892cc280c12cbfcbe541d8d07fb0a5 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Wed, 3 Jan 2024 15:20:18 +0100 Subject: [PATCH] (index) Reduce the value of site and site-adjacent in BM25P calculations --- .../ranking/factors/Bm25Factor.java | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/factors/Bm25Factor.java b/code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/factors/Bm25Factor.java index 13c99ecc..a11281db 100644 --- a/code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/factors/Bm25Factor.java +++ b/code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/factors/Bm25Factor.java @@ -53,9 +53,11 @@ public class Bm25Factor { } private static double evaluatePriorityScore(SearchResultKeywordScore keyword) { + int pcount = keyword.positionCount(); + double qcount = 0.; if ((keyword.encodedWordMetadata() & WordFlags.Site.asBit()) != 0) - qcount += 2.; + qcount += 0.5; if ((keyword.encodedWordMetadata() & WordFlags.SiteAdjacent.asBit()) != 0) qcount += 0.5; if ((keyword.encodedWordMetadata() & WordFlags.UrlPath.asBit()) != 0) @@ -66,12 +68,16 @@ public class Bm25Factor { qcount += 2.5; if ((keyword.encodedWordMetadata() & WordFlags.Title.asBit()) != 0) qcount += 1.5; - if ((keyword.encodedWordMetadata() & WordFlags.Subjects.asBit()) != 0) - qcount += 1.25; - if ((keyword.encodedWordMetadata() & WordFlags.NamesWords.asBit()) != 0) - qcount += 0.25; - if ((keyword.encodedWordMetadata() & WordFlags.TfIdfHigh.asBit()) != 0) - qcount += 0.5; + + if (pcount > 2) { + if ((keyword.encodedWordMetadata() & WordFlags.Subjects.asBit()) != 0) + qcount += 1.25; + if ((keyword.encodedWordMetadata() & WordFlags.NamesWords.asBit()) != 0) + qcount += 0.25; + if ((keyword.encodedWordMetadata() & WordFlags.TfIdfHigh.asBit()) != 0) + qcount += 0.5; + } + return qcount; }