From b21f8538a83ffa523c00658fedc9da5e9be75239 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Sat, 3 Aug 2024 14:41:38 +0200 Subject: [PATCH] (index) Tune ranking for verbatim matches in the title, rewarding shorter titles --- .../results/IndexResultScoreCalculator.java | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/code/index/java/nu/marginalia/index/results/IndexResultScoreCalculator.java b/code/index/java/nu/marginalia/index/results/IndexResultScoreCalculator.java index 730cd718..74895cf2 100644 --- a/code/index/java/nu/marginalia/index/results/IndexResultScoreCalculator.java +++ b/code/index/java/nu/marginalia/index/results/IndexResultScoreCalculator.java @@ -280,6 +280,9 @@ public class IndexResultScoreCalculator { if (positions[i] != null && ctx.regularMask.get(i)) { searchableKeywordsCount ++; + boolean titleMatch = false; + boolean headingMatch = false; + var iter = positions[i].iterator(); while (iter.hasNext()) { @@ -288,11 +291,11 @@ public class IndexResultScoreCalculator { firstPosition = Math.max(firstPosition, pos); if (spans.title.containsPosition(pos)) { - unorderedMatchInTitleCount++; + titleMatch = true; weightedCounts[i] += 2.5f; } else if (spans.heading.containsPosition(pos)) { - unorderedMatchInHeadingCount++; + headingMatch = true; weightedCounts[i] += 2.5f; } else if (spans.code.containsPosition(pos)) @@ -302,10 +305,17 @@ public class IndexResultScoreCalculator { else if (spans.nav.containsPosition(pos)) weightedCounts[i] += 0.1f; } + + if (titleMatch) { + unorderedMatchInTitleCount++; + } + if (headingMatch) { + unorderedMatchInHeadingCount++; + } } } - if (!verbatimMatchInTitle && unorderedMatchInTitleCount == searchableKeywordsCount) { + if (!verbatimMatchInTitle && searchableKeywordsCount > 2 && unorderedMatchInTitleCount == searchableKeywordsCount) { coherenceScore += 2.5f * unorderedMatchInTitleCount; coherenceScore += 2.f * unorderedMatchInTitleCount / titleLength; }