diff --git a/code/index/index-forward/java/nu/marginalia/index/forward/spans/DocumentSpan.java b/code/index/index-forward/java/nu/marginalia/index/forward/spans/DocumentSpan.java
index f1f0c6c7..9daed76c 100644
--- a/code/index/index-forward/java/nu/marginalia/index/forward/spans/DocumentSpan.java
+++ b/code/index/index-forward/java/nu/marginalia/index/forward/spans/DocumentSpan.java
@@ -71,6 +71,30 @@ public class DocumentSpan {
         return startsEnds.iterator();
     }
 
+    /**
+     * Returns the combined length of all spans, computed as the sum of
+     * (end - start) over each consecutive pair of values in the sequence.
+     *
+     * @return the summed span length, or 0 if there is no span data
+     */
+    public int length() {
+        if (null == startsEnds) {
+            return 0;
+        }
+
+        int len = 0;
+        var iter = startsEnds.iterator();
+
+        // The sequence is read as consecutive (start, end) pairs
+        while (iter.hasNext()) {
+            int start = iter.nextInt();
+            int end = iter.nextInt();
+            len += end - start;
+        }
+
+        return len;
+    }
+
     public int size() {
         return startsEnds.valueCount() / 2;
     }
diff --git a/code/index/java/nu/marginalia/index/results/IndexResultScoreCalculator.java b/code/index/java/nu/marginalia/index/results/IndexResultScoreCalculator.java
index 1f802c2c..4d44b03a 100644
--- a/code/index/java/nu/marginalia/index/results/IndexResultScoreCalculator.java
+++ b/code/index/java/nu/marginalia/index/results/IndexResultScoreCalculator.java
@@ -197,13 +197,22 @@ public class IndexResultScoreCalculator {
             temporalBias = 0;
         }
 
+        final int titleLength = spans.title.length();
+
         float coherenceScore = 0.f;
 
         // Calculate a bonus for keyword coherences when large ones exist
         int largestOptional = coherences.largestOptional();
         if (largestOptional >= 2) {
             if (largestOptional == coherences.testOptional(positions, spans.title)) {
-                coherenceScore = 2.0f * largestOptional;
+                // verbatim title match
+                coherenceScore = 4.0f * largestOptional;
+                // additional bonus if the match is most of the title's length;
+                // skipped when the title length is not positive, since the float
+                // division would otherwise produce Infinity
+                if (titleLength > 0) {
+                    coherenceScore += 2.f * largestOptional / titleLength;
+                }
             }
             else if (largestOptional == coherences.testOptional(positions, spans.heading)) {
                 coherenceScore = 1.5f * largestOptional;