(index) Tune ranking for verbatim matches in the title, rewarding shorter titles

This commit is contained in:
Viktor Lofgren 2024-08-03 14:07:02 +02:00
parent e48f52faba
commit ec5a17ad13
2 changed files with 22 additions and 1 deletions

View File

@ -71,6 +71,22 @@ public class DocumentSpan {
return startsEnds.iterator(); return startsEnds.iterator();
} }
/**
 * Sums the extents of all ranges stored in this span.
 *
 * <p>Values are read from {@code startsEnds} two at a time; the first value
 * of each pair is subtracted and the second is added, so each pair
 * contributes {@code second - first} to the total.
 * NOTE(review): presumably the pairs are (start, end) positions, as the
 * field name suggests, and the sequence always holds an even number of
 * values -- confirm against the code that builds the span.
 *
 * @return the accumulated extent, or 0 when no ranges are stored
 *         ({@code startsEnds} is null)
 */
public int length() {
    if (startsEnds == null) {
        return 0;
    }

    int total = 0;
    for (var it = startsEnds.iterator(); it.hasNext(); ) {
        // Consume one pair per pass, in the same order as stored.
        final int first = it.nextInt();
        final int second = it.nextInt();
        total += second - first;
    }
    return total;
}
public int size() { public int size() {
return startsEnds.valueCount() / 2; return startsEnds.valueCount() / 2;
} }

View File

@ -197,13 +197,18 @@ public class IndexResultScoreCalculator {
temporalBias = 0; temporalBias = 0;
} }
final int titleLength = spans.title.length();
float coherenceScore = 0.f; float coherenceScore = 0.f;
// Calculate a bonus for keyword coherences when large ones exist // Calculate a bonus for keyword coherences when large ones exist
int largestOptional = coherences.largestOptional(); int largestOptional = coherences.largestOptional();
if (largestOptional >= 2) { if (largestOptional >= 2) {
if (largestOptional == coherences.testOptional(positions, spans.title)) { if (largestOptional == coherences.testOptional(positions, spans.title)) {
coherenceScore = 2.0f * largestOptional; // verbatim title match
coherenceScore = 4.0f * largestOptional;
// additional bonus if the match is most of the title's length
coherenceScore += 2.f * largestOptional / titleLength;
} }
else if (largestOptional == coherences.testOptional(positions, spans.heading)) { else if (largestOptional == coherences.testOptional(positions, spans.heading)) {
coherenceScore = 1.5f * largestOptional; coherenceScore = 1.5f * largestOptional;