(index) Add score bonuses for single-word anchor tag spans

Enhance the scoring logic to add a bonus when the query exactly matches an anchor tag (atag) span. This is implemented by adding conditions in `IndexResultScoreCalculator.java` and a new method, `containsRangeExact`, in `DocumentSpan.java`, which checks whether a range of query positions coincides exactly with a span.
This commit is contained in:
Viktor Lofgren 2024-11-25 14:44:41 +01:00
parent ff17473105
commit 0b6b5dab07
2 changed files with 49 additions and 4 deletions

View File

@ -103,13 +103,44 @@ public class DocumentSpan {
for (int pi = 0; pi < positions.size(); pi++) {
int position = positions.getInt(pi);
if (position < start) {
continue;
if (position >= start && position + len <= end) {
return true;
}
if (position + len <= end) {
if (sei + 2 < startsEnds.size()) {
start = startsEnds.getInt(sei++);
end = startsEnds.getInt(sei++);
}
else {
return false;
}
}
return false;
}
/** Returns true if for any position in the list, there exists a span
* that starts exactly at position[i] and ends exactly at position[i]+len */
public boolean containsRangeExact(IntList positions, int len) {
if (null == startsEnds || startsEnds.size() < 2 || positions.isEmpty()) {
return false;
}
int sei = 0;
int start = startsEnds.getInt(sei++);
int end = startsEnds.getInt(sei++);
for (int pi = 0; pi < positions.size(); pi++) {
int position = positions.getInt(pi);
if (position == start && position + len == end) {
return true;
} else if (sei + 2 < startsEnds.size()) {
}
if (sei + 2 < startsEnds.size()) {
start = startsEnds.getInt(sei++);
end = startsEnds.getInt(sei++);
}

View File

@ -386,6 +386,13 @@ public class IndexResultScoreCalculator {
{
score += 4; // If the title is a single word and the same as the query, we give it a verbatim bonus
}
var atagSpan = spans.getSpan(HtmlTag.ANCHOR);
if (atagSpan.length() == fullGroup.size
&& atagSpan.containsRangeExact(fullGroupIntersections, fullGroup.size))
{
score += 4; // Add additional bonus if there's a single-word atag span
}
return;
}
@ -397,6 +404,13 @@ public class IndexResultScoreCalculator {
}
}
// Bonus if there's a perfect match with an atag span
var atagSpan = spans.getSpan(HtmlTag.ANCHOR);
if (atagSpan.length() == fullGroup.size && atagSpan.containsRangeExact(fullGroupIntersections, fullGroup.size))
{
score += 1;
}
// For optional groups, we scale the score by the size of the group relative to the full group
for (var optionalGroup : constraints.getOptionalGroups()) {
int groupSize = optionalGroup.size;