mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 04:58:59 +00:00
(index) Add score bonuses for single-word anchor tag spans
Enhanced scoring logic to add bonuses when the query matches single-word anchor (atag) spans exactly. Implemented this by adding conditions in `IndexResultScoreCalculator.java` and creating a new method `containsRangeExact` in `DocumentSpan.java` to check for exact span matches.
This commit is contained in:
parent
ff17473105
commit
0b6b5dab07
@ -103,13 +103,44 @@ public class DocumentSpan {
|
||||
|
||||
for (int pi = 0; pi < positions.size(); pi++) {
|
||||
int position = positions.getInt(pi);
|
||||
if (position < start) {
|
||||
continue;
|
||||
if (position >= start && position + len <= end) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (position + len <= end) {
|
||||
if (sei + 2 < startsEnds.size()) {
|
||||
start = startsEnds.getInt(sei++);
|
||||
end = startsEnds.getInt(sei++);
|
||||
}
|
||||
else {
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Returns true if for any position in the list, the range that starts at
|
||||
* position[i] and has length len begins exactly at a span's start and ends exactly at that span's end */
|
||||
public boolean containsRangeExact(IntList positions, int len) {
|
||||
if (null == startsEnds || startsEnds.size() < 2 || positions.isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
int sei = 0;
|
||||
|
||||
|
||||
int start = startsEnds.getInt(sei++);
|
||||
int end = startsEnds.getInt(sei++);
|
||||
|
||||
for (int pi = 0; pi < positions.size(); pi++) {
|
||||
int position = positions.getInt(pi);
|
||||
|
||||
if (position == start && position + len == end) {
|
||||
return true;
|
||||
} else if (sei + 2 < startsEnds.size()) {
|
||||
}
|
||||
|
||||
if (sei + 2 < startsEnds.size()) {
|
||||
start = startsEnds.getInt(sei++);
|
||||
end = startsEnds.getInt(sei++);
|
||||
}
|
||||
|
@ -386,6 +386,13 @@ public class IndexResultScoreCalculator {
|
||||
{
|
||||
score += 4; // If the title is a single word and the same as the query, we give it a verbatim bonus
|
||||
}
|
||||
|
||||
var atagSpan = spans.getSpan(HtmlTag.ANCHOR);
|
||||
if (atagSpan.length() == fullGroup.size
|
||||
&& atagSpan.containsRangeExact(fullGroupIntersections, fullGroup.size))
|
||||
{
|
||||
score += 4; // Add additional bonus if there's a single-word atag span
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@ -397,6 +404,13 @@ public class IndexResultScoreCalculator {
|
||||
}
|
||||
}
|
||||
|
||||
// Bonus if there's a perfect match with an atag span
|
||||
var atagSpan = spans.getSpan(HtmlTag.ANCHOR);
|
||||
if (atagSpan.length() == fullGroup.size && atagSpan.containsRangeExact(fullGroupIntersections, fullGroup.size))
|
||||
{
|
||||
score += 1;
|
||||
}
|
||||
|
||||
// For optional groups, we scale the score by the size of the group relative to the full group
|
||||
for (var optionalGroup : constraints.getOptionalGroups()) {
|
||||
int groupSize = optionalGroup.size;
|
||||
|
Loading…
Reference in New Issue
Block a user