mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(index) Add score bonuses for single-word anchor tag spans
Enhanced scoring logic to add bonuses when the query matches single-word anchor (atag) spans exactly. Implemented this by adding conditions in `IndexResultScoreCalculator.java` and creating a new method `containsRangeExact` in `DocumentSpan.java` to check for exact span matches.
This commit is contained in:
parent
ff17473105
commit
0b6b5dab07
@ -103,13 +103,44 @@ public class DocumentSpan {
|
|||||||
|
|
||||||
for (int pi = 0; pi < positions.size(); pi++) {
|
for (int pi = 0; pi < positions.size(); pi++) {
|
||||||
int position = positions.getInt(pi);
|
int position = positions.getInt(pi);
|
||||||
if (position < start) {
|
if (position >= start && position + len <= end) {
|
||||||
continue;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (position + len <= end) {
|
if (sei + 2 < startsEnds.size()) {
|
||||||
|
start = startsEnds.getInt(sei++);
|
||||||
|
end = startsEnds.getInt(sei++);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns true if for any position in the list, the range from
|
||||||
|
* position[i] to position[i]+len starts and ends exactly where a span starts and ends */
|
||||||
|
public boolean containsRangeExact(IntList positions, int len) {
|
||||||
|
if (null == startsEnds || startsEnds.size() < 2 || positions.isEmpty()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
int sei = 0;
|
||||||
|
|
||||||
|
|
||||||
|
int start = startsEnds.getInt(sei++);
|
||||||
|
int end = startsEnds.getInt(sei++);
|
||||||
|
|
||||||
|
for (int pi = 0; pi < positions.size(); pi++) {
|
||||||
|
int position = positions.getInt(pi);
|
||||||
|
|
||||||
|
if (position == start && position + len == end) {
|
||||||
return true;
|
return true;
|
||||||
} else if (sei + 2 < startsEnds.size()) {
|
}
|
||||||
|
|
||||||
|
if (sei + 2 < startsEnds.size()) {
|
||||||
start = startsEnds.getInt(sei++);
|
start = startsEnds.getInt(sei++);
|
||||||
end = startsEnds.getInt(sei++);
|
end = startsEnds.getInt(sei++);
|
||||||
}
|
}
|
||||||
|
@ -386,6 +386,13 @@ public class IndexResultScoreCalculator {
|
|||||||
{
|
{
|
||||||
score += 4; // If the title is a single word and the same as the query, we give it a verbatim bonus
|
score += 4; // If the title is a single word and the same as the query, we give it a verbatim bonus
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var atagSpan = spans.getSpan(HtmlTag.ANCHOR);
|
||||||
|
if (atagSpan.length() == fullGroup.size
|
||||||
|
&& atagSpan.containsRangeExact(fullGroupIntersections, fullGroup.size))
|
||||||
|
{
|
||||||
|
score += 4; // Add additional bonus if there's a single-word atag span
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -397,6 +404,13 @@ public class IndexResultScoreCalculator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Bonus if there's a perfect match with an atag span
|
||||||
|
var atagSpan = spans.getSpan(HtmlTag.ANCHOR);
|
||||||
|
if (atagSpan.length() == fullGroup.size && atagSpan.containsRangeExact(fullGroupIntersections, fullGroup.size))
|
||||||
|
{
|
||||||
|
score += 1;
|
||||||
|
}
|
||||||
|
|
||||||
// For optional groups, we scale the score by the size of the group relative to the full group
|
// For optional groups, we scale the score by the size of the group relative to the full group
|
||||||
for (var optionalGroup : constraints.getOptionalGroups()) {
|
for (var optionalGroup : constraints.getOptionalGroups()) {
|
||||||
int groupSize = optionalGroup.size;
|
int groupSize = optionalGroup.size;
|
||||||
|
Loading…
Reference in New Issue
Block a user