(index) Experimental ranking signals

This commit is contained in:
Viktor Lofgren 2024-08-03 10:33:41 +02:00
parent eba2844361
commit c2cedfa83c
2 changed files with 33 additions and 12 deletions

View File

@ -211,13 +211,28 @@ public class IndexResultScoreCalculator {
temporalBias = 0; temporalBias = 0;
} }
int numCoherenceAll = coherences.countOptional(positions); float coherenceScore = 0.f;
int bestCoherenceAll = coherences.testOptional(positions);
int bestCoherenceTitle = coherences.testOptional(positions, spans.title);
int bestCoherenceHeading = coherences.testOptional(positions, spans.heading);
boolean allInTitle = coherences.allOptionalInSpan(positions, spans.title); // Calculate a bonus for keyword coherences when large ones exist
boolean allInHeading = coherences.allOptionalInSpan(positions, spans.heading); int largestOptional = coherences.largestOptional();
if (largestOptional >= 2) {
int bestInTitle = coherences.testOptional(positions, spans.title);
int bestInHeading = coherences.testOptional(positions, spans.heading);
int best = coherences.testOptional(positions);
if (largestOptional == bestInTitle) {
coherenceScore = 2.0f * largestOptional;
}
else if (largestOptional == bestInHeading) {
coherenceScore = 1.5f * largestOptional;
}
else if (largestOptional == best) {
coherenceScore = 0.75f * largestOptional;
}
coherenceScore += (float) Math.pow(coherences.countOptional(positions) / (double) coherences.numOptional(), 2);
}
float[] weightedCounts = new float[compiledQuery.size()]; float[] weightedCounts = new float[compiledQuery.size()];
int firstPosition = Integer.MAX_VALUE; int firstPosition = Integer.MAX_VALUE;
@ -255,12 +270,7 @@ public class IndexResultScoreCalculator {
+ topologyBonus + topologyBonus
+ temporalBias + temporalBias
+ flagsPenalty + flagsPenalty
+ bestCoherenceAll + coherenceScore;
+ bestCoherenceTitle
+ bestCoherenceHeading
+ numCoherenceAll / 4.
+ (allInTitle ? 5.0 : 0)
+ (allInHeading ? 2.5 : 0);
double tcfAvgDist = rankingParams.tcfAvgDist * (1.0 / calculateAvgMinDistance(positionsQuery, ctx)); double tcfAvgDist = rankingParams.tcfAvgDist * (1.0 / calculateAvgMinDistance(positionsQuery, ctx));
double tcfFirstPosition = rankingParams.tcfFirstPosition * (1.0 / Math.max(1, firstPosition)); double tcfFirstPosition = rankingParams.tcfFirstPosition * (1.0 / Math.max(1, firstPosition));

View File

@ -82,6 +82,17 @@ public class TermCoherenceGroupList {
return true; return true;
} }
/**
 * Reports how many entries {@code optionalGroups} currently holds.
 *
 * @return the size of {@code optionalGroups}
 */
public int numOptional() {
    final int groupCount = optionalGroups.size();
    return groupCount;
}
/**
 * Finds the greatest {@code size} among the entries of {@code optionalGroups}.
 *
 * @return the largest {@code size} encountered, or 0 when {@code optionalGroups}
 *         is empty (the running maximum starts at 0, so the result is never
 *         below 0)
 */
public int largestOptional() {
    int largest = 0;
    for (var group : optionalGroups) {
        // Keep the running maximum; equivalent to Math.max(group.size, largest).
        if (group.size > largest) {
            largest = group.size;
        }
    }
    return largest;
}
public static final class TermCoherenceGroup { public static final class TermCoherenceGroup {
private final int[] offsets; private final int[] offsets;