Result selection algorithm tweaks

This commit is contained in:
Viktor Lofgren 2023-01-11 17:19:57 +01:00
parent 69ccf143ac
commit 085d985e61
3 changed files with 26 additions and 27 deletions

View File

@ -131,7 +131,8 @@ public class IndexResultValuator {
}
private double calculateTermCoherencePenalty(int urlId, TObjectIntHashMap<String> termToId, List<String> termList) {
long maskDirect = ~0;
long maskDirectGenerous = ~0;
long maskDirectRaw = ~0;
long maskAdjacent = ~0;
final int flagBitMask = EdgePageWordFlags.Title.asBit()
@ -148,21 +149,28 @@ public class IndexResultValuator {
positions = EdgePageWordMetadata.decodePositions(meta);
if (!EdgePageWordMetadata.hasAnyFlags(meta, flagBitMask)) {
maskDirect &= positions;
maskAdjacent &= (positions | (positions << 1) | (positions >>> 1));
maskAdjacent &= (positions | (positions << 1) | (positions >>> 1));
maskDirectRaw &= positions;
// Only a term that actually has position bits, and carries none of the flags in
// flagBitMask, may narrow the direct-overlap mask. The guard must be
// `positions != 0`: with `positions == 0` the mask would only ever be AND-ed
// with 0, so terms with real positions would never narrow it and a single
// position-less term would clear it entirely.
if (positions != 0 && !EdgePageWordMetadata.hasAnyFlags(meta, flagBitMask)) {
    maskDirectGenerous &= positions;
}
}
if (maskAdjacent == 0) {
return 40;
}
if (maskDirect == 0) {
if (maskDirectGenerous == 0) {
return 20;
}
return Long.numberOfTrailingZeros(maskDirect)/5. - Long.bitCount(maskDirect);
if (maskDirectRaw == 0) {
return 2;
}
return Long.numberOfTrailingZeros(maskDirectGenerous)/5. - Long.bitCount(maskDirectGenerous);
}

View File

@ -86,7 +86,7 @@ public class SearchResultValuator {
}
return bestScore * (0.3 + 0.7 * bestAllTermsFactor) * priorityTermBonus;
return bestScore * (0.1 + 0.9 * bestAllTermsFactor) * priorityTermBonus;
}
private boolean hasPriorityTerm(List<EdgeSearchResultKeywordScore> rawScores) {
@ -153,6 +153,7 @@ public class SearchResultValuator {
private double calculateTermCoherencePenalty(SearchResultsKeywordSet keywordSet, double f) {
long maskDirect = ~0;
long maskAdjacent = ~0;
byte excludeMask = (byte) (EdgePageWordFlags.Title.asBit() | EdgePageWordFlags.Subjects.asBit() | EdgePageWordFlags.Synthetic.asBit());
for (var keyword : keywordSet.keywords) {
@ -163,28 +164,28 @@ public class SearchResultValuator {
return f;
}
positions = meta.positions();
if (!EdgePageWordMetadata.hasAnyFlags(meta.flags(), excludeMask))
{
maskAdjacent &= (positions | (positions << 1) | (positions >>> 1));
if (positions != 0 && !EdgePageWordMetadata.hasAnyFlags(meta.flags(), excludeMask))
{
maskDirect &= positions;
maskAdjacent &= (positions | (positions << 1) | (positions >>> 1));
}
}
if (maskAdjacent == 0) {
return 1.2 * f;
return 2 * f;
}
if (maskDirect == 0) {
return 1.1 * f;
return 1.25 * f;
}
if (maskDirect != ~0L) {
double locationFactor = 0.65 + Math.max(0.,
0.35 * Long.numberOfTrailingZeros(maskDirect) / 16.
- Math.sqrt(Long.bitCount(maskDirect) - 1) / 5.
double locationFactor = 0.5 + Math.max(0.,
0.5 * Long.numberOfTrailingZeros(maskDirect) / 16.
- Math.sqrt(Long.bitCount(maskDirect) - 1) / 3.
);
return f * locationFactor;
@ -237,16 +238,6 @@ public class SearchResultValuator {
return f;
}
/**
 * Maps a length to a penalty factor.
 *
 * <p>The length is first raised to at least {@code MIN_LENGTH} and then capped at
 * {@code AVG_LENGTH}; the result scales linearly with the clamped length, reaching
 * {@code 1.0} when the clamped length equals {@code AVG_LENGTH}.
 *
 * @param length the length to evaluate
 * @return {@code 0.5 + 0.5 * clamped / AVG_LENGTH}
 */
private double getLengthPenalty(int length) {
    // Lower bound is applied first, then the upper bound, matching the original order.
    final int clamped = Math.min(Math.max(length, MIN_LENGTH), AVG_LENGTH);
    final double halfShare = 0.5 * clamped / AVG_LENGTH;
    return 0.5 + halfShare;
}
private double[] getTermWeights(EdgeSearchResultKeywordScore[] scores) {
double[] weights = new double[scores.length];

View File

@ -11,7 +11,7 @@
<option {{#eq profile "default"}}selected{{/eq}} value="default">Popular Sites</option>
<option {{#eq profile "modern"}}selected{{/eq}} value="modern">Blogs and Personal Websites</option>
<option {{#eq profile "academia"}}selected{{/eq}} value="academia">Academia, Forums, Big Websites</option>
<option {{#eq profile "vintage"}}selected{{/eq}} value="vintage">Vintage</option>
<option {{#eq profile "vintage"}}selected{{/eq}} value="vintage">Web 1.0</option>
<option {{#eq profile "yolo"}}selected{{/eq}} value="yolo">Default Ranking Algorithm</option>
<option {{#eq profile "corpo"}}selected{{/eq}} value="corpo">No Domain Ranking</option>
</optgroup>