mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
Result selection algorithm tweaks
This commit is contained in:
parent
69ccf143ac
commit
085d985e61
@ -131,7 +131,8 @@ public class IndexResultValuator {
|
||||
}
|
||||
|
||||
private double calculateTermCoherencePenalty(int urlId, TObjectIntHashMap<String> termToId, List<String> termList) {
|
||||
long maskDirect = ~0;
|
||||
long maskDirectGenerous = ~0;
|
||||
long maskDirectRaw = ~0;
|
||||
long maskAdjacent = ~0;
|
||||
|
||||
final int flagBitMask = EdgePageWordFlags.Title.asBit()
|
||||
@ -148,21 +149,28 @@ public class IndexResultValuator {
|
||||
|
||||
positions = EdgePageWordMetadata.decodePositions(meta);
|
||||
|
||||
if (!EdgePageWordMetadata.hasAnyFlags(meta, flagBitMask)) {
|
||||
maskDirect &= positions;
|
||||
maskAdjacent &= (positions | (positions << 1) | (positions >>> 1));
|
||||
maskAdjacent &= (positions | (positions << 1) | (positions >>> 1));
|
||||
maskDirectRaw &= positions;
|
||||
|
||||
if (positions == 0 && !EdgePageWordMetadata.hasAnyFlags(meta, flagBitMask)) {
|
||||
maskDirectGenerous &= positions;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (maskAdjacent == 0) {
|
||||
return 40;
|
||||
}
|
||||
|
||||
if (maskDirect == 0) {
|
||||
if (maskDirectGenerous == 0) {
|
||||
return 20;
|
||||
}
|
||||
|
||||
return Long.numberOfTrailingZeros(maskDirect)/5. - Long.bitCount(maskDirect);
|
||||
if (maskDirectRaw == 0) {
|
||||
return 2;
|
||||
}
|
||||
|
||||
return Long.numberOfTrailingZeros(maskDirectGenerous)/5. - Long.bitCount(maskDirectGenerous);
|
||||
}
|
||||
|
||||
|
||||
|
@ -86,7 +86,7 @@ public class SearchResultValuator {
|
||||
|
||||
}
|
||||
|
||||
return bestScore * (0.3 + 0.7 * bestAllTermsFactor) * priorityTermBonus;
|
||||
return bestScore * (0.1 + 0.9 * bestAllTermsFactor) * priorityTermBonus;
|
||||
}
|
||||
|
||||
private boolean hasPriorityTerm(List<EdgeSearchResultKeywordScore> rawScores) {
|
||||
@ -153,6 +153,7 @@ public class SearchResultValuator {
|
||||
private double calculateTermCoherencePenalty(SearchResultsKeywordSet keywordSet, double f) {
|
||||
long maskDirect = ~0;
|
||||
long maskAdjacent = ~0;
|
||||
|
||||
byte excludeMask = (byte) (EdgePageWordFlags.Title.asBit() | EdgePageWordFlags.Subjects.asBit() | EdgePageWordFlags.Synthetic.asBit());
|
||||
|
||||
for (var keyword : keywordSet.keywords) {
|
||||
@ -163,28 +164,28 @@ public class SearchResultValuator {
|
||||
return f;
|
||||
}
|
||||
|
||||
|
||||
positions = meta.positions();
|
||||
|
||||
if (!EdgePageWordMetadata.hasAnyFlags(meta.flags(), excludeMask))
|
||||
{
|
||||
maskAdjacent &= (positions | (positions << 1) | (positions >>> 1));
|
||||
if (positions != 0 && !EdgePageWordMetadata.hasAnyFlags(meta.flags(), excludeMask))
|
||||
{
|
||||
maskDirect &= positions;
|
||||
maskAdjacent &= (positions | (positions << 1) | (positions >>> 1));
|
||||
}
|
||||
}
|
||||
|
||||
if (maskAdjacent == 0) {
|
||||
return 1.2 * f;
|
||||
return 2 * f;
|
||||
}
|
||||
|
||||
if (maskDirect == 0) {
|
||||
return 1.1 * f;
|
||||
return 1.25 * f;
|
||||
}
|
||||
|
||||
|
||||
if (maskDirect != ~0L) {
|
||||
double locationFactor = 0.65 + Math.max(0.,
|
||||
0.35 * Long.numberOfTrailingZeros(maskDirect) / 16.
|
||||
- Math.sqrt(Long.bitCount(maskDirect) - 1) / 5.
|
||||
double locationFactor = 0.5 + Math.max(0.,
|
||||
0.5 * Long.numberOfTrailingZeros(maskDirect) / 16.
|
||||
- Math.sqrt(Long.bitCount(maskDirect) - 1) / 3.
|
||||
);
|
||||
|
||||
return f * locationFactor;
|
||||
@ -237,16 +238,6 @@ public class SearchResultValuator {
|
||||
return f;
|
||||
}
|
||||
|
||||
private double getLengthPenalty(int length) {
|
||||
if (length < MIN_LENGTH) {
|
||||
length = MIN_LENGTH;
|
||||
}
|
||||
if (length > AVG_LENGTH) {
|
||||
length = AVG_LENGTH;
|
||||
}
|
||||
return (0.5 + 0.5 * length / AVG_LENGTH);
|
||||
}
|
||||
|
||||
private double[] getTermWeights(EdgeSearchResultKeywordScore[] scores) {
|
||||
double[] weights = new double[scores.length];
|
||||
|
||||
|
@ -11,7 +11,7 @@
|
||||
<option {{#eq profile "default"}}selected{{/eq}} value="default">Popular Sites</option>
|
||||
<option {{#eq profile "modern"}}selected{{/eq}} value="modern">Blogs and Personal Websites</option>
|
||||
<option {{#eq profile "academia"}}selected{{/eq}} value="academia">Academia, Forums, Big Websites</option>
|
||||
<option {{#eq profile "vintage"}}selected{{/eq}} value="vintage">Vintage</option>
|
||||
<option {{#eq profile "vintage"}}selected{{/eq}} value="vintage">Web 1.0</option>
|
||||
<option {{#eq profile "yolo"}}selected{{/eq}} value="yolo">Default Ranking Algorithm</option>
|
||||
<option {{#eq profile "corpo"}}selected{{/eq}} value="corpo">No Domain Ranking</option>
|
||||
</optgroup>
|
||||
|
Loading…
Reference in New Issue
Block a user