mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
Result selection algorithm tweaks
This commit is contained in:
parent
69ccf143ac
commit
085d985e61
@ -131,7 +131,8 @@ public class IndexResultValuator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private double calculateTermCoherencePenalty(int urlId, TObjectIntHashMap<String> termToId, List<String> termList) {
|
private double calculateTermCoherencePenalty(int urlId, TObjectIntHashMap<String> termToId, List<String> termList) {
|
||||||
long maskDirect = ~0;
|
long maskDirectGenerous = ~0;
|
||||||
|
long maskDirectRaw = ~0;
|
||||||
long maskAdjacent = ~0;
|
long maskAdjacent = ~0;
|
||||||
|
|
||||||
final int flagBitMask = EdgePageWordFlags.Title.asBit()
|
final int flagBitMask = EdgePageWordFlags.Title.asBit()
|
||||||
@ -148,21 +149,28 @@ public class IndexResultValuator {
|
|||||||
|
|
||||||
positions = EdgePageWordMetadata.decodePositions(meta);
|
positions = EdgePageWordMetadata.decodePositions(meta);
|
||||||
|
|
||||||
if (!EdgePageWordMetadata.hasAnyFlags(meta, flagBitMask)) {
|
maskAdjacent &= (positions | (positions << 1) | (positions >>> 1));
|
||||||
maskDirect &= positions;
|
maskDirectRaw &= positions;
|
||||||
maskAdjacent &= (positions | (positions << 1) | (positions >>> 1));
|
|
||||||
|
if (positions == 0 && !EdgePageWordMetadata.hasAnyFlags(meta, flagBitMask)) {
|
||||||
|
maskDirectGenerous &= positions;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (maskAdjacent == 0) {
|
if (maskAdjacent == 0) {
|
||||||
return 40;
|
return 40;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (maskDirect == 0) {
|
if (maskDirectGenerous == 0) {
|
||||||
return 20;
|
return 20;
|
||||||
}
|
}
|
||||||
|
|
||||||
return Long.numberOfTrailingZeros(maskDirect)/5. - Long.bitCount(maskDirect);
|
if (maskDirectRaw == 0) {
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
return Long.numberOfTrailingZeros(maskDirectGenerous)/5. - Long.bitCount(maskDirectGenerous);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -86,7 +86,7 @@ public class SearchResultValuator {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return bestScore * (0.3 + 0.7 * bestAllTermsFactor) * priorityTermBonus;
|
return bestScore * (0.1 + 0.9 * bestAllTermsFactor) * priorityTermBonus;
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean hasPriorityTerm(List<EdgeSearchResultKeywordScore> rawScores) {
|
private boolean hasPriorityTerm(List<EdgeSearchResultKeywordScore> rawScores) {
|
||||||
@ -153,6 +153,7 @@ public class SearchResultValuator {
|
|||||||
private double calculateTermCoherencePenalty(SearchResultsKeywordSet keywordSet, double f) {
|
private double calculateTermCoherencePenalty(SearchResultsKeywordSet keywordSet, double f) {
|
||||||
long maskDirect = ~0;
|
long maskDirect = ~0;
|
||||||
long maskAdjacent = ~0;
|
long maskAdjacent = ~0;
|
||||||
|
|
||||||
byte excludeMask = (byte) (EdgePageWordFlags.Title.asBit() | EdgePageWordFlags.Subjects.asBit() | EdgePageWordFlags.Synthetic.asBit());
|
byte excludeMask = (byte) (EdgePageWordFlags.Title.asBit() | EdgePageWordFlags.Subjects.asBit() | EdgePageWordFlags.Synthetic.asBit());
|
||||||
|
|
||||||
for (var keyword : keywordSet.keywords) {
|
for (var keyword : keywordSet.keywords) {
|
||||||
@ -163,28 +164,28 @@ public class SearchResultValuator {
|
|||||||
return f;
|
return f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
positions = meta.positions();
|
positions = meta.positions();
|
||||||
|
|
||||||
if (!EdgePageWordMetadata.hasAnyFlags(meta.flags(), excludeMask))
|
maskAdjacent &= (positions | (positions << 1) | (positions >>> 1));
|
||||||
{
|
if (positions != 0 && !EdgePageWordMetadata.hasAnyFlags(meta.flags(), excludeMask))
|
||||||
|
{
|
||||||
maskDirect &= positions;
|
maskDirect &= positions;
|
||||||
maskAdjacent &= (positions | (positions << 1) | (positions >>> 1));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (maskAdjacent == 0) {
|
if (maskAdjacent == 0) {
|
||||||
return 1.2 * f;
|
return 2 * f;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (maskDirect == 0) {
|
if (maskDirect == 0) {
|
||||||
return 1.1 * f;
|
return 1.25 * f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (maskDirect != ~0L) {
|
if (maskDirect != ~0L) {
|
||||||
double locationFactor = 0.65 + Math.max(0.,
|
double locationFactor = 0.5 + Math.max(0.,
|
||||||
0.35 * Long.numberOfTrailingZeros(maskDirect) / 16.
|
0.5 * Long.numberOfTrailingZeros(maskDirect) / 16.
|
||||||
- Math.sqrt(Long.bitCount(maskDirect) - 1) / 5.
|
- Math.sqrt(Long.bitCount(maskDirect) - 1) / 3.
|
||||||
);
|
);
|
||||||
|
|
||||||
return f * locationFactor;
|
return f * locationFactor;
|
||||||
@ -237,16 +238,6 @@ public class SearchResultValuator {
|
|||||||
return f;
|
return f;
|
||||||
}
|
}
|
||||||
|
|
||||||
private double getLengthPenalty(int length) {
|
|
||||||
if (length < MIN_LENGTH) {
|
|
||||||
length = MIN_LENGTH;
|
|
||||||
}
|
|
||||||
if (length > AVG_LENGTH) {
|
|
||||||
length = AVG_LENGTH;
|
|
||||||
}
|
|
||||||
return (0.5 + 0.5 * length / AVG_LENGTH);
|
|
||||||
}
|
|
||||||
|
|
||||||
private double[] getTermWeights(EdgeSearchResultKeywordScore[] scores) {
|
private double[] getTermWeights(EdgeSearchResultKeywordScore[] scores) {
|
||||||
double[] weights = new double[scores.length];
|
double[] weights = new double[scores.length];
|
||||||
|
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
<option {{#eq profile "default"}}selected{{/eq}} value="default">Popular Sites</option>
|
<option {{#eq profile "default"}}selected{{/eq}} value="default">Popular Sites</option>
|
||||||
<option {{#eq profile "modern"}}selected{{/eq}} value="modern">Blogs and Personal Websites</option>
|
<option {{#eq profile "modern"}}selected{{/eq}} value="modern">Blogs and Personal Websites</option>
|
||||||
<option {{#eq profile "academia"}}selected{{/eq}} value="academia">Academia, Forums, Big Websites</option>
|
<option {{#eq profile "academia"}}selected{{/eq}} value="academia">Academia, Forums, Big Websites</option>
|
||||||
<option {{#eq profile "vintage"}}selected{{/eq}} value="vintage">Vintage</option>
|
<option {{#eq profile "vintage"}}selected{{/eq}} value="vintage">Web 1.0</option>
|
||||||
<option {{#eq profile "yolo"}}selected{{/eq}} value="yolo">Default Ranking Algorithm</option>
|
<option {{#eq profile "yolo"}}selected{{/eq}} value="yolo">Default Ranking Algorithm</option>
|
||||||
<option {{#eq profile "corpo"}}selected{{/eq}} value="corpo">No Domain Ranking</option>
|
<option {{#eq profile "corpo"}}selected{{/eq}} value="corpo">No Domain Ranking</option>
|
||||||
</optgroup>
|
</optgroup>
|
||||||
|
Loading…
Reference in New Issue
Block a user