mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
Fix for valuation bug in index code that wouldn't sort bad-ish items properly.
This commit is contained in:
parent
f3babde415
commit
1252f95da5
@ -5,6 +5,7 @@ import lombok.Getter;
|
||||
import lombok.ToString;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@ToString
|
||||
@Getter
|
||||
@ -38,4 +39,16 @@ public class EdgeSearchSubquery {
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
if (!searchTermsInclude.isEmpty()) sb.append("include=").append(searchTermsInclude.stream().collect(Collectors.joining(",", "[", "] ")));
|
||||
if (!searchTermsExclude.isEmpty()) sb.append("exclude=").append(searchTermsExclude.stream().collect(Collectors.joining(",", "[", "] ")));
|
||||
if (!searchTermsAdvice.isEmpty()) sb.append("advice=").append(searchTermsAdvice.stream().collect(Collectors.joining(",", "[", "] ")));
|
||||
if (!searchTermsPriority.isEmpty()) sb.append("priority=").append(searchTermsPriority.stream().collect(Collectors.joining(",", "[", "] ")));
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -1,5 +1,7 @@
|
||||
package nu.marginalia.index.query.limit;
|
||||
|
||||
import lombok.ToString;
|
||||
|
||||
public record SpecificationLimit(SpecificationLimitType type, int value) {
|
||||
public static SpecificationLimit none() {
|
||||
return new SpecificationLimit(SpecificationLimitType.NONE, 0);
|
||||
@ -28,4 +30,12 @@ public record SpecificationLimit(SpecificationLimitType type, int value) {
|
||||
return parameter <= value;
|
||||
throw new AssertionError("Unknown type " + type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
if (type == SpecificationLimitType.NONE)
|
||||
return type.toString();
|
||||
|
||||
else return "%s:%d".formatted(type, value);
|
||||
}
|
||||
}
|
||||
|
@ -80,7 +80,7 @@ public class IndexResultValuator {
|
||||
|
||||
long docMetadata = metadataService.getDocumentMetadata(urlIdInt);
|
||||
|
||||
double bestScore = 0;
|
||||
double bestScore = 1000;
|
||||
for (int querySetId = 0; querySetId < searchTermVariants.size(); querySetId++) {
|
||||
bestScore = Math.min(bestScore,
|
||||
evaluateSubquery(searchResult,
|
||||
@ -127,7 +127,7 @@ public class IndexResultValuator {
|
||||
setScore += score.termValue();
|
||||
|
||||
if (!filterRequired(metadata, queryParams.queryStrategy())) {
|
||||
setScore += 1000;
|
||||
return 1000;
|
||||
}
|
||||
|
||||
if (termIdx == 0) {
|
||||
@ -191,15 +191,15 @@ public class IndexResultValuator {
|
||||
double avgTfIdf = termCount / tfIdfSum;
|
||||
|
||||
if (maskAdjacent == 0) {
|
||||
return Math.max(-2, 40 - 0.5 * avgTfIdf);
|
||||
return Math.min(5, Math.max(-2, 40 - 0.5 * avgTfIdf));
|
||||
}
|
||||
|
||||
if (maskDirectGenerous == 0) {
|
||||
return Math.max(-1, 20 - 0.3 * avgTfIdf);
|
||||
return Math.min(5, Math.max(-1, 20 - 0.3 * avgTfIdf));
|
||||
}
|
||||
|
||||
if (maskDirectRaw == 0) {
|
||||
return Math.max(-1, 15 - 0.2 * avgTfIdf);
|
||||
return Math.min(5, Math.max(-1, 15 - 0.2 * avgTfIdf));
|
||||
}
|
||||
|
||||
return Long.numberOfTrailingZeros(maskDirectGenerous)/5. - Long.bitCount(maskDirectGenerous);
|
||||
|
@ -126,16 +126,21 @@ public class IndexQueryService {
|
||||
private TLongList evaluateSubqueries(SearchParameters params) {
|
||||
final TLongList results = new TLongArrayList(params.fetchSize);
|
||||
|
||||
logger.info(queryMarker, "{}", params.queryParams);
|
||||
for (var sq : params.subqueries) {
|
||||
final SearchIndexSearchTerms searchTerms = searchTermsSvc.getSearchTerms(sq);
|
||||
|
||||
|
||||
|
||||
if (searchTerms.isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
results.addAll(
|
||||
executeSubquery(searchTerms, params)
|
||||
);
|
||||
var resultsForSq = executeSubquery(searchTerms, params);
|
||||
|
||||
logger.info(queryMarker, "{} from {}", resultsForSq.size(), sq);
|
||||
|
||||
results.addAll(resultsForSq);
|
||||
|
||||
if (!params.hasTimeLeft()) {
|
||||
logger.info("Query timed out {}, ({}), -{}",
|
||||
@ -188,11 +193,15 @@ public class IndexQueryService {
|
||||
results.forEach(id -> {
|
||||
var item = evaluator.evaluateResult(id);
|
||||
|
||||
items.add(item);
|
||||
if (item.getScore() < 100) {
|
||||
items.add(item);
|
||||
}
|
||||
|
||||
return true;
|
||||
});
|
||||
|
||||
logger.info(queryMarker, "After filtering: {} -> {}", results.size(), items.size());
|
||||
|
||||
|
||||
return items;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user