Fix a result-valuation bug in the index code that caused poorly-scoring ("bad-ish") items to be sorted incorrectly.

This commit is contained in:
Viktor Lofgren 2023-03-07 21:26:04 +01:00
parent f3babde415
commit 1252f95da5
4 changed files with 41 additions and 9 deletions

View File

@ -5,6 +5,7 @@ import lombok.Getter;
import lombok.ToString;
import java.util.List;
import java.util.stream.Collectors;
@ToString
@Getter
@ -38,4 +39,16 @@ public class EdgeSearchSubquery {
return this;
}
/**
 * Renders the non-empty term lists of this subquery as a compact, single-line string.
 *
 * <p>Each non-empty list is emitted as {@code name=[a,b,c] } (note the trailing space),
 * in the fixed order include, exclude, advice, priority. Empty lists are omitted
 * entirely, so a subquery with no terms at all yields the empty string.
 *
 * @return the textual summary of the search terms held by this subquery
 */
@Override
public String toString() {
    final StringBuilder out = new StringBuilder();

    if (!searchTermsInclude.isEmpty()) {
        out.append("include=")
           .append("[").append(String.join(",", searchTermsInclude)).append("] ");
    }
    if (!searchTermsExclude.isEmpty()) {
        out.append("exclude=")
           .append("[").append(String.join(",", searchTermsExclude)).append("] ");
    }
    if (!searchTermsAdvice.isEmpty()) {
        out.append("advice=")
           .append("[").append(String.join(",", searchTermsAdvice)).append("] ");
    }
    if (!searchTermsPriority.isEmpty()) {
        out.append("priority=")
           .append("[").append(String.join(",", searchTermsPriority)).append("] ");
    }

    return out.toString();
}
}

View File

@ -1,5 +1,7 @@
package nu.marginalia.index.query.limit;
import lombok.ToString;
public record SpecificationLimit(SpecificationLimitType type, int value) {
public static SpecificationLimit none() {
return new SpecificationLimit(SpecificationLimitType.NONE, 0);
@ -28,4 +30,12 @@ public record SpecificationLimit(SpecificationLimitType type, int value) {
return parameter <= value;
throw new AssertionError("Unknown type " + type);
}
/**
 * Formats this limit for display.
 *
 * @return just the type's string form when the type is {@code NONE};
 *         otherwise the type and the value joined as {@code TYPE:value}
 */
@Override
public String toString() {
    // A NONE limit prints only its type; the value is left out.
    if (type == SpecificationLimitType.NONE) {
        return type.toString();
    }

    return String.format("%s:%d", type, value);
}
}

View File

@ -80,7 +80,7 @@ public class IndexResultValuator {
long docMetadata = metadataService.getDocumentMetadata(urlIdInt);
double bestScore = 0;
double bestScore = 1000;
for (int querySetId = 0; querySetId < searchTermVariants.size(); querySetId++) {
bestScore = Math.min(bestScore,
evaluateSubquery(searchResult,
@ -127,7 +127,7 @@ public class IndexResultValuator {
setScore += score.termValue();
if (!filterRequired(metadata, queryParams.queryStrategy())) {
setScore += 1000;
return 1000;
}
if (termIdx == 0) {
@ -191,15 +191,15 @@ public class IndexResultValuator {
double avgTfIdf = termCount / tfIdfSum;
if (maskAdjacent == 0) {
return Math.max(-2, 40 - 0.5 * avgTfIdf);
return Math.min(5, Math.max(-2, 40 - 0.5 * avgTfIdf));
}
if (maskDirectGenerous == 0) {
return Math.max(-1, 20 - 0.3 * avgTfIdf);
return Math.min(5, Math.max(-1, 20 - 0.3 * avgTfIdf));
}
if (maskDirectRaw == 0) {
return Math.max(-1, 15 - 0.2 * avgTfIdf);
return Math.min(5, Math.max(-1, 15 - 0.2 * avgTfIdf));
}
return Long.numberOfTrailingZeros(maskDirectGenerous)/5. - Long.bitCount(maskDirectGenerous);

View File

@ -126,16 +126,21 @@ public class IndexQueryService {
private TLongList evaluateSubqueries(SearchParameters params) {
final TLongList results = new TLongArrayList(params.fetchSize);
logger.info(queryMarker, "{}", params.queryParams);
for (var sq : params.subqueries) {
final SearchIndexSearchTerms searchTerms = searchTermsSvc.getSearchTerms(sq);
if (searchTerms.isEmpty()) {
continue;
}
results.addAll(
executeSubquery(searchTerms, params)
);
var resultsForSq = executeSubquery(searchTerms, params);
logger.info(queryMarker, "{} from {}", resultsForSq.size(), sq);
results.addAll(resultsForSq);
if (!params.hasTimeLeft()) {
logger.info("Query timed out {}, ({}), -{}",
@ -188,11 +193,15 @@ public class IndexQueryService {
results.forEach(id -> {
var item = evaluator.evaluateResult(id);
items.add(item);
if (item.getScore() < 100) {
items.add(item);
}
return true;
});
logger.info(queryMarker, "After filtering: {} -> {}", results.size(), items.size());
return items;
}