From 1252f95da58da7cad93d5d265d690a9a9bf07ea0 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Tue, 7 Mar 2023 21:26:04 +0100 Subject: [PATCH] Fix for valuation bug in index code that wouldn't sort bad-ish items properly. --- .../client/model/query/EdgeSearchSubquery.java | 13 +++++++++++++ .../index/query/limit/SpecificationLimit.java | 10 ++++++++++ .../index/results/IndexResultValuator.java | 10 +++++----- .../marginalia/index/svc/IndexQueryService.java | 17 +++++++++++++---- 4 files changed, 41 insertions(+), 9 deletions(-) diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/EdgeSearchSubquery.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/EdgeSearchSubquery.java index 33416001..547a6c3e 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/EdgeSearchSubquery.java +++ b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/EdgeSearchSubquery.java @@ -5,6 +5,7 @@ import lombok.Getter; import lombok.ToString; import java.util.List; +import java.util.stream.Collectors; @ToString @Getter @@ -38,4 +39,16 @@ public class EdgeSearchSubquery { return this; } + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + if (!searchTermsInclude.isEmpty()) sb.append("include=").append(searchTermsInclude.stream().collect(Collectors.joining(",", "[", "] "))); + if (!searchTermsExclude.isEmpty()) sb.append("exclude=").append(searchTermsExclude.stream().collect(Collectors.joining(",", "[", "] "))); + if (!searchTermsAdvice.isEmpty()) sb.append("advice=").append(searchTermsAdvice.stream().collect(Collectors.joining(",", "[", "] "))); + if (!searchTermsPriority.isEmpty()) sb.append("priority=").append(searchTermsPriority.stream().collect(Collectors.joining(",", "[", "] "))); + + return sb.toString(); + } + + } diff --git a/code/index/index-query/src/main/java/nu/marginalia/index/query/limit/SpecificationLimit.java b/code/index/index-query/src/main/java/nu/marginalia/index/query/limit/SpecificationLimit.java index 1af0b10a..1c4a6aef 100644 --- a/code/index/index-query/src/main/java/nu/marginalia/index/query/limit/SpecificationLimit.java +++ b/code/index/index-query/src/main/java/nu/marginalia/index/query/limit/SpecificationLimit.java @@ -1,5 +1,7 @@ package nu.marginalia.index.query.limit; +import lombok.ToString; + public record SpecificationLimit(SpecificationLimitType type, int value) { public static SpecificationLimit none() { return new SpecificationLimit(SpecificationLimitType.NONE, 0); @@ -28,4 +30,12 @@ public record SpecificationLimit(SpecificationLimitType type, int value) { return parameter <= value; throw new AssertionError("Unknown type " + type); } + + @Override + public String toString() { + if (type == SpecificationLimitType.NONE) + return type.toString(); + + else return "%s:%d".formatted(type, value); + } } diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultValuator.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultValuator.java index bb4835e2..bef63252 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultValuator.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultValuator.java @@ -80,7 +80,7 @@ public class IndexResultValuator { long docMetadata = metadataService.getDocumentMetadata(urlIdInt); - double bestScore = 0; + double bestScore = 1000; for (int querySetId = 0; querySetId < searchTermVariants.size(); querySetId++) { bestScore = Math.min(bestScore, evaluateSubquery(searchResult, @@ -127,7 +127,7 @@ public class IndexResultValuator { setScore += score.termValue(); if (!filterRequired(metadata, queryParams.queryStrategy())) { - setScore += 1000; + return 1000; } if (termIdx == 0) { @@ -191,15 +191,15 @@ public class IndexResultValuator { double avgTfIdf = termCount / tfIdfSum; if (maskAdjacent == 0) { - return Math.max(-2, 40 - 0.5 * avgTfIdf); + return Math.min(5, Math.max(-2, 40 - 0.5 * avgTfIdf)); } if (maskDirectGenerous == 0) { - return Math.max(-1, 20 - 0.3 * avgTfIdf); + return Math.min(5, Math.max(-1, 20 - 0.3 * avgTfIdf)); } if (maskDirectRaw == 0) { - return Math.max(-1, 15 - 0.2 * avgTfIdf); + return Math.min(5, Math.max(-1, 15 - 0.2 * avgTfIdf)); } return Long.numberOfTrailingZeros(maskDirectGenerous)/5. - Long.bitCount(maskDirectGenerous); diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java index ffd6331d..ae9a2361 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java @@ -126,16 +126,21 @@ public class IndexQueryService { private TLongList evaluateSubqueries(SearchParameters params) { final TLongList results = new TLongArrayList(params.fetchSize); + logger.info(queryMarker, "{}", params.queryParams); for (var sq : params.subqueries) { final SearchIndexSearchTerms searchTerms = searchTermsSvc.getSearchTerms(sq); + + if (searchTerms.isEmpty()) { continue; } - results.addAll( - executeSubquery(searchTerms, params) - ); + var resultsForSq = executeSubquery(searchTerms, params); + + logger.info(queryMarker, "{} from {}", resultsForSq.size(), sq); + + results.addAll(resultsForSq); if (!params.hasTimeLeft()) { logger.info("Query timed out {}, ({}), -{}", @@ -188,11 +193,15 @@ public class IndexQueryService { results.forEach(id -> { var item = evaluator.evaluateResult(id); - items.add(item); + if (item.getScore() < 100) { + items.add(item); + } return true; }); + logger.info(queryMarker, "After filtering: {} -> {}", results.size(), items.size()); + return items; }