mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
Fix a valuation bug in the index code that prevented bad-ish items from being sorted properly.
This commit is contained in:
parent
f3babde415
commit
1252f95da5
@ -5,6 +5,7 @@ import lombok.Getter;
|
|||||||
import lombok.ToString;
|
import lombok.ToString;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
@ToString
|
@ToString
|
||||||
@Getter
|
@Getter
|
||||||
@ -38,4 +39,16 @@ public class EdgeSearchSubquery {
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
if (!searchTermsInclude.isEmpty()) sb.append("include=").append(searchTermsInclude.stream().collect(Collectors.joining(",", "[", "] ")));
|
||||||
|
if (!searchTermsExclude.isEmpty()) sb.append("exclude=").append(searchTermsExclude.stream().collect(Collectors.joining(",", "[", "] ")));
|
||||||
|
if (!searchTermsAdvice.isEmpty()) sb.append("advice=").append(searchTermsAdvice.stream().collect(Collectors.joining(",", "[", "] ")));
|
||||||
|
if (!searchTermsPriority.isEmpty()) sb.append("priority=").append(searchTermsPriority.stream().collect(Collectors.joining(",", "[", "] ")));
|
||||||
|
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
package nu.marginalia.index.query.limit;
|
package nu.marginalia.index.query.limit;
|
||||||
|
|
||||||
|
import lombok.ToString;
|
||||||
|
|
||||||
public record SpecificationLimit(SpecificationLimitType type, int value) {
|
public record SpecificationLimit(SpecificationLimitType type, int value) {
|
||||||
public static SpecificationLimit none() {
|
public static SpecificationLimit none() {
|
||||||
return new SpecificationLimit(SpecificationLimitType.NONE, 0);
|
return new SpecificationLimit(SpecificationLimitType.NONE, 0);
|
||||||
@ -28,4 +30,12 @@ public record SpecificationLimit(SpecificationLimitType type, int value) {
|
|||||||
return parameter <= value;
|
return parameter <= value;
|
||||||
throw new AssertionError("Unknown type " + type);
|
throw new AssertionError("Unknown type " + type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
if (type == SpecificationLimitType.NONE)
|
||||||
|
return type.toString();
|
||||||
|
|
||||||
|
else return "%s:%d".formatted(type, value);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -80,7 +80,7 @@ public class IndexResultValuator {
|
|||||||
|
|
||||||
long docMetadata = metadataService.getDocumentMetadata(urlIdInt);
|
long docMetadata = metadataService.getDocumentMetadata(urlIdInt);
|
||||||
|
|
||||||
double bestScore = 0;
|
double bestScore = 1000;
|
||||||
for (int querySetId = 0; querySetId < searchTermVariants.size(); querySetId++) {
|
for (int querySetId = 0; querySetId < searchTermVariants.size(); querySetId++) {
|
||||||
bestScore = Math.min(bestScore,
|
bestScore = Math.min(bestScore,
|
||||||
evaluateSubquery(searchResult,
|
evaluateSubquery(searchResult,
|
||||||
@ -127,7 +127,7 @@ public class IndexResultValuator {
|
|||||||
setScore += score.termValue();
|
setScore += score.termValue();
|
||||||
|
|
||||||
if (!filterRequired(metadata, queryParams.queryStrategy())) {
|
if (!filterRequired(metadata, queryParams.queryStrategy())) {
|
||||||
setScore += 1000;
|
return 1000;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (termIdx == 0) {
|
if (termIdx == 0) {
|
||||||
@ -191,15 +191,15 @@ public class IndexResultValuator {
|
|||||||
double avgTfIdf = termCount / tfIdfSum;
|
double avgTfIdf = termCount / tfIdfSum;
|
||||||
|
|
||||||
if (maskAdjacent == 0) {
|
if (maskAdjacent == 0) {
|
||||||
return Math.max(-2, 40 - 0.5 * avgTfIdf);
|
return Math.min(5, Math.max(-2, 40 - 0.5 * avgTfIdf));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (maskDirectGenerous == 0) {
|
if (maskDirectGenerous == 0) {
|
||||||
return Math.max(-1, 20 - 0.3 * avgTfIdf);
|
return Math.min(5, Math.max(-1, 20 - 0.3 * avgTfIdf));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (maskDirectRaw == 0) {
|
if (maskDirectRaw == 0) {
|
||||||
return Math.max(-1, 15 - 0.2 * avgTfIdf);
|
return Math.min(5, Math.max(-1, 15 - 0.2 * avgTfIdf));
|
||||||
}
|
}
|
||||||
|
|
||||||
return Long.numberOfTrailingZeros(maskDirectGenerous)/5. - Long.bitCount(maskDirectGenerous);
|
return Long.numberOfTrailingZeros(maskDirectGenerous)/5. - Long.bitCount(maskDirectGenerous);
|
||||||
|
@ -126,16 +126,21 @@ public class IndexQueryService {
|
|||||||
private TLongList evaluateSubqueries(SearchParameters params) {
|
private TLongList evaluateSubqueries(SearchParameters params) {
|
||||||
final TLongList results = new TLongArrayList(params.fetchSize);
|
final TLongList results = new TLongArrayList(params.fetchSize);
|
||||||
|
|
||||||
|
logger.info(queryMarker, "{}", params.queryParams);
|
||||||
for (var sq : params.subqueries) {
|
for (var sq : params.subqueries) {
|
||||||
final SearchIndexSearchTerms searchTerms = searchTermsSvc.getSearchTerms(sq);
|
final SearchIndexSearchTerms searchTerms = searchTermsSvc.getSearchTerms(sq);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if (searchTerms.isEmpty()) {
|
if (searchTerms.isEmpty()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
results.addAll(
|
var resultsForSq = executeSubquery(searchTerms, params);
|
||||||
executeSubquery(searchTerms, params)
|
|
||||||
);
|
logger.info(queryMarker, "{} from {}", resultsForSq.size(), sq);
|
||||||
|
|
||||||
|
results.addAll(resultsForSq);
|
||||||
|
|
||||||
if (!params.hasTimeLeft()) {
|
if (!params.hasTimeLeft()) {
|
||||||
logger.info("Query timed out {}, ({}), -{}",
|
logger.info("Query timed out {}, ({}), -{}",
|
||||||
@ -188,11 +193,15 @@ public class IndexQueryService {
|
|||||||
results.forEach(id -> {
|
results.forEach(id -> {
|
||||||
var item = evaluator.evaluateResult(id);
|
var item = evaluator.evaluateResult(id);
|
||||||
|
|
||||||
items.add(item);
|
if (item.getScore() < 100) {
|
||||||
|
items.add(item);
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
logger.info(queryMarker, "After filtering: {} -> {}", results.size(), items.size());
|
||||||
|
|
||||||
|
|
||||||
return items;
|
return items;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user