(index) Clean up and optimize valuator

This commit is contained in:
Viktor Lofgren 2023-08-24 18:34:06 +02:00
parent 56eb83319d
commit b911665691
3 changed files with 17 additions and 19 deletions

View File

@@ -6,7 +6,6 @@ import static java.lang.Boolean.compare;
import static java.lang.Double.compare; import static java.lang.Double.compare;
public record SearchResultPreliminaryScore( public record SearchResultPreliminaryScore(
boolean disqualified,
boolean hasPriorityTerm, boolean hasPriorityTerm,
double searchRankingScore) double searchRankingScore)
implements Comparable<SearchResultPreliminaryScore> implements Comparable<SearchResultPreliminaryScore>
@@ -25,7 +24,4 @@ public record SearchResultPreliminaryScore(
return PREFER_LOW * compare(searchRankingScore, other.searchRankingScore); return PREFER_LOW * compare(searchRankingScore, other.searchRankingScore);
} }
public boolean isDisqualified() {
return disqualified;
}
} }

View File

@@ -4,6 +4,7 @@ import gnu.trove.list.TLongList;
import gnu.trove.set.hash.TLongHashSet; import gnu.trove.set.hash.TLongHashSet;
import nu.marginalia.index.client.model.results.SearchResultPreliminaryScore; import nu.marginalia.index.client.model.results.SearchResultPreliminaryScore;
import nu.marginalia.index.client.model.results.ResultRankingContext; import nu.marginalia.index.client.model.results.ResultRankingContext;
import nu.marginalia.model.id.UrlIdCodec;
import nu.marginalia.model.idx.WordFlags; import nu.marginalia.model.idx.WordFlags;
import nu.marginalia.model.idx.WordMetadata; import nu.marginalia.model.idx.WordMetadata;
import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.query.limit.QueryStrategy;
@@ -13,6 +14,7 @@ import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.index.query.IndexQueryParams; import nu.marginalia.index.query.IndexQueryParams;
import nu.marginalia.ranking.ResultValuator; import nu.marginalia.ranking.ResultValuator;
import javax.annotation.Nullable;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
@@ -53,10 +55,13 @@ public class IndexResultValuator {
private final long flagsFilterMask = private final long flagsFilterMask =
WordFlags.Title.asBit() | WordFlags.Subjects.asBit() | WordFlags.UrlDomain.asBit() | WordFlags.UrlPath.asBit(); WordFlags.Title.asBit() | WordFlags.Subjects.asBit() | WordFlags.UrlDomain.asBit() | WordFlags.UrlPath.asBit();
@Nullable
public SearchResultItem calculatePreliminaryScore(long id) { public SearchResultItem calculatePreliminaryScore(long id) {
SearchResultItem searchResult = new SearchResultItem(id); final long docId = UrlIdCodec.removeRank(id);
final long docId = searchResult.getDocumentId();
if (!termMetadataForDocuments.testCoherence(docId, searchTerms.coherences))
return null;
long docMetadata = metadataService.getDocumentMetadata(docId); long docMetadata = metadataService.getDocumentMetadata(docId);
int htmlFeatures = metadataService.getHtmlFeatures(docId); int htmlFeatures = metadataService.getHtmlFeatures(docId);
@@ -65,8 +70,12 @@ public class IndexResultValuator {
boolean anyAllSynthetic = false; boolean anyAllSynthetic = false;
int maxPositionsSet = 0; int maxPositionsSet = 0;
for (int querySetId = 0; querySetId < searchTermVariants.size(); querySetId++) { SearchResultItem searchResult = new SearchResultItem(id);
for (int querySetId = 0;
querySetId < searchTermVariants.size();
querySetId++)
{
var termList = searchTermVariants.get(querySetId); var termList = searchTermVariants.get(querySetId);
SearchResultKeywordScore[] termScoresForSet = new SearchResultKeywordScore[termList.size()]; SearchResultKeywordScore[] termScoresForSet = new SearchResultKeywordScore[termList.size()];
@@ -115,22 +124,15 @@ public class IndexResultValuator {
anyAllSynthetic |= synthetic; anyAllSynthetic |= synthetic;
} }
final boolean hasPriorityTerm = resultsWithPriorityTerms.contains(id); if (maxFlagsCount == 0 && !anyAllSynthetic && maxPositionsSet == 0)
return null;
double score = searchResultValuator.calculateSearchResultValue(searchResult.keywordScores, double score = searchResultValuator.calculateSearchResultValue(searchResult.keywordScores,
5000, 5000, // use a dummy value here as it's not present in the index
rankingContext); rankingContext);
boolean disqualified = false;
if (!termMetadataForDocuments.testCoherence(docId, searchTerms.coherences))
disqualified = true;
else if (maxFlagsCount == 0 && !anyAllSynthetic && maxPositionsSet == 0)
disqualified = true;
searchResult.setScore(new SearchResultPreliminaryScore( searchResult.setScore(new SearchResultPreliminaryScore(
disqualified, resultsWithPriorityTerms.contains(id),
hasPriorityTerm,
score score
)); ));

View File

@@ -258,7 +258,7 @@ public class IndexQueryService {
return Arrays.stream(resultIds.toArray()) return Arrays.stream(resultIds.toArray())
.parallel() .parallel()
.mapToObj(evaluator::calculatePreliminaryScore) .mapToObj(evaluator::calculatePreliminaryScore)
.filter(score -> !score.getScore().isDisqualified()) .filter(Objects::nonNull)
.collect(Collectors.toList()); .collect(Collectors.toList());
} }