mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(index) Clean up and optimize valuator
This commit is contained in:
parent
56eb83319d
commit
b911665691
@ -6,7 +6,6 @@ import static java.lang.Boolean.compare;
|
|||||||
import static java.lang.Double.compare;
|
import static java.lang.Double.compare;
|
||||||
|
|
||||||
public record SearchResultPreliminaryScore(
|
public record SearchResultPreliminaryScore(
|
||||||
boolean disqualified,
|
|
||||||
boolean hasPriorityTerm,
|
boolean hasPriorityTerm,
|
||||||
double searchRankingScore)
|
double searchRankingScore)
|
||||||
implements Comparable<SearchResultPreliminaryScore>
|
implements Comparable<SearchResultPreliminaryScore>
|
||||||
@ -25,7 +24,4 @@ public record SearchResultPreliminaryScore(
|
|||||||
return PREFER_LOW * compare(searchRankingScore, other.searchRankingScore);
|
return PREFER_LOW * compare(searchRankingScore, other.searchRankingScore);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isDisqualified() {
|
|
||||||
return disqualified;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -4,6 +4,7 @@ import gnu.trove.list.TLongList;
|
|||||||
import gnu.trove.set.hash.TLongHashSet;
|
import gnu.trove.set.hash.TLongHashSet;
|
||||||
import nu.marginalia.index.client.model.results.SearchResultPreliminaryScore;
|
import nu.marginalia.index.client.model.results.SearchResultPreliminaryScore;
|
||||||
import nu.marginalia.index.client.model.results.ResultRankingContext;
|
import nu.marginalia.index.client.model.results.ResultRankingContext;
|
||||||
|
import nu.marginalia.model.id.UrlIdCodec;
|
||||||
import nu.marginalia.model.idx.WordFlags;
|
import nu.marginalia.model.idx.WordFlags;
|
||||||
import nu.marginalia.model.idx.WordMetadata;
|
import nu.marginalia.model.idx.WordMetadata;
|
||||||
import nu.marginalia.index.query.limit.QueryStrategy;
|
import nu.marginalia.index.query.limit.QueryStrategy;
|
||||||
@ -13,6 +14,7 @@ import nu.marginalia.index.client.model.query.SearchSubquery;
|
|||||||
import nu.marginalia.index.query.IndexQueryParams;
|
import nu.marginalia.index.query.IndexQueryParams;
|
||||||
import nu.marginalia.ranking.ResultValuator;
|
import nu.marginalia.ranking.ResultValuator;
|
||||||
|
|
||||||
|
import javax.annotation.Nullable;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
@ -53,10 +55,13 @@ public class IndexResultValuator {
|
|||||||
private final long flagsFilterMask =
|
private final long flagsFilterMask =
|
||||||
WordFlags.Title.asBit() | WordFlags.Subjects.asBit() | WordFlags.UrlDomain.asBit() | WordFlags.UrlPath.asBit();
|
WordFlags.Title.asBit() | WordFlags.Subjects.asBit() | WordFlags.UrlDomain.asBit() | WordFlags.UrlPath.asBit();
|
||||||
|
|
||||||
|
@Nullable
|
||||||
public SearchResultItem calculatePreliminaryScore(long id) {
|
public SearchResultItem calculatePreliminaryScore(long id) {
|
||||||
|
|
||||||
SearchResultItem searchResult = new SearchResultItem(id);
|
final long docId = UrlIdCodec.removeRank(id);
|
||||||
final long docId = searchResult.getDocumentId();
|
|
||||||
|
if (!termMetadataForDocuments.testCoherence(docId, searchTerms.coherences))
|
||||||
|
return null;
|
||||||
|
|
||||||
long docMetadata = metadataService.getDocumentMetadata(docId);
|
long docMetadata = metadataService.getDocumentMetadata(docId);
|
||||||
int htmlFeatures = metadataService.getHtmlFeatures(docId);
|
int htmlFeatures = metadataService.getHtmlFeatures(docId);
|
||||||
@ -65,8 +70,12 @@ public class IndexResultValuator {
|
|||||||
boolean anyAllSynthetic = false;
|
boolean anyAllSynthetic = false;
|
||||||
int maxPositionsSet = 0;
|
int maxPositionsSet = 0;
|
||||||
|
|
||||||
for (int querySetId = 0; querySetId < searchTermVariants.size(); querySetId++) {
|
SearchResultItem searchResult = new SearchResultItem(id);
|
||||||
|
|
||||||
|
for (int querySetId = 0;
|
||||||
|
querySetId < searchTermVariants.size();
|
||||||
|
querySetId++)
|
||||||
|
{
|
||||||
var termList = searchTermVariants.get(querySetId);
|
var termList = searchTermVariants.get(querySetId);
|
||||||
|
|
||||||
SearchResultKeywordScore[] termScoresForSet = new SearchResultKeywordScore[termList.size()];
|
SearchResultKeywordScore[] termScoresForSet = new SearchResultKeywordScore[termList.size()];
|
||||||
@ -115,22 +124,15 @@ public class IndexResultValuator {
|
|||||||
anyAllSynthetic |= synthetic;
|
anyAllSynthetic |= synthetic;
|
||||||
}
|
}
|
||||||
|
|
||||||
final boolean hasPriorityTerm = resultsWithPriorityTerms.contains(id);
|
if (maxFlagsCount == 0 && !anyAllSynthetic && maxPositionsSet == 0)
|
||||||
|
return null;
|
||||||
|
|
||||||
double score = searchResultValuator.calculateSearchResultValue(searchResult.keywordScores,
|
double score = searchResultValuator.calculateSearchResultValue(searchResult.keywordScores,
|
||||||
5000,
|
5000, // use a dummy value here as it's not present in the index
|
||||||
rankingContext);
|
rankingContext);
|
||||||
|
|
||||||
boolean disqualified = false;
|
|
||||||
|
|
||||||
if (!termMetadataForDocuments.testCoherence(docId, searchTerms.coherences))
|
|
||||||
disqualified = true;
|
|
||||||
else if (maxFlagsCount == 0 && !anyAllSynthetic && maxPositionsSet == 0)
|
|
||||||
disqualified = true;
|
|
||||||
|
|
||||||
searchResult.setScore(new SearchResultPreliminaryScore(
|
searchResult.setScore(new SearchResultPreliminaryScore(
|
||||||
disqualified,
|
resultsWithPriorityTerms.contains(id),
|
||||||
hasPriorityTerm,
|
|
||||||
score
|
score
|
||||||
));
|
));
|
||||||
|
|
||||||
|
@ -258,7 +258,7 @@ public class IndexQueryService {
|
|||||||
return Arrays.stream(resultIds.toArray())
|
return Arrays.stream(resultIds.toArray())
|
||||||
.parallel()
|
.parallel()
|
||||||
.mapToObj(evaluator::calculatePreliminaryScore)
|
.mapToObj(evaluator::calculatePreliminaryScore)
|
||||||
.filter(score -> !score.getScore().isDisqualified())
|
.filter(Objects::nonNull)
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user