(index) Repair positions bitmask for search result presentation

This commit is contained in:
Viktor Lofgren 2024-08-22 11:28:23 +02:00
parent 266d6e4bea
commit 9eb1f120fc
6 changed files with 44 additions and 11 deletions

View File

@ -257,6 +257,7 @@ public class QueryProtobufCodec {
rawItem.getHtmlFeatures(), rawItem.getHtmlFeatures(),
keywordScores, keywordScores,
rawItem.getHasPriorityTerms(), rawItem.getHasPriorityTerms(),
0, // Not set
null, // Not set null, // Not set
Double.NaN // Not set Double.NaN // Not set
); );

View File

@ -28,14 +28,18 @@ public class SearchResultItem implements Comparable<SearchResultItem> {
public boolean hasPrioTerm; public boolean hasPrioTerm;
public long bestPositions;
public DebugRankingFactors debugRankingFactors; public DebugRankingFactors debugRankingFactors;
public SearchResultItem(long combinedId, public SearchResultItem(long combinedId,
long encodedDocMetadata, long encodedDocMetadata,
int htmlFeatures, int htmlFeatures,
double score) { double score,
long bestPositions) {
this.combinedId = combinedId; this.combinedId = combinedId;
this.encodedDocMetadata = encodedDocMetadata; this.encodedDocMetadata = encodedDocMetadata;
this.bestPositions = bestPositions;
this.keywordScores = new ArrayList<>(); this.keywordScores = new ArrayList<>();
this.htmlFeatures = htmlFeatures; this.htmlFeatures = htmlFeatures;
this.scoreValue = score; this.scoreValue = score;

View File

@ -179,7 +179,7 @@ public class IndexResultRankingService {
LongOpenHashSet seenDocumentHashes = new LongOpenHashSet(resultsList.size()); LongOpenHashSet seenDocumentHashes = new LongOpenHashSet(resultsList.size());
// Decorate the results with the document details // Decorate the results with the document details
for (var result : resultsList) { for (SearchResultItem result : resultsList) {
final long id = result.getDocumentId(); final long id = result.getDocumentId();
final DocdbUrlDetail docData = detailsById.get(id); final DocdbUrlDetail docData = detailsById.get(id);
@ -219,7 +219,7 @@ public class IndexResultRankingService {
.setUrl(docData.url().toString()) .setUrl(docData.url().toString())
.setUrlQuality(docData.urlQuality()) .setUrlQuality(docData.urlQuality())
.setWordsTotal(docData.wordsTotal()) .setWordsTotal(docData.wordsTotal())
.setBestPositions(0 /* FIXME */) .setBestPositions(result.getBestPositions())
.setResultsFromDomain(domainCountFilter.getCount(result)) .setResultsFromDomain(domainCountFilter.getCount(result))
.setRawItem(rawItem); .setRawItem(rawItem);

View File

@ -106,7 +106,35 @@ public class IndexResultScoreCalculator {
searchTerms.phraseConstraints, searchTerms.phraseConstraints,
rankingContext); rankingContext);
return new SearchResultItem(combinedId, docMetadata, htmlFeatures, score); return new SearchResultItem(combinedId,
docMetadata,
htmlFeatures,
score,
calculatePositionsMask(positions)
);
}
/** Calculate a bitmask illustrating the intersected positions of the search terms in the document.
* This is used in the GUI.
* */
private long calculatePositionsMask(CodedSequence[] positions) {
IntIterator[] iters = new IntIterator[rankingContext.regularMask.cardinality()];
for (int i = 0, j = 0; i < positions.length; i++) {
if (rankingContext.regularMask.get(i)) {
iters[j++] = positions[i].iterator();
}
}
IntIterator intersection = SequenceOperations.findIntersections(iters).intIterator();
long result = 0;
int bit = 0;
while (intersection.hasNext() && bit < 64) {
bit = (int) (Math.sqrt(intersection.nextInt()));
result |= 1L << bit;
}
return result;
} }
private boolean meetsQueryStrategyRequirements(CompiledQueryLong queryGraphScores, private boolean meetsQueryStrategyRequirements(CompiledQueryLong queryGraphScores,

View File

@ -55,7 +55,7 @@ public class SequenceOperations {
public static IntList findIntersections(IntIterator... sequences) { public static IntList findIntersections(IntIterator... sequences) {
if (sequences.length <= 1) if (sequences.length < 1)
return IntList.of(); return IntList.of();
// Initialize values and find the maximum value // Initialize values and find the maximum value

View File

@ -2,14 +2,12 @@ package nu.marginalia.search.svc;
import com.google.inject.Inject; import com.google.inject.Inject;
import com.google.inject.Singleton; import com.google.inject.Singleton;
import it.unimi.dsi.fastutil.ints.Int2LongArrayMap;
import lombok.SneakyThrows; import lombok.SneakyThrows;
import nu.marginalia.bbpc.BrailleBlockPunchCards; import nu.marginalia.api.searchquery.model.query.QueryResponse;
import nu.marginalia.api.searchquery.model.query.SearchSpecification; import nu.marginalia.api.searchquery.model.query.SearchSpecification;
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem; import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
import nu.marginalia.api.searchquery.model.results.SearchResultItem; import nu.marginalia.bbpc.BrailleBlockPunchCards;
import nu.marginalia.model.crawl.DomainIndexingState; import nu.marginalia.model.crawl.DomainIndexingState;
import nu.marginalia.api.searchquery.model.query.QueryResponse;
import nu.marginalia.search.model.UrlDetails; import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.search.results.UrlDeduplicator; import nu.marginalia.search.results.UrlDeduplicator;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -17,7 +15,9 @@ import org.slf4j.LoggerFactory;
import org.slf4j.Marker; import org.slf4j.Marker;
import org.slf4j.MarkerFactory; import org.slf4j.MarkerFactory;
import java.util.*; import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
@Singleton @Singleton
public class SearchQueryIndexService { public class SearchQueryIndexService {
@ -99,7 +99,7 @@ public class SearchQueryIndexService {
} }
private String getPositionsString(DecoratedSearchResultItem resultItem) { private String getPositionsString(DecoratedSearchResultItem resultItem) {
return BrailleBlockPunchCards.printBits(resultItem.bestPositions, 56); return BrailleBlockPunchCards.printBits(resultItem.bestPositions, 64);
} }
} }