(index) Improve recall for small queries

Partially reverse the previous commit and add a query head for the priority index when there are few query interpretations.
This commit is contained in:
Viktor Lofgren 2024-04-16 22:51:03 +02:00
parent 44c1e1d6d9
commit e0224085b4
3 changed files with 27 additions and 2 deletions

View File

@ -35,13 +35,24 @@ public class CombinedIndexReader {
} }
/** Wraps {@code query} in a new IndexQueryBuilderImpl. Side-by-side diff: the left (old) column hands the builder only the full reader, the right (new) column hands it both the full and the priority reader. */
public IndexQueryBuilderImpl newQueryBuilder(IndexQuery query) { public IndexQueryBuilderImpl newQueryBuilder(IndexQuery query) {
return new IndexQueryBuilderImpl(reverseIndexFullReader, query); return new IndexQueryBuilderImpl(reverseIndexFullReader, reverseIndexPriorityReader, query);
} }
/** Returns the filter step obtained from {@code reverseIndexFullReader.also(termId)}; unchanged by this commit (both diff columns are identical). Presumably the step keeps documents containing the term in the full index -- confirm against ReverseIndexReader. */
public QueryFilterStepIf hasWordFull(long termId) { public QueryFilterStepIf hasWordFull(long termId) {
return reverseIndexFullReader.also(termId); return reverseIndexFullReader.also(termId);
} }
/**
 * Priority-reader counterpart of {@code hasWordFull}: returns the filter step
 * obtained from {@code reverseIndexPriorityReader.also(termId)}.
 *
 * @param termId id of the term handed to the priority reader
 * @return the filter step produced by the priority reader for {@code termId}
 */
public QueryFilterStepIf hasWordPrio(long termId) {
    final QueryFilterStepIf prioFilterStep = reverseIndexPriorityReader.also(termId);
    return prioFilterStep;
}
/**
 * Starts a query builder whose underlying query is seeded from the priority
 * reader's documents for {@code wordId}, and registers {@code wordId} as the
 * builder's source term.
 *
 * @param wordId id of the term to look up in the priority reader
 * @return the builder returned by {@code withSourceTerms(wordId)}
 */
public IndexQueryBuilder findPriorityWord(long wordId) {
    final IndexQuery priorityQuery = new IndexQuery(reverseIndexPriorityReader.documents(wordId));
    final IndexQueryBuilderImpl priorityBuilder = newQueryBuilder(priorityQuery);
    return priorityBuilder.withSourceTerms(wordId);
}
/** Creates a query builder for terms in the full index */ /** Creates a query builder for terms in the full index */
public IndexQueryBuilder findFullWord(long wordId) { public IndexQueryBuilder findFullWord(long wordId) {
return newQueryBuilder( return newQueryBuilder(

View File

@ -11,6 +11,7 @@ import nu.marginalia.index.query.filter.QueryFilterStepIf;
public class IndexQueryBuilderImpl implements IndexQueryBuilder { public class IndexQueryBuilderImpl implements IndexQueryBuilder {
private final IndexQuery query; private final IndexQuery query;
private final ReverseIndexReader reverseIndexFullReader; private final ReverseIndexReader reverseIndexFullReader;
private final ReverseIndexReader reverseIndexPrioReader;
/* Keep track of already added include terms to avoid redundant checks. /* Keep track of already added include terms to avoid redundant checks.
* *
@ -21,10 +22,12 @@ public class IndexQueryBuilderImpl implements IndexQueryBuilder {
private final TLongHashSet alreadyConsideredTerms = new TLongHashSet(); private final TLongHashSet alreadyConsideredTerms = new TLongHashSet();
/* Constructor: stores the query and the reader(s) in the matching final fields.
 * Side-by-side diff: lines shown twice are unchanged context (old | new); the
 * single-column lines are additions that accept the priority reader and store it
 * in reverseIndexPrioReader.
 * NOTE(review): no read of reverseIndexPrioReader is visible in this hunk -- confirm it is used elsewhere in the class.
 */
IndexQueryBuilderImpl(ReverseIndexReader reverseIndexFullReader, IndexQueryBuilderImpl(ReverseIndexReader reverseIndexFullReader,
ReverseIndexReader reverseIndexPrioReader,
IndexQuery query) IndexQuery query)
{ {
this.query = query; this.query = query;
this.reverseIndexFullReader = reverseIndexFullReader; this.reverseIndexFullReader = reverseIndexFullReader;
this.reverseIndexPrioReader = reverseIndexPrioReader;
} }
public IndexQueryBuilder withSourceTerms(long... sourceTerms) { public IndexQueryBuilder withSourceTerms(long... sourceTerms) {

View File

@ -4,6 +4,9 @@ import com.google.inject.Inject;
import com.google.inject.Singleton; import com.google.inject.Singleton;
import it.unimi.dsi.fastutil.longs.*; import it.unimi.dsi.fastutil.longs.*;
import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates; import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates;
import nu.marginalia.index.query.filter.QueryFilterAllOf;
import nu.marginalia.index.query.filter.QueryFilterAnyOf;
import nu.marginalia.index.query.filter.QueryFilterStepIf;
import nu.marginalia.index.results.model.ids.CombinedDocIdList; import nu.marginalia.index.results.model.ids.CombinedDocIdList;
import nu.marginalia.index.results.model.ids.DocMetadataList; import nu.marginalia.index.results.model.ids.DocMetadataList;
import nu.marginalia.index.model.QueryParams; import nu.marginalia.index.model.QueryParams;
@ -138,8 +141,16 @@ public class StatefulIndex {
for (int i = 1; i < elements.size(); i++) { for (int i = 1; i < elements.size(); i++) {
head.addInclusionFilter(combinedIndexReader.hasWordFull(elements.getLong(i))); head.addInclusionFilter(combinedIndexReader.hasWordFull(elements.getLong(i)));
} }
queryHeads.add(head); queryHeads.add(head);
// If there are few paths, we can afford to check the priority index as well
if (paths.size() < 4) {
var prioHead = combinedIndexReader.findPriorityWord(elements.getLong(0));
for (int i = 1; i < elements.size(); i++) {
prioHead.addInclusionFilter(combinedIndexReader.hasWordPrio(elements.getLong(i)));
}
queryHeads.add(prioHead);
}
} }
// Add additional conditions to the query heads // Add additional conditions to the query heads