(index) Improve recall for small queries

Partially reverse the previous commit and add a query head for the priority index when there are few query interpretations.
This commit is contained in:
Viktor Lofgren 2024-04-16 22:51:03 +02:00
parent 44c1e1d6d9
commit e0224085b4
3 changed files with 27 additions and 2 deletions

View File

@ -35,13 +35,24 @@ public class CombinedIndexReader {
}
/** Creates a query builder wrapping the given query.
 * <p>
 * NOTE(review): two return statements appear in this body. The second one, which
 * additionally passes the priority index reader, looks like the intended replacement
 * for the first -- confirm before relying on either.
 */
public IndexQueryBuilderImpl newQueryBuilder(IndexQuery query) {
return new IndexQueryBuilderImpl(reverseIndexFullReader, query);
return new IndexQueryBuilderImpl(reverseIndexFullReader, reverseIndexPriorityReader, query);
}
/** Returns the filter step that the full index reader produces for the given term id. */
public QueryFilterStepIf hasWordFull(long termId) {
    final QueryFilterStepIf fullIndexFilter = reverseIndexFullReader.also(termId);
    return fullIndexFilter;
}
/** Returns the filter step that the priority index reader produces for the given term id. */
public QueryFilterStepIf hasWordPrio(long termId) {
    final QueryFilterStepIf prioIndexFilter = reverseIndexPriorityReader.also(termId);
    return prioIndexFilter;
}
/** Creates a query builder for terms in the priority index.
 * <p>
 * The builder is seeded with the priority index reader's documents for {@code wordId},
 * and {@code wordId} is registered as its source term.
 */
public IndexQueryBuilder findPriorityWord(long wordId) {
    // Seed the query with the priority index's documents for this word
    final IndexQuery priorityQuery = new IndexQuery(reverseIndexPriorityReader.documents(wordId));

    final IndexQueryBuilderImpl builder = newQueryBuilder(priorityQuery);
    return builder.withSourceTerms(wordId);
}
/** Creates a query builder for terms in the full index */
public IndexQueryBuilder findFullWord(long wordId) {
return newQueryBuilder(

View File

@ -11,6 +11,7 @@ import nu.marginalia.index.query.filter.QueryFilterStepIf;
public class IndexQueryBuilderImpl implements IndexQueryBuilder {
private final IndexQuery query;
private final ReverseIndexReader reverseIndexFullReader;
private final ReverseIndexReader reverseIndexPrioReader;
/* Keep track of already added include terms to avoid redundant checks.
*
@ -21,10 +22,12 @@ public class IndexQueryBuilderImpl implements IndexQueryBuilder {
private final TLongHashSet alreadyConsideredTerms = new TLongHashSet();
/** Package-private constructor; stores the two reverse index readers and the query
 * this builder operates on.
 */
IndexQueryBuilderImpl(ReverseIndexReader reverseIndexFullReader,
                      ReverseIndexReader reverseIndexPrioReader,
                      IndexQuery query) {
    this.reverseIndexFullReader = reverseIndexFullReader;
    this.reverseIndexPrioReader = reverseIndexPrioReader;
    this.query = query;
}
public IndexQueryBuilder withSourceTerms(long... sourceTerms) {

View File

@ -4,6 +4,9 @@ import com.google.inject.Inject;
import com.google.inject.Singleton;
import it.unimi.dsi.fastutil.longs.*;
import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates;
import nu.marginalia.index.query.filter.QueryFilterAllOf;
import nu.marginalia.index.query.filter.QueryFilterAnyOf;
import nu.marginalia.index.query.filter.QueryFilterStepIf;
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
import nu.marginalia.index.results.model.ids.DocMetadataList;
import nu.marginalia.index.model.QueryParams;
@ -138,8 +141,16 @@ public class StatefulIndex {
for (int i = 1; i < elements.size(); i++) {
head.addInclusionFilter(combinedIndexReader.hasWordFull(elements.getLong(i)));
}
queryHeads.add(head);
// If there are few paths, we can afford to check the priority index as well
if (paths.size() < 4) {
var prioHead = combinedIndexReader.findPriorityWord(elements.getLong(0));
for (int i = 1; i < elements.size(); i++) {
prioHead.addInclusionFilter(combinedIndexReader.hasWordPrio(elements.getLong(i)));
}
queryHeads.add(prioHead);
}
}
// Add additional conditions to the query heads