(index) Clean up the code

This commit is contained in:
Viktor Lofgren 2024-04-11 18:50:21 +02:00
parent 7dd8c78c6b
commit 55f627ed4c
6 changed files with 64 additions and 56 deletions

View File

@ -68,8 +68,12 @@ public class ReverseIndexReader {
}
long wordOffset(long wordId) {
long idx = wordsBTreeReader.findEntry(wordId);
/** Calculate the offset of the word in the documents.
* If the return-value is negative, the term does not exist
* in the index.
*/
long wordOffset(long termId) {
long idx = wordsBTreeReader.findEntry(termId);
if (idx < 0)
return -1L;
@ -77,37 +81,43 @@ public class ReverseIndexReader {
return words.get(wordsDataOffset + idx + 1);
}
public EntrySource documents(long wordId) {
public EntrySource documents(long termId) {
if (null == words) {
logger.warn("Reverse index is not ready, dropping query");
return new EmptyEntrySource();
}
long offset = wordOffset(wordId);
long offset = wordOffset(termId);
if (offset < 0) return new EmptyEntrySource();
if (offset < 0) // No documents
return new EmptyEntrySource();
return new ReverseIndexEntrySource(name, createReaderNew(offset), 2, wordId);
return new ReverseIndexEntrySource(name, createReaderNew(offset), 2, termId);
}
public QueryFilterStepIf also(long wordId) {
long offset = wordOffset(wordId);
/** Create a filter step requiring the specified termId to exist in the documents */
public QueryFilterStepIf also(long termId) {
long offset = wordOffset(termId);
if (offset < 0) return new QueryFilterNoPass();
if (offset < 0) // No documents
return new QueryFilterNoPass();
return new ReverseIndexRetainFilter(createReaderNew(offset), name, wordId);
return new ReverseIndexRetainFilter(createReaderNew(offset), name, termId);
}
public QueryFilterStepIf not(long wordId) {
long offset = wordOffset(wordId);
/** Create a filter step requiring the specified termId to be absent from the documents */
public QueryFilterStepIf not(long termId) {
long offset = wordOffset(termId);
if (offset < 0) return new QueryFilterLetThrough();
if (offset < 0) // No documents
return new QueryFilterLetThrough();
return new ReverseIndexRejectFilter(createReaderNew(offset));
}
public int numDocuments(long wordId) {
long offset = wordOffset(wordId);
/** Return the number of documents with the termId in the index */
public int numDocuments(long termId) {
long offset = wordOffset(termId);
if (offset < 0)
return 0;
@ -115,15 +125,20 @@ public class ReverseIndexReader {
return createReaderNew(offset).numEntries();
}
/** Create a BTreeReader for the document offset associated with a termId */
private BTreeReader createReaderNew(long offset) {
return new BTreeReader(documents, ReverseIndexParameters.docsBTreeContext, offset);
return new BTreeReader(
documents,
ReverseIndexParameters.docsBTreeContext,
offset);
}
public long[] getTermMeta(long wordId, long[] docIds) {
long offset = wordOffset(wordId);
public long[] getTermMeta(long termId, long[] docIds) {
long offset = wordOffset(termId);
if (offset < 0) {
logger.debug("Missing offset for word {}", wordId);
// This is likely a bug in the code, but we can't throw an exception here
logger.debug("Missing offset for word {}", termId);
return new long[docIds.length];
}
@ -136,10 +151,9 @@ public class ReverseIndexReader {
private boolean isUniqueAndSorted(long[] ids) {
if (ids.length == 0)
return true;
long prev = ids[0];
for (int i = 1; i < ids.length; i++) {
if(ids[i] <= prev)
if(ids[i] <= ids[i-1])
return false;
}

View File

@ -41,6 +41,7 @@ public class CombinedIndexReader {
public QueryFilterStepIf hasWordFull(long termId) {
return reverseIndexFullReader.also(termId);
}
public QueryFilterStepIf hasWordPrio(long termId) {
return reverseIndexPriorityReader.also(termId);
}

View File

@ -36,7 +36,7 @@ public class IndexQueryBuilderImpl implements IndexQueryBuilder {
return this;
}
public IndexQueryBuilder alsoFull(long termId) {
public IndexQueryBuilder also(long termId) {
if (alreadyConsideredTerms.add(termId)) {
query.addInclusionFilter(reverseIndexFullReader.also(termId));
@ -45,16 +45,7 @@ public class IndexQueryBuilderImpl implements IndexQueryBuilder {
return this;
}
public IndexQueryBuilder alsoPrio(long termId) {
if (alreadyConsideredTerms.add(termId)) {
query.addInclusionFilter(reverseIndexPrioReader.also(termId));
}
return this;
}
public IndexQueryBuilder notFull(long termId) {
public IndexQueryBuilder not(long termId) {
query.addInclusionFilter(reverseIndexFullReader.not(termId));

View File

@ -75,7 +75,7 @@ public class QueryBranchWalker {
// in practice only when an index doesn't contain all the search terms, so we can just
// skip those paths
if (!remainingPaths.isEmpty()) {
logger.info("Dropping: {}", remainingPaths);
logger.debug("Dropping: {}", remainingPaths);
}
return ret;

View File

@ -125,59 +125,65 @@ public class StatefulIndex {
// the term is missing from the index and can never be found
paths.removeIf(containsAll(termPriority).negate());
List<QueryBranchWalker> helpers = QueryBranchWalker.create(termPriority, paths);
List<QueryBranchWalker> walkers = QueryBranchWalker.create(termPriority, paths);
for (var helper : helpers) {
for (var walker : walkers) {
for (var builder : List.of(
combinedIndexReader.findPriorityWord(helper.termId),
combinedIndexReader.findFullWord(helper.termId)
combinedIndexReader.findPriorityWord(walker.termId),
combinedIndexReader.findFullWord(walker.termId)
))
{
queryHeads.add(builder);
if (helper.atEnd())
continue;
if (walker.atEnd())
continue; // Single term search query
// Add filter steps for the remaining combinations of terms
List<QueryFilterStepIf> filterSteps = new ArrayList<>();
for (var step : helper.next()) {
for (var step : walker.next()) {
filterSteps.add(createFilter(step, 0));
}
builder.addInclusionFilterAny(filterSteps);
}
}
List<IndexQuery> ret = new ArrayList<>(10);
// Add additional conditions to the query heads
for (var query : queryHeads) {
// Advice terms are a special case, mandatory but not ranked, and exempt from re-writing
for (long term : terms.advice()) {
query = query.alsoFull(term);
query = query.also(term);
}
for (long term : terms.excludes()) {
query = query.notFull(term);
query = query.not(term);
}
// Run these filter steps last, as they'll worst-case cause as many page faults as there are
// items in the buffer
ret.add(query.addInclusionFilter(combinedIndexReader.filterForParams(params)).build());
query.addInclusionFilter(combinedIndexReader.filterForParams(params));
}
return ret;
return queryHeads
.stream()
.map(IndexQueryBuilder::build)
.toList();
}
/** Recursively create a filter step based on the QBW and its children */
private QueryFilterStepIf createFilter(QueryBranchWalker walker, int depth) {
// Create a filter for the current termId
final QueryFilterStepIf ownFilterCondition = ownFilterCondition(walker, depth);
var childSteps = walker.next();
if (childSteps.isEmpty())
if (childSteps.isEmpty()) // no children, and so we're satisfied with just a single filter condition
return ownFilterCondition;
// If there are children, we append the filter conditions for each child as an anyOf condition
// to the current filter condition
List<QueryFilterStepIf> combinedFilters = new ArrayList<>();
for (var step : childSteps) {
@ -186,6 +192,7 @@ public class StatefulIndex {
combinedFilters.add(new QueryFilterAllOf(ownFilterCondition, childFilter));
}
// Flatten the filter conditions if there's only one branch
if (combinedFilters.size() == 1)
return combinedFilters.getFirst();
else
@ -196,7 +203,7 @@ public class StatefulIndex {
private QueryFilterStepIf ownFilterCondition(QueryBranchWalker walker, int depth) {
if (depth < 2) {
// At shallow depths we prioritize terms that appear in the priority index,
// to increase the odds we find "good" results before the sand runs out
// to increase the odds we find "good" results before the execution timer runs out
return new QueryFilterAnyOf(
combinedIndexReader.hasWordPrio(walker.termId),
combinedIndexReader.hasWordFull(walker.termId)

View File

@ -11,16 +11,11 @@ import java.util.List;
public interface IndexQueryBuilder {
/** Filters documents that also contain termId, within the full index.
*/
IndexQueryBuilder alsoFull(long termId);
/**
* Filters documents that also contain the termId, within the priority index.
*/
IndexQueryBuilder alsoPrio(long termIds);
IndexQueryBuilder also(long termId);
/** Excludes documents that contain termId, within the full index
*/
IndexQueryBuilder notFull(long termId);
IndexQueryBuilder not(long termId);
IndexQueryBuilder addInclusionFilter(QueryFilterStepIf filterStep);
IndexQueryBuilder addInclusionFilterAny(List<QueryFilterStepIf> filterStep);