diff --git a/code/index/java/nu/marginalia/index/index/CombinedIndexReader.java b/code/index/java/nu/marginalia/index/index/CombinedIndexReader.java index 27a631f5..ba6748fc 100644 --- a/code/index/java/nu/marginalia/index/index/CombinedIndexReader.java +++ b/code/index/java/nu/marginalia/index/index/CombinedIndexReader.java @@ -35,24 +35,13 @@ public class CombinedIndexReader { } public IndexQueryBuilderImpl newQueryBuilder(IndexQuery query) { - return new IndexQueryBuilderImpl(reverseIndexFullReader, reverseIndexPriorityReader, query); + return new IndexQueryBuilderImpl(reverseIndexFullReader, query); } public QueryFilterStepIf hasWordFull(long termId) { return reverseIndexFullReader.also(termId); } - public QueryFilterStepIf hasWordPrio(long termId) { - return reverseIndexPriorityReader.also(termId); - } - - - /** Creates a query builder for terms in the priority index */ - public IndexQueryBuilder findPriorityWord(long wordId) { - return newQueryBuilder(new IndexQuery(reverseIndexPriorityReader.documents(wordId))) - .withSourceTerms(wordId); - } - /** Creates a query builder for terms in the full index */ public IndexQueryBuilder findFullWord(long wordId) { return newQueryBuilder( diff --git a/code/index/java/nu/marginalia/index/index/IndexQueryBuilderImpl.java b/code/index/java/nu/marginalia/index/index/IndexQueryBuilderImpl.java index 0f63fdbc..92dce62a 100644 --- a/code/index/java/nu/marginalia/index/index/IndexQueryBuilderImpl.java +++ b/code/index/java/nu/marginalia/index/index/IndexQueryBuilderImpl.java @@ -11,7 +11,6 @@ import nu.marginalia.index.query.filter.QueryFilterStepIf; public class IndexQueryBuilderImpl implements IndexQueryBuilder { private final IndexQuery query; private final ReverseIndexReader reverseIndexFullReader; - private final ReverseIndexReader reverseIndexPrioReader; /* Keep track of already added include terms to avoid redundant checks. * @@ -22,12 +21,10 @@ public class IndexQueryBuilderImpl implements IndexQueryBuilder { private final TLongHashSet alreadyConsideredTerms = new TLongHashSet(); IndexQueryBuilderImpl(ReverseIndexReader reverseIndexFullReader, - ReverseIndexReader reverseIndexPrioReader, IndexQuery query) { this.query = query; this.reverseIndexFullReader = reverseIndexFullReader; - this.reverseIndexPrioReader = reverseIndexPrioReader; } public IndexQueryBuilder withSourceTerms(long... sourceTerms) { diff --git a/code/index/java/nu/marginalia/index/index/QueryBranchWalker.java b/code/index/java/nu/marginalia/index/index/QueryBranchWalker.java deleted file mode 100644 index ffaa5176..00000000 --- a/code/index/java/nu/marginalia/index/index/QueryBranchWalker.java +++ /dev/null @@ -1,108 +0,0 @@ -package nu.marginalia.index.index; - -import it.unimi.dsi.fastutil.longs.LongArrayList; -import it.unimi.dsi.fastutil.longs.LongArraySet; -import it.unimi.dsi.fastutil.longs.LongSet; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; - -/** Helper class for index query construction */ -public class QueryBranchWalker { - private static final Logger logger = LoggerFactory.getLogger(QueryBranchWalker.class); - public final long[] priorityOrder; - public final List paths; - public final long termId; - - private QueryBranchWalker(long[] priorityOrder, List paths, long termId) { - this.priorityOrder = priorityOrder; - this.paths = paths; - this.termId = termId; - } - - public boolean atEnd() { - return priorityOrder.length == 0; - } - - /** Group the provided paths by the lowest termId they contain per the provided priorityOrder, - * into a list of QueryBranchWalkers. This can be performed iteratively on the resultant QBW:s - * to traverse the tree via the next() method. - *

- * The paths can be extracted through the {@link nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates CompiledQueryAggregates} - * queriesAggregate method. - */ - public static List create(long[] priorityOrder, List paths) { - if (paths.isEmpty()) - return List.of(); - - List ret = new ArrayList<>(); - List remainingPaths = new LinkedList<>(paths); - remainingPaths.removeIf(LongSet::isEmpty); - - List pathsForPrio = new ArrayList<>(); - - for (int i = 0; i < priorityOrder.length; i++) { - long termId = priorityOrder[i]; - - var it = remainingPaths.iterator(); - - while (it.hasNext()) { - var path = it.next(); - - if (path.contains(termId)) { - // Remove the current termId from the path - path.remove(termId); - - // Add it to the set of paths associated with the termId - pathsForPrio.add(path); - - // Remove it from consideration - it.remove(); - } - } - - if (!pathsForPrio.isEmpty()) { - long[] newPrios = keepRelevantPriorities(priorityOrder, pathsForPrio); - ret.add(new QueryBranchWalker(newPrios, new ArrayList<>(pathsForPrio), termId)); - pathsForPrio.clear(); - } - } - - // This happens if the priorityOrder array doesn't contain all items in the paths, - // in practice only when an index doesn't contain all the search terms, so we can just - // skip those paths - if (!remainingPaths.isEmpty()) { - logger.debug("Dropping: {}", remainingPaths); - } - - return ret; - } - - /** From the provided priorityOrder array, keep the elements that are present in any set in paths */ - private static long[] keepRelevantPriorities(long[] priorityOrder, List paths) { - LongArrayList remainingPrios = new LongArrayList(paths.size()); - - // these sets are typically very small so array set is a good choice - LongSet allElements = new LongArraySet(priorityOrder.length); - for (var path : paths) { - allElements.addAll(path); - } - - for (var p : priorityOrder) { - if (allElements.contains(p)) - remainingPrios.add(p); - } - - return remainingPrios.elements(); - } - - /** Convenience method that applies the create() method - * to the priority order and paths associated with this instance */ - public List next() { - return create(priorityOrder, paths); - } - -} diff --git a/code/index/java/nu/marginalia/index/index/StatefulIndex.java b/code/index/java/nu/marginalia/index/index/StatefulIndex.java index 5c54a15b..dcaf5d7a 100644 --- a/code/index/java/nu/marginalia/index/index/StatefulIndex.java +++ b/code/index/java/nu/marginalia/index/index/StatefulIndex.java @@ -4,9 +4,6 @@ import com.google.inject.Inject; import com.google.inject.Singleton; import it.unimi.dsi.fastutil.longs.*; import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates; -import nu.marginalia.index.query.filter.QueryFilterAllOf; -import nu.marginalia.index.query.filter.QueryFilterAnyOf; -import nu.marginalia.index.query.filter.QueryFilterStepIf; import nu.marginalia.index.results.model.ids.CombinedDocIdList; import nu.marginalia.index.results.model.ids.DocMetadataList; import nu.marginalia.index.model.QueryParams; @@ -168,48 +165,6 @@ public class StatefulIndex { .toList(); } - /** Recursively create a filter step based on the QBW and its children */ - private QueryFilterStepIf createFilter(QueryBranchWalker walker, int depth) { - - // Create a filter for the current termId - final QueryFilterStepIf ownFilterCondition = ownFilterCondition(walker, depth); - - var childSteps = walker.next(); - if (childSteps.isEmpty()) // no children, and so we're satisfied with just a single filter condition - return ownFilterCondition; - - // If there are children, we append the filter conditions for each child as an anyOf condition - // to the current filter condition - - List combinedFilters = new ArrayList<>(); - - for (var step : childSteps) { - // Recursion will be limited to a fairly shallow stack depth due to how the queries are constructed. - var childFilter = createFilter(step, depth+1); - combinedFilters.add(new QueryFilterAllOf(ownFilterCondition, childFilter)); - } - - // Flatten the filter conditions if there's only one branch - if (combinedFilters.size() == 1) - return combinedFilters.getFirst(); - else - return new QueryFilterAnyOf(combinedFilters); - } - - /** Create a filter condition based on the termId associated with the QBW */ - private QueryFilterStepIf ownFilterCondition(QueryBranchWalker walker, int depth) { - if (depth < 2) { - // At shallow depths we prioritize terms that appear in the priority index, - // to increase the odds we find "good" results before the execution timer runs out - return new QueryFilterAnyOf( - combinedIndexReader.hasWordPrio(walker.termId), - combinedIndexReader.hasWordFull(walker.termId) - ); - } else { - return combinedIndexReader.hasWordFull(walker.termId); - } - } - private Predicate containsAll(long[] permitted) { LongSet permittedTerms = new LongOpenHashSet(permitted); return permittedTerms::containsAll; diff --git a/code/index/test/nu/marginalia/index/index/QueryBranchWalkerTest.java b/code/index/test/nu/marginalia/index/index/QueryBranchWalkerTest.java deleted file mode 100644 index 8d2f45c8..00000000 --- a/code/index/test/nu/marginalia/index/index/QueryBranchWalkerTest.java +++ /dev/null @@ -1,59 +0,0 @@ -package nu.marginalia.index.index; - -import it.unimi.dsi.fastutil.longs.LongArraySet; -import it.unimi.dsi.fastutil.longs.LongSet; -import org.junit.jupiter.api.Test; - -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; - -import static org.junit.jupiter.api.Assertions.*; - -class QueryBranchWalkerTest { - @Test - public void testNoOverlap() { - var paths = QueryBranchWalker.create( - new long[] { 1, 2 }, - List.of(set(1), set(2)) - ); - assertEquals(2, paths.size()); - assertEquals(Set.of(1L, 2L), paths.stream().map(path -> path.termId).collect(Collectors.toSet())); - } - - @Test - public void testCond() { - var paths = QueryBranchWalker.create( - new long[] { 1, 2, 3, 4 }, - List.of(set(1,2,3), set(1,4,3)) - ); - assertEquals(1, paths.size()); - assertEquals(Set.of(1L), paths.stream().map(path -> path.termId).collect(Collectors.toSet())); - System.out.println(Arrays.toString(paths.getFirst().priorityOrder)); - assertArrayEquals(new long[] { 2, 3, 4 }, paths.getFirst().priorityOrder); - - var next = paths.getFirst().next(); - assertEquals(2, next.size()); - assertEquals(Set.of(2L, 3L), next.stream().map(path -> path.termId).collect(Collectors.toSet())); - Map byId = next.stream().collect(Collectors.toMap(w -> w.termId, w->w)); - assertArrayEquals(new long[] { 3L }, byId.get(2L).priorityOrder ); - assertArrayEquals(new long[] { 4L }, byId.get(3L).priorityOrder ); - } - - @Test - public void testNoOverlapFirst() { - var paths = QueryBranchWalker.create( - new long[] { 1, 2, 3 }, - List.of(set(1, 2), set(1, 3)) - ); - assertEquals(1, paths.size()); - assertArrayEquals(new long[] { 2, 3 }, paths.getFirst().priorityOrder); - assertEquals(Set.of(1L), paths.stream().map(path -> path.termId).collect(Collectors.toSet())); - } - - LongSet set(long... args) { - return new LongArraySet(args); - } -} \ No newline at end of file