(index) Remove dead code

Since the performance fix in 3359f72239 had a huge positive impact without reducing result quality, it's possible to remove the QueryBranchWalker and associated code.
2025-02-23 21:18:58 +00:00 · 2024-04-16 19:59:27 +02:00 · 2024-04-16 19:59:27 +02:00 · 7fa3e86e64
commit 7fa3e86e64
parent 3359f72239
5 changed files with 1 additions and 227 deletions
--- a/code/index/java/nu/marginalia/index/index/CombinedIndexReader.java
+++ b/code/index/java/nu/marginalia/index/index/CombinedIndexReader.java
@ -35,24 +35,13 @@ public class CombinedIndexReader {
    }

    public IndexQueryBuilderImpl newQueryBuilder(IndexQuery query) {
-        return new IndexQueryBuilderImpl(reverseIndexFullReader, reverseIndexPriorityReader, query);
+        return new IndexQueryBuilderImpl(reverseIndexFullReader, query);
    }

    public QueryFilterStepIf hasWordFull(long termId) {
        return reverseIndexFullReader.also(termId);
    }

-    public QueryFilterStepIf hasWordPrio(long termId) {
-        return reverseIndexPriorityReader.also(termId);
-    }
-
-
-    /** Creates a query builder for terms in the priority index */
-    public IndexQueryBuilder findPriorityWord(long wordId) {
-        return newQueryBuilder(new IndexQuery(reverseIndexPriorityReader.documents(wordId)))
-                .withSourceTerms(wordId);
-    }
-
    /** Creates a query builder for terms in the full index */
    public IndexQueryBuilder findFullWord(long wordId) {
        return newQueryBuilder(
--- a/code/index/java/nu/marginalia/index/index/IndexQueryBuilderImpl.java
+++ b/code/index/java/nu/marginalia/index/index/IndexQueryBuilderImpl.java
@ -11,7 +11,6 @@ import nu.marginalia.index.query.filter.QueryFilterStepIf;
 public class IndexQueryBuilderImpl implements IndexQueryBuilder  {
    private final IndexQuery query;
    private final ReverseIndexReader reverseIndexFullReader;
-    private final ReverseIndexReader reverseIndexPrioReader;

    /* Keep track of already added include terms to avoid redundant checks.
     *
@ -22,12 +21,10 @@ public class IndexQueryBuilderImpl implements IndexQueryBuilder  {
    private final TLongHashSet alreadyConsideredTerms = new TLongHashSet();

    IndexQueryBuilderImpl(ReverseIndexReader reverseIndexFullReader,
-                          ReverseIndexReader reverseIndexPrioReader,
                          IndexQuery query)
    {
        this.query = query;
        this.reverseIndexFullReader = reverseIndexFullReader;
-        this.reverseIndexPrioReader = reverseIndexPrioReader;
    }

    public IndexQueryBuilder withSourceTerms(long... sourceTerms) {
--- a/code/index/java/nu/marginalia/index/index/QueryBranchWalker.java
+++ b/code/index/java/nu/marginalia/index/index/QueryBranchWalker.java
@ -1,108 +0,0 @@
-package nu.marginalia.index.index;
-
-import it.unimi.dsi.fastutil.longs.LongArrayList;
-import it.unimi.dsi.fastutil.longs.LongArraySet;
-import it.unimi.dsi.fastutil.longs.LongSet;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.ArrayList;
-import java.util.LinkedList;
-import java.util.List;
-
-/** Helper class for index query construction */
-public class QueryBranchWalker {
-    private static final Logger logger = LoggerFactory.getLogger(QueryBranchWalker.class);
-    public final long[] priorityOrder;
-    public final List<LongSet> paths;
-    public final long termId;
-
-    private QueryBranchWalker(long[] priorityOrder, List<LongSet> paths, long termId) {
-        this.priorityOrder = priorityOrder;
-        this.paths = paths;
-        this.termId = termId;
-    }
-
-    public boolean atEnd() {
-        return priorityOrder.length == 0;
-    }
-
-    /** Group the provided paths by the lowest termId they contain per the provided priorityOrder,
-     * into a list of QueryBranchWalkers.  This can be performed iteratively on the resultant QBW:s
-     * to traverse the tree via the next() method.
-     * <p></p>
-     * The paths can be extracted through the {@link nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates CompiledQueryAggregates}
-     * queriesAggregate method.
-     */
-    public static List<QueryBranchWalker> create(long[] priorityOrder, List<LongSet> paths) {
-        if (paths.isEmpty())
-            return List.of();
-
-        List<QueryBranchWalker> ret = new ArrayList<>();
-        List<LongSet> remainingPaths = new LinkedList<>(paths);
-        remainingPaths.removeIf(LongSet::isEmpty);
-
-        List<LongSet> pathsForPrio = new ArrayList<>();
-
-        for (int i = 0; i < priorityOrder.length; i++) {
-            long termId = priorityOrder[i];
-
-            var it = remainingPaths.iterator();
-
-            while (it.hasNext()) {
-                var path = it.next();
-
-                if (path.contains(termId)) {
-                    // Remove the current termId from the path
-                    path.remove(termId);
-
-                    // Add it to the set of paths associated with the termId
-                    pathsForPrio.add(path);
-
-                    // Remove it from consideration
-                    it.remove();
-                }
-            }
-
-            if (!pathsForPrio.isEmpty()) {
-                long[] newPrios = keepRelevantPriorities(priorityOrder, pathsForPrio);
-                ret.add(new QueryBranchWalker(newPrios, new ArrayList<>(pathsForPrio), termId));
-                pathsForPrio.clear();
-            }
-        }
-
-        // This happens if the priorityOrder array doesn't contain all items in the paths,
-        // in practice only when an index doesn't contain all the search terms, so we can just
-        // skip those paths
-        if (!remainingPaths.isEmpty()) {
-            logger.debug("Dropping: {}", remainingPaths);
-        }
-
-        return ret;
-    }
-
-    /** From the provided priorityOrder array, keep the elements that are present in any set in paths */
-    private static long[] keepRelevantPriorities(long[] priorityOrder, List<LongSet> paths) {
-        LongArrayList remainingPrios = new LongArrayList(paths.size());
-
-        // these sets are typically very small so array set is a good choice
-        LongSet allElements = new LongArraySet(priorityOrder.length);
-        for (var path : paths) {
-            allElements.addAll(path);
-        }
-
-        for (var p : priorityOrder) {
-            if (allElements.contains(p))
-                remainingPrios.add(p);
-        }
-
-        return remainingPrios.elements();
-    }
-
-    /** Convenience method that applies the create() method
-     * to the priority order and paths associated with this instance */
-    public List<QueryBranchWalker> next() {
-        return create(priorityOrder, paths);
-    }
-
-}
--- a/code/index/java/nu/marginalia/index/index/StatefulIndex.java
+++ b/code/index/java/nu/marginalia/index/index/StatefulIndex.java
@ -4,9 +4,6 @@ import com.google.inject.Inject;
 import com.google.inject.Singleton;
 import it.unimi.dsi.fastutil.longs.*;
 import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates;
-import nu.marginalia.index.query.filter.QueryFilterAllOf;
-import nu.marginalia.index.query.filter.QueryFilterAnyOf;
-import nu.marginalia.index.query.filter.QueryFilterStepIf;
 import nu.marginalia.index.results.model.ids.CombinedDocIdList;
 import nu.marginalia.index.results.model.ids.DocMetadataList;
 import nu.marginalia.index.model.QueryParams;
@ -168,48 +165,6 @@ public class StatefulIndex {
                .toList();
    }

-    /** Recursively create a filter step based on the QBW and its children */
-    private QueryFilterStepIf createFilter(QueryBranchWalker walker, int depth) {
-
-        // Create a filter for the current termId
-        final QueryFilterStepIf ownFilterCondition = ownFilterCondition(walker, depth);
-
-        var childSteps = walker.next();
-        if (childSteps.isEmpty()) // no children, and so we're satisfied with just a single filter condition
-            return ownFilterCondition;
-
-        // If there are children, we append the filter conditions for each child as an anyOf condition
-        // to the current filter condition
-
-        List<QueryFilterStepIf> combinedFilters = new ArrayList<>();
-
-        for (var step : childSteps) {
-            // Recursion will be limited to a fairly shallow stack depth due to how the queries are constructed.
-            var childFilter = createFilter(step, depth+1);
-            combinedFilters.add(new QueryFilterAllOf(ownFilterCondition, childFilter));
-        }
-
-        // Flatten the filter conditions if there's only one branch
-        if (combinedFilters.size() == 1)
-            return combinedFilters.getFirst();
-        else
-            return new QueryFilterAnyOf(combinedFilters);
-    }
-
-    /** Create a filter condition based on the termId associated with the QBW */
-    private QueryFilterStepIf ownFilterCondition(QueryBranchWalker walker, int depth) {
-        if (depth < 2) {
-            // At shallow depths we prioritize terms that appear in the priority index,
-            // to increase the odds we find "good" results before the execution timer runs out
-            return new QueryFilterAnyOf(
-                    combinedIndexReader.hasWordPrio(walker.termId),
-                    combinedIndexReader.hasWordFull(walker.termId)
-            );
-        } else {
-            return combinedIndexReader.hasWordFull(walker.termId);
-        }
-    }
-
    private Predicate<LongSet> containsAll(long[] permitted) {
        LongSet permittedTerms = new LongOpenHashSet(permitted);
        return permittedTerms::containsAll;
--- a/code/index/test/nu/marginalia/index/index/QueryBranchWalkerTest.java
+++ b/code/index/test/nu/marginalia/index/index/QueryBranchWalkerTest.java
@ -1,59 +0,0 @@
-package nu.marginalia.index.index;
-
-import it.unimi.dsi.fastutil.longs.LongArraySet;
-import it.unimi.dsi.fastutil.longs.LongSet;
-import org.junit.jupiter.api.Test;
-
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.stream.Collectors;
-
-import static org.junit.jupiter.api.Assertions.*;
-
-class QueryBranchWalkerTest {
-    @Test
-    public void testNoOverlap() {
-        var paths = QueryBranchWalker.create(
-                new long[] { 1, 2 },
-                List.of(set(1), set(2))
-        );
-        assertEquals(2, paths.size());
-        assertEquals(Set.of(1L, 2L), paths.stream().map(path -> path.termId).collect(Collectors.toSet()));
-    }
-
-    @Test
-    public void testCond() {
-        var paths = QueryBranchWalker.create(
-                new long[] { 1, 2, 3, 4 },
-                List.of(set(1,2,3), set(1,4,3))
-        );
-        assertEquals(1, paths.size());
-        assertEquals(Set.of(1L), paths.stream().map(path -> path.termId).collect(Collectors.toSet()));
-        System.out.println(Arrays.toString(paths.getFirst().priorityOrder));
-        assertArrayEquals(new long[] { 2, 3, 4 }, paths.getFirst().priorityOrder);
-
-        var next = paths.getFirst().next();
-        assertEquals(2, next.size());
-        assertEquals(Set.of(2L, 3L), next.stream().map(path -> path.termId).collect(Collectors.toSet()));
-        Map<Long, QueryBranchWalker> byId = next.stream().collect(Collectors.toMap(w -> w.termId, w->w));
-        assertArrayEquals(new long[] { 3L }, byId.get(2L).priorityOrder );
-        assertArrayEquals(new long[] { 4L }, byId.get(3L).priorityOrder );
-    }
-
-    @Test
-    public void testNoOverlapFirst() {
-        var paths = QueryBranchWalker.create(
-                new long[] { 1, 2, 3 },
-                List.of(set(1, 2), set(1, 3))
-        );
-        assertEquals(1, paths.size());
-        assertArrayEquals(new long[] { 2, 3 }, paths.getFirst().priorityOrder);
-        assertEquals(Set.of(1L), paths.stream().map(path -> path.termId).collect(Collectors.toSet()));
-    }
-
-    LongSet set(long... args) {
-        return new LongArraySet(args);
-    }
-}