mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(index) Remove dead code
Since the performance fix in 3359f72239
had a huge positive impact without reducing result quality, it's possible to remove the QueryBranchWalker and associated code.
This commit is contained in:
parent
3359f72239
commit
7fa3e86e64
@ -35,24 +35,13 @@ public class CombinedIndexReader {
|
||||
}
|
||||
|
||||
public IndexQueryBuilderImpl newQueryBuilder(IndexQuery query) {
|
||||
return new IndexQueryBuilderImpl(reverseIndexFullReader, reverseIndexPriorityReader, query);
|
||||
return new IndexQueryBuilderImpl(reverseIndexFullReader, query);
|
||||
}
|
||||
|
||||
public QueryFilterStepIf hasWordFull(long termId) {
|
||||
return reverseIndexFullReader.also(termId);
|
||||
}
|
||||
|
||||
public QueryFilterStepIf hasWordPrio(long termId) {
|
||||
return reverseIndexPriorityReader.also(termId);
|
||||
}
|
||||
|
||||
|
||||
/** Creates a query builder for terms in the priority index */
|
||||
public IndexQueryBuilder findPriorityWord(long wordId) {
|
||||
return newQueryBuilder(new IndexQuery(reverseIndexPriorityReader.documents(wordId)))
|
||||
.withSourceTerms(wordId);
|
||||
}
|
||||
|
||||
/** Creates a query builder for terms in the full index */
|
||||
public IndexQueryBuilder findFullWord(long wordId) {
|
||||
return newQueryBuilder(
|
||||
|
@ -11,7 +11,6 @@ import nu.marginalia.index.query.filter.QueryFilterStepIf;
|
||||
public class IndexQueryBuilderImpl implements IndexQueryBuilder {
|
||||
private final IndexQuery query;
|
||||
private final ReverseIndexReader reverseIndexFullReader;
|
||||
private final ReverseIndexReader reverseIndexPrioReader;
|
||||
|
||||
/* Keep track of already added include terms to avoid redundant checks.
|
||||
*
|
||||
@ -22,12 +21,10 @@ public class IndexQueryBuilderImpl implements IndexQueryBuilder {
|
||||
private final TLongHashSet alreadyConsideredTerms = new TLongHashSet();
|
||||
|
||||
IndexQueryBuilderImpl(ReverseIndexReader reverseIndexFullReader,
|
||||
ReverseIndexReader reverseIndexPrioReader,
|
||||
IndexQuery query)
|
||||
{
|
||||
this.query = query;
|
||||
this.reverseIndexFullReader = reverseIndexFullReader;
|
||||
this.reverseIndexPrioReader = reverseIndexPrioReader;
|
||||
}
|
||||
|
||||
public IndexQueryBuilder withSourceTerms(long... sourceTerms) {
|
||||
|
@ -1,108 +0,0 @@
|
||||
package nu.marginalia.index.index;
|
||||
|
||||
import it.unimi.dsi.fastutil.longs.LongArrayList;
|
||||
import it.unimi.dsi.fastutil.longs.LongArraySet;
|
||||
import it.unimi.dsi.fastutil.longs.LongSet;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/** Helper class for index query construction */
|
||||
public class QueryBranchWalker {
|
||||
private static final Logger logger = LoggerFactory.getLogger(QueryBranchWalker.class);
|
||||
public final long[] priorityOrder;
|
||||
public final List<LongSet> paths;
|
||||
public final long termId;
|
||||
|
||||
private QueryBranchWalker(long[] priorityOrder, List<LongSet> paths, long termId) {
|
||||
this.priorityOrder = priorityOrder;
|
||||
this.paths = paths;
|
||||
this.termId = termId;
|
||||
}
|
||||
|
||||
public boolean atEnd() {
|
||||
return priorityOrder.length == 0;
|
||||
}
|
||||
|
||||
/** Group the provided paths by the lowest termId they contain per the provided priorityOrder,
|
||||
* into a list of QueryBranchWalkers. This can be performed iteratively on the resultant QBW:s
|
||||
* to traverse the tree via the next() method.
|
||||
* <p></p>
|
||||
* The paths can be extracted through the {@link nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates CompiledQueryAggregates}
|
||||
* queriesAggregate method.
|
||||
*/
|
||||
public static List<QueryBranchWalker> create(long[] priorityOrder, List<LongSet> paths) {
|
||||
if (paths.isEmpty())
|
||||
return List.of();
|
||||
|
||||
List<QueryBranchWalker> ret = new ArrayList<>();
|
||||
List<LongSet> remainingPaths = new LinkedList<>(paths);
|
||||
remainingPaths.removeIf(LongSet::isEmpty);
|
||||
|
||||
List<LongSet> pathsForPrio = new ArrayList<>();
|
||||
|
||||
for (int i = 0; i < priorityOrder.length; i++) {
|
||||
long termId = priorityOrder[i];
|
||||
|
||||
var it = remainingPaths.iterator();
|
||||
|
||||
while (it.hasNext()) {
|
||||
var path = it.next();
|
||||
|
||||
if (path.contains(termId)) {
|
||||
// Remove the current termId from the path
|
||||
path.remove(termId);
|
||||
|
||||
// Add it to the set of paths associated with the termId
|
||||
pathsForPrio.add(path);
|
||||
|
||||
// Remove it from consideration
|
||||
it.remove();
|
||||
}
|
||||
}
|
||||
|
||||
if (!pathsForPrio.isEmpty()) {
|
||||
long[] newPrios = keepRelevantPriorities(priorityOrder, pathsForPrio);
|
||||
ret.add(new QueryBranchWalker(newPrios, new ArrayList<>(pathsForPrio), termId));
|
||||
pathsForPrio.clear();
|
||||
}
|
||||
}
|
||||
|
||||
// This happens if the priorityOrder array doesn't contain all items in the paths,
|
||||
// in practice only when an index doesn't contain all the search terms, so we can just
|
||||
// skip those paths
|
||||
if (!remainingPaths.isEmpty()) {
|
||||
logger.debug("Dropping: {}", remainingPaths);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/** From the provided priorityOrder array, keep the elements that are present in any set in paths */
|
||||
private static long[] keepRelevantPriorities(long[] priorityOrder, List<LongSet> paths) {
|
||||
LongArrayList remainingPrios = new LongArrayList(paths.size());
|
||||
|
||||
// these sets are typically very small so array set is a good choice
|
||||
LongSet allElements = new LongArraySet(priorityOrder.length);
|
||||
for (var path : paths) {
|
||||
allElements.addAll(path);
|
||||
}
|
||||
|
||||
for (var p : priorityOrder) {
|
||||
if (allElements.contains(p))
|
||||
remainingPrios.add(p);
|
||||
}
|
||||
|
||||
return remainingPrios.elements();
|
||||
}
|
||||
|
||||
/** Convenience method that applies the create() method
|
||||
* to the priority order and paths associated with this instance */
|
||||
public List<QueryBranchWalker> next() {
|
||||
return create(priorityOrder, paths);
|
||||
}
|
||||
|
||||
}
|
@ -4,9 +4,6 @@ import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import it.unimi.dsi.fastutil.longs.*;
|
||||
import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates;
|
||||
import nu.marginalia.index.query.filter.QueryFilterAllOf;
|
||||
import nu.marginalia.index.query.filter.QueryFilterAnyOf;
|
||||
import nu.marginalia.index.query.filter.QueryFilterStepIf;
|
||||
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
|
||||
import nu.marginalia.index.results.model.ids.DocMetadataList;
|
||||
import nu.marginalia.index.model.QueryParams;
|
||||
@ -168,48 +165,6 @@ public class StatefulIndex {
|
||||
.toList();
|
||||
}
|
||||
|
||||
/** Recursively create a filter step based on the QBW and its children */
|
||||
private QueryFilterStepIf createFilter(QueryBranchWalker walker, int depth) {
|
||||
|
||||
// Create a filter for the current termId
|
||||
final QueryFilterStepIf ownFilterCondition = ownFilterCondition(walker, depth);
|
||||
|
||||
var childSteps = walker.next();
|
||||
if (childSteps.isEmpty()) // no children, and so we're satisfied with just a single filter condition
|
||||
return ownFilterCondition;
|
||||
|
||||
// If there are children, we append the filter conditions for each child as an anyOf condition
|
||||
// to the current filter condition
|
||||
|
||||
List<QueryFilterStepIf> combinedFilters = new ArrayList<>();
|
||||
|
||||
for (var step : childSteps) {
|
||||
// Recursion will be limited to a fairly shallow stack depth due to how the queries are constructed.
|
||||
var childFilter = createFilter(step, depth+1);
|
||||
combinedFilters.add(new QueryFilterAllOf(ownFilterCondition, childFilter));
|
||||
}
|
||||
|
||||
// Flatten the filter conditions if there's only one branch
|
||||
if (combinedFilters.size() == 1)
|
||||
return combinedFilters.getFirst();
|
||||
else
|
||||
return new QueryFilterAnyOf(combinedFilters);
|
||||
}
|
||||
|
||||
/** Create a filter condition based on the termId associated with the QBW */
|
||||
private QueryFilterStepIf ownFilterCondition(QueryBranchWalker walker, int depth) {
|
||||
if (depth < 2) {
|
||||
// At shallow depths we prioritize terms that appear in the priority index,
|
||||
// to increase the odds we find "good" results before the execution timer runs out
|
||||
return new QueryFilterAnyOf(
|
||||
combinedIndexReader.hasWordPrio(walker.termId),
|
||||
combinedIndexReader.hasWordFull(walker.termId)
|
||||
);
|
||||
} else {
|
||||
return combinedIndexReader.hasWordFull(walker.termId);
|
||||
}
|
||||
}
|
||||
|
||||
private Predicate<LongSet> containsAll(long[] permitted) {
|
||||
LongSet permittedTerms = new LongOpenHashSet(permitted);
|
||||
return permittedTerms::containsAll;
|
||||
|
@ -1,59 +0,0 @@
|
||||
package nu.marginalia.index.index;
|
||||
|
||||
import it.unimi.dsi.fastutil.longs.LongArraySet;
|
||||
import it.unimi.dsi.fastutil.longs.LongSet;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
class QueryBranchWalkerTest {
|
||||
@Test
|
||||
public void testNoOverlap() {
|
||||
var paths = QueryBranchWalker.create(
|
||||
new long[] { 1, 2 },
|
||||
List.of(set(1), set(2))
|
||||
);
|
||||
assertEquals(2, paths.size());
|
||||
assertEquals(Set.of(1L, 2L), paths.stream().map(path -> path.termId).collect(Collectors.toSet()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCond() {
|
||||
var paths = QueryBranchWalker.create(
|
||||
new long[] { 1, 2, 3, 4 },
|
||||
List.of(set(1,2,3), set(1,4,3))
|
||||
);
|
||||
assertEquals(1, paths.size());
|
||||
assertEquals(Set.of(1L), paths.stream().map(path -> path.termId).collect(Collectors.toSet()));
|
||||
System.out.println(Arrays.toString(paths.getFirst().priorityOrder));
|
||||
assertArrayEquals(new long[] { 2, 3, 4 }, paths.getFirst().priorityOrder);
|
||||
|
||||
var next = paths.getFirst().next();
|
||||
assertEquals(2, next.size());
|
||||
assertEquals(Set.of(2L, 3L), next.stream().map(path -> path.termId).collect(Collectors.toSet()));
|
||||
Map<Long, QueryBranchWalker> byId = next.stream().collect(Collectors.toMap(w -> w.termId, w->w));
|
||||
assertArrayEquals(new long[] { 3L }, byId.get(2L).priorityOrder );
|
||||
assertArrayEquals(new long[] { 4L }, byId.get(3L).priorityOrder );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoOverlapFirst() {
|
||||
var paths = QueryBranchWalker.create(
|
||||
new long[] { 1, 2, 3 },
|
||||
List.of(set(1, 2), set(1, 3))
|
||||
);
|
||||
assertEquals(1, paths.size());
|
||||
assertArrayEquals(new long[] { 2, 3 }, paths.getFirst().priorityOrder);
|
||||
assertEquals(Set.of(1L), paths.stream().map(path -> path.termId).collect(Collectors.toSet()));
|
||||
}
|
||||
|
||||
LongSet set(long... args) {
|
||||
return new LongArraySet(args);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user