mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 13:19:02 +00:00
(index) Remove dead code
Since the performance fix in 3359f72239
had a huge positive impact without reducing result quality, it's possible to remove the QueryBranchWalker and associated code.
This commit is contained in:
parent
3359f72239
commit
7fa3e86e64
@ -35,24 +35,13 @@ public class CombinedIndexReader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public IndexQueryBuilderImpl newQueryBuilder(IndexQuery query) {
|
public IndexQueryBuilderImpl newQueryBuilder(IndexQuery query) {
|
||||||
return new IndexQueryBuilderImpl(reverseIndexFullReader, reverseIndexPriorityReader, query);
|
return new IndexQueryBuilderImpl(reverseIndexFullReader, query);
|
||||||
}
|
}
|
||||||
|
|
||||||
public QueryFilterStepIf hasWordFull(long termId) {
|
public QueryFilterStepIf hasWordFull(long termId) {
|
||||||
return reverseIndexFullReader.also(termId);
|
return reverseIndexFullReader.also(termId);
|
||||||
}
|
}
|
||||||
|
|
||||||
public QueryFilterStepIf hasWordPrio(long termId) {
|
|
||||||
return reverseIndexPriorityReader.also(termId);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/** Creates a query builder for terms in the priority index */
|
|
||||||
public IndexQueryBuilder findPriorityWord(long wordId) {
|
|
||||||
return newQueryBuilder(new IndexQuery(reverseIndexPriorityReader.documents(wordId)))
|
|
||||||
.withSourceTerms(wordId);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Creates a query builder for terms in the full index */
|
/** Creates a query builder for terms in the full index */
|
||||||
public IndexQueryBuilder findFullWord(long wordId) {
|
public IndexQueryBuilder findFullWord(long wordId) {
|
||||||
return newQueryBuilder(
|
return newQueryBuilder(
|
||||||
|
@ -11,7 +11,6 @@ import nu.marginalia.index.query.filter.QueryFilterStepIf;
|
|||||||
public class IndexQueryBuilderImpl implements IndexQueryBuilder {
|
public class IndexQueryBuilderImpl implements IndexQueryBuilder {
|
||||||
private final IndexQuery query;
|
private final IndexQuery query;
|
||||||
private final ReverseIndexReader reverseIndexFullReader;
|
private final ReverseIndexReader reverseIndexFullReader;
|
||||||
private final ReverseIndexReader reverseIndexPrioReader;
|
|
||||||
|
|
||||||
/* Keep track of already added include terms to avoid redundant checks.
|
/* Keep track of already added include terms to avoid redundant checks.
|
||||||
*
|
*
|
||||||
@ -22,12 +21,10 @@ public class IndexQueryBuilderImpl implements IndexQueryBuilder {
|
|||||||
private final TLongHashSet alreadyConsideredTerms = new TLongHashSet();
|
private final TLongHashSet alreadyConsideredTerms = new TLongHashSet();
|
||||||
|
|
||||||
IndexQueryBuilderImpl(ReverseIndexReader reverseIndexFullReader,
|
IndexQueryBuilderImpl(ReverseIndexReader reverseIndexFullReader,
|
||||||
ReverseIndexReader reverseIndexPrioReader,
|
|
||||||
IndexQuery query)
|
IndexQuery query)
|
||||||
{
|
{
|
||||||
this.query = query;
|
this.query = query;
|
||||||
this.reverseIndexFullReader = reverseIndexFullReader;
|
this.reverseIndexFullReader = reverseIndexFullReader;
|
||||||
this.reverseIndexPrioReader = reverseIndexPrioReader;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public IndexQueryBuilder withSourceTerms(long... sourceTerms) {
|
public IndexQueryBuilder withSourceTerms(long... sourceTerms) {
|
||||||
|
@ -1,108 +0,0 @@
|
|||||||
package nu.marginalia.index.index;
|
|
||||||
|
|
||||||
import it.unimi.dsi.fastutil.longs.LongArrayList;
|
|
||||||
import it.unimi.dsi.fastutil.longs.LongArraySet;
|
|
||||||
import it.unimi.dsi.fastutil.longs.LongSet;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/** Helper class for index query construction */
|
|
||||||
public class QueryBranchWalker {
|
|
||||||
private static final Logger logger = LoggerFactory.getLogger(QueryBranchWalker.class);
|
|
||||||
public final long[] priorityOrder;
|
|
||||||
public final List<LongSet> paths;
|
|
||||||
public final long termId;
|
|
||||||
|
|
||||||
private QueryBranchWalker(long[] priorityOrder, List<LongSet> paths, long termId) {
|
|
||||||
this.priorityOrder = priorityOrder;
|
|
||||||
this.paths = paths;
|
|
||||||
this.termId = termId;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean atEnd() {
|
|
||||||
return priorityOrder.length == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Group the provided paths by the lowest termId they contain per the provided priorityOrder,
|
|
||||||
* into a list of QueryBranchWalkers. This can be performed iteratively on the resultant QBW:s
|
|
||||||
* to traverse the tree via the next() method.
|
|
||||||
* <p></p>
|
|
||||||
* The paths can be extracted through the {@link nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates CompiledQueryAggregates}
|
|
||||||
* queriesAggregate method.
|
|
||||||
*/
|
|
||||||
public static List<QueryBranchWalker> create(long[] priorityOrder, List<LongSet> paths) {
|
|
||||||
if (paths.isEmpty())
|
|
||||||
return List.of();
|
|
||||||
|
|
||||||
List<QueryBranchWalker> ret = new ArrayList<>();
|
|
||||||
List<LongSet> remainingPaths = new LinkedList<>(paths);
|
|
||||||
remainingPaths.removeIf(LongSet::isEmpty);
|
|
||||||
|
|
||||||
List<LongSet> pathsForPrio = new ArrayList<>();
|
|
||||||
|
|
||||||
for (int i = 0; i < priorityOrder.length; i++) {
|
|
||||||
long termId = priorityOrder[i];
|
|
||||||
|
|
||||||
var it = remainingPaths.iterator();
|
|
||||||
|
|
||||||
while (it.hasNext()) {
|
|
||||||
var path = it.next();
|
|
||||||
|
|
||||||
if (path.contains(termId)) {
|
|
||||||
// Remove the current termId from the path
|
|
||||||
path.remove(termId);
|
|
||||||
|
|
||||||
// Add it to the set of paths associated with the termId
|
|
||||||
pathsForPrio.add(path);
|
|
||||||
|
|
||||||
// Remove it from consideration
|
|
||||||
it.remove();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!pathsForPrio.isEmpty()) {
|
|
||||||
long[] newPrios = keepRelevantPriorities(priorityOrder, pathsForPrio);
|
|
||||||
ret.add(new QueryBranchWalker(newPrios, new ArrayList<>(pathsForPrio), termId));
|
|
||||||
pathsForPrio.clear();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// This happens if the priorityOrder array doesn't contain all items in the paths,
|
|
||||||
// in practice only when an index doesn't contain all the search terms, so we can just
|
|
||||||
// skip those paths
|
|
||||||
if (!remainingPaths.isEmpty()) {
|
|
||||||
logger.debug("Dropping: {}", remainingPaths);
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** From the provided priorityOrder array, keep the elements that are present in any set in paths */
|
|
||||||
private static long[] keepRelevantPriorities(long[] priorityOrder, List<LongSet> paths) {
|
|
||||||
LongArrayList remainingPrios = new LongArrayList(paths.size());
|
|
||||||
|
|
||||||
// these sets are typically very small so array set is a good choice
|
|
||||||
LongSet allElements = new LongArraySet(priorityOrder.length);
|
|
||||||
for (var path : paths) {
|
|
||||||
allElements.addAll(path);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (var p : priorityOrder) {
|
|
||||||
if (allElements.contains(p))
|
|
||||||
remainingPrios.add(p);
|
|
||||||
}
|
|
||||||
|
|
||||||
return remainingPrios.elements();
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Convenience method that applies the create() method
|
|
||||||
* to the priority order and paths associated with this instance */
|
|
||||||
public List<QueryBranchWalker> next() {
|
|
||||||
return create(priorityOrder, paths);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -4,9 +4,6 @@ import com.google.inject.Inject;
|
|||||||
import com.google.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
import it.unimi.dsi.fastutil.longs.*;
|
import it.unimi.dsi.fastutil.longs.*;
|
||||||
import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates;
|
import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates;
|
||||||
import nu.marginalia.index.query.filter.QueryFilterAllOf;
|
|
||||||
import nu.marginalia.index.query.filter.QueryFilterAnyOf;
|
|
||||||
import nu.marginalia.index.query.filter.QueryFilterStepIf;
|
|
||||||
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
|
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
|
||||||
import nu.marginalia.index.results.model.ids.DocMetadataList;
|
import nu.marginalia.index.results.model.ids.DocMetadataList;
|
||||||
import nu.marginalia.index.model.QueryParams;
|
import nu.marginalia.index.model.QueryParams;
|
||||||
@ -168,48 +165,6 @@ public class StatefulIndex {
|
|||||||
.toList();
|
.toList();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Recursively create a filter step based on the QBW and its children */
|
|
||||||
private QueryFilterStepIf createFilter(QueryBranchWalker walker, int depth) {
|
|
||||||
|
|
||||||
// Create a filter for the current termId
|
|
||||||
final QueryFilterStepIf ownFilterCondition = ownFilterCondition(walker, depth);
|
|
||||||
|
|
||||||
var childSteps = walker.next();
|
|
||||||
if (childSteps.isEmpty()) // no children, and so we're satisfied with just a single filter condition
|
|
||||||
return ownFilterCondition;
|
|
||||||
|
|
||||||
// If there are children, we append the filter conditions for each child as an anyOf condition
|
|
||||||
// to the current filter condition
|
|
||||||
|
|
||||||
List<QueryFilterStepIf> combinedFilters = new ArrayList<>();
|
|
||||||
|
|
||||||
for (var step : childSteps) {
|
|
||||||
// Recursion will be limited to a fairly shallow stack depth due to how the queries are constructed.
|
|
||||||
var childFilter = createFilter(step, depth+1);
|
|
||||||
combinedFilters.add(new QueryFilterAllOf(ownFilterCondition, childFilter));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Flatten the filter conditions if there's only one branch
|
|
||||||
if (combinedFilters.size() == 1)
|
|
||||||
return combinedFilters.getFirst();
|
|
||||||
else
|
|
||||||
return new QueryFilterAnyOf(combinedFilters);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Create a filter condition based on the termId associated with the QBW */
|
|
||||||
private QueryFilterStepIf ownFilterCondition(QueryBranchWalker walker, int depth) {
|
|
||||||
if (depth < 2) {
|
|
||||||
// At shallow depths we prioritize terms that appear in the priority index,
|
|
||||||
// to increase the odds we find "good" results before the execution timer runs out
|
|
||||||
return new QueryFilterAnyOf(
|
|
||||||
combinedIndexReader.hasWordPrio(walker.termId),
|
|
||||||
combinedIndexReader.hasWordFull(walker.termId)
|
|
||||||
);
|
|
||||||
} else {
|
|
||||||
return combinedIndexReader.hasWordFull(walker.termId);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private Predicate<LongSet> containsAll(long[] permitted) {
|
private Predicate<LongSet> containsAll(long[] permitted) {
|
||||||
LongSet permittedTerms = new LongOpenHashSet(permitted);
|
LongSet permittedTerms = new LongOpenHashSet(permitted);
|
||||||
return permittedTerms::containsAll;
|
return permittedTerms::containsAll;
|
||||||
|
@ -1,59 +0,0 @@
|
|||||||
package nu.marginalia.index.index;
|
|
||||||
|
|
||||||
import it.unimi.dsi.fastutil.longs.LongArraySet;
|
|
||||||
import it.unimi.dsi.fastutil.longs.LongSet;
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.*;
|
|
||||||
|
|
||||||
class QueryBranchWalkerTest {
|
|
||||||
@Test
|
|
||||||
public void testNoOverlap() {
|
|
||||||
var paths = QueryBranchWalker.create(
|
|
||||||
new long[] { 1, 2 },
|
|
||||||
List.of(set(1), set(2))
|
|
||||||
);
|
|
||||||
assertEquals(2, paths.size());
|
|
||||||
assertEquals(Set.of(1L, 2L), paths.stream().map(path -> path.termId).collect(Collectors.toSet()));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testCond() {
|
|
||||||
var paths = QueryBranchWalker.create(
|
|
||||||
new long[] { 1, 2, 3, 4 },
|
|
||||||
List.of(set(1,2,3), set(1,4,3))
|
|
||||||
);
|
|
||||||
assertEquals(1, paths.size());
|
|
||||||
assertEquals(Set.of(1L), paths.stream().map(path -> path.termId).collect(Collectors.toSet()));
|
|
||||||
System.out.println(Arrays.toString(paths.getFirst().priorityOrder));
|
|
||||||
assertArrayEquals(new long[] { 2, 3, 4 }, paths.getFirst().priorityOrder);
|
|
||||||
|
|
||||||
var next = paths.getFirst().next();
|
|
||||||
assertEquals(2, next.size());
|
|
||||||
assertEquals(Set.of(2L, 3L), next.stream().map(path -> path.termId).collect(Collectors.toSet()));
|
|
||||||
Map<Long, QueryBranchWalker> byId = next.stream().collect(Collectors.toMap(w -> w.termId, w->w));
|
|
||||||
assertArrayEquals(new long[] { 3L }, byId.get(2L).priorityOrder );
|
|
||||||
assertArrayEquals(new long[] { 4L }, byId.get(3L).priorityOrder );
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testNoOverlapFirst() {
|
|
||||||
var paths = QueryBranchWalker.create(
|
|
||||||
new long[] { 1, 2, 3 },
|
|
||||||
List.of(set(1, 2), set(1, 3))
|
|
||||||
);
|
|
||||||
assertEquals(1, paths.size());
|
|
||||||
assertArrayEquals(new long[] { 2, 3 }, paths.getFirst().priorityOrder);
|
|
||||||
assertEquals(Set.of(1L), paths.stream().map(path -> path.termId).collect(Collectors.toSet()));
|
|
||||||
}
|
|
||||||
|
|
||||||
LongSet set(long... args) {
|
|
||||||
return new LongArraySet(args);
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
Reference in New Issue
Block a user