diff --git a/code/index/java/nu/marginalia/index/IndexGrpcService.java b/code/index/java/nu/marginalia/index/IndexGrpcService.java index 957dec5c..a47c4684 100644 --- a/code/index/java/nu/marginalia/index/IndexGrpcService.java +++ b/code/index/java/nu/marginalia/index/IndexGrpcService.java @@ -6,12 +6,13 @@ import io.grpc.stub.StreamObserver; import io.prometheus.client.Counter; import io.prometheus.client.Gauge; import io.prometheus.client.Histogram; +import it.unimi.dsi.fastutil.longs.LongArrayList; import lombok.SneakyThrows; import nu.marginalia.api.searchquery.*; import nu.marginalia.api.searchquery.model.query.SearchSpecification; import nu.marginalia.api.searchquery.model.query.SearchSubquery; import nu.marginalia.api.searchquery.model.results.*; -import nu.marginalia.index.index.IndexQueryService; +import nu.marginalia.array.buffer.LongQueryBuffer; import nu.marginalia.index.index.StatefulIndex; import nu.marginalia.index.model.SearchParameters; import nu.marginalia.index.model.SearchTerms; @@ -79,7 +80,6 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase { private final StatefulIndex index; private final SearchSetsService searchSetsService; - private final IndexQueryService indexQueryService; private final IndexResultValuatorService resultValuator; private final String nodeName; @@ -90,7 +90,6 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase { public IndexGrpcService(ServiceConfiguration serviceConfiguration, StatefulIndex index, SearchSetsService searchSetsService, - IndexQueryService indexQueryService, IndexResultValuatorService resultValuator) { var nodeId = serviceConfiguration.node(); @@ -98,7 +97,6 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase { this.index = index; this.searchSetsService = searchSetsService; this.resultValuator = resultValuator; - this.indexQueryService = indexQueryService; } // GRPC endpoint @@ -222,13 +220,14 @@ public class IndexGrpcService extends 
IndexApiGrpc.IndexApiImplBase { } /** This class is responsible for executing a search query. It uses a thread pool to - * execute the subqueries in parallel, and then uses another thread pool to rank the - * results in parallel. The results are then combined into a bounded priority queue, - * and finally the best results are returned. + * execute the subqueries and their valuation in parallel. The results are then combined + * into a bounded priority queue, and finally the best results are returned. */ private class QueryExecution { private static final Executor workerPool = Executors.newWorkStealingPool(indexValuationThreads*4); + /** The queue where the results from the index lookup threads are placed, + * pending ranking by the result ranker threads */ private final ArrayBlockingQueue resultCandidateQueue = new ArrayBlockingQueue<>(8); @@ -291,7 +290,7 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase { /** This class is responsible for executing a subquery and adding the results to the * resultCandidateQueue, which depending on the state of the valuator threads may - * or may not block*/ + * or may not block */ class IndexLookup implements Runnable { private final IndexQuery query; private final IndexSearchBudget budget; @@ -306,11 +305,7 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase { public void run() { try { - indexQueryService.evaluateSubquery( - query, - budget, - this::drain - ); + executeSearch(); } finally { synchronized (remainingIndexTasks) { @@ -321,7 +316,31 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase { } } - private void drain(CombinedDocIdList resultIds) { + private void executeSearch() { + final LongArrayList results = new LongArrayList(512); + + // These queries are different indices for one subquery + final LongQueryBuffer buffer = new LongQueryBuffer(512); + + while (query.hasMore() && budget.hasTimeLeft()) + { + buffer.reset(); + query.getMoreResults(buffer); + + 
results.addElements(0, buffer.data, 0, buffer.end); + // Enqueue a batch only once at least 512 ids have accumulated; any remainder is flushed after the loop + if (results.size() >= 512) { + enqueueResults(new CombinedDocIdList(results)); + results.clear(); + } + } + + if (!results.isEmpty()) { + enqueueResults(new CombinedDocIdList(results)); + } + } + + private void enqueueResults(CombinedDocIdList resultIds) { long remainingTime = budget.timeLeft(); try { @@ -353,30 +372,9 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase { public void run() { try { - while (parameters.budget.timeLeft() > 0) { - - long start = System.currentTimeMillis(); - - CombinedDocIdList resultIds = resultCandidateQueue.poll( - Math.clamp(parameters.budget.timeLeft(), 1, 5), - TimeUnit.MILLISECONDS); - - if (resultIds == null) { - if (remainingIndexTasks.get() == 0 - && resultCandidateQueue.isEmpty()) - break; - else - continue; - } - - stallTime.addAndGet(System.currentTimeMillis() - start); - - var bestResults = resultValuator.rankResults(parameters, rankingContext, resultIds); - - resultHeap.addAll(bestResults); - } + while (parameters.budget.timeLeft() > 0 && execute()); } - catch (Exception e) { + catch (InterruptedException e) { logger.warn("Interrupted while waiting to poll resultIds from queue", e); } finally { @@ -386,6 +384,31 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase { } } } + + private boolean execute() throws InterruptedException { + long start = System.currentTimeMillis(); + + // Do a relatively short poll to ensure we terminate in a timely manner + // in the event all work is done + final long pollTime = Math.clamp(parameters.budget.timeLeft(), 1, 5); + CombinedDocIdList resultIds = resultCandidateQueue.poll(pollTime, TimeUnit.MILLISECONDS); + + if (resultIds == null) { + // check if we are done and can terminate + if (remainingIndexTasks.get() == 0 && resultCandidateQueue.isEmpty()) { + return false; + } + } + else { + stallTime.addAndGet(System.currentTimeMillis() - start); + + resultHeap.addAll( + 
resultValuator.rankResults(parameters, rankingContext, resultIds) + ); + } + + return true; // keep going + } } } diff --git a/code/index/java/nu/marginalia/index/ResultPriorityQueue.java b/code/index/java/nu/marginalia/index/ResultPriorityQueue.java index d84c43d0..804b7e9c 100644 --- a/code/index/java/nu/marginalia/index/ResultPriorityQueue.java +++ b/code/index/java/nu/marginalia/index/ResultPriorityQueue.java @@ -6,6 +6,18 @@ import org.jetbrains.annotations.NotNull; import java.util.*; +/** A priority queue for search results. This class is not thread-safe, + * in general, except for concurrent use of the addAll method. + *

+ * The class implements a subset of the Collection interface, and + * is intended to be used as a priority queue for search results, + * with a maximum size. + *

+ * Since the expected use case is to add a large number of items + * and then iterate over the items, the class is optimized for + * this scenario, and does not implement other mutating methods + * than addAll(). + */ public class ResultPriorityQueue implements Iterable, Collection { private final int limit; @@ -34,16 +46,12 @@ public class ResultPriorityQueue implements Iterable, @Override public boolean add(SearchResultItem searchResultItem) { - throw new UnsupportedOperationException("Use addAll instead ya dingus"); + throw new UnsupportedOperationException("Use addAll instead"); } @Override public boolean remove(Object o) { - if (o instanceof SearchResultItem sri) { - idsInSet.remove(sri.getDocumentId()); - return idsInSet.remove(sri.getDocumentId()); - } - throw new IllegalArgumentException("Object is not a SearchResultItem"); + throw new UnsupportedOperationException(); } @Override @@ -77,17 +85,18 @@ public class ResultPriorityQueue implements Iterable, @Override public boolean removeAll(@NotNull Collection c) { - return backingList.removeAll(c); + throw new UnsupportedOperationException(); } @Override public boolean retainAll(@NotNull Collection c) { - return backingList.retainAll(c); + throw new UnsupportedOperationException(); } @Override public void clear() { - + backingList.clear(); + idsInSet.clear(); } public int size() { diff --git a/code/index/java/nu/marginalia/index/index/CombinedIndexReader.java b/code/index/java/nu/marginalia/index/index/CombinedIndexReader.java index 01a94362..ea78739c 100644 --- a/code/index/java/nu/marginalia/index/index/CombinedIndexReader.java +++ b/code/index/java/nu/marginalia/index/index/CombinedIndexReader.java @@ -5,7 +5,6 @@ import nu.marginalia.index.forward.ForwardIndexReader; import nu.marginalia.index.model.QueryParams; import nu.marginalia.index.query.IndexQuery; import nu.marginalia.index.query.IndexQueryBuilder; -import nu.marginalia.index.query.IndexQueryPriority; import 
nu.marginalia.index.query.filter.QueryFilterStepIf; import nu.marginalia.index.query.limit.SpecificationLimitType; import nu.marginalia.index.results.model.ids.CombinedDocIdList; @@ -16,7 +15,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.time.Duration; -import java.util.List; import java.util.concurrent.TimeUnit; /** A reader for the combined forward and reverse indexes */ @@ -42,22 +40,15 @@ public class CombinedIndexReader { /** Creates a query builder for terms in the priority index */ - public IndexQueryBuilder findPriorityWord(IndexQueryPriority priority, - long wordId, - int fetchSizeMultiplier) { - return newQueryBuilder(new IndexQuery( - List.of(reverseIndexPriorityReader.documents(wordId)), - priority, - fetchSizeMultiplier)) + public IndexQueryBuilder findPriorityWord(long wordId) { + return newQueryBuilder(new IndexQuery(reverseIndexPriorityReader.documents(wordId))) .withSourceTerms(wordId); } /** Creates a query builder for terms in the full index */ - public IndexQueryBuilder findFullWord(IndexQueryPriority priority, long wordId, int fetchSizeMultiplier) { + public IndexQueryBuilder findFullWord(long wordId) { return newQueryBuilder( - new IndexQuery(List.of(reverseIndexFullReader.documents(wordId)), - priority, - fetchSizeMultiplier)) + new IndexQuery(reverseIndexFullReader.documents(wordId))) .withSourceTerms(wordId); } diff --git a/code/index/java/nu/marginalia/index/index/IndexQueryService.java b/code/index/java/nu/marginalia/index/index/IndexQueryService.java deleted file mode 100644 index 72a13910..00000000 --- a/code/index/java/nu/marginalia/index/index/IndexQueryService.java +++ /dev/null @@ -1,64 +0,0 @@ -package nu.marginalia.index.index; - -import com.google.inject.Inject; -import com.google.inject.Singleton; -import it.unimi.dsi.fastutil.longs.LongArrayList; -import nu.marginalia.api.searchquery.model.query.SearchSubquery; -import nu.marginalia.array.buffer.LongQueryBuffer; -import 
nu.marginalia.index.model.QueryParams; -import nu.marginalia.index.model.SearchTerms; -import nu.marginalia.index.query.IndexQuery; -import nu.marginalia.index.query.IndexSearchBudget; -import nu.marginalia.index.results.model.ids.CombinedDocIdList; -import org.roaringbitmap.longlong.Roaring64Bitmap; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.slf4j.Marker; -import org.slf4j.MarkerFactory; - -import java.util.function.Consumer; - -@Singleton -public class IndexQueryService { - private final Marker queryMarker = MarkerFactory.getMarker("QUERY"); - - private static final Logger logger = LoggerFactory.getLogger(IndexQueryService.class); - private final StatefulIndex index; - - @Inject - public IndexQueryService(StatefulIndex index) { - this.index = index; - } - - /** Execute subqueries and return a list of document ids. The index is queried for each subquery, - * at different priorty depths until timeout is reached or the results are all visited. - * Then the results are combined. 
- * */ - public void evaluateSubquery(IndexQuery query, - IndexSearchBudget timeout, - Consumer drain) - { - final LongArrayList results = new LongArrayList(512); - - // These queries are different indices for one subquery - final LongQueryBuffer buffer = new LongQueryBuffer(512); - - while (query.hasMore() && timeout.hasTimeLeft()) - { - buffer.reset(); - query.getMoreResults(buffer); - - results.addElements(0, buffer.data, 0, buffer.end); - - if (results.size() < 512) { - drain.accept(new CombinedDocIdList(results)); - results.clear(); - } - } - - if (!results.isEmpty()) { - drain.accept(new CombinedDocIdList(results)); - } - } - -} diff --git a/code/index/java/nu/marginalia/index/index/StatefulIndex.java b/code/index/java/nu/marginalia/index/index/StatefulIndex.java index ae2a8f6e..a49e740e 100644 --- a/code/index/java/nu/marginalia/index/index/StatefulIndex.java +++ b/code/index/java/nu/marginalia/index/index/StatefulIndex.java @@ -8,8 +8,6 @@ import nu.marginalia.index.model.QueryParams; import nu.marginalia.index.IndexFactory; import nu.marginalia.index.model.SearchTerms; import nu.marginalia.index.query.*; -import nu.marginalia.index.query.filter.QueryFilterStepFromPredicate; -import nu.marginalia.index.results.model.ids.TermIdList; import nu.marginalia.service.control.ServiceEventLog; import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; @@ -22,7 +20,6 @@ import java.util.List; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; -import java.util.function.LongPredicate; /** This class delegates SearchIndexReader and deals with the stateful nature of the index, * i.e. it may be possible to reconstruct the index and load a new set of data. 
@@ -122,19 +119,13 @@ public class StatefulIndex { List queryHeads = new ArrayList<>(10); List queries = new ArrayList<>(10); - // Fetch more results than specified for short queries, as the query itself is cheap and the - // priority index may contain a considerable amount of less interesting results - final int fetchSizeMultiplier; - if (orderedIncludes.length == 1) fetchSizeMultiplier = 4; - else fetchSizeMultiplier = 1; - - // To ensure that good results are processed first, create query heads for the priority index that filter for terms - // that contain pairs of two search terms + // To ensure that good results are discovered, create separate query heads for the priority index that + // filter for terms that contain pairs of two search terms if (orderedIncludesPrio.length > 1) { for (int i = 0; i + 1 < orderedIncludesPrio.length; i++) { for (int j = i + 1; j < orderedIncludesPrio.length; j++) { var entrySource = combinedIndexReader - .findPriorityWord(IndexQueryPriority.BEST, orderedIncludesPrio[i], fetchSizeMultiplier) + .findPriorityWord(orderedIncludesPrio[i]) .alsoPrio(orderedIncludesPrio[j]); queryHeads.add(entrySource); } @@ -143,18 +134,20 @@ public class StatefulIndex { // Next consider entries that appear only once in the priority index for (var wordId : orderedIncludesPrio) { - queryHeads.add(combinedIndexReader.findPriorityWord(IndexQueryPriority.GOOD, wordId, fetchSizeMultiplier)); + queryHeads.add(combinedIndexReader.findPriorityWord(wordId)); } - // Finally consider terms in the full index, but only do this for sufficiently long queries - // as short queries tend to be too underspecified to produce anything other than CPU warmth - queryHeads.add(combinedIndexReader.findFullWord(IndexQueryPriority.FALLBACK, orderedIncludes[0], fetchSizeMultiplier)); + // Finally consider terms in the full index + queryHeads.add(combinedIndexReader.findFullWord(orderedIncludes[0])); for (var query : queryHeads) { if (query == null) { return 
Collections.emptyList(); } + // Note that we can add all includes as filters, even though + // they may not be present in the query head, as the query builder + // will ignore redundant include filters: for (long orderedInclude : orderedIncludes) { query = query.alsoFull(orderedInclude); } @@ -163,7 +156,7 @@ public class StatefulIndex { query = query.notFull(term); } - // Run these last, as they'll worst-case cause as many page faults as there are + // Run these filter steps last, as they'll worst-case cause as many page faults as there are // items in the buffer queries.add(query.addInclusionFilter(combinedIndexReader.filterForParams(params)).build()); } diff --git a/code/index/java/nu/marginalia/index/model/QueryParams.java b/code/index/java/nu/marginalia/index/model/QueryParams.java index 56e40551..af189451 100644 --- a/code/index/java/nu/marginalia/index/model/QueryParams.java +++ b/code/index/java/nu/marginalia/index/model/QueryParams.java @@ -4,23 +4,95 @@ import nu.marginalia.index.searchset.SearchSet; import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.query.limit.SpecificationLimit; -/** IndexQueryParams is a set of parameters for a query. - * - * @param qualityLimit The quality limit. - * @param year The year limit. - * @param size The size limit. Eliminates results from domains that do not satisfy the size criteria. - * @param rank The rank limit. Eliminates results from domains that do not satisfy the domain rank criteria. - * @param searchSet The search set. Limits the search to a set of domains. - * @param queryStrategy The query strategy. May impose additional constraints on the query, such as requiring - * the keywords to appear in the title, or in the domain. +import java.util.Objects; + +/** + * QueryParams is a set of parameters for a query. 
*/ -public record QueryParams(SpecificationLimit qualityLimit, - SpecificationLimit year, - SpecificationLimit size, - SpecificationLimit rank, - SearchSet searchSet, - QueryStrategy queryStrategy - ) -{ +public final class QueryParams { + private final SpecificationLimit qualityLimit; + private final SpecificationLimit year; + private final SpecificationLimit size; + private final SpecificationLimit rank; + private final SearchSet searchSet; + private final QueryStrategy queryStrategy; + + /** + * @param qualityLimit The quality limit. + * @param year The year limit. + * @param size The size limit. Eliminates results from domains that do not satisfy the size criteria. + * @param rank The rank limit. Eliminates results from domains that do not satisfy the domain rank criteria. + * @param searchSet The search set. Limits the search to a set of domains. + * @param queryStrategy The query strategy. May impose additional constraints on the query, such as requiring + * the keywords to appear in the title, or in the domain. 
+ */ + public QueryParams(SpecificationLimit qualityLimit, + SpecificationLimit year, + SpecificationLimit size, + SpecificationLimit rank, + SearchSet searchSet, + QueryStrategy queryStrategy + ) { + this.qualityLimit = qualityLimit; + this.year = year; + this.size = size; + this.rank = rank; + this.searchSet = searchSet; + this.queryStrategy = queryStrategy; + } + + public SpecificationLimit qualityLimit() { + return qualityLimit; + } + + public SpecificationLimit year() { + return year; + } + + public SpecificationLimit size() { + return size; + } + + public SpecificationLimit rank() { + return rank; + } + + public SearchSet searchSet() { + return searchSet; + } + + public QueryStrategy queryStrategy() { + return queryStrategy; + } + + @Override + public boolean equals(Object obj) { + if (obj == this) return true; + if (obj == null || obj.getClass() != this.getClass()) return false; + var that = (QueryParams) obj; + return Objects.equals(this.qualityLimit, that.qualityLimit) && + Objects.equals(this.year, that.year) && + Objects.equals(this.size, that.size) && + Objects.equals(this.rank, that.rank) && + Objects.equals(this.searchSet, that.searchSet) && + Objects.equals(this.queryStrategy, that.queryStrategy); + } + + @Override + public int hashCode() { + return Objects.hash(qualityLimit, year, size, rank, searchSet, queryStrategy); + } + + @Override + public String toString() { + return "QueryParams[" + + "qualityLimit=" + qualityLimit + ", " + + "year=" + year + ", " + + "size=" + size + ", " + + "rank=" + rank + ", " + + "searchSet=" + searchSet + ", " + + "queryStrategy=" + queryStrategy + ']'; + } + } diff --git a/code/index/java/nu/marginalia/index/model/SearchParameters.java b/code/index/java/nu/marginalia/index/model/SearchParameters.java index 0594bd68..7db25341 100644 --- a/code/index/java/nu/marginalia/index/model/SearchParameters.java +++ b/code/index/java/nu/marginalia/index/model/SearchParameters.java @@ -1,13 +1,10 @@ package 
nu.marginalia.index.model; -import gnu.trove.set.hash.TLongHashSet; +import nu.marginalia.api.searchquery.IndexProtobufCodec; import nu.marginalia.api.searchquery.RpcIndexQuery; import nu.marginalia.api.searchquery.model.query.SearchSpecification; import nu.marginalia.api.searchquery.model.query.SearchSubquery; import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; -import nu.marginalia.api.searchquery.IndexProtobufCodec; -import nu.marginalia.index.index.StatefulIndex; -import nu.marginalia.index.query.IndexQuery; import nu.marginalia.index.query.IndexSearchBudget; import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.searchset.SearchSet; @@ -85,20 +82,8 @@ public class SearchParameters { rankingParams = IndexProtobufCodec.convertRankingParameterss(request.getParameters()); } - public boolean hasTimeLeft() { - return budget.hasTimeLeft(); - } - public long getDataCost() { return dataCost; } - private static class CachedObjects { - private static final ThreadLocal consideredCache = ThreadLocal.withInitial(() -> new TLongHashSet(4096)); - private static TLongHashSet getConsideredUrlsMap() { - var ret = consideredCache.get(); - ret.clear(); - return ret; - } - } } diff --git a/code/index/java/nu/marginalia/index/model/SearchTerms.java b/code/index/java/nu/marginalia/index/model/SearchTerms.java index dc62ae11..c32b1aa3 100644 --- a/code/index/java/nu/marginalia/index/model/SearchTerms.java +++ b/code/index/java/nu/marginalia/index/model/SearchTerms.java @@ -8,21 +8,33 @@ import nu.marginalia.api.searchquery.model.query.SearchSubquery; import java.util.ArrayList; import java.util.List; +import java.util.Objects; import static nu.marginalia.index.model.SearchTermsUtil.getWordId; -public record SearchTerms( - LongList includes, - LongList excludes, - LongList priority, - List coherences - ) -{ +public final class SearchTerms { + private final LongList includes; + private final LongList excludes; + private final LongList 
priority; + private final List coherences; + + public SearchTerms( + LongList includes, + LongList excludes, + LongList priority, + List coherences + ) { + this.includes = includes; + this.excludes = excludes; + this.priority = priority; + this.coherences = coherences; + } + public SearchTerms(SearchSubquery subquery) { this(new LongArrayList(), - new LongArrayList(), - new LongArrayList(), - new ArrayList<>()); + new LongArrayList(), + new LongArrayList(), + new ArrayList<>()); for (var word : subquery.searchTermsInclude) { includes.add(getWordId(word)); @@ -67,4 +79,46 @@ public record SearchTerms( public int size() { return includes.size() + excludes.size() + priority.size(); } + + public LongList includes() { + return includes; + } + + public LongList excludes() { + return excludes; + } + + public LongList priority() { + return priority; + } + + public List coherences() { + return coherences; + } + + @Override + public boolean equals(Object obj) { + if (obj == this) return true; + if (obj == null || obj.getClass() != this.getClass()) return false; + var that = (SearchTerms) obj; + return Objects.equals(this.includes, that.includes) && + Objects.equals(this.excludes, that.excludes) && + Objects.equals(this.priority, that.priority) && + Objects.equals(this.coherences, that.coherences); + } + + @Override + public int hashCode() { + return Objects.hash(includes, excludes, priority, coherences); + } + + @Override + public String toString() { + return "SearchTerms[" + + "includes=" + includes + ", " + + "excludes=" + excludes + ", " + + "priority=" + priority + ", " + + "coherences=" + coherences + ']'; + } + } diff --git a/code/index/java/nu/marginalia/index/results/IndexMetadataService.java b/code/index/java/nu/marginalia/index/results/IndexMetadataService.java index d5356f53..1932a5a4 100644 --- a/code/index/java/nu/marginalia/index/results/IndexMetadataService.java +++ b/code/index/java/nu/marginalia/index/results/IndexMetadataService.java @@ -22,7 +22,6 @@ 
import static nu.marginalia.index.results.model.TermMetadataForCombinedDocumentI public class IndexMetadataService { private final StatefulIndex index; - @Inject public IndexMetadataService(StatefulIndex index) { this.index = index; diff --git a/code/index/java/nu/marginalia/index/results/IndexResultValuatorService.java b/code/index/java/nu/marginalia/index/results/IndexResultValuatorService.java index 9251a5d2..51e59c63 100644 --- a/code/index/java/nu/marginalia/index/results/IndexResultValuatorService.java +++ b/code/index/java/nu/marginalia/index/results/IndexResultValuatorService.java @@ -47,18 +47,12 @@ public class IndexResultValuatorService { ResultRankingContext rankingContext, CombinedDocIdList resultIds) { - final var evaluator = new IndexResultValuationContext(metadataService, - resultValuator, - resultIds, - statefulIndex, - rankingContext, - params.subqueries, - params.queryParams); + final var evaluator = createValuationContext(params, rankingContext, resultIds); List results = new ArrayList<>(resultIds.size()); - for (long docId : resultIds.array()) { - var score = evaluator.calculatePreliminaryScore(docId); + for (long id : resultIds.array()) { + var score = evaluator.calculatePreliminaryScore(id); if (score != null) { results.add(score); } @@ -67,6 +61,19 @@ public class IndexResultValuatorService { return results; } + private IndexResultValuationContext createValuationContext(SearchParameters params, + ResultRankingContext rankingContext, + CombinedDocIdList resultIds) + { + return new IndexResultValuationContext(metadataService, + resultValuator, + resultIds, + statefulIndex, + rankingContext, + params.subqueries, + params.queryParams); + } + public List selectBestResults(SearchParameters params, ResultRankingContext rankingContext, diff --git a/code/index/java/nu/marginalia/index/results/model/ids/CombinedDocIdList.java b/code/index/java/nu/marginalia/index/results/model/ids/CombinedDocIdList.java index bec6fb8e..17bd17a1 100644 --- 
a/code/index/java/nu/marginalia/index/results/model/ids/CombinedDocIdList.java +++ b/code/index/java/nu/marginalia/index/results/model/ids/CombinedDocIdList.java @@ -1,11 +1,17 @@ package nu.marginalia.index.results.model.ids; import it.unimi.dsi.fastutil.longs.LongArrayList; +import it.unimi.dsi.fastutil.longs.LongIterators; import org.roaringbitmap.longlong.Roaring64Bitmap; import java.util.Arrays; import java.util.stream.LongStream; +/** A list of document ids, with their ranking bits still remaining. + * + * @see nu.marginalia.index.results.model.ids.DocIdList + * @see nu.marginalia.model.id.UrlIdCodec + * */ public final class CombinedDocIdList { private final long[] data; @@ -48,5 +54,6 @@ public final class CombinedDocIdList { public void sort() { Arrays.sort(data); } + } diff --git a/code/index/java/nu/marginalia/index/results/model/ids/DocIdList.java b/code/index/java/nu/marginalia/index/results/model/ids/DocIdList.java index beb07c00..970cdd8f 100644 --- a/code/index/java/nu/marginalia/index/results/model/ids/DocIdList.java +++ b/code/index/java/nu/marginalia/index/results/model/ids/DocIdList.java @@ -6,6 +6,11 @@ import java.util.Arrays; import java.util.Objects; import java.util.stream.LongStream; +/** A list of document ids, with their ranking bits removed. 
+ * + * @see nu.marginalia.index.results.model.ids.CombinedDocIdList + * @see nu.marginalia.model.id.UrlIdCodec + * */ public final class DocIdList { private final long[] array; diff --git a/code/index/query/java/nu/marginalia/index/query/IndexQuery.java b/code/index/query/java/nu/marginalia/index/query/IndexQuery.java index 734f08dd..651a3f24 100644 --- a/code/index/query/java/nu/marginalia/index/query/IndexQuery.java +++ b/code/index/query/java/nu/marginalia/index/query/IndexQuery.java @@ -19,29 +19,15 @@ public class IndexQuery { private final List sources; private final List inclusionFilter = new ArrayList<>(10); - public final IndexQueryPriority queryPriority; - public final int fetchSizeMultiplier; - - public IndexQuery(EntrySource... sources) { - this(List.of(sources), IndexQueryPriority.BEST, 1); - } - - /** - * Creates an IndexQuery object with the given sources, priority, and fetchSizeMultiplier. - * - * @param sources List of EntrySource objects representing the sources to query from - * @param priority IndexQueryPriority of the query, determining how many results to fetch before stopping - * @param fetchSizeMultiplier Affects the fetch size of the query, determining how deep the query should go - */ - public IndexQuery(List sources, - IndexQueryPriority priority, - int fetchSizeMultiplier) + public IndexQuery(List sources) { this.sources = sources; - this.queryPriority = priority; - this.fetchSizeMultiplier = fetchSizeMultiplier; } + public IndexQuery(EntrySource... sources) + { + this.sources = List.of(sources); + } /** Adds a filter to the query. The filter will be applied to the results * after they are read from the sources. 
* diff --git a/code/index/query/java/nu/marginalia/index/query/IndexQueryPriority.java b/code/index/query/java/nu/marginalia/index/query/IndexQueryPriority.java deleted file mode 100644 index b2f37350..00000000 --- a/code/index/query/java/nu/marginalia/index/query/IndexQueryPriority.java +++ /dev/null @@ -1,14 +0,0 @@ -package nu.marginalia.index.query; - -/** Designates the presumptive value of an IndexQuery. - */ -public enum IndexQueryPriority { - /** This is likely to produce highly relevant results */ - BEST, - - /** This may produce relevant results */ - GOOD, - - /** This is a fallback query, only execute if no higher prioritized query returned any results */ - FALLBACK -} diff --git a/code/index/readme.md b/code/index/readme.md index 2254c2a2..6a819e0f 100644 --- a/code/index/readme.md +++ b/code/index/readme.md @@ -6,6 +6,9 @@ It exposes an API for querying the index, and contains the logic for ranking search results. It does not parse the query, that is the responsibility of the [search-query](../functions/search-query) module. +The central class of the index subsystem is the [IndexGrpcService](java/nu/marginalia/index/IndexGrpcService.java) class, +which is a gRPC service that exposes the index to the rest of the system. + ## Indexes There are two indexes with accompanying tools for constructing them.