mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(index) Clean up index code
This commit is contained in:
parent
9415539b38
commit
9a045a0588
@ -6,12 +6,13 @@ import io.grpc.stub.StreamObserver;
|
||||
import io.prometheus.client.Counter;
|
||||
import io.prometheus.client.Gauge;
|
||||
import io.prometheus.client.Histogram;
|
||||
import it.unimi.dsi.fastutil.longs.LongArrayList;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.api.searchquery.*;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
|
||||
import nu.marginalia.api.searchquery.model.results.*;
|
||||
import nu.marginalia.index.index.IndexQueryService;
|
||||
import nu.marginalia.array.buffer.LongQueryBuffer;
|
||||
import nu.marginalia.index.index.StatefulIndex;
|
||||
import nu.marginalia.index.model.SearchParameters;
|
||||
import nu.marginalia.index.model.SearchTerms;
|
||||
@ -79,7 +80,6 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
|
||||
private final StatefulIndex index;
|
||||
private final SearchSetsService searchSetsService;
|
||||
|
||||
private final IndexQueryService indexQueryService;
|
||||
private final IndexResultValuatorService resultValuator;
|
||||
|
||||
private final String nodeName;
|
||||
@ -90,7 +90,6 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
|
||||
public IndexGrpcService(ServiceConfiguration serviceConfiguration,
|
||||
StatefulIndex index,
|
||||
SearchSetsService searchSetsService,
|
||||
IndexQueryService indexQueryService,
|
||||
IndexResultValuatorService resultValuator)
|
||||
{
|
||||
var nodeId = serviceConfiguration.node();
|
||||
@ -98,7 +97,6 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
|
||||
this.index = index;
|
||||
this.searchSetsService = searchSetsService;
|
||||
this.resultValuator = resultValuator;
|
||||
this.indexQueryService = indexQueryService;
|
||||
}
|
||||
|
||||
// GRPC endpoint
|
||||
@ -222,13 +220,14 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
|
||||
}
|
||||
|
||||
/** This class is responsible for executing a search query. It uses a thread pool to
|
||||
* execute the subqueries in parallel, and then uses another thread pool to rank the
|
||||
* results in parallel. The results are then combined into a bounded priority queue,
|
||||
* and finally the best results are returned.
|
||||
* execute the subqueries and their valuation in parallel. The results are then combined
|
||||
* into a bounded priority queue, and finally the best results are returned.
|
||||
*/
|
||||
private class QueryExecution {
|
||||
private static final Executor workerPool = Executors.newWorkStealingPool(indexValuationThreads*4);
|
||||
|
||||
/** The queue where the results from the index lookup threads are placed,
|
||||
* pending ranking by the result ranker threads */
|
||||
private final ArrayBlockingQueue<CombinedDocIdList> resultCandidateQueue
|
||||
= new ArrayBlockingQueue<>(8);
|
||||
|
||||
@ -291,7 +290,7 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
|
||||
|
||||
/** This class is responsible for executing a subquery and adding the results to the
|
||||
* resultCandidateQueue, which depending on the state of the valuator threads may
|
||||
* or may not block*/
|
||||
* or may not block */
|
||||
class IndexLookup implements Runnable {
|
||||
private final IndexQuery query;
|
||||
private final IndexSearchBudget budget;
|
||||
@ -306,11 +305,7 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
|
||||
|
||||
public void run() {
|
||||
try {
|
||||
indexQueryService.evaluateSubquery(
|
||||
query,
|
||||
budget,
|
||||
this::drain
|
||||
);
|
||||
executeSearch();
|
||||
}
|
||||
finally {
|
||||
synchronized (remainingIndexTasks) {
|
||||
@ -321,7 +316,31 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
|
||||
}
|
||||
}
|
||||
|
||||
private void drain(CombinedDocIdList resultIds) {
|
||||
private void executeSearch() {
|
||||
final LongArrayList results = new LongArrayList(512);
|
||||
|
||||
// These queries are different indices for one subquery
|
||||
final LongQueryBuffer buffer = new LongQueryBuffer(512);
|
||||
|
||||
while (query.hasMore() && budget.hasTimeLeft())
|
||||
{
|
||||
buffer.reset();
|
||||
query.getMoreResults(buffer);
|
||||
|
||||
results.addElements(0, buffer.data, 0, buffer.end);
|
||||
|
||||
if (results.size() < 512) {
|
||||
enqueueResults(new CombinedDocIdList(results));
|
||||
results.clear();
|
||||
}
|
||||
}
|
||||
|
||||
if (!results.isEmpty()) {
|
||||
enqueueResults(new CombinedDocIdList(results));
|
||||
}
|
||||
}
|
||||
|
||||
private void enqueueResults(CombinedDocIdList resultIds) {
|
||||
long remainingTime = budget.timeLeft();
|
||||
|
||||
try {
|
||||
@ -353,30 +372,9 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
|
||||
|
||||
public void run() {
|
||||
try {
|
||||
while (parameters.budget.timeLeft() > 0) {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
CombinedDocIdList resultIds = resultCandidateQueue.poll(
|
||||
Math.clamp(parameters.budget.timeLeft(), 1, 5),
|
||||
TimeUnit.MILLISECONDS);
|
||||
|
||||
if (resultIds == null) {
|
||||
if (remainingIndexTasks.get() == 0
|
||||
&& resultCandidateQueue.isEmpty())
|
||||
break;
|
||||
else
|
||||
continue;
|
||||
}
|
||||
|
||||
stallTime.addAndGet(System.currentTimeMillis() - start);
|
||||
|
||||
var bestResults = resultValuator.rankResults(parameters, rankingContext, resultIds);
|
||||
|
||||
resultHeap.addAll(bestResults);
|
||||
}
|
||||
while (parameters.budget.timeLeft() > 0 && execute());
|
||||
}
|
||||
catch (Exception e) {
|
||||
catch (InterruptedException e) {
|
||||
logger.warn("Interrupted while waiting to poll resultIds from queue", e);
|
||||
}
|
||||
finally {
|
||||
@ -386,6 +384,31 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean execute() throws InterruptedException {
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
// Do a relatively short poll to ensure we terminate in a timely manner
|
||||
// in the event all work is done
|
||||
final long pollTime = Math.clamp(parameters.budget.timeLeft(), 1, 5);
|
||||
CombinedDocIdList resultIds = resultCandidateQueue.poll(pollTime, TimeUnit.MILLISECONDS);
|
||||
|
||||
if (resultIds == null) {
|
||||
// check if we are done and can terminate
|
||||
if (remainingIndexTasks.get() == 0 && resultCandidateQueue.isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
stallTime.addAndGet(System.currentTimeMillis() - start);
|
||||
|
||||
resultHeap.addAll(
|
||||
resultValuator.rankResults(parameters, rankingContext, resultIds)
|
||||
);
|
||||
}
|
||||
|
||||
return true; // keep going
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -6,6 +6,18 @@ import org.jetbrains.annotations.NotNull;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/** A priority queue for search results. This class is not thread-safe,
|
||||
* in general, except for concurrent use of the addAll method.
|
||||
* <p></p>
|
||||
* The class implements a subset of the Collection interface, and
|
||||
* is intended to be used as a priority queue for search results,
|
||||
* with a maximum size.
|
||||
* <p></p>
|
||||
* Since the expected use case is to add a large number of items
|
||||
* and then iterate over the items, the class is optimized for
|
||||
* this scenario, and does not implement other mutating methods
|
||||
* than addAll().
|
||||
*/
|
||||
public class ResultPriorityQueue implements Iterable<SearchResultItem>,
|
||||
Collection<SearchResultItem> {
|
||||
private final int limit;
|
||||
@ -34,16 +46,12 @@ public class ResultPriorityQueue implements Iterable<SearchResultItem>,
|
||||
|
||||
@Override
|
||||
public boolean add(SearchResultItem searchResultItem) {
|
||||
throw new UnsupportedOperationException("Use addAll instead ya dingus");
|
||||
throw new UnsupportedOperationException("Use addAll instead");
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean remove(Object o) {
|
||||
if (o instanceof SearchResultItem sri) {
|
||||
idsInSet.remove(sri.getDocumentId());
|
||||
return idsInSet.remove(sri.getDocumentId());
|
||||
}
|
||||
throw new IllegalArgumentException("Object is not a SearchResultItem");
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -77,17 +85,18 @@ public class ResultPriorityQueue implements Iterable<SearchResultItem>,
|
||||
|
||||
@Override
|
||||
public boolean removeAll(@NotNull Collection<?> c) {
|
||||
return backingList.removeAll(c);
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean retainAll(@NotNull Collection<?> c) {
|
||||
return backingList.retainAll(c);
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
|
||||
backingList.clear();
|
||||
idsInSet.clear();
|
||||
}
|
||||
|
||||
public int size() {
|
||||
|
@ -5,7 +5,6 @@ import nu.marginalia.index.forward.ForwardIndexReader;
|
||||
import nu.marginalia.index.model.QueryParams;
|
||||
import nu.marginalia.index.query.IndexQuery;
|
||||
import nu.marginalia.index.query.IndexQueryBuilder;
|
||||
import nu.marginalia.index.query.IndexQueryPriority;
|
||||
import nu.marginalia.index.query.filter.QueryFilterStepIf;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimitType;
|
||||
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
|
||||
@ -16,7 +15,6 @@ import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/** A reader for the combined forward and reverse indexes */
|
||||
@ -42,22 +40,15 @@ public class CombinedIndexReader {
|
||||
|
||||
|
||||
/** Creates a query builder for terms in the priority index */
|
||||
public IndexQueryBuilder findPriorityWord(IndexQueryPriority priority,
|
||||
long wordId,
|
||||
int fetchSizeMultiplier) {
|
||||
return newQueryBuilder(new IndexQuery(
|
||||
List.of(reverseIndexPriorityReader.documents(wordId)),
|
||||
priority,
|
||||
fetchSizeMultiplier))
|
||||
public IndexQueryBuilder findPriorityWord(long wordId) {
|
||||
return newQueryBuilder(new IndexQuery(reverseIndexPriorityReader.documents(wordId)))
|
||||
.withSourceTerms(wordId);
|
||||
}
|
||||
|
||||
/** Creates a query builder for terms in the full index */
|
||||
public IndexQueryBuilder findFullWord(IndexQueryPriority priority, long wordId, int fetchSizeMultiplier) {
|
||||
public IndexQueryBuilder findFullWord(long wordId) {
|
||||
return newQueryBuilder(
|
||||
new IndexQuery(List.of(reverseIndexFullReader.documents(wordId)),
|
||||
priority,
|
||||
fetchSizeMultiplier))
|
||||
new IndexQuery(reverseIndexFullReader.documents(wordId)))
|
||||
.withSourceTerms(wordId);
|
||||
}
|
||||
|
||||
|
@ -1,64 +0,0 @@
|
||||
package nu.marginalia.index.index;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import it.unimi.dsi.fastutil.longs.LongArrayList;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
|
||||
import nu.marginalia.array.buffer.LongQueryBuffer;
|
||||
import nu.marginalia.index.model.QueryParams;
|
||||
import nu.marginalia.index.model.SearchTerms;
|
||||
import nu.marginalia.index.query.IndexQuery;
|
||||
import nu.marginalia.index.query.IndexSearchBudget;
|
||||
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
|
||||
import org.roaringbitmap.longlong.Roaring64Bitmap;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.slf4j.Marker;
|
||||
import org.slf4j.MarkerFactory;
|
||||
|
||||
import java.util.function.Consumer;
|
||||
|
||||
@Singleton
|
||||
public class IndexQueryService {
|
||||
private final Marker queryMarker = MarkerFactory.getMarker("QUERY");
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(IndexQueryService.class);
|
||||
private final StatefulIndex index;
|
||||
|
||||
@Inject
|
||||
public IndexQueryService(StatefulIndex index) {
|
||||
this.index = index;
|
||||
}
|
||||
|
||||
/** Execute subqueries and return a list of document ids. The index is queried for each subquery,
|
||||
* at different priority depths until timeout is reached or the results are all visited.
|
||||
* Then the results are combined.
|
||||
* */
|
||||
public void evaluateSubquery(IndexQuery query,
|
||||
IndexSearchBudget timeout,
|
||||
Consumer<CombinedDocIdList> drain)
|
||||
{
|
||||
final LongArrayList results = new LongArrayList(512);
|
||||
|
||||
// These queries are different indices for one subquery
|
||||
final LongQueryBuffer buffer = new LongQueryBuffer(512);
|
||||
|
||||
while (query.hasMore() && timeout.hasTimeLeft())
|
||||
{
|
||||
buffer.reset();
|
||||
query.getMoreResults(buffer);
|
||||
|
||||
results.addElements(0, buffer.data, 0, buffer.end);
|
||||
|
||||
if (results.size() < 512) {
|
||||
drain.accept(new CombinedDocIdList(results));
|
||||
results.clear();
|
||||
}
|
||||
}
|
||||
|
||||
if (!results.isEmpty()) {
|
||||
drain.accept(new CombinedDocIdList(results));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -8,8 +8,6 @@ import nu.marginalia.index.model.QueryParams;
|
||||
import nu.marginalia.index.IndexFactory;
|
||||
import nu.marginalia.index.model.SearchTerms;
|
||||
import nu.marginalia.index.query.*;
|
||||
import nu.marginalia.index.query.filter.QueryFilterStepFromPredicate;
|
||||
import nu.marginalia.index.results.model.ids.TermIdList;
|
||||
import nu.marginalia.service.control.ServiceEventLog;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.slf4j.Logger;
|
||||
@ -22,7 +20,6 @@ import java.util.List;
|
||||
import java.util.concurrent.locks.Lock;
|
||||
import java.util.concurrent.locks.ReadWriteLock;
|
||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||
import java.util.function.LongPredicate;
|
||||
|
||||
/** This class delegates to SearchIndexReader and deals with the stateful nature of the index,
|
||||
* i.e. it may be possible to reconstruct the index and load a new set of data.
|
||||
@ -122,19 +119,13 @@ public class StatefulIndex {
|
||||
List<IndexQueryBuilder> queryHeads = new ArrayList<>(10);
|
||||
List<IndexQuery> queries = new ArrayList<>(10);
|
||||
|
||||
// Fetch more results than specified for short queries, as the query itself is cheap and the
|
||||
// priority index may contain a considerable amount of less interesting results
|
||||
final int fetchSizeMultiplier;
|
||||
if (orderedIncludes.length == 1) fetchSizeMultiplier = 4;
|
||||
else fetchSizeMultiplier = 1;
|
||||
|
||||
// To ensure that good results are processed first, create query heads for the priority index that filter for terms
|
||||
// that contain pairs of two search terms
|
||||
// To ensure that good results are discovered, create separate query heads for the priority index that
|
||||
// filter for terms that contain pairs of two search terms
|
||||
if (orderedIncludesPrio.length > 1) {
|
||||
for (int i = 0; i + 1 < orderedIncludesPrio.length; i++) {
|
||||
for (int j = i + 1; j < orderedIncludesPrio.length; j++) {
|
||||
var entrySource = combinedIndexReader
|
||||
.findPriorityWord(IndexQueryPriority.BEST, orderedIncludesPrio[i], fetchSizeMultiplier)
|
||||
.findPriorityWord(orderedIncludesPrio[i])
|
||||
.alsoPrio(orderedIncludesPrio[j]);
|
||||
queryHeads.add(entrySource);
|
||||
}
|
||||
@ -143,18 +134,20 @@ public class StatefulIndex {
|
||||
|
||||
// Next consider entries that appear only once in the priority index
|
||||
for (var wordId : orderedIncludesPrio) {
|
||||
queryHeads.add(combinedIndexReader.findPriorityWord(IndexQueryPriority.GOOD, wordId, fetchSizeMultiplier));
|
||||
queryHeads.add(combinedIndexReader.findPriorityWord(wordId));
|
||||
}
|
||||
|
||||
// Finally consider terms in the full index, but only do this for sufficiently long queries
|
||||
// as short queries tend to be too underspecified to produce anything other than CPU warmth
|
||||
queryHeads.add(combinedIndexReader.findFullWord(IndexQueryPriority.FALLBACK, orderedIncludes[0], fetchSizeMultiplier));
|
||||
// Finally consider terms in the full index
|
||||
queryHeads.add(combinedIndexReader.findFullWord(orderedIncludes[0]));
|
||||
|
||||
for (var query : queryHeads) {
|
||||
if (query == null) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
// Note that we can add all includes as filters, even though
|
||||
// they may not be present in the query head, as the query builder
|
||||
// will ignore redundant include filters:
|
||||
for (long orderedInclude : orderedIncludes) {
|
||||
query = query.alsoFull(orderedInclude);
|
||||
}
|
||||
@ -163,7 +156,7 @@ public class StatefulIndex {
|
||||
query = query.notFull(term);
|
||||
}
|
||||
|
||||
// Run these last, as they'll worst-case cause as many page faults as there are
|
||||
// Run these filter steps last, as they'll worst-case cause as many page faults as there are
|
||||
// items in the buffer
|
||||
queries.add(query.addInclusionFilter(combinedIndexReader.filterForParams(params)).build());
|
||||
}
|
||||
|
@ -4,23 +4,95 @@ import nu.marginalia.index.searchset.SearchSet;
|
||||
import nu.marginalia.index.query.limit.QueryStrategy;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
|
||||
/** QueryParams is a set of parameters for a query.
|
||||
*
|
||||
* @param qualityLimit The quality limit.
|
||||
* @param year The year limit.
|
||||
* @param size The size limit. Eliminates results from domains that do not satisfy the size criteria.
|
||||
* @param rank The rank limit. Eliminates results from domains that do not satisfy the domain rank criteria.
|
||||
* @param searchSet The search set. Limits the search to a set of domains.
|
||||
* @param queryStrategy The query strategy. May impose additional constraints on the query, such as requiring
|
||||
* the keywords to appear in the title, or in the domain.
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* QueryParams is a set of parameters for a query.
|
||||
*/
|
||||
public record QueryParams(SpecificationLimit qualityLimit,
|
||||
SpecificationLimit year,
|
||||
SpecificationLimit size,
|
||||
SpecificationLimit rank,
|
||||
SearchSet searchSet,
|
||||
QueryStrategy queryStrategy
|
||||
)
|
||||
{
|
||||
public final class QueryParams {
|
||||
private final SpecificationLimit qualityLimit;
|
||||
private final SpecificationLimit year;
|
||||
private final SpecificationLimit size;
|
||||
private final SpecificationLimit rank;
|
||||
private final SearchSet searchSet;
|
||||
private final QueryStrategy queryStrategy;
|
||||
|
||||
/**
|
||||
* @param qualityLimit The quality limit.
|
||||
* @param year The year limit.
|
||||
* @param size The size limit. Eliminates results from domains that do not satisfy the size criteria.
|
||||
* @param rank The rank limit. Eliminates results from domains that do not satisfy the domain rank criteria.
|
||||
* @param searchSet The search set. Limits the search to a set of domains.
|
||||
* @param queryStrategy The query strategy. May impose additional constraints on the query, such as requiring
|
||||
* the keywords to appear in the title, or in the domain.
|
||||
*/
|
||||
public QueryParams(SpecificationLimit qualityLimit,
|
||||
SpecificationLimit year,
|
||||
SpecificationLimit size,
|
||||
SpecificationLimit rank,
|
||||
SearchSet searchSet,
|
||||
QueryStrategy queryStrategy
|
||||
) {
|
||||
this.qualityLimit = qualityLimit;
|
||||
this.year = year;
|
||||
this.size = size;
|
||||
this.rank = rank;
|
||||
this.searchSet = searchSet;
|
||||
this.queryStrategy = queryStrategy;
|
||||
}
|
||||
|
||||
public SpecificationLimit qualityLimit() {
|
||||
return qualityLimit;
|
||||
}
|
||||
|
||||
public SpecificationLimit year() {
|
||||
return year;
|
||||
}
|
||||
|
||||
public SpecificationLimit size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
public SpecificationLimit rank() {
|
||||
return rank;
|
||||
}
|
||||
|
||||
public SearchSet searchSet() {
|
||||
return searchSet;
|
||||
}
|
||||
|
||||
public QueryStrategy queryStrategy() {
|
||||
return queryStrategy;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (obj == this) return true;
|
||||
if (obj == null || obj.getClass() != this.getClass()) return false;
|
||||
var that = (QueryParams) obj;
|
||||
return Objects.equals(this.qualityLimit, that.qualityLimit) &&
|
||||
Objects.equals(this.year, that.year) &&
|
||||
Objects.equals(this.size, that.size) &&
|
||||
Objects.equals(this.rank, that.rank) &&
|
||||
Objects.equals(this.searchSet, that.searchSet) &&
|
||||
Objects.equals(this.queryStrategy, that.queryStrategy);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(qualityLimit, year, size, rank, searchSet, queryStrategy);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "QueryParams[" +
|
||||
"qualityLimit=" + qualityLimit + ", " +
|
||||
"year=" + year + ", " +
|
||||
"size=" + size + ", " +
|
||||
"rank=" + rank + ", " +
|
||||
"searchSet=" + searchSet + ", " +
|
||||
"queryStrategy=" + queryStrategy + ']';
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -1,13 +1,10 @@
|
||||
package nu.marginalia.index.model;
|
||||
|
||||
import gnu.trove.set.hash.TLongHashSet;
|
||||
import nu.marginalia.api.searchquery.IndexProtobufCodec;
|
||||
import nu.marginalia.api.searchquery.RpcIndexQuery;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.api.searchquery.IndexProtobufCodec;
|
||||
import nu.marginalia.index.index.StatefulIndex;
|
||||
import nu.marginalia.index.query.IndexQuery;
|
||||
import nu.marginalia.index.query.IndexSearchBudget;
|
||||
import nu.marginalia.index.query.limit.QueryStrategy;
|
||||
import nu.marginalia.index.searchset.SearchSet;
|
||||
@ -85,20 +82,8 @@ public class SearchParameters {
|
||||
rankingParams = IndexProtobufCodec.convertRankingParameterss(request.getParameters());
|
||||
}
|
||||
|
||||
public boolean hasTimeLeft() {
|
||||
return budget.hasTimeLeft();
|
||||
}
|
||||
|
||||
public long getDataCost() {
|
||||
return dataCost;
|
||||
}
|
||||
|
||||
private static class CachedObjects {
|
||||
private static final ThreadLocal<TLongHashSet> consideredCache = ThreadLocal.withInitial(() -> new TLongHashSet(4096));
|
||||
private static TLongHashSet getConsideredUrlsMap() {
|
||||
var ret = consideredCache.get();
|
||||
ret.clear();
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -8,21 +8,33 @@ import nu.marginalia.api.searchquery.model.query.SearchSubquery;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import static nu.marginalia.index.model.SearchTermsUtil.getWordId;
|
||||
|
||||
public record SearchTerms(
|
||||
LongList includes,
|
||||
LongList excludes,
|
||||
LongList priority,
|
||||
List<LongList> coherences
|
||||
)
|
||||
{
|
||||
public final class SearchTerms {
|
||||
private final LongList includes;
|
||||
private final LongList excludes;
|
||||
private final LongList priority;
|
||||
private final List<LongList> coherences;
|
||||
|
||||
public SearchTerms(
|
||||
LongList includes,
|
||||
LongList excludes,
|
||||
LongList priority,
|
||||
List<LongList> coherences
|
||||
) {
|
||||
this.includes = includes;
|
||||
this.excludes = excludes;
|
||||
this.priority = priority;
|
||||
this.coherences = coherences;
|
||||
}
|
||||
|
||||
public SearchTerms(SearchSubquery subquery) {
|
||||
this(new LongArrayList(),
|
||||
new LongArrayList(),
|
||||
new LongArrayList(),
|
||||
new ArrayList<>());
|
||||
new LongArrayList(),
|
||||
new LongArrayList(),
|
||||
new ArrayList<>());
|
||||
|
||||
for (var word : subquery.searchTermsInclude) {
|
||||
includes.add(getWordId(word));
|
||||
@ -67,4 +79,46 @@ public record SearchTerms(
|
||||
public int size() {
|
||||
return includes.size() + excludes.size() + priority.size();
|
||||
}
|
||||
|
||||
public LongList includes() {
|
||||
return includes;
|
||||
}
|
||||
|
||||
public LongList excludes() {
|
||||
return excludes;
|
||||
}
|
||||
|
||||
public LongList priority() {
|
||||
return priority;
|
||||
}
|
||||
|
||||
public List<LongList> coherences() {
|
||||
return coherences;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (obj == this) return true;
|
||||
if (obj == null || obj.getClass() != this.getClass()) return false;
|
||||
var that = (SearchTerms) obj;
|
||||
return Objects.equals(this.includes, that.includes) &&
|
||||
Objects.equals(this.excludes, that.excludes) &&
|
||||
Objects.equals(this.priority, that.priority) &&
|
||||
Objects.equals(this.coherences, that.coherences);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(includes, excludes, priority, coherences);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "SearchTerms[" +
|
||||
"includes=" + includes + ", " +
|
||||
"excludes=" + excludes + ", " +
|
||||
"priority=" + priority + ", " +
|
||||
"coherences=" + coherences + ']';
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -22,7 +22,6 @@ import static nu.marginalia.index.results.model.TermMetadataForCombinedDocumentI
|
||||
public class IndexMetadataService {
|
||||
private final StatefulIndex index;
|
||||
|
||||
|
||||
@Inject
|
||||
public IndexMetadataService(StatefulIndex index) {
|
||||
this.index = index;
|
||||
|
@ -47,18 +47,12 @@ public class IndexResultValuatorService {
|
||||
ResultRankingContext rankingContext,
|
||||
CombinedDocIdList resultIds)
|
||||
{
|
||||
final var evaluator = new IndexResultValuationContext(metadataService,
|
||||
resultValuator,
|
||||
resultIds,
|
||||
statefulIndex,
|
||||
rankingContext,
|
||||
params.subqueries,
|
||||
params.queryParams);
|
||||
final var evaluator = createValuationContext(params, rankingContext, resultIds);
|
||||
|
||||
List<SearchResultItem> results = new ArrayList<>(resultIds.size());
|
||||
|
||||
for (long docId : resultIds.array()) {
|
||||
var score = evaluator.calculatePreliminaryScore(docId);
|
||||
for (long id : resultIds.array()) {
|
||||
var score = evaluator.calculatePreliminaryScore(id);
|
||||
if (score != null) {
|
||||
results.add(score);
|
||||
}
|
||||
@ -67,6 +61,19 @@ public class IndexResultValuatorService {
|
||||
return results;
|
||||
}
|
||||
|
||||
private IndexResultValuationContext createValuationContext(SearchParameters params,
|
||||
ResultRankingContext rankingContext,
|
||||
CombinedDocIdList resultIds)
|
||||
{
|
||||
return new IndexResultValuationContext(metadataService,
|
||||
resultValuator,
|
||||
resultIds,
|
||||
statefulIndex,
|
||||
rankingContext,
|
||||
params.subqueries,
|
||||
params.queryParams);
|
||||
}
|
||||
|
||||
|
||||
public List<DecoratedSearchResultItem> selectBestResults(SearchParameters params,
|
||||
ResultRankingContext rankingContext,
|
||||
|
@ -1,11 +1,17 @@
|
||||
package nu.marginalia.index.results.model.ids;
|
||||
|
||||
import it.unimi.dsi.fastutil.longs.LongArrayList;
|
||||
import it.unimi.dsi.fastutil.longs.LongIterators;
|
||||
import org.roaringbitmap.longlong.Roaring64Bitmap;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.stream.LongStream;
|
||||
|
||||
/** A list of document ids, with their ranking bits still remaining.
|
||||
*
|
||||
* @see nu.marginalia.index.results.model.ids.DocIdList
|
||||
* @see nu.marginalia.model.id.UrlIdCodec
|
||||
* */
|
||||
public final class CombinedDocIdList {
|
||||
private final long[] data;
|
||||
|
||||
@ -48,5 +54,6 @@ public final class CombinedDocIdList {
|
||||
public void sort() {
|
||||
Arrays.sort(data);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -6,6 +6,11 @@ import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.LongStream;
|
||||
|
||||
/** A list of document ids, with their ranking bits removed.
|
||||
*
|
||||
* @see nu.marginalia.index.results.model.ids.CombinedDocIdList
|
||||
* @see nu.marginalia.model.id.UrlIdCodec
|
||||
* */
|
||||
public final class DocIdList {
|
||||
private final long[] array;
|
||||
|
||||
|
@ -19,29 +19,15 @@ public class IndexQuery {
|
||||
private final List<EntrySource> sources;
|
||||
private final List<QueryFilterStepIf> inclusionFilter = new ArrayList<>(10);
|
||||
|
||||
public final IndexQueryPriority queryPriority;
|
||||
public final int fetchSizeMultiplier;
|
||||
|
||||
public IndexQuery(EntrySource... sources) {
|
||||
this(List.of(sources), IndexQueryPriority.BEST, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an IndexQuery object with the given sources, priority, and fetchSizeMultiplier.
|
||||
*
|
||||
* @param sources List of EntrySource objects representing the sources to query from
|
||||
* @param priority IndexQueryPriority of the query, determining how many results to fetch before stopping
|
||||
* @param fetchSizeMultiplier Affects the fetch size of the query, determining how deep the query should go
|
||||
*/
|
||||
public IndexQuery(List<EntrySource> sources,
|
||||
IndexQueryPriority priority,
|
||||
int fetchSizeMultiplier)
|
||||
public IndexQuery(List<EntrySource> sources)
|
||||
{
|
||||
this.sources = sources;
|
||||
this.queryPriority = priority;
|
||||
this.fetchSizeMultiplier = fetchSizeMultiplier;
|
||||
}
|
||||
|
||||
public IndexQuery(EntrySource... sources)
|
||||
{
|
||||
this.sources = List.of(sources);
|
||||
}
|
||||
/** Adds a filter to the query. The filter will be applied to the results
|
||||
* after they are read from the sources.
|
||||
*
|
||||
|
@ -1,14 +0,0 @@
|
||||
package nu.marginalia.index.query;
|
||||
|
||||
/** Designates the presumptive value of an IndexQuery.
|
||||
*/
|
||||
public enum IndexQueryPriority {
|
||||
/** This is likely to produce highly relevant results */
|
||||
BEST,
|
||||
|
||||
/** This may produce relevant results */
|
||||
GOOD,
|
||||
|
||||
/** This is a fallback query, only execute if no higher prioritized query returned any results */
|
||||
FALLBACK
|
||||
}
|
@ -6,6 +6,9 @@ It exposes an API for querying the index, and contains the logic
|
||||
for ranking search results. It does not parse the query; that is
|
||||
the responsibility of the [search-query](../functions/search-query) module.
|
||||
|
||||
The central class of the index subsystem is the [IndexGrpcService](java/nu/marginalia/index/IndexGrpcService.java) class,
|
||||
which is a gRPC service that exposes the index to the rest of the system.
|
||||
|
||||
## Indexes
|
||||
|
||||
There are two indexes with accompanying tools for constructing them.
|
||||
|
Loading…
Reference in New Issue
Block a user