(index) Clean up index code

This commit is contained in:
Viktor Lofgren 2024-02-28 13:09:47 +01:00
parent 9415539b38
commit 9a045a0588
15 changed files with 282 additions and 226 deletions

View File

@ -6,12 +6,13 @@ import io.grpc.stub.StreamObserver;
import io.prometheus.client.Counter; import io.prometheus.client.Counter;
import io.prometheus.client.Gauge; import io.prometheus.client.Gauge;
import io.prometheus.client.Histogram; import io.prometheus.client.Histogram;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import lombok.SneakyThrows; import lombok.SneakyThrows;
import nu.marginalia.api.searchquery.*; import nu.marginalia.api.searchquery.*;
import nu.marginalia.api.searchquery.model.query.SearchSpecification; import nu.marginalia.api.searchquery.model.query.SearchSpecification;
import nu.marginalia.api.searchquery.model.query.SearchSubquery; import nu.marginalia.api.searchquery.model.query.SearchSubquery;
import nu.marginalia.api.searchquery.model.results.*; import nu.marginalia.api.searchquery.model.results.*;
import nu.marginalia.index.index.IndexQueryService; import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.index.index.StatefulIndex; import nu.marginalia.index.index.StatefulIndex;
import nu.marginalia.index.model.SearchParameters; import nu.marginalia.index.model.SearchParameters;
import nu.marginalia.index.model.SearchTerms; import nu.marginalia.index.model.SearchTerms;
@ -79,7 +80,6 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
private final StatefulIndex index; private final StatefulIndex index;
private final SearchSetsService searchSetsService; private final SearchSetsService searchSetsService;
private final IndexQueryService indexQueryService;
private final IndexResultValuatorService resultValuator; private final IndexResultValuatorService resultValuator;
private final String nodeName; private final String nodeName;
@ -90,7 +90,6 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
public IndexGrpcService(ServiceConfiguration serviceConfiguration, public IndexGrpcService(ServiceConfiguration serviceConfiguration,
StatefulIndex index, StatefulIndex index,
SearchSetsService searchSetsService, SearchSetsService searchSetsService,
IndexQueryService indexQueryService,
IndexResultValuatorService resultValuator) IndexResultValuatorService resultValuator)
{ {
var nodeId = serviceConfiguration.node(); var nodeId = serviceConfiguration.node();
@ -98,7 +97,6 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
this.index = index; this.index = index;
this.searchSetsService = searchSetsService; this.searchSetsService = searchSetsService;
this.resultValuator = resultValuator; this.resultValuator = resultValuator;
this.indexQueryService = indexQueryService;
} }
// GRPC endpoint // GRPC endpoint
@ -222,13 +220,14 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
} }
/** This class is responsible for executing a search query. It uses a thread pool to /** This class is responsible for executing a search query. It uses a thread pool to
* execute the subqueries in parallel, and then uses another thread pool to rank the * execute the subqueries and their valuation in parallel. The results are then combined
* results in parallel. The results are then combined into a bounded priority queue, * into a bounded priority queue, and finally the best results are returned.
* and finally the best results are returned.
*/ */
private class QueryExecution { private class QueryExecution {
private static final Executor workerPool = Executors.newWorkStealingPool(indexValuationThreads*4); private static final Executor workerPool = Executors.newWorkStealingPool(indexValuationThreads*4);
/** The queue where the results from the index lookup threads are placed,
* pending ranking by the result ranker threads */
private final ArrayBlockingQueue<CombinedDocIdList> resultCandidateQueue private final ArrayBlockingQueue<CombinedDocIdList> resultCandidateQueue
= new ArrayBlockingQueue<>(8); = new ArrayBlockingQueue<>(8);
@ -291,7 +290,7 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
/** This class is responsible for executing a subquery and adding the results to the /** This class is responsible for executing a subquery and adding the results to the
* resultCandidateQueue, which depending on the state of the valuator threads may * resultCandidateQueue, which depending on the state of the valuator threads may
* or may not block*/ * or may not block */
class IndexLookup implements Runnable { class IndexLookup implements Runnable {
private final IndexQuery query; private final IndexQuery query;
private final IndexSearchBudget budget; private final IndexSearchBudget budget;
@ -306,11 +305,7 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
public void run() { public void run() {
try { try {
indexQueryService.evaluateSubquery( executeSearch();
query,
budget,
this::drain
);
} }
finally { finally {
synchronized (remainingIndexTasks) { synchronized (remainingIndexTasks) {
@ -321,7 +316,31 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
} }
} }
private void drain(CombinedDocIdList resultIds) { private void executeSearch() {
final LongArrayList results = new LongArrayList(512);
// These queries are different indices for one subquery
final LongQueryBuffer buffer = new LongQueryBuffer(512);
while (query.hasMore() && budget.hasTimeLeft())
{
buffer.reset();
query.getMoreResults(buffer);
results.addElements(0, buffer.data, 0, buffer.end);
if (results.size() < 512) {
enqueueResults(new CombinedDocIdList(results));
results.clear();
}
}
if (!results.isEmpty()) {
enqueueResults(new CombinedDocIdList(results));
}
}
private void enqueueResults(CombinedDocIdList resultIds) {
long remainingTime = budget.timeLeft(); long remainingTime = budget.timeLeft();
try { try {
@ -353,30 +372,9 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
public void run() { public void run() {
try { try {
while (parameters.budget.timeLeft() > 0) { while (parameters.budget.timeLeft() > 0 && execute());
long start = System.currentTimeMillis();
CombinedDocIdList resultIds = resultCandidateQueue.poll(
Math.clamp(parameters.budget.timeLeft(), 1, 5),
TimeUnit.MILLISECONDS);
if (resultIds == null) {
if (remainingIndexTasks.get() == 0
&& resultCandidateQueue.isEmpty())
break;
else
continue;
} }
catch (InterruptedException e) {
stallTime.addAndGet(System.currentTimeMillis() - start);
var bestResults = resultValuator.rankResults(parameters, rankingContext, resultIds);
resultHeap.addAll(bestResults);
}
}
catch (Exception e) {
logger.warn("Interrupted while waiting to poll resultIds from queue", e); logger.warn("Interrupted while waiting to poll resultIds from queue", e);
} }
finally { finally {
@ -386,6 +384,31 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
} }
} }
} }
/** Performs one polling step of the ranking loop: waits briefly for a batch of
 * candidate ids, and if one arrives, ranks it and merges the outcome into resultHeap.
 *
 * @return false when no batch arrived, remainingIndexTasks has reached zero and the
 *         candidate queue is empty; true otherwise, meaning the caller should call again
 * @throws InterruptedException if the thread is interrupted while polling the queue
 */
private boolean execute() throws InterruptedException {
    final long pollStarted = System.currentTimeMillis();

    // Keep the poll short (1-5 ms) so that the loop notices promptly
    // when all the work is done and it is time to terminate
    final long pollMillis = Math.clamp(parameters.budget.timeLeft(), 1, 5);
    final CombinedDocIdList candidates = resultCandidateQueue.poll(pollMillis, TimeUnit.MILLISECONDS);

    if (candidates == null) {
        // Nothing arrived; we are finished only if no index tasks remain
        // and nothing was enqueued in the meantime
        final boolean finished = remainingIndexTasks.get() == 0 && resultCandidateQueue.isEmpty();
        return !finished;
    }

    // Only polls that actually produced a batch are added to the stall time
    stallTime.addAndGet(System.currentTimeMillis() - pollStarted);

    var ranked = resultValuator.rankResults(parameters, rankingContext, candidates);
    resultHeap.addAll(ranked);

    return true; // keep going
}
} }
} }

View File

@ -6,6 +6,18 @@ import org.jetbrains.annotations.NotNull;
import java.util.*; import java.util.*;
/** A priority queue for search results. This class is not thread-safe
* in general; the only method that may be used concurrently is addAll.
* <p>
* The class implements a subset of the Collection interface, and
* is intended to be used as a priority queue for search results,
* with a maximum size.
* <p>
* Since the expected use case is to add a large number of items
* and then iterate over the items, the class is optimized for
* this scenario. The only mutating methods it implements are
* addAll() and clear(); add(), remove(), removeAll() and retainAll()
* throw UnsupportedOperationException.
*/
public class ResultPriorityQueue implements Iterable<SearchResultItem>, public class ResultPriorityQueue implements Iterable<SearchResultItem>,
Collection<SearchResultItem> { Collection<SearchResultItem> {
private final int limit; private final int limit;
@ -34,16 +46,12 @@ public class ResultPriorityQueue implements Iterable<SearchResultItem>,
@Override @Override
public boolean add(SearchResultItem searchResultItem) { public boolean add(SearchResultItem searchResultItem) {
throw new UnsupportedOperationException("Use addAll instead ya dingus"); throw new UnsupportedOperationException("Use addAll instead");
} }
@Override @Override
public boolean remove(Object o) { public boolean remove(Object o) {
if (o instanceof SearchResultItem sri) { throw new UnsupportedOperationException();
idsInSet.remove(sri.getDocumentId());
return idsInSet.remove(sri.getDocumentId());
}
throw new IllegalArgumentException("Object is not a SearchResultItem");
} }
@Override @Override
@ -77,17 +85,18 @@ public class ResultPriorityQueue implements Iterable<SearchResultItem>,
@Override @Override
public boolean removeAll(@NotNull Collection<?> c) { public boolean removeAll(@NotNull Collection<?> c) {
return backingList.removeAll(c); throw new UnsupportedOperationException();
} }
@Override @Override
public boolean retainAll(@NotNull Collection<?> c) { public boolean retainAll(@NotNull Collection<?> c) {
return backingList.retainAll(c); throw new UnsupportedOperationException();
} }
@Override @Override
public void clear() { public void clear() {
backingList.clear();
idsInSet.clear();
} }
public int size() { public int size() {

View File

@ -5,7 +5,6 @@ import nu.marginalia.index.forward.ForwardIndexReader;
import nu.marginalia.index.model.QueryParams; import nu.marginalia.index.model.QueryParams;
import nu.marginalia.index.query.IndexQuery; import nu.marginalia.index.query.IndexQuery;
import nu.marginalia.index.query.IndexQueryBuilder; import nu.marginalia.index.query.IndexQueryBuilder;
import nu.marginalia.index.query.IndexQueryPriority;
import nu.marginalia.index.query.filter.QueryFilterStepIf; import nu.marginalia.index.query.filter.QueryFilterStepIf;
import nu.marginalia.index.query.limit.SpecificationLimitType; import nu.marginalia.index.query.limit.SpecificationLimitType;
import nu.marginalia.index.results.model.ids.CombinedDocIdList; import nu.marginalia.index.results.model.ids.CombinedDocIdList;
@ -16,7 +15,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.time.Duration; import java.time.Duration;
import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
/** A reader for the combined forward and reverse indexes */ /** A reader for the combined forward and reverse indexes */
@ -42,22 +40,15 @@ public class CombinedIndexReader {
/** Creates a query builder for terms in the priority index */ /** Creates a query builder for terms in the priority index */
public IndexQueryBuilder findPriorityWord(IndexQueryPriority priority, public IndexQueryBuilder findPriorityWord(long wordId) {
long wordId, return newQueryBuilder(new IndexQuery(reverseIndexPriorityReader.documents(wordId)))
int fetchSizeMultiplier) {
return newQueryBuilder(new IndexQuery(
List.of(reverseIndexPriorityReader.documents(wordId)),
priority,
fetchSizeMultiplier))
.withSourceTerms(wordId); .withSourceTerms(wordId);
} }
/** Creates a query builder for terms in the full index */ /** Creates a query builder for terms in the full index */
public IndexQueryBuilder findFullWord(IndexQueryPriority priority, long wordId, int fetchSizeMultiplier) { public IndexQueryBuilder findFullWord(long wordId) {
return newQueryBuilder( return newQueryBuilder(
new IndexQuery(List.of(reverseIndexFullReader.documents(wordId)), new IndexQuery(reverseIndexFullReader.documents(wordId)))
priority,
fetchSizeMultiplier))
.withSourceTerms(wordId); .withSourceTerms(wordId);
} }

View File

@ -1,64 +0,0 @@
package nu.marginalia.index.index;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.index.model.QueryParams;
import nu.marginalia.index.model.SearchTerms;
import nu.marginalia.index.query.IndexQuery;
import nu.marginalia.index.query.IndexSearchBudget;
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
import org.roaringbitmap.longlong.Roaring64Bitmap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.Marker;
import org.slf4j.MarkerFactory;
import java.util.function.Consumer;
@Singleton
public class IndexQueryService {
    /** Number of document ids collected before a batch is handed to the consumer.
     * The read buffer is given the same capacity. */
    private static final int BATCH_SIZE = 512;

    private final Marker queryMarker = MarkerFactory.getMarker("QUERY");
    private static final Logger logger = LoggerFactory.getLogger(IndexQueryService.class);
    private final StatefulIndex index;

    @Inject
    public IndexQueryService(StatefulIndex index) {
        this.index = index;
    }

    /** Executes a single index query and streams the matching document ids to
     * the provided consumer in batches.
     * <p>
     * The query is read until it has no more results or the time budget is
     * exhausted. Ids are accumulated until at least {@code BATCH_SIZE} of them
     * are available, and are then passed to {@code drain} as one
     * CombinedDocIdList. Whatever remains when the loop ends is passed on as
     * a final, possibly smaller, batch. Empty batches are never emitted.
     *
     * @param query   the query to read results from
     * @param timeout the time budget; reading stops once it reports no time left
     * @param drain   receives each batch of document ids
     */
    public void evaluateSubquery(IndexQuery query,
                                 IndexSearchBudget timeout,
                                 Consumer<CombinedDocIdList> drain)
    {
        final LongArrayList results = new LongArrayList(BATCH_SIZE);
        final LongQueryBuffer buffer = new LongQueryBuffer(BATCH_SIZE);

        while (query.hasMore() && timeout.hasTimeLeft())
        {
            buffer.reset();
            query.getMoreResults(buffer);

            // Append at the end, rather than inserting at index 0, so that ids
            // keep the order they were read in and existing elements are not shifted
            results.addElements(results.size(), buffer.data, 0, buffer.end);

            // Drain once a full batch is available. The previous "< 512" test was
            // inverted: it drained small (even empty) batches on every iteration, and
            // stopped draining altogether once 512 ids had been accumulated.
            if (results.size() >= BATCH_SIZE) {
                drain.accept(new CombinedDocIdList(results));
                results.clear();
            }
        }

        // Hand over the leftover partial batch, if any
        if (!results.isEmpty()) {
            drain.accept(new CombinedDocIdList(results));
        }
    }
}

View File

@ -8,8 +8,6 @@ import nu.marginalia.index.model.QueryParams;
import nu.marginalia.index.IndexFactory; import nu.marginalia.index.IndexFactory;
import nu.marginalia.index.model.SearchTerms; import nu.marginalia.index.model.SearchTerms;
import nu.marginalia.index.query.*; import nu.marginalia.index.query.*;
import nu.marginalia.index.query.filter.QueryFilterStepFromPredicate;
import nu.marginalia.index.results.model.ids.TermIdList;
import nu.marginalia.service.control.ServiceEventLog; import nu.marginalia.service.control.ServiceEventLog;
import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -22,7 +20,6 @@ import java.util.List;
import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.function.LongPredicate;
/** This class delegates SearchIndexReader and deals with the stateful nature of the index, /** This class delegates SearchIndexReader and deals with the stateful nature of the index,
* i.e. it may be possible to reconstruct the index and load a new set of data. * i.e. it may be possible to reconstruct the index and load a new set of data.
@ -122,19 +119,13 @@ public class StatefulIndex {
List<IndexQueryBuilder> queryHeads = new ArrayList<>(10); List<IndexQueryBuilder> queryHeads = new ArrayList<>(10);
List<IndexQuery> queries = new ArrayList<>(10); List<IndexQuery> queries = new ArrayList<>(10);
// Fetch more results than specified for short queries, as the query itself is cheap and the // To ensure that good results are discovered, create separate query heads for the priority index that
// priority index may contain a considerable amount of less interesting results // filter for terms that contain pairs of two search terms
final int fetchSizeMultiplier;
if (orderedIncludes.length == 1) fetchSizeMultiplier = 4;
else fetchSizeMultiplier = 1;
// To ensure that good results are processed first, create query heads for the priority index that filter for terms
// that contain pairs of two search terms
if (orderedIncludesPrio.length > 1) { if (orderedIncludesPrio.length > 1) {
for (int i = 0; i + 1 < orderedIncludesPrio.length; i++) { for (int i = 0; i + 1 < orderedIncludesPrio.length; i++) {
for (int j = i + 1; j < orderedIncludesPrio.length; j++) { for (int j = i + 1; j < orderedIncludesPrio.length; j++) {
var entrySource = combinedIndexReader var entrySource = combinedIndexReader
.findPriorityWord(IndexQueryPriority.BEST, orderedIncludesPrio[i], fetchSizeMultiplier) .findPriorityWord(orderedIncludesPrio[i])
.alsoPrio(orderedIncludesPrio[j]); .alsoPrio(orderedIncludesPrio[j]);
queryHeads.add(entrySource); queryHeads.add(entrySource);
} }
@ -143,18 +134,20 @@ public class StatefulIndex {
// Next consider entries that appear only once in the priority index // Next consider entries that appear only once in the priority index
for (var wordId : orderedIncludesPrio) { for (var wordId : orderedIncludesPrio) {
queryHeads.add(combinedIndexReader.findPriorityWord(IndexQueryPriority.GOOD, wordId, fetchSizeMultiplier)); queryHeads.add(combinedIndexReader.findPriorityWord(wordId));
} }
// Finally consider terms in the full index, but only do this for sufficiently long queries // Finally consider terms in the full index
// as short queries tend to be too underspecified to produce anything other than CPU warmth queryHeads.add(combinedIndexReader.findFullWord(orderedIncludes[0]));
queryHeads.add(combinedIndexReader.findFullWord(IndexQueryPriority.FALLBACK, orderedIncludes[0], fetchSizeMultiplier));
for (var query : queryHeads) { for (var query : queryHeads) {
if (query == null) { if (query == null) {
return Collections.emptyList(); return Collections.emptyList();
} }
// Note that we can add all includes as filters, even though
// they may not be present in the query head, as the query builder
// will ignore redundant include filters:
for (long orderedInclude : orderedIncludes) { for (long orderedInclude : orderedIncludes) {
query = query.alsoFull(orderedInclude); query = query.alsoFull(orderedInclude);
} }
@ -163,7 +156,7 @@ public class StatefulIndex {
query = query.notFull(term); query = query.notFull(term);
} }
// Run these last, as they'll worst-case cause as many page faults as there are // Run these filter steps last, as they'll worst-case cause as many page faults as there are
// items in the buffer // items in the buffer
queries.add(query.addInclusionFilter(combinedIndexReader.filterForParams(params)).build()); queries.add(query.addInclusionFilter(combinedIndexReader.filterForParams(params)).build());
} }

View File

@ -4,8 +4,20 @@ import nu.marginalia.index.searchset.SearchSet;
import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit; import nu.marginalia.index.query.limit.SpecificationLimit;
/** IndexQueryParams is a set of parameters for a query. import java.util.Objects;
*
/**
* IndexQueryParams is a set of parameters for a query.
*/
public final class QueryParams {
private final SpecificationLimit qualityLimit;
private final SpecificationLimit year;
private final SpecificationLimit size;
private final SpecificationLimit rank;
private final SearchSet searchSet;
private final QueryStrategy queryStrategy;
/**
* @param qualityLimit The quality limit. * @param qualityLimit The quality limit.
* @param year The year limit. * @param year The year limit.
* @param size The size limit. Eliminates results from domains that do not satisfy the size criteria. * @param size The size limit. Eliminates results from domains that do not satisfy the size criteria.
@ -14,13 +26,73 @@ import nu.marginalia.index.query.limit.SpecificationLimit;
* @param queryStrategy The query strategy. May impose additional constraints on the query, such as requiring * @param queryStrategy The query strategy. May impose additional constraints on the query, such as requiring
* the keywords to appear in the title, or in the domain. * the keywords to appear in the title, or in the domain.
*/ */
public record QueryParams(SpecificationLimit qualityLimit, public QueryParams(SpecificationLimit qualityLimit,
SpecificationLimit year, SpecificationLimit year,
SpecificationLimit size, SpecificationLimit size,
SpecificationLimit rank, SpecificationLimit rank,
SearchSet searchSet, SearchSet searchSet,
QueryStrategy queryStrategy QueryStrategy queryStrategy
) ) {
{ this.qualityLimit = qualityLimit;
this.year = year;
this.size = size;
this.rank = rank;
this.searchSet = searchSet;
this.queryStrategy = queryStrategy;
}
public SpecificationLimit qualityLimit() {
return qualityLimit;
}
public SpecificationLimit year() {
return year;
}
public SpecificationLimit size() {
return size;
}
public SpecificationLimit rank() {
return rank;
}
public SearchSet searchSet() {
return searchSet;
}
public QueryStrategy queryStrategy() {
return queryStrategy;
}
/** Two QueryParams are equal when they are of exactly the same class and
 * all six components are equal according to Objects.equals. */
@Override
public boolean equals(Object obj) {
    if (this == obj) {
        return true;
    }
    if (obj == null || getClass() != obj.getClass()) {
        return false;
    }

    final QueryParams other = (QueryParams) obj;

    // Components are compared in declaration order, bailing out on the first mismatch
    if (!Objects.equals(qualityLimit, other.qualityLimit)) return false;
    if (!Objects.equals(year, other.year)) return false;
    if (!Objects.equals(size, other.size)) return false;
    if (!Objects.equals(rank, other.rank)) return false;
    if (!Objects.equals(searchSet, other.searchSet)) return false;
    return Objects.equals(queryStrategy, other.queryStrategy);
}
@Override
public int hashCode() {
return Objects.hash(qualityLimit, year, size, rank, searchSet, queryStrategy);
}
@Override
public String toString() {
return "QueryParams[" +
"qualityLimit=" + qualityLimit + ", " +
"year=" + year + ", " +
"size=" + size + ", " +
"rank=" + rank + ", " +
"searchSet=" + searchSet + ", " +
"queryStrategy=" + queryStrategy + ']';
}
} }

View File

@ -1,13 +1,10 @@
package nu.marginalia.index.model; package nu.marginalia.index.model;
import gnu.trove.set.hash.TLongHashSet; import nu.marginalia.api.searchquery.IndexProtobufCodec;
import nu.marginalia.api.searchquery.RpcIndexQuery; import nu.marginalia.api.searchquery.RpcIndexQuery;
import nu.marginalia.api.searchquery.model.query.SearchSpecification; import nu.marginalia.api.searchquery.model.query.SearchSpecification;
import nu.marginalia.api.searchquery.model.query.SearchSubquery; import nu.marginalia.api.searchquery.model.query.SearchSubquery;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.api.searchquery.IndexProtobufCodec;
import nu.marginalia.index.index.StatefulIndex;
import nu.marginalia.index.query.IndexQuery;
import nu.marginalia.index.query.IndexSearchBudget; import nu.marginalia.index.query.IndexSearchBudget;
import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.searchset.SearchSet; import nu.marginalia.index.searchset.SearchSet;
@ -85,20 +82,8 @@ public class SearchParameters {
rankingParams = IndexProtobufCodec.convertRankingParameterss(request.getParameters()); rankingParams = IndexProtobufCodec.convertRankingParameterss(request.getParameters());
} }
public boolean hasTimeLeft() {
return budget.hasTimeLeft();
}
public long getDataCost() { public long getDataCost() {
return dataCost; return dataCost;
} }
private static class CachedObjects {
private static final ThreadLocal<TLongHashSet> consideredCache = ThreadLocal.withInitial(() -> new TLongHashSet(4096));
private static TLongHashSet getConsideredUrlsMap() {
var ret = consideredCache.get();
ret.clear();
return ret;
}
}
} }

View File

@ -8,16 +8,28 @@ import nu.marginalia.api.searchquery.model.query.SearchSubquery;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Objects;
import static nu.marginalia.index.model.SearchTermsUtil.getWordId; import static nu.marginalia.index.model.SearchTermsUtil.getWordId;
public record SearchTerms( public final class SearchTerms {
private final LongList includes;
private final LongList excludes;
private final LongList priority;
private final List<LongList> coherences;
public SearchTerms(
LongList includes, LongList includes,
LongList excludes, LongList excludes,
LongList priority, LongList priority,
List<LongList> coherences List<LongList> coherences
) ) {
{ this.includes = includes;
this.excludes = excludes;
this.priority = priority;
this.coherences = coherences;
}
public SearchTerms(SearchSubquery subquery) { public SearchTerms(SearchSubquery subquery) {
this(new LongArrayList(), this(new LongArrayList(),
new LongArrayList(), new LongArrayList(),
@ -67,4 +79,46 @@ public record SearchTerms(
public int size() { public int size() {
return includes.size() + excludes.size() + priority.size(); return includes.size() + excludes.size() + priority.size();
} }
public LongList includes() {
return includes;
}
public LongList excludes() {
return excludes;
}
public LongList priority() {
return priority;
}
public List<LongList> coherences() {
return coherences;
}
/** Two SearchTerms are equal when they are of exactly the same class and
 * their includes, excludes, priority and coherences are all equal
 * according to Objects.equals. */
@Override
public boolean equals(Object obj) {
    if (this == obj) {
        return true;
    }
    if (obj == null || getClass() != obj.getClass()) {
        return false;
    }

    final SearchTerms other = (SearchTerms) obj;

    // Components are compared in declaration order, bailing out on the first mismatch
    if (!Objects.equals(includes, other.includes)) return false;
    if (!Objects.equals(excludes, other.excludes)) return false;
    if (!Objects.equals(priority, other.priority)) return false;
    return Objects.equals(coherences, other.coherences);
}
@Override
public int hashCode() {
return Objects.hash(includes, excludes, priority, coherences);
}
@Override
public String toString() {
return "SearchTerms[" +
"includes=" + includes + ", " +
"excludes=" + excludes + ", " +
"priority=" + priority + ", " +
"coherences=" + coherences + ']';
}
} }

View File

@ -22,7 +22,6 @@ import static nu.marginalia.index.results.model.TermMetadataForCombinedDocumentI
public class IndexMetadataService { public class IndexMetadataService {
private final StatefulIndex index; private final StatefulIndex index;
@Inject @Inject
public IndexMetadataService(StatefulIndex index) { public IndexMetadataService(StatefulIndex index) {
this.index = index; this.index = index;

View File

@ -47,18 +47,12 @@ public class IndexResultValuatorService {
ResultRankingContext rankingContext, ResultRankingContext rankingContext,
CombinedDocIdList resultIds) CombinedDocIdList resultIds)
{ {
final var evaluator = new IndexResultValuationContext(metadataService, final var evaluator = createValuationContext(params, rankingContext, resultIds);
resultValuator,
resultIds,
statefulIndex,
rankingContext,
params.subqueries,
params.queryParams);
List<SearchResultItem> results = new ArrayList<>(resultIds.size()); List<SearchResultItem> results = new ArrayList<>(resultIds.size());
for (long docId : resultIds.array()) { for (long id : resultIds.array()) {
var score = evaluator.calculatePreliminaryScore(docId); var score = evaluator.calculatePreliminaryScore(id);
if (score != null) { if (score != null) {
results.add(score); results.add(score);
} }
@ -67,6 +61,19 @@ public class IndexResultValuatorService {
return results; return results;
} }
/** Creates the valuation context for a batch of result ids, combining the
 * services held by this class with the subqueries and query parameters
 * taken from the search parameters.
 *
 * @param params         the search parameters supplying subqueries and queryParams
 * @param rankingContext the ranking context passed through to the valuation context
 * @param resultIds      the batch of document ids passed through to the valuation context
 * @return a new IndexResultValuationContext
 */
private IndexResultValuationContext createValuationContext(SearchParameters params,
                                                           ResultRankingContext rankingContext,
                                                           CombinedDocIdList resultIds)
{
    final var subqueries = params.subqueries;
    final var queryParams = params.queryParams;

    return new IndexResultValuationContext(
            metadataService,
            resultValuator,
            resultIds,
            statefulIndex,
            rankingContext,
            subqueries,
            queryParams
    );
}
public List<DecoratedSearchResultItem> selectBestResults(SearchParameters params, public List<DecoratedSearchResultItem> selectBestResults(SearchParameters params,
ResultRankingContext rankingContext, ResultRankingContext rankingContext,

View File

@ -1,11 +1,17 @@
package nu.marginalia.index.results.model.ids; package nu.marginalia.index.results.model.ids;
import it.unimi.dsi.fastutil.longs.LongArrayList; import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongIterators;
import org.roaringbitmap.longlong.Roaring64Bitmap; import org.roaringbitmap.longlong.Roaring64Bitmap;
import java.util.Arrays; import java.util.Arrays;
import java.util.stream.LongStream; import java.util.stream.LongStream;
/** A list of document ids, with their ranking bits still remaining.
*
* @see nu.marginalia.index.results.model.ids.DocIdList
* @see nu.marginalia.model.id.UrlIdCodec
* */
public final class CombinedDocIdList { public final class CombinedDocIdList {
private final long[] data; private final long[] data;
@ -48,5 +54,6 @@ public final class CombinedDocIdList {
public void sort() { public void sort() {
Arrays.sort(data); Arrays.sort(data);
} }
} }

View File

@ -6,6 +6,11 @@ import java.util.Arrays;
import java.util.Objects; import java.util.Objects;
import java.util.stream.LongStream; import java.util.stream.LongStream;
/** A list of document ids, with their ranking bits removed.
*
* @see nu.marginalia.index.results.model.ids.CombinedDocIdList
* @see nu.marginalia.model.id.UrlIdCodec
* */
public final class DocIdList { public final class DocIdList {
private final long[] array; private final long[] array;

View File

@ -19,29 +19,15 @@ public class IndexQuery {
private final List<EntrySource> sources; private final List<EntrySource> sources;
private final List<QueryFilterStepIf> inclusionFilter = new ArrayList<>(10); private final List<QueryFilterStepIf> inclusionFilter = new ArrayList<>(10);
public final IndexQueryPriority queryPriority; public IndexQuery(List<EntrySource> sources)
public final int fetchSizeMultiplier;
public IndexQuery(EntrySource... sources) {
this(List.of(sources), IndexQueryPriority.BEST, 1);
}
/**
* Creates an IndexQuery object with the given sources, priority, and fetchSizeMultiplier.
*
* @param sources List of EntrySource objects representing the sources to query from
* @param priority IndexQueryPriority of the query, determining how many results to fetch before stopping
* @param fetchSizeMultiplier Affects the fetch size of the query, determining how deep the query should go
*/
public IndexQuery(List<EntrySource> sources,
IndexQueryPriority priority,
int fetchSizeMultiplier)
{ {
this.sources = sources; this.sources = sources;
this.queryPriority = priority;
this.fetchSizeMultiplier = fetchSizeMultiplier;
} }
/** Convenience constructor taking the entry sources as varargs;
 * the sources are stored as an immutable list created with List.of. */
public IndexQuery(EntrySource... sources) {
    this(List.of(sources));
}
/** Adds a filter to the query. The filter will be applied to the results /** Adds a filter to the query. The filter will be applied to the results
* after they are read from the sources. * after they are read from the sources.
* *

View File

@ -1,14 +0,0 @@
package nu.marginalia.index.query;
/** Designates the presumptive value of an IndexQuery, that is, how likely
* the query is expected to be to produce relevant results. The constants
* are declared from the most promising to the least promising.
*/
public enum IndexQueryPriority {
/** The query is likely to produce highly relevant results */
BEST,
/** The query may produce relevant results */
GOOD,
/** A fallback query, intended to be executed only if no higher
* prioritized query returned any results */
FALLBACK
}

View File

@ -6,6 +6,9 @@ It exposes an API for querying the index, and contains the logic
for ranking search results. It does not parse the query, that is for ranking search results. It does not parse the query, that is
the responsibility of the [search-query](../functions/search-query) module. the responsibility of the [search-query](../functions/search-query) module.
The central class of the index subsystem is the [IndexGrpcService](java/nu/marginalia/index/IndexGrpcService.java) class,
which is a gRPC service that exposes the index to the rest of the system.
## Indexes ## Indexes
There are two indexes with accompanying tools for constructing them. There are two indexes with accompanying tools for constructing them.