Mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git, synced 2025-02-24 05:18:58 +00:00

(index) Refactor to reduce the level of indirection

parent daf2a8df54, commit 4fcd4a8197
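
The refactor is easiest to see as a before/after of the call chain: callers used to go through forwarding methods on the stateful wrapper (getTermFrequency, getDocumentMetadata, and so on), and now they fetch the underlying reader once via get() and call it directly. Below is a minimal, self-contained sketch of that pattern; Holder and Reader are illustrative stand-ins, not the project's classes (those are StatefulIndex and CombinedIndexReader).

// Illustrative sketch of the indirection removed by this commit.
class Reader {
    int numHits(long termId) { return 42; } // stand-in for a real posting-list count
}

class Holder {
    private volatile Reader reader = new Reader();

    // Before: each reader method was mirrored by a forwarding method here.
    int numHitsViaDelegate(long termId) { return reader.numHits(termId); }

    // After: one accessor; callers talk to the reader directly.
    Reader get() { return reader; }
}

public class IndirectionSketch {
    public static void main(String[] args) {
        Holder holder = new Holder();
        Reader current = holder.get(); // hold for the duration of one query only
        System.out.println(current.numHits(123L));
    }
}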
ReversePreindexFinalizeTest.java

@@ -69,8 +69,8 @@ class ReversePreindexFinalizeTest {
         var docsArray = LongArrayFactory.mmapForReadingConfined(docsFile);
         var wordsArray = LongArrayFactory.mmapForReadingConfined(wordsFile);
 
-        var docsHeader = BTreeReader.readHeader(docsArray, 0);
-        var wordsHeader = BTreeReader.readHeader(wordsArray, 0);
+        var docsHeader = new BTreeHeader(docsArray, 0);
+        var wordsHeader = new BTreeHeader(wordsArray, 0);
 
         assertEquals(1, docsHeader.numEntries());
         assertEquals(1, wordsHeader.numEntries());
@@ -107,7 +107,7 @@ class ReversePreindexFinalizeTest {
         var wordsArray = LongArrayFactory.mmapForReadingConfined(wordsFile);
 
 
-        var wordsHeader = BTreeReader.readHeader(wordsArray, 0);
+        var wordsHeader = new BTreeHeader(wordsArray, 0);
 
         System.out.println(wordsHeader);
 
@@ -123,14 +123,14 @@ class ReversePreindexFinalizeTest {
 
         BTreeHeader docsHeader;
 
-        docsHeader = BTreeReader.readHeader(docsArray, offset1);
+        docsHeader = new BTreeHeader(docsArray, offset1);
         System.out.println(docsHeader);
         assertEquals(1, docsHeader.numEntries());
 
         assertEquals(100, docsArray.get(docsHeader.dataOffsetLongs() + 0));
         assertEquals(51, docsArray.get(docsHeader.dataOffsetLongs() + 1));
 
-        docsHeader = BTreeReader.readHeader(docsArray, offset2);
+        docsHeader = new BTreeHeader(docsArray, offset2);
         System.out.println(docsHeader);
         assertEquals(1, docsHeader.numEntries());
 
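These test edits track an API change in BTreeReader, shown in the last file of this diff: the static readHeader factory is deleted and callers construct BTreeHeader directly. The self-contained sketch below models that change with hypothetical names (Header, FactoryRemovalSketch); only the shape of the refactor is taken from the diff.

// A pass-through static factory adds a level of indirection without
// adding behavior; deleting it in favor of the constructor is safe.
record Header(long numEntries, long dataOffset) {
    Header(long[] file, int offset) {        // parse the fields at an offset
        this(file[offset], file[offset + 1]);
    }
    // Before the refactor, callers went through a wrapper like:
    //   static Header readHeader(long[] file, int offset) { return new Header(file, offset); }
}

public class FactoryRemovalSketch {
    public static void main(String[] args) {
        long[] file = {1, 64};
        Header h = new Header(file, 0);       // after: construct directly
        System.out.println(h.numEntries() + " entries, data at " + h.dataOffset());
    }
}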
IndexGrpcService.java

@@ -78,7 +78,7 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
             .labelNames("node")
             .register();
 
-    private final StatefulIndex index;
+    private final StatefulIndex statefulIndex;
     private final SearchSetsService searchSetsService;
 
    private final IndexResultValuatorService resultValuator;
@@ -89,13 +89,13 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
 
     @Inject
     public IndexGrpcService(ServiceConfiguration serviceConfiguration,
-                            StatefulIndex index,
+                            StatefulIndex statefulIndex,
                             SearchSetsService searchSetsService,
                             IndexResultValuatorService resultValuator)
     {
         var nodeId = serviceConfiguration.node();
         this.nodeName = Integer.toString(nodeId);
-        this.index = index;
+        this.statefulIndex = statefulIndex;
         this.searchSetsService = searchSetsService;
         this.resultValuator = resultValuator;
     }
@@ -207,7 +207,7 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
 
     private SearchResultSet executeSearch(SearchParameters params) throws SQLException, InterruptedException {
 
-        if (!index.isLoaded()) {
+        if (!statefulIndex.isLoaded()) {
             // Short-circuit if the index is not loaded, as we trivially know that there can be no results
             return new SearchResultSet(List.of());
         }
@@ -268,7 +268,8 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
 
         var terms = new SearchTerms(parameters.query, parameters.compiledQueryIds);
 
-        for (var indexQuery : index.createQueries(terms, parameters.queryParams)) {
+        var currentIndex = statefulIndex.get();
+        for (var indexQuery : currentIndex.createQueries(terms, parameters.queryParams)) {
             workerPool.execute(new IndexLookup(indexQuery, parameters.budget));
         }
 
@@ -435,10 +436,12 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
         BitSet ngramsMask = new BitSet(compiledQuery.size());
         BitSet regularMask = new BitSet(compiledQuery.size());
 
+        var currentIndex = statefulIndex.get();
+
         for (int idx = 0; idx < compiledQueryIds.size(); idx++) {
             long id = compiledQueryIds.at(idx);
-            full[idx] = index.getTermFrequency(id);
-            prio[idx] = index.getTermFrequencyPrio(id);
+            full[idx] = currentIndex.numHits(id);
+            prio[idx] = currentIndex.numHitsPrio(id);
 
             if (compiledQuery.at(idx).contains("_")) {
                 ngramsMask.set(idx);
@@ -448,7 +451,7 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
             }
         }
 
-        return new ResultRankingContext(index.getTotalDocCount(),
+        return new ResultRankingContext(currentIndex.totalDocCount(),
                 rankingParams,
                 ngramsMask,
                 regularMask,
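The recurring move in this file is to fetch the reader once per query (var currentIndex = statefulIndex.get()) and run every lookup against that snapshot, so the frequency counts and the document total all come from the same index generation. Here is a self-contained model of the frequency-gathering loop, with Reader standing in for CombinedIndexReader and toy counts in place of real posting-list sizes.

// Sketch of the per-query snapshot pattern; names and counts are illustrative.
class Reader {
    int numHits(long id)     { return (int) (id % 100); } // toy full-index count
    int numHitsPrio(long id) { return (int) (id % 10); }  // toy priority-index count
    int totalDocCount()      { return 1_000; }
}

public class RankingContextSketch {
    public static void main(String[] args) {
        long[] ids = {101L, 202L, 303L};
        int[] full = new int[ids.length];
        int[] prio = new int[ids.length];

        Reader currentIndex = new Reader(); // statefulIndex.get() in the real code

        // One snapshot serves every count, so full[] and prio[] are consistent.
        for (int idx = 0; idx < ids.length; idx++) {
            full[idx] = currentIndex.numHits(ids[idx]);
            prio[idx] = currentIndex.numHitsPrio(ids[idx]);
        }

        System.out.println(currentIndex.totalDocCount() + " docs; full[0]=" + full[0]);
    }
}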
CombinedIndexReader.java

@@ -1,8 +1,14 @@
 package nu.marginalia.index.index;
 
+import it.unimi.dsi.fastutil.longs.LongArrayList;
+import it.unimi.dsi.fastutil.longs.LongList;
+import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
+import it.unimi.dsi.fastutil.longs.LongSet;
+import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates;
 import nu.marginalia.index.ReverseIndexReader;
 import nu.marginalia.index.forward.ForwardIndexReader;
 import nu.marginalia.index.model.QueryParams;
+import nu.marginalia.index.model.SearchTerms;
 import nu.marginalia.index.query.IndexQuery;
 import nu.marginalia.index.query.IndexQueryBuilder;
 import nu.marginalia.index.query.filter.QueryFilterStepIf;
@@ -15,9 +21,17 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
 import java.util.concurrent.TimeUnit;
+import java.util.function.Predicate;
 
-/** A reader for the combined forward and reverse indexes */
+/** A reader for the combined forward and reverse indexes.
+ * <p></p>
+ * This class does not deal with the lifecycle of the indexes,
+ * that is the responsibility of {@link StatefulIndex}.
+ * */
 public class CombinedIndexReader {
 
     private final Logger logger = LoggerFactory.getLogger(getClass());
@@ -66,12 +80,91 @@ public class CombinedIndexReader {
     }
 
     /** Returns the number of occurrences of the word in the full index */
-    public long numHits(long word) {
+    public int numHits(long word) {
         return reverseIndexFullReader.numDocuments(word);
     }
 
+    public List<IndexQuery> createQueries(SearchTerms terms, QueryParams params) {
+
+        if (!isLoaded()) {
+            logger.warn("Index reader not ready");
+            return Collections.emptyList();
+        }
+
+        List<IndexQueryBuilder> queryHeads = new ArrayList<>(10);
+
+        final long[] termPriority = terms.sortedDistinctIncludes(this::compareKeywords);
+        List<LongSet> paths = CompiledQueryAggregates.queriesAggregate(terms.compiledQuery());
+
+        // Remove any paths that do not contain all prioritized terms, as this means
+        // the term is missing from the index and can never be found
+        paths.removeIf(containsAll(termPriority).negate());
+
+        for (var path : paths) {
+            LongList elements = new LongArrayList(path);
+
+            elements.sort((a, b) -> {
+                for (int i = 0; i < termPriority.length; i++) {
+                    if (termPriority[i] == a)
+                        return -1;
+                    if (termPriority[i] == b)
+                        return 1;
+                }
+                return 0;
+            });
+
+            var head = findFullWord(elements.getLong(0));
+            for (int i = 1; i < elements.size(); i++) {
+                head.addInclusionFilter(hasWordFull(elements.getLong(i)));
+            }
+            queryHeads.add(head);
+
+            // If there are few paths, we can afford to check the priority index as well
+            if (paths.size() < 4) {
+                var prioHead = findPriorityWord(elements.getLong(0));
+                for (int i = 1; i < elements.size(); i++) {
+                    prioHead.addInclusionFilter(hasWordPrio(elements.getLong(i)));
+                }
+                queryHeads.add(prioHead);
+            }
+        }
+
+        // Add additional conditions to the query heads
+        for (var query : queryHeads) {
+
+            // Advice terms are a special case, mandatory but not ranked, and exempt from re-writing
+            for (long term : terms.advice()) {
+                query = query.also(term);
+            }
+
+            for (long term : terms.excludes()) {
+                query = query.not(term);
+            }
+
+            // Run these filter steps last, as they'll worst-case cause as many page faults as there are
+            // items in the buffer
+            query.addInclusionFilter(filterForParams(params));
+        }
+
+        return queryHeads
+                .stream()
+                .map(IndexQueryBuilder::build)
+                .toList();
+    }
+
+    private Predicate<LongSet> containsAll(long[] permitted) {
+        LongSet permittedTerms = new LongOpenHashSet(permitted);
+        return permittedTerms::containsAll;
+    }
+
+    private int compareKeywords(long a, long b) {
+        return Long.compare(
+                numHits(a),
+                numHits(b)
+        );
+    }
+
     /** Returns the number of occurrences of the word in the priority index */
-    public long numHitsPrio(long word) {
+    public int numHitsPrio(long word) {
         return reverseIndexPriorityReader.numDocuments(word);
     }
 
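Inside the new createQueries(), each path's terms are sorted by their position in termPriority, and since compareKeywords orders by ascending numHits, this appears to put the rarest term first so that the shortest posting list drives evaluation. The following self-contained snippet demonstrates just that comparator with toy values.

import java.util.ArrayList;
import java.util.List;

// Demonstrates the ordering used in createQueries(): terms sort by their
// position in the priority array, so the rarest term leads the query.
public class TermOrderSketch {
    public static void main(String[] args) {
        // As if produced by sortedDistinctIncludes(compareKeywords):
        // ascending by numHits, i.e. the rarest term comes first.
        long[] termPriority = {30L, 10L, 20L};

        List<Long> elements = new ArrayList<>(List.of(10L, 20L, 30L));

        // Same comparator shape as in the diff: whichever term appears
        // earlier in termPriority sorts first.
        elements.sort((a, b) -> {
            for (long t : termPriority) {
                if (t == a) return -1;
                if (t == b) return 1;
            }
            return 0;
        });

        System.out.println(elements); // [30, 10, 20] -- rarest term leads
    }
}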
StatefulIndex.java

@@ -2,32 +2,19 @@ package nu.marginalia.index.index;
 
 import com.google.inject.Inject;
 import com.google.inject.Singleton;
-import it.unimi.dsi.fastutil.longs.*;
-import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates;
-import nu.marginalia.index.query.filter.QueryFilterAllOf;
-import nu.marginalia.index.query.filter.QueryFilterAnyOf;
-import nu.marginalia.index.query.filter.QueryFilterStepIf;
-import nu.marginalia.index.results.model.ids.CombinedDocIdList;
-import nu.marginalia.index.results.model.ids.DocMetadataList;
-import nu.marginalia.index.model.QueryParams;
 import nu.marginalia.index.IndexFactory;
-import nu.marginalia.index.model.SearchTerms;
-import nu.marginalia.index.query.*;
 import nu.marginalia.service.control.ServiceEventLog;
 import org.jetbrains.annotations.NotNull;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
-import java.util.*;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReadWriteLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
-import java.util.function.Predicate;
 
-/** This class delegates SearchIndexReader and deals with the stateful nature of the index,
+/** This class holds {@link CombinedIndexReader} and deals with the stateful nature of the index,
  * i.e. it may be possible to reconstruct the index and load a new set of data.
- *
  */
 @Singleton
 public class StatefulIndex {
@@ -108,109 +95,11 @@ public class StatefulIndex {
         return combinedIndexReader != null && combinedIndexReader.isLoaded();
     }
 
-    public List<IndexQuery> createQueries(SearchTerms terms, QueryParams params) {
-
-        if (!isLoaded()) {
-            logger.warn("Index reader not ready");
-            return Collections.emptyList();
-        }
-
-        List<IndexQueryBuilder> queryHeads = new ArrayList<>(10);
-
-        final long[] termPriority = terms.sortedDistinctIncludes(this::compareKeywords);
-        List<LongSet> paths = CompiledQueryAggregates.queriesAggregate(terms.compiledQuery());
-
-        // Remove any paths that do not contain all prioritized terms, as this means
-        // the term is missing from the index and can never be found
-        paths.removeIf(containsAll(termPriority).negate());
-
-        for (var path : paths) {
-            LongList elements = new LongArrayList(path);
-
-            elements.sort((a, b) -> {
-                for (int i = 0; i < termPriority.length; i++) {
-                    if (termPriority[i] == a)
-                        return -1;
-                    if (termPriority[i] == b)
-                        return 1;
-                }
-                return 0;
-            });
-
-            var head = combinedIndexReader.findFullWord(elements.getLong(0));
-            for (int i = 1; i < elements.size(); i++) {
-                head.addInclusionFilter(combinedIndexReader.hasWordFull(elements.getLong(i)));
-            }
-            queryHeads.add(head);
-
-            // If there are few paths, we can afford to check the priority index as well
-            if (paths.size() < 4) {
-                var prioHead = combinedIndexReader.findPriorityWord(elements.getLong(0));
-                for (int i = 1; i < elements.size(); i++) {
-                    prioHead.addInclusionFilter(combinedIndexReader.hasWordPrio(elements.getLong(i)));
-                }
-                queryHeads.add(prioHead);
-            }
-        }
-
-        // Add additional conditions to the query heads
-        for (var query : queryHeads) {
-
-            // Advice terms are a special case, mandatory but not ranked, and exempt from re-writing
-            for (long term : terms.advice()) {
-                query = query.also(term);
-            }
-
-            for (long term : terms.excludes()) {
-                query = query.not(term);
-            }
-
-            // Run these filter steps last, as they'll worst-case cause as many page faults as there are
-            // items in the buffer
-            query.addInclusionFilter(combinedIndexReader.filterForParams(params));
-        }
-
-        return queryHeads
-                .stream()
-                .map(IndexQueryBuilder::build)
-                .toList();
-    }
-
-    private Predicate<LongSet> containsAll(long[] permitted) {
-        LongSet permittedTerms = new LongOpenHashSet(permitted);
-        return permittedTerms::containsAll;
-    }
-
-    private int compareKeywords(long a, long b) {
-        return Long.compare(
-                combinedIndexReader.numHits(a),
-                combinedIndexReader.numHits(b)
-        );
-    }
-
-    /** Return an array of encoded document metadata longs corresponding to the
-     * document identifiers provided; with metadata for termId. The input array
-     * docs[] *must* be sorted.
+    /** Returns the current index reader. It is acceptable to hold the returned value for the duration of the query,
+     * but not share it between queries
      */
-    public DocMetadataList getTermMetadata(long termId, CombinedDocIdList docs) {
-        return combinedIndexReader.getMetadata(termId, docs);
-    }
-
-    public long getDocumentMetadata(long docId) {
-        return combinedIndexReader.getDocumentMetadata(docId);
+    public CombinedIndexReader get() {
+        return combinedIndexReader;
     }
-
-    public int getHtmlFeatures(long docId) {
-        return combinedIndexReader.getHtmlFeatures(docId);
-    }
-
-    public int getTotalDocCount() {
-        return combinedIndexReader.totalDocCount();
-    }
-
-    public int getTermFrequency(long id) {
-        return (int) combinedIndexReader.numHits(id);
-    }
-
-    public int getTermFrequencyPrio(long id) {
-        return (int) combinedIndexReader.numHitsPrio(id);
-    }
 }
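The javadoc on get() states the contract: holding the returned reader for the duration of a query is fine, sharing it between queries is not, because the index can be reconstructed and the reader swapped out underneath. A self-contained sketch of what goes wrong with a cached reference follows; Stateful and Reader are illustrative names, and the generation counter is invented for the demo.

// Why get()'s "one query only" contract matters: the reader may be
// replaced whenever the index is reconstructed.
class Reader {
    final int generation;
    Reader(int generation) { this.generation = generation; }
}

class Stateful {
    private volatile Reader reader = new Reader(1);
    Reader get() { return reader; }
    void switchIndex() { reader = new Reader(reader.generation + 1); } // reload
}

public class LifecycleSketch {
    public static void main(String[] args) {
        Stateful index = new Stateful();

        Reader perQuery = index.get();      // OK: scoped to one query
        index.switchIndex();                // index is reconstructed

        // A reference cached beyond the query now lags the live index:
        System.out.println("cached generation:  " + perQuery.generation);    // 1
        System.out.println("current generation: " + index.get().generation); // 2
    }
}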
IndexMetadataService.java

@@ -18,21 +18,24 @@ import static nu.marginalia.index.results.model.TermCoherenceGroupList.TermCoher
 import static nu.marginalia.index.results.model.TermMetadataForCombinedDocumentIds.DocumentsWithMetadata;
 
 public class IndexMetadataService {
-    private final StatefulIndex index;
+    private final StatefulIndex statefulIndex;
 
     @Inject
     public IndexMetadataService(StatefulIndex index) {
-        this.index = index;
+        this.statefulIndex = index;
     }
 
     public TermMetadataForCombinedDocumentIds getTermMetadataForDocuments(CombinedDocIdList combinedIdsAll,
                                                                           TermIdList termIdsList)
     {
+        var currentIndex = statefulIndex.get();
+
         Long2ObjectArrayMap<DocumentsWithMetadata> termdocToMeta =
                 new Long2ObjectArrayMap<>(termIdsList.size());
 
         for (long termId : termIdsList.array()) {
-            var metadata = index.getTermMetadata(termId, combinedIdsAll);
+            var metadata = currentIndex.getMetadata(termId, combinedIdsAll);
+
             termdocToMeta.put(termId,
                     new DocumentsWithMetadata(combinedIdsAll, metadata));
         }
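The metadata loop now runs against one reader snapshot fetched before the loop. Incidentally, Long2ObjectArrayMap is a sensible container here: fastutil's array-backed maps do a linear scan, which tends to beat hashing for the handful of terms a query carries. Below is a self-contained sketch of the map-building step, assuming fastutil on the classpath (it is already a dependency of this code) and with String standing in for DocumentsWithMetadata.

import it.unimi.dsi.fastutil.longs.Long2ObjectArrayMap;

// Sketch of the map-building step in getTermMetadataForDocuments();
// the real code stores DocumentsWithMetadata built from getMetadata().
public class TermMetadataSketch {
    public static void main(String[] args) {
        long[] termIds = {101L, 102L, 103L};

        // Array-backed map: linear scan, cheap for a query's small term list.
        var termdocToMeta = new Long2ObjectArrayMap<String>(termIds.length);

        for (long termId : termIds) {
            termdocToMeta.put(termId, "metadata for " + termId);
        }

        System.out.println(termdocToMeta);
    }
}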
IndexResultValuationContext.java

@@ -5,6 +5,7 @@ import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggre
 import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
 import nu.marginalia.api.searchquery.model.results.SearchResultItem;
 import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
+import nu.marginalia.index.index.CombinedIndexReader;
 import nu.marginalia.index.index.StatefulIndex;
 import nu.marginalia.index.model.SearchParameters;
 import nu.marginalia.index.results.model.ids.CombinedDocIdList;
@@ -24,7 +25,7 @@ import java.util.List;
  * It holds the data required to perform the scoring, as there is strong
  * reasons to cache this data, and performs the calculations */
 public class IndexResultValuationContext {
-    private final StatefulIndex statefulIndex;
+    private final CombinedIndexReader index;
     private final QueryParams queryParams;
 
     private final TermMetadataForCombinedDocumentIds termMetadataForCombinedDocumentIds;
@@ -42,7 +43,7 @@ public class IndexResultValuationContext {
                                        ResultRankingContext rankingContext,
                                        SearchParameters params
     ) {
-        this.statefulIndex = statefulIndex;
+        this.index = statefulIndex.get();
         this.rankingContext = rankingContext;
         this.searchResultValuator = searchResultValuator;
 
@@ -67,8 +68,8 @@ public class IndexResultValuationContext {
         if (!searchTerms.coherences.test(termMetadataForCombinedDocumentIds, combinedId))
             return null;
 
-        long docMetadata = statefulIndex.getDocumentMetadata(docId);
-        int htmlFeatures = statefulIndex.getHtmlFeatures(docId);
+        long docMetadata = index.getDocumentMetadata(docId);
+        int htmlFeatures = index.getHtmlFeatures(docId);
 
         SearchResultItem searchResult = new SearchResultItem(docId,
                 docMetadata,
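Here the snapshot is captured once in the constructor and kept in a final field, which is safe precisely because a valuation context lives for a single query. A short illustrative sketch, with hypothetical names:

// Capturing the snapshot in a field is fine when the object's lifetime
// equals the query's lifetime, as with IndexResultValuationContext.
class Reader { long docMetadata(long docId) { return docId * 31; } }

class Holder {
    private volatile Reader current = new Reader();
    Reader get() { return current; }
}

class ValuationContext {
    private final Reader index;          // snapshot, fixed for this query

    ValuationContext(Holder holder) {
        this.index = holder.get();       // one fetch in the constructor
    }

    long value(long docId) { return index.docMetadata(docId); }
}

public class ContextSketch {
    public static void main(String[] args) {
        var ctx = new ValuationContext(new Holder());
        System.out.println(ctx.value(7L));
    }
}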
BTreeReader.java

@@ -19,7 +19,7 @@ public class BTreeReader {
 
     public BTreeReader(LongArray file, BTreeContext ctx, long offset) {
         this.ctx = ctx;
-        this.header = readHeader(file, offset);
+        this.header = new BTreeHeader(file, offset);
 
         dataBlockEnd = (long) ctx.entrySize * header.numEntries();
         index = file.range(header.indexOffsetLongs(), header.dataOffsetLongs());
@@ -35,10 +35,6 @@ public class BTreeReader {
         return index;
     }
 
-    public static BTreeHeader readHeader(LongArray file, long fileOffset) {
-        return new BTreeHeader(file, fileOffset);
-    }
-
     public BTreeHeader getHeader() {
         return header;
     }
@@ -153,7 +149,6 @@ public class BTreeReader {
             pointer.walkToData(keys[i]);
 
             long dataAddress = pointer.findData(keys[i]);
-
             if (dataAddress >= 0) {
                 ret[i] = data.get(dataAddress + offset);
             }