(index-reverse) Split index construction into separate packages for full and priority index

Viktor Lofgren 2024-07-06 15:44:47 +02:00
parent a4ecd5f4ce
commit 85c99ae808
24 changed files with 1006 additions and 139 deletions

View File

@@ -1,4 +1,4 @@
-package nu.marginalia.index.construction;
+package nu.marginalia.index.construction.full;
 import nu.marginalia.array.LongArray;
 import nu.marginalia.array.algo.LongArrayTransformations;
@@ -9,7 +9,7 @@ import java.io.IOException;
 import java.nio.channels.FileChannel;
 /** Constructs the BTrees in a reverse index */
-public class ReverseIndexBTreeTransformer implements LongArrayTransformations.LongIOTransformer {
+public class FullIndexBTreeTransformer implements LongArrayTransformations.LongIOTransformer {
     private final BTreeWriter writer;
     private final FileChannel intermediateChannel;
@@ -18,10 +18,10 @@ public class ReverseIndexBTreeTransformer implements LongArrayTransformations.LongIOTransformer {
     long start = 0;
     long writeOffset = 0;
-    public ReverseIndexBTreeTransformer(LongArray urlsFileMap,
+    public FullIndexBTreeTransformer(LongArray urlsFileMap,
                                         int entrySize,
                                         BTreeContext bTreeContext,
                                         FileChannel intermediateChannel) {
         this.writer = new BTreeWriter(urlsFileMap, bTreeContext);
         this.entrySize = entrySize;
         this.intermediateChannel = intermediateChannel;

View File

@@ -1,6 +1,9 @@
-package nu.marginalia.index.construction;
+package nu.marginalia.index.construction.full;
 import lombok.SneakyThrows;
+import nu.marginalia.index.construction.DocIdRewriter;
+import nu.marginalia.index.construction.JournalReaderSource;
+import nu.marginalia.index.construction.PositionsFileConstructor;
 import nu.marginalia.process.control.ProcessHeartbeat;
 import nu.marginalia.index.journal.IndexJournalFileNames;
 import org.slf4j.Logger;
@@ -10,9 +13,9 @@ import java.io.IOException;
 import java.nio.file.Path;
 import java.util.concurrent.atomic.AtomicInteger;
-public class ReverseIndexConstructor {
+public class FullIndexConstructor {
-    private static final Logger logger = LoggerFactory.getLogger(ReverseIndexConstructor.class);
+    private static final Logger logger = LoggerFactory.getLogger(FullIndexConstructor.class);
     public enum CreateReverseIndexSteps {
         CONSTRUCT,
@@ -27,12 +30,12 @@ public class ReverseIndexConstructor {
     private final DocIdRewriter docIdRewriter;
     private final Path tmpDir;
-    public ReverseIndexConstructor(Path outputFileDocs,
+    public FullIndexConstructor(Path outputFileDocs,
                                    Path outputFileWords,
                                    Path outputFilePositions,
                                    JournalReaderSource readerSource,
                                    DocIdRewriter docIdRewriter,
                                    Path tmpDir) {
         this.outputFileDocs = outputFileDocs;
         this.outputFileWords = outputFileWords;
         this.outputFilePositions = outputFilePositions;
@@ -77,20 +80,20 @@ public class ReverseIndexConstructor {
     }
     @SneakyThrows
-    private ReversePreindexReference construct(Path input, PositionsFileConstructor positionsFileConstructor) {
+    private FullPreindexReference construct(Path input, PositionsFileConstructor positionsFileConstructor) {
-        return ReversePreindex
+        return FullPreindex
                 .constructPreindex(readerSource.construct(input), positionsFileConstructor, docIdRewriter, tmpDir)
                 .closeToReference();
     }
     @SneakyThrows
-    private ReversePreindexReference merge(ReversePreindexReference leftR, ReversePreindexReference rightR) {
+    private FullPreindexReference merge(FullPreindexReference leftR, FullPreindexReference rightR) {
         var left = leftR.open();
         var right = rightR.open();
         try {
-            return ReversePreindex.merge(tmpDir, left, right).closeToReference();
+            return FullPreindex.merge(tmpDir, left, right).closeToReference();
         }
         finally {
             left.delete();
@@ -101,7 +104,7 @@ public class ReverseIndexConstructor {
     }
     @SneakyThrows
-    private void finalizeIndex(ReversePreindexReference finalPR) {
+    private void finalizeIndex(FullPreindexReference finalPR) {
         var finalP = finalPR.open();
         finalP.finalizeIndex(outputFileDocs, outputFileWords);
         finalP.delete();
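
For orientation, a minimal sketch of how the renamed constructor is wired up, using only the signatures visible in this diff; the paths, heartbeat and readerSource values are hypothetical placeholders, not the repo's actual call site:

// Sketch only: driving FullIndexConstructor as declared above.
// Every concrete value here is a placeholder.
FullIndexConstructor constructor = new FullIndexConstructor(
        outputDir.resolve("docs.dat"),       // outputFileDocs
        outputDir.resolve("words.dat"),      // outputFileWords
        outputDir.resolve("positions.dat"),  // outputFilePositions
        readerSource,                        // JournalReaderSource: maps a journal Path to a reader
        DocIdRewriter.identity(),            // identity rewriter, as the tests in this commit use
        tmpDir);
constructor.createReverseIndex(heartbeat, "full-index", sourceBaseDir);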

View File

@@ -1,9 +1,13 @@
-package nu.marginalia.index.construction;
+package nu.marginalia.index.construction.full;
 import nu.marginalia.array.LongArray;
 import nu.marginalia.array.LongArrayFactory;
 import nu.marginalia.btree.BTreeWriter;
 import nu.marginalia.index.ReverseIndexParameters;
+import nu.marginalia.index.construction.CountToOffsetTransformer;
+import nu.marginalia.index.construction.DocIdRewriter;
+import nu.marginalia.index.construction.IndexSizeEstimator;
+import nu.marginalia.index.construction.PositionsFileConstructor;
 import nu.marginalia.index.journal.reader.IndexJournalReader;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -25,13 +29,13 @@ import static nu.marginalia.array.algo.TwoArrayOperations.*;
  * the union of their data. This operation requires no additional
  * RAM.
  */
-public class ReversePreindex {
+public class FullPreindex {
-    final ReversePreindexWordSegments segments;
+    final FullPreindexWordSegments segments;
-    final ReversePreindexDocuments documents;
+    final FullPreindexDocuments documents;
-    private static final Logger logger = LoggerFactory.getLogger(ReversePreindex.class);
+    private static final Logger logger = LoggerFactory.getLogger(FullPreindex.class);
-    public ReversePreindex(ReversePreindexWordSegments segments, ReversePreindexDocuments documents) {
+    public FullPreindex(FullPreindexWordSegments segments, FullPreindexDocuments documents) {
         this.segments = segments;
         this.documents = documents;
     }
@@ -39,27 +43,27 @@ public class ReversePreindex {
     /** Constructs a new preindex with the data associated with reader. The backing files
      * will have randomly assigned names.
      */
-    public static ReversePreindex constructPreindex(IndexJournalReader reader,
+    public static FullPreindex constructPreindex(IndexJournalReader reader,
                                                     PositionsFileConstructor positionsFileConstructor,
                                                     DocIdRewriter docIdRewriter,
                                                     Path workDir) throws IOException
     {
         Path segmentWordsFile = Files.createTempFile(workDir, "segment_words", ".dat");
         Path segmentCountsFile = Files.createTempFile(workDir, "segment_counts", ".dat");
         Path docsFile = Files.createTempFile(workDir, "docs", ".dat");
-        var segments = ReversePreindexWordSegments.construct(reader, segmentWordsFile, segmentCountsFile);
+        var segments = FullPreindexWordSegments.construct(reader, segmentWordsFile, segmentCountsFile);
-        var docs = ReversePreindexDocuments.construct(docsFile, workDir, reader, docIdRewriter, positionsFileConstructor, segments);
+        var docs = FullPreindexDocuments.construct(docsFile, workDir, reader, docIdRewriter, positionsFileConstructor, segments);
-        return new ReversePreindex(segments, docs);
+        return new FullPreindex(segments, docs);
     }
     /** Close the associated memory mapped areas and return
      * a dehydrated version of this object that can be re-opened
      * later.
      */
-    public ReversePreindexReference closeToReference() {
+    public FullPreindexReference closeToReference() {
         try {
-            return new ReversePreindexReference(segments, documents);
+            return new FullPreindexReference(segments, documents);
         }
         finally {
             segments.force();
@@ -85,7 +89,7 @@ public class ReversePreindex {
         LongArray finalDocs = LongArrayFactory.mmapForWritingConfined(outputFileDocs, sizeEstimator.size);
         try (var intermediateDocChannel = documents.createDocumentsFileChannel()) {
             offsets.transformEachIO(0, offsets.size(),
-                    new ReverseIndexBTreeTransformer(finalDocs, 2,
+                    new FullIndexBTreeTransformer(finalDocs, 2,
                             ReverseIndexParameters.docsBTreeContext,
                             intermediateDocChannel));
             intermediateDocChannel.force(false);
@@ -126,11 +130,11 @@ public class ReversePreindex {
         documents.delete();
     }
-    public static ReversePreindex merge(Path destDir,
+    public static FullPreindex merge(Path destDir,
-                                        ReversePreindex left,
+                                     FullPreindex left,
-                                        ReversePreindex right) throws IOException {
+                                     FullPreindex right) throws IOException {
-        ReversePreindexWordSegments mergingSegment =
+        FullPreindexWordSegments mergingSegment =
                 createMergedSegmentWordFile(destDir, left.segments, right.segments);
         var mergingIter = mergingSegment.constructionIterator(2);
@@ -198,18 +202,18 @@ public class ReversePreindex {
         mergedDocuments = shrinkMergedDocuments(mergedDocuments,
                 docsFile, 2 * mergingSegment.totalSize());
-        return new ReversePreindex(
+        return new FullPreindex(
                 mergingSegment,
-                new ReversePreindexDocuments(mergedDocuments, docsFile)
+                new FullPreindexDocuments(mergedDocuments, docsFile)
         );
     }
     /** Create a segment word file with each word from both inputs, with zero counts for all the data.
      * This is an intermediate product in merging.
      */
-    static ReversePreindexWordSegments createMergedSegmentWordFile(Path destDir,
+    static FullPreindexWordSegments createMergedSegmentWordFile(Path destDir,
-                                                                   ReversePreindexWordSegments left,
+                                                                FullPreindexWordSegments left,
-                                                                   ReversePreindexWordSegments right) throws IOException {
+                                                                FullPreindexWordSegments right) throws IOException {
         Path segmentWordsFile = Files.createTempFile(destDir, "segment_words", ".dat");
         Path segmentCountsFile = Files.createTempFile(destDir, "segment_counts", ".dat");
@@ -228,7 +232,7 @@ public class ReversePreindex {
         LongArray counts = LongArrayFactory.mmapForWritingConfined(segmentCountsFile, segmentsSize);
-        return new ReversePreindexWordSegments(wordIdsFile, counts, segmentWordsFile, segmentCountsFile);
+        return new FullPreindexWordSegments(wordIdsFile, counts, segmentWordsFile, segmentCountsFile);
     }
     /** It's possible we overestimated the necessary size of the documents file,
@@ -256,12 +260,12 @@ public class ReversePreindex {
     /** Merge contents of the segments indicated by leftIter and rightIter into the destination
      * segment, and advance the construction iterator with the appropriate size.
      */
-    private static void mergeSegments(ReversePreindexWordSegments.SegmentIterator leftIter,
+    private static void mergeSegments(FullPreindexWordSegments.SegmentIterator leftIter,
-                                      ReversePreindexWordSegments.SegmentIterator rightIter,
+                                      FullPreindexWordSegments.SegmentIterator rightIter,
-                                      ReversePreindexDocuments left,
+                                      FullPreindexDocuments left,
-                                      ReversePreindexDocuments right,
+                                      FullPreindexDocuments right,
                                       LongArray dest,
-                                      ReversePreindexWordSegments.SegmentConstructionIterator destIter)
+                                      FullPreindexWordSegments.SegmentConstructionIterator destIter)
     {
         long segSize = mergeArrays2(dest,
                 left.documents,
@@ -279,10 +283,10 @@ public class ReversePreindex {
     /** Copy the data from the source segment at the position and length indicated by sourceIter,
      * into the destination segment, and advance the construction iterator.
      */
-    private static boolean copySegment(ReversePreindexWordSegments.SegmentIterator sourceIter,
+    private static boolean copySegment(FullPreindexWordSegments.SegmentIterator sourceIter,
                                        LongArray dest,
                                        FileChannel sourceChannel,
-                                       ReversePreindexWordSegments.SegmentConstructionIterator mergingIter) throws IOException {
+                                       FullPreindexWordSegments.SegmentConstructionIterator mergingIter) throws IOException {
         long size = sourceIter.endOffset - sourceIter.startOffset;
         long start = mergingIter.startOffset;
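
Taken together, the renamed classes keep the old ReversePreindex lifecycle. A hedged sketch of the construct/merge/finalize flow, using only methods that appear in this diff; the readers, positions constructor and paths are placeholders:

// Lifecycle sketch; all inputs are hypothetical placeholders.
FullPreindex left  = FullPreindex.constructPreindex(reader1, posConstructor, DocIdRewriter.identity(), workDir);
FullPreindex right = FullPreindex.constructPreindex(reader2, posConstructor, DocIdRewriter.identity(), workDir);

// merge() writes a third preindex under workDir; per the javadoc it needs no extra RAM
FullPreindex merged = FullPreindex.merge(workDir, left, right);
left.delete();
right.delete();

// closeToReference() unmaps the backing memory; open() re-hydrates it later
FullPreindexReference ref = merged.closeToReference();
FullPreindex reopened = ref.open();
reopened.finalizeIndex(outDocs, outWords);   // builds the BTree-backed reverse index
reopened.delete();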

View File

@@ -1,8 +1,10 @@
-package nu.marginalia.index.construction;
+package nu.marginalia.index.construction.full;
 import lombok.SneakyThrows;
 import nu.marginalia.array.LongArray;
 import nu.marginalia.array.LongArrayFactory;
+import nu.marginalia.index.construction.DocIdRewriter;
+import nu.marginalia.index.construction.PositionsFileConstructor;
 import nu.marginalia.index.journal.reader.IndexJournalReader;
 import nu.marginalia.rwf.RandomFileAssembler;
 import org.slf4j.Logger;
@@ -20,35 +22,35 @@ import java.util.concurrent.TimeUnit;
 /** A LongArray with document data, segmented according to
  * the associated ReversePreindexWordSegments data
  */
-public class ReversePreindexDocuments {
+public class FullPreindexDocuments {
     public final LongArray documents;
     private static PositionsFileConstructor positionsFileConstructor;
     private static final int RECORD_SIZE_LONGS = 2;
-    private static final Logger logger = LoggerFactory.getLogger(ReversePreindexDocuments.class);
+    private static final Logger logger = LoggerFactory.getLogger(FullPreindexDocuments.class);
     public final Path file;
-    public ReversePreindexDocuments(LongArray documents, Path file) {
+    public FullPreindexDocuments(LongArray documents, Path file) {
         this.documents = documents;
         this.file = file;
     }
-    public static ReversePreindexDocuments construct(
+    public static FullPreindexDocuments construct(
             Path docsFile,
             Path workDir,
             IndexJournalReader reader,
             DocIdRewriter docIdRewriter,
             PositionsFileConstructor positionsFileConstructor,
-            ReversePreindexWordSegments segments) throws IOException {
+            FullPreindexWordSegments segments) throws IOException {
-        ReversePreindexDocuments.positionsFileConstructor = positionsFileConstructor;
+        FullPreindexDocuments.positionsFileConstructor = positionsFileConstructor;
         createUnsortedDocsFile(docsFile, workDir, reader, segments, docIdRewriter);
         LongArray docsFileMap = LongArrayFactory.mmapForModifyingShared(docsFile);
         sortDocsFile(docsFileMap, segments);
-        return new ReversePreindexDocuments(docsFileMap, docsFile);
+        return new FullPreindexDocuments(docsFileMap, docsFile);
     }
     public FileChannel createDocumentsFileChannel() throws IOException {
@@ -67,7 +69,7 @@ public class ReversePreindexDocuments {
     private static void createUnsortedDocsFile(Path docsFile,
                                                Path workDir,
                                                IndexJournalReader reader,
-                                               ReversePreindexWordSegments segments,
+                                               FullPreindexWordSegments segments,
                                                DocIdRewriter docIdRewriter) throws IOException {
         long fileSizeLongs = RECORD_SIZE_LONGS * segments.totalSize();
@@ -99,7 +101,7 @@ public class ReversePreindexDocuments {
     }
     @SneakyThrows
-    private static void sortDocsFile(LongArray docsFileMap, ReversePreindexWordSegments segments) throws IOException {
+    private static void sortDocsFile(LongArray docsFileMap, FullPreindexWordSegments segments) throws IOException {
         var iter = segments.iterator(RECORD_SIZE_LONGS);

View File

@@ -1,33 +1,33 @@
-package nu.marginalia.index.construction;
+package nu.marginalia.index.construction.full;
 import nu.marginalia.array.LongArrayFactory;
 import java.io.IOException;
 import java.nio.file.Path;
-/** This is a dehydrated version of a ReversePreIndex, that only
+/** This is a dehydrated version of a FullPreIndex, that only
  * keeps references to its location on disk but does not hold associated
  * memory maps.
  */
-public record ReversePreindexReference(
+public record FullPreindexReference(
         Path wordsFile,
         Path countsFile,
         Path documentsFile
 )
 {
-    public ReversePreindexReference(ReversePreindexWordSegments segments, ReversePreindexDocuments documents) {
+    public FullPreindexReference(FullPreindexWordSegments segments, FullPreindexDocuments documents) {
         this(segments.wordsFile, segments.countsFile, documents.file);
     }
-    public ReversePreindex open() throws IOException {
+    public FullPreindex open() throws IOException {
-        return new ReversePreindex(
+        return new FullPreindex(
-                new ReversePreindexWordSegments(
+                new FullPreindexWordSegments(
                         LongArrayFactory.mmapForModifyingShared(wordsFile),
                         LongArrayFactory.mmapForModifyingShared(countsFile),
                         wordsFile,
                         countsFile
                 ),
-                new ReversePreindexDocuments(
+                new FullPreindexDocuments(
                         LongArrayFactory.mmapForModifyingShared(documentsFile),
                         documentsFile
                 )

View File

@@ -1,4 +1,4 @@
-package nu.marginalia.index.construction;
+package nu.marginalia.index.construction.full;
 import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap;
 import it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap;
@@ -14,17 +14,17 @@ import java.nio.file.Path;
 /** A pair of file-backed arrays of sorted wordIds
  * and the count of documents associated with each termId.
  */
-public class ReversePreindexWordSegments {
+public class FullPreindexWordSegments {
     public final LongArray wordIds;
     public final LongArray counts;
     final Path wordsFile;
     final Path countsFile;
-    public ReversePreindexWordSegments(LongArray wordIds,
+    public FullPreindexWordSegments(LongArray wordIds,
                                        LongArray counts,
                                        Path wordsFile,
                                        Path countsFile)
     {
         assert wordIds.size() == counts.size();
@@ -51,9 +51,9 @@ public class ReversePreindexWordSegments {
         return ret;
     }
-    public static ReversePreindexWordSegments construct(IndexJournalReader reader,
+    public static FullPreindexWordSegments construct(IndexJournalReader reader,
                                                         Path wordIdsFile,
                                                         Path countsFile)
             throws IOException
     {
         Long2IntOpenHashMap countsMap = new Long2IntOpenHashMap(100_000, 0.75f);
@@ -79,7 +79,7 @@ public class ReversePreindexWordSegments {
             counts.set(i, countsMap.get(words.get(i)));
         }
-        return new ReversePreindexWordSegments(words, counts, wordIdsFile, countsFile);
+        return new FullPreindexWordSegments(words, counts, wordIdsFile, countsFile);
     }
     public SegmentIterator iterator(int recordSize) {

View File

@@ -0,0 +1,48 @@
package nu.marginalia.index.construction.prio;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.algo.LongArrayTransformations;
import nu.marginalia.btree.BTreeWriter;
import nu.marginalia.btree.model.BTreeContext;
import java.io.IOException;
import java.nio.channels.FileChannel;
/** Constructs the BTrees in a reverse index */
public class PrioIndexBTreeTransformer implements LongArrayTransformations.LongIOTransformer {
private final BTreeWriter writer;
private final FileChannel intermediateChannel;
private final int entrySize;
long start = 0;
long writeOffset = 0;
public PrioIndexBTreeTransformer(LongArray urlsFileMap,
int entrySize,
BTreeContext bTreeContext,
FileChannel intermediateChannel) {
this.writer = new BTreeWriter(urlsFileMap, bTreeContext);
this.entrySize = entrySize;
this.intermediateChannel = intermediateChannel;
}
@Override
public long transform(long pos, long end) throws IOException {
final int size = (int) ((end - start) / entrySize);
if (size == 0) {
return -1;
}
final long offsetForBlock = writeOffset;
writeOffset += writer.write(writeOffset, size,
mapRegion -> mapRegion.transferFrom(intermediateChannel, start, 0, end - start)
);
start = end;
return offsetForBlock;
}
}
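
The contract here mirrors the full-index transformer: each call receives the end offset of one word segment, writes a BTree for the slice [start, end), and returns the block's write offset, or -1 for an empty segment. A toy model of that interaction, under the assumption that transformEachIO replaces each stored end offset with the value transform() returns:

// Toy model only; not the LongArray library code.
long[] offsets = {4, 10, 16};   // running end offsets, entrySize = 2
long start = 0, writeOffset = 0;
for (int i = 0; i < offsets.length; i++) {
    long end = offsets[i];
    int size = (int) ((end - start) / 2);
    long blockOffset = (size == 0) ? -1 : writeOffset;
    writeOffset += size;        // stand-in for writer.write(...)
    start = end;
    offsets[i] = blockOffset;   // offsets now index into the BTree file
}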

View File

@@ -0,0 +1,114 @@
package nu.marginalia.index.construction.prio;
import lombok.SneakyThrows;
import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.JournalReaderSource;
import nu.marginalia.index.construction.PositionsFileConstructor;
import nu.marginalia.index.journal.IndexJournalFileNames;
import nu.marginalia.process.control.ProcessHeartbeat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.file.Path;
import java.util.concurrent.atomic.AtomicInteger;
public class PrioIndexConstructor {
private static final Logger logger = LoggerFactory.getLogger(PrioIndexConstructor.class);
public enum CreateReverseIndexSteps {
CONSTRUCT,
FINALIZE,
FINISHED
}
private final Path outputFileDocs;
private final Path outputFileWords;
private final Path outputFilePositions;
private final JournalReaderSource readerSource;
private final DocIdRewriter docIdRewriter;
private final Path tmpDir;
public PrioIndexConstructor(Path outputFileDocs,
Path outputFileWords,
Path outputFilePositions,
JournalReaderSource readerSource,
DocIdRewriter docIdRewriter,
Path tmpDir) {
this.outputFileDocs = outputFileDocs;
this.outputFileWords = outputFileWords;
this.outputFilePositions = outputFilePositions;
this.readerSource = readerSource;
this.docIdRewriter = docIdRewriter;
this.tmpDir = tmpDir;
}
public void createReverseIndex(ProcessHeartbeat processHeartbeat,
String processName,
Path sourceBaseDir) throws IOException
{
var inputs = IndexJournalFileNames.findJournalFiles(sourceBaseDir);
if (inputs.isEmpty()) {
logger.error("No journal files in base dir {}", sourceBaseDir);
return;
}
try (var heartbeat = processHeartbeat.createProcessTaskHeartbeat(CreateReverseIndexSteps.class, processName);
var preindexHeartbeat = processHeartbeat.createAdHocTaskHeartbeat("constructPreindexes");
var posConstructor = new PositionsFileConstructor(outputFilePositions)
) {
heartbeat.progress(CreateReverseIndexSteps.CONSTRUCT);
AtomicInteger progress = new AtomicInteger(0);
inputs
.parallelStream()
.map(in -> {
preindexHeartbeat.progress("PREINDEX/MERGE", progress.incrementAndGet(), inputs.size());
return construct(in, posConstructor);
})
.reduce(this::merge)
.ifPresent((index) -> {
heartbeat.progress(CreateReverseIndexSteps.FINALIZE);
finalizeIndex(index);
heartbeat.progress(CreateReverseIndexSteps.FINISHED);
});
heartbeat.progress(CreateReverseIndexSteps.FINISHED);
}
}
@SneakyThrows
private PrioPreindexReference construct(Path input, PositionsFileConstructor positionsFileConstructor) {
return PrioPreindex
.constructPreindex(readerSource.construct(input), positionsFileConstructor, docIdRewriter, tmpDir)
.closeToReference();
}
@SneakyThrows
private PrioPreindexReference merge(PrioPreindexReference leftR, PrioPreindexReference rightR) {
var left = leftR.open();
var right = rightR.open();
try {
return PrioPreindex.merge(tmpDir, left, right).closeToReference();
}
finally {
left.delete();
right.delete();
}
}
@SneakyThrows
private void finalizeIndex(PrioPreindexReference finalPR) {
var finalP = finalPR.open();
finalP.finalizeIndex(outputFileDocs, outputFileWords);
finalP.delete();
}
}
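
The pipeline in createReverseIndex preindexes all journal files in parallel and pairwise-merges the results down to a single reference. Since Stream.reduce is free to pair intermediate results in any order, this relies on merge() producing the same union regardless of pairing. A sketch of the pattern, with strings standing in for preindex references:

// Shape of the map/reduce pipeline above; strings stand in for references.
var inputs = java.util.List.of("a", "b", "c", "d");
String merged = inputs.parallelStream()
        .map(in -> "pre(" + in + ")")                   // construct()
        .reduce((l, r) -> "merge(" + l + "," + r + ")") // merge()
        .orElseThrow();
System.out.println(merged); // e.g. merge(merge(pre(a),pre(b)),merge(pre(c),pre(d)))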

View File

@@ -0,0 +1,310 @@
package nu.marginalia.index.construction.prio;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.btree.BTreeWriter;
import nu.marginalia.index.ReverseIndexParameters;
import nu.marginalia.index.construction.CountToOffsetTransformer;
import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.IndexSizeEstimator;
import nu.marginalia.index.construction.PositionsFileConstructor;
import nu.marginalia.index.journal.reader.IndexJournalReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import static nu.marginalia.array.algo.TwoArrayOperations.*;
/** Contains the data that would go into a reverse index,
* that is, a mapping from words to documents, minus the actual
* index structure that makes the data quick to access while
* searching.
* <p>
* Two preindexes can be merged into a third preindex containing
* the union of their data. This operation requires no additional
* RAM.
*/
public class PrioPreindex {
final PrioPreindexWordSegments segments;
final PrioPreindexDocuments documents;
private static final Logger logger = LoggerFactory.getLogger(PrioPreindex.class);
public PrioPreindex(PrioPreindexWordSegments segments, PrioPreindexDocuments documents) {
this.segments = segments;
this.documents = documents;
}
/** Constructs a new preindex with the data associated with reader. The backing files
* will have randomly assigned names.
*/
public static PrioPreindex constructPreindex(IndexJournalReader reader,
PositionsFileConstructor positionsFileConstructor,
DocIdRewriter docIdRewriter,
Path workDir) throws IOException
{
Path segmentWordsFile = Files.createTempFile(workDir, "segment_words", ".dat");
Path segmentCountsFile = Files.createTempFile(workDir, "segment_counts", ".dat");
Path docsFile = Files.createTempFile(workDir, "docs", ".dat");
var segments = PrioPreindexWordSegments.construct(reader, segmentWordsFile, segmentCountsFile);
var docs = PrioPreindexDocuments.construct(docsFile, workDir, reader, docIdRewriter, positionsFileConstructor, segments);
return new PrioPreindex(segments, docs);
}
/** Close the associated memory mapped areas and return
* a dehydrated version of this object that can be re-opened
* later.
*/
public PrioPreindexReference closeToReference() {
try {
return new PrioPreindexReference(segments, documents);
}
finally {
segments.force();
documents.force();
segments.close();
documents.close();
}
}
/** Transform the preindex into a reverse index */
public void finalizeIndex(Path outputFileDocs, Path outputFileWords) throws IOException {
var offsets = segments.counts;
Files.deleteIfExists(outputFileDocs);
Files.deleteIfExists(outputFileWords);
// Estimate the size of the docs index data
offsets.transformEach(0, offsets.size(), new CountToOffsetTransformer(2));
IndexSizeEstimator sizeEstimator = new IndexSizeEstimator(ReverseIndexParameters.docsBTreeContext, 2);
offsets.fold(0, 0, offsets.size(), sizeEstimator);
// Write the docs file
LongArray finalDocs = LongArrayFactory.mmapForWritingConfined(outputFileDocs, sizeEstimator.size);
try (var intermediateDocChannel = documents.createDocumentsFileChannel()) {
offsets.transformEachIO(0, offsets.size(),
new PrioIndexBTreeTransformer(finalDocs, 2,
ReverseIndexParameters.docsBTreeContext,
intermediateDocChannel));
intermediateDocChannel.force(false);
}
LongArray wordIds = segments.wordIds;
if (offsets.size() != wordIds.size())
throw new IllegalStateException("Offsets and word-ids of different size");
if (offsets.size() > Integer.MAX_VALUE) {
throw new IllegalStateException("offsets.size() too big!");
}
// Estimate the size of the words index data
long wordsSize = ReverseIndexParameters.wordsBTreeContext.calculateSize((int) offsets.size());
// Construct the tree
LongArray wordsArray = LongArrayFactory.mmapForWritingConfined(outputFileWords, wordsSize);
new BTreeWriter(wordsArray, ReverseIndexParameters.wordsBTreeContext)
.write(0, (int) offsets.size(), mapRegion -> {
for (long i = 0; i < offsets.size(); i++) {
mapRegion.set(2*i, wordIds.get(i));
mapRegion.set(2*i + 1, offsets.get(i));
}
});
finalDocs.force();
finalDocs.close();
wordsArray.force();
wordsArray.close();
}
/** Delete all files associated with this pre-index */
public void delete() throws IOException {
segments.delete();
documents.delete();
}
public static PrioPreindex merge(Path destDir,
PrioPreindex left,
PrioPreindex right) throws IOException {
PrioPreindexWordSegments mergingSegment =
createMergedSegmentWordFile(destDir, left.segments, right.segments);
var mergingIter = mergingSegment.constructionIterator(2);
var leftIter = left.segments.iterator(2);
var rightIter = right.segments.iterator(2);
Path docsFile = Files.createTempFile(destDir, "docs", ".dat");
LongArray mergedDocuments = LongArrayFactory.mmapForWritingConfined(docsFile, left.documents.size() + right.documents.size());
leftIter.next();
rightIter.next();
try (FileChannel leftChannel = left.documents.createDocumentsFileChannel();
FileChannel rightChannel = right.documents.createDocumentsFileChannel())
{
while (mergingIter.canPutMore()
&& leftIter.isPositionBeforeEnd()
&& rightIter.isPositionBeforeEnd())
{
final long currentWord = mergingIter.wordId;
if (leftIter.wordId == currentWord && rightIter.wordId == currentWord)
{
// both inputs have documents for the current word
mergeSegments(leftIter, rightIter,
left.documents, right.documents,
mergedDocuments, mergingIter);
}
else if (leftIter.wordId == currentWord) {
if (!copySegment(leftIter, mergedDocuments, leftChannel, mergingIter))
break;
}
else if (rightIter.wordId == currentWord) {
if (!copySegment(rightIter, mergedDocuments, rightChannel, mergingIter))
break;
}
else assert false : "This should never happen"; // the helvetica scenario
}
if (leftIter.isPositionBeforeEnd()) {
while (copySegment(leftIter, mergedDocuments, leftChannel, mergingIter));
}
if (rightIter.isPositionBeforeEnd()) {
while (copySegment(rightIter, mergedDocuments, rightChannel, mergingIter));
}
}
if (leftIter.isPositionBeforeEnd())
throw new IllegalStateException("Left has more to go");
if (rightIter.isPositionBeforeEnd())
throw new IllegalStateException("Right has more to go");
if (mergingIter.canPutMore())
throw new IllegalStateException("Source iters ran dry before merging iter");
mergingSegment.force();
// We may have overestimated the size of the merged docs size in the case there were
// duplicates in the data, so we need to shrink it to the actual size we wrote.
mergedDocuments = shrinkMergedDocuments(mergedDocuments,
docsFile, 2 * mergingSegment.totalSize());
return new PrioPreindex(
mergingSegment,
new PrioPreindexDocuments(mergedDocuments, docsFile)
);
}
/** Create a segment word file with each word from both inputs, with zero counts for all the data.
* This is an intermediate product in merging.
*/
static PrioPreindexWordSegments createMergedSegmentWordFile(Path destDir,
PrioPreindexWordSegments left,
PrioPreindexWordSegments right) throws IOException {
Path segmentWordsFile = Files.createTempFile(destDir, "segment_words", ".dat");
Path segmentCountsFile = Files.createTempFile(destDir, "segment_counts", ".dat");
// We need total size to request a direct LongArray range. Seems slower, but is faster.
// ... see LongArray.directRangeIfPossible(long start, long end)
long segmentsSize = countDistinctElements(left.wordIds, right.wordIds,
0, left.wordIds.size(),
0, right.wordIds.size());
LongArray wordIdsFile = LongArrayFactory.mmapForWritingConfined(segmentWordsFile, segmentsSize);
mergeArrays(wordIdsFile, left.wordIds, right.wordIds,
0,
0, left.wordIds.size(),
0, right.wordIds.size());
LongArray counts = LongArrayFactory.mmapForWritingConfined(segmentCountsFile, segmentsSize);
return new PrioPreindexWordSegments(wordIdsFile, counts, segmentWordsFile, segmentCountsFile);
}
/** It's possible we overestimated the necessary size of the documents file,
* this will permit us to shrink it down to the smallest necessary size.
*/
private static LongArray shrinkMergedDocuments(LongArray mergedDocuments, Path docsFile, long sizeLongs) throws IOException {
mergedDocuments.force();
long beforeSize = mergedDocuments.size();
long afterSize = sizeLongs * 8;
if (beforeSize != afterSize) {
mergedDocuments.close();
try (var bc = Files.newByteChannel(docsFile, StandardOpenOption.WRITE)) {
bc.truncate(sizeLongs * 8);
}
logger.info("Shrunk {} from {}b to {}b", docsFile, beforeSize, afterSize);
mergedDocuments = LongArrayFactory.mmapForWritingConfined(docsFile, sizeLongs);
}
return mergedDocuments;
}
/** Merge contents of the segments indicated by leftIter and rightIter into the destination
* segment, and advance the construction iterator with the appropriate size.
*/
private static void mergeSegments(PrioPreindexWordSegments.SegmentIterator leftIter,
PrioPreindexWordSegments.SegmentIterator rightIter,
PrioPreindexDocuments left,
PrioPreindexDocuments right,
LongArray dest,
PrioPreindexWordSegments.SegmentConstructionIterator destIter)
{
long segSize = mergeArrays2(dest,
left.documents,
right.documents,
destIter.startOffset,
leftIter.startOffset, leftIter.endOffset,
rightIter.startOffset, rightIter.endOffset);
long distinct = segSize / 2;
destIter.putNext(distinct);
leftIter.next();
rightIter.next();
}
/** Copy the data from the source segment at the position and length indicated by sourceIter,
* into the destination segment, and advance the construction iterator.
*/
private static boolean copySegment(PrioPreindexWordSegments.SegmentIterator sourceIter,
LongArray dest,
FileChannel sourceChannel,
PrioPreindexWordSegments.SegmentConstructionIterator mergingIter) throws IOException {
long size = sourceIter.endOffset - sourceIter.startOffset;
long start = mergingIter.startOffset;
long end = start + size;
dest.transferFrom(sourceChannel,
sourceIter.startOffset,
mergingIter.startOffset,
end);
boolean putNext = mergingIter.putNext(size / 2);
boolean iterNext = sourceIter.next();
if (!putNext && iterNext)
throw new IllegalStateException("Source iterator ran out before dest iterator?!");
return iterNext;
}
}
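
The merge loop distinguishes three cases per merged word: present in both inputs (mergeSegments) or present on only one side (copySegment); the final else is unreachable because the merged word list is the union of the inputs. A small worked example of the branch selection over sorted wordId lists:

// Branch selection from PrioPreindex.merge, with plain sorted arrays
// in place of segment iterators.
long[] left  = {10, 20, 30};
long[] right = {20, 40};
for (long currentWord : new long[] {10, 20, 30, 40}) {   // the merged union
    boolean inLeft  = java.util.Arrays.binarySearch(left, currentWord) >= 0;
    boolean inRight = java.util.Arrays.binarySearch(right, currentWord) >= 0;
    if (inLeft && inRight) System.out.println(currentWord + ": mergeSegments");
    else if (inLeft)       System.out.println(currentWord + ": copySegment(left)");
    else if (inRight)      System.out.println(currentWord + ": copySegment(right)");
    else                   throw new AssertionError("the helvetica scenario");
}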

View File

@@ -0,0 +1,141 @@
package nu.marginalia.index.construction.prio;
import lombok.SneakyThrows;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.PositionsFileConstructor;
import nu.marginalia.index.journal.reader.IndexJournalReader;
import nu.marginalia.rwf.RandomFileAssembler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
/** A LongArray with document data, segmented according to
* the associated ReversePreindexWordSegments data
*/
public class PrioPreindexDocuments {
public final LongArray documents;
private static PositionsFileConstructor positionsFileConstructor;
private static final int RECORD_SIZE_LONGS = 2;
private static final Logger logger = LoggerFactory.getLogger(PrioPreindexDocuments.class);
public final Path file;
public PrioPreindexDocuments(LongArray documents, Path file) {
this.documents = documents;
this.file = file;
}
public static PrioPreindexDocuments construct(
Path docsFile,
Path workDir,
IndexJournalReader reader,
DocIdRewriter docIdRewriter,
PositionsFileConstructor positionsFileConstructor,
PrioPreindexWordSegments segments) throws IOException {
PrioPreindexDocuments.positionsFileConstructor = positionsFileConstructor;
createUnsortedDocsFile(docsFile, workDir, reader, segments, docIdRewriter);
LongArray docsFileMap = LongArrayFactory.mmapForModifyingShared(docsFile);
sortDocsFile(docsFileMap, segments);
return new PrioPreindexDocuments(docsFileMap, docsFile);
}
public FileChannel createDocumentsFileChannel() throws IOException {
return (FileChannel) Files.newByteChannel(file, StandardOpenOption.READ);
}
public LongArray slice(long start, long end) {
return documents.range(start, end);
}
public long size() {
return documents.size();
}
private static void createUnsortedDocsFile(Path docsFile,
Path workDir,
IndexJournalReader reader,
PrioPreindexWordSegments segments,
DocIdRewriter docIdRewriter) throws IOException {
long fileSizeLongs = RECORD_SIZE_LONGS * segments.totalSize();
try (var assembly = RandomFileAssembler.create(workDir, fileSizeLongs);
var pointer = reader.newPointer())
{
var offsetMap = segments.asMap(RECORD_SIZE_LONGS);
offsetMap.defaultReturnValue(0);
while (pointer.nextDocument()) {
long rankEncodedId = docIdRewriter.rewriteDocId(pointer.documentId());
for (var termData : pointer) {
long termId = termData.termId();
long offset = offsetMap.addTo(termId, RECORD_SIZE_LONGS);
// write position data to the positions file and get the offset
long encodedPosOffset = positionsFileConstructor.add((byte) termData.metadata(), termData.positionsBuffer());
assembly.put(offset + 0, rankEncodedId);
assembly.put(offset + 1, encodedPosOffset);
}
}
assembly.write(docsFile);
}
}
@SneakyThrows
private static void sortDocsFile(LongArray docsFileMap, PrioPreindexWordSegments segments) throws IOException {
var iter = segments.iterator(RECORD_SIZE_LONGS);
ExecutorService sortingWorkers = Executors.newWorkStealingPool(Runtime.getRuntime().availableProcessors());
while (iter.next()) {
long iterStart = iter.startOffset;
long iterEnd = iter.endOffset;
if (iter.size() < 1024) {
docsFileMap.quickSortN(RECORD_SIZE_LONGS, iterStart, iterEnd);
}
else {
sortingWorkers.execute(() ->
docsFileMap.quickSortN(RECORD_SIZE_LONGS, iterStart, iterEnd));
}
}
sortingWorkers.shutdown();
while (!sortingWorkers.awaitTermination(1, TimeUnit.HOURS));
sortingWorkers.close();
}
public void delete() throws IOException {
Files.delete(this.file);
documents.close();
}
public void close() {
documents.close();
}
public void force() {
documents.force();
}
}
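
createUnsortedDocsFile leans on a fastutil idiom worth spelling out: asMap seeds each termId with its segment's start offset, and addTo returns the previous value while bumping it by RECORD_SIZE_LONGS, so the map doubles as a per-term write cursor. In isolation:

// The per-term write cursor idiom, isolated; values are illustrative.
var offsetMap = new it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap();
offsetMap.put(42L, 100L);              // termId 42's records start at offset 100
offsetMap.defaultReturnValue(0);

long first  = offsetMap.addTo(42L, 2); // returns 100; cursor advances to 102
long second = offsetMap.addTo(42L, 2); // returns 102; cursor advances to 104
// each returned value is the slot for one (rankEncodedId, encodedPosOffset) pair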

View File

@@ -0,0 +1,36 @@
package nu.marginalia.index.construction.prio;
import nu.marginalia.array.LongArrayFactory;
import java.io.IOException;
import java.nio.file.Path;
/** This is a dehydrated version of a PrioPreIndex, that only
* keeps references to its location on disk but does not hold associated
* memory maps.
*/
public record PrioPreindexReference(
Path wordsFile,
Path countsFile,
Path documentsFile
)
{
public PrioPreindexReference(PrioPreindexWordSegments segments, PrioPreindexDocuments documents) {
this(segments.wordsFile, segments.countsFile, documents.file);
}
public PrioPreindex open() throws IOException {
return new PrioPreindex(
new PrioPreindexWordSegments(
LongArrayFactory.mmapForModifyingShared(wordsFile),
LongArrayFactory.mmapForModifyingShared(countsFile),
wordsFile,
countsFile
),
new PrioPreindexDocuments(
LongArrayFactory.mmapForModifyingShared(documentsFile),
documentsFile
)
);
}
}

View File

@@ -0,0 +1,205 @@
package nu.marginalia.index.construction.prio;
import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap;
import it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap;
import it.unimi.dsi.fastutil.longs.LongIterator;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.index.journal.reader.IndexJournalReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
/** A pair of file-backed arrays of sorted wordIds
* and the count of documents associated with each termId.
*/
public class PrioPreindexWordSegments {
public final LongArray wordIds;
public final LongArray counts;
final Path wordsFile;
final Path countsFile;
public PrioPreindexWordSegments(LongArray wordIds,
LongArray counts,
Path wordsFile,
Path countsFile)
{
assert wordIds.size() == counts.size();
this.wordIds = wordIds;
this.counts = counts;
this.wordsFile = wordsFile;
this.countsFile = countsFile;
}
/** Returns a long-long hash map where each key is a termId,
* and each value is the start offset of the data.
*/
public Long2LongOpenHashMap asMap(int recordSize) {
if (wordIds.size() > Integer.MAX_VALUE)
throw new IllegalArgumentException("Cannot create a map with more than Integer.MAX_VALUE entries");
Long2LongOpenHashMap ret = new Long2LongOpenHashMap((int) wordIds.size(), 0.75f);
var iter = iterator(recordSize);
while (iter.next()) {
ret.put(iter.wordId, iter.startOffset);
}
return ret;
}
public static PrioPreindexWordSegments construct(IndexJournalReader reader,
Path wordIdsFile,
Path countsFile)
throws IOException
{
Long2IntOpenHashMap countsMap = new Long2IntOpenHashMap(100_000, 0.75f);
countsMap.defaultReturnValue(0);
reader.forEachWordId(wordId -> countsMap.addTo(wordId, 1));
LongArray words = LongArrayFactory.mmapForWritingConfined(wordIdsFile, countsMap.size());
LongArray counts = LongArrayFactory.mmapForWritingConfined(countsFile, countsMap.size());
// Create the words file by iterating over the map and inserting them into
// the words file in whatever bizarro hash table order they appear in
long i = 0;
LongIterator iter = countsMap.keySet().iterator();
while (iter.hasNext()) {
words.set(i++, iter.nextLong());
}
// Sort the words file
words.sort(0, counts.size());
// Populate the counts
for (i = 0; i < countsMap.size(); i++) {
counts.set(i, countsMap.get(words.get(i)));
}
return new PrioPreindexWordSegments(words, counts, wordIdsFile, countsFile);
}
public SegmentIterator iterator(int recordSize) {
return new SegmentIterator(recordSize);
}
public SegmentConstructionIterator constructionIterator(int recordSize) {
return new SegmentConstructionIterator(recordSize);
}
public long totalSize() {
return counts.fold(0, 0, counts.size(), Long::sum);
}
public void delete() throws IOException {
Files.delete(countsFile);
Files.delete(wordsFile);
counts.close();
wordIds.close();
}
public void force() {
counts.force();
wordIds.force();
}
public void close() {
wordIds.close();
counts.close();
}
public class SegmentIterator {
private final int recordSize;
private final long fileSize;
long wordId;
long startOffset = 0;
long endOffset = 0;
private SegmentIterator(int recordSize) {
this.recordSize = recordSize;
this.fileSize = wordIds.size();
}
private long i = -1;
public long idx() {
return i;
}
public boolean next() {
if (++i >= fileSize) {
wordId = Long.MIN_VALUE;
return false;
}
wordId = wordIds.get(i);
startOffset = endOffset;
endOffset = startOffset + recordSize * counts.get(i);
return true;
}
public boolean hasMorePositions() {
return i + 1 < wordIds.size();
}
public boolean isPositionBeforeEnd() {
return i < wordIds.size();
}
public long size() {
return endOffset - startOffset;
}
}
class SegmentConstructionIterator {
private final int recordSize;
private final long fileSize;
long wordId;
long startOffset = 0;
long endOffset = 0;
private SegmentConstructionIterator(int recordSize) {
this.recordSize = recordSize;
this.fileSize = wordIds.size();
if (fileSize == 0) {
throw new IllegalArgumentException("Cannot construct zero-length word segment file");
}
this.wordId = wordIds.get(0);
}
private long i = 0;
public long idx() {
return i;
}
public boolean putNext(long size) {
if (i >= fileSize)
return false;
endOffset = startOffset + recordSize * size;
counts.set(i, size);
startOffset = endOffset;
endOffset = -1;
i++;
if (i == fileSize) {
// We've reached the end of the iteration and there is no
// "next" termId to fetch
wordId = Long.MIN_VALUE;
return false;
}
else {
wordId = wordIds.get(i);
return true;
}
}
public boolean canPutMore() {
return i < wordIds.size();
}
}
}
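
SegmentIterator recovers each word's document range purely from the running sum of count * recordSize; no per-word offsets are stored. The arithmetic in isolation:

// Offset arithmetic behind SegmentIterator.next(); values are illustrative.
long[] wordIds = {5, 9, 13};
long[] counts  = {2, 1, 3};
int recordSize = 2;
long start = 0, end = 0;
for (int i = 0; i < wordIds.length; i++) {
    start = end;
    end = start + recordSize * counts[i];
    System.out.printf("wordId %d -> docs[%d, %d)%n", wordIds[i], start, end);
}
// wordId 5 -> docs[0, 4), wordId 9 -> docs[4, 6), wordId 13 -> docs[6, 12)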

View File

@@ -4,9 +4,9 @@ import it.unimi.dsi.fastutil.ints.IntList;
 import nu.marginalia.array.page.LongQueryBuffer;
 import nu.marginalia.index.construction.DocIdRewriter;
 import nu.marginalia.index.construction.PositionsFileConstructor;
-import nu.marginalia.index.construction.ReversePreindex;
+import nu.marginalia.index.construction.full.FullPreindex;
-import nu.marginalia.index.construction.TestJournalFactory;
+import nu.marginalia.index.construction.full.TestJournalFactory;
-import nu.marginalia.index.construction.TestJournalFactory.EntryDataWithWordMeta;
+import nu.marginalia.index.construction.full.TestJournalFactory.EntryDataWithWordMeta;
 import nu.marginalia.index.positions.PositionsFileReader;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
@@ -19,7 +19,7 @@ import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.List;
-import static nu.marginalia.index.construction.TestJournalFactory.wm;
+import static nu.marginalia.index.construction.full.TestJournalFactory.wm;
 import static org.junit.jupiter.api.Assertions.*;
 class ReverseIndexReaderTest {
@@ -99,7 +99,7 @@ class ReverseIndexReaderTest {
         Path wordsFile = tempDir.resolve("words.dat");
         try (var positionsFileConstructor = new PositionsFileConstructor(posFile)) {
-            var preindex = ReversePreindex.constructPreindex(reader,
+            var preindex = FullPreindex.constructPreindex(reader,
                     positionsFileConstructor,
                     DocIdRewriter.identity(), tempDir);
             preindex.finalizeIndex(docsFile, wordsFile);

View File

@@ -1,5 +1,7 @@
-package nu.marginalia.index.construction;
+package nu.marginalia.index.construction.full;
+import nu.marginalia.index.construction.DocIdRewriter;
+import nu.marginalia.index.construction.PositionsFileConstructor;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
@@ -11,10 +13,10 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
-import static nu.marginalia.index.construction.TestJournalFactory.EntryData;
+import static nu.marginalia.index.construction.full.TestJournalFactory.EntryData;
 import static org.junit.jupiter.api.Assertions.assertEquals;
-class ReversePreindexDocsTest {
+class FullPreindexDocsTest {
     Path countsFile;
     Path wordsIdFile;
     Path docsFile;
@@ -57,8 +59,8 @@ class ReversePreindexDocsTest {
                 new EntryData(-0xF00BA3L, 0, 10, 40, -100, 33)
         );
-        var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
+        var segments = FullPreindexWordSegments.construct(reader, wordsIdFile, countsFile);
-        var docs = ReversePreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(), new PositionsFileConstructor(positionsFile), segments);
+        var docs = FullPreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(), new PositionsFileConstructor(positionsFile), segments);
         List<TestSegmentData> expected = List.of(
                 new TestSegmentData(-100, 0, 2, new long[] { -0xF00BA3L, 0 }),
@@ -86,8 +88,8 @@ class ReversePreindexDocsTest {
                 new EntryData(-0xF00BA3L, 0, 4, 4)
         );
-        var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
+        var segments = FullPreindexWordSegments.construct(reader, wordsIdFile, countsFile);
-        var docs = ReversePreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(),
+        var docs = FullPreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(),
                 new PositionsFileConstructor(positionsFile),
                 segments);
@@ -115,8 +117,8 @@ class ReversePreindexDocsTest {
                 new EntryData(0xF00BA4L, 0, 15, 30, -100, 33)
         );
-        var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
+        var segments = FullPreindexWordSegments.construct(reader, wordsIdFile, countsFile);
-        var docs = ReversePreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(),
+        var docs = FullPreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(),
                 new PositionsFileConstructor(positionsFile),
                 segments);

View File

@@ -1,8 +1,10 @@
-package nu.marginalia.index.construction;
+package nu.marginalia.index.construction.full;
 import nu.marginalia.array.LongArrayFactory;
 import nu.marginalia.btree.model.BTreeHeader;
+import nu.marginalia.index.construction.DocIdRewriter;
+import nu.marginalia.index.construction.PositionsFileConstructor;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
@@ -12,11 +14,11 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.*;
-import static nu.marginalia.index.construction.TestJournalFactory.*;
+import static nu.marginalia.index.construction.full.TestJournalFactory.*;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertTrue;
-class ReversePreindexFinalizeTest {
+class FullPreindexFinalizeTest {
     TestJournalFactory journalFactory;
     Path positionsFile;
     Path countsFile;
@@ -52,7 +54,7 @@ class ReversePreindexFinalizeTest {
     @Test
     public void testFinalizeSimple() throws IOException {
         var reader = journalFactory.createReader(new EntryDataWithWordMeta(100, 101, wm(50, 51)));
-        var preindex = ReversePreindex.constructPreindex(reader,
+        var preindex = FullPreindex.constructPreindex(reader,
                 new PositionsFileConstructor(positionsFile),
                 DocIdRewriter.identity(), tempDir);
@@ -90,7 +92,7 @@ class ReversePreindexFinalizeTest {
                 new EntryDataWithWordMeta(101, 101, wm(51, 52))
         );
-        var preindex = ReversePreindex.constructPreindex(reader,
+        var preindex = FullPreindex.constructPreindex(reader,
                 new PositionsFileConstructor(positionsFile),
                 DocIdRewriter.identity(), tempDir);

View File

@ -1,6 +1,8 @@
package nu.marginalia.index.construction; package nu.marginalia.index.construction.full;
import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.PositionsFileConstructor;
import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@ -10,10 +12,10 @@ import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.*; import java.util.*;
import static nu.marginalia.index.construction.TestJournalFactory.*; import static nu.marginalia.index.construction.full.TestJournalFactory.*;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
class ReversePreindexMergeTest { class FullPreindexMergeTest {
TestJournalFactory journalFactory; TestJournalFactory journalFactory;
Path countsFile; Path countsFile;
Path wordsIdFile; Path wordsIdFile;
@ -46,19 +48,19 @@ class ReversePreindexMergeTest {
Files.delete(tempDir); Files.delete(tempDir);
} }
public ReversePreindex runMergeScenario( public FullPreindex runMergeScenario(
List<EntryDataWithWordMeta> leftData, List<EntryDataWithWordMeta> leftData,
List<EntryDataWithWordMeta> rightData List<EntryDataWithWordMeta> rightData
) throws IOException { ) throws IOException {
var reader1 = journalFactory.createReader(leftData.toArray(EntryDataWithWordMeta[]::new)); var reader1 = journalFactory.createReader(leftData.toArray(EntryDataWithWordMeta[]::new));
var reader2 = journalFactory.createReader(rightData.toArray(EntryDataWithWordMeta[]::new)); var reader2 = journalFactory.createReader(rightData.toArray(EntryDataWithWordMeta[]::new));
var left = ReversePreindex.constructPreindex(reader1, new PositionsFileConstructor(positionsFile), DocIdRewriter.identity(), tempDir); var left = FullPreindex.constructPreindex(reader1, new PositionsFileConstructor(positionsFile), DocIdRewriter.identity(), tempDir);
var right = ReversePreindex.constructPreindex(reader2, new PositionsFileConstructor(positionsFile), DocIdRewriter.identity(), tempDir); var right = FullPreindex.constructPreindex(reader2, new PositionsFileConstructor(positionsFile), DocIdRewriter.identity(), tempDir);
return ReversePreindex.merge(tempDir, left, right); return FullPreindex.merge(tempDir, left, right);
} }
private List<TestSegmentData> getData(ReversePreindex merged) { private List<TestSegmentData> getData(FullPreindex merged) {
var iter = merged.segments.iterator(2); var iter = merged.segments.iterator(2);
List<TestSegmentData> actual = new ArrayList<>(); List<TestSegmentData> actual = new ArrayList<>();
while (iter.next()) { while (iter.next()) {
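A hypothetical invocation of the helper above, to make the shape of a merge scenario concrete. The entry values mirror ones used elsewhere in these tests and are illustrative, not taken from a specific test case:

    // Build two single-entry preindexes and merge them.
    var merged = runMergeScenario(
            List.of(new EntryDataWithWordMeta(100, 101, wm(50, 51))),
            List.of(new EntryDataWithWordMeta(101, 101, wm(51, 52))));

    // getData(merged) walks merged.segments and collects TestSegmentData
    // for assertions against the expected post-merge layout.
    var actual = getData(merged);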
View File
@ -1,4 +1,4 @@
package nu.marginalia.index.construction; package nu.marginalia.index.construction.full;
import nu.marginalia.array.LongArray; import nu.marginalia.array.LongArray;
import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.AfterEach;
@ -11,10 +11,10 @@ import java.nio.file.Path;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import static nu.marginalia.index.construction.TestJournalFactory.*; import static nu.marginalia.index.construction.full.TestJournalFactory.*;
import static org.junit.jupiter.api.Assertions.*; import static org.junit.jupiter.api.Assertions.*;
class ReversePreindexWordSegmentsTest { class FullPreindexWordSegmentsTest {
Path countsFile; Path countsFile;
Path wordsIdFile; Path wordsIdFile;
Path docsFile; Path docsFile;
@ -51,7 +51,7 @@ class ReversePreindexWordSegmentsTest {
new EntryData(-0xF00BA3L, 0, 1L<<33) new EntryData(-0xF00BA3L, 0, 1L<<33)
); );
var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); var segments = FullPreindexWordSegments.construct(reader, wordsIdFile, countsFile);
var iter = segments.iterator(1); var iter = segments.iterator(1);
List<TestSegmentData> expected = List.of( List<TestSegmentData> expected = List.of(
@ -72,7 +72,7 @@ class ReversePreindexWordSegmentsTest {
new EntryData(-0xF00BA3L, 0, 5, 5) new EntryData(-0xF00BA3L, 0, 5, 5)
); );
var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); var segments = FullPreindexWordSegments.construct(reader, wordsIdFile, countsFile);
var iter = segments.iterator(1); var iter = segments.iterator(1);
List<TestSegmentData> expected = List.of( List<TestSegmentData> expected = List.of(
@ -94,7 +94,7 @@ class ReversePreindexWordSegmentsTest {
new EntryData(-0xF00BA3L, 0, 10, 40, -100, 33) new EntryData(-0xF00BA3L, 0, 10, 40, -100, 33)
); );
var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); var segments = FullPreindexWordSegments.construct(reader, wordsIdFile, countsFile);
var iter = segments.iterator(1); var iter = segments.iterator(1);
List<TestSegmentData> expected = List.of( List<TestSegmentData> expected = List.of(
@ -120,7 +120,7 @@ class ReversePreindexWordSegmentsTest {
new EntryData(0xF00BA4L, 0, 15, 30, -100, 33) new EntryData(0xF00BA4L, 0, 15, 30, -100, 33)
); );
var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); var segments = FullPreindexWordSegments.construct(reader, wordsIdFile, countsFile);
var iter = segments.iterator(1); var iter = segments.iterator(1);
List<TestSegmentData> expected = List.of( List<TestSegmentData> expected = List.of(
@ -148,7 +148,7 @@ class ReversePreindexWordSegmentsTest {
LongArray countsArray = LongArray.allocate(4); LongArray countsArray = LongArray.allocate(4);
wordsArray.set(0, -1, -2, -3, -4); wordsArray.set(0, -1, -2, -3, -4);
countsArray.set(0, 2, 1, 3, 5); countsArray.set(0, 2, 1, 3, 5);
var segments = new ReversePreindexWordSegments(wordsArray, countsArray, null, null); var segments = new FullPreindexWordSegments(wordsArray, countsArray, null, null);
var ritr = segments.iterator(1); var ritr = segments.iterator(1);
assertTrue(ritr.hasMorePositions()); assertTrue(ritr.hasMorePositions());
@ -196,7 +196,7 @@ class ReversePreindexWordSegmentsTest {
LongArray wordsArray = LongArray.allocate(4); LongArray wordsArray = LongArray.allocate(4);
LongArray countsArray = LongArray.allocate(4); LongArray countsArray = LongArray.allocate(4);
wordsArray.set(0, -1, -2, -3, -4); wordsArray.set(0, -1, -2, -3, -4);
var segments = new ReversePreindexWordSegments(wordsArray, countsArray, null, null); var segments = new FullPreindexWordSegments(wordsArray, countsArray, null, null);
var citr = segments.constructionIterator(1); var citr = segments.constructionIterator(1);
assertEquals(-1, citr.wordId); assertEquals(-1, citr.wordId);
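Pulling the segment-iteration pattern out of the interleaved hunks, a minimal sketch of what these tests drive. The two null arguments stand in for the backing files, exactly as the tests pass them; the loop body is left empty because per-step accessors beyond wordId are not visible in this diff.

    LongArray wordsArray  = LongArray.allocate(4);
    LongArray countsArray = LongArray.allocate(4);
    wordsArray.set(0, -1, -2, -3, -4);
    countsArray.set(0, 2, 1, 3, 5);

    var segments = new FullPreindexWordSegments(wordsArray, countsArray, null, null);

    // Construction-order iteration exposes the current word id directly:
    var citr = segments.constructionIterator(1);
    assert citr.wordId == -1;

    // Regular iteration steps through one segment per word:
    var iter = segments.iterator(1);
    while (iter.next()) {
        // consume the current segment
    }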
View File
@ -1,4 +1,4 @@
package nu.marginalia.index.construction; package nu.marginalia.index.construction.full;
import nu.marginalia.index.journal.model.IndexJournalEntryData; import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.index.journal.model.IndexJournalEntryHeader; import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
View File
@ -1,4 +1,4 @@
package nu.marginalia.index.construction; package nu.marginalia.index.construction.full;
import java.util.Arrays; import java.util.Arrays;
View File
@ -3,13 +3,11 @@ package nu.marginalia.index;
import com.google.inject.Guice; import com.google.inject.Guice;
import com.google.inject.Inject; import com.google.inject.Inject;
import it.unimi.dsi.fastutil.ints.IntList; import it.unimi.dsi.fastutil.ints.IntList;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongList;
import nu.marginalia.IndexLocations; import nu.marginalia.IndexLocations;
import nu.marginalia.array.page.LongQueryBuffer; import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.hash.MurmurHash3_128; import nu.marginalia.hash.MurmurHash3_128;
import nu.marginalia.index.construction.DocIdRewriter; import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.ReverseIndexConstructor; import nu.marginalia.index.construction.full.FullIndexConstructor;
import nu.marginalia.index.domainrankings.DomainRankings; import nu.marginalia.index.domainrankings.DomainRankings;
import nu.marginalia.index.forward.ForwardIndexConverter; import nu.marginalia.index.forward.ForwardIndexConverter;
import nu.marginalia.index.forward.ForwardIndexFileNames; import nu.marginalia.index.forward.ForwardIndexFileNames;
@ -33,7 +31,6 @@ import nu.marginalia.model.idx.WordMetadata;
import nu.marginalia.process.control.FakeProcessHeartbeat; import nu.marginalia.process.control.FakeProcessHeartbeat;
import nu.marginalia.process.control.ProcessHeartbeat; import nu.marginalia.process.control.ProcessHeartbeat;
import nu.marginalia.sequence.GammaCodedSequence; import nu.marginalia.sequence.GammaCodedSequence;
import nu.marginalia.service.control.ServiceHeartbeat;
import nu.marginalia.service.server.Initialization; import nu.marginalia.service.server.Initialization;
import nu.marginalia.storage.FileStorageService; import nu.marginalia.storage.FileStorageService;
import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.AfterEach;
@ -247,7 +244,7 @@ public class CombinedIndexReaderTest {
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
var constructor = var constructor =
new ReverseIndexConstructor( new FullIndexConstructor(
outputFileDocs, outputFileDocs,
outputFileWords, outputFileWords,
outputFilePositions, outputFilePositions,
@ -267,7 +264,7 @@ public class CombinedIndexReaderTest {
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
var constructor = new ReverseIndexConstructor( var constructor = new FullIndexConstructor(
outputFileDocs, outputFileDocs,
outputFileWords, outputFileWords,
outputFilePositions, outputFilePositions,
View File
@ -14,7 +14,7 @@ import nu.marginalia.process.control.ProcessHeartbeat;
import nu.marginalia.sequence.GammaCodedSequence; import nu.marginalia.sequence.GammaCodedSequence;
import nu.marginalia.storage.FileStorageService; import nu.marginalia.storage.FileStorageService;
import nu.marginalia.index.construction.DocIdRewriter; import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.ReverseIndexConstructor; import nu.marginalia.index.construction.full.FullIndexConstructor;
import nu.marginalia.index.forward.ForwardIndexConverter; import nu.marginalia.index.forward.ForwardIndexConverter;
import nu.marginalia.index.forward.ForwardIndexFileNames; import nu.marginalia.index.forward.ForwardIndexFileNames;
import nu.marginalia.index.journal.model.IndexJournalEntryHeader; import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
@ -291,7 +291,7 @@ public class IndexQueryServiceIntegrationSmokeTest {
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
var constructor = new ReverseIndexConstructor( var constructor = new FullIndexConstructor(
outputFileDocs, outputFileDocs,
outputFileWords, outputFileWords,
outputFilePositions, outputFilePositions,
@ -313,7 +313,7 @@ public class IndexQueryServiceIntegrationSmokeTest {
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
var constructor = new ReverseIndexConstructor( var constructor = new FullIndexConstructor(
outputFileDocs, outputFileDocs,
outputFileWords, outputFileWords,
outputFilePositions, outputFilePositions,
View File
@ -7,13 +7,13 @@ import nu.marginalia.api.searchquery.model.query.SearchCoherenceConstraint;
import nu.marginalia.api.searchquery.model.query.SearchSpecification; import nu.marginalia.api.searchquery.model.query.SearchSpecification;
import nu.marginalia.api.searchquery.model.query.SearchQuery; import nu.marginalia.api.searchquery.model.query.SearchQuery;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.index.construction.full.FullIndexConstructor;
import nu.marginalia.index.index.StatefulIndex; import nu.marginalia.index.index.StatefulIndex;
import nu.marginalia.index.journal.model.IndexJournalEntryData; import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.sequence.GammaCodedSequence; import nu.marginalia.sequence.GammaCodedSequence;
import nu.marginalia.storage.FileStorageService; import nu.marginalia.storage.FileStorageService;
import nu.marginalia.hash.MurmurHash3_128; import nu.marginalia.hash.MurmurHash3_128;
import nu.marginalia.index.construction.DocIdRewriter; import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.ReverseIndexConstructor;
import nu.marginalia.index.forward.ForwardIndexConverter; import nu.marginalia.index.forward.ForwardIndexConverter;
import nu.marginalia.index.forward.ForwardIndexFileNames; import nu.marginalia.index.forward.ForwardIndexFileNames;
import nu.marginalia.index.journal.model.IndexJournalEntryHeader; import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
@ -493,7 +493,7 @@ public class IndexQueryServiceIntegrationTest {
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
var constructor = var constructor =
new ReverseIndexConstructor( new FullIndexConstructor(
outputFileDocs, outputFileDocs,
outputFileWords, outputFileWords,
outputFilePositions, outputFilePositions,
@ -513,7 +513,7 @@ public class IndexQueryServiceIntegrationTest {
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
var constructor = new ReverseIndexConstructor( var constructor = new FullIndexConstructor(
outputFileDocs, outputFileDocs,
outputFileWords, outputFileWords,
outputFilePositions, outputFilePositions,
View File
@ -6,10 +6,11 @@ import com.google.inject.Inject;
import nu.marginalia.IndexLocations; import nu.marginalia.IndexLocations;
import nu.marginalia.ProcessConfiguration; import nu.marginalia.ProcessConfiguration;
import nu.marginalia.ProcessConfigurationModule; import nu.marginalia.ProcessConfigurationModule;
import nu.marginalia.index.construction.prio.PrioIndexConstructor;
import nu.marginalia.index.domainrankings.DomainRankings; import nu.marginalia.index.domainrankings.DomainRankings;
import nu.marginalia.service.ProcessMainClass; import nu.marginalia.service.ProcessMainClass;
import nu.marginalia.storage.FileStorageService; import nu.marginalia.storage.FileStorageService;
import nu.marginalia.index.construction.ReverseIndexConstructor; import nu.marginalia.index.construction.full.FullIndexConstructor;
import nu.marginalia.index.forward.ForwardIndexConverter; import nu.marginalia.index.forward.ForwardIndexConverter;
import nu.marginalia.index.forward.ForwardIndexFileNames; import nu.marginalia.index.forward.ForwardIndexFileNames;
import nu.marginalia.index.journal.reader.IndexJournalReader; import nu.marginalia.index.journal.reader.IndexJournalReader;
@ -117,7 +118,7 @@ public class IndexConstructorMain extends ProcessMainClass {
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
var constructor = new ReverseIndexConstructor( var constructor = new FullIndexConstructor(
outputFileDocs, outputFileDocs,
outputFileWords, outputFileWords,
outputFilePositions, outputFilePositions,
@ -142,7 +143,7 @@ public class IndexConstructorMain extends ProcessMainClass {
// important to the document. This filter will act on the encoded {@see WordMetadata} // important to the document. This filter will act on the encoded {@see WordMetadata}
LongPredicate wordMetaFilter = getPriorityIndexWordMetaFilter(); LongPredicate wordMetaFilter = getPriorityIndexWordMetaFilter();
var constructor = new ReverseIndexConstructor( var constructor = new PrioIndexConstructor(
outputFileDocs, outputFileDocs,
outputFileWords, outputFileWords,
outputFilePositions, outputFilePositions,
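This second hunk is the substantive change of the commit rather than a pure rename: the priority index now gets its own constructor type instead of reusing the full index's. The filter it is paired with is only referenced here, so the following is a hypothetical sketch of what a word-meta predicate of this kind could look like; the mask value and bit layout are invented stand-ins, and the real encoding lives in WordMetadata.

    import java.util.function.LongPredicate;

    class WordMetaFilterSketch {
        // Invented mask: assume the low byte of the encoded metadata
        // carries the term flags (the real layout is in WordMetadata).
        private static final long FLAGS_MASK = 0xFFL;

        // In the spirit of getPriorityIndexWordMetaFilter(): admit a term
        // into the priority index only if some flag bit is set.
        static LongPredicate priorityIndexFilter() {
            return encodedMeta -> (encodedMeta & FLAGS_MASK) != 0;
        }

        public static void main(String[] args) {
            LongPredicate filter = priorityIndexFilter();
            System.out.println(filter.test(0x01L)); // true:  flagged term, kept
            System.out.println(filter.test(0x00L)); // false: unflagged term, dropped
        }
    }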
View File
@ -17,7 +17,7 @@ import nu.marginalia.functions.searchquery.QueryFactory;
import nu.marginalia.index.IndexGrpcService; import nu.marginalia.index.IndexGrpcService;
import nu.marginalia.index.ReverseIndexFullFileNames; import nu.marginalia.index.ReverseIndexFullFileNames;
import nu.marginalia.index.ReverseIndexPrioFileNames; import nu.marginalia.index.ReverseIndexPrioFileNames;
import nu.marginalia.index.construction.ReverseIndexConstructor; import nu.marginalia.index.construction.full.FullIndexConstructor;
import nu.marginalia.index.domainrankings.DomainRankings; import nu.marginalia.index.domainrankings.DomainRankings;
import nu.marginalia.index.forward.ForwardIndexConverter; import nu.marginalia.index.forward.ForwardIndexConverter;
import nu.marginalia.index.forward.ForwardIndexFileNames; import nu.marginalia.index.forward.ForwardIndexFileNames;
@ -244,7 +244,7 @@ public class IntegrationTest {
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
var constructor = new ReverseIndexConstructor( var constructor = new FullIndexConstructor(
outputFileDocs, outputFileDocs,
outputFileWords, outputFileWords,
outputFilePositions, outputFilePositions,
@ -269,7 +269,7 @@ public class IntegrationTest {
// important to the document. This filter will act on the encoded {@see WordMetadata} // important to the document. This filter will act on the encoded {@see WordMetadata}
LongPredicate wordMetaFilter = getPriorityIndexWordMetaFilter(); LongPredicate wordMetaFilter = getPriorityIndexWordMetaFilter();
var constructor = new ReverseIndexConstructor( var constructor = new FullIndexConstructor(
outputFileDocs, outputFileDocs,
outputFileWords, outputFileWords,
outputFilePositions, outputFilePositions,