diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/ReverseIndexBTreeTransformer.java b/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullIndexBTreeTransformer.java similarity index 73% rename from code/index/index-reverse/java/nu/marginalia/index/construction/ReverseIndexBTreeTransformer.java rename to code/index/index-reverse/java/nu/marginalia/index/construction/full/FullIndexBTreeTransformer.java index dd5499bf..ccf21331 100644 --- a/code/index/index-reverse/java/nu/marginalia/index/construction/ReverseIndexBTreeTransformer.java +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullIndexBTreeTransformer.java @@ -1,4 +1,4 @@ -package nu.marginalia.index.construction; +package nu.marginalia.index.construction.full; import nu.marginalia.array.LongArray; import nu.marginalia.array.algo.LongArrayTransformations; @@ -9,7 +9,7 @@ import java.io.IOException; import java.nio.channels.FileChannel; /** Constructs the BTrees in a reverse index */ -public class ReverseIndexBTreeTransformer implements LongArrayTransformations.LongIOTransformer { +public class FullIndexBTreeTransformer implements LongArrayTransformations.LongIOTransformer { private final BTreeWriter writer; private final FileChannel intermediateChannel; @@ -18,10 +18,10 @@ public class ReverseIndexBTreeTransformer implements LongArrayTransformations.Lo long start = 0; long writeOffset = 0; - public ReverseIndexBTreeTransformer(LongArray urlsFileMap, - int entrySize, - BTreeContext bTreeContext, - FileChannel intermediateChannel) { + public FullIndexBTreeTransformer(LongArray urlsFileMap, + int entrySize, + BTreeContext bTreeContext, + FileChannel intermediateChannel) { this.writer = new BTreeWriter(urlsFileMap, bTreeContext); this.entrySize = entrySize; this.intermediateChannel = intermediateChannel; diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/ReverseIndexConstructor.java b/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullIndexConstructor.java similarity index 74% rename from code/index/index-reverse/java/nu/marginalia/index/construction/ReverseIndexConstructor.java rename to code/index/index-reverse/java/nu/marginalia/index/construction/full/FullIndexConstructor.java index 9fa3ed93..db7d5604 100644 --- a/code/index/index-reverse/java/nu/marginalia/index/construction/ReverseIndexConstructor.java +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullIndexConstructor.java @@ -1,6 +1,9 @@ -package nu.marginalia.index.construction; +package nu.marginalia.index.construction.full; import lombok.SneakyThrows; +import nu.marginalia.index.construction.DocIdRewriter; +import nu.marginalia.index.construction.JournalReaderSource; +import nu.marginalia.index.construction.PositionsFileConstructor; import nu.marginalia.process.control.ProcessHeartbeat; import nu.marginalia.index.journal.IndexJournalFileNames; import org.slf4j.Logger; @@ -10,9 +13,9 @@ import java.io.IOException; import java.nio.file.Path; import java.util.concurrent.atomic.AtomicInteger; -public class ReverseIndexConstructor { +public class FullIndexConstructor { - private static final Logger logger = LoggerFactory.getLogger(ReverseIndexConstructor.class); + private static final Logger logger = LoggerFactory.getLogger(FullIndexConstructor.class); public enum CreateReverseIndexSteps { CONSTRUCT, @@ -27,12 +30,12 @@ public class ReverseIndexConstructor { private final DocIdRewriter docIdRewriter; private final Path tmpDir; - public ReverseIndexConstructor(Path outputFileDocs, - Path outputFileWords, - Path outputFilePositions, - JournalReaderSource readerSource, - DocIdRewriter docIdRewriter, - Path tmpDir) { + public FullIndexConstructor(Path outputFileDocs, + Path outputFileWords, + Path outputFilePositions, + JournalReaderSource readerSource, + DocIdRewriter docIdRewriter, + Path tmpDir) { this.outputFileDocs = outputFileDocs; this.outputFileWords = outputFileWords; this.outputFilePositions = outputFilePositions; @@ -77,20 +80,20 @@ public class ReverseIndexConstructor { } @SneakyThrows - private ReversePreindexReference construct(Path input, PositionsFileConstructor positionsFileConstructor) { - return ReversePreindex + private FullPreindexReference construct(Path input, PositionsFileConstructor positionsFileConstructor) { + return FullPreindex .constructPreindex(readerSource.construct(input), positionsFileConstructor, docIdRewriter, tmpDir) .closeToReference(); } @SneakyThrows - private ReversePreindexReference merge(ReversePreindexReference leftR, ReversePreindexReference rightR) { + private FullPreindexReference merge(FullPreindexReference leftR, FullPreindexReference rightR) { var left = leftR.open(); var right = rightR.open(); try { - return ReversePreindex.merge(tmpDir, left, right).closeToReference(); + return FullPreindex.merge(tmpDir, left, right).closeToReference(); } finally { left.delete(); @@ -101,7 +104,7 @@ public class ReverseIndexConstructor { } @SneakyThrows - private void finalizeIndex(ReversePreindexReference finalPR) { + private void finalizeIndex(FullPreindexReference finalPR) { var finalP = finalPR.open(); finalP.finalizeIndex(outputFileDocs, outputFileWords); finalP.delete(); diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindex.java b/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindex.java similarity index 79% rename from code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindex.java rename to code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindex.java index 3abe8171..668263d8 100644 --- a/code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindex.java +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindex.java @@ -1,9 +1,13 @@ -package nu.marginalia.index.construction; +package nu.marginalia.index.construction.full; import nu.marginalia.array.LongArray; import nu.marginalia.array.LongArrayFactory; import nu.marginalia.btree.BTreeWriter; import nu.marginalia.index.ReverseIndexParameters; +import nu.marginalia.index.construction.CountToOffsetTransformer; +import nu.marginalia.index.construction.DocIdRewriter; +import nu.marginalia.index.construction.IndexSizeEstimator; +import nu.marginalia.index.construction.PositionsFileConstructor; import nu.marginalia.index.journal.reader.IndexJournalReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -25,13 +29,13 @@ import static nu.marginalia.array.algo.TwoArrayOperations.*; * the union of their data. This operation requires no additional * RAM. */ -public class ReversePreindex { - final ReversePreindexWordSegments segments; - final ReversePreindexDocuments documents; +public class FullPreindex { + final FullPreindexWordSegments segments; + final FullPreindexDocuments documents; - private static final Logger logger = LoggerFactory.getLogger(ReversePreindex.class); + private static final Logger logger = LoggerFactory.getLogger(FullPreindex.class); - public ReversePreindex(ReversePreindexWordSegments segments, ReversePreindexDocuments documents) { + public FullPreindex(FullPreindexWordSegments segments, FullPreindexDocuments documents) { this.segments = segments; this.documents = documents; } @@ -39,27 +43,27 @@ public class ReversePreindex { /** Constructs a new preindex with the data associated with reader. The backing files * will have randomly assigned names. */ - public static ReversePreindex constructPreindex(IndexJournalReader reader, - PositionsFileConstructor positionsFileConstructor, - DocIdRewriter docIdRewriter, - Path workDir) throws IOException + public static FullPreindex constructPreindex(IndexJournalReader reader, + PositionsFileConstructor positionsFileConstructor, + DocIdRewriter docIdRewriter, + Path workDir) throws IOException { Path segmentWordsFile = Files.createTempFile(workDir, "segment_words", ".dat"); Path segmentCountsFile = Files.createTempFile(workDir, "segment_counts", ".dat"); Path docsFile = Files.createTempFile(workDir, "docs", ".dat"); - var segments = ReversePreindexWordSegments.construct(reader, segmentWordsFile, segmentCountsFile); - var docs = ReversePreindexDocuments.construct(docsFile, workDir, reader, docIdRewriter, positionsFileConstructor, segments); - return new ReversePreindex(segments, docs); + var segments = FullPreindexWordSegments.construct(reader, segmentWordsFile, segmentCountsFile); + var docs = FullPreindexDocuments.construct(docsFile, workDir, reader, docIdRewriter, positionsFileConstructor, segments); + return new FullPreindex(segments, docs); } /** Close the associated memory mapped areas and return * a dehydrated version of this object that can be re-opened * later. */ - public ReversePreindexReference closeToReference() { + public FullPreindexReference closeToReference() { try { - return new ReversePreindexReference(segments, documents); + return new FullPreindexReference(segments, documents); } finally { segments.force(); @@ -85,7 +89,7 @@ public class ReversePreindex { LongArray finalDocs = LongArrayFactory.mmapForWritingConfined(outputFileDocs, sizeEstimator.size); try (var intermediateDocChannel = documents.createDocumentsFileChannel()) { offsets.transformEachIO(0, offsets.size(), - new ReverseIndexBTreeTransformer(finalDocs, 2, + new FullIndexBTreeTransformer(finalDocs, 2, ReverseIndexParameters.docsBTreeContext, intermediateDocChannel)); intermediateDocChannel.force(false); @@ -126,11 +130,11 @@ public class ReversePreindex { documents.delete(); } - public static ReversePreindex merge(Path destDir, - ReversePreindex left, - ReversePreindex right) throws IOException { + public static FullPreindex merge(Path destDir, + FullPreindex left, + FullPreindex right) throws IOException { - ReversePreindexWordSegments mergingSegment = + FullPreindexWordSegments mergingSegment = createMergedSegmentWordFile(destDir, left.segments, right.segments); var mergingIter = mergingSegment.constructionIterator(2); @@ -198,18 +202,18 @@ public class ReversePreindex { mergedDocuments = shrinkMergedDocuments(mergedDocuments, docsFile, 2 * mergingSegment.totalSize()); - return new ReversePreindex( + return new FullPreindex( mergingSegment, - new ReversePreindexDocuments(mergedDocuments, docsFile) + new FullPreindexDocuments(mergedDocuments, docsFile) ); } /** Create a segment word file with each word from both inputs, with zero counts for all the data. * This is an intermediate product in merging. */ - static ReversePreindexWordSegments createMergedSegmentWordFile(Path destDir, - ReversePreindexWordSegments left, - ReversePreindexWordSegments right) throws IOException { + static FullPreindexWordSegments createMergedSegmentWordFile(Path destDir, + FullPreindexWordSegments left, + FullPreindexWordSegments right) throws IOException { Path segmentWordsFile = Files.createTempFile(destDir, "segment_words", ".dat"); Path segmentCountsFile = Files.createTempFile(destDir, "segment_counts", ".dat"); @@ -228,7 +232,7 @@ public class ReversePreindex { LongArray counts = LongArrayFactory.mmapForWritingConfined(segmentCountsFile, segmentsSize); - return new ReversePreindexWordSegments(wordIdsFile, counts, segmentWordsFile, segmentCountsFile); + return new FullPreindexWordSegments(wordIdsFile, counts, segmentWordsFile, segmentCountsFile); } /** It's possible we overestimated the necessary size of the documents file, @@ -256,12 +260,12 @@ public class ReversePreindex { /** Merge contents of the segments indicated by leftIter and rightIter into the destionation * segment, and advance the construction iterator with the appropriate size. */ - private static void mergeSegments(ReversePreindexWordSegments.SegmentIterator leftIter, - ReversePreindexWordSegments.SegmentIterator rightIter, - ReversePreindexDocuments left, - ReversePreindexDocuments right, + private static void mergeSegments(FullPreindexWordSegments.SegmentIterator leftIter, + FullPreindexWordSegments.SegmentIterator rightIter, + FullPreindexDocuments left, + FullPreindexDocuments right, LongArray dest, - ReversePreindexWordSegments.SegmentConstructionIterator destIter) + FullPreindexWordSegments.SegmentConstructionIterator destIter) { long segSize = mergeArrays2(dest, left.documents, @@ -279,10 +283,10 @@ public class ReversePreindex { /** Copy the data from the source segment at the position and length indicated by sourceIter, * into the destination segment, and advance the construction iterator. */ - private static boolean copySegment(ReversePreindexWordSegments.SegmentIterator sourceIter, - LongArray dest, - FileChannel sourceChannel, - ReversePreindexWordSegments.SegmentConstructionIterator mergingIter) throws IOException { + private static boolean copySegment(FullPreindexWordSegments.SegmentIterator sourceIter, + LongArray dest, + FileChannel sourceChannel, + FullPreindexWordSegments.SegmentConstructionIterator mergingIter) throws IOException { long size = sourceIter.endOffset - sourceIter.startOffset; long start = mergingIter.startOffset; diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindexDocuments.java b/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindexDocuments.java similarity index 84% rename from code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindexDocuments.java rename to code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindexDocuments.java index d0d5ed7e..49442367 100644 --- a/code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindexDocuments.java +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindexDocuments.java @@ -1,8 +1,10 @@ -package nu.marginalia.index.construction; +package nu.marginalia.index.construction.full; import lombok.SneakyThrows; import nu.marginalia.array.LongArray; import nu.marginalia.array.LongArrayFactory; +import nu.marginalia.index.construction.DocIdRewriter; +import nu.marginalia.index.construction.PositionsFileConstructor; import nu.marginalia.index.journal.reader.IndexJournalReader; import nu.marginalia.rwf.RandomFileAssembler; import org.slf4j.Logger; @@ -20,35 +22,35 @@ import java.util.concurrent.TimeUnit; /** A LongArray with document data, segmented according to * the associated ReversePreindexWordSegments data */ -public class ReversePreindexDocuments { +public class FullPreindexDocuments { public final LongArray documents; private static PositionsFileConstructor positionsFileConstructor; private static final int RECORD_SIZE_LONGS = 2; - private static final Logger logger = LoggerFactory.getLogger(ReversePreindexDocuments.class); + private static final Logger logger = LoggerFactory.getLogger(FullPreindexDocuments.class); public final Path file; - public ReversePreindexDocuments(LongArray documents, Path file) { + public FullPreindexDocuments(LongArray documents, Path file) { this.documents = documents; this.file = file; } - public static ReversePreindexDocuments construct( + public static FullPreindexDocuments construct( Path docsFile, Path workDir, IndexJournalReader reader, DocIdRewriter docIdRewriter, PositionsFileConstructor positionsFileConstructor, - ReversePreindexWordSegments segments) throws IOException { - ReversePreindexDocuments.positionsFileConstructor = positionsFileConstructor; + FullPreindexWordSegments segments) throws IOException { + FullPreindexDocuments.positionsFileConstructor = positionsFileConstructor; createUnsortedDocsFile(docsFile, workDir, reader, segments, docIdRewriter); LongArray docsFileMap = LongArrayFactory.mmapForModifyingShared(docsFile); sortDocsFile(docsFileMap, segments); - return new ReversePreindexDocuments(docsFileMap, docsFile); + return new FullPreindexDocuments(docsFileMap, docsFile); } public FileChannel createDocumentsFileChannel() throws IOException { @@ -67,7 +69,7 @@ public class ReversePreindexDocuments { private static void createUnsortedDocsFile(Path docsFile, Path workDir, IndexJournalReader reader, - ReversePreindexWordSegments segments, + FullPreindexWordSegments segments, DocIdRewriter docIdRewriter) throws IOException { long fileSizeLongs = RECORD_SIZE_LONGS * segments.totalSize(); @@ -99,7 +101,7 @@ public class ReversePreindexDocuments { } @SneakyThrows - private static void sortDocsFile(LongArray docsFileMap, ReversePreindexWordSegments segments) throws IOException { + private static void sortDocsFile(LongArray docsFileMap, FullPreindexWordSegments segments) throws IOException { var iter = segments.iterator(RECORD_SIZE_LONGS); diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindexReference.java b/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindexReference.java similarity index 62% rename from code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindexReference.java rename to code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindexReference.java index 16c542d5..9045b0c7 100644 --- a/code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindexReference.java +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindexReference.java @@ -1,33 +1,33 @@ -package nu.marginalia.index.construction; +package nu.marginalia.index.construction.full; import nu.marginalia.array.LongArrayFactory; import java.io.IOException; import java.nio.file.Path; -/** This is a dehydrated version of a ReversePreIndex, that only +/** This is a dehydrated version of a FullPreIndex, that only * keeps references to its location on disk but does not hold associated * memory maps. */ -public record ReversePreindexReference( +public record FullPreindexReference( Path wordsFile, Path countsFile, Path documentsFile ) { - public ReversePreindexReference(ReversePreindexWordSegments segments, ReversePreindexDocuments documents) { + public FullPreindexReference(FullPreindexWordSegments segments, FullPreindexDocuments documents) { this(segments.wordsFile, segments.countsFile, documents.file); } - public ReversePreindex open() throws IOException { - return new ReversePreindex( - new ReversePreindexWordSegments( + public FullPreindex open() throws IOException { + return new FullPreindex( + new FullPreindexWordSegments( LongArrayFactory.mmapForModifyingShared(wordsFile), LongArrayFactory.mmapForModifyingShared(countsFile), wordsFile, countsFile ), - new ReversePreindexDocuments( + new FullPreindexDocuments( LongArrayFactory.mmapForModifyingShared(documentsFile), documentsFile ) diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindexWordSegments.java b/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindexWordSegments.java similarity index 89% rename from code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindexWordSegments.java rename to code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindexWordSegments.java index 0351ed45..eb744616 100644 --- a/code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindexWordSegments.java +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindexWordSegments.java @@ -1,4 +1,4 @@ -package nu.marginalia.index.construction; +package nu.marginalia.index.construction.full; import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap; import it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap; @@ -14,17 +14,17 @@ import java.nio.file.Path; /** A pair of file-backed arrays of sorted wordIds * and the count of documents associated with each termId. */ -public class ReversePreindexWordSegments { +public class FullPreindexWordSegments { public final LongArray wordIds; public final LongArray counts; final Path wordsFile; final Path countsFile; - public ReversePreindexWordSegments(LongArray wordIds, - LongArray counts, - Path wordsFile, - Path countsFile) + public FullPreindexWordSegments(LongArray wordIds, + LongArray counts, + Path wordsFile, + Path countsFile) { assert wordIds.size() == counts.size(); @@ -51,9 +51,9 @@ public class ReversePreindexWordSegments { return ret; } - public static ReversePreindexWordSegments construct(IndexJournalReader reader, - Path wordIdsFile, - Path countsFile) + public static FullPreindexWordSegments construct(IndexJournalReader reader, + Path wordIdsFile, + Path countsFile) throws IOException { Long2IntOpenHashMap countsMap = new Long2IntOpenHashMap(100_000, 0.75f); @@ -79,7 +79,7 @@ public class ReversePreindexWordSegments { counts.set(i, countsMap.get(words.get(i))); } - return new ReversePreindexWordSegments(words, counts, wordIdsFile, countsFile); + return new FullPreindexWordSegments(words, counts, wordIdsFile, countsFile); } public SegmentIterator iterator(int recordSize) { diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioIndexBTreeTransformer.java b/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioIndexBTreeTransformer.java new file mode 100644 index 00000000..d402405a --- /dev/null +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioIndexBTreeTransformer.java @@ -0,0 +1,48 @@ +package nu.marginalia.index.construction.prio; + +import nu.marginalia.array.LongArray; +import nu.marginalia.array.algo.LongArrayTransformations; +import nu.marginalia.btree.BTreeWriter; +import nu.marginalia.btree.model.BTreeContext; + +import java.io.IOException; +import java.nio.channels.FileChannel; + +/** Constructs the BTrees in a reverse index */ +public class PrioIndexBTreeTransformer implements LongArrayTransformations.LongIOTransformer { + private final BTreeWriter writer; + private final FileChannel intermediateChannel; + + private final int entrySize; + + long start = 0; + long writeOffset = 0; + + public PrioIndexBTreeTransformer(LongArray urlsFileMap, + int entrySize, + BTreeContext bTreeContext, + FileChannel intermediateChannel) { + this.writer = new BTreeWriter(urlsFileMap, bTreeContext); + this.entrySize = entrySize; + this.intermediateChannel = intermediateChannel; + } + + @Override + public long transform(long pos, long end) throws IOException { + + final int size = (int) ((end - start) / entrySize); + + if (size == 0) { + return -1; + } + + final long offsetForBlock = writeOffset; + + writeOffset += writer.write(writeOffset, size, + mapRegion -> mapRegion.transferFrom(intermediateChannel, start, 0, end - start) + ); + + start = end; + return offsetForBlock; + } +} diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioIndexConstructor.java b/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioIndexConstructor.java new file mode 100644 index 00000000..4cad80b9 --- /dev/null +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioIndexConstructor.java @@ -0,0 +1,114 @@ +package nu.marginalia.index.construction.prio; + +import lombok.SneakyThrows; +import nu.marginalia.index.construction.DocIdRewriter; +import nu.marginalia.index.construction.JournalReaderSource; +import nu.marginalia.index.construction.PositionsFileConstructor; +import nu.marginalia.index.journal.IndexJournalFileNames; +import nu.marginalia.process.control.ProcessHeartbeat; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.concurrent.atomic.AtomicInteger; + +public class PrioIndexConstructor { + + private static final Logger logger = LoggerFactory.getLogger(PrioIndexConstructor.class); + + public enum CreateReverseIndexSteps { + CONSTRUCT, + FINALIZE, + FINISHED + } + + private final Path outputFileDocs; + private final Path outputFileWords; + private final Path outputFilePositions; + private final JournalReaderSource readerSource; + private final DocIdRewriter docIdRewriter; + private final Path tmpDir; + + public PrioIndexConstructor(Path outputFileDocs, + Path outputFileWords, + Path outputFilePositions, + JournalReaderSource readerSource, + DocIdRewriter docIdRewriter, + Path tmpDir) { + this.outputFileDocs = outputFileDocs; + this.outputFileWords = outputFileWords; + this.outputFilePositions = outputFilePositions; + this.readerSource = readerSource; + this.docIdRewriter = docIdRewriter; + this.tmpDir = tmpDir; + } + + public void createReverseIndex(ProcessHeartbeat processHeartbeat, + String processName, + Path sourceBaseDir) throws IOException + { + var inputs = IndexJournalFileNames.findJournalFiles(sourceBaseDir); + if (inputs.isEmpty()) { + logger.error("No journal files in base dir {}", sourceBaseDir); + return; + } + + try (var heartbeat = processHeartbeat.createProcessTaskHeartbeat(CreateReverseIndexSteps.class, processName); + var preindexHeartbeat = processHeartbeat.createAdHocTaskHeartbeat("constructPreindexes"); + var posConstructor = new PositionsFileConstructor(outputFilePositions) + ) { + heartbeat.progress(CreateReverseIndexSteps.CONSTRUCT); + + AtomicInteger progress = new AtomicInteger(0); + + inputs + .parallelStream() + .map(in -> { + preindexHeartbeat.progress("PREINDEX/MERGE", progress.incrementAndGet(), inputs.size()); + return construct(in, posConstructor); + }) + .reduce(this::merge) + .ifPresent((index) -> { + heartbeat.progress(CreateReverseIndexSteps.FINALIZE); + finalizeIndex(index); + heartbeat.progress(CreateReverseIndexSteps.FINISHED); + }); + + heartbeat.progress(CreateReverseIndexSteps.FINISHED); + } + } + + @SneakyThrows + private PrioPreindexReference construct(Path input, PositionsFileConstructor positionsFileConstructor) { + return PrioPreindex + .constructPreindex(readerSource.construct(input), positionsFileConstructor, docIdRewriter, tmpDir) + .closeToReference(); + } + + @SneakyThrows + private PrioPreindexReference merge(PrioPreindexReference leftR, PrioPreindexReference rightR) { + + var left = leftR.open(); + var right = rightR.open(); + + try { + return PrioPreindex.merge(tmpDir, left, right).closeToReference(); + } + finally { + left.delete(); + right.delete(); + } + + + } + + @SneakyThrows + private void finalizeIndex(PrioPreindexReference finalPR) { + var finalP = finalPR.open(); + finalP.finalizeIndex(outputFileDocs, outputFileWords); + finalP.delete(); + } + + +} diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindex.java b/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindex.java new file mode 100644 index 00000000..f5449231 --- /dev/null +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindex.java @@ -0,0 +1,310 @@ +package nu.marginalia.index.construction.prio; + +import nu.marginalia.array.LongArray; +import nu.marginalia.array.LongArrayFactory; +import nu.marginalia.btree.BTreeWriter; +import nu.marginalia.index.ReverseIndexParameters; +import nu.marginalia.index.construction.CountToOffsetTransformer; +import nu.marginalia.index.construction.DocIdRewriter; +import nu.marginalia.index.construction.IndexSizeEstimator; +import nu.marginalia.index.construction.PositionsFileConstructor; +import nu.marginalia.index.journal.reader.IndexJournalReader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; + +import static nu.marginalia.array.algo.TwoArrayOperations.*; + +/** Contains the data that would go into a reverse index, + * that is, a mapping from words to documents, minus the actual + * index structure that makes the data quick to access while + * searching. + *

+ * Two preindexes can be merged into a third preindex containing + * the union of their data. This operation requires no additional + * RAM. + */ +public class PrioPreindex { + final PrioPreindexWordSegments segments; + final PrioPreindexDocuments documents; + + private static final Logger logger = LoggerFactory.getLogger(PrioPreindex.class); + + public PrioPreindex(PrioPreindexWordSegments segments, PrioPreindexDocuments documents) { + this.segments = segments; + this.documents = documents; + } + + /** Constructs a new preindex with the data associated with reader. The backing files + * will have randomly assigned names. + */ + public static PrioPreindex constructPreindex(IndexJournalReader reader, + PositionsFileConstructor positionsFileConstructor, + DocIdRewriter docIdRewriter, + Path workDir) throws IOException + { + Path segmentWordsFile = Files.createTempFile(workDir, "segment_words", ".dat"); + Path segmentCountsFile = Files.createTempFile(workDir, "segment_counts", ".dat"); + Path docsFile = Files.createTempFile(workDir, "docs", ".dat"); + + var segments = PrioPreindexWordSegments.construct(reader, segmentWordsFile, segmentCountsFile); + var docs = PrioPreindexDocuments.construct(docsFile, workDir, reader, docIdRewriter, positionsFileConstructor, segments); + return new PrioPreindex(segments, docs); + } + + /** Close the associated memory mapped areas and return + * a dehydrated version of this object that can be re-opened + * later. + */ + public PrioPreindexReference closeToReference() { + try { + return new PrioPreindexReference(segments, documents); + } + finally { + segments.force(); + documents.force(); + segments.close(); + documents.close(); + } + } + + /** Transform the preindex into a reverse index */ + public void finalizeIndex(Path outputFileDocs, Path outputFileWords) throws IOException { + var offsets = segments.counts; + + Files.deleteIfExists(outputFileDocs); + Files.deleteIfExists(outputFileWords); + + // Estimate the size of the docs index data + offsets.transformEach(0, offsets.size(), new CountToOffsetTransformer(2)); + IndexSizeEstimator sizeEstimator = new IndexSizeEstimator(ReverseIndexParameters.docsBTreeContext, 2); + offsets.fold(0, 0, offsets.size(), sizeEstimator); + + // Write the docs file + LongArray finalDocs = LongArrayFactory.mmapForWritingConfined(outputFileDocs, sizeEstimator.size); + try (var intermediateDocChannel = documents.createDocumentsFileChannel()) { + offsets.transformEachIO(0, offsets.size(), + new PrioIndexBTreeTransformer(finalDocs, 2, + ReverseIndexParameters.docsBTreeContext, + intermediateDocChannel)); + intermediateDocChannel.force(false); + } + + LongArray wordIds = segments.wordIds; + + if (offsets.size() != wordIds.size()) + throw new IllegalStateException("Offsets and word-ids of different size"); + if (offsets.size() > Integer.MAX_VALUE) { + throw new IllegalStateException("offsets.size() too big!"); + } + + // Estimate the size of the words index data + long wordsSize = ReverseIndexParameters.wordsBTreeContext.calculateSize((int) offsets.size()); + + // Construct the tree + LongArray wordsArray = LongArrayFactory.mmapForWritingConfined(outputFileWords, wordsSize); + + new BTreeWriter(wordsArray, ReverseIndexParameters.wordsBTreeContext) + .write(0, (int) offsets.size(), mapRegion -> { + for (long i = 0; i < offsets.size(); i++) { + mapRegion.set(2*i, wordIds.get(i)); + mapRegion.set(2*i + 1, offsets.get(i)); + } + }); + + finalDocs.force(); + finalDocs.close(); + wordsArray.force(); + wordsArray.close(); + + } + + /** Delete all files associated with this pre-index */ + public void delete() throws IOException { + segments.delete(); + documents.delete(); + } + + public static PrioPreindex merge(Path destDir, + PrioPreindex left, + PrioPreindex right) throws IOException { + + PrioPreindexWordSegments mergingSegment = + createMergedSegmentWordFile(destDir, left.segments, right.segments); + + var mergingIter = mergingSegment.constructionIterator(2); + var leftIter = left.segments.iterator(2); + var rightIter = right.segments.iterator(2); + + Path docsFile = Files.createTempFile(destDir, "docs", ".dat"); + + LongArray mergedDocuments = LongArrayFactory.mmapForWritingConfined(docsFile, left.documents.size() + right.documents.size()); + + leftIter.next(); + rightIter.next(); + + try (FileChannel leftChannel = left.documents.createDocumentsFileChannel(); + FileChannel rightChannel = right.documents.createDocumentsFileChannel()) + { + + while (mergingIter.canPutMore() + && leftIter.isPositionBeforeEnd() + && rightIter.isPositionBeforeEnd()) + { + final long currentWord = mergingIter.wordId; + + if (leftIter.wordId == currentWord && rightIter.wordId == currentWord) + { + // both inputs have documents for the current word + mergeSegments(leftIter, rightIter, + left.documents, right.documents, + mergedDocuments, mergingIter); + } + else if (leftIter.wordId == currentWord) { + if (!copySegment(leftIter, mergedDocuments, leftChannel, mergingIter)) + break; + } + else if (rightIter.wordId == currentWord) { + if (!copySegment(rightIter, mergedDocuments, rightChannel, mergingIter)) + break; + } + else assert false : "This should never happen"; // the helvetica scenario + } + + if (leftIter.isPositionBeforeEnd()) { + while (copySegment(leftIter, mergedDocuments, leftChannel, mergingIter)); + } + + if (rightIter.isPositionBeforeEnd()) { + while (copySegment(rightIter, mergedDocuments, rightChannel, mergingIter)); + } + + } + + if (leftIter.isPositionBeforeEnd()) + throw new IllegalStateException("Left has more to go"); + if (rightIter.isPositionBeforeEnd()) + throw new IllegalStateException("Right has more to go"); + if (mergingIter.canPutMore()) + throw new IllegalStateException("Source iters ran dry before merging iter"); + + + mergingSegment.force(); + + // We may have overestimated the size of the merged docs size in the case there were + // duplicates in the data, so we need to shrink it to the actual size we wrote. + + mergedDocuments = shrinkMergedDocuments(mergedDocuments, + docsFile, 2 * mergingSegment.totalSize()); + + return new PrioPreindex( + mergingSegment, + new PrioPreindexDocuments(mergedDocuments, docsFile) + ); + } + + /** Create a segment word file with each word from both inputs, with zero counts for all the data. + * This is an intermediate product in merging. + */ + static PrioPreindexWordSegments createMergedSegmentWordFile(Path destDir, + PrioPreindexWordSegments left, + PrioPreindexWordSegments right) throws IOException { + Path segmentWordsFile = Files.createTempFile(destDir, "segment_words", ".dat"); + Path segmentCountsFile = Files.createTempFile(destDir, "segment_counts", ".dat"); + + // We need total size to request a direct LongArray range. Seems slower, but is faster. + // ... see LongArray.directRangeIfPossible(long start, long end) + long segmentsSize = countDistinctElements(left.wordIds, right.wordIds, + 0, left.wordIds.size(), + 0, right.wordIds.size()); + + LongArray wordIdsFile = LongArrayFactory.mmapForWritingConfined(segmentWordsFile, segmentsSize); + + mergeArrays(wordIdsFile, left.wordIds, right.wordIds, + 0, + 0, left.wordIds.size(), + 0, right.wordIds.size()); + + LongArray counts = LongArrayFactory.mmapForWritingConfined(segmentCountsFile, segmentsSize); + + return new PrioPreindexWordSegments(wordIdsFile, counts, segmentWordsFile, segmentCountsFile); + } + + /** It's possible we overestimated the necessary size of the documents file, + * this will permit us to shrink it down to the smallest necessary size. + */ + private static LongArray shrinkMergedDocuments(LongArray mergedDocuments, Path docsFile, long sizeLongs) throws IOException { + + mergedDocuments.force(); + + long beforeSize = mergedDocuments.size(); + long afterSize = sizeLongs * 8; + if (beforeSize != afterSize) { + mergedDocuments.close(); + try (var bc = Files.newByteChannel(docsFile, StandardOpenOption.WRITE)) { + bc.truncate(sizeLongs * 8); + } + + logger.info("Shrunk {} from {}b to {}b", docsFile, beforeSize, afterSize); + mergedDocuments = LongArrayFactory.mmapForWritingConfined(docsFile, sizeLongs); + } + + return mergedDocuments; + } + + /** Merge contents of the segments indicated by leftIter and rightIter into the destionation + * segment, and advance the construction iterator with the appropriate size. + */ + private static void mergeSegments(PrioPreindexWordSegments.SegmentIterator leftIter, + PrioPreindexWordSegments.SegmentIterator rightIter, + PrioPreindexDocuments left, + PrioPreindexDocuments right, + LongArray dest, + PrioPreindexWordSegments.SegmentConstructionIterator destIter) + { + long segSize = mergeArrays2(dest, + left.documents, + right.documents, + destIter.startOffset, + leftIter.startOffset, leftIter.endOffset, + rightIter.startOffset, rightIter.endOffset); + + long distinct = segSize / 2; + destIter.putNext(distinct); + leftIter.next(); + rightIter.next(); + } + + /** Copy the data from the source segment at the position and length indicated by sourceIter, + * into the destination segment, and advance the construction iterator. + */ + private static boolean copySegment(PrioPreindexWordSegments.SegmentIterator sourceIter, + LongArray dest, + FileChannel sourceChannel, + PrioPreindexWordSegments.SegmentConstructionIterator mergingIter) throws IOException { + + long size = sourceIter.endOffset - sourceIter.startOffset; + long start = mergingIter.startOffset; + long end = start + size; + + dest.transferFrom(sourceChannel, + sourceIter.startOffset, + mergingIter.startOffset, + end); + + boolean putNext = mergingIter.putNext(size / 2); + boolean iterNext = sourceIter.next(); + + if (!putNext && iterNext) + throw new IllegalStateException("Source iterator ran out before dest iterator?!"); + + return iterNext; + } + + +} diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindexDocuments.java b/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindexDocuments.java new file mode 100644 index 00000000..03edb4b4 --- /dev/null +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindexDocuments.java @@ -0,0 +1,141 @@ +package nu.marginalia.index.construction.prio; + +import lombok.SneakyThrows; +import nu.marginalia.array.LongArray; +import nu.marginalia.array.LongArrayFactory; +import nu.marginalia.index.construction.DocIdRewriter; +import nu.marginalia.index.construction.PositionsFileConstructor; +import nu.marginalia.index.journal.reader.IndexJournalReader; +import nu.marginalia.rwf.RandomFileAssembler; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +/** A LongArray with document data, segmented according to + * the associated ReversePreindexWordSegments data + */ +public class PrioPreindexDocuments { + public final LongArray documents; + + private static PositionsFileConstructor positionsFileConstructor; + private static final int RECORD_SIZE_LONGS = 2; + private static final Logger logger = LoggerFactory.getLogger(PrioPreindexDocuments.class); + + public final Path file; + + public PrioPreindexDocuments(LongArray documents, Path file) { + this.documents = documents; + this.file = file; + } + + public static PrioPreindexDocuments construct( + Path docsFile, + Path workDir, + IndexJournalReader reader, + DocIdRewriter docIdRewriter, + PositionsFileConstructor positionsFileConstructor, + PrioPreindexWordSegments segments) throws IOException { + PrioPreindexDocuments.positionsFileConstructor = positionsFileConstructor; + + createUnsortedDocsFile(docsFile, workDir, reader, segments, docIdRewriter); + + LongArray docsFileMap = LongArrayFactory.mmapForModifyingShared(docsFile); + sortDocsFile(docsFileMap, segments); + + return new PrioPreindexDocuments(docsFileMap, docsFile); + } + + public FileChannel createDocumentsFileChannel() throws IOException { + return (FileChannel) Files.newByteChannel(file, StandardOpenOption.READ); + } + + + public LongArray slice(long start, long end) { + return documents.range(start, end); + } + + public long size() { + return documents.size(); + } + + private static void createUnsortedDocsFile(Path docsFile, + Path workDir, + IndexJournalReader reader, + PrioPreindexWordSegments segments, + DocIdRewriter docIdRewriter) throws IOException { + + long fileSizeLongs = RECORD_SIZE_LONGS * segments.totalSize(); + + try (var assembly = RandomFileAssembler.create(workDir, fileSizeLongs); + var pointer = reader.newPointer()) + { + + var offsetMap = segments.asMap(RECORD_SIZE_LONGS); + offsetMap.defaultReturnValue(0); + + while (pointer.nextDocument()) { + long rankEncodedId = docIdRewriter.rewriteDocId(pointer.documentId()); + for (var termData : pointer) { + long termId = termData.termId(); + + long offset = offsetMap.addTo(termId, RECORD_SIZE_LONGS); + + // write position data to the positions file and get the offset + long encodedPosOffset = positionsFileConstructor.add((byte) termData.metadata(), termData.positionsBuffer()); + + assembly.put(offset + 0, rankEncodedId); + assembly.put(offset + 1, encodedPosOffset); + } + } + + assembly.write(docsFile); + } + } + + @SneakyThrows + private static void sortDocsFile(LongArray docsFileMap, PrioPreindexWordSegments segments) throws IOException { + + var iter = segments.iterator(RECORD_SIZE_LONGS); + + ExecutorService sortingWorkers = Executors.newWorkStealingPool(Runtime.getRuntime().availableProcessors()); + + while (iter.next()) { + long iterStart = iter.startOffset; + long iterEnd = iter.endOffset; + + if (iter.size() < 1024) { + docsFileMap.quickSortN(RECORD_SIZE_LONGS, iterStart, iterEnd); + } + else { + sortingWorkers.execute(() -> + docsFileMap.quickSortN(RECORD_SIZE_LONGS, iterStart, iterEnd)); + } + } + + sortingWorkers.shutdown(); + while (!sortingWorkers.awaitTermination(1, TimeUnit.HOURS)); + + sortingWorkers.close(); + } + + public void delete() throws IOException { + Files.delete(this.file); + documents.close(); + } + + public void close() { + documents.close(); + } + + public void force() { + documents.force(); + } +} diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindexReference.java b/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindexReference.java new file mode 100644 index 00000000..10b590dd --- /dev/null +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindexReference.java @@ -0,0 +1,36 @@ +package nu.marginalia.index.construction.prio; + +import nu.marginalia.array.LongArrayFactory; + +import java.io.IOException; +import java.nio.file.Path; + +/** This is a dehydrated version of a PrioPreIndex, that only + * keeps references to its location on disk but does not hold associated + * memory maps. + */ +public record PrioPreindexReference( + Path wordsFile, + Path countsFile, + Path documentsFile +) +{ + public PrioPreindexReference(PrioPreindexWordSegments segments, PrioPreindexDocuments documents) { + this(segments.wordsFile, segments.countsFile, documents.file); + } + + public PrioPreindex open() throws IOException { + return new PrioPreindex( + new PrioPreindexWordSegments( + LongArrayFactory.mmapForModifyingShared(wordsFile), + LongArrayFactory.mmapForModifyingShared(countsFile), + wordsFile, + countsFile + ), + new PrioPreindexDocuments( + LongArrayFactory.mmapForModifyingShared(documentsFile), + documentsFile + ) + ); + } +} diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindexWordSegments.java b/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindexWordSegments.java new file mode 100644 index 00000000..512f10ff --- /dev/null +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindexWordSegments.java @@ -0,0 +1,205 @@ +package nu.marginalia.index.construction.prio; + +import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap; +import it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap; +import it.unimi.dsi.fastutil.longs.LongIterator; +import nu.marginalia.array.LongArray; +import nu.marginalia.array.LongArrayFactory; +import nu.marginalia.index.journal.reader.IndexJournalReader; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +/** A pair of file-backed arrays of sorted wordIds + * and the count of documents associated with each termId. + */ +public class PrioPreindexWordSegments { + public final LongArray wordIds; + public final LongArray counts; + + final Path wordsFile; + final Path countsFile; + + public PrioPreindexWordSegments(LongArray wordIds, + LongArray counts, + Path wordsFile, + Path countsFile) + { + assert wordIds.size() == counts.size(); + + this.wordIds = wordIds; + this.counts = counts; + this.wordsFile = wordsFile; + this.countsFile = countsFile; + } + + /** Returns a long-long hash map where each key is a termId, + * and each value is the start offset of the data. + */ + public Long2LongOpenHashMap asMap(int recordSize) { + if (wordIds.size() > Integer.MAX_VALUE) + throw new IllegalArgumentException("Cannot create a map with more than Integer.MAX_VALUE entries"); + + Long2LongOpenHashMap ret = new Long2LongOpenHashMap((int) wordIds.size(), 0.75f); + var iter = iterator(recordSize); + + while (iter.next()) { + ret.put(iter.wordId, iter.startOffset); + } + + return ret; + } + + public static PrioPreindexWordSegments construct(IndexJournalReader reader, + Path wordIdsFile, + Path countsFile) + throws IOException + { + Long2IntOpenHashMap countsMap = new Long2IntOpenHashMap(100_000, 0.75f); + countsMap.defaultReturnValue(0); + reader.forEachWordId(wordId -> countsMap.addTo(wordId, 1)); + + LongArray words = LongArrayFactory.mmapForWritingConfined(wordIdsFile, countsMap.size()); + LongArray counts = LongArrayFactory.mmapForWritingConfined(countsFile, countsMap.size()); + + // Create the words file by iterating over the map and inserting them into + // the words file in whatever bizarro hash table order they appear in + long i = 0; + LongIterator iter = countsMap.keySet().iterator(); + while (iter.hasNext()) { + words.set(i++, iter.nextLong()); + } + + // Sort the words file + words.sort(0, counts.size()); + + // Populate the counts + for (i = 0; i < countsMap.size(); i++) { + counts.set(i, countsMap.get(words.get(i))); + } + + return new PrioPreindexWordSegments(words, counts, wordIdsFile, countsFile); + } + + public SegmentIterator iterator(int recordSize) { + return new SegmentIterator(recordSize); + } + public SegmentConstructionIterator constructionIterator(int recordSize) { + return new SegmentConstructionIterator(recordSize); + } + + public long totalSize() { + return counts.fold(0, 0, counts.size(), Long::sum); + } + + public void delete() throws IOException { + Files.delete(countsFile); + Files.delete(wordsFile); + + counts.close(); + wordIds.close(); + } + + public void force() { + counts.force(); + wordIds.force(); + } + + public void close() { + wordIds.close(); + counts.close(); + } + + public class SegmentIterator { + private final int recordSize; + private final long fileSize; + long wordId; + long startOffset = 0; + long endOffset = 0; + + private SegmentIterator(int recordSize) { + this.recordSize = recordSize; + this.fileSize = wordIds.size(); + } + + private long i = -1; + public long idx() { + return i; + } + public boolean next() { + if (++i >= fileSize) { + wordId = Long.MIN_VALUE; + return false; + } + + wordId = wordIds.get(i); + startOffset = endOffset; + endOffset = startOffset + recordSize * counts.get(i); + + return true; + } + + public boolean hasMorePositions() { + return i + 1 < wordIds.size(); + } + + public boolean isPositionBeforeEnd() { + return i < wordIds.size(); + } + + public long size() { + return endOffset - startOffset; + } + } + + class SegmentConstructionIterator { + private final int recordSize; + private final long fileSize; + long wordId; + long startOffset = 0; + long endOffset = 0; + + private SegmentConstructionIterator(int recordSize) { + this.recordSize = recordSize; + this.fileSize = wordIds.size(); + if (fileSize == 0) { + throw new IllegalArgumentException("Cannot construct zero-length word segment file"); + } + this.wordId = wordIds.get(0); + } + + private long i = 0; + public long idx() { + return i; + } + + public boolean putNext(long size) { + + if (i >= fileSize) + return false; + + endOffset = startOffset + recordSize * size; + counts.set(i, size); + startOffset = endOffset; + endOffset = -1; + + i++; + + if (i == fileSize) { + // We've reached the end of the iteration and there is no + // "next" termId to fetch + wordId = Long.MIN_VALUE; + return false; + } + else { + wordId = wordIds.get(i); + return true; + } + } + + public boolean canPutMore() { + return i < wordIds.size(); + } + } +} diff --git a/code/index/index-reverse/test/nu/marginalia/index/ReverseIndexReaderTest.java b/code/index/index-reverse/test/nu/marginalia/index/ReverseIndexReaderTest.java index 2d53dd2e..5047da90 100644 --- a/code/index/index-reverse/test/nu/marginalia/index/ReverseIndexReaderTest.java +++ b/code/index/index-reverse/test/nu/marginalia/index/ReverseIndexReaderTest.java @@ -4,9 +4,9 @@ import it.unimi.dsi.fastutil.ints.IntList; import nu.marginalia.array.page.LongQueryBuffer; import nu.marginalia.index.construction.DocIdRewriter; import nu.marginalia.index.construction.PositionsFileConstructor; -import nu.marginalia.index.construction.ReversePreindex; -import nu.marginalia.index.construction.TestJournalFactory; -import nu.marginalia.index.construction.TestJournalFactory.EntryDataWithWordMeta; +import nu.marginalia.index.construction.full.FullPreindex; +import nu.marginalia.index.construction.full.TestJournalFactory; +import nu.marginalia.index.construction.full.TestJournalFactory.EntryDataWithWordMeta; import nu.marginalia.index.positions.PositionsFileReader; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -19,7 +19,7 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.List; -import static nu.marginalia.index.construction.TestJournalFactory.wm; +import static nu.marginalia.index.construction.full.TestJournalFactory.wm; import static org.junit.jupiter.api.Assertions.*; class ReverseIndexReaderTest { @@ -99,7 +99,7 @@ class ReverseIndexReaderTest { Path wordsFile = tempDir.resolve("words.dat"); try (var positionsFileConstructor = new PositionsFileConstructor(posFile)) { - var preindex = ReversePreindex.constructPreindex(reader, + var preindex = FullPreindex.constructPreindex(reader, positionsFileConstructor, DocIdRewriter.identity(), tempDir); preindex.finalizeIndex(docsFile, wordsFile); diff --git a/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexDocsTest.java b/code/index/index-reverse/test/nu/marginalia/index/construction/full/FullPreindexDocsTest.java similarity index 86% rename from code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexDocsTest.java rename to code/index/index-reverse/test/nu/marginalia/index/construction/full/FullPreindexDocsTest.java index df378228..a5c87f0f 100644 --- a/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexDocsTest.java +++ b/code/index/index-reverse/test/nu/marginalia/index/construction/full/FullPreindexDocsTest.java @@ -1,5 +1,7 @@ -package nu.marginalia.index.construction; +package nu.marginalia.index.construction.full; +import nu.marginalia.index.construction.DocIdRewriter; +import nu.marginalia.index.construction.PositionsFileConstructor; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -11,10 +13,10 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import static nu.marginalia.index.construction.TestJournalFactory.EntryData; +import static nu.marginalia.index.construction.full.TestJournalFactory.EntryData; import static org.junit.jupiter.api.Assertions.assertEquals; -class ReversePreindexDocsTest { +class FullPreindexDocsTest { Path countsFile; Path wordsIdFile; Path docsFile; @@ -57,8 +59,8 @@ class ReversePreindexDocsTest { new EntryData(-0xF00BA3L, 0, 10, 40, -100, 33) ); - var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); - var docs = ReversePreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(), new PositionsFileConstructor(positionsFile), segments); + var segments = FullPreindexWordSegments.construct(reader, wordsIdFile, countsFile); + var docs = FullPreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(), new PositionsFileConstructor(positionsFile), segments); List expected = List.of( new TestSegmentData(-100, 0, 2, new long[] { -0xF00BA3L, 0 }), @@ -86,8 +88,8 @@ class ReversePreindexDocsTest { new EntryData(-0xF00BA3L, 0, 4, 4) ); - var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); - var docs = ReversePreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(), + var segments = FullPreindexWordSegments.construct(reader, wordsIdFile, countsFile); + var docs = FullPreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(), new PositionsFileConstructor(positionsFile), segments); @@ -115,8 +117,8 @@ class ReversePreindexDocsTest { new EntryData(0xF00BA4L, 0, 15, 30, -100, 33) ); - var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); - var docs = ReversePreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(), + var segments = FullPreindexWordSegments.construct(reader, wordsIdFile, countsFile); + var docs = FullPreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(), new PositionsFileConstructor(positionsFile), segments); diff --git a/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexFinalizeTest.java b/code/index/index-reverse/test/nu/marginalia/index/construction/full/FullPreindexFinalizeTest.java similarity index 91% rename from code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexFinalizeTest.java rename to code/index/index-reverse/test/nu/marginalia/index/construction/full/FullPreindexFinalizeTest.java index e10c2c27..411f2cdc 100644 --- a/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexFinalizeTest.java +++ b/code/index/index-reverse/test/nu/marginalia/index/construction/full/FullPreindexFinalizeTest.java @@ -1,8 +1,10 @@ -package nu.marginalia.index.construction; +package nu.marginalia.index.construction.full; import nu.marginalia.array.LongArrayFactory; import nu.marginalia.btree.model.BTreeHeader; +import nu.marginalia.index.construction.DocIdRewriter; +import nu.marginalia.index.construction.PositionsFileConstructor; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -12,11 +14,11 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.*; -import static nu.marginalia.index.construction.TestJournalFactory.*; +import static nu.marginalia.index.construction.full.TestJournalFactory.*; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; -class ReversePreindexFinalizeTest { +class FullPreindexFinalizeTest { TestJournalFactory journalFactory; Path positionsFile; Path countsFile; @@ -52,7 +54,7 @@ class ReversePreindexFinalizeTest { @Test public void testFinalizeSimple() throws IOException { var reader = journalFactory.createReader(new EntryDataWithWordMeta(100, 101, wm(50, 51))); - var preindex = ReversePreindex.constructPreindex(reader, + var preindex = FullPreindex.constructPreindex(reader, new PositionsFileConstructor(positionsFile), DocIdRewriter.identity(), tempDir); @@ -90,7 +92,7 @@ class ReversePreindexFinalizeTest { new EntryDataWithWordMeta(101, 101, wm(51, 52)) ); - var preindex = ReversePreindex.constructPreindex(reader, + var preindex = FullPreindex.constructPreindex(reader, new PositionsFileConstructor(positionsFile), DocIdRewriter.identity(), tempDir); diff --git a/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexMergeTest.java b/code/index/index-reverse/test/nu/marginalia/index/construction/full/FullPreindexMergeTest.java similarity index 95% rename from code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexMergeTest.java rename to code/index/index-reverse/test/nu/marginalia/index/construction/full/FullPreindexMergeTest.java index 2bfa6556..6abe612b 100644 --- a/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexMergeTest.java +++ b/code/index/index-reverse/test/nu/marginalia/index/construction/full/FullPreindexMergeTest.java @@ -1,6 +1,8 @@ -package nu.marginalia.index.construction; +package nu.marginalia.index.construction.full; +import nu.marginalia.index.construction.DocIdRewriter; +import nu.marginalia.index.construction.PositionsFileConstructor; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -10,10 +12,10 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.*; -import static nu.marginalia.index.construction.TestJournalFactory.*; +import static nu.marginalia.index.construction.full.TestJournalFactory.*; import static org.junit.jupiter.api.Assertions.assertEquals; -class ReversePreindexMergeTest { +class FullPreindexMergeTest { TestJournalFactory journalFactory; Path countsFile; Path wordsIdFile; @@ -46,19 +48,19 @@ class ReversePreindexMergeTest { Files.delete(tempDir); } - public ReversePreindex runMergeScenario( + public FullPreindex runMergeScenario( List leftData, List rightData ) throws IOException { var reader1 = journalFactory.createReader(leftData.toArray(EntryDataWithWordMeta[]::new)); var reader2 = journalFactory.createReader(rightData.toArray(EntryDataWithWordMeta[]::new)); - var left = ReversePreindex.constructPreindex(reader1, new PositionsFileConstructor(positionsFile), DocIdRewriter.identity(), tempDir); - var right = ReversePreindex.constructPreindex(reader2, new PositionsFileConstructor(positionsFile), DocIdRewriter.identity(), tempDir); - return ReversePreindex.merge(tempDir, left, right); + var left = FullPreindex.constructPreindex(reader1, new PositionsFileConstructor(positionsFile), DocIdRewriter.identity(), tempDir); + var right = FullPreindex.constructPreindex(reader2, new PositionsFileConstructor(positionsFile), DocIdRewriter.identity(), tempDir); + return FullPreindex.merge(tempDir, left, right); } - private List getData(ReversePreindex merged) { + private List getData(FullPreindex merged) { var iter = merged.segments.iterator(2); List actual = new ArrayList<>(); while (iter.next()) { diff --git a/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexWordSegmentsTest.java b/code/index/index-reverse/test/nu/marginalia/index/construction/full/FullPreindexWordSegmentsTest.java similarity index 90% rename from code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexWordSegmentsTest.java rename to code/index/index-reverse/test/nu/marginalia/index/construction/full/FullPreindexWordSegmentsTest.java index 0ad3205a..72c13207 100644 --- a/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexWordSegmentsTest.java +++ b/code/index/index-reverse/test/nu/marginalia/index/construction/full/FullPreindexWordSegmentsTest.java @@ -1,4 +1,4 @@ -package nu.marginalia.index.construction; +package nu.marginalia.index.construction.full; import nu.marginalia.array.LongArray; import org.junit.jupiter.api.AfterEach; @@ -11,10 +11,10 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.List; -import static nu.marginalia.index.construction.TestJournalFactory.*; +import static nu.marginalia.index.construction.full.TestJournalFactory.*; import static org.junit.jupiter.api.Assertions.*; -class ReversePreindexWordSegmentsTest { +class FullPreindexWordSegmentsTest { Path countsFile; Path wordsIdFile; Path docsFile; @@ -51,7 +51,7 @@ class ReversePreindexWordSegmentsTest { new EntryData(-0xF00BA3L, 0, 1L<<33) ); - var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); + var segments = FullPreindexWordSegments.construct(reader, wordsIdFile, countsFile); var iter = segments.iterator(1); List expected = List.of( @@ -72,7 +72,7 @@ class ReversePreindexWordSegmentsTest { new EntryData(-0xF00BA3L, 0, 5, 5) ); - var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); + var segments = FullPreindexWordSegments.construct(reader, wordsIdFile, countsFile); var iter = segments.iterator(1); List expected = List.of( @@ -94,7 +94,7 @@ class ReversePreindexWordSegmentsTest { new EntryData(-0xF00BA3L, 0, 10, 40, -100, 33) ); - var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); + var segments = FullPreindexWordSegments.construct(reader, wordsIdFile, countsFile); var iter = segments.iterator(1); List expected = List.of( @@ -120,7 +120,7 @@ class ReversePreindexWordSegmentsTest { new EntryData(0xF00BA4L, 0, 15, 30, -100, 33) ); - var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); + var segments = FullPreindexWordSegments.construct(reader, wordsIdFile, countsFile); var iter = segments.iterator(1); List expected = List.of( @@ -148,7 +148,7 @@ class ReversePreindexWordSegmentsTest { LongArray countsArray = LongArray.allocate(4); wordsArray.set(0, -1, -2, -3, -4); countsArray.set(0, 2, 1, 3, 5); - var segments = new ReversePreindexWordSegments(wordsArray, countsArray, null, null); + var segments = new FullPreindexWordSegments(wordsArray, countsArray, null, null); var ritr = segments.iterator(1); assertTrue(ritr.hasMorePositions()); @@ -196,7 +196,7 @@ class ReversePreindexWordSegmentsTest { LongArray wordsArray = LongArray.allocate(4); LongArray countsArray = LongArray.allocate(4); wordsArray.set(0, -1, -2, -3, -4); - var segments = new ReversePreindexWordSegments(wordsArray, countsArray, null, null); + var segments = new FullPreindexWordSegments(wordsArray, countsArray, null, null); var citr = segments.constructionIterator(1); assertEquals(-1, citr.wordId); diff --git a/code/index/index-reverse/test/nu/marginalia/index/construction/TestJournalFactory.java b/code/index/index-reverse/test/nu/marginalia/index/construction/full/TestJournalFactory.java similarity index 98% rename from code/index/index-reverse/test/nu/marginalia/index/construction/TestJournalFactory.java rename to code/index/index-reverse/test/nu/marginalia/index/construction/full/TestJournalFactory.java index a4c15305..48bd8bc0 100644 --- a/code/index/index-reverse/test/nu/marginalia/index/construction/TestJournalFactory.java +++ b/code/index/index-reverse/test/nu/marginalia/index/construction/full/TestJournalFactory.java @@ -1,4 +1,4 @@ -package nu.marginalia.index.construction; +package nu.marginalia.index.construction.full; import nu.marginalia.index.journal.model.IndexJournalEntryData; import nu.marginalia.index.journal.model.IndexJournalEntryHeader; diff --git a/code/index/index-reverse/test/nu/marginalia/index/construction/TestSegmentData.java b/code/index/index-reverse/test/nu/marginalia/index/construction/full/TestSegmentData.java similarity index 96% rename from code/index/index-reverse/test/nu/marginalia/index/construction/TestSegmentData.java rename to code/index/index-reverse/test/nu/marginalia/index/construction/full/TestSegmentData.java index 574bb61a..f37b5975 100644 --- a/code/index/index-reverse/test/nu/marginalia/index/construction/TestSegmentData.java +++ b/code/index/index-reverse/test/nu/marginalia/index/construction/full/TestSegmentData.java @@ -1,4 +1,4 @@ -package nu.marginalia.index.construction; +package nu.marginalia.index.construction.full; import java.util.Arrays; diff --git a/code/index/test/nu/marginalia/index/CombinedIndexReaderTest.java b/code/index/test/nu/marginalia/index/CombinedIndexReaderTest.java index cd23261e..bce2a436 100644 --- a/code/index/test/nu/marginalia/index/CombinedIndexReaderTest.java +++ b/code/index/test/nu/marginalia/index/CombinedIndexReaderTest.java @@ -3,13 +3,11 @@ package nu.marginalia.index; import com.google.inject.Guice; import com.google.inject.Inject; import it.unimi.dsi.fastutil.ints.IntList; -import it.unimi.dsi.fastutil.longs.LongArrayList; -import it.unimi.dsi.fastutil.longs.LongList; import nu.marginalia.IndexLocations; import nu.marginalia.array.page.LongQueryBuffer; import nu.marginalia.hash.MurmurHash3_128; import nu.marginalia.index.construction.DocIdRewriter; -import nu.marginalia.index.construction.ReverseIndexConstructor; +import nu.marginalia.index.construction.full.FullIndexConstructor; import nu.marginalia.index.domainrankings.DomainRankings; import nu.marginalia.index.forward.ForwardIndexConverter; import nu.marginalia.index.forward.ForwardIndexFileNames; @@ -33,7 +31,6 @@ import nu.marginalia.model.idx.WordMetadata; import nu.marginalia.process.control.FakeProcessHeartbeat; import nu.marginalia.process.control.ProcessHeartbeat; import nu.marginalia.sequence.GammaCodedSequence; -import nu.marginalia.service.control.ServiceHeartbeat; import nu.marginalia.service.server.Initialization; import nu.marginalia.storage.FileStorageService; import org.junit.jupiter.api.AfterEach; @@ -247,7 +244,7 @@ public class CombinedIndexReaderTest { if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); var constructor = - new ReverseIndexConstructor( + new FullIndexConstructor( outputFileDocs, outputFileWords, outputFilePositions, @@ -267,7 +264,7 @@ public class CombinedIndexReaderTest { if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); - var constructor = new ReverseIndexConstructor( + var constructor = new FullIndexConstructor( outputFileDocs, outputFileWords, outputFilePositions, diff --git a/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationSmokeTest.java b/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationSmokeTest.java index fe6f4354..9d334c2e 100644 --- a/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationSmokeTest.java +++ b/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationSmokeTest.java @@ -14,7 +14,7 @@ import nu.marginalia.process.control.ProcessHeartbeat; import nu.marginalia.sequence.GammaCodedSequence; import nu.marginalia.storage.FileStorageService; import nu.marginalia.index.construction.DocIdRewriter; -import nu.marginalia.index.construction.ReverseIndexConstructor; +import nu.marginalia.index.construction.full.FullIndexConstructor; import nu.marginalia.index.forward.ForwardIndexConverter; import nu.marginalia.index.forward.ForwardIndexFileNames; import nu.marginalia.index.journal.model.IndexJournalEntryHeader; @@ -291,7 +291,7 @@ public class IndexQueryServiceIntegrationSmokeTest { if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); - var constructor = new ReverseIndexConstructor( + var constructor = new FullIndexConstructor( outputFileDocs, outputFileWords, outputFilePositions, @@ -313,7 +313,7 @@ public class IndexQueryServiceIntegrationSmokeTest { if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); - var constructor = new ReverseIndexConstructor( + var constructor = new FullIndexConstructor( outputFileDocs, outputFileWords, outputFilePositions, diff --git a/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationTest.java b/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationTest.java index 2662ed6b..2a24e350 100644 --- a/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationTest.java +++ b/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationTest.java @@ -7,13 +7,13 @@ import nu.marginalia.api.searchquery.model.query.SearchCoherenceConstraint; import nu.marginalia.api.searchquery.model.query.SearchSpecification; import nu.marginalia.api.searchquery.model.query.SearchQuery; import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; +import nu.marginalia.index.construction.full.FullIndexConstructor; import nu.marginalia.index.index.StatefulIndex; import nu.marginalia.index.journal.model.IndexJournalEntryData; import nu.marginalia.sequence.GammaCodedSequence; import nu.marginalia.storage.FileStorageService; import nu.marginalia.hash.MurmurHash3_128; import nu.marginalia.index.construction.DocIdRewriter; -import nu.marginalia.index.construction.ReverseIndexConstructor; import nu.marginalia.index.forward.ForwardIndexConverter; import nu.marginalia.index.forward.ForwardIndexFileNames; import nu.marginalia.index.journal.model.IndexJournalEntryHeader; @@ -493,7 +493,7 @@ public class IndexQueryServiceIntegrationTest { if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); var constructor = - new ReverseIndexConstructor( + new FullIndexConstructor( outputFileDocs, outputFileWords, outputFilePositions, @@ -513,7 +513,7 @@ public class IndexQueryServiceIntegrationTest { if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); - var constructor = new ReverseIndexConstructor( + var constructor = new FullIndexConstructor( outputFileDocs, outputFileWords, outputFilePositions, diff --git a/code/processes/index-constructor-process/java/nu/marginalia/index/IndexConstructorMain.java b/code/processes/index-constructor-process/java/nu/marginalia/index/IndexConstructorMain.java index eac907eb..96b53799 100644 --- a/code/processes/index-constructor-process/java/nu/marginalia/index/IndexConstructorMain.java +++ b/code/processes/index-constructor-process/java/nu/marginalia/index/IndexConstructorMain.java @@ -6,10 +6,11 @@ import com.google.inject.Inject; import nu.marginalia.IndexLocations; import nu.marginalia.ProcessConfiguration; import nu.marginalia.ProcessConfigurationModule; +import nu.marginalia.index.construction.prio.PrioIndexConstructor; import nu.marginalia.index.domainrankings.DomainRankings; import nu.marginalia.service.ProcessMainClass; import nu.marginalia.storage.FileStorageService; -import nu.marginalia.index.construction.ReverseIndexConstructor; +import nu.marginalia.index.construction.full.FullIndexConstructor; import nu.marginalia.index.forward.ForwardIndexConverter; import nu.marginalia.index.forward.ForwardIndexFileNames; import nu.marginalia.index.journal.reader.IndexJournalReader; @@ -117,7 +118,7 @@ public class IndexConstructorMain extends ProcessMainClass { if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); - var constructor = new ReverseIndexConstructor( + var constructor = new FullIndexConstructor( outputFileDocs, outputFileWords, outputFilePositions, @@ -142,7 +143,7 @@ public class IndexConstructorMain extends ProcessMainClass { // important to the document. This filter will act on the encoded {@see WordMetadata} LongPredicate wordMetaFilter = getPriorityIndexWordMetaFilter(); - var constructor = new ReverseIndexConstructor( + var constructor = new PrioIndexConstructor( outputFileDocs, outputFileWords, outputFilePositions, diff --git a/code/tools/integration-test/test/nu/marginalia/IntegrationTest.java b/code/tools/integration-test/test/nu/marginalia/IntegrationTest.java index 5428ccec..2cd178f2 100644 --- a/code/tools/integration-test/test/nu/marginalia/IntegrationTest.java +++ b/code/tools/integration-test/test/nu/marginalia/IntegrationTest.java @@ -17,7 +17,7 @@ import nu.marginalia.functions.searchquery.QueryFactory; import nu.marginalia.index.IndexGrpcService; import nu.marginalia.index.ReverseIndexFullFileNames; import nu.marginalia.index.ReverseIndexPrioFileNames; -import nu.marginalia.index.construction.ReverseIndexConstructor; +import nu.marginalia.index.construction.full.FullIndexConstructor; import nu.marginalia.index.domainrankings.DomainRankings; import nu.marginalia.index.forward.ForwardIndexConverter; import nu.marginalia.index.forward.ForwardIndexFileNames; @@ -244,7 +244,7 @@ public class IntegrationTest { if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); - var constructor = new ReverseIndexConstructor( + var constructor = new FullIndexConstructor( outputFileDocs, outputFileWords, outputFilePositions, @@ -269,7 +269,7 @@ public class IntegrationTest { // important to the document. This filter will act on the encoded {@see WordMetadata} LongPredicate wordMetaFilter = getPriorityIndexWordMetaFilter(); - var constructor = new ReverseIndexConstructor( + var constructor = new FullIndexConstructor( outputFileDocs, outputFileWords, outputFilePositions,