(index-reverse) Split index construction into separate packages for full and priority index

Viktor Lofgren 2024-07-06 15:44:47 +02:00
parent a4ecd5f4ce
commit 85c99ae808
24 changed files with 1006 additions and 139 deletions

View File

@@ -1,4 +1,4 @@
-package nu.marginalia.index.construction;
+package nu.marginalia.index.construction.full;
 import nu.marginalia.array.LongArray;
 import nu.marginalia.array.algo.LongArrayTransformations;
@@ -9,7 +9,7 @@ import java.io.IOException;
 import java.nio.channels.FileChannel;
 /** Constructs the BTrees in a reverse index */
-public class ReverseIndexBTreeTransformer implements LongArrayTransformations.LongIOTransformer {
+public class FullIndexBTreeTransformer implements LongArrayTransformations.LongIOTransformer {
     private final BTreeWriter writer;
     private final FileChannel intermediateChannel;
@@ -18,10 +18,10 @@ public class ReverseIndexBTreeTransformer implements LongArrayTransformations.LongIOTransformer {
     long start = 0;
     long writeOffset = 0;
-    public ReverseIndexBTreeTransformer(LongArray urlsFileMap,
+    public FullIndexBTreeTransformer(LongArray urlsFileMap,
                                         int entrySize,
                                         BTreeContext bTreeContext,
                                         FileChannel intermediateChannel) {
         this.writer = new BTreeWriter(urlsFileMap, bTreeContext);
         this.entrySize = entrySize;
         this.intermediateChannel = intermediateChannel;

View File

@@ -1,6 +1,9 @@
-package nu.marginalia.index.construction;
+package nu.marginalia.index.construction.full;
 import lombok.SneakyThrows;
+import nu.marginalia.index.construction.DocIdRewriter;
+import nu.marginalia.index.construction.JournalReaderSource;
+import nu.marginalia.index.construction.PositionsFileConstructor;
 import nu.marginalia.process.control.ProcessHeartbeat;
 import nu.marginalia.index.journal.IndexJournalFileNames;
 import org.slf4j.Logger;
@@ -10,9 +13,9 @@ import java.io.IOException;
 import java.nio.file.Path;
 import java.util.concurrent.atomic.AtomicInteger;
-public class ReverseIndexConstructor {
+public class FullIndexConstructor {
-    private static final Logger logger = LoggerFactory.getLogger(ReverseIndexConstructor.class);
+    private static final Logger logger = LoggerFactory.getLogger(FullIndexConstructor.class);
     public enum CreateReverseIndexSteps {
         CONSTRUCT,
@@ -27,12 +30,12 @@ public class ReverseIndexConstructor {
     private final DocIdRewriter docIdRewriter;
     private final Path tmpDir;
-    public ReverseIndexConstructor(Path outputFileDocs,
+    public FullIndexConstructor(Path outputFileDocs,
                                    Path outputFileWords,
                                    Path outputFilePositions,
                                    JournalReaderSource readerSource,
                                    DocIdRewriter docIdRewriter,
                                    Path tmpDir) {
         this.outputFileDocs = outputFileDocs;
         this.outputFileWords = outputFileWords;
         this.outputFilePositions = outputFilePositions;
@@ -77,20 +80,20 @@ public class ReverseIndexConstructor {
     }
     @SneakyThrows
-    private ReversePreindexReference construct(Path input, PositionsFileConstructor positionsFileConstructor) {
+    private FullPreindexReference construct(Path input, PositionsFileConstructor positionsFileConstructor) {
-        return ReversePreindex
+        return FullPreindex
                 .constructPreindex(readerSource.construct(input), positionsFileConstructor, docIdRewriter, tmpDir)
                 .closeToReference();
     }
     @SneakyThrows
-    private ReversePreindexReference merge(ReversePreindexReference leftR, ReversePreindexReference rightR) {
+    private FullPreindexReference merge(FullPreindexReference leftR, FullPreindexReference rightR) {
         var left = leftR.open();
         var right = rightR.open();
         try {
-            return ReversePreindex.merge(tmpDir, left, right).closeToReference();
+            return FullPreindex.merge(tmpDir, left, right).closeToReference();
         }
         finally {
             left.delete();
@@ -101,7 +104,7 @@ public class ReverseIndexConstructor {
     }
     @SneakyThrows
-    private void finalizeIndex(ReversePreindexReference finalPR) {
+    private void finalizeIndex(FullPreindexReference finalPR) {
         var finalP = finalPR.open();
         finalP.finalizeIndex(outputFileDocs, outputFileWords);
         finalP.delete();
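
For orientation, a minimal sketch of how the renamed constructor is wired up, using only the signatures visible in this diff; the paths, heartbeat and readerSource values are hypothetical placeholders, not the repo's actual call site:

// Sketch only: driving FullIndexConstructor as declared above.
// Every concrete value here is a placeholder.
FullIndexConstructor constructor = new FullIndexConstructor(
        outputDir.resolve("docs.dat"),       // outputFileDocs
        outputDir.resolve("words.dat"),      // outputFileWords
        outputDir.resolve("positions.dat"),  // outputFilePositions
        readerSource,                        // JournalReaderSource: maps a journal Path to a reader
        DocIdRewriter.identity(),            // identity rewriter, as the tests in this commit use
        tmpDir);
constructor.createReverseIndex(heartbeat, "full-index", sourceBaseDir);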

View File

@@ -1,9 +1,13 @@
-package nu.marginalia.index.construction;
+package nu.marginalia.index.construction.full;
 import nu.marginalia.array.LongArray;
 import nu.marginalia.array.LongArrayFactory;
 import nu.marginalia.btree.BTreeWriter;
 import nu.marginalia.index.ReverseIndexParameters;
+import nu.marginalia.index.construction.CountToOffsetTransformer;
+import nu.marginalia.index.construction.DocIdRewriter;
+import nu.marginalia.index.construction.IndexSizeEstimator;
+import nu.marginalia.index.construction.PositionsFileConstructor;
 import nu.marginalia.index.journal.reader.IndexJournalReader;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -25,13 +29,13 @@ import static nu.marginalia.array.algo.TwoArrayOperations.*;
  * the union of their data. This operation requires no additional
  * RAM.
  */
-public class ReversePreindex {
+public class FullPreindex {
-    final ReversePreindexWordSegments segments;
+    final FullPreindexWordSegments segments;
-    final ReversePreindexDocuments documents;
+    final FullPreindexDocuments documents;
-    private static final Logger logger = LoggerFactory.getLogger(ReversePreindex.class);
+    private static final Logger logger = LoggerFactory.getLogger(FullPreindex.class);
-    public ReversePreindex(ReversePreindexWordSegments segments, ReversePreindexDocuments documents) {
+    public FullPreindex(FullPreindexWordSegments segments, FullPreindexDocuments documents) {
         this.segments = segments;
         this.documents = documents;
     }
@@ -39,27 +43,27 @@ public class ReversePreindex {
     /** Constructs a new preindex with the data associated with reader. The backing files
      * will have randomly assigned names.
      */
-    public static ReversePreindex constructPreindex(IndexJournalReader reader,
+    public static FullPreindex constructPreindex(IndexJournalReader reader,
                                                     PositionsFileConstructor positionsFileConstructor,
                                                     DocIdRewriter docIdRewriter,
                                                     Path workDir) throws IOException
     {
         Path segmentWordsFile = Files.createTempFile(workDir, "segment_words", ".dat");
         Path segmentCountsFile = Files.createTempFile(workDir, "segment_counts", ".dat");
         Path docsFile = Files.createTempFile(workDir, "docs", ".dat");
-        var segments = ReversePreindexWordSegments.construct(reader, segmentWordsFile, segmentCountsFile);
+        var segments = FullPreindexWordSegments.construct(reader, segmentWordsFile, segmentCountsFile);
-        var docs = ReversePreindexDocuments.construct(docsFile, workDir, reader, docIdRewriter, positionsFileConstructor, segments);
+        var docs = FullPreindexDocuments.construct(docsFile, workDir, reader, docIdRewriter, positionsFileConstructor, segments);
-        return new ReversePreindex(segments, docs);
+        return new FullPreindex(segments, docs);
     }
     /** Close the associated memory mapped areas and return
      * a dehydrated version of this object that can be re-opened
      * later.
      */
-    public ReversePreindexReference closeToReference() {
+    public FullPreindexReference closeToReference() {
         try {
-            return new ReversePreindexReference(segments, documents);
+            return new FullPreindexReference(segments, documents);
         }
         finally {
             segments.force();
@@ -85,7 +89,7 @@ public class ReversePreindex {
         LongArray finalDocs = LongArrayFactory.mmapForWritingConfined(outputFileDocs, sizeEstimator.size);
         try (var intermediateDocChannel = documents.createDocumentsFileChannel()) {
             offsets.transformEachIO(0, offsets.size(),
-                    new ReverseIndexBTreeTransformer(finalDocs, 2,
+                    new FullIndexBTreeTransformer(finalDocs, 2,
                             ReverseIndexParameters.docsBTreeContext,
                             intermediateDocChannel));
             intermediateDocChannel.force(false);
@@ -126,11 +130,11 @@ public class ReversePreindex {
         documents.delete();
     }
-    public static ReversePreindex merge(Path destDir,
+    public static FullPreindex merge(Path destDir,
-                                        ReversePreindex left,
+                                     FullPreindex left,
-                                        ReversePreindex right) throws IOException {
+                                     FullPreindex right) throws IOException {
-        ReversePreindexWordSegments mergingSegment =
+        FullPreindexWordSegments mergingSegment =
                 createMergedSegmentWordFile(destDir, left.segments, right.segments);
         var mergingIter = mergingSegment.constructionIterator(2);
@@ -198,18 +202,18 @@ public class ReversePreindex {
         mergedDocuments = shrinkMergedDocuments(mergedDocuments,
                 docsFile, 2 * mergingSegment.totalSize());
-        return new ReversePreindex(
+        return new FullPreindex(
                 mergingSegment,
-                new ReversePreindexDocuments(mergedDocuments, docsFile)
+                new FullPreindexDocuments(mergedDocuments, docsFile)
         );
     }
     /** Create a segment word file with each word from both inputs, with zero counts for all the data.
      * This is an intermediate product in merging.
      */
-    static ReversePreindexWordSegments createMergedSegmentWordFile(Path destDir,
+    static FullPreindexWordSegments createMergedSegmentWordFile(Path destDir,
-                                                                   ReversePreindexWordSegments left,
+                                                                FullPreindexWordSegments left,
-                                                                   ReversePreindexWordSegments right) throws IOException {
+                                                                FullPreindexWordSegments right) throws IOException {
         Path segmentWordsFile = Files.createTempFile(destDir, "segment_words", ".dat");
         Path segmentCountsFile = Files.createTempFile(destDir, "segment_counts", ".dat");
@@ -228,7 +232,7 @@ public class ReversePreindex {
         LongArray counts = LongArrayFactory.mmapForWritingConfined(segmentCountsFile, segmentsSize);
-        return new ReversePreindexWordSegments(wordIdsFile, counts, segmentWordsFile, segmentCountsFile);
+        return new FullPreindexWordSegments(wordIdsFile, counts, segmentWordsFile, segmentCountsFile);
     }
     /** It's possible we overestimated the necessary size of the documents file,
@@ -256,12 +260,12 @@ public class ReversePreindex {
     /** Merge contents of the segments indicated by leftIter and rightIter into the destination
      * segment, and advance the construction iterator with the appropriate size.
      */
-    private static void mergeSegments(ReversePreindexWordSegments.SegmentIterator leftIter,
+    private static void mergeSegments(FullPreindexWordSegments.SegmentIterator leftIter,
-                                      ReversePreindexWordSegments.SegmentIterator rightIter,
+                                      FullPreindexWordSegments.SegmentIterator rightIter,
-                                      ReversePreindexDocuments left,
+                                      FullPreindexDocuments left,
-                                      ReversePreindexDocuments right,
+                                      FullPreindexDocuments right,
                                       LongArray dest,
-                                      ReversePreindexWordSegments.SegmentConstructionIterator destIter)
+                                      FullPreindexWordSegments.SegmentConstructionIterator destIter)
     {
         long segSize = mergeArrays2(dest,
                 left.documents,
@@ -279,10 +283,10 @@ public class ReversePreindex {
     /** Copy the data from the source segment at the position and length indicated by sourceIter,
      * into the destination segment, and advance the construction iterator.
      */
-    private static boolean copySegment(ReversePreindexWordSegments.SegmentIterator sourceIter,
+    private static boolean copySegment(FullPreindexWordSegments.SegmentIterator sourceIter,
                                        LongArray dest,
                                        FileChannel sourceChannel,
-                                       ReversePreindexWordSegments.SegmentConstructionIterator mergingIter) throws IOException {
+                                       FullPreindexWordSegments.SegmentConstructionIterator mergingIter) throws IOException {
         long size = sourceIter.endOffset - sourceIter.startOffset;
         long start = mergingIter.startOffset;
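
Taken together, the renamed classes keep the old ReversePreindex lifecycle. A hedged sketch of the construct/merge/finalize flow, using only methods that appear in this diff; the readers, positions constructor and paths are placeholders:

// Lifecycle sketch; all inputs are hypothetical placeholders.
FullPreindex left  = FullPreindex.constructPreindex(reader1, posConstructor, DocIdRewriter.identity(), workDir);
FullPreindex right = FullPreindex.constructPreindex(reader2, posConstructor, DocIdRewriter.identity(), workDir);

// merge() writes a third preindex under workDir; per the javadoc it needs no extra RAM
FullPreindex merged = FullPreindex.merge(workDir, left, right);
left.delete();
right.delete();

// closeToReference() unmaps the backing memory; open() re-hydrates it later
FullPreindexReference ref = merged.closeToReference();
FullPreindex reopened = ref.open();
reopened.finalizeIndex(outDocs, outWords);   // builds the BTree-backed reverse index
reopened.delete();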

View File

@@ -1,8 +1,10 @@
-package nu.marginalia.index.construction;
+package nu.marginalia.index.construction.full;
 import lombok.SneakyThrows;
 import nu.marginalia.array.LongArray;
 import nu.marginalia.array.LongArrayFactory;
+import nu.marginalia.index.construction.DocIdRewriter;
+import nu.marginalia.index.construction.PositionsFileConstructor;
 import nu.marginalia.index.journal.reader.IndexJournalReader;
 import nu.marginalia.rwf.RandomFileAssembler;
 import org.slf4j.Logger;
@@ -20,35 +22,35 @@ import java.util.concurrent.TimeUnit;
 /** A LongArray with document data, segmented according to
  * the associated ReversePreindexWordSegments data
  */
-public class ReversePreindexDocuments {
+public class FullPreindexDocuments {
     public final LongArray documents;
     private static PositionsFileConstructor positionsFileConstructor;
     private static final int RECORD_SIZE_LONGS = 2;
-    private static final Logger logger = LoggerFactory.getLogger(ReversePreindexDocuments.class);
+    private static final Logger logger = LoggerFactory.getLogger(FullPreindexDocuments.class);
     public final Path file;
-    public ReversePreindexDocuments(LongArray documents, Path file) {
+    public FullPreindexDocuments(LongArray documents, Path file) {
         this.documents = documents;
         this.file = file;
     }
-    public static ReversePreindexDocuments construct(
+    public static FullPreindexDocuments construct(
             Path docsFile,
             Path workDir,
             IndexJournalReader reader,
             DocIdRewriter docIdRewriter,
             PositionsFileConstructor positionsFileConstructor,
-            ReversePreindexWordSegments segments) throws IOException {
+            FullPreindexWordSegments segments) throws IOException {
-        ReversePreindexDocuments.positionsFileConstructor = positionsFileConstructor;
+        FullPreindexDocuments.positionsFileConstructor = positionsFileConstructor;
         createUnsortedDocsFile(docsFile, workDir, reader, segments, docIdRewriter);
         LongArray docsFileMap = LongArrayFactory.mmapForModifyingShared(docsFile);
         sortDocsFile(docsFileMap, segments);
-        return new ReversePreindexDocuments(docsFileMap, docsFile);
+        return new FullPreindexDocuments(docsFileMap, docsFile);
     }
     public FileChannel createDocumentsFileChannel() throws IOException {
@@ -67,7 +69,7 @@ public class ReversePreindexDocuments {
     private static void createUnsortedDocsFile(Path docsFile,
                                                Path workDir,
                                                IndexJournalReader reader,
-                                               ReversePreindexWordSegments segments,
+                                               FullPreindexWordSegments segments,
                                                DocIdRewriter docIdRewriter) throws IOException {
         long fileSizeLongs = RECORD_SIZE_LONGS * segments.totalSize();
@@ -99,7 +101,7 @@ public class ReversePreindexDocuments {
     }
     @SneakyThrows
-    private static void sortDocsFile(LongArray docsFileMap, ReversePreindexWordSegments segments) throws IOException {
+    private static void sortDocsFile(LongArray docsFileMap, FullPreindexWordSegments segments) throws IOException {
         var iter = segments.iterator(RECORD_SIZE_LONGS);

View File

@@ -1,33 +1,33 @@
-package nu.marginalia.index.construction;
+package nu.marginalia.index.construction.full;
 import nu.marginalia.array.LongArrayFactory;
 import java.io.IOException;
 import java.nio.file.Path;
-/** This is a dehydrated version of a ReversePreIndex, that only
+/** This is a dehydrated version of a FullPreIndex, that only
  * keeps references to its location on disk but does not hold associated
  * memory maps.
  */
-public record ReversePreindexReference(
+public record FullPreindexReference(
         Path wordsFile,
         Path countsFile,
         Path documentsFile
 )
 {
-    public ReversePreindexReference(ReversePreindexWordSegments segments, ReversePreindexDocuments documents) {
+    public FullPreindexReference(FullPreindexWordSegments segments, FullPreindexDocuments documents) {
         this(segments.wordsFile, segments.countsFile, documents.file);
     }
-    public ReversePreindex open() throws IOException {
+    public FullPreindex open() throws IOException {
-        return new ReversePreindex(
+        return new FullPreindex(
-                new ReversePreindexWordSegments(
+                new FullPreindexWordSegments(
                         LongArrayFactory.mmapForModifyingShared(wordsFile),
                         LongArrayFactory.mmapForModifyingShared(countsFile),
                         wordsFile,
                         countsFile
                 ),
-                new ReversePreindexDocuments(
+                new FullPreindexDocuments(
                         LongArrayFactory.mmapForModifyingShared(documentsFile),
                         documentsFile
                 )

View File

@@ -1,4 +1,4 @@
-package nu.marginalia.index.construction;
+package nu.marginalia.index.construction.full;
 import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap;
 import it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap;
@@ -14,17 +14,17 @@ import java.nio.file.Path;
 /** A pair of file-backed arrays of sorted wordIds
  * and the count of documents associated with each termId.
  */
-public class ReversePreindexWordSegments {
+public class FullPreindexWordSegments {
     public final LongArray wordIds;
     public final LongArray counts;
     final Path wordsFile;
     final Path countsFile;
-    public ReversePreindexWordSegments(LongArray wordIds,
+    public FullPreindexWordSegments(LongArray wordIds,
                                        LongArray counts,
                                        Path wordsFile,
                                        Path countsFile)
     {
         assert wordIds.size() == counts.size();
@@ -51,9 +51,9 @@ public class ReversePreindexWordSegments {
         return ret;
     }
-    public static ReversePreindexWordSegments construct(IndexJournalReader reader,
+    public static FullPreindexWordSegments construct(IndexJournalReader reader,
                                                         Path wordIdsFile,
                                                         Path countsFile)
             throws IOException
     {
         Long2IntOpenHashMap countsMap = new Long2IntOpenHashMap(100_000, 0.75f);
@@ -79,7 +79,7 @@ public class ReversePreindexWordSegments {
             counts.set(i, countsMap.get(words.get(i)));
         }
-        return new ReversePreindexWordSegments(words, counts, wordIdsFile, countsFile);
+        return new FullPreindexWordSegments(words, counts, wordIdsFile, countsFile);
     }
     public SegmentIterator iterator(int recordSize) {

View File

@@ -0,0 +1,48 @@
package nu.marginalia.index.construction.prio;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.algo.LongArrayTransformations;
import nu.marginalia.btree.BTreeWriter;
import nu.marginalia.btree.model.BTreeContext;
import java.io.IOException;
import java.nio.channels.FileChannel;
/** Constructs the BTrees in a reverse index */
public class PrioIndexBTreeTransformer implements LongArrayTransformations.LongIOTransformer {
private final BTreeWriter writer;
private final FileChannel intermediateChannel;
private final int entrySize;
long start = 0;
long writeOffset = 0;
public PrioIndexBTreeTransformer(LongArray urlsFileMap,
int entrySize,
BTreeContext bTreeContext,
FileChannel intermediateChannel) {
this.writer = new BTreeWriter(urlsFileMap, bTreeContext);
this.entrySize = entrySize;
this.intermediateChannel = intermediateChannel;
}
@Override
public long transform(long pos, long end) throws IOException {
final int size = (int) ((end - start) / entrySize);
if (size == 0) {
return -1;
}
final long offsetForBlock = writeOffset;
writeOffset += writer.write(writeOffset, size,
mapRegion -> mapRegion.transferFrom(intermediateChannel, start, 0, end - start)
);
start = end;
return offsetForBlock;
}
}
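
The contract here mirrors the full-index transformer: each call receives the end offset of one word segment, writes a BTree for the slice [start, end), and returns the block's write offset, or -1 for an empty segment. A toy model of that interaction, under the assumption that transformEachIO replaces each stored end offset with the value transform() returns:

// Toy model only; not the LongArray library code.
long[] offsets = {4, 10, 16};   // running end offsets, entrySize = 2
long start = 0, writeOffset = 0;
for (int i = 0; i < offsets.length; i++) {
    long end = offsets[i];
    int size = (int) ((end - start) / 2);
    long blockOffset = (size == 0) ? -1 : writeOffset;
    writeOffset += size;        // stand-in for writer.write(...)
    start = end;
    offsets[i] = blockOffset;   // offsets now index into the BTree file
}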

View File

@@ -0,0 +1,114 @@
package nu.marginalia.index.construction.prio;
import lombok.SneakyThrows;
import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.JournalReaderSource;
import nu.marginalia.index.construction.PositionsFileConstructor;
import nu.marginalia.index.journal.IndexJournalFileNames;
import nu.marginalia.process.control.ProcessHeartbeat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.file.Path;
import java.util.concurrent.atomic.AtomicInteger;
public class PrioIndexConstructor {
private static final Logger logger = LoggerFactory.getLogger(PrioIndexConstructor.class);
public enum CreateReverseIndexSteps {
CONSTRUCT,
FINALIZE,
FINISHED
}
private final Path outputFileDocs;
private final Path outputFileWords;
private final Path outputFilePositions;
private final JournalReaderSource readerSource;
private final DocIdRewriter docIdRewriter;
private final Path tmpDir;
public PrioIndexConstructor(Path outputFileDocs,
Path outputFileWords,
Path outputFilePositions,
JournalReaderSource readerSource,
DocIdRewriter docIdRewriter,
Path tmpDir) {
this.outputFileDocs = outputFileDocs;
this.outputFileWords = outputFileWords;
this.outputFilePositions = outputFilePositions;
this.readerSource = readerSource;
this.docIdRewriter = docIdRewriter;
this.tmpDir = tmpDir;
}
public void createReverseIndex(ProcessHeartbeat processHeartbeat,
String processName,
Path sourceBaseDir) throws IOException
{
var inputs = IndexJournalFileNames.findJournalFiles(sourceBaseDir);
if (inputs.isEmpty()) {
logger.error("No journal files in base dir {}", sourceBaseDir);
return;
}
try (var heartbeat = processHeartbeat.createProcessTaskHeartbeat(CreateReverseIndexSteps.class, processName);
var preindexHeartbeat = processHeartbeat.createAdHocTaskHeartbeat("constructPreindexes");
var posConstructor = new PositionsFileConstructor(outputFilePositions)
) {
heartbeat.progress(CreateReverseIndexSteps.CONSTRUCT);
AtomicInteger progress = new AtomicInteger(0);
inputs
.parallelStream()
.map(in -> {
preindexHeartbeat.progress("PREINDEX/MERGE", progress.incrementAndGet(), inputs.size());
return construct(in, posConstructor);
})
.reduce(this::merge)
.ifPresent((index) -> {
heartbeat.progress(CreateReverseIndexSteps.FINALIZE);
finalizeIndex(index);
heartbeat.progress(CreateReverseIndexSteps.FINISHED);
});
heartbeat.progress(CreateReverseIndexSteps.FINISHED);
}
}
@SneakyThrows
private PrioPreindexReference construct(Path input, PositionsFileConstructor positionsFileConstructor) {
return PrioPreindex
.constructPreindex(readerSource.construct(input), positionsFileConstructor, docIdRewriter, tmpDir)
.closeToReference();
}
@SneakyThrows
private PrioPreindexReference merge(PrioPreindexReference leftR, PrioPreindexReference rightR) {
var left = leftR.open();
var right = rightR.open();
try {
return PrioPreindex.merge(tmpDir, left, right).closeToReference();
}
finally {
left.delete();
right.delete();
}
}
@SneakyThrows
private void finalizeIndex(PrioPreindexReference finalPR) {
var finalP = finalPR.open();
finalP.finalizeIndex(outputFileDocs, outputFileWords);
finalP.delete();
}
}
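
The pipeline in createReverseIndex preindexes all journal files in parallel and pairwise-merges the results down to a single reference. Since Stream.reduce is free to pair intermediate results in any order, this relies on merge() producing the same union regardless of pairing. A sketch of the pattern, with strings standing in for preindex references:

// Shape of the map/reduce pipeline above; strings stand in for references.
var inputs = java.util.List.of("a", "b", "c", "d");
String merged = inputs.parallelStream()
        .map(in -> "pre(" + in + ")")                   // construct()
        .reduce((l, r) -> "merge(" + l + "," + r + ")") // merge()
        .orElseThrow();
System.out.println(merged); // e.g. merge(merge(pre(a),pre(b)),merge(pre(c),pre(d)))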

View File

@@ -0,0 +1,310 @@
package nu.marginalia.index.construction.prio;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.btree.BTreeWriter;
import nu.marginalia.index.ReverseIndexParameters;
import nu.marginalia.index.construction.CountToOffsetTransformer;
import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.IndexSizeEstimator;
import nu.marginalia.index.construction.PositionsFileConstructor;
import nu.marginalia.index.journal.reader.IndexJournalReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import static nu.marginalia.array.algo.TwoArrayOperations.*;
/** Contains the data that would go into a reverse index,
* that is, a mapping from words to documents, minus the actual
* index structure that makes the data quick to access while
* searching.
* <p>
* Two preindexes can be merged into a third preindex containing
* the union of their data. This operation requires no additional
* RAM.
*/
public class PrioPreindex {
final PrioPreindexWordSegments segments;
final PrioPreindexDocuments documents;
private static final Logger logger = LoggerFactory.getLogger(PrioPreindex.class);
public PrioPreindex(PrioPreindexWordSegments segments, PrioPreindexDocuments documents) {
this.segments = segments;
this.documents = documents;
}
/** Constructs a new preindex with the data associated with reader. The backing files
* will have randomly assigned names.
*/
public static PrioPreindex constructPreindex(IndexJournalReader reader,
PositionsFileConstructor positionsFileConstructor,
DocIdRewriter docIdRewriter,
Path workDir) throws IOException
{
Path segmentWordsFile = Files.createTempFile(workDir, "segment_words", ".dat");
Path segmentCountsFile = Files.createTempFile(workDir, "segment_counts", ".dat");
Path docsFile = Files.createTempFile(workDir, "docs", ".dat");
var segments = PrioPreindexWordSegments.construct(reader, segmentWordsFile, segmentCountsFile);
var docs = PrioPreindexDocuments.construct(docsFile, workDir, reader, docIdRewriter, positionsFileConstructor, segments);
return new PrioPreindex(segments, docs);
}
/** Close the associated memory mapped areas and return
* a dehydrated version of this object that can be re-opened
* later.
*/
public PrioPreindexReference closeToReference() {
try {
return new PrioPreindexReference(segments, documents);
}
finally {
segments.force();
documents.force();
segments.close();
documents.close();
}
}
/** Transform the preindex into a reverse index */
public void finalizeIndex(Path outputFileDocs, Path outputFileWords) throws IOException {
var offsets = segments.counts;
Files.deleteIfExists(outputFileDocs);
Files.deleteIfExists(outputFileWords);
// Estimate the size of the docs index data
offsets.transformEach(0, offsets.size(), new CountToOffsetTransformer(2));
IndexSizeEstimator sizeEstimator = new IndexSizeEstimator(ReverseIndexParameters.docsBTreeContext, 2);
offsets.fold(0, 0, offsets.size(), sizeEstimator);
// Write the docs file
LongArray finalDocs = LongArrayFactory.mmapForWritingConfined(outputFileDocs, sizeEstimator.size);
try (var intermediateDocChannel = documents.createDocumentsFileChannel()) {
offsets.transformEachIO(0, offsets.size(),
new PrioIndexBTreeTransformer(finalDocs, 2,
ReverseIndexParameters.docsBTreeContext,
intermediateDocChannel));
intermediateDocChannel.force(false);
}
LongArray wordIds = segments.wordIds;
if (offsets.size() != wordIds.size())
throw new IllegalStateException("Offsets and word-ids of different size");
if (offsets.size() > Integer.MAX_VALUE) {
throw new IllegalStateException("offsets.size() too big!");
}
// Estimate the size of the words index data
long wordsSize = ReverseIndexParameters.wordsBTreeContext.calculateSize((int) offsets.size());
// Construct the tree
LongArray wordsArray = LongArrayFactory.mmapForWritingConfined(outputFileWords, wordsSize);
new BTreeWriter(wordsArray, ReverseIndexParameters.wordsBTreeContext)
.write(0, (int) offsets.size(), mapRegion -> {
for (long i = 0; i < offsets.size(); i++) {
mapRegion.set(2*i, wordIds.get(i));
mapRegion.set(2*i + 1, offsets.get(i));
}
});
finalDocs.force();
finalDocs.close();
wordsArray.force();
wordsArray.close();
}
/** Delete all files associated with this pre-index */
public void delete() throws IOException {
segments.delete();
documents.delete();
}
public static PrioPreindex merge(Path destDir,
PrioPreindex left,
PrioPreindex right) throws IOException {
PrioPreindexWordSegments mergingSegment =
createMergedSegmentWordFile(destDir, left.segments, right.segments);
var mergingIter = mergingSegment.constructionIterator(2);
var leftIter = left.segments.iterator(2);
var rightIter = right.segments.iterator(2);
Path docsFile = Files.createTempFile(destDir, "docs", ".dat");
LongArray mergedDocuments = LongArrayFactory.mmapForWritingConfined(docsFile, left.documents.size() + right.documents.size());
leftIter.next();
rightIter.next();
try (FileChannel leftChannel = left.documents.createDocumentsFileChannel();
FileChannel rightChannel = right.documents.createDocumentsFileChannel())
{
while (mergingIter.canPutMore()
&& leftIter.isPositionBeforeEnd()
&& rightIter.isPositionBeforeEnd())
{
final long currentWord = mergingIter.wordId;
if (leftIter.wordId == currentWord && rightIter.wordId == currentWord)
{
// both inputs have documents for the current word
mergeSegments(leftIter, rightIter,
left.documents, right.documents,
mergedDocuments, mergingIter);
}
else if (leftIter.wordId == currentWord) {
if (!copySegment(leftIter, mergedDocuments, leftChannel, mergingIter))
break;
}
else if (rightIter.wordId == currentWord) {
if (!copySegment(rightIter, mergedDocuments, rightChannel, mergingIter))
break;
}
else assert false : "This should never happen"; // the helvetica scenario
}
if (leftIter.isPositionBeforeEnd()) {
while (copySegment(leftIter, mergedDocuments, leftChannel, mergingIter));
}
if (rightIter.isPositionBeforeEnd()) {
while (copySegment(rightIter, mergedDocuments, rightChannel, mergingIter));
}
}
if (leftIter.isPositionBeforeEnd())
throw new IllegalStateException("Left has more to go");
if (rightIter.isPositionBeforeEnd())
throw new IllegalStateException("Right has more to go");
if (mergingIter.canPutMore())
throw new IllegalStateException("Source iters ran dry before merging iter");
mergingSegment.force();
// We may have overestimated the size of the merged docs size in the case there were
// duplicates in the data, so we need to shrink it to the actual size we wrote.
mergedDocuments = shrinkMergedDocuments(mergedDocuments,
docsFile, 2 * mergingSegment.totalSize());
return new PrioPreindex(
mergingSegment,
new PrioPreindexDocuments(mergedDocuments, docsFile)
);
}
/** Create a segment word file with each word from both inputs, with zero counts for all the data.
* This is an intermediate product in merging.
*/
static PrioPreindexWordSegments createMergedSegmentWordFile(Path destDir,
PrioPreindexWordSegments left,
PrioPreindexWordSegments right) throws IOException {
Path segmentWordsFile = Files.createTempFile(destDir, "segment_words", ".dat");
Path segmentCountsFile = Files.createTempFile(destDir, "segment_counts", ".dat");
// We need total size to request a direct LongArray range. Seems slower, but is faster.
// ... see LongArray.directRangeIfPossible(long start, long end)
long segmentsSize = countDistinctElements(left.wordIds, right.wordIds,
0, left.wordIds.size(),
0, right.wordIds.size());
LongArray wordIdsFile = LongArrayFactory.mmapForWritingConfined(segmentWordsFile, segmentsSize);
mergeArrays(wordIdsFile, left.wordIds, right.wordIds,
0,
0, left.wordIds.size(),
0, right.wordIds.size());
LongArray counts = LongArrayFactory.mmapForWritingConfined(segmentCountsFile, segmentsSize);
return new PrioPreindexWordSegments(wordIdsFile, counts, segmentWordsFile, segmentCountsFile);
}
/** It's possible we overestimated the necessary size of the documents file,
* this will permit us to shrink it down to the smallest necessary size.
*/
private static LongArray shrinkMergedDocuments(LongArray mergedDocuments, Path docsFile, long sizeLongs) throws IOException {
mergedDocuments.force();
long beforeSize = mergedDocuments.size();
long afterSize = sizeLongs * 8;
if (beforeSize != afterSize) {
mergedDocuments.close();
try (var bc = Files.newByteChannel(docsFile, StandardOpenOption.WRITE)) {
bc.truncate(sizeLongs * 8);
}
logger.info("Shrunk {} from {}b to {}b", docsFile, beforeSize, afterSize);
mergedDocuments = LongArrayFactory.mmapForWritingConfined(docsFile, sizeLongs);
}
return mergedDocuments;
}
/** Merge contents of the segments indicated by leftIter and rightIter into the destination
* segment, and advance the construction iterator with the appropriate size.
*/
private static void mergeSegments(PrioPreindexWordSegments.SegmentIterator leftIter,
PrioPreindexWordSegments.SegmentIterator rightIter,
PrioPreindexDocuments left,
PrioPreindexDocuments right,
LongArray dest,
PrioPreindexWordSegments.SegmentConstructionIterator destIter)
{
long segSize = mergeArrays2(dest,
left.documents,
right.documents,
destIter.startOffset,
leftIter.startOffset, leftIter.endOffset,
rightIter.startOffset, rightIter.endOffset);
long distinct = segSize / 2;
destIter.putNext(distinct);
leftIter.next();
rightIter.next();
}
/** Copy the data from the source segment at the position and length indicated by sourceIter,
* into the destination segment, and advance the construction iterator.
*/
private static boolean copySegment(PrioPreindexWordSegments.SegmentIterator sourceIter,
LongArray dest,
FileChannel sourceChannel,
PrioPreindexWordSegments.SegmentConstructionIterator mergingIter) throws IOException {
long size = sourceIter.endOffset - sourceIter.startOffset;
long start = mergingIter.startOffset;
long end = start + size;
dest.transferFrom(sourceChannel,
sourceIter.startOffset,
mergingIter.startOffset,
end);
boolean putNext = mergingIter.putNext(size / 2);
boolean iterNext = sourceIter.next();
if (!putNext && iterNext)
throw new IllegalStateException("Source iterator ran out before dest iterator?!");
return iterNext;
}
}
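
The merge loop distinguishes three cases per merged word: present in both inputs (mergeSegments) or present on only one side (copySegment); the final else is unreachable because the merged word list is the union of the inputs. A small worked example of the branch selection over sorted wordId lists:

// Branch selection from PrioPreindex.merge, with plain sorted arrays
// in place of segment iterators.
long[] left  = {10, 20, 30};
long[] right = {20, 40};
for (long currentWord : new long[] {10, 20, 30, 40}) {   // the merged union
    boolean inLeft  = java.util.Arrays.binarySearch(left, currentWord) >= 0;
    boolean inRight = java.util.Arrays.binarySearch(right, currentWord) >= 0;
    if (inLeft && inRight) System.out.println(currentWord + ": mergeSegments");
    else if (inLeft)       System.out.println(currentWord + ": copySegment(left)");
    else if (inRight)      System.out.println(currentWord + ": copySegment(right)");
    else                   throw new AssertionError("the helvetica scenario");
}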

View File

@@ -0,0 +1,141 @@
package nu.marginalia.index.construction.prio;
import lombok.SneakyThrows;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.PositionsFileConstructor;
import nu.marginalia.index.journal.reader.IndexJournalReader;
import nu.marginalia.rwf.RandomFileAssembler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
/** A LongArray with document data, segmented according to
* the associated ReversePreindexWordSegments data
*/
public class PrioPreindexDocuments {
public final LongArray documents;
private static PositionsFileConstructor positionsFileConstructor;
private static final int RECORD_SIZE_LONGS = 2;
private static final Logger logger = LoggerFactory.getLogger(PrioPreindexDocuments.class);
public final Path file;
public PrioPreindexDocuments(LongArray documents, Path file) {
this.documents = documents;
this.file = file;
}
public static PrioPreindexDocuments construct(
Path docsFile,
Path workDir,
IndexJournalReader reader,
DocIdRewriter docIdRewriter,
PositionsFileConstructor positionsFileConstructor,
PrioPreindexWordSegments segments) throws IOException {
PrioPreindexDocuments.positionsFileConstructor = positionsFileConstructor;
createUnsortedDocsFile(docsFile, workDir, reader, segments, docIdRewriter);
LongArray docsFileMap = LongArrayFactory.mmapForModifyingShared(docsFile);
sortDocsFile(docsFileMap, segments);
return new PrioPreindexDocuments(docsFileMap, docsFile);
}
public FileChannel createDocumentsFileChannel() throws IOException {
return (FileChannel) Files.newByteChannel(file, StandardOpenOption.READ);
}
public LongArray slice(long start, long end) {
return documents.range(start, end);
}
public long size() {
return documents.size();
}
private static void createUnsortedDocsFile(Path docsFile,
Path workDir,
IndexJournalReader reader,
PrioPreindexWordSegments segments,
DocIdRewriter docIdRewriter) throws IOException {
long fileSizeLongs = RECORD_SIZE_LONGS * segments.totalSize();
try (var assembly = RandomFileAssembler.create(workDir, fileSizeLongs);
var pointer = reader.newPointer())
{
var offsetMap = segments.asMap(RECORD_SIZE_LONGS);
offsetMap.defaultReturnValue(0);
while (pointer.nextDocument()) {
long rankEncodedId = docIdRewriter.rewriteDocId(pointer.documentId());
for (var termData : pointer) {
long termId = termData.termId();
long offset = offsetMap.addTo(termId, RECORD_SIZE_LONGS);
// write position data to the positions file and get the offset
long encodedPosOffset = positionsFileConstructor.add((byte) termData.metadata(), termData.positionsBuffer());
assembly.put(offset + 0, rankEncodedId);
assembly.put(offset + 1, encodedPosOffset);
}
}
assembly.write(docsFile);
}
}
@SneakyThrows
private static void sortDocsFile(LongArray docsFileMap, PrioPreindexWordSegments segments) throws IOException {
var iter = segments.iterator(RECORD_SIZE_LONGS);
ExecutorService sortingWorkers = Executors.newWorkStealingPool(Runtime.getRuntime().availableProcessors());
while (iter.next()) {
long iterStart = iter.startOffset;
long iterEnd = iter.endOffset;
if (iter.size() < 1024) {
docsFileMap.quickSortN(RECORD_SIZE_LONGS, iterStart, iterEnd);
}
else {
sortingWorkers.execute(() ->
docsFileMap.quickSortN(RECORD_SIZE_LONGS, iterStart, iterEnd));
}
}
sortingWorkers.shutdown();
while (!sortingWorkers.awaitTermination(1, TimeUnit.HOURS));
sortingWorkers.close();
}
public void delete() throws IOException {
Files.delete(this.file);
documents.close();
}
public void close() {
documents.close();
}
public void force() {
documents.force();
}
}
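
createUnsortedDocsFile leans on a fastutil idiom worth spelling out: asMap seeds each termId with its segment's start offset, and addTo returns the previous value while bumping it by RECORD_SIZE_LONGS, so the map doubles as a per-term write cursor. In isolation:

// The per-term write cursor idiom, isolated; values are illustrative.
var offsetMap = new it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap();
offsetMap.put(42L, 100L);              // termId 42's records start at offset 100
offsetMap.defaultReturnValue(0);

long first  = offsetMap.addTo(42L, 2); // returns 100; cursor advances to 102
long second = offsetMap.addTo(42L, 2); // returns 102; cursor advances to 104
// each returned value is the slot for one (rankEncodedId, encodedPosOffset) pair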

View File

@@ -0,0 +1,36 @@
package nu.marginalia.index.construction.prio;
import nu.marginalia.array.LongArrayFactory;
import java.io.IOException;
import java.nio.file.Path;
/** This is a dehydrated version of a PrioPreIndex, that only
* keeps references to its location on disk but does not hold associated
* memory maps.
*/
public record PrioPreindexReference(
Path wordsFile,
Path countsFile,
Path documentsFile
)
{
public PrioPreindexReference(PrioPreindexWordSegments segments, PrioPreindexDocuments documents) {
this(segments.wordsFile, segments.countsFile, documents.file);
}
public PrioPreindex open() throws IOException {
return new PrioPreindex(
new PrioPreindexWordSegments(
LongArrayFactory.mmapForModifyingShared(wordsFile),
LongArrayFactory.mmapForModifyingShared(countsFile),
wordsFile,
countsFile
),
new PrioPreindexDocuments(
LongArrayFactory.mmapForModifyingShared(documentsFile),
documentsFile
)
);
}
}

View File

@@ -0,0 +1,205 @@
package nu.marginalia.index.construction.prio;
import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap;
import it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap;
import it.unimi.dsi.fastutil.longs.LongIterator;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.index.journal.reader.IndexJournalReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
/** A pair of file-backed arrays of sorted wordIds
* and the count of documents associated with each termId.
*/
public class PrioPreindexWordSegments {
public final LongArray wordIds;
public final LongArray counts;
final Path wordsFile;
final Path countsFile;
public PrioPreindexWordSegments(LongArray wordIds,
LongArray counts,
Path wordsFile,
Path countsFile)
{
assert wordIds.size() == counts.size();
this.wordIds = wordIds;
this.counts = counts;
this.wordsFile = wordsFile;
this.countsFile = countsFile;
}
/** Returns a long-long hash map where each key is a termId,
* and each value is the start offset of the data.
*/
public Long2LongOpenHashMap asMap(int recordSize) {
if (wordIds.size() > Integer.MAX_VALUE)
throw new IllegalArgumentException("Cannot create a map with more than Integer.MAX_VALUE entries");
Long2LongOpenHashMap ret = new Long2LongOpenHashMap((int) wordIds.size(), 0.75f);
var iter = iterator(recordSize);
while (iter.next()) {
ret.put(iter.wordId, iter.startOffset);
}
return ret;
}
public static PrioPreindexWordSegments construct(IndexJournalReader reader,
Path wordIdsFile,
Path countsFile)
throws IOException
{
Long2IntOpenHashMap countsMap = new Long2IntOpenHashMap(100_000, 0.75f);
countsMap.defaultReturnValue(0);
reader.forEachWordId(wordId -> countsMap.addTo(wordId, 1));
LongArray words = LongArrayFactory.mmapForWritingConfined(wordIdsFile, countsMap.size());
LongArray counts = LongArrayFactory.mmapForWritingConfined(countsFile, countsMap.size());
// Create the words file by iterating over the map and inserting them into
// the words file in whatever bizarro hash table order they appear in
long i = 0;
LongIterator iter = countsMap.keySet().iterator();
while (iter.hasNext()) {
words.set(i++, iter.nextLong());
}
// Sort the words file
words.sort(0, counts.size());
// Populate the counts
for (i = 0; i < countsMap.size(); i++) {
counts.set(i, countsMap.get(words.get(i)));
}
return new PrioPreindexWordSegments(words, counts, wordIdsFile, countsFile);
}
public SegmentIterator iterator(int recordSize) {
return new SegmentIterator(recordSize);
}
public SegmentConstructionIterator constructionIterator(int recordSize) {
return new SegmentConstructionIterator(recordSize);
}
public long totalSize() {
return counts.fold(0, 0, counts.size(), Long::sum);
}
public void delete() throws IOException {
Files.delete(countsFile);
Files.delete(wordsFile);
counts.close();
wordIds.close();
}
public void force() {
counts.force();
wordIds.force();
}
public void close() {
wordIds.close();
counts.close();
}
public class SegmentIterator {
private final int recordSize;
private final long fileSize;
long wordId;
long startOffset = 0;
long endOffset = 0;
private SegmentIterator(int recordSize) {
this.recordSize = recordSize;
this.fileSize = wordIds.size();
}
private long i = -1;
public long idx() {
return i;
}
public boolean next() {
if (++i >= fileSize) {
wordId = Long.MIN_VALUE;
return false;
}
wordId = wordIds.get(i);
startOffset = endOffset;
endOffset = startOffset + recordSize * counts.get(i);
return true;
}
public boolean hasMorePositions() {
return i + 1 < wordIds.size();
}
public boolean isPositionBeforeEnd() {
return i < wordIds.size();
}
public long size() {
return endOffset - startOffset;
}
}
class SegmentConstructionIterator {
private final int recordSize;
private final long fileSize;
long wordId;
long startOffset = 0;
long endOffset = 0;
private SegmentConstructionIterator(int recordSize) {
this.recordSize = recordSize;
this.fileSize = wordIds.size();
if (fileSize == 0) {
throw new IllegalArgumentException("Cannot construct zero-length word segment file");
}
this.wordId = wordIds.get(0);
}
private long i = 0;
public long idx() {
return i;
}
public boolean putNext(long size) {
if (i >= fileSize)
return false;
endOffset = startOffset + recordSize * size;
counts.set(i, size);
startOffset = endOffset;
endOffset = -1;
i++;
if (i == fileSize) {
// We've reached the end of the iteration and there is no
// "next" termId to fetch
wordId = Long.MIN_VALUE;
return false;
}
else {
wordId = wordIds.get(i);
return true;
}
}
public boolean canPutMore() {
return i < wordIds.size();
}
}
}
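
SegmentIterator recovers each word's document range purely from the running sum of count * recordSize; no per-word offsets are stored. The arithmetic in isolation:

// Offset arithmetic behind SegmentIterator.next(); values are illustrative.
long[] wordIds = {5, 9, 13};
long[] counts  = {2, 1, 3};
int recordSize = 2;
long start = 0, end = 0;
for (int i = 0; i < wordIds.length; i++) {
    start = end;
    end = start + recordSize * counts[i];
    System.out.printf("wordId %d -> docs[%d, %d)%n", wordIds[i], start, end);
}
// wordId 5 -> docs[0, 4), wordId 9 -> docs[4, 6), wordId 13 -> docs[6, 12)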

View File

@@ -4,9 +4,9 @@ import it.unimi.dsi.fastutil.ints.IntList;
 import nu.marginalia.array.page.LongQueryBuffer;
 import nu.marginalia.index.construction.DocIdRewriter;
 import nu.marginalia.index.construction.PositionsFileConstructor;
-import nu.marginalia.index.construction.ReversePreindex;
+import nu.marginalia.index.construction.full.FullPreindex;
-import nu.marginalia.index.construction.TestJournalFactory;
+import nu.marginalia.index.construction.full.TestJournalFactory;
-import nu.marginalia.index.construction.TestJournalFactory.EntryDataWithWordMeta;
+import nu.marginalia.index.construction.full.TestJournalFactory.EntryDataWithWordMeta;
 import nu.marginalia.index.positions.PositionsFileReader;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
@@ -19,7 +19,7 @@ import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.List;
-import static nu.marginalia.index.construction.TestJournalFactory.wm;
+import static nu.marginalia.index.construction.full.TestJournalFactory.wm;
 import static org.junit.jupiter.api.Assertions.*;
 class ReverseIndexReaderTest {
@@ -99,7 +99,7 @@ class ReverseIndexReaderTest {
         Path wordsFile = tempDir.resolve("words.dat");
         try (var positionsFileConstructor = new PositionsFileConstructor(posFile)) {
-            var preindex = ReversePreindex.constructPreindex(reader,
+            var preindex = FullPreindex.constructPreindex(reader,
                     positionsFileConstructor,
                     DocIdRewriter.identity(), tempDir);
             preindex.finalizeIndex(docsFile, wordsFile);

View File

@@ -1,5 +1,7 @@
-package nu.marginalia.index.construction;
+package nu.marginalia.index.construction.full;
+import nu.marginalia.index.construction.DocIdRewriter;
+import nu.marginalia.index.construction.PositionsFileConstructor;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
@@ -11,10 +13,10 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
-import static nu.marginalia.index.construction.TestJournalFactory.EntryData;
+import static nu.marginalia.index.construction.full.TestJournalFactory.EntryData;
 import static org.junit.jupiter.api.Assertions.assertEquals;
-class ReversePreindexDocsTest {
+class FullPreindexDocsTest {
     Path countsFile;
     Path wordsIdFile;
     Path docsFile;
@@ -57,8 +59,8 @@ class ReversePreindexDocsTest {
                 new EntryData(-0xF00BA3L, 0, 10, 40, -100, 33)
         );
-        var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
+        var segments = FullPreindexWordSegments.construct(reader, wordsIdFile, countsFile);
-        var docs = ReversePreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(), new PositionsFileConstructor(positionsFile), segments);
+        var docs = FullPreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(), new PositionsFileConstructor(positionsFile), segments);
         List<TestSegmentData> expected = List.of(
                 new TestSegmentData(-100, 0, 2, new long[] { -0xF00BA3L, 0 }),
@@ -86,8 +88,8 @@ class ReversePreindexDocsTest {
                 new EntryData(-0xF00BA3L, 0, 4, 4)
         );
-        var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
+        var segments = FullPreindexWordSegments.construct(reader, wordsIdFile, countsFile);
-        var docs = ReversePreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(),
+        var docs = FullPreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(),
                 new PositionsFileConstructor(positionsFile),
                 segments);
@@ -115,8 +117,8 @@ class ReversePreindexDocsTest {
                 new EntryData(0xF00BA4L, 0, 15, 30, -100, 33)
         );
-        var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
+        var segments = FullPreindexWordSegments.construct(reader, wordsIdFile, countsFile);
-        var docs = ReversePreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(),
+        var docs = FullPreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(),
                 new PositionsFileConstructor(positionsFile),
                 segments);

View File

@@ -1,8 +1,10 @@
-package nu.marginalia.index.construction;
+package nu.marginalia.index.construction.full;
 import nu.marginalia.array.LongArrayFactory;
 import nu.marginalia.btree.model.BTreeHeader;
+import nu.marginalia.index.construction.DocIdRewriter;
+import nu.marginalia.index.construction.PositionsFileConstructor;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
@@ -12,11 +14,11 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.*;
-import static nu.marginalia.index.construction.TestJournalFactory.*;
+import static nu.marginalia.index.construction.full.TestJournalFactory.*;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertTrue;
-class ReversePreindexFinalizeTest {
+class FullPreindexFinalizeTest {
     TestJournalFactory journalFactory;
     Path positionsFile;
     Path countsFile;
@@ -52,7 +54,7 @@ class ReversePreindexFinalizeTest {
     @Test
     public void testFinalizeSimple() throws IOException {
         var reader = journalFactory.createReader(new EntryDataWithWordMeta(100, 101, wm(50, 51)));
-        var preindex = ReversePreindex.constructPreindex(reader,
+        var preindex = FullPreindex.constructPreindex(reader,
                 new PositionsFileConstructor(positionsFile),
                 DocIdRewriter.identity(), tempDir);
@@ -90,7 +92,7 @@ class ReversePreindexFinalizeTest {
                 new EntryDataWithWordMeta(101, 101, wm(51, 52))
         );
-        var preindex = ReversePreindex.constructPreindex(reader,
+        var preindex = FullPreindex.constructPreindex(reader,
                 new PositionsFileConstructor(positionsFile),
                 DocIdRewriter.identity(), tempDir);

View File

@ -1,6 +1,8 @@
package nu.marginalia.index.construction; package nu.marginalia.index.construction.full;
import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.PositionsFileConstructor;
import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@ -10,10 +12,10 @@ import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.*; import java.util.*;
import static nu.marginalia.index.construction.TestJournalFactory.*; import static nu.marginalia.index.construction.full.TestJournalFactory.*;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
class ReversePreindexMergeTest { class FullPreindexMergeTest {
TestJournalFactory journalFactory; TestJournalFactory journalFactory;
Path countsFile; Path countsFile;
Path wordsIdFile; Path wordsIdFile;
@ -46,19 +48,19 @@ class ReversePreindexMergeTest {
Files.delete(tempDir); Files.delete(tempDir);
} }
public ReversePreindex runMergeScenario( public FullPreindex runMergeScenario(
List<EntryDataWithWordMeta> leftData, List<EntryDataWithWordMeta> leftData,
List<EntryDataWithWordMeta> rightData List<EntryDataWithWordMeta> rightData
) throws IOException { ) throws IOException {
var reader1 = journalFactory.createReader(leftData.toArray(EntryDataWithWordMeta[]::new)); var reader1 = journalFactory.createReader(leftData.toArray(EntryDataWithWordMeta[]::new));
var reader2 = journalFactory.createReader(rightData.toArray(EntryDataWithWordMeta[]::new)); var reader2 = journalFactory.createReader(rightData.toArray(EntryDataWithWordMeta[]::new));
var left = ReversePreindex.constructPreindex(reader1, new PositionsFileConstructor(positionsFile), DocIdRewriter.identity(), tempDir); var left = FullPreindex.constructPreindex(reader1, new PositionsFileConstructor(positionsFile), DocIdRewriter.identity(), tempDir);
var right = ReversePreindex.constructPreindex(reader2, new PositionsFileConstructor(positionsFile), DocIdRewriter.identity(), tempDir); var right = FullPreindex.constructPreindex(reader2, new PositionsFileConstructor(positionsFile), DocIdRewriter.identity(), tempDir);
return ReversePreindex.merge(tempDir, left, right); return FullPreindex.merge(tempDir, left, right);
} }
private List<TestSegmentData> getData(ReversePreindex merged) { private List<TestSegmentData> getData(FullPreindex merged) {
var iter = merged.segments.iterator(2); var iter = merged.segments.iterator(2);
List<TestSegmentData> actual = new ArrayList<>(); List<TestSegmentData> actual = new ArrayList<>();
while (iter.next()) { while (iter.next()) {
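A hypothetical invocation of the helper above, to make the shape of a merge scenario concrete. The entry values mirror ones used elsewhere in these tests and are illustrative, not taken from a specific test case:

    // Build two single-entry preindexes and merge them.
    var merged = runMergeScenario(
            List.of(new EntryDataWithWordMeta(100, 101, wm(50, 51))),
            List.of(new EntryDataWithWordMeta(101, 101, wm(51, 52))));

    // getData(merged) walks merged.segments and collects TestSegmentData
    // for assertions against the expected post-merge layout.
    var actual = getData(merged);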
View File
@ -1,4 +1,4 @@
package nu.marginalia.index.construction; package nu.marginalia.index.construction.full;
import nu.marginalia.array.LongArray; import nu.marginalia.array.LongArray;
import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.AfterEach;
@ -11,10 +11,10 @@ import java.nio.file.Path;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import static nu.marginalia.index.construction.TestJournalFactory.*; import static nu.marginalia.index.construction.full.TestJournalFactory.*;
import static org.junit.jupiter.api.Assertions.*; import static org.junit.jupiter.api.Assertions.*;
class ReversePreindexWordSegmentsTest { class FullPreindexWordSegmentsTest {
Path countsFile; Path countsFile;
Path wordsIdFile; Path wordsIdFile;
Path docsFile; Path docsFile;
@ -51,7 +51,7 @@ class ReversePreindexWordSegmentsTest {
new EntryData(-0xF00BA3L, 0, 1L<<33) new EntryData(-0xF00BA3L, 0, 1L<<33)
); );
var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); var segments = FullPreindexWordSegments.construct(reader, wordsIdFile, countsFile);
var iter = segments.iterator(1); var iter = segments.iterator(1);
List<TestSegmentData> expected = List.of( List<TestSegmentData> expected = List.of(
@ -72,7 +72,7 @@ class ReversePreindexWordSegmentsTest {
new EntryData(-0xF00BA3L, 0, 5, 5) new EntryData(-0xF00BA3L, 0, 5, 5)
); );
var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); var segments = FullPreindexWordSegments.construct(reader, wordsIdFile, countsFile);
var iter = segments.iterator(1); var iter = segments.iterator(1);
List<TestSegmentData> expected = List.of( List<TestSegmentData> expected = List.of(
@ -94,7 +94,7 @@ class ReversePreindexWordSegmentsTest {
new EntryData(-0xF00BA3L, 0, 10, 40, -100, 33) new EntryData(-0xF00BA3L, 0, 10, 40, -100, 33)
); );
var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); var segments = FullPreindexWordSegments.construct(reader, wordsIdFile, countsFile);
var iter = segments.iterator(1); var iter = segments.iterator(1);
List<TestSegmentData> expected = List.of( List<TestSegmentData> expected = List.of(
@ -120,7 +120,7 @@ class ReversePreindexWordSegmentsTest {
new EntryData(0xF00BA4L, 0, 15, 30, -100, 33) new EntryData(0xF00BA4L, 0, 15, 30, -100, 33)
); );
var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); var segments = FullPreindexWordSegments.construct(reader, wordsIdFile, countsFile);
var iter = segments.iterator(1); var iter = segments.iterator(1);
List<TestSegmentData> expected = List.of( List<TestSegmentData> expected = List.of(
@ -148,7 +148,7 @@ class ReversePreindexWordSegmentsTest {
LongArray countsArray = LongArray.allocate(4); LongArray countsArray = LongArray.allocate(4);
wordsArray.set(0, -1, -2, -3, -4); wordsArray.set(0, -1, -2, -3, -4);
countsArray.set(0, 2, 1, 3, 5); countsArray.set(0, 2, 1, 3, 5);
var segments = new ReversePreindexWordSegments(wordsArray, countsArray, null, null); var segments = new FullPreindexWordSegments(wordsArray, countsArray, null, null);
var ritr = segments.iterator(1); var ritr = segments.iterator(1);
assertTrue(ritr.hasMorePositions()); assertTrue(ritr.hasMorePositions());
@ -196,7 +196,7 @@ class ReversePreindexWordSegmentsTest {
LongArray wordsArray = LongArray.allocate(4); LongArray wordsArray = LongArray.allocate(4);
LongArray countsArray = LongArray.allocate(4); LongArray countsArray = LongArray.allocate(4);
wordsArray.set(0, -1, -2, -3, -4); wordsArray.set(0, -1, -2, -3, -4);
var segments = new ReversePreindexWordSegments(wordsArray, countsArray, null, null); var segments = new FullPreindexWordSegments(wordsArray, countsArray, null, null);
var citr = segments.constructionIterator(1); var citr = segments.constructionIterator(1);
assertEquals(-1, citr.wordId); assertEquals(-1, citr.wordId);
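Pulling the segment-iteration pattern out of the interleaved hunks, a minimal sketch of what these tests drive. The two null arguments stand in for the backing files, exactly as the tests pass them; the loop body is left empty because per-step accessors beyond wordId are not visible in this diff.

    LongArray wordsArray  = LongArray.allocate(4);
    LongArray countsArray = LongArray.allocate(4);
    wordsArray.set(0, -1, -2, -3, -4);
    countsArray.set(0, 2, 1, 3, 5);

    var segments = new FullPreindexWordSegments(wordsArray, countsArray, null, null);

    // Construction-order iteration exposes the current word id directly:
    var citr = segments.constructionIterator(1);
    assert citr.wordId == -1;

    // Regular iteration steps through one segment per word:
    var iter = segments.iterator(1);
    while (iter.next()) {
        // consume the current segment
    }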
View File
@ -1,4 +1,4 @@
package nu.marginalia.index.construction; package nu.marginalia.index.construction.full;
import nu.marginalia.index.journal.model.IndexJournalEntryData; import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.index.journal.model.IndexJournalEntryHeader; import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
View File
@ -1,4 +1,4 @@
package nu.marginalia.index.construction; package nu.marginalia.index.construction.full;
import java.util.Arrays; import java.util.Arrays;
View File
@ -3,13 +3,11 @@ package nu.marginalia.index;
import com.google.inject.Guice; import com.google.inject.Guice;
import com.google.inject.Inject; import com.google.inject.Inject;
import it.unimi.dsi.fastutil.ints.IntList; import it.unimi.dsi.fastutil.ints.IntList;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongList;
import nu.marginalia.IndexLocations; import nu.marginalia.IndexLocations;
import nu.marginalia.array.page.LongQueryBuffer; import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.hash.MurmurHash3_128; import nu.marginalia.hash.MurmurHash3_128;
import nu.marginalia.index.construction.DocIdRewriter; import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.ReverseIndexConstructor; import nu.marginalia.index.construction.full.FullIndexConstructor;
import nu.marginalia.index.domainrankings.DomainRankings; import nu.marginalia.index.domainrankings.DomainRankings;
import nu.marginalia.index.forward.ForwardIndexConverter; import nu.marginalia.index.forward.ForwardIndexConverter;
import nu.marginalia.index.forward.ForwardIndexFileNames; import nu.marginalia.index.forward.ForwardIndexFileNames;
@ -33,7 +31,6 @@ import nu.marginalia.model.idx.WordMetadata;
import nu.marginalia.process.control.FakeProcessHeartbeat; import nu.marginalia.process.control.FakeProcessHeartbeat;
import nu.marginalia.process.control.ProcessHeartbeat; import nu.marginalia.process.control.ProcessHeartbeat;
import nu.marginalia.sequence.GammaCodedSequence; import nu.marginalia.sequence.GammaCodedSequence;
import nu.marginalia.service.control.ServiceHeartbeat;
import nu.marginalia.service.server.Initialization; import nu.marginalia.service.server.Initialization;
import nu.marginalia.storage.FileStorageService; import nu.marginalia.storage.FileStorageService;
import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.AfterEach;
@ -247,7 +244,7 @@ public class CombinedIndexReaderTest {
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
var constructor = var constructor =
new ReverseIndexConstructor( new FullIndexConstructor(
outputFileDocs, outputFileDocs,
outputFileWords, outputFileWords,
outputFilePositions, outputFilePositions,
@ -267,7 +264,7 @@ public class CombinedIndexReaderTest {
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
var constructor = new ReverseIndexConstructor( var constructor = new FullIndexConstructor(
outputFileDocs, outputFileDocs,
outputFileWords, outputFileWords,
outputFilePositions, outputFilePositions,
View File
@ -14,7 +14,7 @@ import nu.marginalia.process.control.ProcessHeartbeat;
import nu.marginalia.sequence.GammaCodedSequence; import nu.marginalia.sequence.GammaCodedSequence;
import nu.marginalia.storage.FileStorageService; import nu.marginalia.storage.FileStorageService;
import nu.marginalia.index.construction.DocIdRewriter; import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.ReverseIndexConstructor; import nu.marginalia.index.construction.full.FullIndexConstructor;
import nu.marginalia.index.forward.ForwardIndexConverter; import nu.marginalia.index.forward.ForwardIndexConverter;
import nu.marginalia.index.forward.ForwardIndexFileNames; import nu.marginalia.index.forward.ForwardIndexFileNames;
import nu.marginalia.index.journal.model.IndexJournalEntryHeader; import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
@ -291,7 +291,7 @@ public class IndexQueryServiceIntegrationSmokeTest {
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
var constructor = new ReverseIndexConstructor( var constructor = new FullIndexConstructor(
outputFileDocs, outputFileDocs,
outputFileWords, outputFileWords,
outputFilePositions, outputFilePositions,
@ -313,7 +313,7 @@ public class IndexQueryServiceIntegrationSmokeTest {
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
var constructor = new ReverseIndexConstructor( var constructor = new FullIndexConstructor(
outputFileDocs, outputFileDocs,
outputFileWords, outputFileWords,
outputFilePositions, outputFilePositions,
View File
@ -7,13 +7,13 @@ import nu.marginalia.api.searchquery.model.query.SearchCoherenceConstraint;
import nu.marginalia.api.searchquery.model.query.SearchSpecification; import nu.marginalia.api.searchquery.model.query.SearchSpecification;
import nu.marginalia.api.searchquery.model.query.SearchQuery; import nu.marginalia.api.searchquery.model.query.SearchQuery;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.index.construction.full.FullIndexConstructor;
import nu.marginalia.index.index.StatefulIndex; import nu.marginalia.index.index.StatefulIndex;
import nu.marginalia.index.journal.model.IndexJournalEntryData; import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.sequence.GammaCodedSequence; import nu.marginalia.sequence.GammaCodedSequence;
import nu.marginalia.storage.FileStorageService; import nu.marginalia.storage.FileStorageService;
import nu.marginalia.hash.MurmurHash3_128; import nu.marginalia.hash.MurmurHash3_128;
import nu.marginalia.index.construction.DocIdRewriter; import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.ReverseIndexConstructor;
import nu.marginalia.index.forward.ForwardIndexConverter; import nu.marginalia.index.forward.ForwardIndexConverter;
import nu.marginalia.index.forward.ForwardIndexFileNames; import nu.marginalia.index.forward.ForwardIndexFileNames;
import nu.marginalia.index.journal.model.IndexJournalEntryHeader; import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
@ -493,7 +493,7 @@ public class IndexQueryServiceIntegrationTest {
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
var constructor = var constructor =
new ReverseIndexConstructor( new FullIndexConstructor(
outputFileDocs, outputFileDocs,
outputFileWords, outputFileWords,
outputFilePositions, outputFilePositions,
@ -513,7 +513,7 @@ public class IndexQueryServiceIntegrationTest {
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
var constructor = new ReverseIndexConstructor( var constructor = new FullIndexConstructor(
outputFileDocs, outputFileDocs,
outputFileWords, outputFileWords,
outputFilePositions, outputFilePositions,
View File
@ -6,10 +6,11 @@ import com.google.inject.Inject;
import nu.marginalia.IndexLocations; import nu.marginalia.IndexLocations;
import nu.marginalia.ProcessConfiguration; import nu.marginalia.ProcessConfiguration;
import nu.marginalia.ProcessConfigurationModule; import nu.marginalia.ProcessConfigurationModule;
import nu.marginalia.index.construction.prio.PrioIndexConstructor;
import nu.marginalia.index.domainrankings.DomainRankings; import nu.marginalia.index.domainrankings.DomainRankings;
import nu.marginalia.service.ProcessMainClass; import nu.marginalia.service.ProcessMainClass;
import nu.marginalia.storage.FileStorageService; import nu.marginalia.storage.FileStorageService;
import nu.marginalia.index.construction.ReverseIndexConstructor; import nu.marginalia.index.construction.full.FullIndexConstructor;
import nu.marginalia.index.forward.ForwardIndexConverter; import nu.marginalia.index.forward.ForwardIndexConverter;
import nu.marginalia.index.forward.ForwardIndexFileNames; import nu.marginalia.index.forward.ForwardIndexFileNames;
import nu.marginalia.index.journal.reader.IndexJournalReader; import nu.marginalia.index.journal.reader.IndexJournalReader;
@ -117,7 +118,7 @@ public class IndexConstructorMain extends ProcessMainClass {
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
var constructor = new ReverseIndexConstructor( var constructor = new FullIndexConstructor(
outputFileDocs, outputFileDocs,
outputFileWords, outputFileWords,
outputFilePositions, outputFilePositions,
@ -142,7 +143,7 @@ public class IndexConstructorMain extends ProcessMainClass {
// important to the document. This filter will act on the encoded {@see WordMetadata} // important to the document. This filter will act on the encoded {@see WordMetadata}
LongPredicate wordMetaFilter = getPriorityIndexWordMetaFilter(); LongPredicate wordMetaFilter = getPriorityIndexWordMetaFilter();
var constructor = new ReverseIndexConstructor( var constructor = new PrioIndexConstructor(
outputFileDocs, outputFileDocs,
outputFileWords, outputFileWords,
outputFilePositions, outputFilePositions,
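This second hunk is the substantive change of the commit rather than a pure rename: the priority index now gets its own constructor type instead of reusing the full index's. The filter it is paired with is only referenced here, so the following is a hypothetical sketch of what a word-meta predicate of this kind could look like; the mask value and bit layout are invented stand-ins, and the real encoding lives in WordMetadata.

    import java.util.function.LongPredicate;

    class WordMetaFilterSketch {
        // Invented mask: assume the low byte of the encoded metadata
        // carries the term flags (the real layout is in WordMetadata).
        private static final long FLAGS_MASK = 0xFFL;

        // In the spirit of getPriorityIndexWordMetaFilter(): admit a term
        // into the priority index only if some flag bit is set.
        static LongPredicate priorityIndexFilter() {
            return encodedMeta -> (encodedMeta & FLAGS_MASK) != 0;
        }

        public static void main(String[] args) {
            LongPredicate filter = priorityIndexFilter();
            System.out.println(filter.test(0x01L)); // true:  flagged term, kept
            System.out.println(filter.test(0x00L)); // false: unflagged term, dropped
        }
    }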
View File
@ -17,7 +17,7 @@ import nu.marginalia.functions.searchquery.QueryFactory;
import nu.marginalia.index.IndexGrpcService; import nu.marginalia.index.IndexGrpcService;
import nu.marginalia.index.ReverseIndexFullFileNames; import nu.marginalia.index.ReverseIndexFullFileNames;
import nu.marginalia.index.ReverseIndexPrioFileNames; import nu.marginalia.index.ReverseIndexPrioFileNames;
import nu.marginalia.index.construction.ReverseIndexConstructor; import nu.marginalia.index.construction.full.FullIndexConstructor;
import nu.marginalia.index.domainrankings.DomainRankings; import nu.marginalia.index.domainrankings.DomainRankings;
import nu.marginalia.index.forward.ForwardIndexConverter; import nu.marginalia.index.forward.ForwardIndexConverter;
import nu.marginalia.index.forward.ForwardIndexFileNames; import nu.marginalia.index.forward.ForwardIndexFileNames;
@ -244,7 +244,7 @@ public class IntegrationTest {
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
var constructor = new ReverseIndexConstructor( var constructor = new FullIndexConstructor(
outputFileDocs, outputFileDocs,
outputFileWords, outputFileWords,
outputFilePositions, outputFilePositions,
@ -269,7 +269,7 @@ public class IntegrationTest {
// important to the document. This filter will act on the encoded {@see WordMetadata} // important to the document. This filter will act on the encoded {@see WordMetadata}
LongPredicate wordMetaFilter = getPriorityIndexWordMetaFilter(); LongPredicate wordMetaFilter = getPriorityIndexWordMetaFilter();
var constructor = new ReverseIndexConstructor( var constructor = new FullIndexConstructor(
outputFileDocs, outputFileDocs,
outputFileWords, outputFileWords,
outputFilePositions, outputFilePositions,