(index) Move to a lexicon-free index design
This is a system-wide change. The index used to have a lexicon, mapping words to wordIds using a large in-memory hash table. This made index construction easier, but it also added a fairly significant RAM penalty to both the index service and the loader. The new design moves to 64-bit word identifiers calculated from the murmur hash of the keyword, and to an index construction process based on merging smaller indices. Halfway through it also became necessary to upgrade Guice, as its error reporting wasn't *quite* compatible with JDK 20.
This commit is contained in:
parent 4e694fdff6
commit 3101b74580
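Since there is no longer a lexicon to assign word ids, each keyword's identifier must be derivable from the keyword itself. A minimal illustrative sketch of that idea in Java follows; it uses Guava's murmur3_128 as a stand-in hash, so the exact hash variant, seed, and helper names here are assumptions rather than the code in this commit:

    import com.google.common.hash.Hashing;
    import java.nio.charset.StandardCharsets;

    class WordIdSketch {
        // Hypothetical helper: derive a stable 64-bit word id straight from the keyword,
        // so no shared in-memory lexicon is needed (hash variant and seed are illustrative).
        static long wordId(String keyword) {
            return Hashing.murmur3_128()
                    .hashString(keyword, StandardCharsets.UTF_8)
                    .asLong(); // first 64 bits of the 128-bit hash
        }

        public static void main(String[] args) {
            // Any process computes the same id for the same keyword, which is what
            // allows smaller indices to be built independently and merged later.
            System.out.println(Long.toHexString(wordId("marginalia")));
        }
    }

Because the ids are stable hashes rather than lexicon offsets, partial indices built from separate journal files agree on word ids and can be combined by merging their sorted word-id segments, as the new ReversePreindex and ReverseIndexConstructor code in the diff below does.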
@@ -7,8 +7,8 @@ import nu.marginalia.model.EdgeDomain;
 import org.slf4j.LoggerFactory;
 import org.slf4j.Logger;
 
-import javax.inject.Inject;
-import javax.inject.Singleton;
+import com.google.inject.Inject;
+import com.google.inject.Singleton;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
@@ -5,8 +5,8 @@ import nu.marginalia.db.storage.model.*;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import javax.inject.Inject;
-import javax.inject.Singleton;
+import com.google.inject.Inject;
+import com.google.inject.Singleton;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
@@ -5,11 +5,9 @@ public enum FileStorageType {
 CRAWL_DATA,
 PROCESSED_DATA,
 INDEX_STAGING,
-LEXICON_STAGING,
 LINKDB_STAGING,
 LINKDB_LIVE,
 INDEX_LIVE,
-LEXICON_LIVE,
 BACKUP,
 EXPORT,
 SEARCH_SETS
@@ -0,0 +1 @@
+DELETE FROM FILE_STORAGE WHERE TYPE IN ('LEXICON_STAGING', 'LEXICON_LIVE');
@@ -7,7 +7,7 @@ import nu.marginalia.language.model.WordRep;
 import nu.marginalia.term_frequency_dict.TermFrequencyDict;
 import nu.marginalia.model.EdgeUrl;
 
-import javax.inject.Inject;
+import com.google.inject.Inject;
 import java.util.*;
 import java.util.stream.Stream;
 
@@ -2,6 +2,7 @@ package nu.marginalia.ranking;
 
 import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap;
 import it.unimi.dsi.fastutil.ints.Int2ShortOpenHashMap;
+import nu.marginalia.model.id.UrlIdCodec;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -37,7 +38,8 @@ public class DomainRankings {
 return rankings.getOrDefault(domainId, (short) MAX_RANK_VALUE);
 }
 
-public float getSortRanking(int domainId) {
+public float getSortRanking(long docId) {
+int domainId = UrlIdCodec.getDomainId(docId);
 return rankings.getOrDefault(domainId, (short) MAX_RANK_VALUE) / (float) MAX_RANK_VALUE;
 }
 
@@ -16,7 +16,6 @@ dependencies {
 implementation project(':code:features-index:domain-ranking')
 implementation project(':code:features-index:index-query')
 implementation project(':code:features-index:index-journal')
-implementation project(':code:features-index:lexicon')
 implementation project(':code:common:model')
 implementation project(':code:common:process')
 
@@ -21,23 +21,23 @@ import java.nio.file.Path;
 public class ForwardIndexConverter {
 
 private final ProcessHeartbeat heartbeat;
-private final File inputFile;
 
 private final Logger logger = LoggerFactory.getLogger(getClass());
 
+private final IndexJournalReader journalReader;
 private final Path outputFileDocsId;
 private final Path outputFileDocsData;
 private final DomainRankings domainRankings;
 
 
 public ForwardIndexConverter(ProcessHeartbeat heartbeat,
-File inputFile,
+IndexJournalReader journalReader,
 Path outputFileDocsId,
 Path outputFileDocsData,
 DomainRankings domainRankings
 ) {
 this.heartbeat = heartbeat;
-this.inputFile = inputFile;
+this.journalReader = journalReader;
 this.outputFileDocsId = outputFileDocsId;
 this.outputFileDocsData = outputFileDocsData;
 this.domainRankings = domainRankings;
@@ -54,14 +54,6 @@ public class ForwardIndexConverter {
 public void convert() throws IOException {
 deleteOldFiles();
 
-IndexJournalReaderSingleCompressedFile journalReader = new IndexJournalReaderSingleCompressedFile(inputFile.toPath());
-if (journalReader.fileHeader().fileSize() <= IndexJournalReader.FILE_HEADER_SIZE_BYTES) {
-logger.warn("Bailing: Journal is empty!");
-return;
-}
-
-logger.info("Converting {} {}", inputFile, journalReader.fileHeader);
-
 logger.info("Domain Rankings size = {}", domainRankings.size());
 
 try (var progress = heartbeat.createProcessTaskHeartbeat(TaskSteps.class, "forwardIndexConverter")) {
@@ -2,15 +2,13 @@ package nu.marginalia.index.forward;
 
 import lombok.SneakyThrows;
 import nu.marginalia.index.journal.model.IndexJournalEntry;
-import nu.marginalia.index.journal.writer.IndexJournalWriterImpl;
+import nu.marginalia.index.journal.reader.IndexJournalReaderSingleCompressedFile;
 import nu.marginalia.index.journal.writer.IndexJournalWriter;
-import nu.marginalia.lexicon.journal.KeywordLexiconJournalMode;
+import nu.marginalia.index.journal.writer.IndexJournalWriterSingleFileImpl;
 import nu.marginalia.model.id.UrlIdCodec;
 import nu.marginalia.process.control.ProcessHeartbeat;
 import nu.marginalia.process.control.ProcessTaskHeartbeat;
 import nu.marginalia.ranking.DomainRankings;
-import nu.marginalia.lexicon.KeywordLexicon;
-import nu.marginalia.lexicon.journal.KeywordLexiconJournal;
 import nu.marginalia.test.TestUtil;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
@@ -29,7 +27,6 @@ import static org.mockito.Mockito.when;
 
 class ForwardIndexConverterTest {
 
-KeywordLexicon keywordLexicon;
 IndexJournalWriter writer;
 
 Path indexFile;
@@ -50,12 +47,9 @@ class ForwardIndexConverterTest {
 dictionaryFile = Files.createTempFile("tmp", ".dict");
 dictionaryFile.toFile().deleteOnExit();
 
-keywordLexicon = new KeywordLexicon(new KeywordLexiconJournal(dictionaryFile.toFile(), KeywordLexiconJournalMode.READ_WRITE));
-keywordLexicon.getOrInsert("0");
-
 indexFile = Files.createTempFile("tmp", ".idx");
 indexFile.toFile().deleteOnExit();
-writer = new IndexJournalWriterImpl(keywordLexicon, indexFile);
+writer = new IndexJournalWriterSingleFileImpl(indexFile);
 
 wordsFile1 = Files.createTempFile("words1", ".idx");
 urlsFile1 = Files.createTempFile("urls1", ".idx");
@@ -63,11 +57,9 @@ class ForwardIndexConverterTest {
 dataDir = Files.createTempDirectory(getClass().getSimpleName());
 
 for (int i = 1; i < workSetSize; i++) {
-createEntry(writer, keywordLexicon, i);
+createEntry(writer, i);
 }
 
-keywordLexicon.commitToDisk();
 writer.close();
 
 
@@ -88,13 +80,13 @@ class ForwardIndexConverterTest {
 return UrlIdCodec.encodeId((int) domain, (int) url);
 }
 
-public void createEntry(IndexJournalWriter writer, KeywordLexicon keywordLexicon, int id) {
+public void createEntry(IndexJournalWriter writer, int id) {
 int[] factors = getFactorsI(id);
 
 var entryBuilder = IndexJournalEntry.builder(createId(id, id/20), id%5);
 
 for (int i = 0; i+1 < factors.length; i+=2) {
-entryBuilder.add(keywordLexicon.getOrInsert(Integer.toString(factors[i])), -factors[i+1]);
+entryBuilder.add(factors[i], -factors[i+1]);
 }
 
 writer.put(entryBuilder.build());
@@ -108,7 +100,7 @@ class ForwardIndexConverterTest {
 when(serviceHeartbeat.createProcessTaskHeartbeat(Mockito.any(), Mockito.any()))
 .thenReturn(Mockito.mock(ProcessTaskHeartbeat.class));
 
-new ForwardIndexConverter(serviceHeartbeat, indexFile.toFile(), docsFileId, docsFileData, new DomainRankings()).convert();
+new ForwardIndexConverter(serviceHeartbeat, new IndexJournalReaderSingleCompressedFile(indexFile), docsFileId, docsFileData, new DomainRankings()).convert();
 
 var forwardReader = new ForwardIndexReader(docsFileId, docsFileData);
 
@@ -13,7 +13,6 @@ java {
 dependencies {
 implementation project(':code:libraries:array')
 implementation project(':code:common:model')
-implementation project(':code:features-index:lexicon')
 
 implementation libs.lombok
 annotationProcessor libs.lombok
@@ -22,6 +21,7 @@ dependencies {
 implementation libs.prometheus
 implementation libs.notnull
 implementation libs.rxjava
+implementation libs.guava
 implementation libs.trove
 implementation libs.zstd
 implementation libs.commons.lang3
@@ -58,9 +58,9 @@ public class IndexJournalEntryData implements Iterable<IndexJournalEntryData.Rec
 public Record next() {
 pos+=ENTRY_SIZE;
 
-return new Record((int) underlyingArray[pos], underlyingArray[pos+1]);
+return new Record(underlyingArray[pos], underlyingArray[pos+1]);
 }
 }
 
-public record Record(int wordId, long metadata) {}
+public record Record(long wordId, long metadata) {}
 }
@@ -8,6 +8,7 @@ import java.io.DataInputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.LongBuffer;
+import java.util.Arrays;
 
 public class IndexJournalReadEntry {
 public final IndexJournalEntryHeader header;
@@ -3,25 +3,33 @@ package nu.marginalia.index.journal.reader;
 import nu.marginalia.index.journal.model.IndexJournalEntryData;
 import nu.marginalia.index.journal.model.IndexJournalFileHeader;
 import nu.marginalia.index.journal.model.IndexJournalStatistics;
+import nu.marginalia.model.idx.WordFlags;
 import org.jetbrains.annotations.NotNull;
 
 import java.io.IOException;
+import java.nio.file.Path;
 import java.util.Iterator;
 import java.util.function.IntConsumer;
 import java.util.function.LongConsumer;
+import java.util.function.Predicate;
 
 public interface IndexJournalReader extends Iterable<IndexJournalReadEntry> {
 int FILE_HEADER_SIZE_LONGS = 2;
 int FILE_HEADER_SIZE_BYTES = 8 * FILE_HEADER_SIZE_LONGS;
 
-IndexJournalFileHeader fileHeader();
+static IndexJournalReader singleFile(Path fileName) throws IOException {
+return new IndexJournalReaderSingleCompressedFile(fileName);
+}
+static IndexJournalReader paging(Path baseDir) throws IOException {
+return new IndexJournalReaderPagingImpl(baseDir);
+}
 
-IndexJournalStatistics getStatistics();
-void forEachWordId(IntConsumer consumer);
+static IndexJournalReader withFilters(Path path, Predicate<IndexJournalReadEntry> entryPredicate, Predicate<IndexJournalEntryData.Record> recordPredicate) throws IOException {
+return new IndexJournalReaderSingleCompressedFile(path, entryPredicate, recordPredicate);
+}
 
 
-void forEachDocIdWordId(LongIntConsumer consumer);
+void forEachWordId(LongConsumer consumer);
 
 void forEachDocIdRecord(LongObjectConsumer<IndexJournalEntryData.Record> consumer);
 
@@ -33,13 +41,23 @@ public interface IndexJournalReader extends Iterable<IndexJournalReadEntry> {
 
 void close() throws IOException;
 
-interface BiIntConsumer {
-void accept(int left, int right);
+static IndexJournalReader singleFileWithPriorityFilters(Path path) throws IOException {
+long highPriorityFlags =
+WordFlags.Title.asBit()
+| WordFlags.Subjects.asBit()
+| WordFlags.TfIdfHigh.asBit()
+| WordFlags.NamesWords.asBit()
+| WordFlags.UrlDomain.asBit()
+| WordFlags.UrlPath.asBit()
+| WordFlags.Site.asBit()
+| WordFlags.SiteAdjacent.asBit();
+
+return new IndexJournalReaderSingleCompressedFile(path, null,
+r -> (r.metadata() & highPriorityFlags) != 0);
 }
 
-interface LongIntConsumer {
-void accept(long left, int right);
-}
-
 interface LongObjectConsumer<T> {
 void accept(long left, T right);
@@ -0,0 +1,61 @@
+package nu.marginalia.index.journal.reader;
+
+import com.google.common.collect.Iterators;
+import nu.marginalia.index.journal.model.IndexJournalEntryData;
+import nu.marginalia.index.journal.model.IndexJournalStatistics;
+import nu.marginallia.index.journal.IndexJournalFileNames;
+import org.jetbrains.annotations.NotNull;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.function.LongConsumer;
+
+public class IndexJournalReaderPagingImpl implements IndexJournalReader {
+
+private final List<IndexJournalReader> readers;
+
+public IndexJournalReaderPagingImpl(Path baseDir) throws IOException {
+var inputFiles = IndexJournalFileNames.findJournalFiles(baseDir);
+this.readers = new ArrayList<>(inputFiles.size());
+
+for (var inputFile : inputFiles) {
+readers.add(new IndexJournalReaderSingleCompressedFile(inputFile));
+}
+}
+
+@Override
+public void forEachWordId(LongConsumer consumer) {
+for (var reader : readers) {
+reader.forEachWordId(consumer);
+}
+}
+
+@Override
+public void forEachDocIdRecord(LongObjectConsumer<IndexJournalEntryData.Record> consumer) {
+for (var reader : readers) {
+reader.forEachDocIdRecord(consumer);
+}
+}
+
+@Override
+public void forEachDocId(LongConsumer consumer) {
+for (var reader : readers) {
+reader.forEachDocId(consumer);
+}
+}
+
+@Override
+public @NotNull Iterator<IndexJournalReadEntry> iterator() {
+return Iterators.concat(readers.stream().map(IndexJournalReader::iterator).iterator());
+}
+
+@Override
+public void close() throws IOException {
+for (var reader : readers) {
+reader.close();
+}
+}
+}
@@ -12,6 +12,7 @@ import java.io.*;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.StandardOpenOption;
+import java.util.Arrays;
 import java.util.Iterator;
 import java.util.function.IntConsumer;
 import java.util.function.LongConsumer;
@@ -19,15 +20,22 @@ import java.util.function.Predicate;
 
 public class IndexJournalReaderSingleCompressedFile implements IndexJournalReader {
 
-private static Path journalFile;
+private Path journalFile;
 public final IndexJournalFileHeader fileHeader;
 
+@Override
+public String toString() {
+return "IndexJournalReaderSingleCompressedFile{" + journalFile + " }";
+}
+
 private DataInputStream dataInputStream = null;
 
 final Predicate<IndexJournalReadEntry> entryPredicate;
 final Predicate<IndexJournalEntryData.Record> recordPredicate;
 
 public IndexJournalReaderSingleCompressedFile(Path file) throws IOException {
+this.journalFile = file;
+
 fileHeader = readHeader(file);
 
 this.recordPredicate = null;
@@ -35,7 +43,8 @@ public class IndexJournalReaderSingleCompressedFile implements IndexJournalReade
 }
 
 public IndexJournalReaderSingleCompressedFile(Path file, Predicate<IndexJournalReadEntry> entryPredicate, Predicate<IndexJournalEntryData.Record> recordPredicate) throws IOException {
-journalFile = file;
+this.journalFile = file;
+
 fileHeader = readHeader(file);
 
 this.recordPredicate = recordPredicate;
@@ -43,8 +52,6 @@ public class IndexJournalReaderSingleCompressedFile implements IndexJournalReade
 }
 
 private static IndexJournalFileHeader readHeader(Path file) throws IOException {
-journalFile = file;
-
 try (var raf = new RandomAccessFile(file.toFile(), "r")) {
 long unused = raf.readLong();
 long wordCount = raf.readLong();
@@ -62,10 +69,6 @@ public class IndexJournalReaderSingleCompressedFile implements IndexJournalReade
 return new DataInputStream(new ZstdInputStream(new BufferedInputStream(fileInputStream)));
 }
 
-public IndexJournalFileHeader fileHeader() {
-return fileHeader;
-}
-
 public boolean filter(IndexJournalReadEntry entry) {
 return entryPredicate == null || entryPredicate.test(entry);
 }
@@ -81,31 +84,7 @@ public class IndexJournalReaderSingleCompressedFile implements IndexJournalReade
 
 
 @Override
-public IndexJournalStatistics getStatistics() {
-int highestWord = 0;
-
-// Docs cardinality is a candidate for a HyperLogLog
-Roaring64Bitmap docsBitmap = new Roaring64Bitmap();
-
-for (var entry : this) {
-var entryData = entry.readEntry();
-
-if (filter(entry)) {
-docsBitmap.addLong(entry.docId() & 0x0000_0000_FFFF_FFFFL);
-
-for (var item : entryData) {
-if (filter(entry, item)) {
-highestWord = Integer.max(item.wordId(), highestWord);
-}
-}
-}
-}
-
-return new IndexJournalStatistics(highestWord, docsBitmap.getIntCardinality());
-}
-
-@Override
-public void forEachWordId(IntConsumer consumer) {
+public void forEachWordId(LongConsumer consumer) {
 for (var entry : this) {
 var data = entry.readEntry();
 for (var post : data) {
@@ -116,19 +95,6 @@ public class IndexJournalReaderSingleCompressedFile implements IndexJournalReade
 }
 }
 
-@Override
-public void forEachDocIdWordId(LongIntConsumer consumer) {
-for (var entry : this) {
-var data = entry.readEntry();
-
-for (var post : data) {
-if (filter(entry, post)) {
-consumer.accept(entry.docId(), post.wordId());
-}
-}
-}
-}
-
 @Override
 public void forEachDocIdRecord(LongObjectConsumer<IndexJournalEntryData.Record> consumer) {
 for (var entry : this) {
@@ -0,0 +1,48 @@
+package nu.marginalia.index.journal.writer;
+
+import lombok.SneakyThrows;
+import nu.marginalia.index.journal.model.IndexJournalEntryData;
+import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
+import nu.marginallia.index.journal.IndexJournalFileNames;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.file.Path;
+
+public class IndexJournalWriterPagingImpl implements IndexJournalWriter {
+private final Path outputDir;
+private int fileNumber = 0;
+
+private final Logger logger = LoggerFactory.getLogger(getClass());
+private IndexJournalWriter currentWriter = null;
+private int inputsForFile = 0;
+
+public IndexJournalWriterPagingImpl(Path outputDir) throws IOException {
+this.outputDir = outputDir;
+switchToNextWriter();
+
+logger.info("Creating Journal Writer {}", outputDir);
+}
+
+private void switchToNextWriter() throws IOException {
+if (currentWriter != null)
+currentWriter.close();
+
+currentWriter = new IndexJournalWriterSingleFileImpl(IndexJournalFileNames.allocateName(outputDir, fileNumber++));
+}
+
+@Override
+@SneakyThrows
+public void put(IndexJournalEntryHeader header, IndexJournalEntryData entry) {
+if (++inputsForFile > 100_000) {
+inputsForFile = 0;
+switchToNextWriter();
+}
+currentWriter.put(header, entry);
+}
+
+public void close() throws IOException {
+currentWriter.close();
+}
+}
@@ -1,12 +1,11 @@
 package nu.marginalia.index.journal.writer;
 
 import com.github.luben.zstd.ZstdDirectBufferCompressingStream;
-import com.github.luben.zstd.ZstdOutputStream;
 import lombok.SneakyThrows;
 import nu.marginalia.index.journal.model.IndexJournalEntryData;
 import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
 import nu.marginalia.index.journal.reader.IndexJournalReader;
-import nu.marginalia.lexicon.KeywordLexicon;
+import nu.marginallia.index.journal.IndexJournalFileNames;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -16,27 +15,34 @@ import java.nio.channels.FileChannel;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.StandardOpenOption;
+import java.nio.file.attribute.PosixFilePermissions;
 
-public class IndexJournalWriterImpl implements IndexJournalWriter{
-private final KeywordLexicon lexicon;
+public class IndexJournalWriterSingleFileImpl implements IndexJournalWriter{
 
 private static final int ZSTD_BUFFER_SIZE = 8192;
 private static final int DATA_BUFFER_SIZE = 8192;
 
 private final ByteBuffer dataBuffer = ByteBuffer.allocateDirect(DATA_BUFFER_SIZE);
 
 
 private final ZstdDirectBufferCompressingStream compressingStream;
-private int numEntries = 0;
 private final FileChannel fileChannel;
 
-public IndexJournalWriterImpl(KeywordLexicon lexicon, Path outputFile) throws IOException {
-this.lexicon = lexicon;
+private int numEntries = 0;
+
+private final Logger logger = LoggerFactory.getLogger(getClass());
+
+public IndexJournalWriterSingleFileImpl(Path outputFile) throws IOException {
+
+logger.info("Creating Journal Writer {}", outputFile);
+
+Files.deleteIfExists(outputFile);
+Files.createFile(outputFile, PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--")));
+
 fileChannel = FileChannel.open(outputFile, StandardOpenOption.CREATE,
 StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING);
 
 writeHeaderPlaceholder(fileChannel);
 
 compressingStream = new ZstdDirectBufferCompressingStream(ByteBuffer.allocateDirect(ZSTD_BUFFER_SIZE), 3) {
 protected ByteBuffer flushBuffer(ByteBuffer toFlush) throws IOException {
 toFlush.flip();
@@ -64,7 +70,7 @@ public class IndexJournalWriterImpl implements IndexJournalWriter{
 
 @Override
 @SneakyThrows
-public synchronized void put(IndexJournalEntryHeader header, IndexJournalEntryData entry) {
+public void put(IndexJournalEntryHeader header, IndexJournalEntryData entry) {
 if (dataBuffer.capacity() - dataBuffer.position() < 3*8) {
 dataBuffer.flip();
 compressingStream.compress(dataBuffer);
@@ -84,6 +90,7 @@ public class IndexJournalWriterImpl implements IndexJournalWriter{
 dataBuffer.clear();
 }
 else while (remaining-- > 0 && i < entry.size()) {
+
 dataBuffer.putLong(entry.underlyingArray[i++]);
 }
 }
@@ -103,7 +110,7 @@ public class IndexJournalWriterImpl implements IndexJournalWriter{
 
 ByteBuffer header = ByteBuffer.allocate(16);
 header.putLong(numEntries);
-header.putLong(lexicon.size());
+header.putLong(0);
 header.flip();
 
 while (header.position() < header.limit()) {
@@ -1,9 +1,30 @@
 package nu.marginallia.index.journal;
 
+import java.io.IOException;
+import java.nio.file.Files;
 import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
 
 public class IndexJournalFileNames {
-public static Path resolve(Path base) {
-return base.resolve("page-index.dat");
+public static Path allocateName(Path base, int idx) {
+return base.resolve(String.format("page-index-%04d.dat", idx));
+}
+
+public static List<Path> findJournalFiles(Path baseDirectory) throws IOException {
+List<Path> ret = new ArrayList<>();
+
+try (var listStream = Files.list(baseDirectory)) {
+listStream
+.filter(IndexJournalFileNames::isJournalFile)
+.sorted()
+.forEach(ret::add);
+}
+
+return ret;
+}
+
+public static boolean isJournalFile(Path file) {
+return file.toFile().getName().matches("page-index-\\d{4}.dat");
 }
 }
@@ -4,14 +4,12 @@ import nu.marginalia.index.journal.model.IndexJournalEntry;
 import nu.marginalia.index.journal.model.IndexJournalEntryData;
 import nu.marginalia.index.journal.reader.IndexJournalReader;
 import nu.marginalia.index.journal.reader.IndexJournalReaderSingleCompressedFile;
-import nu.marginalia.index.journal.writer.IndexJournalWriterImpl;
-import nu.marginalia.lexicon.KeywordLexicon;
+import nu.marginalia.index.journal.writer.IndexJournalWriterSingleFileImpl;
 import nu.marginalia.model.id.UrlIdCodec;
 import org.apache.commons.lang3.tuple.Pair;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
-import org.mockito.Mockito;
 
 import java.io.IOException;
 import java.nio.file.Files;
@@ -23,7 +21,6 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
 
 public class IndexJournalTest {
 Path tempFile;
-KeywordLexicon lexicon;
 IndexJournalReader reader;
 
 long firstDocId = UrlIdCodec.encodeId(44, 10);
@@ -32,9 +29,8 @@ public class IndexJournalTest {
 @BeforeEach
 public void setUp() throws IOException {
 tempFile = Files.createTempFile(getClass().getSimpleName(), ".dat");
-lexicon = Mockito.mock(KeywordLexicon.class);
 
-var journalWriter = new IndexJournalWriterImpl(lexicon, tempFile);
+var journalWriter = new IndexJournalWriterSingleFileImpl( tempFile);
 journalWriter.put(IndexJournalEntry.builder(44, 10, 55)
 .add(1, 2)
 .add(2, 3)
@@ -82,22 +78,7 @@ public class IndexJournalTest {
 List<Integer> expected = List.of(1, 2, 3, 5, 5 ,6);
 List<Integer> actual = new ArrayList<>();
 
-reader.forEachWordId(actual::add);
+reader.forEachWordId(i -> actual.add((int) i));
-assertEquals(expected, actual);
-}
-
-@Test
-public void forEachDocIdWordId() {
-List<Pair<Long, Integer>> expected = List.of(
-Pair.of(firstDocId, 1),
-Pair.of(firstDocId, 2),
-Pair.of(firstDocId, 3),
-Pair.of(firstDocId, 5),
-Pair.of(secondDocId, 5),
-Pair.of(secondDocId, 6));
-List<Pair<Long, Integer>> actual = new ArrayList<>();
-
-reader.forEachDocIdWordId((url, word) -> actual.add(Pair.of(url, word)));
 assertEquals(expected, actual);
 }
 
@@ -9,16 +9,16 @@ import nu.marginalia.index.query.filter.QueryFilterStepIf;
 public interface IndexQueryBuilder {
 /** Filters documents that also contain termId, within the full index.
 */
-IndexQueryBuilder alsoFull(int termId);
+IndexQueryBuilder alsoFull(long termId);
 
 /**
 * Filters documents that also contain the termId, within the priority index.
 */
-IndexQueryBuilder alsoPrio(int termIds);
+IndexQueryBuilder alsoPrio(long termIds);
 
 /** Excludes documents that contain termId, within the full index
 */
-IndexQueryBuilder notFull(int termId);
+IndexQueryBuilder notFull(long termId);
 
 IndexQueryBuilder addInclusionFilter(QueryFilterStepIf filterStep);
 
@@ -21,7 +21,7 @@ public class QueryFilterLetThrough implements QueryFilterStepIf {
 }
 
 public String describe() {
-return "[NoPass]";
+return "[PassThrough]";
 }
 
 }
@@ -18,15 +18,15 @@ dependencies {
 implementation project(':code:features-index:domain-ranking')
 implementation project(':code:features-index:index-query')
 implementation project(':code:features-index:index-journal')
-implementation project(':code:features-index:lexicon')
 implementation project(':code:common:model')
 implementation project(':code:common:process')
 
 
 implementation libs.lombok
 annotationProcessor libs.lombok
 implementation libs.bundles.slf4j
 
-implementation libs.prometheus
+implementation libs.fastutil
+
 testImplementation libs.bundles.slf4j.test
 testImplementation libs.bundles.junit
@@ -1,4 +1,4 @@
-package nu.marginalia.index.full;
+package nu.marginalia.index;
 
 import nu.marginalia.array.buffer.LongQueryBuffer;
 import nu.marginalia.btree.BTreeReader;
@@ -6,18 +6,18 @@ import nu.marginalia.index.query.EntrySource;
 
 import static java.lang.Math.min;
 
-public class ReverseIndexFullEntrySource implements EntrySource {
+public class ReverseIndexEntrySource implements EntrySource {
 private final BTreeReader reader;
 
 int pos;
 int endOffset;
 
 final int entrySize;
-private final int wordId;
+private final long wordId;
 
-public ReverseIndexFullEntrySource(BTreeReader reader,
+public ReverseIndexEntrySource(BTreeReader reader,
 int entrySize,
-int wordId) {
+long wordId) {
 this.reader = reader;
 this.entrySize = entrySize;
 this.wordId = wordId;
@@ -1,4 +1,4 @@
-package nu.marginalia.index.full;
+package nu.marginalia.index;
 
 import java.nio.file.Path;
 
@@ -0,0 +1,10 @@
+package nu.marginalia.index;
+
+import nu.marginalia.btree.model.BTreeBlockSize;
+import nu.marginalia.btree.model.BTreeContext;
+
+public class ReverseIndexParameters
+{
+public static final BTreeContext docsBTreeContext = new BTreeContext(5, 2, BTreeBlockSize.BS_2048);
+public static final BTreeContext wordsBTreeContext = new BTreeContext(5, 2, BTreeBlockSize.BS_2048);
+}
@@ -1,4 +1,4 @@
-package nu.marginalia.index.priority;
+package nu.marginalia.index;
 
 import java.nio.file.Path;
 
@@ -1,11 +1,11 @@
-package nu.marginalia.index.full;
+package nu.marginalia.index;
 
-import nu.marginalia.index.query.ReverseIndexRejectFilter;
-import nu.marginalia.index.query.ReverseIndexRetainFilter;
 import nu.marginalia.array.LongArray;
 import nu.marginalia.btree.BTreeReader;
 import nu.marginalia.index.query.EmptyEntrySource;
 import nu.marginalia.index.query.EntrySource;
+import nu.marginalia.index.query.ReverseIndexRejectFilter;
+import nu.marginalia.index.query.ReverseIndexRetainFilter;
 import nu.marginalia.index.query.filter.QueryFilterLetThrough;
 import nu.marginalia.index.query.filter.QueryFilterNoPass;
 import nu.marginalia.index.query.filter.QueryFilterStepIf;
@@ -15,18 +15,22 @@ import org.slf4j.LoggerFactory;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.util.Arrays;
 
-public class ReverseIndexFullReader {
+public class ReverseIndexReader {
 private final LongArray words;
 private final LongArray documents;
+private final long wordsDataOffset;
 private final Logger logger = LoggerFactory.getLogger(getClass());
+private final BTreeReader wordsBTreeReader;
 
-public ReverseIndexFullReader(Path words, Path documents) throws IOException {
+public ReverseIndexReader(Path words, Path documents) throws IOException {
 if (!Files.exists(words) || !Files.exists(documents)) {
 this.words = null;
 this.documents = null;
+this.wordsBTreeReader = null;
+this.wordsDataOffset = -1;
 return;
 }
 
@@ -34,62 +38,52 @@ public class ReverseIndexFullReader {
 
 this.words = LongArray.mmapRead(words);
 this.documents = LongArray.mmapRead(documents);
 
+wordsBTreeReader = new BTreeReader(this.words, ReverseIndexParameters.wordsBTreeContext, 0);
+wordsDataOffset = wordsBTreeReader.getHeader().dataOffsetLongs();
 }
 
-public boolean isWordInDoc(int wordId, long documentId) {
-if (wordId < 0) {
-return false;
-}
-
-long offset = words.get(wordId);
-
-if (offset < 0) {
-return false;
-}
-
-return createReaderNew(offset).findEntry(documentId) >= 0;
+private long wordOffset(long wordId) {
+long idx = wordsBTreeReader.findEntry(wordId);
+
+if (idx < 0)
+return -1L;
+
+return words.get(wordsDataOffset + idx + 1);
 }
 
-public EntrySource documents(int wordId) {
+public EntrySource documents(long wordId) {
 if (null == words) {
 logger.warn("Reverse index is not ready, dropping query");
 return new EmptyEntrySource();
 }
 
-if (wordId < 0 || wordId >= words.size()) return new EmptyEntrySource();
-
-long offset = words.get(wordId);
+long offset = wordOffset(wordId);
 
 if (offset < 0) return new EmptyEntrySource();
 
-return new ReverseIndexFullEntrySource(createReaderNew(offset), ReverseIndexFullParameters.ENTRY_SIZE, wordId);
+return new ReverseIndexEntrySource(createReaderNew(offset), 2, wordId);
 }
 
-public QueryFilterStepIf also(int wordId) {
-if (wordId < 0) return new QueryFilterNoPass();
-
-long offset = words.get(wordId);
+public QueryFilterStepIf also(long wordId) {
+long offset = wordOffset(wordId);
 
 if (offset < 0) return new QueryFilterNoPass();
 
 return new ReverseIndexRetainFilter(createReaderNew(offset), "full", wordId);
 }
 
-public QueryFilterStepIf not(int wordId) {
-if (wordId < 0) return new QueryFilterLetThrough();
-
-long offset = words.get(wordId);
+public QueryFilterStepIf not(long wordId) {
+long offset = wordOffset(wordId);
 
 if (offset < 0) return new QueryFilterLetThrough();
 
 return new ReverseIndexRejectFilter(createReaderNew(offset));
 }
 
-public int numDocuments(int wordId) {
-if (wordId < 0)
-return 0;
-
-long offset = words.get(wordId);
+public int numDocuments(long wordId) {
+long offset = wordOffset(wordId);
 
 if (offset < 0)
 return 0;
@@ -98,15 +92,12 @@ public class ReverseIndexFullReader {
 }
 
 private BTreeReader createReaderNew(long offset) {
-return new BTreeReader(documents, ReverseIndexFullParameters.bTreeContext, offset);
+return new BTreeReader(documents, ReverseIndexParameters.docsBTreeContext, offset);
 }
 
-public long[] getTermMeta(int wordId, long[] docIds) {
-if (wordId < 0) {
-return new long[docIds.length];
-}
-
-long offset = words.get(wordId);
+public long[] getTermMeta(long wordId, long[] docIds) {
+long offset = wordOffset(wordId);
 if (offset < 0) {
 return new long[docIds.length];
 }
@@ -0,0 +1,9 @@
+package nu.marginalia.index.construction;
+
+public interface DocIdRewriter {
+long rewriteDocId(long docId);
+
+static DocIdRewriter identity() {
+return l -> l;
+}
+}
@@ -0,0 +1,10 @@
+package nu.marginalia.index.construction;
+
+import nu.marginalia.index.journal.reader.IndexJournalReader;
+
+import java.io.IOException;
+import java.nio.file.Path;
+
+public interface JournalReaderSource {
+IndexJournalReader construct(Path sourceFile) throws IOException;
+}
@@ -4,7 +4,6 @@ import nu.marginalia.array.LongArray;
 import nu.marginalia.array.functional.LongIOTransformer;
 import nu.marginalia.btree.BTreeWriter;
 import nu.marginalia.btree.model.BTreeContext;
-import nu.marginalia.index.priority.ReverseIndexPriorityParameters;
 
 import java.io.IOException;
 import java.nio.channels.FileChannel;
@@ -0,0 +1,81 @@
+package nu.marginalia.index.construction;
+
+import nu.marginalia.index.journal.reader.IndexJournalReader;
+import nu.marginallia.index.journal.IndexJournalFileNames;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+
+public class ReverseIndexConstructor {
+
+private static final Logger logger = LoggerFactory.getLogger(ReverseIndexConstructor.class);
+
+public static void createReverseIndex(
+JournalReaderSource readerSource,
+Path sourceBaseDir,
+Path tmpDir,
+Path outputFileDocs,
+Path outputFileWords) throws IOException
+{
+var inputs = IndexJournalFileNames.findJournalFiles(sourceBaseDir);
+if (inputs.isEmpty()) {
+logger.error("No journal files in base dir {}", sourceBaseDir);
+return;
+}
+
+List<ReversePreindex> preindexes = new ArrayList<>();
+
+for (var input : inputs) {
+logger.info("Construcing preindex from {}", input);
+var preindex = ReversePreindex.constructPreindex(readerSource.construct(input),
+tmpDir, tmpDir);
+preindexes.add(preindex);
+}
+
+logger.info("Merging");
+var finalPreindex = mergePreindexes(tmpDir, preindexes);
+logger.info("Finalizing");
+finalPreindex.finalizeIndex(outputFileDocs, outputFileWords);
+logger.info("Done");
+finalPreindex.delete();
+}
+
+private static ReversePreindex mergePreindexes(Path workDir, List<ReversePreindex> preindexes) throws IOException {
+assert !preindexes.isEmpty();
+
+if (preindexes.size() == 1) {
+logger.info("Single preindex, no merge necessary");
+return preindexes.get(0);
+}
+
+List<ReversePreindex> toMerge = new ArrayList<>(preindexes);
+List<ReversePreindex> merged = new ArrayList<>();
+
+while (toMerge.size() != 1) {
+for (int i = 0; i < toMerge.size(); i+=2) {
+var left = toMerge.get(i);
+var right = toMerge.get(i+1);
+
+merged.add(ReversePreindex.merge(workDir, left, right));
+
+left.delete();
+right.delete();
+}
+
+if ((toMerge.size() % 2) != 0) {
+merged.add(toMerge.get(toMerge.size()-1));
+}
+
+toMerge.clear();
+toMerge.addAll(merged);
+merged.clear();
+}
+
+return toMerge.get(0);
+}
+
+}
@@ -0,0 +1,256 @@
+package nu.marginalia.index.construction;
+
+import nu.marginalia.array.LongArray;
+import nu.marginalia.array.algo.SortingContext;
+import nu.marginalia.btree.BTreeWriter;
+import nu.marginalia.index.ReverseIndexParameters;
+import nu.marginalia.index.journal.reader.IndexJournalReader;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.channels.FileChannel;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+
+import static nu.marginalia.array.algo.TwoArrayOperations.*;
+
+public class ReversePreindex {
+public final ReversePreindexWordSegments segments;
+public final ReversePreindexDocuments documents;
+
+private static final Logger logger = LoggerFactory.getLogger(ReversePreindex.class);
+
+public ReversePreindex(ReversePreindexWordSegments segments, ReversePreindexDocuments documents) {
+this.segments = segments;
+this.documents = documents;
+}
+
+public void finalizeIndex(Path outputFileDocs, Path outputFileWords) throws IOException {
+var offsets = segments.counts;
+
+Files.deleteIfExists(outputFileDocs);
+Files.deleteIfExists(outputFileWords);
+
+// Estimate the size of the docs index data
+offsets.transformEach(0, offsets.size(), new CountToOffsetTransformer(2));
+IndexSizeEstimator sizeEstimator = new IndexSizeEstimator(ReverseIndexParameters.docsBTreeContext, 2);
+offsets.fold(0, 0, offsets.size(), sizeEstimator);
+
+System.out.println("size estimate = " + sizeEstimator.size);
+// Write the docs file
+LongArray finalDocs = LongArray.mmapForWriting(outputFileDocs, sizeEstimator.size);
+try (var intermediateDocChannel = documents.createDocumentsFileChannel()) {
+offsets.transformEachIO(0, offsets.size(), new ReverseIndexBTreeTransformer(finalDocs, 2, ReverseIndexParameters.docsBTreeContext, intermediateDocChannel));
+intermediateDocChannel.force(false);
+}
+
+LongArray wordIds = segments.wordIds;
+
+// Estimate the size of the words index data
+long wordsSize = ReverseIndexParameters.wordsBTreeContext.calculateSize((int) offsets.size());
+
+// Construct the tree
+LongArray wordsArray = LongArray.mmapForWriting(outputFileWords, wordsSize);
+
+new BTreeWriter(wordsArray, ReverseIndexParameters.wordsBTreeContext)
+.write(0, (int) offsets.size(), mapRegion -> {
+for (long i = 0; i < offsets.size(); i++) {
+mapRegion.set(2*i, wordIds.get(i));
+mapRegion.set(2*i + 1, offsets.get(i));
+}
+});
+
+wordsArray.force();
+
+}
+
+/** Delete all files associated with this pre-index */
+public void delete() throws IOException {
+segments.delete();
+documents.delete();
+}
+public static ReversePreindex constructPreindex(IndexJournalReader reader,
+Path tempDir,
+Path destDir) throws IOException
+{
+Path segmentWordsFile = Files.createTempFile(destDir, "segment_words", ".dat");
+Path segmentCountsFile = Files.createTempFile(destDir, "segment_counts", ".dat");
+Path docsFile = Files.createTempFile(destDir, "docs", ".dat");
+
+SortingContext ctx = new SortingContext(tempDir, 1<<31);
+logger.info("Segmenting");
+var segments = ReversePreindexWordSegments.construct(reader, ctx, segmentWordsFile, segmentCountsFile);
+logger.info("Mapping docs");
+var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), ctx, segments);
+logger.info("Done");
+return new ReversePreindex(segments, docs);
+}
+
+/** Create a segment word file with each word from both inputs, with zero counts for all the data.
+* This is an intermediate product in merging.
+*/
+static ReversePreindexWordSegments createMergedSegmentWordFile(Path destDir,
+ReversePreindexWordSegments left,
+ReversePreindexWordSegments right) throws IOException {
+Path segmentWordsFile = Files.createTempFile(destDir, "segment_words", ".dat");
+Path segmentCountsFile = Files.createTempFile(destDir, "segment_counts", ".dat");
+
+long segmentsSize = countDistinctElements(left.wordIds, right.wordIds,
+0, left.wordIds.size(),
+0, right.wordIds.size());
+
+LongArray wordIdsFile = LongArray.mmapForWriting(segmentWordsFile, segmentsSize);
+
+mergeArrays(wordIdsFile, left.wordIds, right.wordIds,
+0, wordIdsFile.size(),
+0, left.wordIds.size(),
+0, right.wordIds.size());
+
+LongArray counts = LongArray.mmapForWriting(segmentCountsFile, 8*segmentsSize);
+
+return new ReversePreindexWordSegments(wordIdsFile, counts, segmentWordsFile, segmentCountsFile);
+}
+public static ReversePreindex merge(Path destDir,
+ReversePreindex left,
+ReversePreindex right) throws IOException {
+
+ReversePreindexWordSegments mergingSegment = createMergedSegmentWordFile(destDir,
+left.segments,
+right.segments);
+
+var mergingIter = mergingSegment.constructionIterator(2);
|
||||||
|
var leftIter = left.segments.iterator(2);
|
||||||
|
var rightIter = right.segments.iterator(2);
|
||||||
|
|
||||||
|
Path docsFile = Files.createTempFile(destDir, "docs", ".dat");
|
||||||
|
|
||||||
|
LongArray mergedDocuments = LongArray.mmapForWriting(docsFile, 8 * (left.documents.size() + right.documents.size()));
|
||||||
|
|
||||||
|
leftIter.next();
|
||||||
|
rightIter.next();
|
||||||
|
|
||||||
|
FileChannel leftChannel = left.documents.createDocumentsFileChannel();
|
||||||
|
FileChannel rightChannel = right.documents.createDocumentsFileChannel();
|
||||||
|
|
||||||
|
while (mergingIter.canPutMore()
|
||||||
|
&& leftIter.isPositionBeforeEnd()
|
||||||
|
&& rightIter.isPositionBeforeEnd())
|
||||||
|
{
|
||||||
|
if (leftIter.wordId == mergingIter.wordId
|
||||||
|
&& rightIter.wordId == mergingIter.wordId) {
|
||||||
|
mergeSegments(leftIter,
|
||||||
|
rightIter,
|
||||||
|
left.documents,
|
||||||
|
right.documents,
|
||||||
|
mergedDocuments,
|
||||||
|
mergingIter);
|
||||||
|
}
|
||||||
|
else if (leftIter.wordId == mergingIter.wordId) {
|
||||||
|
if (!copySegment(leftIter, mergedDocuments, leftChannel, mergingIter))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else if (rightIter.wordId == mergingIter.wordId) {
|
||||||
|
if (!copySegment(rightIter, mergedDocuments, rightChannel, mergingIter))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
assert false : "This should never happen";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (leftIter.isPositionBeforeEnd()) {
|
||||||
|
while (copySegment(leftIter, mergedDocuments, leftChannel, mergingIter));
|
||||||
|
|
||||||
|
}
|
||||||
|
if (rightIter.isPositionBeforeEnd()) {
|
||||||
|
while (copySegment(rightIter, mergedDocuments, rightChannel, mergingIter));
|
||||||
|
}
|
||||||
|
|
||||||
|
assert !leftIter.isPositionBeforeEnd() : "Left has more to go";
|
||||||
|
assert !rightIter.isPositionBeforeEnd() : "Right has more to go";
|
||||||
|
assert !mergingIter.canPutMore() : "Source iters ran dry before merging iter";
|
||||||
|
|
||||||
|
// We may have overestimated the size of the merged docs size in the case there were
|
||||||
|
// duplicates in the data, so we need to shrink it to the actual size we wrote.
|
||||||
|
|
||||||
|
mergedDocuments = shrinkMergedDocuments(mergedDocuments, docsFile, 2 * mergingSegment.totalSize());
|
||||||
|
|
||||||
|
return new ReversePreindex(
|
||||||
|
mergingSegment,
|
||||||
|
new ReversePreindexDocuments(mergedDocuments, docsFile)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static LongArray shrinkMergedDocuments(LongArray mergedDocuments, Path docsFile, long sizeLongs) throws IOException {
|
||||||
|
|
||||||
|
mergedDocuments.force();
|
||||||
|
|
||||||
|
long beforeSize = mergedDocuments.size();
|
||||||
|
try (var bc = Files.newByteChannel(docsFile, StandardOpenOption.WRITE)) {
|
||||||
|
bc.truncate(sizeLongs * 8);
|
||||||
|
}
|
||||||
|
long afterSize = mergedDocuments.size();
|
||||||
|
mergedDocuments = LongArray.mmapForWriting(docsFile, sizeLongs);
|
||||||
|
|
||||||
|
if (beforeSize != afterSize) {
|
||||||
|
logger.info("Shrunk {} from {}b to {}b", docsFile, beforeSize, afterSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
return mergedDocuments;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void mergeSegments(ReversePreindexWordSegments.SegmentIterator leftIter,
|
||||||
|
ReversePreindexWordSegments.SegmentIterator rightIter,
|
||||||
|
ReversePreindexDocuments left,
|
||||||
|
ReversePreindexDocuments right,
|
||||||
|
LongArray documentsFile,
|
||||||
|
ReversePreindexWordSegments.SegmentConstructionIterator mergingIter)
|
||||||
|
{
|
||||||
|
long distinct = countDistinctElementsN(2,
|
||||||
|
left.documents,
|
||||||
|
right.documents,
|
||||||
|
leftIter.startOffset, leftIter.endOffset,
|
||||||
|
rightIter.startOffset, rightIter.endOffset);
|
||||||
|
|
||||||
|
mergeArrays2(documentsFile,
|
||||||
|
left.documents,
|
||||||
|
right.documents,
|
||||||
|
mergingIter.startOffset,
|
||||||
|
mergingIter.startOffset + 2*distinct,
|
||||||
|
leftIter.startOffset, leftIter.endOffset,
|
||||||
|
rightIter.startOffset, rightIter.endOffset);
|
||||||
|
|
||||||
|
mergingIter.putNext(distinct);
|
||||||
|
leftIter.next();
|
||||||
|
rightIter.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean copySegment(ReversePreindexWordSegments.SegmentIterator sourceIter,
|
||||||
|
LongArray documentsFile,
|
||||||
|
FileChannel leftChannel,
|
||||||
|
ReversePreindexWordSegments.SegmentConstructionIterator mergingIter) throws IOException {
|
||||||
|
|
||||||
|
long size = sourceIter.endOffset - sourceIter.startOffset;
|
||||||
|
long start = mergingIter.startOffset;
|
||||||
|
long end = start + size;
|
||||||
|
|
||||||
|
documentsFile.transferFrom(leftChannel,
|
||||||
|
sourceIter.startOffset,
|
||||||
|
mergingIter.startOffset,
|
||||||
|
end);
|
||||||
|
|
||||||
|
boolean putNext = mergingIter.putNext(size / 2);
|
||||||
|
boolean iterNext = sourceIter.next();
|
||||||
|
|
||||||
|
if (!putNext) {
|
||||||
|
assert !iterNext: "Source iterator ran out before dest iterator?!";
|
||||||
|
}
|
||||||
|
|
||||||
|
return iterNext;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,120 @@
|
|||||||
|
package nu.marginalia.index.construction;
|
||||||
|
|
||||||
|
import lombok.SneakyThrows;
|
||||||
|
import nu.marginalia.array.LongArray;
|
||||||
|
import nu.marginalia.array.algo.SortingContext;
|
||||||
|
import nu.marginalia.index.journal.reader.IndexJournalReader;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.channels.FileChannel;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.StandardOpenOption;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.Executors;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
/** A LongArray with document data, segmented according to
|
||||||
|
* the associated ReversePReindexWordSegments data
|
||||||
|
*/
|
||||||
|
public class ReversePreindexDocuments {
|
||||||
|
private final Path file;
|
||||||
|
public final LongArray documents;
|
||||||
|
private static final int RECORD_SIZE_LONGS = 2;
|
||||||
|
private static final Logger logger= LoggerFactory.getLogger(ReversePreindexDocuments.class);
|
||||||
|
|
||||||
|
public ReversePreindexDocuments(LongArray documents, Path file) {
|
||||||
|
this.documents = documents;
|
||||||
|
this.file = file;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static ReversePreindexDocuments construct(
|
||||||
|
Path docsFile,
|
||||||
|
IndexJournalReader reader,
|
||||||
|
DocIdRewriter docIdRewriter,
|
||||||
|
SortingContext sortingContext,
|
||||||
|
ReversePreindexWordSegments segments) throws IOException {
|
||||||
|
|
||||||
|
|
||||||
|
logger.info("Transfering data");
|
||||||
|
createUnsortedDocsFile(docsFile, reader, segments, docIdRewriter);
|
||||||
|
|
||||||
|
LongArray docsFileMap = LongArray.mmapForWriting(docsFile, 8 * Files.size(docsFile));
|
||||||
|
logger.info("Sorting data");
|
||||||
|
sortDocsFile(docsFileMap, segments, sortingContext);
|
||||||
|
|
||||||
|
return new ReversePreindexDocuments(docsFileMap, docsFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
public FileChannel createDocumentsFileChannel() throws IOException {
|
||||||
|
return (FileChannel) Files.newByteChannel(file, StandardOpenOption.READ);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public LongArray slice(long start, long end) {
|
||||||
|
return documents.range(start, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
public long size() {
|
||||||
|
return documents.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void createUnsortedDocsFile(Path docsFile,
|
||||||
|
IndexJournalReader reader,
|
||||||
|
ReversePreindexWordSegments segments,
|
||||||
|
DocIdRewriter docIdRewriter) throws IOException {
|
||||||
|
long fileSize = 8 * segments.totalSize();
|
||||||
|
LongArray outArray = LongArray.mmapForWriting(docsFile, fileSize);
|
||||||
|
|
||||||
|
var offsetMap = segments.asMap(RECORD_SIZE_LONGS);
|
||||||
|
offsetMap.defaultReturnValue(0);
|
||||||
|
|
||||||
|
reader.forEachDocIdRecord((docId, rec) -> {
|
||||||
|
long wordId = rec.wordId();
|
||||||
|
long meta = rec.metadata();
|
||||||
|
|
||||||
|
long rankEncodedId = docIdRewriter.rewriteDocId(docId);
|
||||||
|
|
||||||
|
long offset = offsetMap.addTo(wordId, RECORD_SIZE_LONGS);
|
||||||
|
outArray.set(offset + 0, rankEncodedId);
|
||||||
|
outArray.set(offset + 1, meta);
|
||||||
|
});
|
||||||
|
|
||||||
|
outArray.force();
|
||||||
|
}
|
||||||
|
|
||||||
|
@SneakyThrows
|
||||||
|
private static void sortDocsFile(LongArray docsFileMap, ReversePreindexWordSegments segments, SortingContext sortingContext) throws IOException {
|
||||||
|
|
||||||
|
var iter = segments.iterator(RECORD_SIZE_LONGS);
|
||||||
|
|
||||||
|
ExecutorService sortingWorkers = Executors.newWorkStealingPool(Runtime.getRuntime().availableProcessors());
|
||||||
|
|
||||||
|
while (iter.next()) {
|
||||||
|
if (iter.size() < 1024) {
|
||||||
|
docsFileMap.quickSortN(RECORD_SIZE_LONGS,
|
||||||
|
iter.startOffset,
|
||||||
|
iter.endOffset);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
sortingWorkers.execute(() ->
|
||||||
|
docsFileMap.quickSortN(RECORD_SIZE_LONGS,
|
||||||
|
iter.startOffset,
|
||||||
|
iter.endOffset));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sortingWorkers.shutdown();
|
||||||
|
logger.info("Awaiting shutdown");
|
||||||
|
|
||||||
|
while (!sortingWorkers.awaitTermination(1, TimeUnit.HOURS));
|
||||||
|
|
||||||
|
sortingWorkers.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void delete() throws IOException {
|
||||||
|
Files.delete(this.file);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,191 @@
|
|||||||
|
package nu.marginalia.index.construction;
|
||||||
|
|
||||||
|
import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap;
|
||||||
|
import it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap;
|
||||||
|
import it.unimi.dsi.fastutil.longs.LongIterator;
|
||||||
|
import nu.marginalia.array.LongArray;
|
||||||
|
import nu.marginalia.array.algo.SortingContext;
|
||||||
|
import nu.marginalia.index.journal.reader.IndexJournalReader;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.channels.FileChannel;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.StandardOpenOption;
|
||||||
|
|
||||||
|
/** A pair of file-backed arrays of sorted wordIds
|
||||||
|
* and the count of documents associated with each wordId.
|
||||||
|
*/
|
||||||
|
public class ReversePreindexWordSegments {
|
||||||
|
public final LongArray wordIds;
|
||||||
|
public final LongArray counts;
|
||||||
|
|
||||||
|
private final Path wordsFile;
|
||||||
|
private final Path countsFile;
|
||||||
|
|
||||||
|
public ReversePreindexWordSegments(LongArray wordIds,
|
||||||
|
LongArray counts,
|
||||||
|
Path wordsFile,
|
||||||
|
Path countsFile)
|
||||||
|
{
|
||||||
|
this.wordIds = wordIds;
|
||||||
|
this.counts = counts;
|
||||||
|
this.wordsFile = wordsFile;
|
||||||
|
this.countsFile = countsFile;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns a long-long hash map where each key is a wordId,
|
||||||
|
* and each value is the start offset of the data.
|
||||||
|
*/
|
||||||
|
public Long2LongOpenHashMap asMap(int recordSize) {
|
||||||
|
Long2LongOpenHashMap ret = new Long2LongOpenHashMap((int) wordIds.size(), 0.75f);
|
||||||
|
var iter = iterator(recordSize);
|
||||||
|
|
||||||
|
while (iter.next()) {
|
||||||
|
ret.put(iter.wordId, iter.startOffset);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static ReversePreindexWordSegments construct(IndexJournalReader reader,
|
||||||
|
SortingContext ctx,
|
||||||
|
Path wordIdsFile,
|
||||||
|
Path countsFile)
|
||||||
|
throws IOException
|
||||||
|
{
|
||||||
|
Long2IntOpenHashMap countsMap = new Long2IntOpenHashMap(100_000, 0.75f);
|
||||||
|
countsMap.defaultReturnValue(0);
|
||||||
|
reader.forEachWordId(wordId -> countsMap.addTo(wordId, 1));
|
||||||
|
|
||||||
|
LongArray words = LongArray.mmapForWriting(wordIdsFile, countsMap.size());
|
||||||
|
LongArray counts = LongArray.mmapForWriting(countsFile, countsMap.size());
|
||||||
|
|
||||||
|
// Create the words file by iterating over the map and inserting them into
|
||||||
|
// the words file in whatever bizarro hash table order they appear in
|
||||||
|
int i = 0;
|
||||||
|
LongIterator iter = countsMap.keySet().iterator();
|
||||||
|
while (iter.hasNext()) {
|
||||||
|
words.set(i, iter.nextLong());
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort the words file
|
||||||
|
words.sortLargeSpan(ctx, 0, counts.size());
|
||||||
|
|
||||||
|
// Populate the counts
|
||||||
|
for (i = 0; i < countsMap.size(); i++) {
|
||||||
|
counts.set(i, countsMap.get(words.get(i)));
|
||||||
|
}
|
||||||
|
|
||||||
|
return new ReversePreindexWordSegments(words, counts, wordIdsFile, countsFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
public SegmentIterator iterator(int recordSize) {
|
||||||
|
return new SegmentIterator(recordSize);
|
||||||
|
}
|
||||||
|
public SegmentConstructionIterator constructionIterator(int recordSize) {
|
||||||
|
return new SegmentConstructionIterator(recordSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
public long totalSize() {
|
||||||
|
return counts.fold(0, 0, counts.size(), Long::sum);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void delete() throws IOException {
|
||||||
|
Files.delete(countsFile);
|
||||||
|
Files.delete(wordsFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
public class SegmentIterator {
|
||||||
|
private final int recordSize;
|
||||||
|
private final long fileSize;
|
||||||
|
long wordId;
|
||||||
|
long startOffset = 0;
|
||||||
|
long endOffset = 0;
|
||||||
|
|
||||||
|
private SegmentIterator(int recordSize) {
|
||||||
|
this.recordSize = recordSize;
|
||||||
|
this.fileSize = wordIds.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
private int i = -1;
|
||||||
|
public int idx() {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
public boolean next() {
|
||||||
|
if (++i >= fileSize) {
|
||||||
|
wordId = Long.MIN_VALUE;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
wordId = wordIds.get(i);
|
||||||
|
startOffset = endOffset;
|
||||||
|
endOffset = startOffset + recordSize * counts.get(i);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasMorePositions() {
|
||||||
|
return i + 1 < wordIds.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isPositionBeforeEnd() {
|
||||||
|
return i < wordIds.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public long size() {
|
||||||
|
return endOffset - startOffset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class SegmentConstructionIterator {
|
||||||
|
private final int recordSize;
|
||||||
|
private final long fileSize;
|
||||||
|
long wordId;
|
||||||
|
long startOffset = 0;
|
||||||
|
long endOffset = 0;
|
||||||
|
|
||||||
|
private SegmentConstructionIterator(int recordSize) {
|
||||||
|
this.recordSize = recordSize;
|
||||||
|
this.fileSize = wordIds.size();
|
||||||
|
if (fileSize == 0) {
|
||||||
|
throw new IllegalArgumentException("Cannot construct zero-length word segment file");
|
||||||
|
}
|
||||||
|
this.wordId = wordIds.get(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int i = 0;
|
||||||
|
public int idx() {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean putNext(long size) {
|
||||||
|
|
||||||
|
if (i >= fileSize)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
endOffset = startOffset + recordSize * size;
|
||||||
|
counts.set(i, size);
|
||||||
|
startOffset = endOffset;
|
||||||
|
endOffset = -1;
|
||||||
|
|
||||||
|
i++;
|
||||||
|
|
||||||
|
if (i == fileSize) {
|
||||||
|
// We've reached the end of the iteration and there is no
|
||||||
|
// "next" wordId to fetch
|
||||||
|
wordId = Long.MIN_VALUE;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
wordId = wordIds.get(i);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean canPutMore() {
|
||||||
|
return i < wordIds.size();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -1,206 +0,0 @@
|
|||||||
package nu.marginalia.index.full;
|
|
||||||
|
|
||||||
import lombok.SneakyThrows;
|
|
||||||
import nu.marginalia.index.construction.CountToOffsetTransformer;
|
|
||||||
import nu.marginalia.index.construction.ReverseIndexBTreeTransformer;
|
|
||||||
import nu.marginalia.index.construction.IndexSizeEstimator;
|
|
||||||
import nu.marginalia.index.journal.model.IndexJournalEntryData;
|
|
||||||
import nu.marginalia.index.journal.model.IndexJournalStatistics;
|
|
||||||
import nu.marginalia.index.journal.reader.IndexJournalReader;
|
|
||||||
import nu.marginalia.model.id.UrlIdCodec;
|
|
||||||
import nu.marginalia.process.control.ProcessHeartbeat;
|
|
||||||
import nu.marginalia.ranking.DomainRankings;
|
|
||||||
import nu.marginalia.rwf.RandomWriteFunnel;
|
|
||||||
import nu.marginalia.array.IntArray;
|
|
||||||
import nu.marginalia.array.LongArray;
|
|
||||||
import nu.marginalia.array.algo.SortingContext;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.channels.FileChannel;
|
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
import java.nio.file.StandardOpenOption;
|
|
||||||
|
|
||||||
import static nu.marginalia.index.full.ReverseIndexFullParameters.bTreeContext;
|
|
||||||
|
|
||||||
public class ReverseIndexFullConverter {
|
|
||||||
private static final int RWF_BIN_SIZE = 10_000_000;
|
|
||||||
|
|
||||||
private final ProcessHeartbeat heartbeat;
|
|
||||||
private final Path tmpFileDir;
|
|
||||||
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
|
||||||
|
|
||||||
private final IndexJournalReader journalReader;
|
|
||||||
private final DomainRankings domainRankings;
|
|
||||||
private final Path outputFileWords;
|
|
||||||
private final Path outputFileDocs;
|
|
||||||
private final SortingContext sortingContext;
|
|
||||||
|
|
||||||
public ReverseIndexFullConverter(ProcessHeartbeat heartbeat,
|
|
||||||
Path tmpFileDir,
|
|
||||||
IndexJournalReader journalReader,
|
|
||||||
DomainRankings domainRankings,
|
|
||||||
Path outputFileWords,
|
|
||||||
Path outputFileDocs) {
|
|
||||||
this.heartbeat = heartbeat;
|
|
||||||
this.tmpFileDir = tmpFileDir;
|
|
||||||
this.journalReader = journalReader;
|
|
||||||
this.domainRankings = domainRankings;
|
|
||||||
this.outputFileWords = outputFileWords;
|
|
||||||
this.outputFileDocs = outputFileDocs;
|
|
||||||
this.sortingContext = new SortingContext(tmpFileDir, 64_000);
|
|
||||||
}
|
|
||||||
|
|
||||||
public enum TaskSteps {
|
|
||||||
ACCUMULATE_STATISTICS,
|
|
||||||
INCREMENT_OFFSETS,
|
|
||||||
COUNT_OFFSETS,
|
|
||||||
CREATE_INTERMEDIATE_DOCS,
|
|
||||||
SORT_INTERMEDIATE_DOCS,
|
|
||||||
SIZING,
|
|
||||||
FINALIZING_DOCS,
|
|
||||||
FORCE,
|
|
||||||
FINISHED,
|
|
||||||
}
|
|
||||||
|
|
||||||
public void convert() throws IOException {
|
|
||||||
deleteOldFiles();
|
|
||||||
|
|
||||||
if (journalReader.fileHeader().fileSize() <= IndexJournalReader.FILE_HEADER_SIZE_BYTES) {
|
|
||||||
logger.warn("Bailing: Journal is empty!");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
final Path intermediateUrlsFile = Files.createTempFile(tmpFileDir, "urls-sorted", ".dat");
|
|
||||||
|
|
||||||
try (var progress = heartbeat.createProcessTaskHeartbeat(TaskSteps.class, "reverseIndexFullConverter")) {
|
|
||||||
progress.progress(TaskSteps.ACCUMULATE_STATISTICS);
|
|
||||||
|
|
||||||
final IndexJournalStatistics statistics = journalReader.getStatistics();
|
|
||||||
final long wordsFileSize = statistics.highestWord() + 1;
|
|
||||||
|
|
||||||
progress.progress(TaskSteps.INCREMENT_OFFSETS);
|
|
||||||
|
|
||||||
logger.debug("Words file size: {}", wordsFileSize);
|
|
||||||
// Create a count of how many documents has contains each word
|
|
||||||
final LongArray wordsOffsets = LongArray.allocate(wordsFileSize);
|
|
||||||
|
|
||||||
journalReader.forEachWordId(wordsOffsets::increment);
|
|
||||||
progress.progress(TaskSteps.COUNT_OFFSETS);
|
|
||||||
|
|
||||||
wordsOffsets.transformEach(0, wordsFileSize, new CountToOffsetTransformer(ReverseIndexFullParameters.ENTRY_SIZE));
|
|
||||||
|
|
||||||
progress.progress(TaskSteps.CREATE_INTERMEDIATE_DOCS);
|
|
||||||
|
|
||||||
// Construct an intermediate representation of the reverse documents index
|
|
||||||
try (FileChannel intermediateDocChannel =
|
|
||||||
(FileChannel) Files.newByteChannel(intermediateUrlsFile,
|
|
||||||
StandardOpenOption.CREATE, StandardOpenOption.READ, StandardOpenOption.WRITE))
|
|
||||||
{
|
|
||||||
|
|
||||||
// Construct intermediate index
|
|
||||||
try (RandomWriteFunnel intermediateDocumentWriteFunnel = new RandomWriteFunnel(tmpFileDir, RWF_BIN_SIZE);
|
|
||||||
IntermediateIndexConstructor intermediateIndexConstructor = new IntermediateIndexConstructor(tmpFileDir, wordsOffsets, intermediateDocumentWriteFunnel)
|
|
||||||
)
|
|
||||||
{
|
|
||||||
journalReader.forEachDocIdRecord(intermediateIndexConstructor);
|
|
||||||
intermediateDocumentWriteFunnel.write(intermediateDocChannel);
|
|
||||||
}
|
|
||||||
intermediateDocChannel.force(false);
|
|
||||||
progress.progress(TaskSteps.SORT_INTERMEDIATE_DOCS);
|
|
||||||
|
|
||||||
// Sort each segment of the intermediate file
|
|
||||||
{
|
|
||||||
LongArray intermediateDocs = LongArray.mmapForModifying(intermediateUrlsFile);
|
|
||||||
wordsOffsets.foldIO(0, 0, wordsFileSize, (s, e) -> {
|
|
||||||
intermediateDocs.sortLargeSpanN(sortingContext, ReverseIndexFullParameters.ENTRY_SIZE, s, e);
|
|
||||||
return e;
|
|
||||||
});
|
|
||||||
intermediateDocs.force();
|
|
||||||
}
|
|
||||||
|
|
||||||
progress.progress(TaskSteps.SIZING);
|
|
||||||
|
|
||||||
IndexSizeEstimator sizeEstimator = new IndexSizeEstimator(
|
|
||||||
ReverseIndexFullParameters.bTreeContext,
|
|
||||||
ReverseIndexFullParameters.ENTRY_SIZE);
|
|
||||||
|
|
||||||
wordsOffsets.fold(0, 0, wordsOffsets.size(), sizeEstimator);
|
|
||||||
progress.progress(TaskSteps.FINALIZING_DOCS);
|
|
||||||
|
|
||||||
LongArray finalDocs = LongArray.mmapForWriting(outputFileDocs, sizeEstimator.size);
|
|
||||||
// Construct the proper reverse index
|
|
||||||
wordsOffsets.transformEachIO(0, wordsOffsets.size(), new ReverseIndexBTreeTransformer(finalDocs, ReverseIndexFullParameters.ENTRY_SIZE, bTreeContext, intermediateDocChannel));
|
|
||||||
wordsOffsets.write(outputFileWords);
|
|
||||||
|
|
||||||
progress.progress(TaskSteps.FORCE);
|
|
||||||
|
|
||||||
// Attempt to clean up before forcing (important disk space preservation)
|
|
||||||
Files.deleteIfExists(intermediateUrlsFile);
|
|
||||||
|
|
||||||
wordsOffsets.force();
|
|
||||||
finalDocs.force();
|
|
||||||
|
|
||||||
progress.progress(TaskSteps.FINISHED);
|
|
||||||
}
|
|
||||||
|
|
||||||
} catch (IOException ex) {
|
|
||||||
logger.error("Failed to convert", ex);
|
|
||||||
throw ex;
|
|
||||||
} finally {
|
|
||||||
Files.deleteIfExists(intermediateUrlsFile);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void deleteOldFiles() throws IOException {
|
|
||||||
Files.deleteIfExists(outputFileWords);
|
|
||||||
Files.deleteIfExists(outputFileDocs);
|
|
||||||
}
|
|
||||||
|
|
||||||
private class IntermediateIndexConstructor implements IndexJournalReader.LongObjectConsumer<IndexJournalEntryData.Record>, AutoCloseable {
|
|
||||||
|
|
||||||
private final LongArray wordRangeEnds;
|
|
||||||
private final IntArray wordRangeOffset;
|
|
||||||
private final RandomWriteFunnel documentsFile;
|
|
||||||
|
|
||||||
private final Path tempFile;
|
|
||||||
|
|
||||||
public IntermediateIndexConstructor(Path tempDir, LongArray wordRangeEnds, RandomWriteFunnel documentsFile) throws IOException {
|
|
||||||
tempFile = Files.createTempFile(tempDir, "iic", "dat");
|
|
||||||
|
|
||||||
this.wordRangeEnds = wordRangeEnds;
|
|
||||||
this.wordRangeOffset = IntArray.mmapForWriting(tempFile, wordRangeEnds.size());
|
|
||||||
this.documentsFile = documentsFile;
|
|
||||||
}
|
|
||||||
|
|
||||||
@SneakyThrows
|
|
||||||
@Override
|
|
||||||
public void accept(long docId, IndexJournalEntryData.Record record) {
|
|
||||||
int domainId = UrlIdCodec.getDomainId(docId);
|
|
||||||
float rankingPart = domainRankings.getSortRanking(domainId);
|
|
||||||
long rankEncodedId = UrlIdCodec.addRank(rankingPart, docId);
|
|
||||||
|
|
||||||
final int wordId = record.wordId();
|
|
||||||
long offset = startOfRange(wordId);
|
|
||||||
|
|
||||||
documentsFile.put(offset + wordRangeOffset.getAndIncrement(wordId), rankEncodedId);
|
|
||||||
documentsFile.put(offset + wordRangeOffset.getAndIncrement(wordId), record.metadata());
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private long startOfRange(int wordId) {
|
|
||||||
if (wordId == 0) return 0;
|
|
||||||
|
|
||||||
return wordRangeEnds.get(wordId - 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void close() throws IOException {
|
|
||||||
Files.delete(tempFile);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -1,16 +0,0 @@
|
|||||||
package nu.marginalia.index.full;
|
|
||||||
|
|
||||||
import nu.marginalia.btree.model.BTreeBlockSize;
|
|
||||||
import nu.marginalia.btree.model.BTreeContext;
|
|
||||||
|
|
||||||
public class ReverseIndexFullParameters {
|
|
||||||
static final int ENTRY_SIZE = 2;
|
|
||||||
|
|
||||||
// This is the byte size per index page on disk, the data pages are twice as large due to ENTRY_SIZE = 2.
|
|
||||||
//
|
|
||||||
// Given a hardware limit of 4k reads, 2k block size should be optimal.
|
|
||||||
static final BTreeBlockSize blockSize = BTreeBlockSize.BS_2048;
|
|
||||||
|
|
||||||
|
|
||||||
static final BTreeContext bTreeContext = new BTreeContext(5, ENTRY_SIZE, blockSize);
|
|
||||||
}
|
|
@ -1,204 +0,0 @@
|
|||||||
package nu.marginalia.index.priority;
|
|
||||||
|
|
||||||
import lombok.SneakyThrows;
|
|
||||||
import nu.marginalia.array.IntArray;
|
|
||||||
import nu.marginalia.array.LongArray;
|
|
||||||
import nu.marginalia.array.algo.SortingContext;
|
|
||||||
import nu.marginalia.index.construction.CountToOffsetTransformer;
|
|
||||||
import nu.marginalia.index.construction.ReverseIndexBTreeTransformer;
|
|
||||||
import nu.marginalia.index.construction.IndexSizeEstimator;
|
|
||||||
import nu.marginalia.index.journal.model.IndexJournalEntryData;
|
|
||||||
import nu.marginalia.index.journal.model.IndexJournalStatistics;
|
|
||||||
import nu.marginalia.index.journal.reader.IndexJournalReader;
|
|
||||||
import nu.marginalia.model.id.UrlIdCodec;
|
|
||||||
import nu.marginalia.process.control.ProcessHeartbeat;
|
|
||||||
import nu.marginalia.ranking.DomainRankings;
|
|
||||||
import nu.marginalia.rwf.RandomWriteFunnel;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.channels.FileChannel;
|
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
import java.nio.file.StandardOpenOption;
|
|
||||||
|
|
||||||
import static nu.marginalia.index.priority.ReverseIndexPriorityParameters.bTreeContext;
|
|
||||||
|
|
||||||
public class ReverseIndexPriorityConverter {
|
|
||||||
private static final int RWF_BIN_SIZE = 10_000_000;
|
|
||||||
|
|
||||||
private final ProcessHeartbeat heartbeat;
|
|
||||||
private final Path tmpFileDir;
|
|
||||||
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
|
||||||
|
|
||||||
private final IndexJournalReader journalReader;
|
|
||||||
private final DomainRankings domainRankings;
|
|
||||||
private final Path outputFileWords;
|
|
||||||
private final Path outputFileDocs;
|
|
||||||
private final SortingContext sortingContext;
|
|
||||||
|
|
||||||
public ReverseIndexPriorityConverter(ProcessHeartbeat heartbeat,
|
|
||||||
Path tmpFileDir,
|
|
||||||
IndexJournalReader journalReader,
|
|
||||||
DomainRankings domainRankings,
|
|
||||||
Path outputFileWords,
|
|
||||||
Path outputFileDocs) {
|
|
||||||
this.heartbeat = heartbeat;
|
|
||||||
this.tmpFileDir = tmpFileDir;
|
|
||||||
this.journalReader = journalReader;
|
|
||||||
this.domainRankings = domainRankings;
|
|
||||||
this.outputFileWords = outputFileWords;
|
|
||||||
this.outputFileDocs = outputFileDocs;
|
|
||||||
this.sortingContext = new SortingContext(tmpFileDir, 64_000);
|
|
||||||
}
|
|
||||||
|
|
||||||
public enum TaskSteps {
|
|
||||||
ACCUMULATE_STATISTICS,
|
|
||||||
INCREMENT_OFFSETS,
|
|
||||||
COUNT_OFFSETS,
|
|
||||||
CREATE_INTERMEDIATE_DOCS,
|
|
||||||
SORT_INTERMEDIATE_DOCS,
|
|
||||||
SIZING,
|
|
||||||
FINALIZING_DOCS,
|
|
||||||
FORCE,
|
|
||||||
FINISHED,
|
|
||||||
}
|
|
||||||
|
|
||||||
public void convert() throws IOException {
|
|
||||||
deleteOldFiles();
|
|
||||||
|
|
||||||
if (journalReader.fileHeader().fileSize() <= IndexJournalReader.FILE_HEADER_SIZE_BYTES) {
|
|
||||||
logger.warn("Bailing: Journal is empty!");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
final Path intermediateUrlsFile = Files.createTempFile(tmpFileDir, "urls-sorted", ".dat");
|
|
||||||
|
|
||||||
try (var progress = heartbeat.createProcessTaskHeartbeat(TaskSteps.class, "reverseIndexPriorityConverter")) {
|
|
||||||
progress.progress(TaskSteps.ACCUMULATE_STATISTICS);
|
|
||||||
|
|
||||||
final IndexJournalStatistics statistics = journalReader.getStatistics();
|
|
||||||
final long wordsFileSize = statistics.highestWord() + 1;
|
|
||||||
|
|
||||||
progress.progress(TaskSteps.INCREMENT_OFFSETS);
|
|
||||||
|
|
||||||
logger.debug("Words file size: {}", wordsFileSize);
|
|
||||||
// Create a count of how many documents has contains each word
|
|
||||||
final LongArray wordsOffsets = LongArray.allocate(wordsFileSize);
|
|
||||||
|
|
||||||
journalReader.forEachWordId(wordsOffsets::increment);
|
|
||||||
progress.progress(TaskSteps.COUNT_OFFSETS);
|
|
||||||
|
|
||||||
wordsOffsets.transformEach(0, wordsFileSize, new CountToOffsetTransformer(ReverseIndexPriorityParameters.ENTRY_SIZE));
|
|
||||||
|
|
||||||
progress.progress(TaskSteps.CREATE_INTERMEDIATE_DOCS);
|
|
||||||
|
|
||||||
// Construct an intermediate representation of the reverse documents index
|
|
||||||
try (FileChannel intermediateDocChannel =
|
|
||||||
(FileChannel) Files.newByteChannel(intermediateUrlsFile,
|
|
||||||
StandardOpenOption.CREATE, StandardOpenOption.READ, StandardOpenOption.WRITE))
|
|
||||||
{
|
|
||||||
|
|
||||||
// Construct intermediate index
|
|
||||||
try (RandomWriteFunnel intermediateDocumentWriteFunnel = new RandomWriteFunnel(tmpFileDir, RWF_BIN_SIZE);
|
|
||||||
IntermediateIndexConstructor intermediateIndexConstructor = new IntermediateIndexConstructor(tmpFileDir, wordsOffsets, intermediateDocumentWriteFunnel)
|
|
||||||
)
|
|
||||||
{
|
|
||||||
journalReader.forEachDocIdRecord(intermediateIndexConstructor);
|
|
||||||
intermediateDocumentWriteFunnel.write(intermediateDocChannel);
|
|
||||||
}
|
|
||||||
intermediateDocChannel.force(false);
|
|
||||||
progress.progress(TaskSteps.SORT_INTERMEDIATE_DOCS);
|
|
||||||
|
|
||||||
// Sort each segment of the intermediate file
|
|
||||||
{
|
|
||||||
LongArray intermediateDocs = LongArray.mmapForModifying(intermediateUrlsFile);
|
|
||||||
wordsOffsets.foldIO(0, 0, wordsFileSize, (s, e) -> {
|
|
||||||
intermediateDocs.sortLargeSpan(sortingContext, s, e);
|
|
||||||
return e;
|
|
||||||
});
|
|
||||||
intermediateDocs.force();
|
|
||||||
}
|
|
||||||
|
|
||||||
progress.progress(TaskSteps.SIZING);
|
|
||||||
|
|
||||||
IndexSizeEstimator sizeEstimator = new IndexSizeEstimator(
|
|
||||||
bTreeContext,
|
|
||||||
ReverseIndexPriorityParameters.ENTRY_SIZE);
|
|
||||||
|
|
||||||
wordsOffsets.fold(0, 0, wordsOffsets.size(), sizeEstimator);
|
|
||||||
progress.progress(TaskSteps.FINALIZING_DOCS);
|
|
||||||
|
|
||||||
LongArray finalDocs = LongArray.mmapForWriting(outputFileDocs, sizeEstimator.size);
|
|
||||||
// Construct the proper reverse index
|
|
||||||
wordsOffsets.transformEachIO(0, wordsOffsets.size(), new ReverseIndexBTreeTransformer(finalDocs, ReverseIndexPriorityParameters.ENTRY_SIZE, bTreeContext, intermediateDocChannel));
|
|
||||||
wordsOffsets.write(outputFileWords);
|
|
||||||
|
|
||||||
progress.progress(TaskSteps.FORCE);
|
|
||||||
|
|
||||||
// Attempt to clean up before forcing (important disk space preservation)
|
|
||||||
Files.deleteIfExists(intermediateUrlsFile);
|
|
||||||
|
|
||||||
wordsOffsets.force();
|
|
||||||
finalDocs.force();
|
|
||||||
|
|
||||||
progress.progress(TaskSteps.FINISHED);
|
|
||||||
}
|
|
||||||
|
|
||||||
} catch (IOException ex) {
|
|
||||||
logger.error("Failed to convert", ex);
|
|
||||||
throw ex;
|
|
||||||
} finally {
|
|
||||||
Files.deleteIfExists(intermediateUrlsFile);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void deleteOldFiles() throws IOException {
|
|
||||||
Files.deleteIfExists(outputFileWords);
|
|
||||||
Files.deleteIfExists(outputFileDocs);
|
|
||||||
}
|
|
||||||
|
|
||||||
private class IntermediateIndexConstructor implements IndexJournalReader.LongObjectConsumer<IndexJournalEntryData.Record>, AutoCloseable {
|
|
||||||
|
|
||||||
private final LongArray wordRangeEnds;
|
|
||||||
private final IntArray wordRangeOffset;
|
|
||||||
private final RandomWriteFunnel documentsFile;
|
|
||||||
|
|
||||||
private final Path tempFile;
|
|
||||||
|
|
||||||
public IntermediateIndexConstructor(Path tempDir, LongArray wordRangeEnds, RandomWriteFunnel documentsFile) throws IOException {
|
|
||||||
tempFile = Files.createTempFile(tempDir, "iic", "dat");
|
|
||||||
|
|
||||||
this.wordRangeEnds = wordRangeEnds;
|
|
||||||
this.wordRangeOffset = IntArray.mmapForWriting(tempFile, wordRangeEnds.size());
|
|
||||||
this.documentsFile = documentsFile;
|
|
||||||
}
|
|
||||||
|
|
||||||
@SneakyThrows
|
|
||||||
@Override
|
|
||||||
public void accept(long docId, IndexJournalEntryData.Record record) {
|
|
||||||
int domainId = UrlIdCodec.getDomainId(docId);
|
|
||||||
float rankingPart = domainRankings.getSortRanking(domainId);
|
|
||||||
long rankEncodedId = UrlIdCodec.addRank(rankingPart, docId);
|
|
||||||
|
|
||||||
final int wordId = record.wordId();
|
|
||||||
long offset = startOfRange(wordId);
|
|
||||||
|
|
||||||
documentsFile.put(offset + wordRangeOffset.getAndIncrement(wordId), rankEncodedId);
|
|
||||||
}
|
|
||||||
|
|
||||||
private long startOfRange(int wordId) {
|
|
||||||
if (wordId == 0) return 0;
|
|
||||||
|
|
||||||
return wordRangeEnds.get(wordId - 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void close() throws IOException {
|
|
||||||
Files.delete(tempFile);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -1,48 +0,0 @@
|
|||||||
package nu.marginalia.index.priority;
|
|
||||||
|
|
||||||
import nu.marginalia.array.buffer.LongQueryBuffer;
|
|
||||||
import nu.marginalia.btree.BTreeReader;
|
|
||||||
import nu.marginalia.index.query.EntrySource;
|
|
||||||
|
|
||||||
import static java.lang.Math.min;
|
|
||||||
|
|
||||||
public class ReverseIndexPriorityEntrySource implements EntrySource {
|
|
||||||
private final BTreeReader reader;
|
|
||||||
|
|
||||||
int pos;
|
|
||||||
int endOffset;
|
|
||||||
|
|
||||||
private final int wordId;
|
|
||||||
|
|
||||||
public ReverseIndexPriorityEntrySource(BTreeReader reader, int wordId) {
|
|
||||||
this.reader = reader;
|
|
||||||
this.wordId = wordId;
|
|
||||||
|
|
||||||
pos = 0;
|
|
||||||
endOffset = pos + reader.numEntries();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void skip(int n) {
|
|
||||||
pos += n;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void read(LongQueryBuffer buffer) {
|
|
||||||
buffer.end = min(buffer.end, endOffset - pos);
|
|
||||||
reader.readData(buffer.data, buffer.end, pos);
|
|
||||||
pos += buffer.end;
|
|
||||||
|
|
||||||
buffer.uniq();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasMore() {
|
|
||||||
return pos < endOffset;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String indexName() {
|
|
||||||
return "Priority:" + wordId;
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,31 +0,0 @@
|
|||||||
package nu.marginalia.index.priority;
|
|
||||||
|
|
||||||
import nu.marginalia.btree.model.BTreeBlockSize;
|
|
||||||
import nu.marginalia.btree.model.BTreeContext;
|
|
||||||
import nu.marginalia.index.journal.model.IndexJournalEntryData;
|
|
||||||
import nu.marginalia.model.idx.WordFlags;
|
|
||||||
|
|
||||||
public class ReverseIndexPriorityParameters {
|
|
||||||
static final int ENTRY_SIZE = 1;
|
|
||||||
static final BTreeBlockSize blockSize = BTreeBlockSize.BS_4096;
|
|
||||||
|
|
||||||
static final BTreeContext bTreeContext = new BTreeContext(5, ENTRY_SIZE, blockSize);
|
|
||||||
|
|
||||||
private static final long highPriorityFlags =
|
|
||||||
WordFlags.Title.asBit()
|
|
||||||
| WordFlags.Subjects.asBit()
|
|
||||||
| WordFlags.TfIdfHigh.asBit()
|
|
||||||
| WordFlags.NamesWords.asBit()
|
|
||||||
| WordFlags.UrlDomain.asBit()
|
|
||||||
| WordFlags.UrlPath.asBit()
|
|
||||||
| WordFlags.Site.asBit()
|
|
||||||
| WordFlags.SiteAdjacent.asBit();
|
|
||||||
|
|
||||||
public static boolean filterPriorityRecord(IndexJournalEntryData.Record record) {
|
|
||||||
long meta = record.metadata();
|
|
||||||
|
|
||||||
return (meta & highPriorityFlags) != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
@ -1,77 +0,0 @@
|
|||||||
package nu.marginalia.index.priority;
|
|
||||||
|
|
||||||
import nu.marginalia.index.query.EntrySource;
|
|
||||||
import nu.marginalia.array.LongArray;
|
|
||||||
import nu.marginalia.btree.BTreeReader;
|
|
||||||
import nu.marginalia.index.query.EmptyEntrySource;
|
|
||||||
import nu.marginalia.index.query.ReverseIndexRetainFilter;
|
|
||||||
import nu.marginalia.index.query.filter.QueryFilterNoPass;
|
|
||||||
import nu.marginalia.index.query.filter.QueryFilterStepIf;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
|
|
||||||
public class ReverseIndexPriorityReader {
|
|
||||||
private final LongArray words;
|
|
||||||
private final LongArray documents;
|
|
||||||
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
|
||||||
|
|
||||||
public ReverseIndexPriorityReader(Path words, Path documents) throws IOException {
|
|
||||||
if (!Files.exists(words) || !Files.exists(documents)) {
|
|
||||||
this.words = null;
|
|
||||||
this.documents = null;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.info("Switching prio reverse index");
|
|
||||||
|
|
||||||
this.words = LongArray.mmapRead(words);
|
|
||||||
this.documents = LongArray.mmapRead(documents);
|
|
||||||
}
|
|
||||||
|
|
||||||
public EntrySource priorityDocuments(int wordId) {
|
|
||||||
if (words == null) {
|
|
||||||
// index not loaded
|
|
||||||
return new EmptyEntrySource();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (wordId < 0 || wordId >= words.size()) return new EmptyEntrySource();
|
|
||||||
|
|
||||||
long offset = words.get(wordId);
|
|
||||||
|
|
||||||
if (offset < 0) return new EmptyEntrySource();
|
|
||||||
|
|
||||||
return new ReverseIndexPriorityEntrySource(createReaderNew(offset), wordId);
|
|
||||||
}
|
|
||||||
|
|
||||||
private BTreeReader createReaderNew(long offset) {
|
|
||||||
return new BTreeReader(documents, ReverseIndexPriorityParameters.bTreeContext, offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
public QueryFilterStepIf also(int wordId) {
|
|
||||||
if (wordId < 0) return new QueryFilterNoPass();
|
|
||||||
|
|
||||||
long offset = words.get(wordId);
|
|
||||||
|
|
||||||
if (offset < 0) return new QueryFilterNoPass();
|
|
||||||
|
|
||||||
return new ReverseIndexRetainFilter(createReaderNew(offset), "priority", wordId);
|
|
||||||
}
|
|
||||||
|
|
||||||
public int numDocuments(int wordId) {
|
|
||||||
if (wordId < 0)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
long offset = words.get(wordId);
|
|
||||||
|
|
||||||
if (offset < 0)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
return createReaderNew(offset).numEntries();
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -4,7 +4,7 @@ import nu.marginalia.array.buffer.LongQueryBuffer;
|
|||||||
import nu.marginalia.btree.BTreeReader;
|
import nu.marginalia.btree.BTreeReader;
|
||||||
import nu.marginalia.index.query.filter.QueryFilterStepIf;
|
import nu.marginalia.index.query.filter.QueryFilterStepIf;
|
||||||
|
|
||||||
public record ReverseIndexRetainFilter(BTreeReader range, String name, int wordId) implements QueryFilterStepIf {
|
public record ReverseIndexRetainFilter(BTreeReader range, String name, long wordId) implements QueryFilterStepIf {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void apply(LongQueryBuffer buffer) {
|
public void apply(LongQueryBuffer buffer) {
|
||||||
|
@ -0,0 +1,108 @@
|
|||||||
|
package nu.marginalia.index;
|
||||||
|
|
||||||
|
import nu.marginalia.array.algo.SortingContext;
|
||||||
|
import nu.marginalia.array.buffer.LongQueryBuffer;
|
||||||
|
import nu.marginalia.index.construction.ReversePreindex;
|
||||||
|
import nu.marginalia.index.construction.TestJournalFactory;
|
||||||
|
import nu.marginalia.index.construction.TestJournalFactory.EntryDataWithWordMeta;
|
||||||
|
import nu.marginalia.index.journal.reader.IndexJournalReader;
|
||||||
|
import org.junit.jupiter.api.AfterEach;
|
||||||
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import static nu.marginalia.index.construction.TestJournalFactory.wm;
|
||||||
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
|
class ReverseIndexReaderTest {
|
||||||
|
TestJournalFactory journalFactory;
|
||||||
|
Path tempDir;
|
||||||
|
SortingContext sortingContext;
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
public void setUp() throws IOException {
|
||||||
|
journalFactory = new TestJournalFactory();
|
||||||
|
|
||||||
|
tempDir = Files.createTempDirectory("sort");
|
||||||
|
sortingContext = new SortingContext(Path.of("invalid"), 1<<20);
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterEach
|
||||||
|
public void tearDown() throws IOException {
|
||||||
|
journalFactory.clear();
|
||||||
|
|
||||||
|
List<Path> contents = new ArrayList<>();
|
||||||
|
Files.list(tempDir).forEach(contents::add);
|
||||||
|
for (var tempFile : contents) {
|
||||||
|
Files.delete(tempFile);
|
||||||
|
}
|
||||||
|
Files.delete(tempDir);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSimple() throws IOException {
|
||||||
|
|
||||||
|
var indexReader = createIndex(
|
||||||
|
new EntryDataWithWordMeta(100, 101, wm(50, 51))
|
||||||
|
);
|
||||||
|
|
||||||
|
assertEquals(1, indexReader.numDocuments(50));
|
||||||
|
|
||||||
|
long[] meta = indexReader.getTermMeta(50, new long[] { 100 });
|
||||||
|
assertArrayEquals(new long[] { 51 }, meta);
|
||||||
|
assertArrayEquals(new long[] { 100 }, readEntries(indexReader, 50));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test2x2() throws IOException {
|
||||||
|
|
||||||
|
var indexReader = createIndex(
|
||||||
|
new EntryDataWithWordMeta(100, 101, wm(50, 51), wm(51, 52)),
|
||||||
|
new EntryDataWithWordMeta(101, 101, wm(51, 53), wm(52, 54))
|
||||||
|
|
||||||
|
);
|
||||||
|
|
||||||
|
assertEquals(1, indexReader.numDocuments(50));
|
||||||
|
assertEquals(2, indexReader.numDocuments(51));
|
||||||
|
assertEquals(1, indexReader.numDocuments(52));
|
||||||
|
|
||||||
|
assertArrayEquals(new long[] { 51 }, indexReader.getTermMeta(50, new long[] { 100 }));
|
||||||
|
assertArrayEquals(new long[] { 100 }, readEntries(indexReader, 50));
|
||||||
|
|
||||||
|
assertArrayEquals(new long[] { 52, 53 }, indexReader.getTermMeta(51, new long[] { 100, 101 }));
|
||||||
|
assertArrayEquals(new long[] { 100, 101 }, readEntries(indexReader, 51));
|
||||||
|
|
||||||
|
assertArrayEquals(new long[] { 54 }, indexReader.getTermMeta(52, new long[] { 101 }));
|
||||||
|
assertArrayEquals(new long[] { 101 }, readEntries(indexReader, 52));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private long[] readEntries(ReverseIndexReader reader, long wordId) {
|
||||||
|
var es = reader.documents(wordId);
|
||||||
|
assertTrue(es.hasMore());
|
||||||
|
LongQueryBuffer buffer = new LongQueryBuffer(4);
|
||||||
|
es.read(buffer);
|
||||||
|
assertFalse(es.hasMore());
|
||||||
|
return buffer.copyData();
|
||||||
|
}
|
||||||
|
|
||||||
|
private ReverseIndexReader createIndex(EntryDataWithWordMeta... scenario) throws IOException {
|
||||||
|
var reader = journalFactory.createReader(scenario);
|
||||||
|
var preindex = ReversePreindex.constructPreindex(reader, tempDir, tempDir);
|
||||||
|
|
||||||
|
|
||||||
|
Path docsFile = tempDir.resolve("docs.dat");
|
||||||
|
Path wordsFile = tempDir.resolve("words.dat");
|
||||||
|
|
||||||
|
preindex.finalizeIndex(docsFile, wordsFile);
|
||||||
|
preindex.delete();
|
||||||
|
|
||||||
|
return new ReverseIndexReader(wordsFile, docsFile);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,173 @@
|
|||||||
|
package nu.marginalia.index.construction;
|
||||||
|
|
||||||
|
import nu.marginalia.array.algo.SortingContext;
|
||||||
|
import org.junit.jupiter.api.AfterEach;
|
||||||
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import static nu.marginalia.index.construction.TestJournalFactory.EntryData;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
|
class ReversePreindexDocsTest {
|
||||||
|
Path countsFile;
|
||||||
|
Path wordsIdFile;
|
||||||
|
Path docsFile;
|
||||||
|
Path tempDir;
|
||||||
|
SortingContext sortingContext;
|
||||||
|
|
||||||
|
TestJournalFactory journalFactory;
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
public void setUp() throws IOException {
|
||||||
|
journalFactory = new TestJournalFactory();
|
||||||
|
|
||||||
|
countsFile = Files.createTempFile("counts", ".dat");
|
||||||
|
wordsIdFile = Files.createTempFile("words", ".dat");
|
||||||
|
docsFile = Files.createTempFile("docs", ".dat");
|
||||||
|
tempDir = Files.createTempDirectory("sort");
|
||||||
|
sortingContext = new SortingContext(Path.of("invalid"), 1<<20);
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterEach
|
||||||
|
public void tearDown() throws IOException {
|
||||||
|
journalFactory.clear();
|
||||||
|
|
||||||
|
Files.deleteIfExists(countsFile);
|
||||||
|
Files.deleteIfExists(wordsIdFile);
|
||||||
|
List<Path> contents = new ArrayList<>();
|
||||||
|
Files.list(tempDir).forEach(contents::add);
|
||||||
|
for (var tempFile : contents) {
|
||||||
|
Files.delete(tempFile);
|
||||||
|
}
|
||||||
|
Files.delete(tempDir);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDocs() throws IOException {
|
||||||
|
var reader = journalFactory.createReader(
|
||||||
|
new EntryData(-0xF00BA3L, 0, 10, 40, -100, 33)
|
||||||
|
);
|
||||||
|
|
||||||
|
var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile);
|
||||||
|
var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), sortingContext, segments);
|
||||||
|
|
||||||
|
List<TestSegmentData> expected = List.of(
|
||||||
|
new TestSegmentData(-100, 0, 2, new long[] { -0xF00BA3L, 0 }),
|
||||||
|
new TestSegmentData(10, 2, 4, new long[] { -0xF00BA3L, 0 }),
|
||||||
|
new TestSegmentData(33, 4, 6, new long[] { -0xF00BA3L, 0 }),
|
||||||
|
new TestSegmentData(40, 6, 8, new long[] { -0xF00BA3L, 0 })
|
||||||
|
);
|
||||||
|
|
||||||
|
List<TestSegmentData> actual = new ArrayList<>();
|
||||||
|
|
||||||
|
var iter = segments.iterator(2);
|
||||||
|
while (iter.next()) {
|
||||||
|
long[] data = new long[(int) (iter.endOffset - iter.startOffset)];
|
||||||
|
docs.slice(iter.startOffset, iter.endOffset).get(0, data);
|
||||||
|
actual.add(new TestSegmentData(iter.wordId, iter.startOffset, iter.endOffset,
|
||||||
|
data));
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(expected, actual);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDocsRepeatedWord() throws IOException {
|
||||||
|
var reader = journalFactory.createReader(
|
||||||
|
new EntryData(-0xF00BA3L, 0, 4, 4)
|
||||||
|
);
|
||||||
|
|
||||||
|
var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile);
|
||||||
|
var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), sortingContext, segments);
|
||||||
|
|
||||||
|
List<TestSegmentData> expected = List.of(
|
||||||
|
new TestSegmentData(4, 0, 4, new long[] { -0xF00BA3L, 0, -0xF00BA3L, 0 })
|
||||||
|
);
|
||||||
|
|
||||||
|
List<TestSegmentData> actual = new ArrayList<>();
|
||||||
|
|
||||||
|
var iter = segments.iterator(2);
|
||||||
|
while (iter.next()) {
|
||||||
|
long[] data = new long[(int) (iter.endOffset - iter.startOffset)];
|
||||||
|
docs.slice(iter.startOffset, iter.endOffset).get(0, data);
|
||||||
|
actual.add(new TestSegmentData(iter.wordId, iter.startOffset, iter.endOffset,
|
||||||
|
data));
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(expected, actual);
|
||||||
|
}
|
||||||
|
@Test
|
||||||
|
public void testDocs2() throws IOException {
|
||||||
|
var reader = journalFactory.createReader(
|
||||||
|
new EntryData(-0xF00BA3L, 0, 10, 40, -100, 33),
|
||||||
|
new EntryData(0xF00BA4L, 0, 15, 30, -100, 33)
|
||||||
|
);
|
||||||
|
|
||||||
|
var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile);
|
||||||
|
var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), sortingContext, segments);
|
||||||
|
|
||||||
|
List<TestSegmentData> expected = List.of(
|
||||||
|
new TestSegmentData(-100, 0, 4, new long[] { -0xF00BA3L, 0, 0xF00BA4L, 0 }),
|
||||||
|
new TestSegmentData(10, 4, 6, new long[] { -0xF00BA3L, 0}),
|
||||||
|
new TestSegmentData(15, 6, 8, new long[] { 0xF00BA4L, 0}),
|
||||||
|
new TestSegmentData(30, 8, 10, new long[] { 0xF00BA4L, 0}),
|
||||||
|
new TestSegmentData(33, 10, 14, new long[] { -0xF00BA3L, 0, 0xF00BA4L, 0}),
|
||||||
|
new TestSegmentData(40, 14, 16, new long[] { -0xF00BA3L, 0})
|
||||||
|
);
|
||||||
|
|
||||||
|
List<TestSegmentData> actual = new ArrayList<>();
|
||||||
|
|
||||||
|
var iter = segments.iterator(2);
|
||||||
|
while (iter.next()) {
|
||||||
|
long[] data = new long[(int) (iter.endOffset - iter.startOffset)];
|
||||||
|
docs.slice(iter.startOffset, iter.endOffset).get(0, data);
|
||||||
|
actual.add(new TestSegmentData(iter.wordId, iter.startOffset, iter.endOffset,
|
||||||
|
data));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
record TestSegmentData(long wordId, long start, long end, long[] data) {
|
||||||
|
public TestSegmentData(long wordId, long start, long end) {
|
||||||
|
this(wordId, start, end, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (this == o) return true;
|
||||||
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
|
|
||||||
|
TestSegmentData that = (TestSegmentData) o;
|
||||||
|
|
||||||
|
if (wordId != that.wordId) return false;
|
||||||
|
if (start != that.start) return false;
|
||||||
|
if (end != that.end) return false;
|
||||||
|
return Arrays.equals(data, that.data);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
int result = (int) (wordId ^ (wordId >>> 32));
|
||||||
|
result = 31 * result + (int) (start ^ (start >>> 32));
|
||||||
|
result = 31 * result + (int) (end ^ (end >>> 32));
|
||||||
|
result = 31 * result + Arrays.hashCode(data);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "TestSegmentData{" +
|
||||||
|
"wordId=" + wordId +
|
||||||
|
", start=" + start +
|
||||||
|
", end=" + end +
|
||||||
|
", data=" + Arrays.toString(data) +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,143 @@
package nu.marginalia.index.construction;

import nu.marginalia.array.LongArray;
import nu.marginalia.array.algo.SortingContext;
import nu.marginalia.btree.BTreeReader;
import nu.marginalia.btree.model.BTreeHeader;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;

import static nu.marginalia.index.construction.TestJournalFactory.*;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

class ReversePreindexFinalizeTest {
    TestJournalFactory journalFactory;
    Path countsFile;
    Path wordsIdFile;
    Path docsFile;
    Path tempDir;
    SortingContext sortingContext;

    @BeforeEach
    public void setUp() throws IOException {
        journalFactory = new TestJournalFactory();

        countsFile = Files.createTempFile("counts", ".dat");
        wordsIdFile = Files.createTempFile("words", ".dat");
        docsFile = Files.createTempFile("docs", ".dat");
        tempDir = Files.createTempDirectory("sort");
        sortingContext = new SortingContext(Path.of("invalid"), 1<<20);
    }

    @AfterEach
    public void tearDown() throws IOException {
        journalFactory.clear();

        Files.deleteIfExists(countsFile);
        Files.deleteIfExists(wordsIdFile);
        List<Path> contents = new ArrayList<>();
        Files.list(tempDir).forEach(contents::add);
        for (var tempFile : contents) {
            Files.delete(tempFile);
        }
        Files.delete(tempDir);
    }

    @Test
    public void testFinalizeSimple() throws IOException {
        var reader = journalFactory.createReader(new EntryDataWithWordMeta(100, 101, wm(50, 51)));
        var preindex = ReversePreindex.constructPreindex(reader, tempDir, tempDir);

        preindex.finalizeIndex(tempDir.resolve("docs.dat"), tempDir.resolve("words.dat"));
        preindex.delete();

        Path wordsFile = tempDir.resolve("words.dat");
        Path docsFile = tempDir.resolve("docs.dat");

        assertTrue(Files.exists(wordsFile));
        assertTrue(Files.exists(docsFile));

        System.out.println(Files.size(wordsFile));
        System.out.println(Files.size(docsFile));

        var docsArray = LongArray.mmapRead(docsFile);
        var wordsArray = LongArray.mmapRead(wordsFile);

        var docsHeader = BTreeReader.readHeader(docsArray, 0);
        var wordsHeader = BTreeReader.readHeader(wordsArray, 0);

        assertEquals(1, docsHeader.numEntries());
        assertEquals(1, wordsHeader.numEntries());

        assertEquals(100, docsArray.get(docsHeader.dataOffsetLongs() + 0));
        assertEquals(51, docsArray.get(docsHeader.dataOffsetLongs() + 1));
        assertEquals(50, wordsArray.get(wordsHeader.dataOffsetLongs()));
        assertEquals(0, wordsArray.get(wordsHeader.dataOffsetLongs() + 1));
    }


    @Test
    public void testFinalizeSimple2x2() throws IOException {
        var reader = journalFactory.createReader(
                new EntryDataWithWordMeta(100, 101, wm(50, 51)),
                new EntryDataWithWordMeta(101, 101, wm(51, 52))
        );

        var preindex = ReversePreindex.constructPreindex(reader, tempDir, tempDir);

        preindex.finalizeIndex(tempDir.resolve("docs.dat"), tempDir.resolve("words.dat"));
        preindex.delete();

        Path wordsFile = tempDir.resolve("words.dat");
        Path docsFile = tempDir.resolve("docs.dat");

        assertTrue(Files.exists(wordsFile));
        assertTrue(Files.exists(docsFile));

        System.out.println(Files.size(wordsFile));
        System.out.println(Files.size(docsFile));

        var docsArray = LongArray.mmapRead(docsFile);
        var wordsArray = LongArray.mmapRead(wordsFile);


        var wordsHeader = BTreeReader.readHeader(wordsArray, 0);

        System.out.println(wordsHeader);

        assertEquals(2, wordsHeader.numEntries());

        long offset1 = wordsArray.get(wordsHeader.dataOffsetLongs() + 1);
        long offset2 = wordsArray.get(wordsHeader.dataOffsetLongs() + 3);

        assertEquals(50, wordsArray.get(wordsHeader.dataOffsetLongs()));
        assertEquals(0, wordsArray.get(wordsHeader.dataOffsetLongs() + 1));
        assertEquals(50, wordsArray.get(wordsHeader.dataOffsetLongs()));
        assertEquals(0, wordsArray.get(wordsHeader.dataOffsetLongs() + 1));

        BTreeHeader docsHeader;

        docsHeader = BTreeReader.readHeader(docsArray, offset1);
        System.out.println(docsHeader);
        assertEquals(1, docsHeader.numEntries());

        assertEquals(100, docsArray.get(docsHeader.dataOffsetLongs() + 0));
        assertEquals(51, docsArray.get(docsHeader.dataOffsetLongs() + 1));

        docsHeader = BTreeReader.readHeader(docsArray, offset2);
        System.out.println(docsHeader);
        assertEquals(1, docsHeader.numEntries());

        assertEquals(101, docsArray.get(docsHeader.dataOffsetLongs() + 0));
        assertEquals(52, docsArray.get(docsHeader.dataOffsetLongs() + 1));
    }
}
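The finalize tests above verify a two-level structure: a words area mapping each word id to an offset, and per-word document data reachable through that offset. In the tests this is read back through B-trees (BTreeReader) over memory-mapped LongArrays; the sketch below is only a flat-array stand-in with a hypothetical class name, meant to make the lookup path of testFinalizeSimple concrete.

```java
// Flat-array stand-in for the two-level layout verified above (the real index
// uses B-trees over memory-mapped arrays; this sketch does not).
public class WordLookupSketch {
    // words: interleaved (wordId, docsOffset) pairs, sorted by wordId
    static long findDocsOffset(long[] words, long wordId) {
        int lo = 0, hi = words.length / 2 - 1;
        while (lo <= hi) {
            int mid = (lo + hi) >>> 1;
            long midWord = words[2 * mid];
            if (midWord < wordId) lo = mid + 1;
            else if (midWord > wordId) hi = mid - 1;
            else return words[2 * mid + 1];
        }
        return -1; // word not present
    }

    public static void main(String[] args) {
        // Mirrors testFinalizeSimple: word 50 points at offset 0 of the docs
        // data, where (docId=100, wordMeta=51) is stored.
        long[] words = { 50, 0 };
        long[] docs  = { 100, 51 };

        long off = findDocsOffset(words, 50);
        System.out.println("docId=" + docs[(int) off] + ", meta=" + docs[(int) off + 1]);
    }
}
```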
@ -0,0 +1,427 @@
|
|||||||
|
|
||||||
|
package nu.marginalia.index.construction;
|
||||||
|
|
||||||
|
import nu.marginalia.array.algo.SortingContext;
|
||||||
|
import org.junit.jupiter.api.AfterEach;
|
||||||
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
import static nu.marginalia.index.construction.TestJournalFactory.*;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
|
class ReversePreindexMergeTest {
|
||||||
|
TestJournalFactory journalFactory;
|
||||||
|
Path countsFile;
|
||||||
|
Path wordsIdFile;
|
||||||
|
Path docsFile;
|
||||||
|
Path tempDir;
|
||||||
|
SortingContext sortingContext;
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
public void setUp() throws IOException {
|
||||||
|
journalFactory = new TestJournalFactory();
|
||||||
|
|
||||||
|
countsFile = Files.createTempFile("counts", ".dat");
|
||||||
|
wordsIdFile = Files.createTempFile("words", ".dat");
|
||||||
|
docsFile = Files.createTempFile("docs", ".dat");
|
||||||
|
tempDir = Files.createTempDirectory("sort");
|
||||||
|
sortingContext = new SortingContext(Path.of("invalid"), 1<<20);
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterEach
|
||||||
|
public void tearDown() throws IOException {
|
||||||
|
journalFactory.clear();
|
||||||
|
|
||||||
|
Files.deleteIfExists(countsFile);
|
||||||
|
Files.deleteIfExists(wordsIdFile);
|
||||||
|
List<Path> contents = new ArrayList<>();
|
||||||
|
Files.list(tempDir).forEach(contents::add);
|
||||||
|
for (var tempFile : contents) {
|
||||||
|
Files.delete(tempFile);
|
||||||
|
}
|
||||||
|
Files.delete(tempDir);
|
||||||
|
}
|
||||||
|
|
||||||
|
public ReversePreindex runMergeScenario(
|
||||||
|
List<EntryDataWithWordMeta> leftData,
|
||||||
|
List<EntryDataWithWordMeta> rightData
|
||||||
|
) throws IOException {
|
||||||
|
var reader1 = journalFactory.createReader(leftData.toArray(EntryDataWithWordMeta[]::new));
|
||||||
|
var reader2 = journalFactory.createReader(rightData.toArray(EntryDataWithWordMeta[]::new));
|
||||||
|
|
||||||
|
var left = ReversePreindex.constructPreindex(reader1, tempDir, tempDir);
|
||||||
|
var right = ReversePreindex.constructPreindex(reader2, tempDir, tempDir);
|
||||||
|
return ReversePreindex.merge(tempDir, left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<TestSegmentData> getData(ReversePreindex merged) {
|
||||||
|
var iter = merged.segments.iterator(2);
|
||||||
|
List<TestSegmentData> actual = new ArrayList<>();
|
||||||
|
while (iter.next()) {
|
||||||
|
long[] data = new long[(int) (iter.endOffset - iter.startOffset)];
|
||||||
|
merged.documents.slice(iter.startOffset, iter.endOffset).get(0, data);
|
||||||
|
actual.add(new TestSegmentData(iter.wordId, iter.startOffset, iter.endOffset,
|
||||||
|
data));
|
||||||
|
}
|
||||||
|
return actual;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDocsMergeSingleNoOverlap() throws IOException {
|
||||||
|
|
||||||
|
IdSequence docIds = new IdSequence();
|
||||||
|
IdSequence docMetas = new IdSequence();
|
||||||
|
IdSequence wordMetas = new IdSequence();
|
||||||
|
IdSequence wordIds = new IdSequence();
|
||||||
|
|
||||||
|
var leftSequence = List.of(new EntryDataWithWordMeta(docIds.nextUnique(), docMetas.nextUnique(), wm(wordIds.nextUnique(), wordMetas.nextUnique())));
|
||||||
|
var rightSequence = List.of(new EntryDataWithWordMeta(docIds.nextUnique(), docMetas.nextUnique(), wm(wordIds.nextUnique(), wordMetas.nextUnique())));
|
||||||
|
|
||||||
|
var merged = runMergeScenario(
|
||||||
|
leftSequence,
|
||||||
|
rightSequence
|
||||||
|
);
|
||||||
|
|
||||||
|
var actual = getData(merged);
|
||||||
|
|
||||||
|
var expected = simulateMerge(leftSequence, rightSequence);
|
||||||
|
|
||||||
|
System.out.println(actual);
|
||||||
|
assertEquals(expected, actual);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDocsMergeSingleOnlyOverlap() throws IOException {
|
||||||
|
|
||||||
|
IdSequence docIds = new IdSequence();
|
||||||
|
IdSequence docMetas = new IdSequence();
|
||||||
|
IdSequence wordMetas = new IdSequence();
|
||||||
|
IdSequence wordIds = new IdSequence();
|
||||||
|
|
||||||
|
var leftSequence = List.of(new EntryDataWithWordMeta(docIds.nextUnique(), docMetas.nextUnique(), wm(wordIds.nextUnique(), wordMetas.nextUnique())));
|
||||||
|
var rightSequence = List.of(new EntryDataWithWordMeta(docIds.nextUnique(), docMetas.nextUnique(), wm(wordIds.alreadySeenSameSequence(), wordMetas.nextUnique())));
|
||||||
|
|
||||||
|
var merged = runMergeScenario(
|
||||||
|
leftSequence,
|
||||||
|
rightSequence
|
||||||
|
);
|
||||||
|
|
||||||
|
var actual = getData(merged);
|
||||||
|
|
||||||
|
var expected = simulateMerge(leftSequence, rightSequence);
|
||||||
|
|
||||||
|
System.out.println(actual);
|
||||||
|
assertEquals(expected, actual);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDocsMergeSingleOnlyOverlap2() throws IOException {
|
||||||
|
|
||||||
|
long wid1 = 1;
|
||||||
|
long wid2 = 2;
|
||||||
|
IdSequence docIds = new IdSequence();
|
||||||
|
IdSequence docMetas = new IdSequence();
|
||||||
|
IdSequence wordMetas = new IdSequence();
|
||||||
|
|
||||||
|
var leftSequence = List.of(new EntryDataWithWordMeta(docIds.nextUnique(), docMetas.nextUnique(),
|
||||||
|
wm(wid1, wordMetas.nextUnique()),
|
||||||
|
wm(wid2, wordMetas.nextUnique())
|
||||||
|
));
|
||||||
|
var rightSequence = List.of(new EntryDataWithWordMeta(docIds.nextUnique(), docMetas.nextUnique(),
|
||||||
|
wm(wid1, wordMetas.nextUnique()),
|
||||||
|
wm(wid2, wordMetas.nextUnique())
|
||||||
|
));
|
||||||
|
|
||||||
|
var merged = runMergeScenario(
|
||||||
|
leftSequence,
|
||||||
|
rightSequence
|
||||||
|
);
|
||||||
|
|
||||||
|
var actual = getData(merged);
|
||||||
|
|
||||||
|
var expected = simulateMerge(leftSequence, rightSequence);
|
||||||
|
|
||||||
|
System.out.println(actual);
|
||||||
|
assertEquals(expected, actual);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBadCase1() throws IOException {
|
||||||
|
long wordId = 0xF00F00BA3L;
|
||||||
|
|
||||||
|
List<EntryDataWithWordMeta> leftSequence = List.of(new EntryDataWithWordMeta(40, 50,
|
||||||
|
wm(wordId, 5))
|
||||||
|
);
|
||||||
|
List<EntryDataWithWordMeta> rightSequence = List.of(new EntryDataWithWordMeta(41, 51,
|
||||||
|
wm(wordId, 3),
|
||||||
|
wm(wordId, 4))
|
||||||
|
);
|
||||||
|
|
||||||
|
var mergedLR = runMergeScenario(
|
||||||
|
leftSequence,
|
||||||
|
rightSequence
|
||||||
|
);
|
||||||
|
var mergedRL = runMergeScenario(
|
||||||
|
rightSequence,
|
||||||
|
leftSequence
|
||||||
|
);
|
||||||
|
|
||||||
|
var actualLR = getData(mergedLR);
|
||||||
|
var actualRL = getData(mergedRL);
|
||||||
|
|
||||||
|
var expected = simulateMerge(leftSequence, rightSequence);
|
||||||
|
|
||||||
|
assertEquals(actualLR, actualRL);
|
||||||
|
|
||||||
|
if (!expected.equals(actualLR)) {
|
||||||
|
System.out.println("*fail*");
|
||||||
|
System.out.println(leftSequence);
|
||||||
|
System.out.println(rightSequence);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
System.out.println("*pass*");
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(expected, actualLR);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBadCase2() throws IOException {
|
||||||
|
long wordId = 100;
|
||||||
|
|
||||||
|
List<EntryDataWithWordMeta> leftSequence = List.of(
|
||||||
|
new EntryDataWithWordMeta(1, 50, wm(wordId, 5)),
|
||||||
|
new EntryDataWithWordMeta(2, 50, wm(wordId, 5))
|
||||||
|
|
||||||
|
);
|
||||||
|
List<EntryDataWithWordMeta> rightSequence = List.of(
|
||||||
|
new EntryDataWithWordMeta(3, 50, wm(wordId, 5))
|
||||||
|
);
|
||||||
|
|
||||||
|
var mergedLR = runMergeScenario(
|
||||||
|
leftSequence,
|
||||||
|
rightSequence
|
||||||
|
);
|
||||||
|
var mergedRL = runMergeScenario(
|
||||||
|
rightSequence,
|
||||||
|
leftSequence
|
||||||
|
);
|
||||||
|
|
||||||
|
var actualLR = getData(mergedLR);
|
||||||
|
var actualRL = getData(mergedRL);
|
||||||
|
|
||||||
|
var expected = simulateMerge(leftSequence, rightSequence);
|
||||||
|
|
||||||
|
assertEquals(actualLR, actualRL);
|
||||||
|
|
||||||
|
if (!expected.equals(actualLR)) {
|
||||||
|
System.out.println("*fail*");
|
||||||
|
System.out.println(leftSequence);
|
||||||
|
System.out.println(rightSequence);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
System.out.println("*pass*");
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(expected, actualLR);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFuzz() throws IOException {
|
||||||
|
Random r = new Random();
|
||||||
|
int maxDocs = 150;
|
||||||
|
int maxWords = 160;
|
||||||
|
int nIters = 1000;
|
||||||
|
|
||||||
|
for (int i = 0; i < nIters; i++) {
|
||||||
|
int nLeft = 1 + r.nextInt(maxDocs);
|
||||||
|
int nRight = 1 + r.nextInt(maxDocs);
|
||||||
|
|
||||||
|
IdSequence docIdsLeft = new IdSequence();
|
||||||
|
IdSequence docIdsRight = new IdSequence();
|
||||||
|
IdSequence docMetas = new IdSequence();
|
||||||
|
IdSequence wordMetas = new IdSequence();
|
||||||
|
IdSequence wordIds = new IdSequence();
|
||||||
|
|
||||||
|
List<EntryDataWithWordMeta> leftSequence = new ArrayList<>(nLeft);
|
||||||
|
for (int j = 0; j < nLeft; j++) {
|
||||||
|
WordWithMeta[] words = new WordWithMeta[maxWords == 1 ? 1 : r.nextInt(1, maxWords)];
|
||||||
|
Arrays.setAll(words, idx -> {
|
||||||
|
long wordId = wordIds.seenWithP(1.0);
|
||||||
|
long wordMeta = wordMetas.nextUniqueAssociatedWithKey(wordId);
|
||||||
|
return wm(wordId, wordMeta);
|
||||||
|
});
|
||||||
|
|
||||||
|
long docId = docIdsLeft.nextUnique();
|
||||||
|
long docMeta = docMetas.nextUniqueAssociatedWithKey(docId);
|
||||||
|
leftSequence.add(new EntryDataWithWordMeta(docId, docMeta, words));
|
||||||
|
}
|
||||||
|
|
||||||
|
List<EntryDataWithWordMeta> rightSequence = new ArrayList<>(nLeft);
|
||||||
|
for (int j = 0; j < nRight; j++) {
|
||||||
|
WordWithMeta[] words = new WordWithMeta[maxWords == 1 ? 1 : r.nextInt(1, maxWords)];
|
||||||
|
Arrays.setAll(words, idx -> {
|
||||||
|
long wordId = wordIds.seenWithP(1.0);
|
||||||
|
long wordMeta = wordMetas.nextUniqueAssociatedWithKey(wordId);
|
||||||
|
return wm(wordId, wordMeta);
|
||||||
|
});
|
||||||
|
|
||||||
|
long docId = docIdsRight.seenWithP(docIdsLeft, 0.1);
|
||||||
|
long docMeta = docMetas.nextUniqueAssociatedWithKey(docId);
|
||||||
|
rightSequence.add(new EntryDataWithWordMeta(docId, docMeta, words));
|
||||||
|
}
|
||||||
|
|
||||||
|
var mergedLR = runMergeScenario(
|
||||||
|
leftSequence,
|
||||||
|
rightSequence
|
||||||
|
);
|
||||||
|
var mergedRL = runMergeScenario(
|
||||||
|
rightSequence,
|
||||||
|
leftSequence
|
||||||
|
);
|
||||||
|
|
||||||
|
var actualLR = getData(mergedLR);
|
||||||
|
var actualRL = getData(mergedRL);
|
||||||
|
|
||||||
|
var expected = simulateMerge(leftSequence, rightSequence);
|
||||||
|
|
||||||
|
assertEquals(actualLR, actualRL);
|
||||||
|
|
||||||
|
if (!expected.equals(actualLR)) {
|
||||||
|
System.out.println("*fail*");
|
||||||
|
System.out.println(leftSequence);
|
||||||
|
System.out.println(rightSequence);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
System.out.println("*pass*");
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(expected, actualLR);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public List<TestSegmentData> simulateMerge(
|
||||||
|
Collection<EntryDataWithWordMeta> leftInputs,
|
||||||
|
Collection<EntryDataWithWordMeta> rightInputs
|
||||||
|
) {
|
||||||
|
TreeMap<Long, List<DocWithMeta>> wordToDocs = new TreeMap<>();
|
||||||
|
|
||||||
|
for (var entry : leftInputs) {
|
||||||
|
for (var wm : entry.wordIds()) {
|
||||||
|
wordToDocs.computeIfAbsent(wm.wordId(), w -> new ArrayList<>()).add(
|
||||||
|
new DocWithMeta(entry.docId(), wm.meta())
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (var entry : rightInputs) {
|
||||||
|
for (var wm : entry.wordIds()) {
|
||||||
|
wordToDocs.computeIfAbsent(wm.wordId(), w -> new ArrayList<>()).add(
|
||||||
|
new DocWithMeta(entry.docId(), wm.meta())
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
List<TestSegmentData> ret = new ArrayList<>();
|
||||||
|
int[] start = new int[1];
|
||||||
|
wordToDocs.forEach((wordId, docsList) -> {
|
||||||
|
docsList.sort(Comparator.naturalOrder());
|
||||||
|
var iter = docsList.iterator();
|
||||||
|
DocWithMeta prevVal = null;
|
||||||
|
DocWithMeta currentVal;
|
||||||
|
while (iter.hasNext()) {
|
||||||
|
currentVal = iter.next();
|
||||||
|
if (prevVal != null) {
|
||||||
|
if (currentVal.docId == prevVal.docId) {
|
||||||
|
iter.remove();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
prevVal = currentVal;
|
||||||
|
|
||||||
|
}
|
||||||
|
long[] data = new long[docsList.size()*2];
|
||||||
|
for (int i = 0; i < docsList.size(); i++) {
|
||||||
|
data[2*i] = docsList.get(i).docId;
|
||||||
|
data[2*i + 1] = docsList.get(i).meta;
|
||||||
|
}
|
||||||
|
ret.add(new TestSegmentData(wordId, start[0], start[0] + data.length, data));
|
||||||
|
|
||||||
|
start[0] += data.length;
|
||||||
|
});
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
record DocWithMeta(long docId, long meta) implements Comparable<DocWithMeta> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compareTo(DocWithMeta o) {
|
||||||
|
return Long.compare(docId, o.docId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class IdSequence {
|
||||||
|
Set<Long> seen = new HashSet<>();
|
||||||
|
Map<Long, Long> associatedValues = new HashMap<>();
|
||||||
|
private Random random = new Random();
|
||||||
|
|
||||||
|
/** Return alreadySeen() with probability p,
|
||||||
|
* else nextUnique()
|
||||||
|
*/
|
||||||
|
public long seenWithP(double p) {
|
||||||
|
if (isEmpty() || random.nextDouble() > p)
|
||||||
|
return nextUnique();
|
||||||
|
|
||||||
|
return alreadySeenSameSequence();
|
||||||
|
}
|
||||||
|
|
||||||
|
public long seenWithP(IdSequence other, double p) {
|
||||||
|
if (isEmpty() || random.nextDouble() > p)
|
||||||
|
return nextUnique();
|
||||||
|
|
||||||
|
return alreadySeenOtherSequence(other);
|
||||||
|
}
|
||||||
|
|
||||||
|
public long nextUnique() {
|
||||||
|
for (;;) {
|
||||||
|
long val = random.nextLong();
|
||||||
|
if (seen.add(val)) {
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public long nextUniqueAssociatedWithKey(long key) {
|
||||||
|
return associatedValues.computeIfAbsent(key, k -> nextUnique());
|
||||||
|
}
|
||||||
|
|
||||||
|
public long alreadySeenSameSequence() {
|
||||||
|
long[] values = seen.stream().mapToLong(Long::longValue).toArray();
|
||||||
|
int idx = random.nextInt(0, values.length);
|
||||||
|
return values[idx];
|
||||||
|
}
|
||||||
|
|
||||||
|
public long alreadySeenOtherSequence(IdSequence other) {
|
||||||
|
List<Long> values = new ArrayList<>(other.seen);
|
||||||
|
Collections.shuffle(values);
|
||||||
|
for (Long maybe : values) {
|
||||||
|
if (seen.add(maybe))
|
||||||
|
return maybe;
|
||||||
|
}
|
||||||
|
return nextUnique();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isEmpty() {
|
||||||
|
return seen.isEmpty();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
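The merge tests above compare a real merge against simulateMerge, whose contract can be restated compactly: concatenate each word's postings from both inputs, sort them by document id, and keep only the first entry for any duplicated id. The sketch below restates that invariant in plain Java under a hypothetical class name; it is a model of the expected behaviour, not the merge implementation itself.

```java
import java.util.*;

// Plain-Java restatement of the invariant checked by the fuzz test above.
public class MergeInvariantSketch {
    record Posting(long docId, long meta) {}

    static Map<Long, List<Posting>> merge(Map<Long, List<Posting>> left,
                                          Map<Long, List<Posting>> right) {
        Map<Long, List<Posting>> out = new TreeMap<>();
        for (var side : List.of(left, right)) {
            side.forEach((word, postings) ->
                    out.computeIfAbsent(word, w -> new ArrayList<>()).addAll(postings));
        }
        for (var postings : out.values()) {
            postings.sort(Comparator.comparingLong(Posting::docId));

            // drop consecutive duplicates by docId, keeping the first occurrence
            var iter = postings.iterator();
            Posting prev = null;
            while (iter.hasNext()) {
                Posting cur = iter.next();
                if (prev != null && prev.docId() == cur.docId()) iter.remove();
                else prev = cur;
            }
        }
        return out;
    }

    public static void main(String[] args) {
        var left  = Map.of(100L, List.of(new Posting(1, 5), new Posting(2, 5)));
        var right = Map.of(100L, List.of(new Posting(3, 5), new Posting(1, 7)));

        System.out.println(merge(left, right)); // word 100 -> docs 1, 2, 3
    }
}
```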
@ -0,0 +1,234 @@
|
|||||||
|
package nu.marginalia.index.construction;
|
||||||
|
|
||||||
|
import nu.marginalia.array.LongArray;
|
||||||
|
import nu.marginalia.array.algo.SortingContext;
|
||||||
|
import org.junit.jupiter.api.AfterEach;
|
||||||
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import static nu.marginalia.index.construction.TestJournalFactory.*;
|
||||||
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
|
class ReversePreindexWordSegmentsTest {
|
||||||
|
Path countsFile;
|
||||||
|
Path wordsIdFile;
|
||||||
|
Path docsFile;
|
||||||
|
Path tempDir;
|
||||||
|
|
||||||
|
TestJournalFactory journalFactory;
|
||||||
|
SortingContext sortingContext;
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
public void setUp() throws IOException {
|
||||||
|
journalFactory = new TestJournalFactory();
|
||||||
|
|
||||||
|
countsFile = Files.createTempFile("counts", ".dat");
|
||||||
|
wordsIdFile = Files.createTempFile("words", ".dat");
|
||||||
|
docsFile = Files.createTempFile("docs", ".dat");
|
||||||
|
tempDir = Files.createTempDirectory("sort");
|
||||||
|
sortingContext = new SortingContext(Path.of("invalid"), 1<<20);
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterEach
|
||||||
|
public void tearDown() throws IOException {
|
||||||
|
journalFactory.clear();
|
||||||
|
|
||||||
|
Files.deleteIfExists(countsFile);
|
||||||
|
Files.deleteIfExists(wordsIdFile);
|
||||||
|
List<Path> contents = new ArrayList<>();
|
||||||
|
Files.list(tempDir).forEach(contents::add);
|
||||||
|
for (var tempFile : contents) {
|
||||||
|
Files.delete(tempFile);
|
||||||
|
}
|
||||||
|
Files.delete(tempDir);
|
||||||
|
}
|
||||||
|
@Test
|
||||||
|
public void testWordSegmentsLongWordId() throws IOException {
|
||||||
|
var reader = journalFactory.createReader(
|
||||||
|
new EntryData(-0xF00BA3L, 0, 1L<<33)
|
||||||
|
);
|
||||||
|
|
||||||
|
var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile);
|
||||||
|
var iter = segments.iterator(1);
|
||||||
|
|
||||||
|
List<TestSegmentData> expected = List.of(
|
||||||
|
new TestSegmentData(1L<<33, 0, 1)
|
||||||
|
);
|
||||||
|
|
||||||
|
List<TestSegmentData> actual = new ArrayList<>();
|
||||||
|
|
||||||
|
while (iter.next()) {
|
||||||
|
actual.add(new TestSegmentData(iter.wordId, iter.startOffset, iter.endOffset));
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(expected, actual);
|
||||||
|
}
|
||||||
|
@Test
|
||||||
|
public void testWordSegmentsRepeatedWordId() throws IOException {
|
||||||
|
var reader = journalFactory.createReader(
|
||||||
|
new EntryData(-0xF00BA3L, 0, 5, 5)
|
||||||
|
);
|
||||||
|
|
||||||
|
var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile);
|
||||||
|
var iter = segments.iterator(1);
|
||||||
|
|
||||||
|
List<TestSegmentData> expected = List.of(
|
||||||
|
new TestSegmentData(5, 0, 2)
|
||||||
|
);
|
||||||
|
|
||||||
|
List<TestSegmentData> actual = new ArrayList<>();
|
||||||
|
|
||||||
|
while (iter.next()) {
|
||||||
|
actual.add(new TestSegmentData(iter.wordId, iter.startOffset, iter.endOffset));
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(expected, actual);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWordSegments1() throws IOException {
|
||||||
|
var reader = journalFactory.createReader(
|
||||||
|
new EntryData(-0xF00BA3L, 0, 10, 40, -100, 33)
|
||||||
|
);
|
||||||
|
|
||||||
|
var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile);
|
||||||
|
var iter = segments.iterator(1);
|
||||||
|
|
||||||
|
List<TestSegmentData> expected = List.of(
|
||||||
|
new TestSegmentData(-100, 0, 1),
|
||||||
|
new TestSegmentData(10, 1, 2),
|
||||||
|
new TestSegmentData(33, 2, 3),
|
||||||
|
new TestSegmentData(40, 3, 4)
|
||||||
|
);
|
||||||
|
|
||||||
|
List<TestSegmentData> actual = new ArrayList<>();
|
||||||
|
|
||||||
|
while (iter.next()) {
|
||||||
|
actual.add(new TestSegmentData(iter.wordId, iter.startOffset, iter.endOffset));
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(expected, actual);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWordSegments2() throws IOException {
|
||||||
|
var reader = journalFactory.createReader(
|
||||||
|
new EntryData(-0xF00BA3L, 0, 10, 40, -100, 33),
|
||||||
|
new EntryData(0xF00BA4L, 0, 15, 30, -100, 33)
|
||||||
|
);
|
||||||
|
|
||||||
|
var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile);
|
||||||
|
var iter = segments.iterator(1);
|
||||||
|
|
||||||
|
List<TestSegmentData> expected = List.of(
|
||||||
|
new TestSegmentData(-100, 0, 2),
|
||||||
|
new TestSegmentData(10, 2, 3),
|
||||||
|
new TestSegmentData(15, 3, 4),
|
||||||
|
new TestSegmentData(30, 4, 5),
|
||||||
|
new TestSegmentData(33, 5, 7),
|
||||||
|
new TestSegmentData(40, 7, 8)
|
||||||
|
);
|
||||||
|
|
||||||
|
List<TestSegmentData> actual = new ArrayList<>();
|
||||||
|
|
||||||
|
while (iter.next()) {
|
||||||
|
actual.add(new TestSegmentData(iter.wordId, iter.startOffset, iter.endOffset));
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(expected, actual);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWordSegments_ReadIterator() {
|
||||||
|
LongArray wordsArray = LongArray.allocate(4);
|
||||||
|
LongArray countsArray = LongArray.allocate(4);
|
||||||
|
wordsArray.set(0, -1, -2, -3, -4);
|
||||||
|
countsArray.set(0, 2, 1, 3, 5);
|
||||||
|
var segments = new ReversePreindexWordSegments(wordsArray, countsArray, null, null);
|
||||||
|
|
||||||
|
var ritr = segments.iterator(1);
|
||||||
|
assertTrue(ritr.hasMorePositions());
|
||||||
|
assertTrue(ritr.next());
|
||||||
|
assertTrue(ritr.isPositionBeforeEnd());
|
||||||
|
assertEquals(-1, ritr.wordId);
|
||||||
|
assertEquals(0, ritr.idx());
|
||||||
|
assertEquals(0, ritr.startOffset);
|
||||||
|
assertEquals(2, ritr.endOffset);
|
||||||
|
|
||||||
|
assertTrue(ritr.hasMorePositions());
|
||||||
|
assertTrue(ritr.next());
|
||||||
|
assertTrue(ritr.isPositionBeforeEnd());
|
||||||
|
assertEquals(-2, ritr.wordId);
|
||||||
|
assertEquals(1, ritr.idx());
|
||||||
|
assertEquals(2, ritr.startOffset);
|
||||||
|
assertEquals(3, ritr.endOffset);
|
||||||
|
|
||||||
|
assertTrue(ritr.hasMorePositions());
|
||||||
|
assertTrue(ritr.next());
|
||||||
|
assertTrue(ritr.isPositionBeforeEnd());
|
||||||
|
assertEquals(-3, ritr.wordId);
|
||||||
|
assertEquals(2, ritr.idx());
|
||||||
|
assertEquals(3, ritr.startOffset);
|
||||||
|
assertEquals(6, ritr.endOffset);
|
||||||
|
|
||||||
|
assertTrue(ritr.hasMorePositions());
|
||||||
|
assertTrue(ritr.next());
|
||||||
|
assertTrue(ritr.isPositionBeforeEnd());
|
||||||
|
assertEquals(-4, ritr.wordId);
|
||||||
|
assertEquals(3, ritr.idx());
|
||||||
|
assertEquals(6, ritr.startOffset);
|
||||||
|
assertEquals(11, ritr.endOffset);
|
||||||
|
|
||||||
|
assertFalse(ritr.hasMorePositions());
|
||||||
|
assertFalse(ritr.next());
|
||||||
|
assertFalse(ritr.isPositionBeforeEnd());
|
||||||
|
|
||||||
|
assertEquals(Long.MIN_VALUE, ritr.wordId);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWordSegments_ConstructionIterator() {
|
||||||
|
LongArray wordsArray = LongArray.allocate(4);
|
||||||
|
LongArray countsArray = LongArray.allocate(4);
|
||||||
|
wordsArray.set(0, -1, -2, -3, -4);
|
||||||
|
var segments = new ReversePreindexWordSegments(wordsArray, countsArray, null, null);
|
||||||
|
|
||||||
|
var citr = segments.constructionIterator(1);
|
||||||
|
assertEquals(-1, citr.wordId);
|
||||||
|
assertEquals(0, citr.idx());
|
||||||
|
assertTrue(citr.canPutMore());
|
||||||
|
assertTrue(citr.putNext(1));
|
||||||
|
assertEquals(1, countsArray.get(0));
|
||||||
|
|
||||||
|
assertEquals(-2, citr.wordId);
|
||||||
|
assertEquals(1, citr.idx());
|
||||||
|
assertTrue(citr.canPutMore());
|
||||||
|
assertTrue(citr.putNext(2));
|
||||||
|
assertEquals(2, countsArray.get(1));
|
||||||
|
|
||||||
|
assertEquals(-3, citr.wordId);
|
||||||
|
assertEquals(2, citr.idx());
|
||||||
|
assertTrue(citr.canPutMore());
|
||||||
|
assertTrue(citr.putNext(3));
|
||||||
|
assertEquals(3, countsArray.get(2));
|
||||||
|
|
||||||
|
assertEquals(-4, citr.wordId);
|
||||||
|
assertEquals(3, citr.idx());
|
||||||
|
assertTrue(citr.canPutMore());
|
||||||
|
assertFalse(citr.putNext(4));
|
||||||
|
assertEquals(4, countsArray.get(3));
|
||||||
|
|
||||||
|
assertEquals(4, citr.idx());
|
||||||
|
assertFalse(citr.canPutMore());
|
||||||
|
assertEquals(Long.MIN_VALUE, citr.wordId);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
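The iterator tests above pin down the segment arithmetic: boundaries are an exclusive prefix sum over the per-word counts, so the counts {2, 1, 3, 5} used in testWordSegments_ReadIterator produce the ranges [0,2), [2,3), [3,6), [6,11). A minimal sketch of that arithmetic, under a hypothetical class name:

```java
// Running prefix sum over per-word counts, as asserted by the read-iterator test.
public class SegmentOffsetsSketch {
    public static void main(String[] args) {
        long[] wordIds = { -1, -2, -3, -4 };
        long[] counts  = {  2,  1,  3,  5 };

        long start = 0;
        for (int i = 0; i < wordIds.length; i++) {
            long end = start + counts[i];
            System.out.printf("word %d -> [%d, %d)%n", wordIds[i], start, end);
            start = end;
        }
    }
}
```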
@ -0,0 +1,93 @@
package nu.marginalia.index.construction;

import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
import nu.marginalia.index.journal.reader.IndexJournalReader;
import nu.marginalia.index.journal.reader.IndexJournalReaderSingleCompressedFile;
import nu.marginalia.index.journal.writer.IndexJournalWriterSingleFileImpl;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class TestJournalFactory {
    Path tempDir = Files.createTempDirectory("journal");

    public TestJournalFactory() throws IOException {}

    public void clear() throws IOException {
        List<Path> toDelete = new ArrayList<>();
        try (var dirStream = Files.list(tempDir)) {
            dirStream.forEach(toDelete::add);
        }
        for (var tempFile : toDelete) {
            Files.delete(tempFile);
        }
        Files.delete(tempDir);
    }

    public record EntryData(long docId, long docMeta, long... wordIds) {
        @Override
        public String toString() {
            return "EntryData{" +
                    "docId=" + docId +
                    ", docMeta=" + docMeta +
                    ", wordIds=" + Arrays.toString(wordIds) +
                    '}';
        }
    }
    public record EntryDataWithWordMeta(long docId, long docMeta, WordWithMeta... wordIds) {
        @Override
        public String toString() {
            return "EntryDataWithWordMeta{" +
                    "docId=" + docId +
                    ", docMeta=" + docMeta +
                    ", wordIds=" + Arrays.toString(wordIds) +
                    '}';
        }
    }
    public record WordWithMeta(long wordId, long meta) {}

    public static WordWithMeta wm(long wordId, long meta) {
        return new WordWithMeta(wordId, meta);
    }

    IndexJournalReader createReader(EntryData... entries) throws IOException {
        Path jf = Files.createTempFile(tempDir, "journal", ".dat");

        var writer = new IndexJournalWriterSingleFileImpl(jf);
        for (var entry : entries) {
            long[] data = new long[entry.wordIds.length * 2];
            for (int i = 0; i < entry.wordIds.length; i++)
                data[i*2] = entry.wordIds[i];

            writer.put(new IndexJournalEntryHeader(entries.length, 0, entry.docId, entry.docMeta),
                    new IndexJournalEntryData(data));
        }
        writer.close();
        var ret = new IndexJournalReaderSingleCompressedFile(jf);
        return ret;
    }

    public IndexJournalReader createReader(EntryDataWithWordMeta... entries) throws IOException {
        Path jf = Files.createTempFile(tempDir, "journal", ".dat");

        var writer = new IndexJournalWriterSingleFileImpl(jf);
        for (var entry : entries) {
            long[] data = new long[entry.wordIds.length * 2];
            for (int i = 0; i < entry.wordIds.length; i++) {
                data[i * 2] = entry.wordIds[i].wordId;
                data[i * 2 + 1] = entry.wordIds[i].meta;
            }

            writer.put(new IndexJournalEntryHeader(entries.length, 0, entry.docId, entry.docMeta),
                    new IndexJournalEntryData(data));
        }
        writer.close();
        var ret = new IndexJournalReaderSingleCompressedFile(jf);
        return ret;
    }
}
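As both createReader overloads above show, each entry's words are handed to the journal writer as one interleaved long array of (wordId, meta) pairs, with the metadata slot left at zero for plain EntryData. A stand-alone sketch of just that packing step (hypothetical class name, no Marginalia dependencies):

```java
import java.util.Arrays;

// Interleave word ids and word metadata into a single long[] before writing,
// mirroring the loop bodies of the createReader methods above.
public class JournalPackingSketch {
    public static void main(String[] args) {
        long[] wordIds   = { 50, 51 };
        long[] wordMetas = { 5, 7 };

        long[] packed = new long[wordIds.length * 2];
        for (int i = 0; i < wordIds.length; i++) {
            packed[2 * i]     = wordIds[i];
            packed[2 * i + 1] = wordMetas[i];
        }

        System.out.println(Arrays.toString(packed)); // [50, 5, 51, 7]
    }
}
```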
@ -0,0 +1,41 @@
package nu.marginalia.index.construction;

import java.util.Arrays;

record TestSegmentData(long wordId, long start, long end, long[] data) {
    public TestSegmentData(long wordId, long start, long end) {
        this(wordId, start, end, null);
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        TestSegmentData that = (TestSegmentData) o;

        if (wordId != that.wordId) return false;
        if (start != that.start) return false;
        if (end != that.end) return false;
        return Arrays.equals(data, that.data);
    }

    @Override
    public int hashCode() {
        int result = (int) (wordId ^ (wordId >>> 32));
        result = 31 * result + (int) (start ^ (start >>> 32));
        result = 31 * result + (int) (end ^ (end >>> 32));
        result = 31 * result + Arrays.hashCode(data);
        return result;
    }

    @Override
    public String toString() {
        return "TestSegmentData{" +
                "wordId=" + wordId +
                ", start=" + start +
                ", end=" + end +
                ", data=" + Arrays.toString(data) +
                '}';
    }
}
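A note on why TestSegmentData hand-rolls equals and hashCode: the equals a record generates compares an array component by reference, so two instances holding equal long[] contents would otherwise never compare equal, and the assertEquals calls in the tests above would fail spuriously. A small demonstration of the default behaviour (hypothetical class name):

```java
import java.util.Arrays;

// Records compare array components by reference, not by content.
public class RecordArrayEqualitySketch {
    record WithDefaultEquals(long[] data) {}

    public static void main(String[] args) {
        var a = new WithDefaultEquals(new long[] { 1, 2 });
        var b = new WithDefaultEquals(new long[] { 1, 2 });

        System.out.println(a.equals(b));                       // false: reference comparison
        System.out.println(Arrays.equals(a.data(), b.data())); // true: content comparison
    }
}
```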
@ -1,146 +0,0 @@
|
|||||||
package nu.marginalia.index.reverse;
|
|
||||||
|
|
||||||
import lombok.SneakyThrows;
|
|
||||||
import nu.marginalia.array.buffer.LongQueryBuffer;
|
|
||||||
import nu.marginalia.index.full.ReverseIndexFullConverter;
|
|
||||||
import nu.marginalia.index.full.ReverseIndexFullReader;
|
|
||||||
import nu.marginalia.index.journal.model.IndexJournalEntry;
|
|
||||||
import nu.marginalia.index.journal.reader.IndexJournalReaderSingleCompressedFile;
|
|
||||||
import nu.marginalia.index.journal.writer.IndexJournalWriterImpl;
|
|
||||||
import nu.marginalia.index.journal.writer.IndexJournalWriter;
|
|
||||||
import nu.marginalia.lexicon.journal.KeywordLexiconJournalMode;
|
|
||||||
import nu.marginalia.model.id.UrlIdCodec;
|
|
||||||
import nu.marginalia.process.control.ProcessHeartbeat;
|
|
||||||
import nu.marginalia.process.control.ProcessTaskHeartbeat;
|
|
||||||
import nu.marginalia.ranking.DomainRankings;
|
|
||||||
import nu.marginalia.lexicon.KeywordLexicon;
|
|
||||||
import nu.marginalia.lexicon.journal.KeywordLexiconJournal;
|
|
||||||
import nu.marginalia.model.idx.DocumentMetadata;
|
|
||||||
import nu.marginalia.test.TestUtil;
|
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
import org.mockito.Mockito;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
import java.util.stream.IntStream;
|
|
||||||
import java.util.stream.LongStream;
|
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
|
||||||
import static org.mockito.Mockito.when;
|
|
||||||
|
|
||||||
class ReverseIndexFullConverterTest {
|
|
||||||
KeywordLexicon keywordLexicon;
|
|
||||||
|
|
||||||
Path indexFile;
|
|
||||||
Path wordsFile1;
|
|
||||||
Path urlsFile1;
|
|
||||||
Path dictionaryFile;
|
|
||||||
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
|
||||||
|
|
||||||
@BeforeEach
|
|
||||||
@SneakyThrows
|
|
||||||
void setUp() {
|
|
||||||
dictionaryFile = Files.createTempFile("tmp", ".dict");
|
|
||||||
dictionaryFile.toFile().deleteOnExit();
|
|
||||||
|
|
||||||
keywordLexicon = new KeywordLexicon(new KeywordLexiconJournal(dictionaryFile.toFile(), KeywordLexiconJournalMode.READ_WRITE));
|
|
||||||
keywordLexicon.getOrInsert("0");
|
|
||||||
|
|
||||||
indexFile = Files.createTempFile("tmp", ".idx");
|
|
||||||
indexFile.toFile().deleteOnExit();
|
|
||||||
|
|
||||||
|
|
||||||
wordsFile1 = Files.createTempFile("words1", ".idx");
|
|
||||||
urlsFile1 = Files.createTempFile("urls1", ".idx");
|
|
||||||
}
|
|
||||||
|
|
||||||
public void createEntry(IndexJournalWriter writer, KeywordLexicon keywordLexicon, int id) {
|
|
||||||
int[] factors = IntStream.rangeClosed(1, id).filter(v -> (id % v) == 0).toArray();
|
|
||||||
|
|
||||||
var entryBuilder = IndexJournalEntry.builder(id, DocumentMetadata.defaultValue());
|
|
||||||
|
|
||||||
for (int i = 0; i < factors.length; i++) {
|
|
||||||
entryBuilder.add(keywordLexicon.getOrInsert(Integer.toString(factors[i])), -factors[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
writer.put(entryBuilder.build());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void testReverseIndex() throws IOException {
|
|
||||||
var writer = new IndexJournalWriterImpl(keywordLexicon, indexFile);
|
|
||||||
|
|
||||||
for (int i = 1; i < 512; i++) {
|
|
||||||
createEntry(writer, keywordLexicon, i);
|
|
||||||
}
|
|
||||||
|
|
||||||
writer.close();
|
|
||||||
|
|
||||||
|
|
||||||
Path tmpDir = Path.of("/tmp");
|
|
||||||
Path dataDir = Files.createTempDirectory(getClass().getSimpleName());
|
|
||||||
|
|
||||||
var wordsFile = dataDir.resolve("urls.dat");
|
|
||||||
var docsFile = dataDir.resolve("docs.dat");
|
|
||||||
var journalReader = new IndexJournalReaderSingleCompressedFile(indexFile);
|
|
||||||
|
|
||||||
// RIP fairies
|
|
||||||
var processHeartbeat = Mockito.mock(ProcessHeartbeat.class);
|
|
||||||
when(processHeartbeat.createProcessTaskHeartbeat(Mockito.any(), Mockito.any()))
|
|
||||||
.thenReturn(Mockito.mock(ProcessTaskHeartbeat.class));
|
|
||||||
|
|
||||||
new ReverseIndexFullConverter(
|
|
||||||
processHeartbeat,
|
|
||||||
tmpDir, journalReader, new DomainRankings(), wordsFile, docsFile)
|
|
||||||
.convert();
|
|
||||||
|
|
||||||
var reverseIndexReader = new ReverseIndexFullReader(wordsFile, docsFile);
|
|
||||||
|
|
||||||
System.out.println(reverseIndexReader.numDocuments(keywordLexicon.getReadOnly("1")));
|
|
||||||
System.out.println(reverseIndexReader.numDocuments(keywordLexicon.getReadOnly("2")));
|
|
||||||
System.out.println(reverseIndexReader.numDocuments(keywordLexicon.getReadOnly("3")));
|
|
||||||
|
|
||||||
System.out.println(reverseIndexReader.isWordInDoc(keywordLexicon.getReadOnly("1"), 1));
|
|
||||||
System.out.println(reverseIndexReader.isWordInDoc(keywordLexicon.getReadOnly("2"), 1));
|
|
||||||
System.out.println(reverseIndexReader.isWordInDoc(keywordLexicon.getReadOnly("1"), 2));
|
|
||||||
System.out.println(reverseIndexReader.isWordInDoc(keywordLexicon.getReadOnly("2"), 2));
|
|
||||||
System.out.println(reverseIndexReader.isWordInDoc(keywordLexicon.getReadOnly("1"), 3));
|
|
||||||
System.out.println(reverseIndexReader.isWordInDoc(keywordLexicon.getReadOnly("2"), 3));
|
|
||||||
|
|
||||||
var buffer = new LongQueryBuffer(32);
|
|
||||||
reverseIndexReader.documents(keywordLexicon.getReadOnly("1")).read(buffer);
|
|
||||||
assertArrayEquals(LongStream.range(1, 17).map(this::addMaxRank).toArray(), buffer.copyData());
|
|
||||||
System.out.println(buffer);
|
|
||||||
|
|
||||||
buffer.reset();
|
|
||||||
reverseIndexReader.documents(keywordLexicon.getReadOnly("2")).read(buffer);
|
|
||||||
assertArrayEquals(LongStream.range(1, 17).map(v -> v*2).map(this::addMaxRank).toArray(), buffer.copyData());
|
|
||||||
System.out.println(buffer);
|
|
||||||
|
|
||||||
buffer.reset();
|
|
||||||
reverseIndexReader.documents(keywordLexicon.getReadOnly("3")).read(buffer);
|
|
||||||
assertArrayEquals(LongStream.range(1, 17).map(v -> v*3).map(this::addMaxRank).toArray(), buffer.copyData());
|
|
||||||
System.out.println(buffer);
|
|
||||||
|
|
||||||
buffer.reset();
|
|
||||||
var es = reverseIndexReader.documents(keywordLexicon.getReadOnly("7"));
|
|
||||||
do {
|
|
||||||
buffer.reset();
|
|
||||||
es.read(buffer);
|
|
||||||
System.out.println(buffer);
|
|
||||||
} while (es.hasMore());
|
|
||||||
|
|
||||||
|
|
||||||
TestUtil.clearTempDir(dataDir);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add a max domain rank component to the input, when interpreted as an ID
|
|
||||||
private long addMaxRank(long in) {
|
|
||||||
return UrlIdCodec.addRank(1f, in);
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,178 +0,0 @@
|
|||||||
package nu.marginalia.index.reverse;
|
|
||||||
|
|
||||||
import lombok.SneakyThrows;
|
|
||||||
import nu.marginalia.array.buffer.LongQueryBuffer;
|
|
||||||
import nu.marginalia.index.full.ReverseIndexFullConverter;
|
|
||||||
import nu.marginalia.index.full.ReverseIndexFullReader;
|
|
||||||
import nu.marginalia.index.journal.model.IndexJournalEntryData;
|
|
||||||
import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
|
|
||||||
import nu.marginalia.index.journal.reader.IndexJournalReaderSingleCompressedFile;
|
|
||||||
import nu.marginalia.index.journal.writer.IndexJournalWriterImpl;
|
|
||||||
import nu.marginalia.index.journal.writer.IndexJournalWriter;
|
|
||||||
import nu.marginalia.index.priority.ReverseIndexPriorityParameters;
|
|
||||||
import nu.marginalia.lexicon.journal.KeywordLexiconJournalMode;
|
|
||||||
import nu.marginalia.model.id.UrlIdCodec;
|
|
||||||
import nu.marginalia.process.control.ProcessHeartbeat;
|
|
||||||
import nu.marginalia.process.control.ProcessTaskHeartbeat;
|
|
||||||
import nu.marginalia.ranking.DomainRankings;
|
|
||||||
import nu.marginalia.lexicon.KeywordLexicon;
|
|
||||||
import nu.marginalia.lexicon.journal.KeywordLexiconJournal;
|
|
||||||
import nu.marginalia.test.TestUtil;
|
|
||||||
import org.junit.jupiter.api.AfterEach;
|
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
import org.mockito.Mockito;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.stream.IntStream;
|
|
||||||
import java.util.stream.LongStream;
|
|
||||||
|
|
||||||
import static org.mockito.Mockito.when;
|
|
||||||
|
|
||||||
class ReverseIndexFullConverterTest2 {
|
|
||||||
|
|
||||||
KeywordLexicon keywordLexicon;
|
|
||||||
IndexJournalWriter writer;
|
|
||||||
|
|
||||||
Path indexFile;
|
|
||||||
Path wordsFile1;
|
|
||||||
Path urlsFile1;
|
|
||||||
Path dictionaryFile;
|
|
||||||
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
|
||||||
|
|
||||||
Path dataDir;
|
|
||||||
private Path wordsFile;
|
|
||||||
private Path docsFile;
|
|
||||||
|
|
||||||
int workSetSize = 8192;
|
|
||||||
int workSetStart = 8000;
|
|
||||||
|
|
||||||
@BeforeEach
|
|
||||||
@SneakyThrows
|
|
||||||
void setUp() {
|
|
||||||
dictionaryFile = Files.createTempFile("tmp", ".dict");
|
|
||||||
dictionaryFile.toFile().deleteOnExit();
|
|
||||||
|
|
||||||
keywordLexicon = new KeywordLexicon(new KeywordLexiconJournal(dictionaryFile.toFile(), KeywordLexiconJournalMode.READ_WRITE));
|
|
||||||
keywordLexicon.getOrInsert("0");
|
|
||||||
|
|
||||||
indexFile = Files.createTempFile("tmp", ".idx");
|
|
||||||
indexFile.toFile().deleteOnExit();
|
|
||||||
writer = new IndexJournalWriterImpl(keywordLexicon, indexFile);
|
|
||||||
|
|
||||||
wordsFile1 = Files.createTempFile("words1", ".idx");
|
|
||||||
urlsFile1 = Files.createTempFile("urls1", ".idx");
|
|
||||||
|
|
||||||
dataDir = Files.createTempDirectory(getClass().getSimpleName());
|
|
||||||
|
|
||||||
for (int i = 1; i < workSetSize; i++) {
|
|
||||||
if (i < workSetStart) {
|
|
||||||
keywordLexicon.getOrInsert(Integer.toString(i));
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
createEntry(writer, keywordLexicon, i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
keywordLexicon.commitToDisk();
|
|
||||||
Thread.sleep(1000);
|
|
||||||
writer.close();
|
|
||||||
|
|
||||||
var reader = new IndexJournalReaderSingleCompressedFile(indexFile);
|
|
||||||
|
|
||||||
wordsFile = dataDir.resolve("words.dat");
|
|
||||||
docsFile = dataDir.resolve("docs.dat");
|
|
||||||
}
|
|
||||||
|
|
||||||
@AfterEach
|
|
||||||
public void tearDown() {
|
|
||||||
TestUtil.clearTempDir(dataDir);
|
|
||||||
}
|
|
||||||
|
|
||||||
public int[] getFactorsI(int id) {
|
|
||||||
return IntStream.rangeClosed(1, id-1).toArray();
|
|
||||||
}
|
|
||||||
public long[] getFactorsL(int id) {
|
|
||||||
return LongStream.rangeClosed(1, id).filter(v -> (id % v) == 0).toArray();
|
|
||||||
}
|
|
||||||
|
|
||||||
long createId(int url, int domain) {
|
|
||||||
return UrlIdCodec.encodeId(domain, url);
|
|
||||||
}
|
|
||||||
public void createEntry(IndexJournalWriter writer, KeywordLexicon keywordLexicon, int id) {
|
|
||||||
int[] factors = getFactorsI(id);
|
|
||||||
var header = new IndexJournalEntryHeader(factors.length, 0, createId(id, id/20), id % 5);
|
|
||||||
|
|
||||||
long[] data = new long[factors.length*2];
|
|
||||||
for (int i = 0; i < factors.length; i++) {
|
|
||||||
data[2*i] = keywordLexicon.getOrInsert(Integer.toString(factors[i]));
|
|
||||||
data[2*i + 1] = (i % 21 != 0) ? 0 : -factors[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
writer.put(header, new IndexJournalEntryData(data));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void testRev2() throws IOException {
|
|
||||||
|
|
||||||
Path tmpDir = Path.of("/tmp");
|
|
||||||
|
|
||||||
var processHeartbeat = Mockito.mock(ProcessHeartbeat.class);
|
|
||||||
when(processHeartbeat.createProcessTaskHeartbeat(Mockito.any(), Mockito.any()))
|
|
||||||
.thenReturn(Mockito.mock(ProcessTaskHeartbeat.class));
|
|
||||||
|
|
||||||
new ReverseIndexFullConverter(processHeartbeat, tmpDir, new IndexJournalReaderSingleCompressedFile(indexFile), new DomainRankings(), wordsFile, docsFile).convert();
|
|
||||||
|
|
||||||
var reverseReader = new ReverseIndexFullReader(wordsFile, docsFile);
|
|
||||||
|
|
||||||
for (int i = workSetStart; i < workSetSize; i++) {
|
|
||||||
|
|
||||||
var es = reverseReader.documents(i);
|
|
||||||
LongQueryBuffer lqb = new LongQueryBuffer(100);
|
|
||||||
while (es.hasMore()) {
|
|
||||||
lqb.reset();
|
|
||||||
es.read(lqb);
|
|
||||||
System.out.println(Arrays.toString(Arrays.copyOf(lqb.data, lqb.end)));
|
|
||||||
}
|
|
||||||
System.out.println("--");
|
|
||||||
}
|
|
||||||
|
|
||||||
TestUtil.clearTempDir(dataDir);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void testRevP() throws IOException {
|
|
||||||
|
|
||||||
Path tmpDir = Path.of("/tmp");
|
|
||||||
|
|
||||||
var processHeartbeat = Mockito.mock(ProcessHeartbeat.class);
|
|
||||||
when(processHeartbeat.createProcessTaskHeartbeat(Mockito.any(), Mockito.any()))
|
|
||||||
.thenReturn(Mockito.mock(ProcessTaskHeartbeat.class));
|
|
||||||
|
|
||||||
new ReverseIndexFullConverter(processHeartbeat, tmpDir, new IndexJournalReaderSingleCompressedFile(indexFile, null, ReverseIndexPriorityParameters::filterPriorityRecord), new DomainRankings(), wordsFile, docsFile).convert();
|
|
||||||
|
|
||||||
var reverseReader = new ReverseIndexFullReader(wordsFile, docsFile);
|
|
||||||
|
|
||||||
for (int i = workSetStart; i < workSetSize; i++) {
|
|
||||||
|
|
||||||
var es = reverseReader.documents(i);
|
|
||||||
LongQueryBuffer lqb = new LongQueryBuffer(100);
|
|
||||||
while (es.hasMore()) {
|
|
||||||
lqb.reset();
|
|
||||||
es.read(lqb);
|
|
||||||
System.out.println(Arrays.toString(Arrays.copyOf(lqb.data, lqb.end)));
|
|
||||||
}
|
|
||||||
System.out.println("--");
|
|
||||||
}
|
|
||||||
|
|
||||||
TestUtil.clearTempDir(dataDir);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -1,179 +0,0 @@
|
|||||||
package nu.marginalia.index.reverse;
|
|
||||||
|
|
||||||
import lombok.SneakyThrows;
|
|
||||||
import nu.marginalia.array.buffer.LongQueryBuffer;
|
|
||||||
import nu.marginalia.index.journal.model.IndexJournalEntryData;
|
|
||||||
import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
|
|
||||||
import nu.marginalia.index.journal.reader.IndexJournalReaderSingleCompressedFile;
|
|
||||||
import nu.marginalia.index.journal.writer.IndexJournalWriter;
|
|
||||||
import nu.marginalia.index.journal.writer.IndexJournalWriterImpl;
|
|
||||||
import nu.marginalia.index.priority.ReverseIndexPriorityReader;
|
|
||||||
import nu.marginalia.index.priority.ReverseIndexPriorityConverter;
|
|
||||||
import nu.marginalia.index.priority.ReverseIndexPriorityParameters;
|
|
||||||
import nu.marginalia.lexicon.KeywordLexicon;
|
|
||||||
import nu.marginalia.lexicon.journal.KeywordLexiconJournal;
|
|
||||||
import nu.marginalia.lexicon.journal.KeywordLexiconJournalMode;
|
|
||||||
import nu.marginalia.model.id.UrlIdCodec;
|
|
||||||
import nu.marginalia.process.control.ProcessHeartbeat;
|
|
||||||
import nu.marginalia.process.control.ProcessTaskHeartbeat;
|
|
||||||
import nu.marginalia.ranking.DomainRankings;
|
|
||||||
import nu.marginalia.test.TestUtil;
|
|
||||||
import org.junit.jupiter.api.AfterEach;
|
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
import org.mockito.Mockito;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.stream.IntStream;
|
|
||||||
import java.util.stream.LongStream;
|
|
||||||
|
|
||||||
import static org.mockito.Mockito.when;
|
|
||||||
|
|
||||||
class ReverseIndexPriorityConverterTest2 {
|
|
||||||
|
|
||||||
KeywordLexicon keywordLexicon;
|
|
||||||
IndexJournalWriter writer;
|
|
||||||
|
|
||||||
Path indexFile;
|
|
||||||
Path wordsFile1;
|
|
||||||
Path urlsFile1;
|
|
||||||
Path dictionaryFile;
|
|
||||||
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
|
||||||
|
|
||||||
Path dataDir;
|
|
||||||
private Path wordsFile;
|
|
||||||
private Path docsFile;
|
|
||||||
|
|
||||||
int workSetSize = 8192;
|
|
||||||
int workSetStart = 8000;
|
|
||||||
|
|
||||||
@BeforeEach
|
|
||||||
@SneakyThrows
|
|
||||||
void setUp() {
|
|
||||||
dictionaryFile = Files.createTempFile("tmp", ".dict");
|
|
||||||
dictionaryFile.toFile().deleteOnExit();
|
|
||||||
|
|
||||||
keywordLexicon = new KeywordLexicon(new KeywordLexiconJournal(dictionaryFile.toFile(), KeywordLexiconJournalMode.READ_WRITE));
|
|
||||||
keywordLexicon.getOrInsert("0");
|
|
||||||
|
|
||||||
indexFile = Files.createTempFile("tmp", ".idx");
|
|
||||||
indexFile.toFile().deleteOnExit();
|
|
||||||
writer = new IndexJournalWriterImpl(keywordLexicon, indexFile);
|
|
||||||
|
|
||||||
wordsFile1 = Files.createTempFile("words1", ".idx");
|
|
||||||
urlsFile1 = Files.createTempFile("urls1", ".idx");
|
|
||||||
|
|
||||||
dataDir = Files.createTempDirectory(getClass().getSimpleName());
|
|
||||||
|
|
||||||
for (int i = 1; i < workSetSize; i++) {
|
|
||||||
if (i < workSetStart) {
|
|
||||||
keywordLexicon.getOrInsert(Integer.toString(i));
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
createEntry(writer, keywordLexicon, i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
keywordLexicon.commitToDisk();
|
|
||||||
Thread.sleep(1000);
|
|
||||||
writer.close();
|
|
||||||
|
|
||||||
var reader = new IndexJournalReaderSingleCompressedFile(indexFile);
|
|
||||||
|
|
||||||
wordsFile = dataDir.resolve("words.dat");
|
|
||||||
docsFile = dataDir.resolve("docs.dat");
|
|
||||||
}
|
|
||||||
|
|
||||||
@AfterEach
|
|
||||||
public void tearDown() {
|
|
||||||
TestUtil.clearTempDir(dataDir);
|
|
||||||
}
|
|
||||||
|
|
||||||
public int[] getFactorsI(int id) {
|
|
||||||
return IntStream.rangeClosed(1, id-1).toArray();
|
|
||||||
}
|
|
||||||
public long[] getFactorsL(int id) {
|
|
||||||
return LongStream.rangeClosed(1, id).filter(v -> (id % v) == 0).toArray();
|
|
||||||
}
|
|
||||||
|
|
||||||
long createId(int url, int domain) {
|
|
||||||
return UrlIdCodec.encodeId(domain, url);
|
|
||||||
}
|
|
||||||
public void createEntry(IndexJournalWriter writer, KeywordLexicon keywordLexicon, int id) {
|
|
||||||
int[] factors = getFactorsI(id);
|
|
||||||
var header = new IndexJournalEntryHeader(factors.length, 0, createId(id, id/20), id % 5);
|
|
||||||
|
|
||||||
long[] data = new long[factors.length*2];
|
|
||||||
for (int i = 0; i < factors.length; i++) {
|
|
||||||
data[2*i] = keywordLexicon.getOrInsert(Integer.toString(factors[i]));
|
|
||||||
data[2*i + 1] = (i % 21 != 0) ? 0 : -factors[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
writer.put(header, new IndexJournalEntryData(data));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void testRev2() throws IOException {
|
|
||||||
|
|
||||||
Path tmpDir = Path.of("/tmp");
|
|
||||||
|
|
||||||
var processHeartbeat = Mockito.mock(ProcessHeartbeat.class);
|
|
||||||
when(processHeartbeat.createProcessTaskHeartbeat(Mockito.any(), Mockito.any()))
|
|
||||||
.thenReturn(Mockito.mock(ProcessTaskHeartbeat.class));
|
|
||||||
|
|
||||||
new ReverseIndexPriorityConverter(processHeartbeat, tmpDir, new IndexJournalReaderSingleCompressedFile(indexFile), new DomainRankings(), wordsFile, docsFile).convert();
|
|
||||||
|
|
||||||
var reverseReader = new ReverseIndexPriorityReader(wordsFile, docsFile);
|
|
||||||
|
|
||||||
for (int i = workSetStart; i < workSetSize; i++) {
|
|
||||||
|
|
||||||
var es = reverseReader.priorityDocuments(i);
|
|
||||||
LongQueryBuffer lqb = new LongQueryBuffer(100);
|
|
||||||
while (es.hasMore()) {
|
|
||||||
lqb.reset();
|
|
||||||
es.read(lqb);
|
|
||||||
System.out.println(Arrays.toString(Arrays.copyOf(lqb.data, lqb.end)));
|
|
||||||
}
|
|
||||||
System.out.println("--");
|
|
||||||
}
|
|
||||||
|
|
||||||
TestUtil.clearTempDir(dataDir);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void testRevP() throws IOException {
|
|
||||||
|
|
||||||
Path tmpDir = Path.of("/tmp");
|
|
||||||
|
|
||||||
|
|
||||||
var processHeartbeat = Mockito.mock(ProcessHeartbeat.class);
|
|
||||||
when(processHeartbeat.createProcessTaskHeartbeat(Mockito.any(), Mockito.any()))
|
|
||||||
.thenReturn(Mockito.mock(ProcessTaskHeartbeat.class));
|
|
||||||
|
|
||||||
new ReverseIndexPriorityConverter(processHeartbeat, tmpDir, new IndexJournalReaderSingleCompressedFile(indexFile, null, ReverseIndexPriorityParameters::filterPriorityRecord), new DomainRankings(), wordsFile, docsFile).convert();
|
|
||||||
|
|
||||||
var reverseReader = new ReverseIndexPriorityReader(wordsFile, docsFile);
|
|
||||||
|
|
||||||
for (int i = workSetStart; i < workSetSize; i++) {
|
|
||||||
|
|
||||||
var es = reverseReader.priorityDocuments(i);
|
|
||||||
LongQueryBuffer lqb = new LongQueryBuffer(100);
|
|
||||||
while (es.hasMore()) {
|
|
||||||
lqb.reset();
|
|
||||||
es.read(lqb);
|
|
||||||
System.out.println(Arrays.toString(Arrays.copyOf(lqb.data, lqb.end)));
|
|
||||||
}
|
|
||||||
System.out.println("--");
|
|
||||||
}
|
|
||||||
|
|
||||||
TestUtil.clearTempDir(dataDir);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -1,40 +0,0 @@
plugins {
    id 'java'
    id "io.freefair.lombok" version "8.2.2"

    id 'jvm-test-suite'
}

java {
    toolchain {
        languageVersion.set(JavaLanguageVersion.of(20))
    }
}

dependencies {

    implementation project(':code:libraries:next-prime')
    implementation libs.lombok
    annotationProcessor libs.lombok
    implementation libs.bundles.slf4j

    implementation libs.prometheus
    implementation libs.guava
    implementation libs.fastutil
    implementation project(':third-party:commons-codec')

    testImplementation libs.bundles.slf4j.test
    testImplementation libs.bundles.junit
    testImplementation libs.mockito
}

test {
    useJUnitPlatform()
}

task fastTests(type: Test) {
    useJUnitPlatform {
        excludeTags "slow"
    }
}
@ -1,19 +0,0 @@
# Lexicon

The lexicon contains a mapping from words to identifiers.

Index construction is simpler if the domain of word identifiers is dense, that is, there are no gaps between ids: if there are 100 words, they're indexed 0-99 and not 5, 23, 107, 9999, 819235 etc. The lexicon exists to create such a mapping.

This lexicon is populated from a journal. The actual word data isn't mapped, but rather a 64 bit hash. As a result of the <a href="https://en.wikipedia.org/wiki/Birthday_problem">birthday paradox</a>, collisions will be rare up until about 2<sup>32</sup> words.
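
For illustration, a minimal sketch of the dense-id assignment described above. The class name and the stand-in hash are hypothetical; the real lexicon hashes keywords with MurmurHash3_128 and keeps the mapping in a DictionaryMap.

```java
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

// Illustrative only: assigns dense int ids to 64 bit keyword hashes,
// in the spirit of what the lexicon provides.
class DenseIdSketch {
    private final Map<Long, Integer> idsByHash = new HashMap<>();

    int getOrInsert(String word) {
        long hash = hash64(word.getBytes(StandardCharsets.UTF_8));
        // ids are handed out as 0, 1, 2, ... in insertion order, so the id domain stays dense
        return idsByHash.computeIfAbsent(hash, h -> idsByHash.size());
    }

    // stand-in hash; not the murmur hash the real code uses
    private long hash64(byte[] bytes) {
        long h = 1125899906842597L;
        for (byte b : bytes) h = 31 * h + b;
        return h;
    }
}
```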

The lexicon is constructed by [processes/loading-process](../../processes/loading-process) and read when
[services-core/index-service](../../services-core/index-service) interprets queries.

## Central Classes

* [KeywordLexicon](src/main/java/nu/marginalia/lexicon/KeywordLexicon.java)
* [KeywordLexiconJournal](src/main/java/nu/marginalia/lexicon/journal/KeywordLexiconJournal.java)
* [DictionaryMap](src/main/java/nu/marginalia/dict/DictionaryMap.java) comes in two versions
* * [OnHeapDictionaryMap](src/main/java/nu/marginalia/dict/OnHeapDictionaryMap.java) - basically just a fastutil Long2IntOpenHashMap
* * [OffHeapDictionaryHashMap](src/main/java/nu/marginalia/dict/OffHeapDictionaryHashMap.java) - a heavily modified trove TLongIntHashMap that uses off heap memory
@ -1,42 +0,0 @@
|
|||||||
package nu.marginalia.dict;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
|
|
||||||
public class DictionaryData {
|
|
||||||
private final int bankSize;
|
|
||||||
|
|
||||||
private final ArrayList<DictionaryDataBank> banks = new ArrayList<>(100);
|
|
||||||
|
|
||||||
public DictionaryData(int bankSize) {
|
|
||||||
this.bankSize = bankSize;
|
|
||||||
banks.add(new DictionaryDataBank(0, bankSize));
|
|
||||||
}
|
|
||||||
|
|
||||||
public int add(long key) {
|
|
||||||
var activeBank = banks.get(banks.size()-1);
|
|
||||||
int rb = activeBank.add(key);
|
|
||||||
|
|
||||||
if (rb == -1) {
|
|
||||||
int end = activeBank.getEnd();
|
|
||||||
var newBank = new DictionaryDataBank(end, bankSize);
|
|
||||||
rb = newBank.add(key);
|
|
||||||
|
|
||||||
banks.add(newBank);
|
|
||||||
}
|
|
||||||
|
|
||||||
return rb;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public long getKey(int offset) {
|
|
||||||
return banks.get(offset/ bankSize).getKey(offset);
|
|
||||||
}
|
|
||||||
public boolean keyEquals(int offset, long otherKey) {
|
|
||||||
return banks.get(offset/ bankSize).keyEquals(offset, otherKey);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void clear() {
|
|
||||||
banks.clear();
|
|
||||||
banks.add(new DictionaryDataBank(0, bankSize));
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,63 +0,0 @@
|
|||||||
package nu.marginalia.dict;
|
|
||||||
|
|
||||||
import java.nio.ByteBuffer;
|
|
||||||
import java.nio.LongBuffer;
|
|
||||||
|
|
||||||
class DictionaryDataBank {
|
|
||||||
|
|
||||||
private final int start_idx;
|
|
||||||
|
|
||||||
// Humongous long-lived arrays seem to sometimes yield considerable memory overhead and
|
|
||||||
// can make the GC behave poorly. Using off-heap memory seems preferred when their
|
|
||||||
// lifetime is "forever"
|
|
||||||
|
|
||||||
private final LongBuffer keys;
|
|
||||||
|
|
||||||
private int size;
|
|
||||||
private final int capacity;
|
|
||||||
|
|
||||||
|
|
||||||
public DictionaryDataBank(int start_idx, int sz) {
|
|
||||||
this.start_idx = start_idx;
|
|
||||||
this.capacity = sz;
|
|
||||||
|
|
||||||
keys = ByteBuffer.allocateDirect(8 * capacity).asLongBuffer();
|
|
||||||
size = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getStart() {
|
|
||||||
return start_idx;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getEnd() {
|
|
||||||
return start_idx + size;
|
|
||||||
}
|
|
||||||
|
|
||||||
public long getKey(int idx) {
|
|
||||||
if (idx < start_idx || idx - start_idx >= size) {
|
|
||||||
throw new IndexOutOfBoundsException(idx);
|
|
||||||
}
|
|
||||||
return keys.get(idx - start_idx);
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean keyEquals(int idx, long other) {
|
|
||||||
if (idx < start_idx || idx - start_idx >= size) {
|
|
||||||
throw new IndexOutOfBoundsException(idx);
|
|
||||||
}
|
|
||||||
|
|
||||||
return keys.get(idx - start_idx) == other;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int add(long newKey) {
|
|
||||||
if (size >= capacity)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
keys.put(size, newKey);
|
|
||||||
|
|
||||||
return start_idx + size++;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getSize() {
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
}
|
|
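
The comment about humongous long-lived arrays above is the rationale for the bank design; the essential move is just a direct (off-heap) ByteBuffer viewed as longs. A minimal sketch, independent of the project code:

```java
import java.nio.ByteBuffer;
import java.nio.LongBuffer;

class OffHeapLongsSketch {
    public static void main(String[] args) {
        int capacity = 1_000_000;
        // Direct buffers live outside the Java heap, so a long-lived slab of keys
        // does not inflate GC workloads the way a huge long[] would.
        LongBuffer keys = ByteBuffer.allocateDirect(8 * capacity).asLongBuffer();
        keys.put(0, 123_456_789L);
        System.out.println(keys.get(0)); // 123456789
    }
}
```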
@ -1,27 +0,0 @@
|
|||||||
package nu.marginalia.dict;
|
|
||||||
|
|
||||||
/** Backing store for the KeywordLexicon, available in on and off-heap versions.
|
|
||||||
* <p>
|
|
||||||
* The off-heap version is necessary when loading a lexicon that is too large to fit in RAM, due
|
|
||||||
* to Java's 2GB limit on the size of a single array. It is slower and less optimized than the on-heap version.
|
|
||||||
* <p>
|
|
||||||
* The off-heap version is on the precipice of being deprecated and its use is discouraged.
|
|
||||||
*/
|
|
||||||
public interface DictionaryMap {
|
|
||||||
int NO_VALUE = Integer.MIN_VALUE;
|
|
||||||
|
|
||||||
static DictionaryMap create() {
|
|
||||||
// Default to on-heap version
|
|
||||||
// TODO: Make this configurable
|
|
||||||
|
|
||||||
return new OnHeapDictionaryMap();
|
|
||||||
}
|
|
||||||
|
|
||||||
void clear();
|
|
||||||
|
|
||||||
int size();
|
|
||||||
|
|
||||||
int put(long key);
|
|
||||||
|
|
||||||
int get(long key);
|
|
||||||
}
|
|
@ -1,172 +0,0 @@
|
|||||||
package nu.marginalia.dict;
|
|
||||||
|
|
||||||
import nu.marginalia.util.NextPrimeUtil;
|
|
||||||
|
|
||||||
import java.nio.ByteBuffer;
|
|
||||||
import java.nio.IntBuffer;
|
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
|
||||||
|
|
||||||
import static java.lang.Math.round;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Spiritually influenced by GNU Trove's hash maps
|
|
||||||
* LGPL 2.1
|
|
||||||
*/
|
|
||||||
public class OffHeapDictionaryHashMap implements DictionaryMap {
|
|
||||||
|
|
||||||
private final int bufferCount;
|
|
||||||
|
|
||||||
private final IntBuffer[] buffers;
|
|
||||||
private final DictionaryData dictionaryData;
|
|
||||||
|
|
||||||
private final long hashTableSize;
|
|
||||||
private final int bufferSizeBytes;
|
|
||||||
private final int intsPerBuffer;
|
|
||||||
private final long maxProbeLength;
|
|
||||||
|
|
||||||
private final AtomicInteger sz = new AtomicInteger(0);
|
|
||||||
|
|
||||||
public OffHeapDictionaryHashMap(long sizeMemory) {
|
|
||||||
final int intSize = 4;
|
|
||||||
|
|
||||||
bufferCount = 1 + (int) ((intSize*sizeMemory) / (1<<30));
|
|
||||||
buffers = new IntBuffer[bufferCount];
|
|
||||||
|
|
||||||
// Actually use a prime size for Donald Knuth reasons
|
|
||||||
hashTableSize = NextPrimeUtil.nextPrime(sizeMemory, -1);
|
|
||||||
|
|
||||||
intsPerBuffer = 1 + (int)(sizeMemory/ bufferCount);
|
|
||||||
bufferSizeBytes = intSize*intsPerBuffer;
|
|
||||||
maxProbeLength = sizeMemory/10;
|
|
||||||
|
|
||||||
if (((long) bufferCount * intsPerBuffer) < sizeMemory) {
|
|
||||||
throw new Error("Buffer memory is less than requested memory; this data structure is not safe to use");
|
|
||||||
}
|
|
||||||
|
|
||||||
dictionaryData = new DictionaryData((int)Math.min(1<<27, Math.max(32L, sizeMemory/4)));
|
|
||||||
|
|
||||||
initializeBuffers();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void initializeBuffers() {
|
|
||||||
for (int b = 0; b < bufferCount; b++) {
|
|
||||||
buffers[b] = ByteBuffer.allocateDirect(bufferSizeBytes).asIntBuffer();
|
|
||||||
|
|
||||||
for (int i = 0; i < intsPerBuffer; i++) {
|
|
||||||
buffers[b].put(i, NO_VALUE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void clear() {
|
|
||||||
dictionaryData.clear();
|
|
||||||
initializeBuffers();
|
|
||||||
sz.set(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int size() {
|
|
||||||
return sz.get();
|
|
||||||
}
|
|
||||||
|
|
||||||
private int getCell(long idx) {
|
|
||||||
int buffer = (int)(idx / intsPerBuffer);
|
|
||||||
int bufferIdx = (int)(idx % intsPerBuffer);
|
|
||||||
return buffers[buffer].get(bufferIdx);
|
|
||||||
}
|
|
||||||
private void setCell(long idx, int val) {
|
|
||||||
int buffer = (int)(idx / intsPerBuffer);
|
|
||||||
int bufferIdx = (int)(idx % intsPerBuffer);
|
|
||||||
|
|
||||||
buffers[buffer].put(bufferIdx, val);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int put(long key) {
|
|
||||||
|
|
||||||
long hash = key & 0x7FFF_FFFF_FFFF_FFFFL;
|
|
||||||
|
|
||||||
long idx = hash % hashTableSize;
|
|
||||||
|
|
||||||
if (getCell(idx) == NO_VALUE) {
|
|
||||||
return setValue(key, idx);
|
|
||||||
}
|
|
||||||
|
|
||||||
return putRehash(key, idx, hash);
|
|
||||||
}
|
|
||||||
|
|
||||||
private int putRehash(long key, long idx, long hash) {
|
|
||||||
final long pStride = 1 + (hash % (hashTableSize - 2));
|
|
||||||
|
|
||||||
for (long j = 1; j < maxProbeLength; j++) {
|
|
||||||
idx = idx - pStride;
|
|
||||||
|
|
||||||
if (idx < 0) {
|
|
||||||
idx += hashTableSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
final int val = getCell(idx);
|
|
||||||
|
|
||||||
if (val == NO_VALUE) {
|
|
||||||
return setValue(key, idx);
|
|
||||||
}
|
|
||||||
else if (dictionaryData.keyEquals(val, key)) {
|
|
||||||
return val;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
throw new IllegalStateException("DictionaryHashMap full @ size " + size() + "/" + hashTableSize + ", " + round((100.0*size()) / hashTableSize) + "%");
|
|
||||||
}
|
|
||||||
|
|
||||||
private int setValue(long key, long cell) {
|
|
||||||
sz.incrementAndGet();
|
|
||||||
|
|
||||||
int di = dictionaryData.add(key);
|
|
||||||
setCell(cell, di);
|
|
||||||
return di;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int get(long key) {
|
|
||||||
final long hash = key & 0x7FFF_FFFF_FFFF_FFFFL;
|
|
||||||
final long cell = hash % hashTableSize;
|
|
||||||
|
|
||||||
if (getCell(cell) == NO_VALUE) {
|
|
||||||
return NO_VALUE;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
int val = getCell(cell);
|
|
||||||
|
|
||||||
if (dictionaryData.keyEquals(val, key)) {
|
|
||||||
return val;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return getRehash(key, cell, hash);
|
|
||||||
}
|
|
||||||
|
|
||||||
private int getRehash(long key, long idx, long hash) {
|
|
||||||
final long pStride = 1 + (hash % (hashTableSize - 2));
|
|
||||||
|
|
||||||
for (long j = 1; j < maxProbeLength; j++) {
|
|
||||||
idx = idx - pStride;
|
|
||||||
|
|
||||||
if (idx < 0) {
|
|
||||||
idx += hashTableSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
final var val = getCell(idx);
|
|
||||||
|
|
||||||
if (val == NO_VALUE) {
|
|
||||||
return NO_VALUE;
|
|
||||||
}
|
|
||||||
else if (dictionaryData.keyEquals(val, key)) {
|
|
||||||
return val;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
throw new IllegalStateException("DictionaryHashMap full @ size " + size() + "/" + hashTableSize + ", " + round((100.0*size()) / hashTableSize) + "%");
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
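
The "prime size for Donald Knuth reasons" comment above ties into the double-hashing probe in putRehash/getRehash: with a prime table size, every stride in 1..size-1 is coprime to the size, so the probe sequence visits all slots before repeating. A small standalone check of that property (illustrative, not project code):

```java
import java.util.BitSet;

public class ProbeCoverageDemo {
    public static void main(String[] args) {
        int tableSize = 13; // prime, as NextPrimeUtil.nextPrime() would guarantee
        for (int stride = 1; stride < tableSize; stride++) {
            BitSet visited = new BitSet(tableSize);
            int idx = 0;
            for (int step = 0; step < tableSize; step++) {
                visited.set(idx);
                idx = Math.floorMod(idx - stride, tableSize); // probe downwards with wrap-around
            }
            // with a prime size, every stride covers the whole table
            System.out.println("stride " + stride + " covers " + visited.cardinality() + "/" + tableSize);
        }
    }
}
```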
@ -1,61 +0,0 @@
|
|||||||
package nu.marginalia.dict;
|
|
||||||
|
|
||||||
import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap;
|
|
||||||
|
|
||||||
public class OnHeapDictionaryMap implements DictionaryMap {
|
|
||||||
/* Use three different hash tables to get around the limitations of Java's array sizes.
|
|
||||||
*
|
|
||||||
* Each map fits 0.75 * 2^30 entries (~800mn); the three maps together fit a bit over 2^31 entries.
|
|
||||||
* We're happy with 2^31.
|
|
||||||
*
|
|
||||||
* We'll assign each term to one of the three maps based on its value modulo 3. We'll pray each
|
|
||||||
* night that Long2IntOpenHashMap's hash function is good enough to cope with this. The keys we are
|
|
||||||
* inserting are 64 bit hashes already, so odds are the rest of the bits have very good entropy.
|
|
||||||
*/
|
|
||||||
private static final int DEFAULT_SIZE = Integer.getInteger("lexiconSizeHint", 100_000)/3;
|
|
||||||
private final Long2IntOpenHashMap[] entries = new Long2IntOpenHashMap[3];
|
|
||||||
|
|
||||||
public OnHeapDictionaryMap() {
|
|
||||||
for (int i = 0; i < entries.length; i++) {
|
|
||||||
entries[i] = new Long2IntOpenHashMap(DEFAULT_SIZE, 0.75f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void clear() {
|
|
||||||
for (var map : entries) {
|
|
||||||
map.clear();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int size() {
|
|
||||||
int totalSize = 0;
|
|
||||||
for (var map : entries) {
|
|
||||||
totalSize += map.size();
|
|
||||||
}
|
|
||||||
return totalSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int put(long key) {
|
|
||||||
int shardIdx = (int) Long.remainderUnsigned(key, 3);
|
|
||||||
var shard = entries[shardIdx];
|
|
||||||
int size = size();
|
|
||||||
|
|
||||||
if (size == Integer.MAX_VALUE)
|
|
||||||
throw new IllegalStateException("DictionaryMap is full");
|
|
||||||
|
|
||||||
shard.putIfAbsent(key, size);
|
|
||||||
|
|
||||||
return get(key);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int get(long key) {
|
|
||||||
int shardIdx = (int) Long.remainderUnsigned(key, 3);
|
|
||||||
var shard = entries[shardIdx];
|
|
||||||
|
|
||||||
return shard.getOrDefault(key, NO_VALUE);
|
|
||||||
}
|
|
||||||
}
|
|
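
As the sharding comment in OnHeapDictionaryMap spells out, shard selection is just an unsigned remainder of the 64 bit key. A tiny standalone check (illustrative only):

```java
public class ShardSelectionDemo {
    public static void main(String[] args) {
        long[] keys = { 42L, -7L, Long.MIN_VALUE };
        for (long key : keys) {
            // remainderUnsigned treats the key as an unsigned 64 bit value,
            // so negative hashes still land in shard 0, 1 or 2
            int shard = (int) Long.remainderUnsigned(key, 3);
            System.out.println(key + " -> shard " + shard);
        }
    }
}
```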
@ -1,170 +0,0 @@
|
|||||||
package nu.marginalia.lexicon;
|
|
||||||
|
|
||||||
import io.prometheus.client.Gauge;
|
|
||||||
import lombok.SneakyThrows;
|
|
||||||
import nu.marginalia.dict.DictionaryMap;
|
|
||||||
import nu.marginalia.hash.MurmurHash3_128;
|
|
||||||
import nu.marginalia.lexicon.journal.KeywordLexiconJournal;
|
|
||||||
import nu.marginalia.lexicon.journal.KeywordLexiconJournalFingerprint;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
|
||||||
import java.util.concurrent.locks.Lock;
|
|
||||||
import java.util.concurrent.locks.ReadWriteLock;
|
|
||||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
|
||||||
|
|
||||||
/** The keyword lexicon is used to map keywords to unique numeric IDs.
|
|
||||||
* This class is used to both construct the lexicon, and to read from it.
|
|
||||||
* <p>
|
|
||||||
* Readers will want to use the KeywordLexiconReadOnlyView wrapper, as it
|
|
||||||
* only exposes read-only methods and hides the mutating methods.
|
|
||||||
* <p>
|
|
||||||
* Between instances, the lexicon is stored in a journal file, with entries kept exactly in the
|
|
||||||
* order they were received by the writer. The journal file is then replayed
|
|
||||||
* on startup to reconstruct the lexicon, giving each term an ID according to
|
|
||||||
* the order in which they are loaded. It is therefore important that the journal file
|
|
||||||
* is not tampered with, as this will cause the lexicon to be corrupted.
|
|
||||||
* */
|
|
||||||
|
|
||||||
public class KeywordLexicon implements AutoCloseable {
|
|
||||||
private final DictionaryMap reverseIndex;
|
|
||||||
|
|
||||||
private final ReadWriteLock memoryLock = new ReentrantReadWriteLock();
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
|
||||||
|
|
||||||
private static final AtomicInteger instances = new AtomicInteger();
|
|
||||||
|
|
||||||
private static final Gauge request_time_metrics
|
|
||||||
= Gauge.build("wmsa_edge_index_dictionary_size", "Dictionary Size")
|
|
||||||
.register();
|
|
||||||
private final KeywordLexiconJournal journal;
|
|
||||||
|
|
||||||
private volatile KeywordLexiconJournalFingerprint fingerprint = null;
|
|
||||||
|
|
||||||
private final MurmurHash3_128 hasher = new MurmurHash3_128();
|
|
||||||
|
|
||||||
@SneakyThrows
|
|
||||||
public KeywordLexicon(KeywordLexiconJournal keywordLexiconJournal) {
|
|
||||||
|
|
||||||
journal = keywordLexiconJournal;
|
|
||||||
reverseIndex = DictionaryMap.create();
|
|
||||||
|
|
||||||
logger.info("Creating dictionary writer");
|
|
||||||
|
|
||||||
if (!instances.compareAndSet(0, 1)) {
|
|
||||||
logger.error("MULTIPLE LEXICON INSTANCES!");
|
|
||||||
}
|
|
||||||
|
|
||||||
reload();
|
|
||||||
|
|
||||||
logger.info("Done creating dictionary writer");
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean needsReload() throws IOException {
|
|
||||||
var newFingerprint = journal.journalFingerprint();
|
|
||||||
return !newFingerprint.equals(fingerprint);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Reload the lexicon from the journal */
|
|
||||||
public void reload() throws IOException {
|
|
||||||
var lock = memoryLock.writeLock();
|
|
||||||
lock.lock();
|
|
||||||
try {
|
|
||||||
reverseIndex.clear();
|
|
||||||
journal.loadFile(bytes -> reverseIndex.put(hasher.hash(bytes)));
|
|
||||||
fingerprint = journal.journalFingerprint();
|
|
||||||
}
|
|
||||||
finally {
|
|
||||||
lock.unlock();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Get method that inserts the word into the lexicon if it is not present */
|
|
||||||
public int getOrInsert(String macroWord) {
|
|
||||||
return getOrInsert(macroWord.getBytes(StandardCharsets.UTF_8));
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Get method that inserts the word into the lexicon if it is not present */
|
|
||||||
@SneakyThrows
|
|
||||||
private int getOrInsert(byte[] bytes) {
|
|
||||||
if (bytes.length >= Byte.MAX_VALUE) {
|
|
||||||
logger.warn("getOrInsert({}), illegal length {}", new String(bytes), bytes.length);
|
|
||||||
return DictionaryMap.NO_VALUE;
|
|
||||||
}
|
|
||||||
|
|
||||||
final long key = hasher.hash(bytes);
|
|
||||||
|
|
||||||
int idx = getReadOnly(key);
|
|
||||||
|
|
||||||
if (idx < 0) {
|
|
||||||
idx = insertNew(key, bytes);
|
|
||||||
}
|
|
||||||
|
|
||||||
return idx;
|
|
||||||
}
|
|
||||||
|
|
||||||
private int insertNew(long key, byte[] bytes) throws InterruptedException {
|
|
||||||
Lock lock = memoryLock.writeLock();
|
|
||||||
int idx;
|
|
||||||
try {
|
|
||||||
lock.lock();
|
|
||||||
|
|
||||||
// Check again to prevent race condition
|
|
||||||
if ((idx = reverseIndex.get(key)) >= 0)
|
|
||||||
return idx;
|
|
||||||
|
|
||||||
journal.enqueue(bytes);
|
|
||||||
idx = reverseIndex.put(key);
|
|
||||||
request_time_metrics.set(reverseIndex.size());
|
|
||||||
|
|
||||||
return idx;
|
|
||||||
}
|
|
||||||
finally {
|
|
||||||
lock.unlock();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Get method that does not modify the lexicon if the word is not present */
|
|
||||||
public int getReadOnly(String word) {
|
|
||||||
final byte[] bytes = word.getBytes(StandardCharsets.UTF_8);
|
|
||||||
return getReadOnly(hasher.hash(bytes));
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Get method that does not modify the lexicon if the word is not present */
|
|
||||||
public int getReadOnly(long hashedKey) {
|
|
||||||
Lock lock = memoryLock.readLock();
|
|
||||||
try {
|
|
||||||
lock.lock();
|
|
||||||
return reverseIndex.get(hashedKey);
|
|
||||||
}
|
|
||||||
finally {
|
|
||||||
lock.unlock();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public long size() {
|
|
||||||
Lock lock = memoryLock.readLock();
|
|
||||||
try {
|
|
||||||
lock.lock();
|
|
||||||
return reverseIndex.size();
|
|
||||||
}
|
|
||||||
finally {
|
|
||||||
lock.unlock();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() throws Exception {
|
|
||||||
logger.warn("Closing Lexicon");
|
|
||||||
|
|
||||||
journal.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void commitToDisk() {
|
|
||||||
journal.commitToDisk();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
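
To make the removed API concrete, this is roughly how the lexicon was driven before this commit; it mirrors KeywordLexiconTest further down in the diff. The file path is a placeholder, the real services resolve it via FileStorageService.

```java
import nu.marginalia.lexicon.KeywordLexicon;
import nu.marginalia.lexicon.journal.KeywordLexiconJournal;
import nu.marginalia.lexicon.journal.KeywordLexiconJournalMode;

import java.io.File;

class LexiconUsageSketch {
    public static void main(String[] args) throws Exception {
        var journal = new KeywordLexiconJournal(new File("/tmp/lexicon-journal.dat"), // placeholder path
                                                KeywordLexiconJournalMode.READ_WRITE);
        try (var lexicon = new KeywordLexicon(journal)) {
            int a  = lexicon.getOrInsert("aaa");   // inserts, returns the new dense id
            int a2 = lexicon.getOrInsert("aaa");   // same word, same id
            System.out.println(a == a2);           // true

            int miss = lexicon.getReadOnly("bbb"); // read-only lookup never inserts
            System.out.println(miss);              // DictionaryMap.NO_VALUE (Integer.MIN_VALUE)

            lexicon.commitToDisk();                // flush queued entries to the journal file
        }
    }
}
```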
@ -1,42 +0,0 @@
|
|||||||
package nu.marginalia.lexicon;
|
|
||||||
|
|
||||||
import com.google.common.cache.Cache;
|
|
||||||
import com.google.common.cache.CacheBuilder;
|
|
||||||
import lombok.SneakyThrows;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
|
|
||||||
/** A read-only view of a keyword lexicon.
|
|
||||||
*
|
|
||||||
* @see KeywordLexicon
|
|
||||||
* */
|
|
||||||
public class KeywordLexiconReadOnlyView {
|
|
||||||
private final KeywordLexicon writer;
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
|
||||||
private final Cache<String, Integer> cache = CacheBuilder.newBuilder().maximumSize(10_000).expireAfterAccess(60, TimeUnit.SECONDS).build();
|
|
||||||
|
|
||||||
@SneakyThrows
|
|
||||||
public KeywordLexiconReadOnlyView(KeywordLexicon writer) {
|
|
||||||
this.writer = writer;
|
|
||||||
}
|
|
||||||
|
|
||||||
@SneakyThrows
|
|
||||||
public int get(String word) {
|
|
||||||
return cache.get(word, () -> writer.getReadOnly(word));
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean suggestReload() throws IOException {
|
|
||||||
if (writer.needsReload()) {
|
|
||||||
logger.info("Reloading lexicon");
|
|
||||||
writer.reload();
|
|
||||||
cache.invalidateAll();
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
logger.info("Foregoing lexicon reload");
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,114 +0,0 @@
|
|||||||
package nu.marginalia.lexicon.journal;
|
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
import java.nio.file.attribute.BasicFileAttributes;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.function.Consumer;
|
|
||||||
|
|
||||||
/** The journal for the keyword lexicon.
|
|
||||||
* It's used both for writing the lexicon and for reconstructing it for reading later.
|
|
||||||
*/
|
|
||||||
public class KeywordLexiconJournal {
|
|
||||||
|
|
||||||
private static final boolean noCommit = Boolean.getBoolean("DictionaryJournal.noCommit");
|
|
||||||
|
|
||||||
private final KeywordLexiconJournalCommitQueue commitQueue;
|
|
||||||
private KeywordLexiconJournalFile journalFile;
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
|
||||||
|
|
||||||
private final Thread commitToDiskThread;
|
|
||||||
|
|
||||||
private volatile boolean running = true;
|
|
||||||
private final Path journalFilePath;
|
|
||||||
|
|
||||||
/** Create a new journal.
|
|
||||||
*
|
|
||||||
* @param file The file to use for the journal.
|
|
||||||
* @param mode The mode to use for the journal. If READ_ONLY, the journal will be read-only and refuse
|
|
||||||
* to accept new entries.
|
|
||||||
*/
|
|
||||||
public KeywordLexiconJournal(File file, KeywordLexiconJournalMode mode) throws IOException {
|
|
||||||
journalFilePath = file.toPath();
|
|
||||||
|
|
||||||
if (mode == KeywordLexiconJournalMode.READ_WRITE) {
|
|
||||||
commitQueue = new KeywordLexiconJournalCommitQueue();
|
|
||||||
journalFile = new KeywordLexiconJournalFile(file);
|
|
||||||
|
|
||||||
commitToDiskThread = new Thread(this::commitToDiskRunner, "CommitToDiskThread");
|
|
||||||
commitToDiskThread.start();
|
|
||||||
|
|
||||||
Runtime.getRuntime().addShutdownHook(new Thread(this::commitToDisk));
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
journalFile = new KeywordLexiconJournalFile(file);
|
|
||||||
|
|
||||||
commitQueue = null;
|
|
||||||
commitToDiskThread = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void enqueue(byte[] word) throws InterruptedException {
|
|
||||||
if (null == commitQueue)
|
|
||||||
throw new UnsupportedOperationException("Lexicon journal is read-only");
|
|
||||||
|
|
||||||
commitQueue.enqueue(word);
|
|
||||||
}
|
|
||||||
|
|
||||||
public KeywordLexiconJournalFingerprint journalFingerprint() throws IOException {
|
|
||||||
var attributes = Files.readAttributes(journalFilePath, BasicFileAttributes.class);
|
|
||||||
|
|
||||||
long cTime = attributes.creationTime().toMillis();
|
|
||||||
long mTime = attributes.lastModifiedTime().toMillis();
|
|
||||||
long size = attributes.size();
|
|
||||||
|
|
||||||
return new KeywordLexiconJournalFingerprint(cTime, mTime, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void commitToDiskRunner() {
|
|
||||||
if (noCommit) return;
|
|
||||||
|
|
||||||
while (running) {
|
|
||||||
try {
|
|
||||||
Thread.sleep(1000);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
commitToDisk();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void commitToDisk() {
|
|
||||||
List<byte[]> entries = commitQueue.getQueuedEntries();
|
|
||||||
|
|
||||||
journalFile.writeEntriesToJournal(entries);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void close() throws Exception {
|
|
||||||
logger.info("Closing Journal");
|
|
||||||
running = false;
|
|
||||||
|
|
||||||
if (commitToDiskThread != null) {
|
|
||||||
commitToDiskThread.join();
|
|
||||||
commitToDisk();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (journalFile != null) {
|
|
||||||
journalFile.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void loadFile(Consumer<byte[]> loadJournalEntry) throws IOException {
|
|
||||||
if (journalFile != null) {
|
|
||||||
journalFile.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
journalFile = new KeywordLexiconJournalFile(journalFilePath.toFile());
|
|
||||||
journalFile.loadFile(loadJournalEntry);
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,48 +0,0 @@
|
|||||||
package nu.marginalia.lexicon.journal;
|
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/** An in-memory queue for lexicon journal entries used to improve the performance of
|
|
||||||
* large bursts of insert operations.
|
|
||||||
*/
|
|
||||||
class KeywordLexiconJournalCommitQueue {
|
|
||||||
private final ArrayList<byte[]> commitQueue = new ArrayList<>(10_000);
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
|
||||||
private static final long BACK_PRESSURE_LIMIT = 25_000;
|
|
||||||
|
|
||||||
public synchronized void enqueue(byte[] word) throws InterruptedException {
|
|
||||||
for (int queueSize = commitQueue.size();
|
|
||||||
queueSize >= BACK_PRESSURE_LIMIT;
|
|
||||||
queueSize = commitQueue.size())
|
|
||||||
{
|
|
||||||
wait();
|
|
||||||
}
|
|
||||||
|
|
||||||
commitQueue.add(word);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public synchronized List<byte[]> getQueuedEntries() {
|
|
||||||
List<byte[]> data;
|
|
||||||
if (commitQueue.isEmpty()) {
|
|
||||||
return Collections.emptyList();
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
data = new ArrayList<>(commitQueue);
|
|
||||||
commitQueue.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
notifyAll();
|
|
||||||
|
|
||||||
if (data.size() > BACK_PRESSURE_LIMIT) {
|
|
||||||
logger.warn("Lexicon Journal Backpressure: {}", data.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
}
|
|
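
The commit queue above is a plain synchronized batch-and-drain with wait/notify back pressure. A stripped-down, generic sketch of the same pattern (not project code):

```java
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

class BatchQueue<T> {
    private static final int LIMIT = 25_000; // same back-pressure threshold as above
    private final ArrayList<T> queue = new ArrayList<>();

    public synchronized void enqueue(T item) throws InterruptedException {
        while (queue.size() >= LIMIT) {
            wait(); // block producers until the writer thread drains the queue
        }
        queue.add(item);
    }

    public synchronized List<T> drain() {
        if (queue.isEmpty()) return Collections.emptyList();
        List<T> out = new ArrayList<>(queue);
        queue.clear();
        notifyAll(); // wake any producers blocked on the limit
        return out;
    }
}
```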
@ -1,162 +0,0 @@
|
|||||||
package nu.marginalia.lexicon.journal;
|
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.RandomAccessFile;
|
|
||||||
import java.nio.ByteBuffer;
|
|
||||||
import java.nio.channels.FileChannel;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.concurrent.locks.Lock;
|
|
||||||
import java.util.concurrent.locks.ReadWriteLock;
|
|
||||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
|
||||||
import java.util.function.Consumer;
|
|
||||||
|
|
||||||
public class KeywordLexiconJournalFile implements AutoCloseable {
|
|
||||||
private final RandomAccessFile journalFileRAF;
|
|
||||||
private final File journalFile;
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
|
||||||
|
|
||||||
private final ReadWriteLock diskLock = new ReentrantReadWriteLock();
|
|
||||||
|
|
||||||
public KeywordLexiconJournalFile(File journalFile) throws IOException {
|
|
||||||
this.journalFileRAF = new RandomAccessFile(journalFile, "rw");
|
|
||||||
this.journalFile = journalFile;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void rewind() throws IOException {
|
|
||||||
journalFileRAF.seek(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void loadFile(Consumer<byte[]> acceptEntry) throws IOException {
|
|
||||||
if (!journalFile.exists()) {
|
|
||||||
logger.info("File {} does not exist, can't load", journalFile);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.info("Reading {}", journalFile);
|
|
||||||
|
|
||||||
long pos;
|
|
||||||
if (journalFileRAF.length() < 8) {
|
|
||||||
pos = 8;
|
|
||||||
journalFileRAF.writeLong(pos);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
pos = journalFileRAF.readLong();
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.info("Length {} ({})", pos, journalFileRAF.length());
|
|
||||||
if (pos == 8) {
|
|
||||||
logger.info("Empty DB");
|
|
||||||
}
|
|
||||||
|
|
||||||
ByteBuffer buffer = ByteBuffer.allocateDirect(8192);
|
|
||||||
|
|
||||||
var channel = journalFileRAF.getChannel();
|
|
||||||
|
|
||||||
long cp = channel.position();
|
|
||||||
try {
|
|
||||||
buffer.limit(0);
|
|
||||||
long loaded = 0;
|
|
||||||
|
|
||||||
while (cp < pos || buffer.hasRemaining()) {
|
|
||||||
if (buffer.limit() - buffer.position() < 4) {
|
|
||||||
buffer.compact();
|
|
||||||
|
|
||||||
long rb = channel.read(buffer);
|
|
||||||
if (rb <= 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
cp += rb;
|
|
||||||
buffer.flip();
|
|
||||||
}
|
|
||||||
|
|
||||||
int len = buffer.get() & 0xFF;
|
|
||||||
if (len > Byte.MAX_VALUE) {
|
|
||||||
logger.warn("Found keyword with impossible length {} near {}, likely corruption", len, cp);
|
|
||||||
}
|
|
||||||
while (buffer.limit() - buffer.position() < len) {
|
|
||||||
buffer.compact();
|
|
||||||
int rb = channel.read(buffer);
|
|
||||||
if (rb <= 0) break;
|
|
||||||
cp += rb;
|
|
||||||
buffer.flip();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (buffer.limit() < len) {
|
|
||||||
logger.warn("Partial write at end-of-file!");
|
|
||||||
|
|
||||||
if (cp >= pos) {
|
|
||||||
logger.info("... but it's ok");
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
byte[] data = new byte[len];
|
|
||||||
buffer.get(data);
|
|
||||||
if ((++loaded % 10_000_000) == 0L) {
|
|
||||||
logger.info("Loaded {} million items", loaded/1_000_000);
|
|
||||||
}
|
|
||||||
|
|
||||||
acceptEntry.accept(data);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (Exception ex) {
|
|
||||||
logger.error("IO Exception", ex);
|
|
||||||
}
|
|
||||||
|
|
||||||
journalFileRAF.seek(pos);
|
|
||||||
}
|
|
||||||
|
|
||||||
private final ByteBuffer writeBuffer = ByteBuffer.allocateDirect(4096);
|
|
||||||
|
|
||||||
public void writeEntriesToJournal(List<byte[]> data) {
|
|
||||||
if (data.isEmpty())
|
|
||||||
return;
|
|
||||||
|
|
||||||
final FileChannel channel = journalFileRAF.getChannel();
|
|
||||||
|
|
||||||
if (!channel.isOpen()) {
|
|
||||||
throw new IllegalStateException("commitToDisk() with closed channel! Cannot commit!");
|
|
||||||
}
|
|
||||||
|
|
||||||
Lock writeLock = diskLock.writeLock();
|
|
||||||
try {
|
|
||||||
writeLock.lock();
|
|
||||||
|
|
||||||
long start = System.currentTimeMillis();
|
|
||||||
int ct = data.size();
|
|
||||||
|
|
||||||
for (byte[] itemBytes : data) {
|
|
||||||
writeBuffer.clear();
|
|
||||||
writeBuffer.put((byte) itemBytes.length);
|
|
||||||
writeBuffer.put(itemBytes);
|
|
||||||
writeBuffer.flip();
|
|
||||||
|
|
||||||
while (writeBuffer.position() < writeBuffer.limit())
|
|
||||||
channel.write(writeBuffer, channel.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
writeBuffer.clear();
|
|
||||||
writeBuffer.putLong(channel.size());
|
|
||||||
writeBuffer.flip();
|
|
||||||
channel.write(writeBuffer, 0);
|
|
||||||
|
|
||||||
channel.force(false);
|
|
||||||
|
|
||||||
logger.debug("Comitted {} items in {} ms", ct, System.currentTimeMillis() - start);
|
|
||||||
}
|
|
||||||
catch (Exception ex) {
|
|
||||||
logger.error("Error during dictionary commit!!!", ex);
|
|
||||||
}
|
|
||||||
finally {
|
|
||||||
writeLock.unlock();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void close() throws IOException {
|
|
||||||
journalFileRAF.close();
|
|
||||||
}
|
|
||||||
}
|
|
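
As loadFile and writeEntriesToJournal above imply, the journal layout is an 8 byte header holding the committed end offset, followed by length-prefixed entries (one length byte, then the keyword bytes). A minimal standalone writer for that layout, assuming a fresh file at a placeholder path:

```java
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.charset.StandardCharsets;

class JournalWriteSketch {
    public static void main(String[] args) throws IOException {
        try (var raf = new RandomAccessFile("/tmp/journal-sketch.dat", "rw")) { // placeholder path, assumed empty
            raf.writeLong(8); // header: committed length, nothing written yet
            for (String word : new String[] { "aaa", "bbb" }) {
                byte[] bytes = word.getBytes(StandardCharsets.UTF_8);
                raf.seek(raf.length());
                raf.writeByte(bytes.length); // single length byte; words are capped below 127 bytes
                raf.write(bytes);
            }
            raf.seek(0);
            raf.writeLong(raf.length()); // commit: record the new end offset in the header
        }
    }
}
```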
@ -1,10 +0,0 @@
|
|||||||
package nu.marginalia.lexicon.journal;
|
|
||||||
|
|
||||||
/** Contains values used to assess whether the lexicon is in sync with the journal
|
|
||||||
* or if it has been replaced with a newer version and should be reloaded
|
|
||||||
* */
|
|
||||||
public record KeywordLexiconJournalFingerprint(long createdTime,
|
|
||||||
long mTime,
|
|
||||||
long sizeBytes)
|
|
||||||
{
|
|
||||||
}
|
|
@ -1,6 +0,0 @@
|
|||||||
package nu.marginalia.lexicon.journal;
|
|
||||||
|
|
||||||
public enum KeywordLexiconJournalMode {
|
|
||||||
READ_ONLY,
|
|
||||||
READ_WRITE
|
|
||||||
}
|
|
@ -1,78 +0,0 @@
|
|||||||
package nu.marginalia.lexicon;
|
|
||||||
|
|
||||||
import nu.marginalia.dict.OnHeapDictionaryMap;
|
|
||||||
import nu.marginalia.lexicon.journal.KeywordLexiconJournal;
|
|
||||||
import nu.marginalia.lexicon.journal.KeywordLexiconJournalMode;
|
|
||||||
import org.junit.jupiter.api.AfterEach;
|
|
||||||
import org.junit.jupiter.api.Assertions;
|
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertNotEquals;
|
|
||||||
|
|
||||||
public class KeywordLexiconTest {
|
|
||||||
|
|
||||||
private Path journalFile;
|
|
||||||
private KeywordLexicon lexicon;
|
|
||||||
|
|
||||||
@BeforeEach
|
|
||||||
public void setUp() throws IOException {
|
|
||||||
journalFile = Files.createTempFile(getClass().getSimpleName(), ".dat");
|
|
||||||
|
|
||||||
var lexiconJournal = new KeywordLexiconJournal(journalFile.toFile(), KeywordLexiconJournalMode.READ_WRITE);
|
|
||||||
lexicon = new KeywordLexicon(lexiconJournal);
|
|
||||||
}
|
|
||||||
|
|
||||||
@AfterEach
|
|
||||||
public void tearDown() throws Exception {
|
|
||||||
lexicon.close();
|
|
||||||
Files.delete(journalFile);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testConsistentInserts() {
|
|
||||||
int a = lexicon.getOrInsert("aaa");
|
|
||||||
int b = lexicon.getOrInsert("bbb");
|
|
||||||
int a2 = lexicon.getOrInsert("aaa");
|
|
||||||
int c = lexicon.getOrInsert("ccc");
|
|
||||||
|
|
||||||
assertEquals(a, a2);
|
|
||||||
assertNotEquals(a, b);
|
|
||||||
assertNotEquals(a, c);
|
|
||||||
assertNotEquals(b, c);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testInsertReplay() {
|
|
||||||
int a = lexicon.getOrInsert("aaa");
|
|
||||||
int b = lexicon.getOrInsert("bbb");
|
|
||||||
int c = lexicon.getOrInsert("ccc");
|
|
||||||
|
|
||||||
assertEquals(a, lexicon.getReadOnly("aaa"));
|
|
||||||
assertEquals(b, lexicon.getReadOnly("bbb"));
|
|
||||||
assertEquals(c, lexicon.getReadOnly("ccc"));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testReload() throws IOException {
|
|
||||||
int a = lexicon.getOrInsert("aaa");
|
|
||||||
int b = lexicon.getOrInsert("bbb");
|
|
||||||
int c = lexicon.getOrInsert("ccc");
|
|
||||||
lexicon.commitToDisk();
|
|
||||||
|
|
||||||
var lexiconJournal = new KeywordLexiconJournal(journalFile.toFile(), KeywordLexiconJournalMode.READ_WRITE);
|
|
||||||
try (var anotherLexicon = new KeywordLexicon(lexiconJournal)) {
|
|
||||||
assertEquals(a, anotherLexicon.getReadOnly("aaa"));
|
|
||||||
assertEquals(b, anotherLexicon.getReadOnly("bbb"));
|
|
||||||
assertEquals(c, anotherLexicon.getReadOnly("ccc"));
|
|
||||||
}
|
|
||||||
catch (Exception ex) {
|
|
||||||
Assertions.fail("???", ex);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -9,8 +9,8 @@ import nu.marginalia.model.idx.DocumentFlags;
|
|||||||
import nu.marginalia.model.idx.DocumentMetadata;
|
import nu.marginalia.model.idx.DocumentMetadata;
|
||||||
import nu.marginalia.ranking.factors.*;
|
import nu.marginalia.ranking.factors.*;
|
||||||
|
|
||||||
import javax.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import javax.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
@ -369,7 +369,8 @@ public class TwoArrayOperations {
|
|||||||
}
|
}
|
||||||
|
|
||||||
while (aPos < aEnd) {
|
while (aPos < aEnd) {
|
||||||
long val = a.get(aPos+=stepSize);
|
long val = a.get(aPos);
|
||||||
|
aPos+=stepSize;
|
||||||
if (distinct == 0 || val != lastValue) {
|
if (distinct == 0 || val != lastValue) {
|
||||||
distinct++;
|
distinct++;
|
||||||
}
|
}
|
||||||
@ -377,7 +378,8 @@ public class TwoArrayOperations {
|
|||||||
}
|
}
|
||||||
|
|
||||||
while (bPos < bEnd) {
|
while (bPos < bEnd) {
|
||||||
long val = b.get(bPos+=stepSize);
|
long val = b.get(bPos);
|
||||||
|
bPos+=stepSize;
|
||||||
if (distinct == 0 || val != lastValue) {
|
if (distinct == 0 || val != lastValue) {
|
||||||
distinct++;
|
distinct++;
|
||||||
}
|
}
|
||||||
|
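
The two hunks above fix an order-of-operations bug: `a.get(aPos += stepSize)` advances the cursor before reading, so the first element of the range is skipped and the last read lands one step past the end. A tiny plain-array illustration of the corrected loop shape (not the project's LongArray API):

```java
public class CursorBugDemo {
    public static void main(String[] args) {
        long[] a = { 10, 20, 30 };
        int pos = 0, end = a.length, step = 1;

        // Buggy form: a[pos += step] skips a[0] and finally reads a[3], out of bounds.
        // Fixed form, as in the patch: read first, then advance.
        while (pos < end) {
            long val = a[pos];
            pos += step;
            System.out.println(val); // prints 10, 20, 30
        }
    }
}
```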
@ -5,6 +5,7 @@ import nu.marginalia.array.LongArray;
|
|||||||
import org.junit.jupiter.api.Assertions;
|
import org.junit.jupiter.api.Assertions;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.LongStream;
|
import java.util.stream.LongStream;
|
||||||
@ -118,4 +119,31 @@ class TwoArrayOperationsTest {
|
|||||||
assertEquals(distinctSize, mergedSize);
|
assertEquals(distinctSize, mergedSize);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void mergeArrays2() {
|
||||||
|
LongArray left = LongArray.allocate(4);
|
||||||
|
LongArray right = LongArray.allocate(2);
|
||||||
|
LongArray out = LongArray.allocate(4);
|
||||||
|
left.set(0, 40, 3, 41, 4);
|
||||||
|
right.set(0, 40, 5);
|
||||||
|
|
||||||
|
System.out.println(Arrays.toString(longArrayToJavaArray(left)));
|
||||||
|
System.out.println(Arrays.toString(longArrayToJavaArray(right)));
|
||||||
|
System.out.println(Arrays.toString(longArrayToJavaArray(out)));
|
||||||
|
long numDistinct = TwoArrayOperations.countDistinctElementsN(2, left, right, 0, 4, 0, 2);
|
||||||
|
System.out.println(numDistinct);
|
||||||
|
System.out.println(numDistinct);
|
||||||
|
|
||||||
|
TwoArrayOperations.mergeArrays2(out, left, right, 0, 4, 0, 4, 0, 2);
|
||||||
|
|
||||||
|
System.out.println(Arrays.toString(longArrayToJavaArray(out)));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
long[] longArrayToJavaArray(LongArray longArray) {
|
||||||
|
long[] vals = new long[(int) longArray.size()];
|
||||||
|
longArray.get(0, vals);
|
||||||
|
return vals;
|
||||||
|
}
|
||||||
}
|
}
|
@ -16,7 +16,7 @@ import org.jsoup.nodes.Element;
|
|||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import javax.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
package nu.marginalia.mq;
|
package nu.marginalia.mq;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.Singleton;
|
||||||
import nu.marginalia.mq.inbox.MqAsynchronousInbox;
|
import nu.marginalia.mq.inbox.MqAsynchronousInbox;
|
||||||
import nu.marginalia.mq.inbox.MqInboxIf;
|
import nu.marginalia.mq.inbox.MqInboxIf;
|
||||||
import nu.marginalia.mq.inbox.MqSingleShotInbox;
|
import nu.marginalia.mq.inbox.MqSingleShotInbox;
|
||||||
@ -7,8 +9,6 @@ import nu.marginalia.mq.inbox.MqSynchronousInbox;
|
|||||||
import nu.marginalia.mq.outbox.MqOutbox;
|
import nu.marginalia.mq.outbox.MqOutbox;
|
||||||
import nu.marginalia.mq.persistence.MqPersistence;
|
import nu.marginalia.mq.persistence.MqPersistence;
|
||||||
|
|
||||||
import javax.inject.Inject;
|
|
||||||
import javax.inject.Singleton;
|
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
|
|
||||||
@Singleton
|
@Singleton
|
||||||
|
@ -10,8 +10,8 @@ import org.jetbrains.annotations.NotNull;
|
|||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import javax.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import javax.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
@ -8,8 +8,8 @@ import org.jsoup.nodes.Document;
|
|||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import javax.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import javax.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@ package nu.marginalia.converting.processor;
|
|||||||
|
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
|
|
||||||
import javax.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
|
|
||||||
@Singleton
|
@Singleton
|
||||||
public class MetaRobotsTag {
|
public class MetaRobotsTag {
|
||||||
|
@ -9,7 +9,7 @@ import nu.marginalia.model.EdgeUrl;
|
|||||||
import nu.marginalia.converting.processor.logic.links.CommonKeywordExtractor;
|
import nu.marginalia.converting.processor.logic.links.CommonKeywordExtractor;
|
||||||
import nu.marginalia.converting.processor.logic.links.TopKeywords;
|
import nu.marginalia.converting.processor.logic.links.TopKeywords;
|
||||||
|
|
||||||
import javax.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -6,13 +6,12 @@ import com.google.inject.Inject;
|
|||||||
import nu.marginalia.db.storage.FileStorageService;
|
import nu.marginalia.db.storage.FileStorageService;
|
||||||
import nu.marginalia.db.storage.model.FileStorage;
|
import nu.marginalia.db.storage.model.FileStorage;
|
||||||
import nu.marginalia.db.storage.model.FileStorageType;
|
import nu.marginalia.db.storage.model.FileStorageType;
|
||||||
|
import nu.marginalia.index.construction.ReverseIndexConstructor;
|
||||||
import nu.marginalia.index.forward.ForwardIndexConverter;
|
import nu.marginalia.index.forward.ForwardIndexConverter;
|
||||||
import nu.marginalia.index.forward.ForwardIndexFileNames;
|
import nu.marginalia.index.forward.ForwardIndexFileNames;
|
||||||
import nu.marginalia.index.full.ReverseIndexFullConverter;
|
import nu.marginalia.index.journal.model.IndexJournalEntryData;
|
||||||
import nu.marginalia.index.full.ReverseIndexFullFileNames;
|
import nu.marginalia.index.journal.reader.IndexJournalReadEntry;
|
||||||
import nu.marginalia.index.journal.reader.IndexJournalReaderSingleCompressedFile;
|
import nu.marginalia.index.journal.reader.IndexJournalReader;
|
||||||
import nu.marginalia.index.priority.ReverseIndexPrioFileNames;
|
|
||||||
import nu.marginalia.index.priority.ReverseIndexPriorityConverter;
|
|
||||||
import nu.marginalia.model.gson.GsonFactory;
|
import nu.marginalia.model.gson.GsonFactory;
|
||||||
import nu.marginalia.mq.MessageQueueFactory;
|
import nu.marginalia.mq.MessageQueueFactory;
|
||||||
import nu.marginalia.mq.MqMessage;
|
import nu.marginalia.mq.MqMessage;
|
||||||
@ -23,7 +22,6 @@ import nu.marginalia.mqapi.index.IndexName;
|
|||||||
import nu.marginalia.process.control.ProcessHeartbeat;
|
import nu.marginalia.process.control.ProcessHeartbeat;
|
||||||
import nu.marginalia.ranking.DomainRankings;
|
import nu.marginalia.ranking.DomainRankings;
|
||||||
import nu.marginalia.service.module.DatabaseModule;
|
import nu.marginalia.service.module.DatabaseModule;
|
||||||
import nu.marginallia.index.journal.IndexJournalFileNames;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -97,52 +95,35 @@ public class IndexConstructorMain {
|
|||||||
FileStorage indexLive = fileStorageService.getStorageByType(FileStorageType.INDEX_LIVE);
|
FileStorage indexLive = fileStorageService.getStorageByType(FileStorageType.INDEX_LIVE);
|
||||||
FileStorage indexStaging = fileStorageService.getStorageByType(FileStorageType.INDEX_STAGING);
|
FileStorage indexStaging = fileStorageService.getStorageByType(FileStorageType.INDEX_STAGING);
|
||||||
|
|
||||||
Path inputFile = IndexJournalFileNames.resolve(indexStaging.asPath());
|
|
||||||
Path outputFileDocs = ReverseIndexFullFileNames.resolve(indexLive.asPath(), ReverseIndexFullFileNames.FileIdentifier.DOCS, ReverseIndexFullFileNames.FileVersion.NEXT);
|
Path outputFileDocs = ReverseIndexFullFileNames.resolve(indexLive.asPath(), ReverseIndexFullFileNames.FileIdentifier.DOCS, ReverseIndexFullFileNames.FileVersion.NEXT);
|
||||||
Path outputFileWords = ReverseIndexFullFileNames.resolve(indexLive.asPath(), ReverseIndexFullFileNames.FileIdentifier.WORDS, ReverseIndexFullFileNames.FileVersion.NEXT);
|
Path outputFileWords = ReverseIndexFullFileNames.resolve(indexLive.asPath(), ReverseIndexFullFileNames.FileIdentifier.WORDS, ReverseIndexFullFileNames.FileVersion.NEXT);
|
||||||
|
|
||||||
Path tmpDir = indexStaging.asPath().resolve("tmp");
|
Path tmpDir = indexStaging.asPath().resolve("tmp");
|
||||||
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
|
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
|
||||||
|
|
||||||
var journalReader = new IndexJournalReaderSingleCompressedFile(inputFile);
|
|
||||||
|
|
||||||
ReverseIndexFullConverter converter = new ReverseIndexFullConverter(
|
ReverseIndexConstructor.
|
||||||
heartbeat,
|
createReverseIndex(IndexJournalReader::singleFile,
|
||||||
tmpDir,
|
indexStaging.asPath(),
|
||||||
journalReader,
|
tmpDir,
|
||||||
domainRankings,
|
outputFileDocs,
|
||||||
outputFileWords,
|
outputFileWords);
|
||||||
outputFileDocs
|
|
||||||
);
|
|
||||||
|
|
||||||
converter.convert();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private void createPrioReverseIndex() throws SQLException, IOException {
|
private void createPrioReverseIndex() throws SQLException, IOException {
|
||||||
|
|
||||||
FileStorage indexLive = fileStorageService.getStorageByType(FileStorageType.INDEX_LIVE);
|
FileStorage indexLive = fileStorageService.getStorageByType(FileStorageType.INDEX_LIVE);
|
||||||
FileStorage indexStaging = fileStorageService.getStorageByType(FileStorageType.INDEX_STAGING);
|
FileStorage indexStaging = fileStorageService.getStorageByType(FileStorageType.INDEX_STAGING);
|
||||||
|
|
||||||
Path inputFile = IndexJournalFileNames.resolve(indexStaging.asPath());
|
|
||||||
Path outputFileDocs = ReverseIndexPrioFileNames.resolve(indexLive.asPath(), ReverseIndexPrioFileNames.FileIdentifier.DOCS, ReverseIndexPrioFileNames.FileVersion.NEXT);
|
Path outputFileDocs = ReverseIndexPrioFileNames.resolve(indexLive.asPath(), ReverseIndexPrioFileNames.FileIdentifier.DOCS, ReverseIndexPrioFileNames.FileVersion.NEXT);
|
||||||
Path outputFileWords = ReverseIndexPrioFileNames.resolve(indexLive.asPath(), ReverseIndexPrioFileNames.FileIdentifier.WORDS, ReverseIndexPrioFileNames.FileVersion.NEXT);
|
Path outputFileWords = ReverseIndexPrioFileNames.resolve(indexLive.asPath(), ReverseIndexPrioFileNames.FileIdentifier.WORDS, ReverseIndexPrioFileNames.FileVersion.NEXT);
|
||||||
|
|
||||||
Path tmpDir = indexStaging.asPath().resolve("tmp");
|
Path tmpDir = indexStaging.asPath().resolve("tmp");
|
||||||
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
|
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
|
||||||
|
|
||||||
var journalReader = new IndexJournalReaderSingleCompressedFile(inputFile);
|
ReverseIndexConstructor.
|
||||||
|
createReverseIndex(IndexJournalReader::singleFileWithPriorityFilters,
|
||||||
ReverseIndexPriorityConverter converter = new ReverseIndexPriorityConverter(
|
indexStaging.asPath(), tmpDir, outputFileDocs, outputFileWords);
|
||||||
heartbeat,
|
|
||||||
tmpDir,
|
|
||||||
journalReader,
|
|
||||||
domainRankings,
|
|
||||||
outputFileWords,
|
|
||||||
outputFileDocs
|
|
||||||
);
|
|
||||||
|
|
||||||
converter.convert();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void createForwardIndex() throws SQLException, IOException {
|
private void createForwardIndex() throws SQLException, IOException {
|
||||||
@ -150,12 +131,11 @@ public class IndexConstructorMain {
|
|||||||
FileStorage indexLive = fileStorageService.getStorageByType(FileStorageType.INDEX_LIVE);
|
FileStorage indexLive = fileStorageService.getStorageByType(FileStorageType.INDEX_LIVE);
|
||||||
FileStorage indexStaging = fileStorageService.getStorageByType(FileStorageType.INDEX_STAGING);
|
FileStorage indexStaging = fileStorageService.getStorageByType(FileStorageType.INDEX_STAGING);
|
||||||
|
|
||||||
Path inputFile = IndexJournalFileNames.resolve(indexStaging.asPath());
|
|
||||||
Path outputFileDocsId = ForwardIndexFileNames.resolve(indexLive.asPath(), ForwardIndexFileNames.FileIdentifier.DOC_ID, ForwardIndexFileNames.FileVersion.NEXT);
|
Path outputFileDocsId = ForwardIndexFileNames.resolve(indexLive.asPath(), ForwardIndexFileNames.FileIdentifier.DOC_ID, ForwardIndexFileNames.FileVersion.NEXT);
|
||||||
Path outputFileDocsData = ForwardIndexFileNames.resolve(indexLive.asPath(), ForwardIndexFileNames.FileIdentifier.DOC_DATA, ForwardIndexFileNames.FileVersion.NEXT);
|
Path outputFileDocsData = ForwardIndexFileNames.resolve(indexLive.asPath(), ForwardIndexFileNames.FileIdentifier.DOC_DATA, ForwardIndexFileNames.FileVersion.NEXT);
|
||||||
|
|
||||||
ForwardIndexConverter converter = new ForwardIndexConverter(heartbeat,
|
ForwardIndexConverter converter = new ForwardIndexConverter(heartbeat,
|
||||||
inputFile.toFile(),
|
IndexJournalReader.paging(indexStaging.asPath()),
|
||||||
outputFileDocsId,
|
outputFileDocsId,
|
||||||
outputFileDocsData,
|
outputFileDocsData,
|
||||||
domainRankings
|
domainRankings
|
||||||
|
@ -28,7 +28,6 @@ dependencies {
|
|||||||
implementation project(':code:common:service-discovery')
|
implementation project(':code:common:service-discovery')
|
||||||
implementation project(':code:common:service-client')
|
implementation project(':code:common:service-client')
|
||||||
implementation project(':code:common:linkdb')
|
implementation project(':code:common:linkdb')
|
||||||
implementation project(':code:features-index:lexicon')
|
|
||||||
implementation project(':code:features-index:index-journal')
|
implementation project(':code:features-index:index-journal')
|
||||||
implementation project(':code:libraries:message-queue')
|
implementation project(':code:libraries:message-queue')
|
||||||
implementation project(':code:libraries:language-processing')
|
implementation project(':code:libraries:language-processing')
|
||||||
|
@ -1,56 +1,27 @@
|
|||||||
package nu.marginalia.loading.loader;
|
package nu.marginalia.loading.loader;
|
||||||
|
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import lombok.SneakyThrows;
|
|
||||||
import nu.marginalia.keyword.model.DocumentKeywords;
|
import nu.marginalia.keyword.model.DocumentKeywords;
|
||||||
|
import nu.marginalia.model.EdgeUrl;
|
||||||
import nu.marginalia.model.id.UrlIdCodec;
|
import nu.marginalia.model.id.UrlIdCodec;
|
||||||
import nu.marginalia.model.idx.DocumentMetadata;
|
import nu.marginalia.model.idx.DocumentMetadata;
|
||||||
import nu.marginalia.model.EdgeUrl;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.util.concurrent.LinkedBlockingQueue;
|
public class IndexLoadKeywords {
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
|
|
||||||
public class IndexLoadKeywords implements Runnable {
|
|
||||||
private static final Logger logger = LoggerFactory.getLogger(IndexLoadKeywords.class);
|
private static final Logger logger = LoggerFactory.getLogger(IndexLoadKeywords.class);
|
||||||
|
|
||||||
private final LinkedBlockingQueue<InsertTask> insertQueue = new LinkedBlockingQueue<>(32);
|
|
||||||
private final LoaderIndexJournalWriter journalWriter;
|
private final LoaderIndexJournalWriter journalWriter;
|
||||||
|
|
||||||
private record InsertTask(long combinedId,
|
|
||||||
int features,
|
|
||||||
DocumentMetadata metadata,
|
|
||||||
DocumentKeywords wordSet) {}
|
|
||||||
|
|
||||||
private final Thread runThread;
|
|
||||||
|
|
||||||
private volatile boolean canceled = false;
|
private volatile boolean canceled = false;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public IndexLoadKeywords(LoaderIndexJournalWriter journalWriter) {
|
public IndexLoadKeywords(LoaderIndexJournalWriter journalWriter) {
|
||||||
this.journalWriter = journalWriter;
|
this.journalWriter = journalWriter;
|
||||||
runThread = new Thread(this, getClass().getSimpleName());
|
|
||||||
runThread.start();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@SneakyThrows
|
|
||||||
public void run() {
|
|
||||||
while (!canceled) {
|
|
||||||
var data = insertQueue.poll(1, TimeUnit.SECONDS);
|
|
||||||
if (data != null) {
|
|
||||||
journalWriter.putWords(data.combinedId,
|
|
||||||
data.features,
|
|
||||||
data.metadata(),
|
|
||||||
data.wordSet);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void close() throws Exception {
|
public void close() throws Exception {
|
||||||
if (!canceled) {
|
if (!canceled) {
|
||||||
canceled = true;
|
|
||||||
runThread.join();
|
|
||||||
journalWriter.close();
|
journalWriter.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -60,7 +31,7 @@ public class IndexLoadKeywords implements Runnable {
|
|||||||
EdgeUrl url,
|
EdgeUrl url,
|
||||||
int features,
|
int features,
|
||||||
DocumentMetadata metadata,
|
DocumentMetadata metadata,
|
||||||
DocumentKeywords words) throws InterruptedException {
|
DocumentKeywords words) {
|
||||||
long combinedId = UrlIdCodec.encodeId(loaderData.getTargetDomainId(), ordinal);
|
long combinedId = UrlIdCodec.encodeId(loaderData.getTargetDomainId(), ordinal);
|
||||||
|
|
||||||
if (combinedId <= 0) {
|
if (combinedId <= 0) {
|
||||||
@ -68,6 +39,9 @@ public class IndexLoadKeywords implements Runnable {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
insertQueue.put(new InsertTask(combinedId, features, metadata, words));
|
journalWriter.putWords(combinedId,
|
||||||
|
features,
|
||||||
|
metadata,
|
||||||
|
words);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -12,6 +12,7 @@ import nu.marginalia.converting.instruction.instructions.LoadProcessedDocumentWi
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.sql.SQLException;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -33,14 +34,14 @@ public class Loader implements Interpreter, AutoCloseable {
 public final LoaderData data;
 
 public Loader(int sizeHint,
+OldDomains oldDomains,
 SqlLoadDomains sqlLoadDomains,
 SqlLoadDomainLinks sqlLoadDomainLinks,
 SqlLoadProcessedDomain sqlLoadProcessedDomain,
 LdbLoadProcessedDocument loadProcessedDocument,
 SqlLoadDomainMetadata sqlLoadDomainMetadata,
-IndexLoadKeywords indexLoadKeywords)
-{
-data = new LoaderData(sizeHint);
+IndexLoadKeywords indexLoadKeywords) {
+data = new LoaderData(oldDomains, sizeHint);
 
 this.sqlLoadDomains = sqlLoadDomains;
 this.sqlLoadDomainLinks = sqlLoadDomainLinks;
@@ -93,11 +94,7 @@ public class Loader implements Interpreter, AutoCloseable {
 }
 @Override
 public void loadKeywords(EdgeUrl url, int ordinal, int features, DocumentMetadata metadata, DocumentKeywords words) {
-try {
-indexLoadKeywords.load(data, ordinal, url, features, metadata, words);
-} catch (InterruptedException e) {
-throw new RuntimeException(e);
-}
+indexLoadKeywords.load(data, ordinal, url, features, metadata, words);
 }
 
 @Override
@@ -1,17 +1,16 @@
 package nu.marginalia.loading.loader;
 
-import gnu.trove.map.hash.TObjectIntHashMap;
 import nu.marginalia.model.EdgeDomain;
 
 public class LoaderData {
 
-private final TObjectIntHashMap<EdgeDomain> domainIds;
+private final OldDomains oldDomains;
 private EdgeDomain targetDomain;
 public final int sizeHint;
 private int targetDomainId = -1;
 
-public LoaderData(int sizeHint) {
-domainIds = new TObjectIntHashMap<>(10);
+public LoaderData(OldDomains oldDomains, int sizeHint) {
+this.oldDomains = oldDomains;
 this.sizeHint = sizeHint;
 }
 
@@ -21,17 +20,18 @@ public class LoaderData {
 public EdgeDomain getTargetDomain() {
 return targetDomain;
 }
 
 public int getTargetDomainId() {
 if (targetDomainId < 0)
-targetDomainId = domainIds.get(targetDomain);
+targetDomainId = oldDomains.getId(targetDomain);
 return targetDomainId;
 }
 
 public void addDomain(EdgeDomain domain, int id) {
-domainIds.put(domain, id);
+oldDomains.add(domain, id);
 }
 
 public int getDomainId(EdgeDomain domain) {
-return domainIds.get(domain);
+return oldDomains.getId(domain);
 }
 }
@@ -3,6 +3,7 @@ package nu.marginalia.loading.loader;
 import com.google.inject.Inject;
 
 public class LoaderFactory {
+private final OldDomains oldDomains;
 private final SqlLoadDomains sqlLoadDomains;
 private final SqlLoadDomainLinks sqlLoadDomainLinks;
 private final SqlLoadProcessedDomain sqlLoadProcessedDomain;
@@ -11,12 +12,14 @@ public class LoaderFactory {
 private final IndexLoadKeywords indexLoadKeywords;
 
 @Inject
-public LoaderFactory(SqlLoadDomains sqlLoadDomains,
+public LoaderFactory(OldDomains oldDomains,
+SqlLoadDomains sqlLoadDomains,
 SqlLoadDomainLinks sqlLoadDomainLinks,
 SqlLoadProcessedDomain sqlLoadProcessedDomain,
 LdbLoadProcessedDocument sqlLoadProcessedDocument,
 SqlLoadDomainMetadata sqlLoadDomainMetadata,
 IndexLoadKeywords indexLoadKeywords) {
+this.oldDomains = oldDomains;
 
 this.sqlLoadDomains = sqlLoadDomains;
 this.sqlLoadDomainLinks = sqlLoadDomainLinks;
@@ -27,6 +30,6 @@ public class LoaderFactory {
 }
 
 public Loader create(int sizeHint) {
-return new Loader(sizeHint, sqlLoadDomains, sqlLoadDomainLinks, sqlLoadProcessedDomain, sqlLoadProcessedDocument, sqlLoadDomainMetadata, indexLoadKeywords);
+return new Loader(sizeHint, oldDomains, sqlLoadDomains, sqlLoadDomainLinks, sqlLoadProcessedDomain, sqlLoadProcessedDocument, sqlLoadDomainMetadata, indexLoadKeywords);
 }
 }
@@ -5,15 +5,13 @@ import com.google.inject.Singleton;
 import lombok.SneakyThrows;
 import nu.marginalia.db.storage.FileStorageService;
 import nu.marginalia.db.storage.model.FileStorageType;
-import nu.marginalia.dict.OffHeapDictionaryHashMap;
+import nu.marginalia.hash.MurmurHash3_128;
 import nu.marginalia.index.journal.model.IndexJournalEntryData;
 import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
-import nu.marginalia.index.journal.writer.IndexJournalWriterImpl;
+import nu.marginalia.index.journal.writer.IndexJournalWriterPagingImpl;
+import nu.marginalia.index.journal.writer.IndexJournalWriterSingleFileImpl;
 import nu.marginalia.index.journal.writer.IndexJournalWriter;
 import nu.marginalia.keyword.model.DocumentKeywords;
-import nu.marginalia.lexicon.KeywordLexicon;
-import nu.marginalia.lexicon.journal.KeywordLexiconJournal;
-import nu.marginalia.lexicon.journal.KeywordLexiconJournalMode;
 import nu.marginalia.model.idx.DocumentMetadata;
 import nu.marginallia.index.journal.IndexJournalFileNames;
 import org.slf4j.Logger;
@@ -21,40 +19,30 @@ import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.nio.file.Files;
-import java.nio.file.attribute.PosixFilePermissions;
 import java.sql.SQLException;
 import java.util.Arrays;
-import java.util.concurrent.*;
+import static nu.marginalia.index.journal.model.IndexJournalEntryData.MAX_LENGTH;
 
 @Singleton
 public class LoaderIndexJournalWriter {
 
-private final KeywordLexicon lexicon;
 private final IndexJournalWriter indexWriter;
 private static final Logger logger = LoggerFactory.getLogger(LoaderIndexJournalWriter.class);
 
 @Inject
 public LoaderIndexJournalWriter(FileStorageService fileStorageService) throws IOException, SQLException {
-var lexiconArea = fileStorageService.getStorageByType(FileStorageType.LEXICON_STAGING);
 var indexArea = fileStorageService.getStorageByType(FileStorageType.INDEX_STAGING);
 
-var lexiconPath = lexiconArea.asPath().resolve("dictionary.dat");
-var indexPath = IndexJournalFileNames.resolve(indexArea.asPath());
+var existingIndexFiles = IndexJournalFileNames.findJournalFiles(indexArea.asPath());
+for (var existingFile : existingIndexFiles) {
+Files.delete(existingFile);
+}
 
-Files.deleteIfExists(indexPath);
-Files.deleteIfExists(lexiconPath);
+indexWriter = new IndexJournalWriterPagingImpl(indexArea.asPath());
 
-Files.createFile(indexPath, PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--")));
-Files.createFile(lexiconPath, PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--")));
 
-lexicon = new KeywordLexicon(new KeywordLexiconJournal(lexiconPath.toFile(), KeywordLexiconJournalMode.READ_WRITE));
-indexWriter = new IndexJournalWriterImpl(lexicon, indexPath);
 }
 
-private final LinkedBlockingQueue<Runnable> keywordInsertTaskQueue =
-new LinkedBlockingQueue<>(65536);
-private final ExecutorService keywordInsertionExecutor =
-new ThreadPoolExecutor(8, 16, 1, TimeUnit.MINUTES, keywordInsertTaskQueue);
+MurmurHash3_128 hasher = new MurmurHash3_128();
 
 @SneakyThrows
 public void putWords(long combinedId,
@@ -71,60 +59,32 @@ public class LoaderIndexJournalWriter {
 return;
 }
 
-// Due to the very bursty access patterns of this method, doing the actual insertions in separate threads
-// with a chonky work queue is a fairly decent improvement
-for (var chunk : KeywordListChunker.chopList(wordSet, IndexJournalEntryData.MAX_LENGTH)) {
-try {
-keywordInsertionExecutor.submit(() -> loadWords(combinedId, features, metadata, chunk));
-}
-catch (RejectedExecutionException ex) {
-loadWords(combinedId, features, metadata, chunk);
+String[] words = wordSet.keywords();
+long[] wordIds = new long[wordSet.size()];
+long[] meta = wordSet.metadata();
+Arrays.parallelSetAll(wordIds, i -> hasher.hashNearlyASCII(words[i]));
+long[] buffer = new long[MAX_LENGTH * 2];
+for (int start = 0; start < words.length; ) {
+int end = Math.min(start + MAX_LENGTH, words.length);
 
+for (int i = 0; i < end - start; i++) {
+buffer[2*i] = wordIds[i];
+buffer[2*i + 1] = meta[i];
 }
 
+var entry = new IndexJournalEntryData(end-start, buffer);
+var header = new IndexJournalEntryHeader(combinedId, features, metadata.encode());
 
+indexWriter.put(header, entry);
 
+start = end;
 }
 
 }
 
-private void loadWords(long combinedId,
-int features,
-DocumentMetadata metadata,
-DocumentKeywords wordSet) {
-if (null == metadata) {
-logger.warn("Null metadata for {}", combinedId);
-return;
-}
 
-var entry = new IndexJournalEntryData(getOrInsertWordIds(wordSet.keywords(), wordSet.metadata()));
-var header = new IndexJournalEntryHeader(combinedId, features, metadata.encode());
 
-indexWriter.put(header, entry);
-}
 
-private long[] getOrInsertWordIds(String[] words, long[] meta) {
-long[] ids = new long[words.length*2];
-int putIdx = 0;
 
-for (int i = 0; i < words.length; i++) {
-String word = words[i];
 
-long id = lexicon.getOrInsert(word);
-if (id != OffHeapDictionaryHashMap.NO_VALUE) {
-ids[putIdx++] = id;
-ids[putIdx++] = meta[i];
-}
-}
 
-if (putIdx != words.length*2) {
-ids = Arrays.copyOf(ids, putIdx);
-}
-return ids;
-}
 
 public void close() throws Exception {
-keywordInsertionExecutor.shutdown();
-while (!keywordInsertionExecutor.awaitTermination(1, TimeUnit.DAYS)) {
-// ...?
-}
 indexWriter.close();
-lexicon.close();
 }
 }
@@ -0,0 +1,41 @@
+package nu.marginalia.loading.loader;
+
+import com.google.inject.Inject;
+import com.zaxxer.hikari.HikariDataSource;
+import gnu.trove.map.hash.TObjectIntHashMap;
+import nu.marginalia.model.EdgeDomain;
+
+import java.sql.SQLException;
+
+import static java.sql.Statement.SUCCESS_NO_INFO;
+
+public class OldDomains {
+
+private final TObjectIntHashMap<EdgeDomain> knownDomains = new TObjectIntHashMap<>(100_000, 0.75f, -1);
+
+@Inject
+public OldDomains(HikariDataSource dataSource) {
+try (var conn = dataSource.getConnection()) {
+try (var stmt = conn.prepareStatement("""
+SELECT DOMAIN_NAME, ID FROM EC_DOMAIN
+"""))
+{
+var rs = stmt.executeQuery();
+while (rs.next()) {
+knownDomains.put(new EdgeDomain(rs.getString(1)), rs.getInt(2));
+}
+}
+}
+catch (SQLException ex) {
+throw new RuntimeException("Failed to set up loader", ex);
+}
+}
+
+public int getId(EdgeDomain domain) {
+return knownDomains.get(domain);
+}
+
+public void add(EdgeDomain domain, int id) {
+knownDomains.put(domain, id);
+}
+}
@@ -5,7 +5,7 @@ import nu.marginalia.model.EdgeDomain;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import javax.inject.Inject;
+import com.google.inject.Inject;
 import java.sql.SQLException;
 
 public class SqlLoadDomainMetadata {
@@ -6,6 +6,7 @@ import nu.marginalia.model.EdgeDomain;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.IOException;
 import java.sql.Connection;
 import java.sql.SQLException;
 
@@ -4,7 +4,7 @@ import lombok.AllArgsConstructor;
 import lombok.SneakyThrows;
 import lombok.ToString;
 
-import javax.inject.Singleton;
+import com.google.inject.Singleton;
 import java.math.RoundingMode;
 import java.text.DecimalFormat;
 import java.text.NumberFormat;
@@ -3,8 +3,8 @@ package nu.marginalia.assistant.eval;
 import com.opencsv.CSVReader;
 import lombok.SneakyThrows;
 
-import javax.inject.Inject;
-import javax.inject.Singleton;
+import com.google.inject.Inject;
+import com.google.inject.Singleton;
 import java.io.InputStreamReader;
 import java.nio.charset.StandardCharsets;
 import java.text.DecimalFormat;
@@ -36,6 +36,7 @@ dependencies {
 implementation project(':code:api:index-api')
 implementation project(':code:api:process-mqapi')
 implementation project(':code:features-search:screenshots')
+implementation project(':code:features-index:index-journal')
 
 implementation libs.lombok
 annotationProcessor libs.lombok
@@ -296,15 +296,6 @@ public class ConvertAndLoadActor extends AbstractActorPrototype {
 """
 )
 public void switchOver(Long id) throws Exception {
-var live = storageService.getStorageByType(FileStorageType.LEXICON_LIVE);
-var staging = storageService.getStorageByType(FileStorageType.LEXICON_STAGING);
-var fromSource = staging.asPath().resolve("dictionary.dat");
-var liveDest = live.asPath().resolve("dictionary.dat");
-
-// Swap in new lexicon
-logger.info("Moving " + fromSource + " to " + liveDest);
-Files.move(fromSource, liveDest, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE);
-
 // Notify services to switch over
 searchOutbox.sendNotice(SearchMqEndpoints.SWITCH_LINKDB, ":-)");
 indexOutbox.sendNotice(IndexMqEndpoints.INDEX_REINDEX, ":^D");
@@ -8,8 +8,8 @@ import org.slf4j.LoggerFactory;
 import org.slf4j.Marker;
 import org.slf4j.MarkerFactory;
 
-import javax.inject.Inject;
-import javax.inject.Singleton;
+import com.google.inject.Inject;
+import com.google.inject.Singleton;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
@@ -7,9 +7,10 @@ import nu.marginalia.db.storage.model.FileStorage;
 import nu.marginalia.db.storage.model.FileStorageBaseType;
 import nu.marginalia.db.storage.model.FileStorageId;
 import nu.marginalia.db.storage.model.FileStorageType;
+import nu.marginallia.index.journal.IndexJournalFileNames;
 import org.apache.commons.io.IOUtils;
 
-import javax.inject.Inject;
+import com.google.inject.Inject;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.sql.SQLException;
@@ -38,12 +39,10 @@ public class BackupService {
 
 var indexStagingStorage = storageService.getStorageByType(FileStorageType.INDEX_STAGING);
 var linkdbStagingStorage = storageService.getStorageByType(FileStorageType.LINKDB_STAGING);
-var lexiconStagingStorage = storageService.getStorageByType(FileStorageType.LEXICON_STAGING);
 
 backupFileCompressed("links.db", linkdbStagingStorage, backupStorage);
-backupFileCompressed("dictionary.dat", lexiconStagingStorage, backupStorage);
 // This file format is already compressed
-backupFileNoCompression("page-index.dat", indexStagingStorage, backupStorage);
+backupJournal(indexStagingStorage, backupStorage);
 }
 
 
@@ -53,29 +52,37 @@ public class BackupService {
 
 var indexStagingStorage = storageService.getStorageByType(FileStorageType.INDEX_STAGING);
 var linkdbStagingStorage = storageService.getStorageByType(FileStorageType.LINKDB_STAGING);
-var lexiconStagingStorage = storageService.getStorageByType(FileStorageType.LEXICON_STAGING);
 
 restoreBackupCompressed("links.db", linkdbStagingStorage, backupStorage);
-restoreBackupCompressed("dictionary.dat", lexiconStagingStorage, backupStorage);
-restoreBackupNoCompression("page-index.dat", indexStagingStorage, backupStorage);
+restoreJournal(indexStagingStorage, backupStorage);
 }
 
 
-private void backupFileNoCompression(String fileName, FileStorage inputStorage, FileStorage backupStorage) throws IOException
+private void backupJournal(FileStorage inputStorage, FileStorage backupStorage) throws IOException
 {
-try (var is = Files.newInputStream(inputStorage.asPath().resolve(fileName));
-var os = Files.newOutputStream(backupStorage.asPath().resolve(fileName))
-) {
-IOUtils.copyLarge(is, os);
+for (var source : IndexJournalFileNames.findJournalFiles(inputStorage.asPath())) {
+var dest = backupStorage.asPath().resolve(source.toFile().getName());
+try (var is = Files.newInputStream(source);
+var os = Files.newOutputStream(dest)
+) {
+IOUtils.copyLarge(is, os);
+}
 }
 
 }
 
-private void restoreBackupNoCompression(String fileName, FileStorage inputStorage, FileStorage backupStorage) throws IOException {
-try (var is = Files.newInputStream(backupStorage.asPath().resolve(fileName));
-var os = Files.newOutputStream(inputStorage.asPath().resolve(fileName))
-) {
-IOUtils.copyLarge(is, os);
+private void restoreJournal(FileStorage destStorage, FileStorage backupStorage) throws IOException {
+for (var source : IndexJournalFileNames.findJournalFiles(backupStorage.asPath())) {
+var dest = destStorage.asPath().resolve(source.toFile().getName());
+try (var is = Files.newInputStream(source);
+var os = Files.newOutputStream(dest)
+) {
+IOUtils.copyLarge(is, os);
+}
 }
 
 }
 
 private void backupFileCompressed(String fileName, FileStorage inputStorage, FileStorage backupStorage) throws IOException
@@ -35,13 +35,14 @@ dependencies {
 implementation project(':code:features-index:index-query')
 implementation project(':code:features-index:index-forward')
 implementation project(':code:features-index:index-reverse')
-implementation project(':code:features-index:lexicon')
-
 implementation project(':code:features-index:domain-ranking')
 implementation project(':code:features-search:result-ranking')
+implementation project(':third-party:commons-codec')
 
 
 implementation libs.lombok
+testImplementation project(path: ':code:services-core:control-service')
+testImplementation project(':code:common:process')
 annotationProcessor libs.lombok
 implementation libs.bundles.slf4j
 
@@ -8,10 +8,6 @@ import nu.marginalia.db.storage.FileStorageService;
 import nu.marginalia.db.storage.model.FileStorageType;
 import nu.marginalia.index.config.RankingSettings;
 import nu.marginalia.WmsaHome;
-import nu.marginalia.lexicon.KeywordLexicon;
-import nu.marginalia.lexicon.KeywordLexiconReadOnlyView;
-import nu.marginalia.lexicon.journal.KeywordLexiconJournal;
-import nu.marginalia.lexicon.journal.KeywordLexiconJournalMode;
 import nu.marginalia.service.control.ServiceEventLog;
 
 import java.nio.file.Path;
@@ -23,23 +19,6 @@ public class IndexModule extends AbstractModule {
 public void configure() {
 }
 
-@Provides
-@SneakyThrows
-@Singleton
-private KeywordLexiconReadOnlyView createLexicon(ServiceEventLog eventLog, FileStorageService fileStorageService) {
-try {
-eventLog.logEvent("INDEX-LEXICON-LOAD-BEGIN", "");
-
-var area = fileStorageService.getStorageByType(FileStorageType.LEXICON_LIVE);
-var path = area.asPath().resolve("dictionary.dat");
-
-return new KeywordLexiconReadOnlyView(new KeywordLexicon(new KeywordLexiconJournal(path.toFile(), KeywordLexiconJournalMode.READ_ONLY)));
-}
-finally {
-eventLog.logEvent("INDEX-LEXICON-LOAD-OK", "");
-}
-}
-
 @Provides
 public RankingSettings rankingSettings() {
 Path dir = WmsaHome.getHomePath().resolve("conf/ranking-settings.yaml");
|
|||||||
|
|
||||||
@MqRequest(endpoint = IndexMqEndpoints.INDEX_RELOAD_LEXICON)
|
@MqRequest(endpoint = IndexMqEndpoints.INDEX_RELOAD_LEXICON)
|
||||||
public String reloadLexicon(String message) throws Exception {
|
public String reloadLexicon(String message) throws Exception {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
if (!opsService.reloadLexicon()) {
|
|
||||||
throw new IllegalStateException("Ops lock busy");
|
|
||||||
}
|
|
||||||
|
|
||||||
return "ok";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@@ -4,32 +4,18 @@ import com.google.inject.Inject;
 import com.google.inject.Singleton;
 import nu.marginalia.db.storage.FileStorageService;
 import nu.marginalia.db.storage.model.FileStorageType;
-import nu.marginalia.index.forward.ForwardIndexConverter;
 import nu.marginalia.index.forward.ForwardIndexFileNames;
 import nu.marginalia.index.forward.ForwardIndexReader;
-import nu.marginalia.index.full.ReverseIndexFullFileNames;
-import nu.marginalia.index.journal.reader.IndexJournalReaderSingleCompressedFile;
-import nu.marginalia.index.priority.ReverseIndexPrioFileNames;
-import nu.marginalia.index.priority.ReverseIndexPriorityConverter;
-import nu.marginalia.index.full.ReverseIndexFullConverter;
-import nu.marginalia.index.priority.ReverseIndexPriorityReader;
-import nu.marginalia.index.priority.ReverseIndexPriorityParameters;
-import nu.marginalia.index.full.ReverseIndexFullReader;
-import nu.marginalia.ranking.DomainRankings;
 import nu.marginalia.index.index.SearchIndexReader;
 import nu.marginalia.service.control.ServiceHeartbeat;
-import org.checkerframework.checker.units.qual.C;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.File;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.StandardCopyOption;
 import java.sql.SQLException;
-import java.util.concurrent.Callable;
-import java.util.stream.Stream;
 
 @Singleton
 public class IndexServicesFactory {
@@ -55,16 +41,16 @@ public class IndexServicesFactory {
 return searchSetsBase;
 }
 
-public ReverseIndexFullReader getReverseIndexReader() throws IOException {
+public ReverseIndexReader getReverseIndexReader() throws IOException {
 
-return new ReverseIndexFullReader(
+return new ReverseIndexReader(
 ReverseIndexFullFileNames.resolve(liveStorage, ReverseIndexFullFileNames.FileIdentifier.WORDS, ReverseIndexFullFileNames.FileVersion.CURRENT),
 ReverseIndexFullFileNames.resolve(liveStorage, ReverseIndexFullFileNames.FileIdentifier.DOCS, ReverseIndexFullFileNames.FileVersion.CURRENT)
 );
 }
 
-public ReverseIndexPriorityReader getReverseIndexPrioReader() throws IOException {
-return new ReverseIndexPriorityReader(
+public ReverseIndexReader getReverseIndexPrioReader() throws IOException {
+return new ReverseIndexReader(
 ReverseIndexPrioFileNames.resolve(liveStorage, ReverseIndexPrioFileNames.FileIdentifier.WORDS, ReverseIndexPrioFileNames.FileVersion.CURRENT),
 ReverseIndexPrioFileNames.resolve(liveStorage, ReverseIndexPrioFileNames.FileIdentifier.DOCS, ReverseIndexPrioFileNames.FileVersion.CURRENT)
 );
@@ -5,8 +5,8 @@ import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import javax.inject.Inject;
-import javax.inject.Singleton;
+import com.google.inject.Inject;
+import com.google.inject.Singleton;
 import java.sql.SQLException;
 
 @Singleton
@@ -107,8 +107,8 @@ public class SearchIndex {
 return Collections.emptyList();
 }
 
-final int[] orderedIncludes = terms.sortedDistinctIncludes(this::compareKeywords);
-final int[] orderedIncludesPrio = terms.sortedDistinctIncludes(this::compareKeywordsPrio);
+final long[] orderedIncludes = terms.sortedDistinctIncludes(this::compareKeywords);
+final long[] orderedIncludesPrio = terms.sortedDistinctIncludes(this::compareKeywordsPrio);
 
 List<IndexQueryBuilder> queryHeads = new ArrayList<>(10);
 List<IndexQuery> queries = new ArrayList<>(10);
@@ -146,11 +146,11 @@ public class SearchIndex {
 return Collections.emptyList();
 }
 
-for (int orderedInclude : orderedIncludes) {
+for (long orderedInclude : orderedIncludes) {
 query = query.alsoFull(orderedInclude);
 }
 
-for (int term : terms.excludes()) {
+for (long term : terms.excludes()) {
 query = query.notFull(term);
 }
 
@@ -166,14 +166,14 @@ public class SearchIndex {
 return queries;
 }
 
-private int compareKeywords(int a, int b) {
+private int compareKeywords(long a, long b) {
 return Long.compare(
 indexReader.numHits(a),
 indexReader.numHits(b)
 );
 }
 
-private int compareKeywordsPrio(int a, int b) {
+private int compareKeywordsPrio(long a, long b) {
 return Long.compare(
 indexReader.numHitsPrio(a),
 indexReader.numHitsPrio(b)
@@ -184,7 +184,7 @@ public class SearchIndex {
 * document identifiers provided; with metadata for termId. The input array
 * docs[] *must* be sorted.
 */
-public long[] getTermMetadata(int termId, long[] docs) {
+public long[] getTermMetadata(long termId, long[] docs) {
 return indexReader.getMetadata(termId, docs);
 }
 
@@ -199,10 +199,10 @@ public class SearchIndex {
 return indexReader.totalDocCount();
 }
 
-public int getTermFrequency(int id) {
+public int getTermFrequency(long id) {
 return (int) indexReader.numHits(id);
 }
-public int getTermFrequencyPrio(int id) {
+public int getTermFrequencyPrio(long id) {
 return (int) indexReader.numHitsPrio(id);
 }
 }
@@ -1,19 +1,15 @@
 package nu.marginalia.index.index;
 
-import gnu.trove.set.hash.TIntHashSet;
-import nu.marginalia.index.priority.ReverseIndexPriorityReader;
+import gnu.trove.set.hash.TLongHashSet;
+import nu.marginalia.index.ReverseIndexReader;
 import nu.marginalia.index.query.IndexQuery;
 import nu.marginalia.index.query.IndexQueryBuilder;
 import nu.marginalia.index.query.filter.QueryFilterStepIf;
-import nu.marginalia.index.full.ReverseIndexFullReader;
 
-import java.util.stream.Collectors;
-import java.util.stream.IntStream;
-
 public class SearchIndexQueryBuilder implements IndexQueryBuilder {
 private final IndexQuery query;
-private final ReverseIndexFullReader reverseIndexFullReader;
-private final ReverseIndexPriorityReader reverseIndexPrioReader;
+private final ReverseIndexReader reverseIndexFullReader;
+private final ReverseIndexReader reverseIndexPrioReader;
 
 /* Keep track of already added include terms to avoid redundant checks.
 *
@@ -21,11 +17,11 @@ public class SearchIndexQueryBuilder implements IndexQueryBuilder {
 * first check one index and then another for the same term. At the moment, that
 * makes no sense, but in the future, that might be a thing one might want to do.
 * */
-private final TIntHashSet alreadyConsideredTerms = new TIntHashSet();
+private final TLongHashSet alreadyConsideredTerms = new TLongHashSet();
 
-SearchIndexQueryBuilder(ReverseIndexFullReader reverseIndexFullReader,
-ReverseIndexPriorityReader reverseIndexPrioReader,
-IndexQuery query, int... sourceTerms)
+SearchIndexQueryBuilder(ReverseIndexReader reverseIndexFullReader,
+ReverseIndexReader reverseIndexPrioReader,
+IndexQuery query, long... sourceTerms)
 {
 this.query = query;
 this.reverseIndexFullReader = reverseIndexFullReader;
@@ -34,7 +30,7 @@ public class SearchIndexQueryBuilder implements IndexQueryBuilder {
 alreadyConsideredTerms.addAll(sourceTerms);
 }
 
-public IndexQueryBuilder alsoFull(int termId) {
+public IndexQueryBuilder alsoFull(long termId) {
 
 if (alreadyConsideredTerms.add(termId)) {
 query.addInclusionFilter(reverseIndexFullReader.also(termId));
@@ -43,7 +39,7 @@ public class SearchIndexQueryBuilder implements IndexQueryBuilder {
 return this;
 }
 
-public IndexQueryBuilder alsoPrio(int termId) {
+public IndexQueryBuilder alsoPrio(long termId) {
 
 if (alreadyConsideredTerms.add(termId)) {
 query.addInclusionFilter(reverseIndexPrioReader.also(termId));
@@ -52,7 +48,7 @@ public class SearchIndexQueryBuilder implements IndexQueryBuilder {
 return this;
 }
 
-public IndexQueryBuilder notFull(int termId) {
+public IndexQueryBuilder notFull(long termId) {
 
 query.addInclusionFilter(reverseIndexFullReader.not(termId));
 
@@ -1,11 +1,10 @@
 package nu.marginalia.index.index;
 
+import nu.marginalia.index.ReverseIndexReader;
 import nu.marginalia.index.forward.ForwardIndexReader;
 import nu.marginalia.index.forward.ParamMatchingQueryFilter;
 import nu.marginalia.index.query.*;
 import nu.marginalia.index.query.filter.QueryFilterStepIf;
-import nu.marginalia.index.priority.ReverseIndexPriorityReader;
-import nu.marginalia.index.full.ReverseIndexFullReader;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -16,25 +15,25 @@ public class SearchIndexReader {
 private final Logger logger = LoggerFactory.getLogger(getClass());
 
 private final ForwardIndexReader forwardIndexReader;
-private final ReverseIndexFullReader reverseIndexFullReader;
-private final ReverseIndexPriorityReader reverseIndexPriorityReader;
+private final ReverseIndexReader reverseIndexFullReader;
+private final ReverseIndexReader reverseIndexPriorityReader;
 
 public SearchIndexReader(ForwardIndexReader forwardIndexReader,
-ReverseIndexFullReader reverseIndexFullReader,
-ReverseIndexPriorityReader reverseIndexPriorityReader) {
+ReverseIndexReader reverseIndexFullReader,
+ReverseIndexReader reverseIndexPriorityReader) {
 this.forwardIndexReader = forwardIndexReader;
 this.reverseIndexFullReader = reverseIndexFullReader;
 this.reverseIndexPriorityReader = reverseIndexPriorityReader;
 }
 
-public IndexQueryBuilder findPriorityWord(IndexQueryPriority priority, int wordId, int fetchSizeMultiplier) {
-var sources = List.of(reverseIndexPriorityReader.priorityDocuments(wordId));
+public IndexQueryBuilder findPriorityWord(IndexQueryPriority priority, long wordId, int fetchSizeMultiplier) {
+var sources = List.of(reverseIndexPriorityReader.documents(wordId));
 
 return new SearchIndexQueryBuilder(reverseIndexFullReader, reverseIndexPriorityReader,
 new IndexQuery(sources, priority, fetchSizeMultiplier), wordId);
 }
 
-public IndexQueryBuilder findFullWord(IndexQueryPriority priority, int wordId, int fetchSizeMultiplier) {
+public IndexQueryBuilder findFullWord(IndexQueryPriority priority, long wordId, int fetchSizeMultiplier) {
 var sources = List.of(reverseIndexFullReader.documents(wordId));
 
 return new SearchIndexQueryBuilder(reverseIndexFullReader, reverseIndexPriorityReader,
@@ -45,14 +44,14 @@ public class SearchIndexReader {
 return new ParamMatchingQueryFilter(params, forwardIndexReader);
 }
 
-public long numHits(int word) {
+public long numHits(long word) {
 return reverseIndexFullReader.numDocuments(word);
 }
-public long numHitsPrio(int word) {
+public long numHitsPrio(long word) {
 return reverseIndexPriorityReader.numDocuments(word);
 }
 
-public long[] getMetadata(int wordId, long[] docIds) {
+public long[] getMetadata(long wordId, long[] docIds) {
 return reverseIndexFullReader.getTermMeta(wordId, docIds);
 }
 
@@ -1,35 +1,35 @@
 package nu.marginalia.index.index;
 
-import it.unimi.dsi.fastutil.ints.IntArrayList;
-import it.unimi.dsi.fastutil.ints.IntComparator;
-import it.unimi.dsi.fastutil.ints.IntList;
-import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
+import it.unimi.dsi.fastutil.longs.LongArrayList;
+import it.unimi.dsi.fastutil.longs.LongComparator;
+import it.unimi.dsi.fastutil.longs.LongList;
+import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
 
 import java.util.Collections;
 import java.util.List;
 
 public record SearchIndexSearchTerms(
-IntList includes,
-IntList excludes,
-IntList priority,
-List<IntList> coherences
+LongList includes,
+LongList excludes,
+LongList priority,
+List<LongList> coherences
 )
 {
 public SearchIndexSearchTerms() {
-this(IntList.of(), IntList.of(), IntList.of(), Collections.emptyList());
+this(LongList.of(), LongList.of(), LongList.of(), Collections.emptyList());
 }
 
 public boolean isEmpty() {
 return includes.isEmpty();
 }
 
-public int[] sortedDistinctIncludes(IntComparator comparator) {
+public long[] sortedDistinctIncludes(LongComparator comparator) {
 if (includes.isEmpty())
-return includes.toIntArray();
+return includes.toLongArray();
 
-IntList list = new IntArrayList(new IntOpenHashSet(includes));
+LongList list = new LongArrayList(new LongOpenHashSet(includes));
 list.sort(comparator);
-return list.toIntArray();
+return list.toLongArray();
 }
 
 public int size() {
Some files were not shown because too many files have changed in this diff.