(index) Integrate positions data with indexes WIP

This change integrates the new positions data with the forward and reverse indexes.

The ranking code is still only partially rewritten.
Viktor Lofgren 2024-06-10 15:09:06 +02:00
parent 9f982a0c3d
commit 36160988e2
58 changed files with 1417 additions and 650 deletions

View File

@ -5,8 +5,8 @@ import java.util.stream.IntStream;
/** A compiled index service query */
public class CompiledQueryInt {
private final CqExpression root;
private final CqDataInt data;
public final CqExpression root;
public final CqDataInt data;
public CompiledQueryInt(CqExpression root, CqDataInt data) {
this.root = root;
@ -26,7 +26,7 @@ public class CompiledQueryInt {
return IntStream.range(0, data.size());
}
public long at(int index) {
public int at(int index) {
return data.get(index);
}

View File

@ -61,7 +61,8 @@ public class CompiledQueryParser {
String[] cqData = new String[wordIds.size()];
wordIds.forEach((w, i) -> cqData[i] = w);
return new CompiledQuery<>(root, new CqData<>(cqData));
return root.newQuery(cqData);
}

View File

@ -8,6 +8,18 @@ import java.util.stream.Stream;
*
*/
public sealed interface CqExpression {
/** Create a new query for the provided data using this expression as the root */
default <T> CompiledQuery<T> newQuery(T[] data) {
return new CompiledQuery<>(this, data);
}
/** Create a new query for the provided data using this expression as the root */
default CompiledQueryInt newQuery(int[] data) {
return new CompiledQueryInt(this, new CqDataInt(data));
}
/** Create a new query for the provided data using this expression as the root */
default CompiledQueryLong newQuery(long[] data) {
return new CompiledQueryLong(this, new CqDataLong(data));
}
Stream<Word> stream();
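
The newQuery overloads above let a caller build each typed query variant directly from the same expression root, so the per-term data arrays stay aligned with the leaves of one tree. A minimal usage sketch; the root and the array contents are hypothetical:

// assuming `root` is the parsed CqExpression for a query with two leaves
CompiledQuery<String> terms   = root.newQuery(new String[] { "marginalia", "search" });
CompiledQueryLong     termIds = root.newQuery(new long[]   { 101L, 102L });
CompiledQueryInt      counts  = root.newQuery(new int[]    { 3, 1 });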

View File

@ -2,6 +2,7 @@ package nu.marginalia.api.searchquery.model.compiled.aggregate;
import it.unimi.dsi.fastutil.longs.LongSet;
import nu.marginalia.api.searchquery.model.compiled.CompiledQuery;
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryInt;
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
import java.util.ArrayList;
@ -36,7 +37,10 @@ public class CompiledQueryAggregates {
public static <T> int intMaxMinAggregate(CompiledQuery<T> query, ToIntFunction<T> operator) {
return query.root.visit(new CqIntMaxMinOperator(query, operator));
}
/** Apply the operator to each leaf node, then return the highest minimum value found along any path */
public static <T> int intMaxMinAggregate(CompiledQueryInt query, IntUnaryOperator operator) {
return query.root.visit(new CqIntMaxMinOperator(query, operator));
}
/** Apply the operator to each leaf node, then return the highest minimum value found along any path */
public static int intMaxMinAggregate(CompiledQueryLong query, LongToIntFunction operator) {
return query.root.visit(new CqIntMaxMinOperator(query, operator));
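
A worked example of the max-min semantics described in the Javadoc, assuming (per the visitor's stated behaviour) that an AND node takes the minimum of its children and an OR node the maximum; the query shape and counts are made up:

// query (a AND b) OR c, with per-leaf position counts [3, 1, 2]
CompiledQueryInt countsQuery = root.newQuery(new int[] { 3, 1, 2 });
int best = CompiledQueryAggregates.intMaxMinAggregate(countsQuery, p -> p);
// AND branch: min(3, 1) = 1; OR node: max(1, 2) = 2  ->  best == 2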

View File

@ -1,6 +1,7 @@
package nu.marginalia.api.searchquery.model.compiled.aggregate;
import nu.marginalia.api.searchquery.model.compiled.CompiledQuery;
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryInt;
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
import nu.marginalia.api.searchquery.model.compiled.CqExpression;
@ -21,7 +22,9 @@ public class CqIntMaxMinOperator implements CqExpression.IntVisitor {
public CqIntMaxMinOperator(CompiledQueryLong query, LongToIntFunction operator) {
this.operator = idx -> operator.applyAsInt(query.at(idx));
}
public CqIntMaxMinOperator(CompiledQueryInt query, IntUnaryOperator operator) {
this.operator = idx -> operator.applyAsInt(query.at(idx));
}
@Override
public int onAnd(List<? extends CqExpression> parts) {
int value = parts.getFirst().visit(this);

View File

@ -36,6 +36,10 @@ public class SearchQuery {
@Deprecated // why does this exist?
private double value = 0;
public static SearchQueryBuilder builder(String compiledQuery) {
return new SearchQueryBuilder(compiledQuery);
}
public SearchQuery() {
this.compiledQuery = "";
this.searchTermsInclude = new ArrayList<>();
@ -81,5 +85,45 @@ public class SearchQuery {
return sb.toString();
}
public static class SearchQueryBuilder {
private final String compiledQuery;
private List<String> searchTermsInclude = new ArrayList<>();
private List<String> searchTermsExclude = new ArrayList<>();
private List<String> searchTermsAdvice = new ArrayList<>();
private List<String> searchTermsPriority = new ArrayList<>();
private List<List<String>> searchTermCoherences = new ArrayList<>();
private SearchQueryBuilder(String compiledQuery) {
this.compiledQuery = compiledQuery;
}
public SearchQueryBuilder include(String... terms) {
searchTermsInclude.addAll(List.of(terms));
return this;
}
public SearchQueryBuilder exclude(String... terms) {
searchTermsExclude.addAll(List.of(terms));
return this;
}
public SearchQueryBuilder advice(String... terms) {
searchTermsAdvice.addAll(List.of(terms));
return this;
}
public SearchQueryBuilder priority(String... terms) {
searchTermsPriority.addAll(List.of(terms));
return this;
}
public SearchQueryBuilder coherences(String... coherences) {
searchTermCoherences.add(List.of(coherences));
return this;
}
public SearchQuery build() {
return new SearchQuery(compiledQuery, searchTermsInclude, searchTermsExclude, searchTermsAdvice, searchTermsPriority, searchTermCoherences);
}
}
}
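
A short usage sketch for the new builder; the query string and terms are hypothetical:

SearchQuery query = SearchQuery.builder("plato ( philosophy | dialogues )")
        .include("philosophy")
        .exclude("pdf")
        .priority("plato")
        .coherences("plato", "dialogues")  // a group of terms expected to occur together
        .build();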

View File

@ -32,13 +32,11 @@ public class SearchResultItem implements Comparable<SearchResultItem> {
public SearchResultItem(long combinedId,
long encodedDocMetadata,
int htmlFeatures,
boolean hasPrioTerm) {
int htmlFeatures) {
this.combinedId = combinedId;
this.encodedDocMetadata = encodedDocMetadata;
this.keywordScores = new ArrayList<>();
this.htmlFeatures = htmlFeatures;
this.hasPrioTerm = hasPrioTerm;
}

View File

@ -83,8 +83,10 @@ public class ForwardIndexConverter {
int ranking = domainRankings.getRanking(domainId);
long meta = DocumentMetadata.encodeRank(pointer.documentMeta(), ranking);
long features = pointer.documentFeatures() | ((long) pointer.documentSize() << 32L);
docFileData.set(entryOffset + ForwardIndexParameters.METADATA_OFFSET, meta);
docFileData.set(entryOffset + ForwardIndexParameters.FEATURES_OFFSET, pointer.documentFeatures());
docFileData.set(entryOffset + ForwardIndexParameters.FEATURES_OFFSET, features);
}
progress.progress(TaskSteps.FORCE);

View File

@ -82,9 +82,19 @@ public class ForwardIndexReader {
long offset = idxForDoc(docId);
if (offset < 0) return 0;
return (int) data.get(ENTRY_SIZE * offset + FEATURES_OFFSET);
return (int) (data.get(ENTRY_SIZE * offset + FEATURES_OFFSET) & 0xFFFF_FFFFL);
}
public int getDocumentSize(long docId) {
assert UrlIdCodec.getRank(docId) == 0 : "Forward Index Reader fed dirty reverse index id";
long offset = idxForDoc(docId);
if (offset < 0) return 0;
return (int) (data.get(ENTRY_SIZE * offset + FEATURES_OFFSET) >>> 32L);
}
private int idxForDoc(long docId) {
assert UrlIdCodec.getRank(docId) == 0 : "Forward Index Reader fed dirty reverse index id";
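
The converter now packs two 32-bit fields into the single long that previously held only the features: the low half keeps the HTML features, the high half carries the document size, and the reader masks or shifts accordingly. A minimal round-trip sketch with made-up values:

int documentFeatures = 5;
int documentSize = 1_280;  // hypothetical token count

long packed = documentFeatures | ((long) documentSize << 32);

int featuresAgain = (int) (packed & 0xFFFF_FFFFL);  // what getDocumentFeatures() returns
int sizeAgain     = (int) (packed >>> 32);          // what getDocumentSize() returns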

View File

@ -79,6 +79,7 @@ class ForwardIndexConverterTest {
writer.put(
new IndexJournalEntryHeader(createId(id, id/20),
id%3,
15,
(id % 5)),
new IndexJournalEntryData(
new String[]{},

View File

@ -17,14 +17,17 @@ import nu.marginalia.model.idx.DocumentMetadata;
*/
public record IndexJournalEntryHeader(int entrySize,
int documentFeatures,
int documentSize,
long combinedId,
long documentMeta) {
public IndexJournalEntryHeader(long combinedId,
int documentFeatures,
int documentSize,
long documentMeta) {
this(-1,
documentFeatures,
documentSize,
combinedId,
documentMeta);
}

View File

@ -28,12 +28,17 @@ public class IndexJournalReadEntry implements Iterable<IndexJournalEntryTermData
public static IndexJournalReadEntry read(DataInputStream inputStream) throws IOException {
final long sizeBlock = inputStream.readLong();
final int entrySize = (int) (sizeBlock >>> 48L);
final int docSize = (int) ((sizeBlock >>> 32L) & 0xFFFFL);
final int docFeatures = (int) (sizeBlock & 0xFFFF_FFFFL);
final long docId = inputStream.readLong();
final long meta = inputStream.readLong();
var header = new IndexJournalEntryHeader(
(int) (sizeBlock >>> 32L),
(int) (sizeBlock & 0xFFFF_FFFFL),
entrySize,
docFeatures,
docSize,
docId,
meta);
@ -57,6 +62,10 @@ public class IndexJournalReadEntry implements Iterable<IndexJournalEntryTermData
return header.documentFeatures();
}
public int documentSize() {
return header.documentSize();
}
public int domainId() {
return UrlIdCodec.getDomainId(docId());
}
@ -88,7 +97,7 @@ class TermDataIterator implements Iterator<IndexJournalEntryTermData> {
public IndexJournalEntryTermData next() {
// read the metadata for the term
long termId = buffer.getLong();
long meta = buffer.getLong();
long meta = buffer.getShort();
// read the size of the sequence data
int size = buffer.get() & 0xFF;
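
The writer emits the document header as short recordSize, short documentSize, int documentFeatures followed by two longs; the reader above consumes the first eight bytes back as one big-endian long and unpacks them with shifts and masks. A worked example with made-up values:

short recordSize = 120;
short docSize = 850;
int docFeatures = 5;

long sizeBlock = ((long) recordSize << 48) | ((long) docSize << 32) | (docFeatures & 0xFFFF_FFFFL);

int entrySizeAgain = (int) (sizeBlock >>> 48);             // 120
int docSizeAgain   = (int) ((sizeBlock >>> 32) & 0xFFFFL); // 850
int featuresAgain  = (int) (sizeBlock & 0xFFFF_FFFFL);     // 5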

View File

@ -13,7 +13,7 @@ public interface IndexJournalReader {
int FILE_HEADER_SIZE_BYTES = 8 * FILE_HEADER_SIZE_LONGS;
int DOCUMENT_HEADER_SIZE_BYTES = 24;
int TERM_HEADER_SIZE_BYTES = 17;
int TERM_HEADER_SIZE_BYTES = 11;
/** Create a reader for a single file. */
static IndexJournalReader singleFile(Path fileName) throws IOException {
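
TERM_HEADER_SIZE_BYTES drops from 17 to 11 because per-term metadata is now stored as a short rather than a full long (compare the writer's putShort with the reader's getShort). The layout behind the constant, for reference:

// per-term record in the journal after this change (TERM_HEADER_SIZE_BYTES = 11):
//   long  termId         8 bytes
//   short termMetadata   2 bytes   (previously a full long, hence 17 before)
//   byte  positionsSize  1 byte
//   byte[] positions     positionsSize bytes of gamma-coded position data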

View File

@ -97,6 +97,9 @@ class SingleFileJournalPointer implements IndexJournalPointer {
@Override
public int documentFeatures() { return entry.documentFeatures(); }
@Override
public int documentSize() { return entry.documentSize(); }
/** Return an iterator over the terms in the current document.
* This iterator is not valid after calling nextDocument().
*/

View File

@ -42,6 +42,8 @@ public interface IndexJournalPointer extends Iterable<IndexJournalEntryTermData>
*/
int documentFeatures();
int documentSize();
/** Concatenate a number of journal pointers */
static IndexJournalPointer concatenate(IndexJournalPointer... pointers) {
if (pointers.length == 1)
@ -94,6 +96,11 @@ class JoiningJournalPointer implements IndexJournalPointer {
return pointers[pIndex].documentFeatures();
}
@Override
public int documentSize() {
return pointers[pIndex].documentSize();
}
@NotNull
@Override
public Iterator<IndexJournalEntryTermData> iterator() {
@ -146,6 +153,12 @@ class FilteringJournalPointer implements IndexJournalPointer {
return base.documentFeatures();
}
@Override
public int documentSize() {
return base.documentSize();
}
@NotNull
@Override
public Iterator<IndexJournalEntryTermData> iterator() {

View File

@ -2,7 +2,6 @@ package nu.marginalia.index.journal.writer;
import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
import nu.marginalia.index.journal.model.IndexJournalEntryTermData;
import java.io.IOException;

View File

@ -81,12 +81,6 @@ public class IndexJournalWriterSingleFileImpl implements IndexJournalWriter{
public int put(IndexJournalEntryHeader header,
IndexJournalEntryData data)
{
if (dataBuffer.capacity() - dataBuffer.position() < 3*8) {
dataBuffer.flip();
compressingStream.compress(dataBuffer);
dataBuffer.clear();
}
final long[] keywords = data.termIds();
final long[] metadata = data.metadata();
final var positions = data.positions();
@ -94,16 +88,30 @@ public class IndexJournalWriterSingleFileImpl implements IndexJournalWriter{
int recordSize = 0; // document header size is 3 longs
for (int i = 0; i < keywords.length; i++) {
// term header size is 2 longs
recordSize += IndexJournalReader.TERM_HEADER_SIZE_BYTES + positions[i].size();
recordSize += IndexJournalReader.TERM_HEADER_SIZE_BYTES + positions[i].bufferSize();
}
dataBuffer.putInt(recordSize);
if (recordSize > Short.MAX_VALUE) {
// This should never happen, but if it does, we should log it and deal with it in a way that doesn't corrupt the file
// (32 KB is *a lot* of data for a single document, larger than the uncompressed HTML of most documents)
logger.error("Omitting entry: Record size {} exceeds maximum representable size of {}", recordSize, Short.MAX_VALUE);
return 0;
}
if (dataBuffer.capacity() - dataBuffer.position() < 3*8) {
dataBuffer.flip();
compressingStream.compress(dataBuffer);
dataBuffer.clear();
}
dataBuffer.putShort((short) recordSize);
dataBuffer.putShort((short) Math.clamp(0, header.documentSize(), Short.MAX_VALUE));
dataBuffer.putInt(header.documentFeatures());
dataBuffer.putLong(header.combinedId());
dataBuffer.putLong(header.documentMeta());
for (int i = 0; i < keywords.length; i++) {
int requiredSize = IndexJournalReader.TERM_HEADER_SIZE_BYTES + positions[i].size();
int requiredSize = IndexJournalReader.TERM_HEADER_SIZE_BYTES + positions[i].bufferSize();
if (dataBuffer.capacity() - dataBuffer.position() < requiredSize) {
dataBuffer.flip();
@ -112,8 +120,8 @@ public class IndexJournalWriterSingleFileImpl implements IndexJournalWriter{
}
dataBuffer.putLong(keywords[i]);
dataBuffer.putLong(metadata[i]);
dataBuffer.put((byte) positions[i].size());
dataBuffer.putShort((short) metadata[i]);
dataBuffer.put((byte) positions[i].bufferSize());
dataBuffer.put(positions[i].buffer());
}

View File

@ -1,6 +1,8 @@
package nu.marginalia.index.journal;
import it.unimi.dsi.fastutil.ints.IntList;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongList;
import nu.marginalia.hash.MurmurHash3_128;
import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
@ -8,6 +10,11 @@ import nu.marginalia.index.journal.model.IndexJournalEntryTermData;
import nu.marginalia.index.journal.reader.IndexJournalReaderPagingImpl;
import nu.marginalia.index.journal.writer.IndexJournalWriterSingleFileImpl;
import nu.marginalia.index.journal.reader.IndexJournalReaderSingleFile;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.model.id.UrlIdCodec;
import nu.marginalia.model.idx.DocumentMetadata;
import nu.marginalia.model.idx.WordFlags;
import nu.marginalia.model.idx.WordMetadata;
import nu.marginalia.sequence.GammaCodedSequence;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
@ -18,8 +25,9 @@ import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Iterator;
import java.util.List;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import static org.junit.jupiter.api.Assertions.*;
@ -52,7 +60,7 @@ public class IndexJournalWriterTest {
public void testSingleFile() {
try (var writer = new IndexJournalWriterSingleFileImpl(tempFile)) {
// Write two documents with two terms each
writer.put(new IndexJournalEntryHeader(11, 22, 33),
writer.put(new IndexJournalEntryHeader(11, 22, 10, 33),
new IndexJournalEntryData(
new String[]{"word1", "word2"},
new long[]{44, 55},
@ -61,7 +69,7 @@ public class IndexJournalWriterTest {
gcs(2, 4, 6),
})
);
writer.put(new IndexJournalEntryHeader(12, 23, 34),
writer.put(new IndexJournalEntryHeader(12, 23, 11, 34),
new IndexJournalEntryData(
new String[]{"word1", "word2"},
new long[]{45, 56},
@ -90,6 +98,7 @@ public class IndexJournalWriterTest {
assertEquals(11, ptr.documentId());
assertEquals(22, ptr.documentFeatures());
assertEquals(33, ptr.documentMeta());
assertEquals(10, ptr.documentSize());
iter = ptr.iterator();
@ -116,6 +125,7 @@ public class IndexJournalWriterTest {
assertEquals(12, ptr.documentId());
assertEquals(23, ptr.documentFeatures());
assertEquals(34, ptr.documentMeta());
assertEquals(11, ptr.documentSize());
iter = ptr.iterator();
// Term 1
@ -147,7 +157,7 @@ public class IndexJournalWriterTest {
@Test
public void testMultiFile() {
try (var writer = new IndexJournalWriterSingleFileImpl(tempFile)) {
writer.put(new IndexJournalEntryHeader(11, 22, 33),
writer.put(new IndexJournalEntryHeader(11, 22, 10, 33),
new IndexJournalEntryData(
new String[]{"word1", "word2"},
new long[]{44, 55},
@ -162,7 +172,7 @@ public class IndexJournalWriterTest {
}
try (var writer = new IndexJournalWriterSingleFileImpl(tempFile2)) {
writer.put(new IndexJournalEntryHeader(12, 23, 34),
writer.put(new IndexJournalEntryHeader(12, 23, 11, 34),
new IndexJournalEntryData(
new String[]{"word1", "word2"},
new long[]{45, 56},
@ -191,6 +201,7 @@ public class IndexJournalWriterTest {
assertEquals(11, ptr.documentId());
assertEquals(22, ptr.documentFeatures());
assertEquals(33, ptr.documentMeta());
assertEquals(10, ptr.documentSize());
iter = ptr.iterator();
@ -217,6 +228,7 @@ public class IndexJournalWriterTest {
assertEquals(12, ptr.documentId());
assertEquals(23, ptr.documentFeatures());
assertEquals(34, ptr.documentMeta());
assertEquals(11, ptr.documentSize());
iter = ptr.iterator();
// Term 1
@ -249,7 +261,7 @@ public class IndexJournalWriterTest {
public void testSingleFileIterTwice() {
try (var writer = new IndexJournalWriterSingleFileImpl(tempFile)) {
// Write two documents with two terms each
writer.put(new IndexJournalEntryHeader(11, 22, 33),
writer.put(new IndexJournalEntryHeader(11, 22, 10, 33),
new IndexJournalEntryData(
new String[]{"word1", "word2"},
new long[]{44, 55},
@ -277,6 +289,7 @@ public class IndexJournalWriterTest {
assertTrue(ptr.nextDocument());
assertEquals(11, ptr.documentId());
assertEquals(22, ptr.documentFeatures());
assertEquals(10, ptr.documentSize());
assertEquals(33, ptr.documentMeta());
iter = ptr.iterator();
@ -307,7 +320,7 @@ public class IndexJournalWriterTest {
public void testFiltered() {
try (var writer = new IndexJournalWriterSingleFileImpl(tempFile)) {
// Write two documents with two terms each
writer.put(new IndexJournalEntryHeader(11, 22, 33),
writer.put(new IndexJournalEntryHeader(11, 22, 10, 33),
new IndexJournalEntryData(
new String[]{"word1", "word2"},
new long[]{44, 55},
@ -316,7 +329,7 @@ public class IndexJournalWriterTest {
gcs(2, 4, 6),
})
);
writer.put(new IndexJournalEntryHeader(12, 23, 34),
writer.put(new IndexJournalEntryHeader(12, 23, 11, 34),
new IndexJournalEntryData(
new String[]{"word1", "word2"},
new long[]{45, 56},
@ -344,6 +357,7 @@ public class IndexJournalWriterTest {
assertEquals(12, ptr.documentId());
assertEquals(23, ptr.documentFeatures());
assertEquals(34, ptr.documentMeta());
assertEquals(11, ptr.documentSize());
iter = ptr.iterator();
// Term 1
@ -364,4 +378,72 @@ public class IndexJournalWriterTest {
}
}
@Test
public void testIntegrationScenario() throws IOException {
Map<Long, Integer> wordMap = new HashMap<>();
for (int i = 0; i < 512; i++) {
wordMap.put(hasher.hashKeyword(Integer.toString(i)), i);
}
try (var writer = new IndexJournalWriterSingleFileImpl(tempFile)) {
for (int idc = 1; idc < 512; idc++) {
int id = idc;
int[] factors = IntStream
.rangeClosed(1, id)
.filter(v -> (id % v) == 0)
.toArray();
System.out.println("id:" + id + " factors: " + Arrays.toString(factors));
long fullId = UrlIdCodec.encodeId((32 - (id % 32)), id);
var header = new IndexJournalEntryHeader(factors.length, 0, 100, fullId, new DocumentMetadata(0, 0, 0, 0, id % 5, id, id % 20, (byte) 0).encode());
String[] keywords = IntStream.of(factors).mapToObj(Integer::toString).toArray(String[]::new);
long[] metadata = new long[factors.length];
for (int i = 0; i < factors.length; i++) {
metadata[i] = new WordMetadata(i, EnumSet.of(WordFlags.Title)).encode();
}
GammaCodedSequence[] positions = new GammaCodedSequence[factors.length];
ByteBuffer wa = ByteBuffer.allocate(16);
for (int i = 0; i < factors.length; i++) {
positions[i] = GammaCodedSequence.generate(wa, i + 1);
}
writer.put(header, new IndexJournalEntryData(keywords, metadata, positions));
}
}
try (var ptr = new IndexJournalReaderSingleFile(tempFile).newPointer()) {
while (ptr.nextDocument()) {
int ordinal = UrlIdCodec.getDocumentOrdinal(ptr.documentId());
System.out.println(ordinal);
var expectedFactors =
new LongArrayList(IntStream
.rangeClosed(1, ordinal)
.filter(v -> (ordinal % v) == 0)
.mapToObj(Integer::toString)
.mapToLong(hasher::hashKeyword)
.toArray());
LongList foundIds = new LongArrayList();
var iter = ptr.iterator();
while (iter.hasNext()) {
var termData = iter.next();
foundIds.add(termData.termId());
}
if (!expectedFactors.equals(foundIds)) {
System.out.println("Found: ");
System.out.println(foundIds.stream().map(fac -> wordMap.getOrDefault(fac, -1)).map(Objects::toString).collect(Collectors.joining(",")));
System.out.println("Expected: ");
System.out.println(expectedFactors.stream().map(fac -> wordMap.getOrDefault(fac, -1)).map(Objects::toString).collect(Collectors.joining(",")));
fail();
}
assertEquals(expectedFactors, foundIds);
}
}
}
}

View File

@ -3,6 +3,8 @@ package nu.marginalia.index;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.btree.BTreeReader;
import nu.marginalia.index.positions.TermData;
import nu.marginalia.index.positions.PositionsFileReader;
import nu.marginalia.index.query.EmptyEntrySource;
import nu.marginalia.index.query.EntrySource;
import nu.marginalia.index.query.ReverseIndexRejectFilter;
@ -14,9 +16,9 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.lang.foreign.Arena;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.concurrent.Executors;
public class ReverseIndexReader {
@ -27,9 +29,16 @@ public class ReverseIndexReader {
private final BTreeReader wordsBTreeReader;
private final String name;
public ReverseIndexReader(String name, Path words, Path documents) throws IOException {
private final PositionsFileReader positionsFileReader;
public ReverseIndexReader(String name,
Path words,
Path documents,
PositionsFileReader positionsFileReader) throws IOException {
this.name = name;
this.positionsFileReader = positionsFileReader;
if (!Files.exists(words) || !Files.exists(documents)) {
this.words = null;
this.documents = null;
@ -133,31 +142,29 @@ public class ReverseIndexReader {
offset);
}
public long[] getTermMeta(long termId, long[] docIds) {
public TermData[] getTermData(Arena arena,
long termId,
long[] docIds)
{
var ret = new TermData[docIds.length];
long offset = wordOffset(termId);
if (offset < 0) {
// This is likely a bug in the code, but we can't throw an exception here
logger.debug("Missing offset for word {}", termId);
return new long[docIds.length];
return ret;
}
assert isUniqueAndSorted(docIds) : "The input array docIds is assumed to be unique and sorted, was " + Arrays.toString(docIds);
var reader = createReaderNew(offset);
return reader.queryData(docIds, 1);
}
private boolean isUniqueAndSorted(long[] ids) {
if (ids.length == 0)
return true;
// Read the size and offset of the position data
var offsets = reader.queryData(docIds, 1);
for (int i = 1; i < ids.length; i++) {
if(ids[i] <= ids[i-1])
return false;
for (int i = 0; i < docIds.length; i++) {
ret[i] = positionsFileReader.getTermData(arena, offsets[i]);
}
return true;
return ret;
}
public void close() {
@ -166,5 +173,14 @@ public class ReverseIndexReader {
if (words != null)
words.close();
if (positionsFileReader != null) {
try {
positionsFileReader.close();
} catch (IOException e) {
logger.error("Failed to close positions file reader", e);
}
}
}
}

View File

@ -1,5 +1,6 @@
package nu.marginalia.index.construction;
import nu.marginalia.index.positions.PositionCodec;
import nu.marginalia.sequence.GammaCodedSequence;
import java.io.IOException;
@ -38,7 +39,7 @@ public class PositionsFileConstructor implements AutoCloseable {
/** Add a term to the positions file
* @param termMeta the term metadata
* @param positions the positions of the term
* @return the offset of the term in the file
* @return the offset of the term in the file, with the size of the data in the highest byte
*/
public long add(byte termMeta, GammaCodedSequence positions) throws IOException {
synchronized (file) {
@ -53,12 +54,20 @@ public class PositionsFileConstructor implements AutoCloseable {
workBuffer.put(termMeta);
workBuffer.put(positionBuffer);
long ret = PositionCodec.encode(size, offset);
offset += size;
return offset;
return ret;
}
}
public void close() throws IOException {
while (workBuffer.position() < workBuffer.limit()) {
workBuffer.flip();
channel.write(workBuffer);
}
channel.force(false);
channel.close();
}

View File

@ -7,7 +7,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.atomic.AtomicInteger;

View File

@ -21,12 +21,14 @@ import java.util.concurrent.TimeUnit;
* the associated ReversePreindexWordSegments data
*/
public class ReversePreindexDocuments {
public final LongArray documents;
private static PositionsFileConstructor positionsFileConstructor;
final Path file;
public final LongArray documents;
private static final int RECORD_SIZE_LONGS = 2;
private static final Logger logger = LoggerFactory.getLogger(ReversePreindexDocuments.class);
public final Path file;
public ReversePreindexDocuments(LongArray documents, Path file) {
this.documents = documents;
this.file = file;
@ -70,22 +72,25 @@ public class ReversePreindexDocuments {
long fileSizeLongs = RECORD_SIZE_LONGS * segments.totalSize();
try (RandomFileAssembler assembly = RandomFileAssembler.create(workDir, fileSizeLongs)) {
try (var assembly = RandomFileAssembler.create(workDir, fileSizeLongs);
var pointer = reader.newPointer())
{
var offsetMap = segments.asMap(RECORD_SIZE_LONGS);
offsetMap.defaultReturnValue(0);
var pointer = reader.newPointer();
while (pointer.nextDocument()) {
long rankEncodedId = docIdRewriter.rewriteDocId(pointer.documentId());
for (var termData : pointer) {
long termId = termData.termId();
long offset = offsetMap.addTo(termId, RECORD_SIZE_LONGS);
long posOffset = positionsFileConstructor.add((byte) termData.metadata(), termData.positions());
// write position data to the positions file and get the offset
long encodedPosOffset = positionsFileConstructor.add((byte) termData.metadata(), termData.positions());
assembly.put(offset + 0, rankEncodedId);
assembly.put(offset + 1, posOffset);
assembly.put(offset + 1, encodedPosOffset);
}
}

View File

@ -0,0 +1,25 @@
package nu.marginalia.index.positions;
/** A utility class for encoding and decoding position data offsets,
* the data is encoded by using the highest 16 bits to store the size of the data,
* and the remaining 48 bits to store the offset.
* <p></p>
* This lets us address 256 TB of data, with up to 64 KB of position data for each term,
* which is ample headroom for both the size of the data and the number of positions.
* */
public class PositionCodec {
public static long encode(int length, long offset) {
assert decodeSize(offset) == 0 : "Offset must be less than 2^48";
return (long) length << 48 | offset;
}
public static int decodeSize(long sizeEncodedOffset) {
return (int) ((sizeEncodedOffset & 0xFFFF_0000_0000_0000L) >>> 48);
}
public static long decodeOffset(long sizeEncodedOffset) {
return sizeEncodedOffset & 0x0000_FFFF_FFFF_FFFFL;
}
}
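
A quick round trip through the codec with made-up numbers, showing the size landing in the top 16 bits and the byte offset in the lower 48:

long encoded = PositionCodec.encode(14, 1_000_000L);  // a 14-byte blob at byte offset 1 000 000
// encoded == 0x000E_0000_000F_4240L

int size    = PositionCodec.decodeSize(encoded);    // 14
long offset = PositionCodec.decodeOffset(encoded);  // 1_000_000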

View File

@ -0,0 +1,39 @@
package nu.marginalia.index.positions;
import java.io.IOException;
import java.lang.foreign.Arena;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
public class PositionsFileReader implements AutoCloseable {
private final FileChannel positions;
public PositionsFileReader(Path positionsFile) throws IOException {
this.positions = FileChannel.open(positionsFile, StandardOpenOption.READ);
}
/** Get the positions for a term in the index, as pointed out by the encoded offset;
* intermediate buffers are allocated from the provided arena allocator. */
public TermData getTermData(Arena arena, long sizeEncodedOffset) {
int length = PositionCodec.decodeSize(sizeEncodedOffset);
long offset = PositionCodec.decodeOffset(sizeEncodedOffset);
var segment = arena.allocate(length);
var buffer = segment.asByteBuffer();
try {
positions.read(buffer, offset);
} catch (IOException e) {
throw new RuntimeException(e);
}
return new TermData(buffer);
}
@Override
public void close() throws IOException {
positions.close();
}
}

View File

@ -0,0 +1,21 @@
package nu.marginalia.index.positions;
import nu.marginalia.sequence.GammaCodedSequence;
import java.nio.ByteBuffer;
public class TermData {
private final ByteBuffer buffer;
public TermData(ByteBuffer buffer) {
this.buffer = buffer;
}
public byte flags() {
return buffer.get(0);
}
public GammaCodedSequence positions() {
return new GammaCodedSequence(buffer, 1, buffer.capacity());
}
}

View File

@ -0,0 +1,63 @@
package nu.marginalia.index;
import it.unimi.dsi.fastutil.ints.IntList;
import nu.marginalia.index.construction.PositionsFileConstructor;
import nu.marginalia.index.positions.TermData;
import nu.marginalia.index.positions.PositionsFileReader;
import nu.marginalia.sequence.GammaCodedSequence;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.lang.foreign.Arena;
import java.nio.ByteBuffer;
import java.nio.file.Files;
import java.nio.file.Path;
import static org.junit.jupiter.api.Assertions.*;
class PositionsFileReaderTest {
Path file;
@BeforeEach
void setUp() throws IOException {
file = Files.createTempFile("positions", "dat");
}
@AfterEach
void tearDown() throws IOException {
Files.delete(file);
}
@Test
void getTermData() throws IOException {
ByteBuffer workArea = ByteBuffer.allocate(8192);
long key1, key2, key3;
try (PositionsFileConstructor constructor = new PositionsFileConstructor(file)) {
key1 = constructor.add((byte) 43, GammaCodedSequence.generate(workArea, 1, 2, 3));
key2 = constructor.add((byte) 51, GammaCodedSequence.generate(workArea, 2, 3, 5, 1000, 5000, 20241));
key3 = constructor.add((byte) 61, GammaCodedSequence.generate(workArea, 3, 5, 7));
}
System.out.println("key1: " + Long.toHexString(key1));
System.out.println("key2: " + Long.toHexString(key2));
System.out.println("key3: " + Long.toHexString(key3));
try (Arena arena = Arena.ofConfined();
PositionsFileReader reader = new PositionsFileReader(file))
{
TermData data1 = reader.getTermData(arena, key1);
assertEquals(43, data1.flags());
assertEquals(IntList.of( 1, 2, 3), data1.positions().values());
TermData data2 = reader.getTermData(arena, key2);
assertEquals(51, data2.flags());
assertEquals(IntList.of(2, 3, 5, 1000, 5000, 20241), data2.positions().values());
TermData data3 = reader.getTermData(arena, key3);
assertEquals(61, data3.flags());
assertEquals(IntList.of(3, 5, 7), data3.positions().values());
}
}
}

View File

@ -1,17 +1,19 @@
package nu.marginalia.index;
import it.unimi.dsi.fastutil.ints.IntList;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.PositionsFileConstructor;
import nu.marginalia.index.construction.ReversePreindex;
import nu.marginalia.index.construction.TestJournalFactory;
import nu.marginalia.index.construction.TestJournalFactory.EntryDataWithWordMeta;
import nu.marginalia.index.positions.PositionsFileReader;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;
import java.io.IOException;
import java.lang.foreign.Arena;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
@ -47,13 +49,18 @@ class ReverseIndexReaderTest {
public void testSimple() throws IOException {
var indexReader = createIndex(
new EntryDataWithWordMeta(100, 101, wm(50, 51))
new EntryDataWithWordMeta(100, 101, wm(50, 51, 1, 3, 5))
);
assertEquals(1, indexReader.numDocuments(50));
long[] meta = indexReader.getTermMeta(50, new long[] { 100 });
assertArrayEquals(new long[] { 51 }, meta);
var positions = indexReader.getTermData(Arena.global(), 50, new long[] { 100 });
assertEquals(1, positions.length);
assertNotNull(positions[0]);
assertEquals((byte) 51, positions[0].flags());
assertEquals(IntList.of(1, 3, 5), positions[0].positions().values());
assertArrayEquals(new long[] { 100 }, readEntries(indexReader, 50));
}
@ -69,13 +76,8 @@ class ReverseIndexReaderTest {
assertEquals(2, indexReader.numDocuments(51));
assertEquals(1, indexReader.numDocuments(52));
assertArrayEquals(new long[] { 51 }, indexReader.getTermMeta(50, new long[] { 100 }));
assertArrayEquals(new long[] { 100 }, readEntries(indexReader, 50));
assertArrayEquals(new long[] { 52, 53 }, indexReader.getTermMeta(51, new long[] { 100, 101 }));
assertArrayEquals(new long[] { 100, 101 }, readEntries(indexReader, 51));
assertArrayEquals(new long[] { 54 }, indexReader.getTermMeta(52, new long[] { 101 }));
assertArrayEquals(new long[] { 101 }, readEntries(indexReader, 52));
}
@ -91,18 +93,20 @@ class ReverseIndexReaderTest {
private ReverseIndexReader createIndex(EntryDataWithWordMeta... scenario) throws IOException {
var reader = journalFactory.createReader(scenario);
var preindex = ReversePreindex.constructPreindex(reader,
Mockito.mock(PositionsFileConstructor.class),
DocIdRewriter.identity(), tempDir);
Path posFile = tempDir.resolve("positions.dat");
Path docsFile = tempDir.resolve("docs.dat");
Path wordsFile = tempDir.resolve("words.dat");
preindex.finalizeIndex(docsFile, wordsFile);
preindex.delete();
try (var positionsFileConstructor = new PositionsFileConstructor(posFile)) {
var preindex = ReversePreindex.constructPreindex(reader,
positionsFileConstructor,
DocIdRewriter.identity(), tempDir);
preindex.finalizeIndex(docsFile, wordsFile);
preindex.delete();
}
return new ReverseIndexReader("test", wordsFile, docsFile);
return new ReverseIndexReader("test", wordsFile, docsFile, new PositionsFileReader(posFile));
}
}

View File

@ -155,15 +155,15 @@ class ReversePreindexDocsTest {
if (wordId != that.wordId) return false;
if (start != that.start) return false;
if (end != that.end) return false;
return Arrays.equals(data, that.data);
return data[0] == that.data[0]; //Arrays.equals(data, that.data);
}
@Override
public int hashCode() {
int result = (int) (wordId ^ (wordId >>> 32));
result = 31 * result + (int) (start ^ (start >>> 32));
result = 31 * result + (int) (end ^ (end >>> 32));
result = 31 * result + Arrays.hashCode(data);
int result = Long.hashCode(wordId);
result = 31 * result + Long.hashCode(start);
result = 31 * result + Long.hashCode(end);
result = 31 * result + Long.hashCode(data[0]);
return result;
}

View File

@ -79,9 +79,7 @@ class ReversePreindexFinalizeTest {
assertEquals(1, wordsHeader.numEntries());
assertEquals(100, docsArray.get(docsHeader.dataOffsetLongs() + 0));
assertEquals(51, docsArray.get(docsHeader.dataOffsetLongs() + 1));
assertEquals(50, wordsArray.get(wordsHeader.dataOffsetLongs()));
assertEquals(0, wordsArray.get(wordsHeader.dataOffsetLongs() + 1));
}
@ -122,9 +120,7 @@ class ReversePreindexFinalizeTest {
long offset2 = wordsArray.get(wordsHeader.dataOffsetLongs() + 3);
assertEquals(50, wordsArray.get(wordsHeader.dataOffsetLongs()));
assertEquals(0, wordsArray.get(wordsHeader.dataOffsetLongs() + 1));
assertEquals(50, wordsArray.get(wordsHeader.dataOffsetLongs()));
assertEquals(0, wordsArray.get(wordsHeader.dataOffsetLongs() + 1));
BTreeHeader docsHeader;
@ -133,13 +129,11 @@ class ReversePreindexFinalizeTest {
assertEquals(1, docsHeader.numEntries());
assertEquals(100, docsArray.get(docsHeader.dataOffsetLongs() + 0));
assertEquals(51, docsArray.get(docsHeader.dataOffsetLongs() + 1));
docsHeader = new BTreeHeader(docsArray, offset2);
System.out.println(docsHeader);
assertEquals(1, docsHeader.numEntries());
assertEquals(101, docsArray.get(docsHeader.dataOffsetLongs() + 0));
assertEquals(52, docsArray.get(docsHeader.dataOffsetLongs() + 1));
}
}

View File

@ -8,11 +8,13 @@ import nu.marginalia.index.journal.writer.IndexJournalWriterSingleFileImpl;
import nu.marginalia.sequence.GammaCodedSequence;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
public class TestJournalFactory {
Path tempDir = Files.createTempDirectory("journal");
@ -50,10 +52,10 @@ public class TestJournalFactory {
'}';
}
}
public record WordWithMeta(long wordId, long meta) {}
public record WordWithMeta(long wordId, long meta, GammaCodedSequence gcs) {}
public static WordWithMeta wm(long wordId, long meta) {
return new WordWithMeta(wordId, meta);
public static WordWithMeta wm(long wordId, long meta, int... positions) {
return new WordWithMeta(wordId, meta, GammaCodedSequence.generate(ByteBuffer.allocate(128), positions));
}
IndexJournalReader createReader(EntryData... entries) throws IOException {
@ -71,7 +73,7 @@ public class TestJournalFactory {
positions[i] = new GammaCodedSequence(new byte[1]);
}
writer.put(new IndexJournalEntryHeader(entries.length, 0, entry.docId, entry.docMeta),
writer.put(new IndexJournalEntryHeader(entries.length, 0, 15, entry.docId, entry.docMeta),
new IndexJournalEntryData(termIds, meta, positions));
}
writer.close();
@ -91,10 +93,10 @@ public class TestJournalFactory {
for (int i = 0; i < entry.wordIds.length; i++) {
termIds[i] = entry.wordIds[i].wordId;
meta[i] = entry.wordIds[i].meta;
positions[i] = new GammaCodedSequence(new byte[1]);
positions[i] = Objects.requireNonNullElseGet(entry.wordIds[i].gcs, () -> new GammaCodedSequence(new byte[1]));
}
writer.put(new IndexJournalEntryHeader(entries.length, 0, entry.docId, entry.docMeta),
writer.put(new IndexJournalEntryHeader(entries.length, 0, 15, entry.docId, entry.docMeta),
new IndexJournalEntryData(termIds, meta, positions));
}
writer.close();

View File

@ -4,11 +4,10 @@ import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.IndexLocations;
import nu.marginalia.index.index.CombinedIndexReader;
import nu.marginalia.index.positions.PositionsFileReader;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.index.forward.ForwardIndexFileNames;
import nu.marginalia.index.forward.ForwardIndexReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.file.Files;
@ -40,17 +39,18 @@ public class IndexFactory {
}
public ReverseIndexReader getReverseIndexReader() throws IOException {
return new ReverseIndexReader("full",
ReverseIndexFullFileNames.resolve(liveStorage, ReverseIndexFullFileNames.FileIdentifier.WORDS, ReverseIndexFullFileNames.FileVersion.CURRENT),
ReverseIndexFullFileNames.resolve(liveStorage, ReverseIndexFullFileNames.FileIdentifier.DOCS, ReverseIndexFullFileNames.FileVersion.CURRENT)
ReverseIndexFullFileNames.resolve(liveStorage, ReverseIndexFullFileNames.FileIdentifier.DOCS, ReverseIndexFullFileNames.FileVersion.CURRENT),
new PositionsFileReader(ReverseIndexFullFileNames.resolve(liveStorage, ReverseIndexFullFileNames.FileIdentifier.POSITIONS, ReverseIndexFullFileNames.FileVersion.CURRENT))
);
}
public ReverseIndexReader getReverseIndexPrioReader() throws IOException {
return new ReverseIndexReader("prio",
ReverseIndexPrioFileNames.resolve(liveStorage, ReverseIndexPrioFileNames.FileIdentifier.WORDS, ReverseIndexPrioFileNames.FileVersion.CURRENT),
ReverseIndexPrioFileNames.resolve(liveStorage, ReverseIndexPrioFileNames.FileIdentifier.DOCS, ReverseIndexPrioFileNames.FileVersion.CURRENT)
ReverseIndexPrioFileNames.resolve(liveStorage, ReverseIndexPrioFileNames.FileIdentifier.DOCS, ReverseIndexPrioFileNames.FileVersion.CURRENT),
null
);
}

View File

@ -281,10 +281,7 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
awaitCompletion();
// Return the best results
return new SearchResultSet(
resultValuator.selectBestResults(parameters,
resultRankingContext,
resultHeap));
return new SearchResultSet(resultValuator.selectBestResults(parameters, resultHeap));
}
/** Wait for all tasks to complete */

View File

@ -14,12 +14,13 @@ import nu.marginalia.index.query.IndexQueryBuilder;
import nu.marginalia.index.query.filter.QueryFilterStepIf;
import nu.marginalia.index.query.limit.SpecificationLimitType;
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
import nu.marginalia.index.results.model.ids.DocMetadataList;
import nu.marginalia.index.results.model.ids.TermMetadataList;
import nu.marginalia.model.id.UrlIdCodec;
import nu.marginalia.model.idx.DocumentMetadata;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.lang.foreign.Arena;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
@ -169,8 +170,11 @@ public class CombinedIndexReader {
}
/** Retrieves the term metadata for the specified word for the provided documents */
public DocMetadataList getMetadata(long wordId, CombinedDocIdList docIds) {
return new DocMetadataList(reverseIndexFullReader.getTermMeta(wordId, docIds.array()));
public TermMetadataList getTermMetadata(Arena arena,
long wordId,
CombinedDocIdList docIds)
{
return new TermMetadataList(reverseIndexFullReader.getTermData(arena, wordId, docIds.array()));
}
/** Retrieves the document metadata for the specified document */
@ -186,8 +190,12 @@ public class CombinedIndexReader {
/** Retrieves the HTML features for the specified document */
public int getHtmlFeatures(long docId) {
return forwardIndexReader.getHtmlFeatures(docId);
}
/** Retrieves the size of the specified document */
public int getDocumentSize(long docId) {
return forwardIndexReader.getDocumentSize(docId);
}
/** Close the indexes (this is not done immediately)
* */
public void close() throws InterruptedException {

View File

@ -10,12 +10,13 @@ import nu.marginalia.index.index.StatefulIndex;
import nu.marginalia.index.model.SearchTermsUtil;
import nu.marginalia.index.results.model.QuerySearchTerms;
import nu.marginalia.index.results.model.TermCoherenceGroupList;
import nu.marginalia.index.results.model.TermMetadataForCombinedDocumentIds;
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
import nu.marginalia.index.results.model.ids.TermMetadataList;
import nu.marginalia.index.results.model.ids.TermIdList;
import java.lang.foreign.Arena;
import static nu.marginalia.index.results.model.TermCoherenceGroupList.TermCoherenceGroup;
import static nu.marginalia.index.results.model.TermMetadataForCombinedDocumentIds.DocumentsWithMetadata;
public class IndexMetadataService {
private final StatefulIndex statefulIndex;
@ -25,22 +26,19 @@ public class IndexMetadataService {
this.statefulIndex = index;
}
public TermMetadataForCombinedDocumentIds getTermMetadataForDocuments(CombinedDocIdList combinedIdsAll,
TermIdList termIdsList)
public Long2ObjectArrayMap<TermMetadataList>
getTermMetadataForDocuments(Arena arena, CombinedDocIdList combinedIdsAll, TermIdList termIdsList)
{
var currentIndex = statefulIndex.get();
Long2ObjectArrayMap<DocumentsWithMetadata> termdocToMeta =
Long2ObjectArrayMap<TermMetadataList> termdocToMeta =
new Long2ObjectArrayMap<>(termIdsList.size());
for (long termId : termIdsList.array()) {
var metadata = currentIndex.getMetadata(termId, combinedIdsAll);
termdocToMeta.put(termId,
new DocumentsWithMetadata(combinedIdsAll, metadata));
termdocToMeta.put(termId, currentIndex.getTermMetadata(arena, termId, combinedIdsAll));
}
return new TermMetadataForCombinedDocumentIds(termdocToMeta);
return termdocToMeta;
}
public QuerySearchTerms getSearchTerms(CompiledQuery<String> compiledQuery, SearchQuery searchQuery) {

View File

@ -1,25 +1,22 @@
package nu.marginalia.index.results;
import nu.marginalia.api.searchquery.model.compiled.*;
import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates;
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
import nu.marginalia.index.index.CombinedIndexReader;
import nu.marginalia.index.index.StatefulIndex;
import nu.marginalia.index.model.SearchParameters;
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
import nu.marginalia.index.model.QueryParams;
import nu.marginalia.index.results.model.QuerySearchTerms;
import nu.marginalia.index.results.model.TermMetadataForCombinedDocumentIds;
import nu.marginalia.model.id.UrlIdCodec;
import nu.marginalia.model.idx.WordFlags;
import nu.marginalia.model.idx.WordMetadata;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.ranking.results.ResultValuator;
import nu.marginalia.sequence.GammaCodedSequence;
import javax.annotation.Nullable;
import java.util.List;
import static nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates.*;
/** This class is responsible for calculating the score of a search result.
* It holds the data required to perform the scoring, as there is strong
@ -28,94 +25,74 @@ public class IndexResultValuationContext {
private final CombinedIndexReader index;
private final QueryParams queryParams;
private final TermMetadataForCombinedDocumentIds termMetadataForCombinedDocumentIds;
private final QuerySearchTerms searchTerms;
private final ResultRankingContext rankingContext;
private final ResultValuator searchResultValuator;
private final CompiledQuery<String> compiledQuery;
private final CompiledQueryLong compiledQueryIds;
public IndexResultValuationContext(IndexMetadataService metadataService,
ResultValuator searchResultValuator,
CombinedDocIdList ids,
public IndexResultValuationContext(ResultValuator searchResultValuator,
StatefulIndex statefulIndex,
ResultRankingContext rankingContext,
SearchParameters params
) {
SearchParameters params)
{
this.index = statefulIndex.get();
this.rankingContext = rankingContext;
this.searchResultValuator = searchResultValuator;
this.queryParams = params.queryParams;
this.compiledQuery = params.compiledQuery;
this.compiledQueryIds = params.compiledQueryIds;
this.searchTerms = metadataService.getSearchTerms(params.compiledQuery, params.query);
this.termMetadataForCombinedDocumentIds = metadataService.getTermMetadataForDocuments(ids,
searchTerms.termIdsAll);
}
private final long flagsFilterMask =
WordFlags.Title.asBit() | WordFlags.Subjects.asBit() | WordFlags.UrlDomain.asBit() | WordFlags.UrlPath.asBit() | WordFlags.ExternalLink.asBit();
private final long flagsFilterMask = WordFlags.Title.asBit() | WordFlags.Subjects.asBit() | WordFlags.UrlDomain.asBit() | WordFlags.UrlPath.asBit() | WordFlags.ExternalLink.asBit();
@Nullable
public SearchResultItem calculatePreliminaryScore(long combinedId) {
public SearchResultItem calculatePreliminaryScore(long combinedId,
QuerySearchTerms searchTerms,
long[] wordFlags,
GammaCodedSequence[] positions)
{
// FIXME: Reconsider coherence logic with the new position data
// if (!searchTerms.coherences.test(termMetadataForCombinedDocumentIds, combinedId))
// return null;
CompiledQuery<GammaCodedSequence> positionsQuery = compiledQuery.root.newQuery(positions);
CompiledQueryLong wordFlagsQuery = compiledQuery.root.newQuery(wordFlags);
int[] counts = new int[compiledQuery.size()];
for (int i = 0; i < counts.length; i++) {
if (positions[i] != null) {
counts[i] = positions[i].valueCount();
}
}
CompiledQueryInt positionsCountQuery = compiledQuery.root.newQuery(counts);
// If the document is not relevant to the query, abort early to reduce allocations and
// avoid unnecessary calculations
if (testRelevance(wordFlagsQuery, positionsCountQuery)) {
return null;
}
long docId = UrlIdCodec.removeRank(combinedId);
if (!searchTerms.coherences.test(termMetadataForCombinedDocumentIds, combinedId))
return null;
long docMetadata = index.getDocumentMetadata(docId);
int htmlFeatures = index.getHtmlFeatures(docId);
SearchResultItem searchResult = new SearchResultItem(docId,
docMetadata,
htmlFeatures,
hasPrioTerm(combinedId));
long[] wordMetas = new long[compiledQuery.size()];
SearchResultKeywordScore[] scores = new SearchResultKeywordScore[compiledQuery.size()];
for (int i = 0; i < wordMetas.length; i++) {
final long termId = compiledQueryIds.at(i);
final String term = compiledQuery.at(i);
wordMetas[i] = termMetadataForCombinedDocumentIds.getTermMetadata(termId, combinedId);
scores[i] = new SearchResultKeywordScore(term, termId, wordMetas[i]);
}
// DANGER: IndexResultValuatorService assumes that searchResult.keywordScores has this specific order, as it needs
// to be able to re-construct its own CompiledQuery<SearchResultKeywordScore> for re-ranking the results. This is
// a very flimsy assumption.
searchResult.keywordScores.addAll(List.of(scores));
CompiledQueryLong wordMetasQuery = new CompiledQueryLong(compiledQuery.root, new CqDataLong(wordMetas));
boolean allSynthetic = CompiledQueryAggregates.booleanAggregate(wordMetasQuery, WordFlags.Synthetic::isPresent);
int flagsCount = CompiledQueryAggregates.intMaxMinAggregate(wordMetasQuery, wordMeta -> Long.bitCount(wordMeta & flagsFilterMask));
int positionsCount = CompiledQueryAggregates.intMaxMinAggregate(wordMetasQuery, wordMeta -> Long.bitCount(WordMetadata.decodePositions(wordMeta)));
if (!meetsQueryStrategyRequirements(wordMetasQuery, queryParams.queryStrategy())) {
return null;
}
if (flagsCount == 0 && !allSynthetic && positionsCount == 0)
return null;
int docSize = index.getDocumentSize(docId);
double score = searchResultValuator.calculateSearchResultValue(
wordMetasQuery,
wordFlagsQuery,
positionsCountQuery,
positionsQuery,
docMetadata,
htmlFeatures,
5000, // use a dummy value here as it's not present in the index
docSize,
rankingContext,
null);
if (searchResult.hasPrioTerm) {
SearchResultItem searchResult = new SearchResultItem(docId,
docMetadata,
htmlFeatures);
if (hasPrioTerm(searchTerms, positions)) {
score = 0.75 * score;
}
@ -124,13 +101,32 @@ public class IndexResultValuationContext {
return searchResult;
}
private boolean hasPrioTerm(long combinedId) {
for (var term : searchTerms.termIdsPrio.array()) {
if (termMetadataForCombinedDocumentIds.hasTermMeta(term, combinedId)) {
private boolean testRelevance(CompiledQueryLong wordFlagsQuery, CompiledQueryInt countsQuery) {
boolean allSynthetic = booleanAggregate(wordFlagsQuery, WordFlags.Synthetic::isPresent);
int flagsCount = intMaxMinAggregate(wordFlagsQuery, flags -> Long.bitCount(flags & flagsFilterMask));
int positionsCount = intMaxMinAggregate(countsQuery, p -> p);
if (!meetsQueryStrategyRequirements(wordFlagsQuery, queryParams.queryStrategy())) {
return true;
}
if (flagsCount == 0 && !allSynthetic && positionsCount == 0) {
return true;
}
return false;
}
private boolean hasPrioTerm(QuerySearchTerms searchTerms, GammaCodedSequence[] positions) {
var allTerms = searchTerms.termIdsAll;
var prioTerms = searchTerms.termIdsPrio;
for (int i = 0; i < allTerms.size(); i++) {
if (positions[i] != null && prioTerms.contains(allTerms.at(i))) {
return true;
}
}
return false;
return false;
}
private boolean meetsQueryStrategyRequirements(CompiledQueryLong queryGraphScores,
@ -142,7 +138,7 @@ public class IndexResultValuationContext {
return true;
}
return CompiledQueryAggregates.booleanAggregate(queryGraphScores,
return booleanAggregate(queryGraphScores,
docs -> meetsQueryStrategyRequirements(docs, queryParams.queryStrategy()));
}

View File

@ -7,8 +7,6 @@ import gnu.trove.list.array.TLongArrayList;
import it.unimi.dsi.fastutil.longs.LongSet;
import nu.marginalia.api.searchquery.model.compiled.CompiledQuery;
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
import nu.marginalia.api.searchquery.model.compiled.CqDataInt;
import nu.marginalia.api.searchquery.model.compiled.CqDataLong;
import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates;
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
@ -21,12 +19,13 @@ import nu.marginalia.linkdb.docs.DocumentDbReader;
import nu.marginalia.linkdb.model.DocdbUrlDetail;
import nu.marginalia.model.idx.WordMetadata;
import nu.marginalia.ranking.results.ResultValuator;
import nu.marginalia.sequence.GammaCodedSequence;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.lang.foreign.Arena;
import java.sql.SQLException;
import java.util.*;
import java.util.function.Consumer;
@Singleton
public class IndexResultValuatorService {
@ -53,35 +52,53 @@ public class IndexResultValuatorService {
ResultRankingContext rankingContext,
CombinedDocIdList resultIds)
{
final var evaluator = createValuationContext(params, rankingContext, resultIds);
IndexResultValuationContext evaluator =
new IndexResultValuationContext(resultValuator, statefulIndex, rankingContext, params);
List<SearchResultItem> results = new ArrayList<>(resultIds.size());
for (long id : resultIds.array()) {
var score = evaluator.calculatePreliminaryScore(id);
if (score != null) {
results.add(score);
try (var arena = Arena.ofConfined()) {
// Batch-fetch the word metadata for the documents
var searchTerms = metadataService.getSearchTerms(params.compiledQuery, params.query);
var termsForDocs = metadataService.getTermMetadataForDocuments(arena, resultIds, searchTerms.termIdsAll);
// Prepare data for the document. We do this outside of the calculation function to avoid
// hash lookups in the inner loop, as it's very hot code and we don't want thrashing in there;
// out here we can rely on implicit array ordering to match up the data.
var ra = resultIds.array();
long[] flags = new long[searchTerms.termIdsAll.size()];
GammaCodedSequence[] positions = new GammaCodedSequence[searchTerms.termIdsAll.size()];
for (int i = 0; i < ra.length; i++) {
long id = ra[i];
// Prepare term-level data for the document
for (int ti = 0; ti < flags.length; ti++) {
long tid = searchTerms.termIdsAll.at(ti);
var tfd = termsForDocs.get(tid);
assert tfd != null : "No term data for term " + ti;
flags[ti] = tfd.flag(i);
positions[ti] = tfd.position(i);
}
// Calculate the preliminary score
var score = evaluator.calculatePreliminaryScore(id, searchTerms, flags, positions);
if (score != null) {
results.add(score);
}
}
return results;
}
return results;
}
private IndexResultValuationContext createValuationContext(SearchParameters params,
ResultRankingContext rankingContext,
CombinedDocIdList resultIds)
{
return new IndexResultValuationContext(metadataService,
resultValuator,
resultIds,
statefulIndex,
rankingContext,
params);
}
public List<DecoratedSearchResultItem> selectBestResults(SearchParameters params,
ResultRankingContext rankingContext,
Collection<SearchResultItem> results) throws SQLException {
var domainCountFilter = new IndexResultDomainDeduplicator(params.limitByDomain);
@ -101,14 +118,13 @@ public class IndexResultValuatorService {
item.resultsFromDomain = domainCountFilter.getCount(item);
}
return decorateAndRerank(resultsList, params.compiledQuery, rankingContext);
return decorateResults(resultsList, params.compiledQuery);
}
/** Decorate the result items with additional information from the link database
* and calculate an updated ranking with the additional information */
public List<DecoratedSearchResultItem> decorateAndRerank(List<SearchResultItem> rawResults,
CompiledQuery<String> compiledQuery,
ResultRankingContext rankingContext)
public List<DecoratedSearchResultItem> decorateResults(List<SearchResultItem> rawResults,
CompiledQuery<String> compiledQuery)
throws SQLException
{
TLongList idsList = new TLongArrayList(rawResults.size());
@ -131,42 +147,18 @@ public class IndexResultValuatorService {
continue;
}
// Reconstruct the compiledquery for re-valuation
//
// CAVEAT: This hinges on a very fragile assumption that IndexResultValuationContext puts them in the same
// order as the data for the CompiledQuery<String>.
long[] wordMetas = new long[compiledQuery.size()];
for (int i = 0; i < compiledQuery.size(); i++) {
var score = result.keywordScores.get(i);
wordMetas[i] = score.encodedWordMetadata();
}
CompiledQueryLong metaQuery = new CompiledQueryLong(compiledQuery.root, new CqDataLong(wordMetas));
resultItems.add(createCombinedItem(
result,
docData,
metaQuery,
rankingContext));
docData));
}
return resultItems;
}
private DecoratedSearchResultItem createCombinedItem(SearchResultItem result,
DocdbUrlDetail docData,
CompiledQueryLong wordMetas,
ResultRankingContext rankingContext) {
DocdbUrlDetail docData) {
ResultRankingDetailsExtractor detailsExtractor = new ResultRankingDetailsExtractor();
Consumer<ResultRankingDetails> detailConsumer = rankingContext.params.exportDebugData ? detailsExtractor::set : null;
double score = resultValuator.calculateSearchResultValue(wordMetas,
result.encodedDocMetadata,
result.htmlFeatures,
docData.wordsTotal(),
rankingContext,
detailConsumer);
// Consumer<ResultRankingDetails> detailConsumer = rankingContext.params.exportDebugData ? detailsExtractor::set : null;
return new DecoratedSearchResultItem(
result,
@ -179,8 +171,8 @@ public class IndexResultValuatorService {
docData.pubYear(),
docData.dataHash(),
docData.wordsTotal(),
bestPositions(wordMetas),
score,
0L, //bestPositions(wordMetas),
result.getScore(),
detailsExtractor.get()
);
}

View File

@ -1,26 +1,38 @@
package nu.marginalia.index.results.model;
import it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap;
import it.unimi.dsi.fastutil.longs.Long2ObjectArrayMap;
import it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap;
import nu.marginalia.index.positions.TermData;
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
import nu.marginalia.index.results.model.ids.DocMetadataList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import nu.marginalia.index.results.model.ids.TermMetadataList;
import nu.marginalia.sequence.GammaCodedSequence;
import javax.annotation.Nullable;
public class TermMetadataForCombinedDocumentIds {
private static final Logger logger = LoggerFactory.getLogger(TermMetadataForCombinedDocumentIds.class);
private final Long2ObjectArrayMap<DocumentsWithMetadata> termdocToMeta;
public TermMetadataForCombinedDocumentIds(Long2ObjectArrayMap<DocumentsWithMetadata> termdocToMeta) {
this.termdocToMeta = termdocToMeta;
}
public long getTermMetadata(long termId, long combinedId) {
public byte getTermMetadata(long termId, long combinedId) {
var metaByCombinedId = termdocToMeta.get(termId);
if (metaByCombinedId == null) {
return 0;
}
return metaByCombinedId.get(combinedId);
return metaByCombinedId.get(combinedId).flags();
}
@Nullable
public GammaCodedSequence getPositions(long termId, long combinedId) {
var metaByCombinedId = termdocToMeta.get(termId);
if (metaByCombinedId == null) {
return null;
}
return metaByCombinedId.get(combinedId).positions();
}
public boolean hasTermMeta(long termId, long combinedId) {
@ -30,16 +42,25 @@ public class TermMetadataForCombinedDocumentIds {
return false;
}
return metaByCombinedId.get(combinedId) != 0;
return metaByCombinedId.data().containsKey(combinedId);
}
public record DocumentsWithMetadata(Long2LongOpenHashMap data) {
public DocumentsWithMetadata(CombinedDocIdList combinedDocIdsAll, DocMetadataList metadata) {
this(new Long2LongOpenHashMap(combinedDocIdsAll.array(), metadata.array()));
public record DocumentsWithMetadata(Long2ObjectOpenHashMap<TermData> data) {
public DocumentsWithMetadata(CombinedDocIdList combinedDocIdsAll, TermMetadataList metadata) {
this(new Long2ObjectOpenHashMap<>(combinedDocIdsAll.size()));
long[] ids = combinedDocIdsAll.array();
TermData[] data = metadata.array();
for (int i = 0; i < combinedDocIdsAll.size(); i++) {
if (data[i] != null) {
this.data.put(ids[i], data[i]);
}
}
}
public long get(long combinedId) {
return data.getOrDefault(combinedId, 0);
public TermData get(long combinedId) {
return data.get(combinedId);
}
}
}
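To illustrate how the reworked per-document term data is read, a minimal sketch; the helper method and variable names are illustrative, only the accessors added above are assumed:

import it.unimi.dsi.fastutil.ints.IntList;
import nu.marginalia.index.results.model.TermMetadataForCombinedDocumentIds;
import nu.marginalia.sequence.GammaCodedSequence;

class TermLookupSketch {
    // Illustrative: 'termMeta' is built per query, 'termId' is a keyword hash and
    // 'combinedId' a ranking-encoded document id.
    static void inspect(TermMetadataForCombinedDocumentIds termMeta, long termId, long combinedId) {
        byte flags = termMeta.getTermMetadata(termId, combinedId);        // 0 when the term is absent
        GammaCodedSequence positions = termMeta.getPositions(termId, combinedId);
        if (positions != null) {
            IntList wordOffsets = positions.values();                     // decoded word positions
            System.out.println("flags=" + flags + " positions=" + wordOffsets);
        }
    }
}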

View File

@ -15,6 +15,10 @@ import java.util.stream.LongStream;
public final class CombinedDocIdList {
private final long[] data;
public CombinedDocIdList(long... data) {
this.data = Arrays.copyOf(data, data.length);
}
public CombinedDocIdList(LongArrayList data) {
this.data = data.toLongArray();
}

View File

@ -1,45 +0,0 @@
package nu.marginalia.index.results.model.ids;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import java.util.Arrays;
import java.util.Objects;
import java.util.stream.LongStream;
public final class DocMetadataList {
private final long[] array;
public DocMetadataList(long[] array) {
this.array = array;
}
public DocMetadataList(LongArrayList list) {
this(list.toLongArray());
}
public int size() {
return array.length;
}
public LongStream stream() {
return LongStream.of(array);
}
public long[] array() {
return array;
}
@Override
public boolean equals(Object obj) {
if (obj == this) return true;
if (obj == null || obj.getClass() != this.getClass()) return false;
var that = (DocMetadataList) obj;
return Arrays.equals(this.array, that.array);
}
@Override
public int hashCode() {
return Arrays.hashCode(array);
}
}

View File

@ -11,6 +11,7 @@ public final class TermIdList {
public TermIdList(long[] array) {
this.array = array;
Arrays.sort(this.array);
}
public TermIdList(LongArrayList list) {
@ -29,6 +30,15 @@ public final class TermIdList {
return array;
}
public long at(int i) {
return array[i];
}
public boolean contains(long id) {
// Implicitly sorted
return Arrays.binarySearch(array, id) >= 0;
}
@Override
public boolean equals(Object obj) {
if (obj == this) return true;
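Because the constructor now sorts the backing array, contains() can rely on binary search; a tiny standalone sketch of that invariant:

import java.util.Arrays;

class SortedIdLookupSketch {
    public static void main(String[] args) {
        long[] termIds = { 42L, 7L, 99L };
        Arrays.sort(termIds);            // done once, in the TermIdList constructor
        // membership checks then cost O(log n) instead of a linear scan
        System.out.println(Arrays.binarySearch(termIds, 42L) >= 0);  // true
        System.out.println(Arrays.binarySearch(termIds, 13L) >= 0);  // false
    }
}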

View File

@ -0,0 +1,55 @@
package nu.marginalia.index.results.model.ids;
import nu.marginalia.index.positions.TermData;
import nu.marginalia.sequence.GammaCodedSequence;
import javax.annotation.Nullable;
import java.util.Arrays;
public final class TermMetadataList {
private final TermData[] array;
public TermMetadataList(TermData[] array) {
this.array = array;
}
public int size() {
return array.length;
}
public long flag(int i) {
if (array[i] == null)
return 0;
return array[i].flags();
}
/** Returns the position data for the given document index;
* may be null if the term is not in the document
*/
@Nullable
public GammaCodedSequence position(int i) {
if (array[i] == null)
return null;
return array[i].positions();
}
public TermData[] array() {
return array;
}
@Override
public boolean equals(Object obj) {
if (obj == this) return true;
if (obj == null || obj.getClass() != this.getClass()) return false;
var that = (TermMetadataList) obj;
return Arrays.equals(this.array, that.array);
}
@Override
public int hashCode() {
return Arrays.hashCode(array);
}
}
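A small usage sketch for the new class, assuming only the accessors defined above; index i refers to the i-th document of the CombinedDocIdList the list was built against:

import nu.marginalia.index.results.model.ids.TermMetadataList;
import nu.marginalia.sequence.GammaCodedSequence;

class TermMetadataListUsageSketch {
    // Illustrative: walk one term's metadata across a batch of documents.
    static void dump(TermMetadataList list) {
        for (int i = 0; i < list.size(); i++) {
            long flags = list.flag(i);                       // 0 when the term is absent from document i
            GammaCodedSequence positions = list.position(i); // null when the term is absent
            if (positions != null) {
                System.out.println("doc #" + i + " flags=" + flags + " positions=" + positions.values());
            }
        }
    }
}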

View File

@ -1,5 +1,7 @@
package nu.marginalia.ranking.results;
import nu.marginalia.api.searchquery.model.compiled.CompiledQuery;
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryInt;
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
@ -14,6 +16,7 @@ import nu.marginalia.ranking.results.factors.*;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.sequence.GammaCodedSequence;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -33,15 +36,15 @@ public class ResultValuator {
this.termCoherenceFactor = termCoherenceFactor;
}
public double calculateSearchResultValue(CompiledQueryLong wordMeta,
long documentMetadata,
public double calculateSearchResultValue(CompiledQueryLong wordFlagsQuery,
CompiledQueryInt positionsCountQuery, CompiledQuery<GammaCodedSequence> positionsQuery, long documentMetadata,
int features,
int length,
ResultRankingContext ctx,
@Nullable Consumer<ResultRankingDetails> detailsConsumer
)
{
if (wordMeta.isEmpty())
if (wordFlagsQuery.isEmpty())
return Double.MAX_VALUE;
if (length < 0) {
@ -82,12 +85,11 @@ public class ResultValuator {
+ temporalBias
+ flagsPenalty;
double tcfOverlap = rankingParams.tcfOverlapWeight * termCoherenceFactor.calculateOverlap(wordMeta);
double tcfJaccard = rankingParams.tcfJaccardWeight * termCoherenceFactor.calculateAvgMutualJaccard(wordMeta, ctx);
// FIXME: need a weighting factor here
double tcfAvgDist = 25. / termCoherenceFactor.calculateAvgMinDistance(positionsQuery, ctx);
double bM25F = rankingParams.bm25FullWeight * wordMeta.root.visit(Bm25FullGraphVisitor.forRegular(rankingParams.fullParams, wordMeta.data, length, ctx));
double bM25N = rankingParams.bm25NgramWeight * wordMeta.root.visit(Bm25FullGraphVisitor.forNgrams(rankingParams.fullParams, wordMeta.data, length, ctx));
double bM25P = rankingParams.bm25PrioWeight * wordMeta.root.visit(new Bm25PrioGraphVisitor(rankingParams.prioParams, wordMeta.data, ctx));
double bM25F = rankingParams.bm25FullWeight * wordFlagsQuery.root.visit(new Bm25FullGraphVisitor(rankingParams.fullParams, positionsCountQuery.data, length, ctx));
double bM25P = rankingParams.bm25PrioWeight * wordFlagsQuery.root.visit(new Bm25PrioGraphVisitor(rankingParams.prioParams, wordFlagsQuery.data, ctx));
double overallPartPositive = Math.max(0, overallPart);
double overallPartNegative = -Math.min(0, overallPart);
@ -112,10 +114,10 @@ public class ResultValuator {
temporalBias,
flagsPenalty,
overallPart,
tcfOverlap,
tcfJaccard,
0,
0,
bM25F,
bM25N,
0, // FIXME: Remove from model
bM25P)
);
@ -125,8 +127,8 @@ public class ResultValuator {
// Renormalize to 0...15, where 0 is the best possible score;
// this is a historical artifact of the original ranking function
double ret = normalize(
tcfOverlap + tcfJaccard
+ bM25F + bM25P + bM25N
tcfAvgDist
+ bM25F + bM25P
+ overallPartPositive,
overallPartNegative);
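To make the provisional weighting concrete (the FIXME above flags the constant 25 as a placeholder): an average minimum term distance of 5 words contributes 25/5 = 5.0 to the normalized sum, a distance of 25 contributes 1.0, and when calculateAvgMinDistance falls back to its 1000-word default the contribution shrinks to 0.025.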

View File

@ -13,7 +13,7 @@ import java.util.List;
public class Bm25FullGraphVisitor implements CqExpression.DoubleVisitor {
private static final long AVG_LENGTH = 5000;
private final CqDataLong wordMetaData;
private final CqDataInt counts;
private final CqDataInt frequencies;
private final Bm25Parameters bm25Parameters;
@ -22,31 +22,16 @@ public class Bm25FullGraphVisitor implements CqExpression.DoubleVisitor {
private final BitSet mask;
private Bm25FullGraphVisitor(Bm25Parameters bm25Parameters,
CqDataLong wordMetaData,
public Bm25FullGraphVisitor(Bm25Parameters bm25Parameters,
CqDataInt counts,
int length,
BitSet mask,
ResultRankingContext ctx) {
this.length = length;
this.bm25Parameters = bm25Parameters;
this.docCount = ctx.termFreqDocCount();
this.wordMetaData = wordMetaData;
this.counts = counts;
this.frequencies = ctx.fullCounts;
this.mask = mask;
}
public static Bm25FullGraphVisitor forRegular(Bm25Parameters bm25Parameters,
CqDataLong wordMetaData,
int length,
ResultRankingContext ctx) {
return new Bm25FullGraphVisitor(bm25Parameters, wordMetaData, length, ctx.regularMask, ctx);
}
public static Bm25FullGraphVisitor forNgrams(Bm25Parameters bm25Parameters,
CqDataLong wordMetaData,
int length,
ResultRankingContext ctx) {
return new Bm25FullGraphVisitor(bm25Parameters, wordMetaData, length, ctx.ngramsMask, ctx);
this.mask = ctx.regularMask;
}
@Override
@ -73,7 +58,7 @@ public class Bm25FullGraphVisitor implements CqExpression.DoubleVisitor {
return 0;
}
double count = Long.bitCount(WordMetadata.decodePositions(wordMetaData.get(idx)));
double count = counts.get(idx);
int freq = frequencies.get(idx);
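With the positions data in place, the term count fed into BM25 now comes from counting occurrences rather than bit-counting the packed word metadata. For reference, a sketch of a textbook BM25 term weight over the quantities visible here; this is not necessarily the exact expression inside the visitor, and k1 and b stand in for the Bm25Parameters values:

class Bm25TermWeightSketch {
    // count:    occurrences of the term in the document (from the positions data)
    // freq:     number of documents in the corpus containing the term (ctx.fullCounts)
    // docCount: total documents in the corpus (ctx.termFreqDocCount())
    // length:   document length; 5000 mirrors the AVG_LENGTH constant above
    static double bm25Term(double count, int freq, int docCount, int length, double k1, double b) {
        double idf = Math.log(1.0 + (docCount - freq + 0.5) / (freq + 0.5));
        double tf  = count * (k1 + 1) / (count + k1 * (1 - b + b * length / 5000.0));
        return idf * tf;
    }
}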

View File

@ -1,66 +1,44 @@
package nu.marginalia.ranking.results.factors;
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates;
import nu.marginalia.api.searchquery.model.compiled.CompiledQuery;
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
import nu.marginalia.model.idx.WordMetadata;
import nu.marginalia.sequence.GammaCodedSequence;
import nu.marginalia.sequence.SequenceOperations;
/** Rewards documents where terms appear frequently within the same sentences
*/
public class TermCoherenceFactor {
/** Calculate a factor that rewards the best total position overlap
* between the terms in the query. This is high when all the terms
* are found in the same sentences.
*/
public double calculateOverlap(CompiledQueryLong wordMetadataQuery) {
if (wordMetadataQuery.size() < 2)
return 0;
long mask = CompiledQueryAggregates.longBitmaskAggregate(wordMetadataQuery,
score -> score >>> WordMetadata.POSITIONS_SHIFT);
return bitsSetFactor(mask);
}
/** Calculate the average minimum distance between the positions of the terms
 * in the query. This is small when the terms are frequently found
 * close to each other in the document.
*/
public double calculateAvgMutualJaccard(CompiledQueryLong wordMetadataQuery, ResultRankingContext ctx) {
public double calculateAvgMinDistance(CompiledQuery<GammaCodedSequence> positions, ResultRankingContext ctx) {
double sum = 0;
int cnt = 0;
for (int i = 0; i < wordMetadataQuery.size(); i++) {
for (int i = 0; i < positions.size(); i++) {
// Skip terms that are not in the regular mask
if (!ctx.regularMask.get(i))
continue;
long imask = WordMetadata.decodePositions(wordMetadataQuery.at(i));
var posi = positions.at(i);
// Skip terms that are not in the document
if (imask == 0L)
if (posi == null)
continue;
for (int j = i + 1; j < wordMetadataQuery.size(); j++) {
for (int j = i + 1; j < positions.size(); j++) {
// Skip terms that are not in the regular mask
if (!ctx.regularMask.get(j))
continue;
long jmask = WordMetadata.decodePositions(wordMetadataQuery.at(j));
var posj = positions.at(j);
// Skip terms that are not in the document
if (jmask == 0L)
if (posj == null)
continue;
long quot = Long.bitCount(imask & jmask);
long rem = Long.bitCount(imask | jmask);
// rem is always > 0 because imask and jmask are not both 0
sum += quot/(double) rem;
int distance = SequenceOperations.minDistance(posi.iterator(), posj.iterator());
sum += distance;
cnt++;
}
}
@ -68,15 +46,8 @@ public class TermCoherenceFactor {
if (cnt > 0) {
return sum / cnt;
} else {
return 0;
return 1000.;
}
}
double bitsSetFactor(long mask) {
final int bitsSetInMask = Long.bitCount(mask);
return Math.pow(bitsSetInMask/(double) WordMetadata.POSITIONS_COUNT, 0.25);
}
}
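As a worked example of the new factor: with term position lists {5, 20} and {7, 40}, the closest pair of occurrences is 5 and 7, giving a pairwise minimum distance of 2; with more terms the result is the mean over all qualifying pairs, and when fewer than two terms contribute positions the method falls back to the 1000-word default above.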

View File

@ -0,0 +1,382 @@
package nu.marginalia.index;
import com.google.inject.Guice;
import com.google.inject.Inject;
import it.unimi.dsi.fastutil.ints.IntList;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongList;
import nu.marginalia.IndexLocations;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.hash.MurmurHash3_128;
import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.ReverseIndexConstructor;
import nu.marginalia.index.domainrankings.DomainRankings;
import nu.marginalia.index.forward.ForwardIndexConverter;
import nu.marginalia.index.forward.ForwardIndexFileNames;
import nu.marginalia.index.index.CombinedIndexReader;
import nu.marginalia.index.index.StatefulIndex;
import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
import nu.marginalia.index.journal.reader.IndexJournalReader;
import nu.marginalia.index.journal.writer.IndexJournalWriter;
import nu.marginalia.index.positions.TermData;
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
import nu.marginalia.linkdb.docs.DocumentDbReader;
import nu.marginalia.linkdb.docs.DocumentDbWriter;
import nu.marginalia.linkdb.model.DocdbUrlDetail;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.model.id.UrlIdCodec;
import nu.marginalia.model.idx.DocumentFlags;
import nu.marginalia.model.idx.DocumentMetadata;
import nu.marginalia.model.idx.WordFlags;
import nu.marginalia.model.idx.WordMetadata;
import nu.marginalia.process.control.FakeProcessHeartbeat;
import nu.marginalia.process.control.ProcessHeartbeat;
import nu.marginalia.sequence.GammaCodedSequence;
import nu.marginalia.service.control.ServiceHeartbeat;
import nu.marginalia.service.server.Initialization;
import nu.marginalia.storage.FileStorageService;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.parallel.Execution;
import java.io.IOException;
import java.lang.foreign.Arena;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.SQLException;
import java.util.*;
import static nu.marginalia.linkdb.LinkdbFileNames.DOCDB_FILE_NAME;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;
@Execution(SAME_THREAD)
public class CombinedIndexReaderTest {
@Inject
Initialization initialization;
IndexQueryServiceIntegrationTestModule testModule;
@Inject
StatefulIndex statefulIndex;
@Inject
IndexJournalWriter indexJournalWriter;
@Inject
FileStorageService fileStorageService;
@Inject
DomainRankings domainRankings;
@Inject
ProcessHeartbeat processHeartbeat;
@Inject
DocumentDbReader documentDbReader;
@Inject
IndexFactory indexFactory;
@BeforeEach
public void setUp() throws IOException {
testModule = new IndexQueryServiceIntegrationTestModule();
Guice.createInjector(testModule).injectMembers(this);
initialization.setReady();
}
@AfterEach
public void tearDown() throws IOException {
testModule.cleanUp();
}
private final MockDocumentMeta anyMetadata = new MockDocumentMeta(0, new DocumentMetadata(2, 0, 14, EnumSet.noneOf(DocumentFlags.class)));
@Test
public void testSimpleRetrieval() throws Exception {
new MockData().add(
d(1, 1),
anyMetadata,
w("hello", WordFlags.Title, 33, 55),
w("world", WordFlags.Subjects, 34)
).load();
var reader = indexFactory.getCombinedIndexReader();
var query = reader.findFullWord(kw("hello")).build();
var buffer = new LongQueryBuffer(32);
query.getMoreResults(buffer);
assertEquals(
List.of(d(1, 1)),
decode(buffer)
);
var helloMeta = td(reader, kw("hello"), d(1, 1));
assertEquals(helloMeta.flags(), WordFlags.Title.asBit());
assertEquals(IntList.of(33, 55), helloMeta.positions().values());
var worldMeta = td(reader, kw("world"), d(1, 1));
assertEquals(worldMeta.flags(), WordFlags.Subjects.asBit());
assertEquals(IntList.of(34), worldMeta.positions().values());
}
TermData td(CombinedIndexReader reader, long wordId, MockDataDocument docId) {
return (reader.getTermMetadata(Arena.global(), wordId, new CombinedDocIdList(docId.docId())).array())[0];
}
@Test
public void testUnionRetrieval() throws Exception {
new MockData()
.add(
d(1, 1),
anyMetadata,
w("hello", WordFlags.Title),
w("world", WordFlags.Title)
)
.add(
d(1, 2),
anyMetadata,
w("world", WordFlags.Title)
)
.add(
d(1, 3),
anyMetadata,
w("world", WordFlags.Title)
)
.add(
d(2, 4),
anyMetadata,
w("hello", WordFlags.Title),
w("world", WordFlags.Title)
)
.load();
var reader = indexFactory.getCombinedIndexReader();
var query = reader
.findFullWord(kw("hello"))
.also(kw("world"))
.build();
var buffer = new LongQueryBuffer(32);
query.getMoreResults(buffer);
assertEquals(
List.of(d(1, 1), d(2, 4)),
decode(buffer)
);
}
@Test
public void testNotFilterRetrieval() throws Exception {
new MockData()
.add(
d(1, 1),
anyMetadata,
w("hello", WordFlags.Title),
w("world", WordFlags.Title),
w("goodbye", WordFlags.Title)
)
.add(
d(1, 2),
anyMetadata,
w("world", WordFlags.Title)
)
.add(
d(1, 3),
anyMetadata,
w("world", WordFlags.Title)
)
.add(
d(2, 4),
anyMetadata,
w("hello", WordFlags.Title),
w("world", WordFlags.Title)
)
.load();
var reader = indexFactory.getCombinedIndexReader();
var query = reader.findFullWord(kw("hello"))
.also(kw("world"))
.not(kw("goodbye"))
.build();
var buffer = new LongQueryBuffer(32);
query.getMoreResults(buffer);
assertEquals(
List.of(d(2, 4)),
decode(buffer)
);
}
List<MockDataDocument> decode(LongQueryBuffer buffer) {
List<MockDataDocument> result = new ArrayList<>();
for (int i = 0; i < buffer.size(); i++) {
result.add(new MockDataDocument(buffer.data.get(i)));
}
return result;
}
private MockDataDocument d(int domainId, int ordinal) {
return new MockDataDocument(domainId, ordinal);
}
private void constructIndex() throws IOException {
createForwardIndex();
createFullReverseIndex();
createPrioReverseIndex();
}
private void createFullReverseIndex() throws IOException {
Path outputFileDocs = ReverseIndexFullFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexFullFileNames.FileIdentifier.DOCS, ReverseIndexFullFileNames.FileVersion.NEXT);
Path outputFileWords = ReverseIndexFullFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexFullFileNames.FileIdentifier.WORDS, ReverseIndexFullFileNames.FileVersion.NEXT);
Path outputFilePositions = ReverseIndexFullFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexFullFileNames.FileIdentifier.POSITIONS, ReverseIndexFullFileNames.FileVersion.NEXT);
Path workDir = IndexLocations.getIndexConstructionArea(fileStorageService);
Path tmpDir = workDir.resolve("tmp");
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
var constructor =
new ReverseIndexConstructor(
outputFileDocs,
outputFileWords,
outputFilePositions,
IndexJournalReader::singleFile,
DocIdRewriter.identity(),
tmpDir);
constructor.createReverseIndex(new FakeProcessHeartbeat(), "name", workDir);
}
private void createPrioReverseIndex() throws IOException {
Path outputFileDocs = ReverseIndexPrioFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexPrioFileNames.FileIdentifier.DOCS, ReverseIndexPrioFileNames.FileVersion.NEXT);
Path outputFileWords = ReverseIndexPrioFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexPrioFileNames.FileIdentifier.WORDS, ReverseIndexPrioFileNames.FileVersion.NEXT);
Path outputFilePositions = ReverseIndexPrioFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexPrioFileNames.FileIdentifier.POSITIONS, ReverseIndexPrioFileNames.FileVersion.NEXT);
Path workDir = IndexLocations.getIndexConstructionArea(fileStorageService);
Path tmpDir = workDir.resolve("tmp");
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
var constructor = new ReverseIndexConstructor(
outputFileDocs,
outputFileWords,
outputFilePositions,
IndexJournalReader::singleFile,
DocIdRewriter.identity(),
tmpDir);
constructor.createReverseIndex(new FakeProcessHeartbeat(), "name", workDir);
}
private void createForwardIndex() throws IOException {
Path workDir = IndexLocations.getIndexConstructionArea(fileStorageService);
Path outputFileDocsId = ForwardIndexFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ForwardIndexFileNames.FileIdentifier.DOC_ID, ForwardIndexFileNames.FileVersion.NEXT);
Path outputFileDocsData = ForwardIndexFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ForwardIndexFileNames.FileIdentifier.DOC_DATA, ForwardIndexFileNames.FileVersion.NEXT);
ForwardIndexConverter converter = new ForwardIndexConverter(processHeartbeat,
IndexJournalReader.paging(workDir),
outputFileDocsId,
outputFileDocsData,
domainRankings
);
converter.convert();
}
MurmurHash3_128 hasher = new MurmurHash3_128();
long kw(String s) {
return hasher.hashKeyword(s);
}
class MockData {
private final Map<Long, List<MockDataKeyword>> allData = new HashMap<>();
private final Map<Long, MockDocumentMeta> metaByDoc = new HashMap<>();
public MockData add(MockDataDocument document,
MockDocumentMeta meta,
MockDataKeyword... words)
{
long id = UrlIdCodec.encodeId(document.domainId, document.ordinal);
allData.computeIfAbsent(id, l -> new ArrayList<>()).addAll(List.of(words));
metaByDoc.put(id, meta);
return this;
}
void load() throws IOException, SQLException, URISyntaxException {
allData.forEach((doc, words) -> {
var meta = metaByDoc.get(doc);
var header = new IndexJournalEntryHeader(
doc,
meta.features,
100,
meta.documentMetadata.encode()
);
String[] keywords = words.stream().map(w -> w.keyword).toArray(String[]::new);
long[] metadata = words.stream().map(w -> w.termMetadata).mapToLong(Long::longValue).toArray();
var positions = words.stream().map(w -> w.positions).map(pos -> GammaCodedSequence.generate(ByteBuffer.allocate(1024), pos.toIntArray())).toArray(GammaCodedSequence[]::new);
indexJournalWriter.put(header,
new IndexJournalEntryData(keywords, metadata, positions));
});
var linkdbWriter = new DocumentDbWriter(
IndexLocations.getLinkdbLivePath(fileStorageService).resolve(DOCDB_FILE_NAME)
);
for (Long key : allData.keySet()) {
linkdbWriter.add(new DocdbUrlDetail(
key,
new EdgeUrl("https://www.example.com"),
"test",
"test",
0.,
"HTML5",
0,
null,
0,
5
));
}
linkdbWriter.close();
indexJournalWriter.close();
constructIndex();
documentDbReader.reconnect();
statefulIndex.switchIndex();
}
}
record MockDataDocument(int domainId, int ordinal) {
public MockDataDocument(long encodedId) {
this(UrlIdCodec.getDomainId(encodedId), UrlIdCodec.getDocumentOrdinal(encodedId));
}
public long docId() {
return UrlIdCodec.encodeId(domainId, ordinal);
}
}
record MockDocumentMeta(int features, DocumentMetadata documentMetadata) {}
record MockDataKeyword(String keyword, long termMetadata, IntList positions) {}
MockDataKeyword w(String keyword, WordFlags flags, int... positions) {
return new MockDataKeyword(keyword, new WordMetadata(0L, EnumSet.of(flags)).encode(), IntList.of(positions));
}
}

View File

@ -13,7 +13,6 @@ import nu.marginalia.process.control.FakeProcessHeartbeat;
import nu.marginalia.process.control.ProcessHeartbeat;
import nu.marginalia.sequence.GammaCodedSequence;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.hash.MurmurHash3_128;
import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.ReverseIndexConstructor;
import nu.marginalia.index.forward.ForwardIndexConverter;
@ -142,6 +141,53 @@ public class IndexQueryServiceIntegrationSmokeTest {
Assertions.assertArrayEquals(ids, actual);
}
@Test
public void testSimple() throws Exception {
var linkdbWriter = new DocumentDbWriter(
IndexLocations.getLinkdbLivePath(fileStorageService)
.resolve(DOCDB_FILE_NAME)
);
for (int i = 1; i < 512; i++) {
loadData(linkdbWriter, i);
}
linkdbWriter.close();
documentDbReader.reconnect();
indexJournalWriter.close();
constructIndex();
statefulIndex.switchIndex();
var rsp = queryService.justQuery(
SearchSpecification.builder()
.queryLimits(new QueryLimits(10, 10, Integer.MAX_VALUE, 4000))
.queryStrategy(QueryStrategy.SENTENCE)
.year(SpecificationLimit.none())
.quality(SpecificationLimit.none())
.size(SpecificationLimit.none())
.rank(SpecificationLimit.none())
.rankingParams(ResultRankingParameters.sensibleDefaults())
.domains(new ArrayList<>())
.searchSetIdentifier("NONE")
.query(
SearchQuery.builder("2")
.include("2")
.build()
).build()
);
int[] idxes = new int[] { 62, 222, 382, 60, 124, 220, 284, 380, 444, 122 };
long[] ids = IntStream.of(idxes).mapToLong(Long::valueOf).toArray();
long[] actual = rsp.results
.stream()
.mapToLong(i -> i.rawIndexResult.getDocumentId())
.map(UrlIdCodec::getDocumentOrdinal)
.toArray();
System.out.println(Arrays.toString(actual));
System.out.println(Arrays.toString(ids));
Assertions.assertArrayEquals(ids, actual);
}
@Test
public void testDomainQuery() throws Exception {
@ -297,7 +343,6 @@ public class IndexQueryServiceIntegrationSmokeTest {
return UrlIdCodec.encodeId((32 - (id % 32)), id);
}
MurmurHash3_128 hasher = new MurmurHash3_128();
@SneakyThrows
public void loadData(DocumentDbWriter ldbw, int id) {
int[] factors = IntStream
@ -305,22 +350,44 @@ public class IndexQueryServiceIntegrationSmokeTest {
.filter(v -> (id % v) == 0)
.toArray();
System.out.println("id:" + id + " factors: " + Arrays.toString(factors));
long fullId = fullId(id);
var header = new IndexJournalEntryHeader(factors.length, 0, fullId, new DocumentMetadata(0, 0, 0, 0, id % 5, id, id % 20, (byte) 0).encode());
long[] data = new long[factors.length * 2];
for (int i = 0; i < factors.length; i++) {
data[2 * i] = hasher.hashNearlyASCII(Integer.toString(factors[i]));
data[2 * i + 1] = new WordMetadata(i, EnumSet.of(WordFlags.Title)).encode();
}
var header = new IndexJournalEntryHeader(factors.length, 0, 100, fullId, new DocumentMetadata(0, 0, 0, 0, id % 5, id, id % 20, (byte) 0).encode());
ldbw.add(new DocdbUrlDetail(
fullId, new EdgeUrl("https://www.example.com/"+id),
"test", "test", 0., "HTML5", 0, null, 0, 10
));
String[] keywords = IntStream.range(0, factors.length).mapToObj(Integer::toString).toArray(String[]::new);
String[] keywords = IntStream.of(factors).mapToObj(Integer::toString).toArray(String[]::new);
long[] metadata = new long[factors.length];
for (int i = 0; i < factors.length; i++) {
metadata[i] = new WordMetadata(i, EnumSet.of(WordFlags.Title)).encode();
}
GammaCodedSequence[] positions = new GammaCodedSequence[factors.length];
ByteBuffer wa = ByteBuffer.allocate(32);
for (int i = 0; i < factors.length; i++) {
positions[i] = GammaCodedSequence.generate(wa, factors);
}
indexJournalWriter.put(header, new IndexJournalEntryData(keywords, metadata, positions));
}
@SneakyThrows
public void loadDataWithDomain(DocumentDbWriter ldbw, int domain, int id) {
int[] factors = IntStream.rangeClosed(1, id).filter(v -> (id % v) == 0).toArray();
long fullId = UrlIdCodec.encodeId(domain, id);
var header = new IndexJournalEntryHeader(factors.length, 0, 100, fullId, DocumentMetadata.defaultValue());
ldbw.add(new DocdbUrlDetail(
fullId, new EdgeUrl("https://www.example.com/"+id),
"test", "test", 0., "HTML5", 0, null, 0, 10
));
String[] keywords = IntStream.of(factors).mapToObj(Integer::toString).toArray(String[]::new);
long[] metadata = new long[factors.length];
for (int i = 0; i < factors.length; i++) {
metadata[i] = new WordMetadata(i, EnumSet.of(WordFlags.Title)).encode();
@ -334,30 +401,4 @@ public class IndexQueryServiceIntegrationSmokeTest {
indexJournalWriter.put(header, new IndexJournalEntryData(keywords, metadata, positions));
}
@SneakyThrows
public void loadDataWithDomain(DocumentDbWriter ldbw, int domain, int id) {
int[] factors = IntStream.rangeClosed(1, id).filter(v -> (id % v) == 0).toArray();
long fullId = UrlIdCodec.encodeId(domain, id);
var header = new IndexJournalEntryHeader(factors.length, 0, fullId, DocumentMetadata.defaultValue());
ldbw.add(new DocdbUrlDetail(
fullId, new EdgeUrl("https://www.example.com/"+id),
"test", "test", 0., "HTML5", 0, null, 0, 10
));
String[] keywords = IntStream.range(0, factors.length).mapToObj(Integer::toString).toArray(String[]::new);
long[] metadata = new long[factors.length];
for (int i = 0; i < factors.length; i++) {
metadata[i] = new WordMetadata(i, EnumSet.of(WordFlags.Title)).encode();
}
GammaCodedSequence[] positions = new GammaCodedSequence[factors.length];
ByteBuffer wa = ByteBuffer.allocate(16);
for (int i = 0; i < factors.length; i++) {
positions[i] = GammaCodedSequence.generate(wa, i);
}
indexJournalWriter.put(header, new IndexJournalEntryData(keywords, metadata, positions));
}
}

View File

@ -565,6 +565,7 @@ public class IndexQueryServiceIntegrationTest {
var header = new IndexJournalEntryHeader(
doc,
meta.features,
100,
meta.documentMetadata.encode()
);

View File

@ -1,100 +0,0 @@
package nu.marginalia.ranking.results;
import nu.marginalia.api.searchquery.model.compiled.CompiledQuery;
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
import nu.marginalia.api.searchquery.model.compiled.CqDataInt;
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
import nu.marginalia.model.idx.DocumentFlags;
import nu.marginalia.model.idx.WordFlags;
import nu.marginalia.model.crawl.PubDate;
import nu.marginalia.model.idx.DocumentMetadata;
import nu.marginalia.model.idx.WordMetadata;
import nu.marginalia.ranking.results.factors.*;
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;
import java.util.*;
import static org.mockito.Mockito.when;
class ResultValuatorTest {
TermFrequencyDict dict;
ResultValuator valuator;
@BeforeEach
public void setUp() {
dict = Mockito.mock(TermFrequencyDict.class);
when(dict.docCount()).thenReturn(100_000);
valuator = new ResultValuator(
new TermCoherenceFactor()
);
}
CqDataInt frequencyData = new CqDataInt(new int[] { 10 });
CompiledQueryLong titleOnlyLowCountSet = CompiledQuery.just(
new SearchResultKeywordScore("bob", 1,
wordMetadata(Set.of(1), EnumSet.of(WordFlags.Title)))
).mapToLong(SearchResultKeywordScore::encodedWordMetadata);
CompiledQueryLong highCountNoTitleSet = CompiledQuery.just(
new SearchResultKeywordScore("bob", 1,
wordMetadata(Set.of(1,3,4,6,7,9,10,11,12,14,15,16), EnumSet.of(WordFlags.TfIdfHigh)))
).mapToLong(SearchResultKeywordScore::encodedWordMetadata);;
CompiledQueryLong highCountSubjectSet = CompiledQuery.just(
new SearchResultKeywordScore("bob", 1,
wordMetadata(Set.of(1,3,4,6,7,9,10,11,12,14,15,16), EnumSet.of(WordFlags.TfIdfHigh, WordFlags.Subjects)))
).mapToLong(SearchResultKeywordScore::encodedWordMetadata);;
@Test
void evaluateTerms() {
when(dict.getTermFreq("bob")).thenReturn(10);
ResultRankingContext context = new ResultRankingContext(100000,
ResultRankingParameters.sensibleDefaults(),
new BitSet(),
new BitSet(),
frequencyData,
frequencyData);
long docMeta = docMetadata(0, 2010, 5, EnumSet.noneOf(DocumentFlags.class));
int features = 0;
double titleOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, docMeta, features, 10_000, context, null);
double titleLongOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, docMeta, features, 10_000, context, null);
double highCountNoTitle = valuator.calculateSearchResultValue(highCountNoTitleSet, docMeta, features, 10_000, context, null);
double highCountSubject = valuator.calculateSearchResultValue(highCountSubjectSet, docMeta, features, 10_000, context, null);
System.out.println(titleOnlyLowCount);
System.out.println(titleLongOnlyLowCount);
System.out.println(highCountNoTitle);
System.out.println(highCountSubject);
}
private long docMetadata(int topology,
int year,
int quality,
EnumSet<DocumentFlags> flags) {
return new DocumentMetadata(topology, PubDate.toYearByte(year), quality, flags).encode();
}
private long wordMetadata(Set<Integer> positions, Set<WordFlags> wordFlags) {
long posBits = positions.stream()
.mapToLong(i -> ((1L << i) & 0xFF_FFFF_FFFF_FFFFL))
.reduce((a,b) -> a|b)
.orElse(0L);
return new WordMetadata(posBits, wordFlags).encode();
}
}

View File

@ -1,107 +0,0 @@
package nu.marginalia.ranking.results.factors;
import nu.marginalia.api.searchquery.model.compiled.CompiledQuery;
import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates;
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
import nu.marginalia.bbpc.BrailleBlockPunchCards;
import nu.marginalia.model.idx.WordMetadata;
import org.junit.jupiter.api.Test;
import java.util.ArrayList;
import java.util.List;
import static org.junit.jupiter.api.Assertions.*;
class TermCoherenceFactorTest {
TermCoherenceFactor termCoherenceFactor = new TermCoherenceFactor();
@Test
public void testAllBitsSet() {
var allPositionsSet = createSet(
~0L,
~0L
);
long mask = CompiledQueryAggregates.longBitmaskAggregate(
allPositionsSet,
SearchResultKeywordScore::positions
);
assertEquals(1.0, termCoherenceFactor.bitsSetFactor(mask), 0.01);
assertEquals(1.0,
termCoherenceFactor.calculateOverlap(
allPositionsSet.mapToLong(SearchResultKeywordScore::encodedWordMetadata)
)
);
}
@Test
public void testNoBitsSet() {
var allPositionsSet = createSet(
0, 0
);
long mask = CompiledQueryAggregates.longBitmaskAggregate(allPositionsSet, score -> score.positions() & WordMetadata.POSITIONS_MASK);
assertEquals(0, termCoherenceFactor.bitsSetFactor(mask), 0.01);
assertEquals(0, termCoherenceFactor.calculateOverlap(allPositionsSet.mapToLong(SearchResultKeywordScore::encodedWordMetadata)));
}
@Test @SuppressWarnings("unchecked")
public void testLowPosMatches() {
var positions = createSet(
List.of(0, 1, 2, 3), List.of(0, 1, 2, 3)
);
long mask = CompiledQueryAggregates.longBitmaskAggregate(positions, score -> score.positions() & WordMetadata.POSITIONS_MASK);
printMask(mask);
}
@Test @SuppressWarnings("unchecked")
public void testHiPosMatches() {
var positions = createSet(
List.of(55, 54, 53, 52), List.of(55, 54, 53, 52)
);
long mask = CompiledQueryAggregates.longBitmaskAggregate(positions, score -> score.positions() & WordMetadata.POSITIONS_MASK);
printMask(mask);
}
@Test
public void testBitMatchScaling() {
for (int i = 1; i < 48; i++) {
System.out.println(i + ":" + termCoherenceFactor.bitsSetFactor((1L << i) - 1));
}
}
void printMask(long mask) {
System.out.println(BrailleBlockPunchCards.printBits(mask, 48));
}
CompiledQuery<SearchResultKeywordScore> createSet(List<Integer>... maskPositions) {
long[] positions = new long[maskPositions.length];
for (int i = 0; i < maskPositions.length; i++) {
for (long pos : maskPositions[i]) {
positions[i] |= (1L<<pos);
}
}
return createSet(positions);
}
CompiledQuery<SearchResultKeywordScore> createSet(long... positionMasks) {
List<SearchResultKeywordScore> keywords = new ArrayList<>();
for (int i = 0; i < positionMasks.length; i++) {
keywords.add(new SearchResultKeywordScore("", 0,
new WordMetadata(positionMasks[i] & WordMetadata.POSITIONS_MASK, (byte) 0).encode()));
}
return CompiledQuery.just(keywords.toArray(SearchResultKeywordScore[]::new));
}
}

View File

@ -17,12 +17,13 @@ public class EliasGammaCodec implements IntIterator {
private final BitReader reader;
int rem = 0;
private int last = 0;
private int last;
private int next = 0;
private EliasGammaCodec(ByteBuffer buffer) {
private EliasGammaCodec(ByteBuffer buffer, int zero) {
reader = new BitReader(buffer);
last = zero;
int bits = reader.takeWhileZero();
if (!reader.hasMore()) {
@ -33,9 +34,24 @@ public class EliasGammaCodec implements IntIterator {
}
}
public static int readCount(ByteBuffer buffer) {
var reader = new BitReader(buffer);
if (reader.getCurrentValue() > 0) {
int bits = reader.takeWhileZero();
return reader.get(bits);
}
else {
return 0;
}
}
/** Decode a sequence of integers from a ByteBuffer using the Elias Gamma code */
public static IntIterator decode(ByteBuffer buffer) {
return new EliasGammaCodec(buffer);
return new EliasGammaCodec(buffer, 0);
}
public static IntIterator decodeWithOffset(ByteBuffer buffer, int offset) {
return new EliasGammaCodec(buffer, offset);
}
/** Encode a sequence of integers into a ByteBuffer using the Elias Gamma code.

View File

@ -16,6 +16,7 @@ import java.util.StringJoiner;
* */
public class GammaCodedSequence implements BinarySerializable, Iterable<Integer> {
private final ByteBuffer raw;
int startPos = 0;
int startLimit = 0;
@ -43,6 +44,12 @@ public class GammaCodedSequence implements BinarySerializable, Iterable<Integer>
startLimit = bytes.limit();
}
public GammaCodedSequence(ByteBuffer bytes, int startPos, int startLimit) {
this.raw = bytes;
this.startPos = startPos;
this.startLimit = startLimit;
}
public GammaCodedSequence(byte[] bytes) {
raw = ByteBuffer.allocate(bytes.length);
raw.put(bytes);
@ -72,6 +79,18 @@ public class GammaCodedSequence implements BinarySerializable, Iterable<Integer>
return EliasGammaCodec.decode(raw);
}
/** Return an iterator over the sequence with a constant offset applied to each value.
* This is useful for comparing sequences with different offsets, and adds zero
* extra cost to the decoding process, which is already based on adding
* relative differences.
* */
public IntIterator offsetIterator(int offset) {
raw.position(startPos);
raw.limit(startLimit);
return EliasGammaCodec.decodeWithOffset(raw, offset);
}
public IntList values() {
var intItr = iterator();
IntArrayList ret = new IntArrayList(8);
@ -81,18 +100,6 @@ public class GammaCodedSequence implements BinarySerializable, Iterable<Integer>
return ret;
}
/** Decode the sequence into an IntList;
 * this is a somewhat slow operation,
 * iterating over the data directly is more performant */
public IntList decode() {
IntArrayList ret = new IntArrayList(8);
var iter = iterator();
while (iter.hasNext()) {
ret.add(iter.nextInt());
}
return ret;
}
public int hashCode() {
return raw.hashCode();
}
@ -116,7 +123,11 @@ public class GammaCodedSequence implements BinarySerializable, Iterable<Integer>
return raw;
}
public int size() {
public int bufferSize() {
return raw.capacity();
}
public int valueCount() {
return EliasGammaCodec.readCount(buffer());
}
}
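A small usage sketch of the new accessors, mirroring how the tests in this change call GammaCodedSequence.generate; the buffer size and the literal positions are arbitrary:

import it.unimi.dsi.fastutil.ints.IntIterator;
import nu.marginalia.sequence.GammaCodedSequence;

import java.nio.ByteBuffer;

class GammaCodedSequenceSketch {
    public static void main(String[] args) {
        ByteBuffer workArea = ByteBuffer.allocate(1024);
        GammaCodedSequence seq = GammaCodedSequence.generate(workArea, 3, 5, 8);

        System.out.println(seq.valueCount());   // expected 3, without decoding every value
        System.out.println(seq.values());       // [3, 5, 8]

        // offsetIterator shifts every decoded value by a constant, which helps when
        // aligning two sequences recorded against different base offsets
        IntIterator it = seq.offsetIterator(-2);
        while (it.hasNext()) {
            System.out.print(it.nextInt() + " "); // 1 3 6
        }
    }
}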

View File

@ -0,0 +1,86 @@
package nu.marginalia.sequence;
import it.unimi.dsi.fastutil.ints.IntIterator;
public class SequenceOperations {
/** Return true if the sequences intersect, false otherwise.
* */
public static boolean intersectSequences(IntIterator... sequences) {
if (sequences.length <= 1)
return true;
// Initialize values and find the maximum value
int[] values = new int[sequences.length];
for (int i = 0; i < sequences.length; i++) {
if (sequences[i].hasNext())
values[i] = sequences[i].nextInt();
else
return false;
}
// Intersect the sequences by advancing all values smaller than the maximum seen so far
// until they are equal to the maximum value, or until the end of the sequence is reached
int max = Integer.MIN_VALUE;
int successes = 0;
for (int i = 0; successes < sequences.length; i = (i + 1) % sequences.length)
{
if (values[i] == max) {
successes++;
} else {
successes = 0;
// Discard values until we reach the maximum value seen so far,
// or until the end of the sequence is reached
while (values[i] < max) {
if (sequences[i].hasNext())
values[i] = sequences[i].nextInt();
else
return false;
}
// Update the maximum value, if necessary
max = Math.max(max, values[i]);
}
}
return true;
}
/** Return the minimum word distance between two sequences, or a negative value if either sequence is empty.
* */
public static int minDistance(IntIterator seqA, IntIterator seqB)
{
int minDistance = Integer.MAX_VALUE;
if (!seqA.hasNext() || !seqB.hasNext())
return -1;
int a = seqA.nextInt();
int b = seqB.nextInt();
while (true) {
int distance = Math.abs(a - b);
if (distance < minDistance)
minDistance = distance;
if (a <= b) {
if (seqA.hasNext()) {
a = seqA.nextInt();
} else {
break;
}
} else {
if (seqB.hasNext()) {
b = seqB.nextInt();
} else {
break;
}
}
}
return minDistance;
}
}
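A minimal sketch of minDistance on concrete data; the method advances whichever iterator is behind, so each sequence is traversed once:

import nu.marginalia.sequence.GammaCodedSequence;
import nu.marginalia.sequence.SequenceOperations;

import java.nio.ByteBuffer;

class MinDistanceSketch {
    public static void main(String[] args) {
        ByteBuffer workArea = ByteBuffer.allocate(1024);

        GammaCodedSequence a = GammaCodedSequence.generate(workArea, 1, 3, 5);
        GammaCodedSequence b = GammaCodedSequence.generate(workArea, 4, 10);

        // closest pair of positions is 5 and 4, so the minimum distance is 1
        System.out.println(SequenceOperations.minDistance(a.iterator(), b.iterator()));
    }
}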

View File

@ -20,6 +20,10 @@ public class BitReader {
this.currentValue = 0;
}
public long getCurrentValue() {
return currentValue;
}
/** Read the next bit from the buffer */
public boolean getBit() {
if (bitPosition <= 0) {

View File

@ -0,0 +1,75 @@
package nu.marginalia.sequence;
import it.unimi.dsi.fastutil.ints.IntIterator;
import org.junit.jupiter.api.Test;
import java.nio.ByteBuffer;
import static org.junit.jupiter.api.Assertions.*;
class SequenceOperationsTest {
@Test
void intersectSequencesSingle() {
ByteBuffer wa = ByteBuffer.allocate(1024);
GammaCodedSequence seq1 = GammaCodedSequence.generate(wa, 1);
assertTrue(SequenceOperations.intersectSequences(seq1.iterator()));
}
@Test
void intersectSequencesTrivialMatch() {
ByteBuffer wa = ByteBuffer.allocate(1024);
GammaCodedSequence seq1 = GammaCodedSequence.generate(wa, 1);
GammaCodedSequence seq2 = GammaCodedSequence.generate(wa, 1);
assertTrue(SequenceOperations.intersectSequences(seq1.iterator(), seq2.iterator()));
}
@Test
void intersectSequencesTrivialMismatch() {
ByteBuffer wa = ByteBuffer.allocate(1024);
GammaCodedSequence seq1 = GammaCodedSequence.generate(wa, 1);
GammaCodedSequence seq2 = GammaCodedSequence.generate(wa, 2);
assertFalse(SequenceOperations.intersectSequences(seq1.iterator(), seq2.iterator()));
}
@Test
void intersectSequencesOffsetMatch() {
ByteBuffer wa = ByteBuffer.allocate(1024);
GammaCodedSequence seq1 = GammaCodedSequence.generate(wa, 1);
GammaCodedSequence seq2 = GammaCodedSequence.generate(wa, 3);
assertTrue(SequenceOperations.intersectSequences(seq1.offsetIterator(0), seq2.offsetIterator(-2)));
}
@Test
void intersectSequencesDeepMatch() {
ByteBuffer wa = ByteBuffer.allocate(1024);
GammaCodedSequence seq1 = GammaCodedSequence.generate(wa, 1, 3, 4, 7, 8, 9, 11);
GammaCodedSequence seq2 = GammaCodedSequence.generate(wa, 2, 5, 8, 14);
assertTrue(SequenceOperations.intersectSequences(seq1.iterator(), seq2.iterator()));
}
@Test
void intersectSequencesDeepMatch3() {
ByteBuffer wa = ByteBuffer.allocate(1024);
GammaCodedSequence seq1 = GammaCodedSequence.generate(wa, 1, 3, 4, 7, 8, 9, 11);
GammaCodedSequence seq2 = GammaCodedSequence.generate(wa, 2, 5, 8, 14);
GammaCodedSequence seq3 = GammaCodedSequence.generate(wa, 1, 5, 8, 9);
assertTrue(SequenceOperations.intersectSequences(seq1.iterator(), seq2.iterator(), seq3.iterator()));
}
@Test
void intersectSequencesDeepMismatch() {
ByteBuffer wa = ByteBuffer.allocate(1024);
GammaCodedSequence seq1 = GammaCodedSequence.generate(wa, 1, 3, 4, 7, 8, 9, 11);
GammaCodedSequence seq2 = GammaCodedSequence.generate(wa, 2, 5, 14);
assertFalse(SequenceOperations.intersectSequences(seq1.iterator(), seq2.iterator()));
}
}

View File

@ -26,6 +26,8 @@ public class DocumentRecordKeywordsProjection {
public int htmlFeatures;
public long documentMetadata;
public int length;
public List<String> words;
public TLongList metas;
public List<GammaCodedSequence> positions;
@ -39,13 +41,14 @@ public class DocumentRecordKeywordsProjection {
}
public static Collection<String> requiredColumns() {
return List.of("domain", "ordinal", "htmlFeatures", "word", "wordMeta", "documentMetadata");
return List.of("domain", "ordinal", "htmlFeatures", "word", "wordMeta", "documentMetadata", "length");
}
@SneakyThrows
public DocumentRecordKeywordsProjection add(String heading, Object value) {
switch (heading) {
case "domain" -> domain = (String) value;
case "length" -> length = (Integer) value;
case "ordinal" -> ordinal = (Integer) value;
case "htmlFeatures" -> htmlFeatures = (Integer) value;
case "documentMetadata" -> documentMetadata = (Long) value;

View File

@ -6,12 +6,10 @@ import lombok.SneakyThrows;
import nu.marginalia.IndexLocations;
import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.hash.MurmurHash3_128;
import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
import nu.marginalia.index.journal.writer.IndexJournalWriterPagingImpl;
import nu.marginalia.index.journal.writer.IndexJournalWriter;
import nu.marginalia.keyword.model.DocumentKeywords;
import nu.marginalia.model.idx.DocumentMetadata;
import nu.marginalia.index.journal.IndexJournalFileNames;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -41,18 +39,11 @@ public class LoaderIndexJournalWriter {
indexWriter = new IndexJournalWriterPagingImpl(indexArea);
}
public void putWords(long combinedId,
int features,
DocumentMetadata metadata,
DocumentKeywords wordSet) {
putWords(combinedId, features, metadata.encode(), wordSet);
}
@SneakyThrows
public void putWords(long combinedId,
int features,
long metadata,
int length,
DocumentKeywords wordSet) {
if (wordSet.isEmpty()) {
@ -65,7 +56,7 @@ public class LoaderIndexJournalWriter {
return;
}
var header = new IndexJournalEntryHeader(combinedId, features, metadata);
var header = new IndexJournalEntryHeader(combinedId, features, length, metadata);
var data = new IndexJournalEntryData(wordSet.keywords, wordSet.metadata, wordSet.positions);
indexWriter.put(header, data);

View File

@ -75,6 +75,7 @@ public class KeywordLoaderService {
writer.putWords(combinedId,
projection.htmlFeatures,
projection.documentMetadata,
projection.length,
words);
}
}

View File

@ -91,7 +91,7 @@ public class SearchServicePaperDoll extends AbstractModule {
long positions)
{
results.add(new DecoratedSearchResultItem(
new SearchResultItem(url.hashCode(), 2, 3, false),
new SearchResultItem(url.hashCode(), 2, 3),
new EdgeUrl(url),
title,
description,