From 36160988e29ec0fb05ef6dfe4c34243718719f60 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Mon, 10 Jun 2024 15:09:06 +0200 Subject: [PATCH] (index) Integrate positions data with indexes WIP This change integrates the new positions data with the forward and reverse indexes. The ranking code is still only partially re-written. --- .../model/compiled/CompiledQueryInt.java | 6 +- .../model/compiled/CompiledQueryParser.java | 3 +- .../model/compiled/CqExpression.java | 12 + .../aggregate/CompiledQueryAggregates.java | 6 +- .../aggregate/CqIntMaxMinOperator.java | 5 +- .../searchquery/model/query/SearchQuery.java | 44 ++ .../model/results/SearchResultItem.java | 4 +- .../index/forward/ForwardIndexConverter.java | 4 +- .../index/forward/ForwardIndexReader.java | 12 +- .../forward/ForwardIndexConverterTest.java | 1 + .../model/IndexJournalEntryHeader.java | 3 + .../journal/reader/IndexJournalReadEntry.java | 15 +- .../journal/reader/IndexJournalReader.java | 2 +- .../reader/IndexJournalReaderSingleFile.java | 3 + .../reader/pointer/IndexJournalPointer.java | 13 + .../journal/writer/IndexJournalWriter.java | 1 - .../IndexJournalWriterSingleFileImpl.java | 30 +- .../index/journal/IndexJournalWriterTest.java | 100 ++++- .../marginalia/index/ReverseIndexReader.java | 48 ++- .../PositionsFileConstructor.java | 13 +- .../construction/ReverseIndexConstructor.java | 1 - .../ReversePreindexDocuments.java | 17 +- .../index/positions/PositionCodec.java | 25 ++ .../index/positions/PositionsFileReader.java | 39 ++ .../marginalia/index/positions/TermData.java | 21 + .../index/PositionsFileReaderTest.java | 63 +++ .../index/ReverseIndexReaderTest.java | 36 +- .../construction/ReversePreindexDocsTest.java | 10 +- .../ReversePreindexFinalizeTest.java | 6 - .../construction/TestJournalFactory.java | 14 +- .../nu/marginalia/index/IndexFactory.java | 10 +- .../nu/marginalia/index/IndexGrpcService.java | 5 +- .../index/index/CombinedIndexReader.java | 14 +- 
.../index/results/IndexMetadataService.java | 18 +- .../results/IndexResultValuationContext.java | 140 ++++--- .../results/IndexResultValuatorService.java | 104 +++-- .../TermMetadataForCombinedDocumentIds.java | 47 ++- .../results/model/ids/CombinedDocIdList.java | 4 + .../results/model/ids/DocMetadataList.java | 45 --- .../index/results/model/ids/TermIdList.java | 10 + .../results/model/ids/TermMetadataList.java | 55 +++ .../ranking/results/ResultValuator.java | 28 +- .../results/factors/Bm25FullGraphVisitor.java | 27 +- .../results/factors/TermCoherenceFactor.java | 55 +-- .../index/CombinedIndexReaderTest.java | 382 ++++++++++++++++++ ...IndexQueryServiceIntegrationSmokeTest.java | 113 ++++-- .../IndexQueryServiceIntegrationTest.java | 1 + .../ranking/results/ResultValuatorTest.java | 100 ----- .../factors/TermCoherenceFactorTest.java | 107 ----- .../marginalia/sequence/EliasGammaCodec.java | 22 +- .../sequence/GammaCodedSequence.java | 37 +- .../sequence/SequenceOperations.java | 86 ++++ .../nu/marginalia/sequence/io/BitReader.java | 4 + .../sequence/SequenceOperationsTest.java | 75 ++++ .../DocumentRecordKeywordsProjection.java | 5 +- .../loading/LoaderIndexJournalWriter.java | 13 +- .../documents/KeywordLoaderService.java | 1 + .../paperdoll/SearchServicePaperDoll.java | 2 +- 58 files changed, 1417 insertions(+), 650 deletions(-) create mode 100644 code/index/index-reverse/java/nu/marginalia/index/positions/PositionCodec.java create mode 100644 code/index/index-reverse/java/nu/marginalia/index/positions/PositionsFileReader.java create mode 100644 code/index/index-reverse/java/nu/marginalia/index/positions/TermData.java create mode 100644 code/index/index-reverse/test/nu/marginalia/index/PositionsFileReaderTest.java delete mode 100644 code/index/java/nu/marginalia/index/results/model/ids/DocMetadataList.java create mode 100644 code/index/java/nu/marginalia/index/results/model/ids/TermMetadataList.java create mode 100644 
code/index/test/nu/marginalia/index/CombinedIndexReaderTest.java delete mode 100644 code/index/test/nu/marginalia/ranking/results/ResultValuatorTest.java delete mode 100644 code/index/test/nu/marginalia/ranking/results/factors/TermCoherenceFactorTest.java create mode 100644 code/libraries/coded-sequence/java/nu/marginalia/sequence/SequenceOperations.java create mode 100644 code/libraries/coded-sequence/test/nu/marginalia/sequence/SequenceOperationsTest.java diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/CompiledQueryInt.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/CompiledQueryInt.java index 9e26c35c..0f80d479 100644 --- a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/CompiledQueryInt.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/CompiledQueryInt.java @@ -5,8 +5,8 @@ import java.util.stream.IntStream; /** A compiled index service query */ public class CompiledQueryInt { - private final CqExpression root; - private final CqDataInt data; + public final CqExpression root; + public final CqDataInt data; public CompiledQueryInt(CqExpression root, CqDataInt data) { this.root = root; @@ -26,7 +26,7 @@ public class CompiledQueryInt { return IntStream.range(0, data.size()); } - public long at(int index) { + public int at(int index) { return data.get(index); } diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/CompiledQueryParser.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/CompiledQueryParser.java index ae197fb9..ef379e5a 100644 --- a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/CompiledQueryParser.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/CompiledQueryParser.java @@ -61,7 +61,8 @@ public class CompiledQueryParser { String[] 
cqData = new String[wordIds.size()]; wordIds.forEach((w, i) -> cqData[i] = w); - return new CompiledQuery<>(root, new CqData<>(cqData)); + + return root.newQuery(cqData); } diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/CqExpression.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/CqExpression.java index e9972526..3f0cca50 100644 --- a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/CqExpression.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/CqExpression.java @@ -8,6 +8,18 @@ import java.util.stream.Stream; * */ public sealed interface CqExpression { + /** Create a new query for the provided data using this expression as the root */ + default CompiledQuery newQuery(T[] data) { + return new CompiledQuery<>(this, data); + } + /** Create a new query for the provided data using this expression as the root */ + default CompiledQueryInt newQuery(int[] data) { + return new CompiledQueryInt(this, new CqDataInt(data)); + } + /** Create a new query for the provided data using this expression as the root */ + default CompiledQueryLong newQuery(long[] data) { + return new CompiledQueryLong(this, new CqDataLong(data)); + } Stream stream(); diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/aggregate/CompiledQueryAggregates.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/aggregate/CompiledQueryAggregates.java index 7e8ca8ec..2ca45dca 100644 --- a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/aggregate/CompiledQueryAggregates.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/aggregate/CompiledQueryAggregates.java @@ -2,6 +2,7 @@ package nu.marginalia.api.searchquery.model.compiled.aggregate; import it.unimi.dsi.fastutil.longs.LongSet; import 
nu.marginalia.api.searchquery.model.compiled.CompiledQuery; +import nu.marginalia.api.searchquery.model.compiled.CompiledQueryInt; import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong; import java.util.ArrayList; @@ -36,7 +37,10 @@ public class CompiledQueryAggregates { public static int intMaxMinAggregate(CompiledQuery query, ToIntFunction operator) { return query.root.visit(new CqIntMaxMinOperator(query, operator)); } - + /** Apply the operator to each leaf node, then return the highest minimum value found along any path */ + public static int intMaxMinAggregate(CompiledQueryInt query, IntUnaryOperator operator) { + return query.root.visit(new CqIntMaxMinOperator(query, operator)); + } /** Apply the operator to each leaf node, then return the highest minimum value found along any path */ public static int intMaxMinAggregate(CompiledQueryLong query, LongToIntFunction operator) { return query.root.visit(new CqIntMaxMinOperator(query, operator)); diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/aggregate/CqIntMaxMinOperator.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/aggregate/CqIntMaxMinOperator.java index 621dff73..c9712ed4 100644 --- a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/aggregate/CqIntMaxMinOperator.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/aggregate/CqIntMaxMinOperator.java @@ -1,6 +1,7 @@ package nu.marginalia.api.searchquery.model.compiled.aggregate; import nu.marginalia.api.searchquery.model.compiled.CompiledQuery; +import nu.marginalia.api.searchquery.model.compiled.CompiledQueryInt; import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong; import nu.marginalia.api.searchquery.model.compiled.CqExpression; @@ -21,7 +22,9 @@ public class CqIntMaxMinOperator implements CqExpression.IntVisitor { public CqIntMaxMinOperator(CompiledQueryLong query, 
LongToIntFunction operator) { this.operator = idx -> operator.applyAsInt(query.at(idx)); } - + public CqIntMaxMinOperator(CompiledQueryInt query, IntUnaryOperator operator) { + this.operator = idx -> operator.applyAsInt(query.at(idx)); + } @Override public int onAnd(List parts) { int value = parts.getFirst().visit(this); diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/SearchQuery.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/SearchQuery.java index ffe02868..e33972c3 100644 --- a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/SearchQuery.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/SearchQuery.java @@ -36,6 +36,10 @@ public class SearchQuery { @Deprecated // why does this exist? private double value = 0; + public static SearchQueryBuilder builder(String compiledQuery) { + return new SearchQueryBuilder(compiledQuery); + } + public SearchQuery() { this.compiledQuery = ""; this.searchTermsInclude = new ArrayList<>(); @@ -81,5 +85,45 @@ public class SearchQuery { return sb.toString(); } + public static class SearchQueryBuilder { + private final String compiledQuery; + private List searchTermsInclude = new ArrayList<>(); + private List searchTermsExclude = new ArrayList<>(); + private List searchTermsAdvice = new ArrayList<>(); + private List searchTermsPriority = new ArrayList<>(); + private List> searchTermCoherences = new ArrayList<>(); + private SearchQueryBuilder(String compiledQuery) { + this.compiledQuery = compiledQuery; + } + + public SearchQueryBuilder include(String... terms) { + searchTermsInclude.addAll(List.of(terms)); + return this; + } + + public SearchQueryBuilder exclude(String... terms) { + searchTermsExclude.addAll(List.of(terms)); + return this; + } + + public SearchQueryBuilder advice(String... 
terms) { + searchTermsAdvice.addAll(List.of(terms)); + return this; + } + + public SearchQueryBuilder priority(String... terms) { + searchTermsPriority.addAll(List.of(terms)); + return this; + } + + public SearchQueryBuilder coherences(String... coherences) { + searchTermCoherences.add(List.of(coherences)); + return this; + } + + public SearchQuery build() { + return new SearchQuery(compiledQuery, searchTermsInclude, searchTermsExclude, searchTermsAdvice, searchTermsPriority, searchTermCoherences); + } + } } diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/SearchResultItem.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/SearchResultItem.java index ad8b8cb1..f676a954 100644 --- a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/SearchResultItem.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/SearchResultItem.java @@ -32,13 +32,11 @@ public class SearchResultItem implements Comparable { public SearchResultItem(long combinedId, long encodedDocMetadata, - int htmlFeatures, - boolean hasPrioTerm) { + int htmlFeatures) { this.combinedId = combinedId; this.encodedDocMetadata = encodedDocMetadata; this.keywordScores = new ArrayList<>(); this.htmlFeatures = htmlFeatures; - this.hasPrioTerm = hasPrioTerm; } diff --git a/code/index/index-forward/java/nu/marginalia/index/forward/ForwardIndexConverter.java b/code/index/index-forward/java/nu/marginalia/index/forward/ForwardIndexConverter.java index 80cf502b..7c3704ba 100644 --- a/code/index/index-forward/java/nu/marginalia/index/forward/ForwardIndexConverter.java +++ b/code/index/index-forward/java/nu/marginalia/index/forward/ForwardIndexConverter.java @@ -83,8 +83,10 @@ public class ForwardIndexConverter { int ranking = domainRankings.getRanking(domainId); long meta = DocumentMetadata.encodeRank(pointer.documentMeta(), ranking); + long features = 
pointer.documentFeatures() | ((long) pointer.documentSize() << 32L); + docFileData.set(entryOffset + ForwardIndexParameters.METADATA_OFFSET, meta); - docFileData.set(entryOffset + ForwardIndexParameters.FEATURES_OFFSET, pointer.documentFeatures()); + docFileData.set(entryOffset + ForwardIndexParameters.FEATURES_OFFSET, features); } progress.progress(TaskSteps.FORCE); diff --git a/code/index/index-forward/java/nu/marginalia/index/forward/ForwardIndexReader.java b/code/index/index-forward/java/nu/marginalia/index/forward/ForwardIndexReader.java index 5d26de82..f9393b45 100644 --- a/code/index/index-forward/java/nu/marginalia/index/forward/ForwardIndexReader.java +++ b/code/index/index-forward/java/nu/marginalia/index/forward/ForwardIndexReader.java @@ -82,9 +82,19 @@ public class ForwardIndexReader { long offset = idxForDoc(docId); if (offset < 0) return 0; - return (int) data.get(ENTRY_SIZE * offset + FEATURES_OFFSET); + return (int) (data.get(ENTRY_SIZE * offset + FEATURES_OFFSET) & 0xFFFF_FFFFL); } + public int getDocumentSize(long docId) { + assert UrlIdCodec.getRank(docId) == 0 : "Forward Index Reader fed dirty reverse index id"; + + long offset = idxForDoc(docId); + if (offset < 0) return 0; + + return (int) (data.get(ENTRY_SIZE * offset + FEATURES_OFFSET) >>> 32L); + } + + private int idxForDoc(long docId) { assert UrlIdCodec.getRank(docId) == 0 : "Forward Index Reader fed dirty reverse index id"; diff --git a/code/index/index-forward/test/nu/marginalia/index/forward/ForwardIndexConverterTest.java b/code/index/index-forward/test/nu/marginalia/index/forward/ForwardIndexConverterTest.java index b30f549f..5c02f648 100644 --- a/code/index/index-forward/test/nu/marginalia/index/forward/ForwardIndexConverterTest.java +++ b/code/index/index-forward/test/nu/marginalia/index/forward/ForwardIndexConverterTest.java @@ -79,6 +79,7 @@ class ForwardIndexConverterTest { writer.put( new IndexJournalEntryHeader(createId(id, id/20), id%3, + 15, (id % 5)), new 
IndexJournalEntryData( new String[]{}, diff --git a/code/index/index-journal/java/nu/marginalia/index/journal/model/IndexJournalEntryHeader.java b/code/index/index-journal/java/nu/marginalia/index/journal/model/IndexJournalEntryHeader.java index b0f3d41e..82dc904a 100644 --- a/code/index/index-journal/java/nu/marginalia/index/journal/model/IndexJournalEntryHeader.java +++ b/code/index/index-journal/java/nu/marginalia/index/journal/model/IndexJournalEntryHeader.java @@ -17,14 +17,17 @@ import nu.marginalia.model.idx.DocumentMetadata; */ public record IndexJournalEntryHeader(int entrySize, int documentFeatures, + int documentSize, long combinedId, long documentMeta) { public IndexJournalEntryHeader(long combinedId, int documentFeatures, + int documentSize, long documentMeta) { this(-1, documentFeatures, + documentSize, combinedId, documentMeta); } diff --git a/code/index/index-journal/java/nu/marginalia/index/journal/reader/IndexJournalReadEntry.java b/code/index/index-journal/java/nu/marginalia/index/journal/reader/IndexJournalReadEntry.java index 0f3a6ff2..aae65e81 100644 --- a/code/index/index-journal/java/nu/marginalia/index/journal/reader/IndexJournalReadEntry.java +++ b/code/index/index-journal/java/nu/marginalia/index/journal/reader/IndexJournalReadEntry.java @@ -28,12 +28,17 @@ public class IndexJournalReadEntry implements Iterable>> 48L); + final int docSize = (int) ((sizeBlock >>> 32L) & 0xFFFFL); + final int docFeatures = (int) (sizeBlock & 0xFFFF_FFFFL); final long docId = inputStream.readLong(); final long meta = inputStream.readLong(); + var header = new IndexJournalEntryHeader( - (int) (sizeBlock >>> 32L), - (int) (sizeBlock & 0xFFFF_FFFFL), + entrySize, + docFeatures, + docSize, docId, meta); @@ -57,6 +62,10 @@ public class IndexJournalReadEntry implements Iterable { public IndexJournalEntryTermData next() { // read the metadata for the term long termId = buffer.getLong(); - long meta = buffer.getLong(); + long meta = buffer.getShort(); // read the 
size of the sequence data int size = buffer.get() & 0xFF; diff --git a/code/index/index-journal/java/nu/marginalia/index/journal/reader/IndexJournalReader.java b/code/index/index-journal/java/nu/marginalia/index/journal/reader/IndexJournalReader.java index 2f57da61..2dd8d0e9 100644 --- a/code/index/index-journal/java/nu/marginalia/index/journal/reader/IndexJournalReader.java +++ b/code/index/index-journal/java/nu/marginalia/index/journal/reader/IndexJournalReader.java @@ -13,7 +13,7 @@ public interface IndexJournalReader { int FILE_HEADER_SIZE_BYTES = 8 * FILE_HEADER_SIZE_LONGS; int DOCUMENT_HEADER_SIZE_BYTES = 24; - int TERM_HEADER_SIZE_BYTES = 17; + int TERM_HEADER_SIZE_BYTES = 11; /** Create a reader for a single file. */ static IndexJournalReader singleFile(Path fileName) throws IOException { diff --git a/code/index/index-journal/java/nu/marginalia/index/journal/reader/IndexJournalReaderSingleFile.java b/code/index/index-journal/java/nu/marginalia/index/journal/reader/IndexJournalReaderSingleFile.java index 488d0dc6..d820f1e0 100644 --- a/code/index/index-journal/java/nu/marginalia/index/journal/reader/IndexJournalReaderSingleFile.java +++ b/code/index/index-journal/java/nu/marginalia/index/journal/reader/IndexJournalReaderSingleFile.java @@ -97,6 +97,9 @@ class SingleFileJournalPointer implements IndexJournalPointer { @Override public int documentFeatures() { return entry.documentFeatures(); } + @Override + public int documentSize() { return entry.documentSize(); } + /** Return an iterator over the terms in the current document. * This iterator is not valid after calling nextDocument(). 
*/ diff --git a/code/index/index-journal/java/nu/marginalia/index/journal/reader/pointer/IndexJournalPointer.java b/code/index/index-journal/java/nu/marginalia/index/journal/reader/pointer/IndexJournalPointer.java index 59e65e27..68d21360 100644 --- a/code/index/index-journal/java/nu/marginalia/index/journal/reader/pointer/IndexJournalPointer.java +++ b/code/index/index-journal/java/nu/marginalia/index/journal/reader/pointer/IndexJournalPointer.java @@ -42,6 +42,8 @@ public interface IndexJournalPointer extends Iterable */ int documentFeatures(); + int documentSize(); + /** Concatenate a number of journal pointers */ static IndexJournalPointer concatenate(IndexJournalPointer... pointers) { if (pointers.length == 1) @@ -94,6 +96,11 @@ class JoiningJournalPointer implements IndexJournalPointer { return pointers[pIndex].documentFeatures(); } + @Override + public int documentSize() { + return pointers[pIndex].documentSize(); + } + @NotNull @Override public Iterator iterator() { @@ -146,6 +153,12 @@ class FilteringJournalPointer implements IndexJournalPointer { return base.documentFeatures(); } + + @Override + public int documentSize() { + return base.documentSize(); + } + @NotNull @Override public Iterator iterator() { diff --git a/code/index/index-journal/java/nu/marginalia/index/journal/writer/IndexJournalWriter.java b/code/index/index-journal/java/nu/marginalia/index/journal/writer/IndexJournalWriter.java index df9b6836..916cf7a6 100644 --- a/code/index/index-journal/java/nu/marginalia/index/journal/writer/IndexJournalWriter.java +++ b/code/index/index-journal/java/nu/marginalia/index/journal/writer/IndexJournalWriter.java @@ -2,7 +2,6 @@ package nu.marginalia.index.journal.writer; import nu.marginalia.index.journal.model.IndexJournalEntryData; import nu.marginalia.index.journal.model.IndexJournalEntryHeader; -import nu.marginalia.index.journal.model.IndexJournalEntryTermData; import java.io.IOException; diff --git 
a/code/index/index-journal/java/nu/marginalia/index/journal/writer/IndexJournalWriterSingleFileImpl.java b/code/index/index-journal/java/nu/marginalia/index/journal/writer/IndexJournalWriterSingleFileImpl.java index 59999138..e5ddac52 100644 --- a/code/index/index-journal/java/nu/marginalia/index/journal/writer/IndexJournalWriterSingleFileImpl.java +++ b/code/index/index-journal/java/nu/marginalia/index/journal/writer/IndexJournalWriterSingleFileImpl.java @@ -81,12 +81,6 @@ public class IndexJournalWriterSingleFileImpl implements IndexJournalWriter{ public int put(IndexJournalEntryHeader header, IndexJournalEntryData data) { - if (dataBuffer.capacity() - dataBuffer.position() < 3*8) { - dataBuffer.flip(); - compressingStream.compress(dataBuffer); - dataBuffer.clear(); - } - final long[] keywords = data.termIds(); final long[] metadata = data.metadata(); final var positions = data.positions(); @@ -94,16 +88,30 @@ public class IndexJournalWriterSingleFileImpl implements IndexJournalWriter{ int recordSize = 0; // document header size is 3 longs for (int i = 0; i < keywords.length; i++) { // term header size is 2 longs - recordSize += IndexJournalReader.TERM_HEADER_SIZE_BYTES + positions[i].size(); + recordSize += IndexJournalReader.TERM_HEADER_SIZE_BYTES + positions[i].bufferSize(); } - dataBuffer.putInt(recordSize); + if (recordSize > Short.MAX_VALUE) { + // This should never happen, but if it does, we should log it and deal with it in a way that doesn't corrupt the file + // (32 KB is *a lot* of data for a single document, larger than the uncompressed HTML of most documents) + logger.error("Omitting entry: Record size {} exceeds maximum representable size of {}", recordSize, Short.MAX_VALUE); + return 0; + } + + if (dataBuffer.capacity() - dataBuffer.position() < 3*8) { + dataBuffer.flip(); + compressingStream.compress(dataBuffer); + dataBuffer.clear(); + } + + dataBuffer.putShort((short) recordSize); + dataBuffer.putShort((short) Math.clamp(0, 
header.documentSize(), Short.MAX_VALUE)); dataBuffer.putInt(header.documentFeatures()); dataBuffer.putLong(header.combinedId()); dataBuffer.putLong(header.documentMeta()); for (int i = 0; i < keywords.length; i++) { - int requiredSize = IndexJournalReader.TERM_HEADER_SIZE_BYTES + positions[i].size(); + int requiredSize = IndexJournalReader.TERM_HEADER_SIZE_BYTES + positions[i].bufferSize(); if (dataBuffer.capacity() - dataBuffer.position() < requiredSize) { dataBuffer.flip(); @@ -112,8 +120,8 @@ public class IndexJournalWriterSingleFileImpl implements IndexJournalWriter{ } dataBuffer.putLong(keywords[i]); - dataBuffer.putLong(metadata[i]); - dataBuffer.put((byte) positions[i].size()); + dataBuffer.putShort((short) metadata[i]); + dataBuffer.put((byte) positions[i].bufferSize()); dataBuffer.put(positions[i].buffer()); } diff --git a/code/index/index-journal/test/nu/marginalia/index/journal/IndexJournalWriterTest.java b/code/index/index-journal/test/nu/marginalia/index/journal/IndexJournalWriterTest.java index b9cd49c1..84d72af3 100644 --- a/code/index/index-journal/test/nu/marginalia/index/journal/IndexJournalWriterTest.java +++ b/code/index/index-journal/test/nu/marginalia/index/journal/IndexJournalWriterTest.java @@ -1,6 +1,8 @@ package nu.marginalia.index.journal; import it.unimi.dsi.fastutil.ints.IntList; +import it.unimi.dsi.fastutil.longs.LongArrayList; +import it.unimi.dsi.fastutil.longs.LongList; import nu.marginalia.hash.MurmurHash3_128; import nu.marginalia.index.journal.model.IndexJournalEntryData; import nu.marginalia.index.journal.model.IndexJournalEntryHeader; @@ -8,6 +10,11 @@ import nu.marginalia.index.journal.model.IndexJournalEntryTermData; import nu.marginalia.index.journal.reader.IndexJournalReaderPagingImpl; import nu.marginalia.index.journal.writer.IndexJournalWriterSingleFileImpl; import nu.marginalia.index.journal.reader.IndexJournalReaderSingleFile; +import nu.marginalia.model.EdgeUrl; +import nu.marginalia.model.id.UrlIdCodec; +import 
nu.marginalia.model.idx.DocumentMetadata; +import nu.marginalia.model.idx.WordFlags; +import nu.marginalia.model.idx.WordMetadata; import nu.marginalia.sequence.GammaCodedSequence; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; @@ -18,8 +25,9 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.file.Files; import java.nio.file.Path; -import java.util.Iterator; -import java.util.List; +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.IntStream; import static org.junit.jupiter.api.Assertions.*; @@ -52,7 +60,7 @@ public class IndexJournalWriterTest { public void testSingleFile() { try (var writer = new IndexJournalWriterSingleFileImpl(tempFile)) { // Write two documents with two terms each - writer.put(new IndexJournalEntryHeader(11, 22, 33), + writer.put(new IndexJournalEntryHeader(11, 22, 10, 33), new IndexJournalEntryData( new String[]{"word1", "word2"}, new long[]{44, 55}, @@ -61,7 +69,7 @@ public class IndexJournalWriterTest { gcs(2, 4, 6), }) ); - writer.put(new IndexJournalEntryHeader(12, 23, 34), + writer.put(new IndexJournalEntryHeader(12, 23, 11, 34), new IndexJournalEntryData( new String[]{"word1", "word2"}, new long[]{45, 56}, @@ -90,6 +98,7 @@ public class IndexJournalWriterTest { assertEquals(11, ptr.documentId()); assertEquals(22, ptr.documentFeatures()); assertEquals(33, ptr.documentMeta()); + assertEquals(10, ptr.documentSize()); iter = ptr.iterator(); @@ -116,6 +125,7 @@ public class IndexJournalWriterTest { assertEquals(12, ptr.documentId()); assertEquals(23, ptr.documentFeatures()); assertEquals(34, ptr.documentMeta()); + assertEquals(11, ptr.documentSize()); iter = ptr.iterator(); // Term 1 @@ -147,7 +157,7 @@ public class IndexJournalWriterTest { @Test public void testMultiFile() { try (var writer = new IndexJournalWriterSingleFileImpl(tempFile)) { - writer.put(new IndexJournalEntryHeader(11, 22, 33), + writer.put(new IndexJournalEntryHeader(11, 22, 10, 33), 
new IndexJournalEntryData( new String[]{"word1", "word2"}, new long[]{44, 55}, @@ -162,7 +172,7 @@ public class IndexJournalWriterTest { } try (var writer = new IndexJournalWriterSingleFileImpl(tempFile2)) { - writer.put(new IndexJournalEntryHeader(12, 23, 34), + writer.put(new IndexJournalEntryHeader(12, 23, 11, 34), new IndexJournalEntryData( new String[]{"word1", "word2"}, new long[]{45, 56}, @@ -191,6 +201,7 @@ public class IndexJournalWriterTest { assertEquals(11, ptr.documentId()); assertEquals(22, ptr.documentFeatures()); assertEquals(33, ptr.documentMeta()); + assertEquals(10, ptr.documentSize()); iter = ptr.iterator(); @@ -217,6 +228,7 @@ public class IndexJournalWriterTest { assertEquals(12, ptr.documentId()); assertEquals(23, ptr.documentFeatures()); assertEquals(34, ptr.documentMeta()); + assertEquals(11, ptr.documentSize()); iter = ptr.iterator(); // Term 1 @@ -249,7 +261,7 @@ public class IndexJournalWriterTest { public void testSingleFileIterTwice() { try (var writer = new IndexJournalWriterSingleFileImpl(tempFile)) { // Write two documents with two terms each - writer.put(new IndexJournalEntryHeader(11, 22, 33), + writer.put(new IndexJournalEntryHeader(11, 22, 10, 33), new IndexJournalEntryData( new String[]{"word1", "word2"}, new long[]{44, 55}, @@ -277,6 +289,7 @@ public class IndexJournalWriterTest { assertTrue(ptr.nextDocument()); assertEquals(11, ptr.documentId()); assertEquals(22, ptr.documentFeatures()); + assertEquals(10, ptr.documentSize()); assertEquals(33, ptr.documentMeta()); iter = ptr.iterator(); @@ -307,7 +320,7 @@ public class IndexJournalWriterTest { public void testFiltered() { try (var writer = new IndexJournalWriterSingleFileImpl(tempFile)) { // Write two documents with two terms each - writer.put(new IndexJournalEntryHeader(11, 22, 33), + writer.put(new IndexJournalEntryHeader(11, 22, 10, 33), new IndexJournalEntryData( new String[]{"word1", "word2"}, new long[]{44, 55}, @@ -316,7 +329,7 @@ public class IndexJournalWriterTest { 
gcs(2, 4, 6), }) ); - writer.put(new IndexJournalEntryHeader(12, 23, 34), + writer.put(new IndexJournalEntryHeader(12, 23, 11, 34), new IndexJournalEntryData( new String[]{"word1", "word2"}, new long[]{45, 56}, @@ -344,6 +357,7 @@ public class IndexJournalWriterTest { assertEquals(12, ptr.documentId()); assertEquals(23, ptr.documentFeatures()); assertEquals(34, ptr.documentMeta()); + assertEquals(11, ptr.documentSize()); iter = ptr.iterator(); // Term 1 @@ -364,4 +378,72 @@ public class IndexJournalWriterTest { } } + @Test + public void testIntegrationScenario() throws IOException { + Map wordMap = new HashMap<>(); + for (int i = 0; i < 512; i++) { + wordMap.put(hasher.hashKeyword(Integer.toString(i)), i); + } + try (var writer = new IndexJournalWriterSingleFileImpl(tempFile)) { + for (int idc = 1; idc < 512; idc++) { + int id = idc; + int[] factors = IntStream + .rangeClosed(1, id) + .filter(v -> (id % v) == 0) + .toArray(); + + System.out.println("id:" + id + " factors: " + Arrays.toString(factors)); + + long fullId = UrlIdCodec.encodeId((32 - (id % 32)), id); + + var header = new IndexJournalEntryHeader(factors.length, 0, 100, fullId, new DocumentMetadata(0, 0, 0, 0, id % 5, id, id % 20, (byte) 0).encode()); + + String[] keywords = IntStream.of(factors).mapToObj(Integer::toString).toArray(String[]::new); + long[] metadata = new long[factors.length]; + for (int i = 0; i < factors.length; i++) { + metadata[i] = new WordMetadata(i, EnumSet.of(WordFlags.Title)).encode(); + } + GammaCodedSequence[] positions = new GammaCodedSequence[factors.length]; + ByteBuffer wa = ByteBuffer.allocate(16); + for (int i = 0; i < factors.length; i++) { + positions[i] = GammaCodedSequence.generate(wa, i + 1); + } + + writer.put(header, new IndexJournalEntryData(keywords, metadata, positions)); + } + } + + try (var ptr = new IndexJournalReaderSingleFile(tempFile).newPointer()) { + while (ptr.nextDocument()) { + int ordinal = UrlIdCodec.getDocumentOrdinal(ptr.documentId()); + 
System.out.println(ordinal); + + var expectedFactors = + new LongArrayList(IntStream + .rangeClosed(1, ordinal) + .filter(v -> (ordinal % v) == 0) + .mapToObj(Integer::toString) + .mapToLong(hasher::hashKeyword) + .toArray()); + + LongList foundIds = new LongArrayList(); + + var iter = ptr.iterator(); + while (iter.hasNext()) { + var termData = iter.next(); + foundIds.add(termData.termId()); + } + + if (!expectedFactors.equals(foundIds)) { + System.out.println("Found: "); + System.out.println(foundIds.stream().map(fac -> wordMap.getOrDefault(fac, -1)).map(Objects::toString).collect(Collectors.joining(","))); + System.out.println("Expected: "); + System.out.println(expectedFactors.stream().map(fac -> wordMap.getOrDefault(fac, -1)).map(Objects::toString).collect(Collectors.joining(","))); + fail(); + } + assertEquals(expectedFactors, foundIds); + } + } + } + } diff --git a/code/index/index-reverse/java/nu/marginalia/index/ReverseIndexReader.java b/code/index/index-reverse/java/nu/marginalia/index/ReverseIndexReader.java index 72feb7fd..c7621427 100644 --- a/code/index/index-reverse/java/nu/marginalia/index/ReverseIndexReader.java +++ b/code/index/index-reverse/java/nu/marginalia/index/ReverseIndexReader.java @@ -3,6 +3,8 @@ package nu.marginalia.index; import nu.marginalia.array.LongArray; import nu.marginalia.array.LongArrayFactory; import nu.marginalia.btree.BTreeReader; +import nu.marginalia.index.positions.TermData; +import nu.marginalia.index.positions.PositionsFileReader; import nu.marginalia.index.query.EmptyEntrySource; import nu.marginalia.index.query.EntrySource; import nu.marginalia.index.query.ReverseIndexRejectFilter; @@ -14,9 +16,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.lang.foreign.Arena; import java.nio.file.Files; import java.nio.file.Path; -import java.util.Arrays; import java.util.concurrent.Executors; public class ReverseIndexReader { @@ -27,9 +29,16 @@ public class ReverseIndexReader { 
private final BTreeReader wordsBTreeReader; private final String name; - public ReverseIndexReader(String name, Path words, Path documents) throws IOException { + private final PositionsFileReader positionsFileReader; + + public ReverseIndexReader(String name, + Path words, + Path documents, + PositionsFileReader positionsFileReader) throws IOException { this.name = name; + this.positionsFileReader = positionsFileReader; + if (!Files.exists(words) || !Files.exists(documents)) { this.words = null; this.documents = null; @@ -133,31 +142,29 @@ public class ReverseIndexReader { offset); } - public long[] getTermMeta(long termId, long[] docIds) { + public TermData[] getTermData(Arena arena, + long termId, + long[] docIds) + { + var ret = new TermData[docIds.length]; + long offset = wordOffset(termId); if (offset < 0) { // This is likely a bug in the code, but we can't throw an exception here logger.debug("Missing offset for word {}", termId); - return new long[docIds.length]; + return ret; } - assert isUniqueAndSorted(docIds) : "The input array docIds is assumed to be unique and sorted, was " + Arrays.toString(docIds); - var reader = createReaderNew(offset); - return reader.queryData(docIds, 1); - } - private boolean isUniqueAndSorted(long[] ids) { - if (ids.length == 0) - return true; + // Read the size and offset of the position data + var offsets = reader.queryData(docIds, 1); - for (int i = 1; i < ids.length; i++) { - if(ids[i] <= ids[i-1]) - return false; + for (int i = 0; i < docIds.length; i++) { + ret[i] = positionsFileReader.getTermData(arena, offsets[i]); } - - return true; + return ret; } public void close() { @@ -166,5 +173,14 @@ public class ReverseIndexReader { if (words != null) words.close(); + + if (positionsFileReader != null) { + try { + positionsFileReader.close(); + } catch (IOException e) { + logger.error("Failed to close positions file reader", e); + } + } } + } diff --git 
a/code/index/index-reverse/java/nu/marginalia/index/construction/PositionsFileConstructor.java b/code/index/index-reverse/java/nu/marginalia/index/construction/PositionsFileConstructor.java index 80225e06..9cbd6b14 100644 --- a/code/index/index-reverse/java/nu/marginalia/index/construction/PositionsFileConstructor.java +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/PositionsFileConstructor.java @@ -1,5 +1,6 @@ package nu.marginalia.index.construction; +import nu.marginalia.index.positions.PositionCodec; import nu.marginalia.sequence.GammaCodedSequence; import java.io.IOException; @@ -38,7 +39,7 @@ public class PositionsFileConstructor implements AutoCloseable { /** Add a term to the positions file * @param termMeta the term metadata * @param positions the positions of the term - * @return the offset of the term in the file + * @return the offset of the term in the file, with the size of the data in the highest 16 bits */ public long add(byte termMeta, GammaCodedSequence positions) throws IOException { synchronized (file) { @@ -53,12 +54,20 @@ public class PositionsFileConstructor implements AutoCloseable { workBuffer.put(termMeta); workBuffer.put(positionBuffer); + long ret = PositionCodec.encode(size, offset); + offset += size; - return offset; + + return ret; } } public void close() throws IOException { + while (workBuffer.position() < workBuffer.limit()) { + workBuffer.flip(); + channel.write(workBuffer); + } + channel.force(false); channel.close(); } diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/ReverseIndexConstructor.java b/code/index/index-reverse/java/nu/marginalia/index/construction/ReverseIndexConstructor.java index 8ea5b491..9fa3ed93 100644 --- a/code/index/index-reverse/java/nu/marginalia/index/construction/ReverseIndexConstructor.java +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/ReverseIndexConstructor.java @@ -7,7 +7,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory;
import java.io.IOException; -import java.nio.file.Files; import java.nio.file.Path; import java.util.concurrent.atomic.AtomicInteger; diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindexDocuments.java b/code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindexDocuments.java index aa4fc98e..3f97061a 100644 --- a/code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindexDocuments.java +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindexDocuments.java @@ -21,12 +21,14 @@ import java.util.concurrent.TimeUnit; * the associated ReversePreindexWordSegments data */ public class ReversePreindexDocuments { + public final LongArray documents; + private static PositionsFileConstructor positionsFileConstructor; - final Path file; - public final LongArray documents; private static final int RECORD_SIZE_LONGS = 2; private static final Logger logger = LoggerFactory.getLogger(ReversePreindexDocuments.class); + public final Path file; + public ReversePreindexDocuments(LongArray documents, Path file) { this.documents = documents; this.file = file; @@ -70,22 +72,25 @@ public class ReversePreindexDocuments { long fileSizeLongs = RECORD_SIZE_LONGS * segments.totalSize(); - try (RandomFileAssembler assembly = RandomFileAssembler.create(workDir, fileSizeLongs)) { + try (var assembly = RandomFileAssembler.create(workDir, fileSizeLongs); + var pointer = reader.newPointer()) + { var offsetMap = segments.asMap(RECORD_SIZE_LONGS); offsetMap.defaultReturnValue(0); - var pointer = reader.newPointer(); while (pointer.nextDocument()) { long rankEncodedId = docIdRewriter.rewriteDocId(pointer.documentId()); for (var termData : pointer) { long termId = termData.termId(); long offset = offsetMap.addTo(termId, RECORD_SIZE_LONGS); - long posOffset = positionsFileConstructor.add((byte) termData.metadata(), termData.positions()); + + // write position data to the positions file and get the 
offset + long encodedPosOffset = positionsFileConstructor.add((byte) termData.metadata(), termData.positions()); assembly.put(offset + 0, rankEncodedId); - assembly.put(offset + 1, posOffset); + assembly.put(offset + 1, encodedPosOffset); } } diff --git a/code/index/index-reverse/java/nu/marginalia/index/positions/PositionCodec.java b/code/index/index-reverse/java/nu/marginalia/index/positions/PositionCodec.java new file mode 100644 index 00000000..9df63eec --- /dev/null +++ b/code/index/index-reverse/java/nu/marginalia/index/positions/PositionCodec.java @@ -0,0 +1,25 @@ +package nu.marginalia.index.positions; + +/** A utility class for encoding and decoding position data offsets, + * the data is encoded by using the highest 16 bits to store the size, + * and the remaining 48 bits to store the offset of the data. + *

+ * This lets us address 256 TB of data, with up to 64 KB of position data for each term, + * which is ample headroom for both the size of the data and the number of positions. + * */ +public class PositionCodec { + + public static long encode(int length, long offset) { + assert decodeSize(offset) == 0 : "Offset must be less than 2^48"; + + return (long) length << 48 | offset; + } + + public static int decodeSize(long sizeEncodedOffset) { + return (int) ((sizeEncodedOffset & 0xFFFF_0000_0000_0000L) >>> 48); + } + public static long decodeOffset(long sizeEncodedOffset) { + return sizeEncodedOffset & 0x0000_FFFF_FFFF_FFFFL; + } + +} diff --git a/code/index/index-reverse/java/nu/marginalia/index/positions/PositionsFileReader.java b/code/index/index-reverse/java/nu/marginalia/index/positions/PositionsFileReader.java new file mode 100644 index 00000000..647b205e --- /dev/null +++ b/code/index/index-reverse/java/nu/marginalia/index/positions/PositionsFileReader.java @@ -0,0 +1,39 @@ +package nu.marginalia.index.positions; + +import java.io.IOException; +import java.lang.foreign.Arena; +import java.nio.channels.FileChannel; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; + +public class PositionsFileReader implements AutoCloseable { + private final FileChannel positions; + + public PositionsFileReader(Path positionsFile) throws IOException { + this.positions = FileChannel.open(positionsFile, StandardOpenOption.READ); + } + + /** Get the positions for a term in the index, as pointed out by the encoded offset; + * intermediate buffers are allocated from the provided arena allocator. 
*/ + public TermData getTermData(Arena arena, long sizeEncodedOffset) { + int length = PositionCodec.decodeSize(sizeEncodedOffset); + long offset = PositionCodec.decodeOffset(sizeEncodedOffset); + + var segment = arena.allocate(length); + var buffer = segment.asByteBuffer(); + + try { + positions.read(buffer, offset); + } catch (IOException e) { + throw new RuntimeException(e); + } + + return new TermData(buffer); + } + + @Override + public void close() throws IOException { + positions.close(); + } + +} diff --git a/code/index/index-reverse/java/nu/marginalia/index/positions/TermData.java b/code/index/index-reverse/java/nu/marginalia/index/positions/TermData.java new file mode 100644 index 00000000..55458342 --- /dev/null +++ b/code/index/index-reverse/java/nu/marginalia/index/positions/TermData.java @@ -0,0 +1,21 @@ +package nu.marginalia.index.positions; + +import nu.marginalia.sequence.GammaCodedSequence; + +import java.nio.ByteBuffer; + +public class TermData { + private final ByteBuffer buffer; + + public TermData(ByteBuffer buffer) { + this.buffer = buffer; + } + + public byte flags() { + return buffer.get(0); + } + + public GammaCodedSequence positions() { + return new GammaCodedSequence(buffer, 1, buffer.capacity()); + } +} diff --git a/code/index/index-reverse/test/nu/marginalia/index/PositionsFileReaderTest.java b/code/index/index-reverse/test/nu/marginalia/index/PositionsFileReaderTest.java new file mode 100644 index 00000000..5dd2be3a --- /dev/null +++ b/code/index/index-reverse/test/nu/marginalia/index/PositionsFileReaderTest.java @@ -0,0 +1,63 @@ +package nu.marginalia.index; + +import it.unimi.dsi.fastutil.ints.IntList; +import nu.marginalia.index.construction.PositionsFileConstructor; +import nu.marginalia.index.positions.TermData; +import nu.marginalia.index.positions.PositionsFileReader; +import nu.marginalia.sequence.GammaCodedSequence; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import 
org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.lang.foreign.Arena; +import java.nio.ByteBuffer; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.*; + +class PositionsFileReaderTest { + + Path file; + + @BeforeEach + void setUp() throws IOException { + file = Files.createTempFile("positions", "dat"); + } + @AfterEach + void tearDown() throws IOException { + Files.delete(file); + } + + @Test + void getTermData() throws IOException { + ByteBuffer workArea = ByteBuffer.allocate(8192); + long key1, key2, key3; + try (PositionsFileConstructor constructor = new PositionsFileConstructor(file)) { + key1 = constructor.add((byte) 43, GammaCodedSequence.generate(workArea, 1, 2, 3)); + key2 = constructor.add((byte) 51, GammaCodedSequence.generate(workArea, 2, 3, 5, 1000, 5000, 20241)); + key3 = constructor.add((byte) 61, GammaCodedSequence.generate(workArea, 3, 5, 7)); + } + + System.out.println("key1: " + Long.toHexString(key1)); + System.out.println("key2: " + Long.toHexString(key2)); + System.out.println("key3: " + Long.toHexString(key3)); + + try (Arena arena = Arena.ofConfined(); + PositionsFileReader reader = new PositionsFileReader(file)) + { + TermData data1 = reader.getTermData(arena, key1); + assertEquals(43, data1.flags()); + assertEquals(IntList.of( 1, 2, 3), data1.positions().values()); + + TermData data2 = reader.getTermData(arena, key2); + assertEquals(51, data2.flags()); + assertEquals(IntList.of(2, 3, 5, 1000, 5000, 20241), data2.positions().values()); + + TermData data3 = reader.getTermData(arena, key3); + assertEquals(61, data3.flags()); + assertEquals(IntList.of(3, 5, 7), data3.positions().values()); + } + } +} \ No newline at end of file diff --git a/code/index/index-reverse/test/nu/marginalia/index/ReverseIndexReaderTest.java b/code/index/index-reverse/test/nu/marginalia/index/ReverseIndexReaderTest.java index 981136ad..2d53dd2e 100644 --- 
a/code/index/index-reverse/test/nu/marginalia/index/ReverseIndexReaderTest.java +++ b/code/index/index-reverse/test/nu/marginalia/index/ReverseIndexReaderTest.java @@ -1,17 +1,19 @@ package nu.marginalia.index; +import it.unimi.dsi.fastutil.ints.IntList; import nu.marginalia.array.page.LongQueryBuffer; import nu.marginalia.index.construction.DocIdRewriter; import nu.marginalia.index.construction.PositionsFileConstructor; import nu.marginalia.index.construction.ReversePreindex; import nu.marginalia.index.construction.TestJournalFactory; import nu.marginalia.index.construction.TestJournalFactory.EntryDataWithWordMeta; +import nu.marginalia.index.positions.PositionsFileReader; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.mockito.Mockito; import java.io.IOException; +import java.lang.foreign.Arena; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; @@ -47,13 +49,18 @@ class ReverseIndexReaderTest { public void testSimple() throws IOException { var indexReader = createIndex( - new EntryDataWithWordMeta(100, 101, wm(50, 51)) + new EntryDataWithWordMeta(100, 101, wm(50, 51, 1, 3, 5)) ); assertEquals(1, indexReader.numDocuments(50)); - long[] meta = indexReader.getTermMeta(50, new long[] { 100 }); - assertArrayEquals(new long[] { 51 }, meta); + var positions = indexReader.getTermData(Arena.global(), 50, new long[] { 100 }); + + assertEquals(1, positions.length); + assertNotNull(positions[0]); + assertEquals((byte) 51, positions[0].flags()); + assertEquals(IntList.of(1, 3, 5), positions[0].positions().values()); + assertArrayEquals(new long[] { 100 }, readEntries(indexReader, 50)); } @@ -69,13 +76,8 @@ class ReverseIndexReaderTest { assertEquals(2, indexReader.numDocuments(51)); assertEquals(1, indexReader.numDocuments(52)); - assertArrayEquals(new long[] { 51 }, indexReader.getTermMeta(50, new long[] { 100 })); assertArrayEquals(new long[] { 100 }, 
readEntries(indexReader, 50)); - - assertArrayEquals(new long[] { 52, 53 }, indexReader.getTermMeta(51, new long[] { 100, 101 })); assertArrayEquals(new long[] { 100, 101 }, readEntries(indexReader, 51)); - - assertArrayEquals(new long[] { 54 }, indexReader.getTermMeta(52, new long[] { 101 })); assertArrayEquals(new long[] { 101 }, readEntries(indexReader, 52)); } @@ -91,18 +93,20 @@ class ReverseIndexReaderTest { private ReverseIndexReader createIndex(EntryDataWithWordMeta... scenario) throws IOException { var reader = journalFactory.createReader(scenario); - var preindex = ReversePreindex.constructPreindex(reader, - Mockito.mock(PositionsFileConstructor.class), - DocIdRewriter.identity(), tempDir); - + Path posFile = tempDir.resolve("positions.dat"); Path docsFile = tempDir.resolve("docs.dat"); Path wordsFile = tempDir.resolve("words.dat"); - preindex.finalizeIndex(docsFile, wordsFile); - preindex.delete(); + try (var positionsFileConstructor = new PositionsFileConstructor(posFile)) { + var preindex = ReversePreindex.constructPreindex(reader, + positionsFileConstructor, + DocIdRewriter.identity(), tempDir); + preindex.finalizeIndex(docsFile, wordsFile); + preindex.delete(); + } - return new ReverseIndexReader("test", wordsFile, docsFile); + return new ReverseIndexReader("test", wordsFile, docsFile, new PositionsFileReader(posFile)); } } \ No newline at end of file diff --git a/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexDocsTest.java b/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexDocsTest.java index e12dbad6..df378228 100644 --- a/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexDocsTest.java +++ b/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexDocsTest.java @@ -155,15 +155,15 @@ class ReversePreindexDocsTest { if (wordId != that.wordId) return false; if (start != that.start) return false; if (end != that.end) return false; - return 
Arrays.equals(data, that.data); + return data[0] == that.data[0]; //Arrays.equals(data, that.data); } @Override public int hashCode() { - int result = (int) (wordId ^ (wordId >>> 32)); - result = 31 * result + (int) (start ^ (start >>> 32)); - result = 31 * result + (int) (end ^ (end >>> 32)); - result = 31 * result + Arrays.hashCode(data); + int result = Long.hashCode(wordId); + result = 31 * result + Long.hashCode(start); + result = 31 * result + Long.hashCode(end); + result = 31 * result + Long.hashCode(data[0]); return result; } diff --git a/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexFinalizeTest.java b/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexFinalizeTest.java index d9f3cddc..e10c2c27 100644 --- a/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexFinalizeTest.java +++ b/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexFinalizeTest.java @@ -79,9 +79,7 @@ class ReversePreindexFinalizeTest { assertEquals(1, wordsHeader.numEntries()); assertEquals(100, docsArray.get(docsHeader.dataOffsetLongs() + 0)); - assertEquals(51, docsArray.get(docsHeader.dataOffsetLongs() + 1)); assertEquals(50, wordsArray.get(wordsHeader.dataOffsetLongs())); - assertEquals(0, wordsArray.get(wordsHeader.dataOffsetLongs() + 1)); } @@ -122,9 +120,7 @@ class ReversePreindexFinalizeTest { long offset2 = wordsArray.get(wordsHeader.dataOffsetLongs() + 3); assertEquals(50, wordsArray.get(wordsHeader.dataOffsetLongs())); - assertEquals(0, wordsArray.get(wordsHeader.dataOffsetLongs() + 1)); assertEquals(50, wordsArray.get(wordsHeader.dataOffsetLongs())); - assertEquals(0, wordsArray.get(wordsHeader.dataOffsetLongs() + 1)); BTreeHeader docsHeader; @@ -133,13 +129,11 @@ class ReversePreindexFinalizeTest { assertEquals(1, docsHeader.numEntries()); assertEquals(100, docsArray.get(docsHeader.dataOffsetLongs() + 0)); - assertEquals(51, docsArray.get(docsHeader.dataOffsetLongs() 
+ 1)); docsHeader = new BTreeHeader(docsArray, offset2); System.out.println(docsHeader); assertEquals(1, docsHeader.numEntries()); assertEquals(101, docsArray.get(docsHeader.dataOffsetLongs() + 0)); - assertEquals(52, docsArray.get(docsHeader.dataOffsetLongs() + 1)); } } \ No newline at end of file diff --git a/code/index/index-reverse/test/nu/marginalia/index/construction/TestJournalFactory.java b/code/index/index-reverse/test/nu/marginalia/index/construction/TestJournalFactory.java index db262d9f..a4c15305 100644 --- a/code/index/index-reverse/test/nu/marginalia/index/construction/TestJournalFactory.java +++ b/code/index/index-reverse/test/nu/marginalia/index/construction/TestJournalFactory.java @@ -8,11 +8,13 @@ import nu.marginalia.index.journal.writer.IndexJournalWriterSingleFileImpl; import nu.marginalia.sequence.GammaCodedSequence; import java.io.IOException; +import java.nio.ByteBuffer; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Objects; public class TestJournalFactory { Path tempDir = Files.createTempDirectory("journal"); @@ -50,10 +52,10 @@ public class TestJournalFactory { '}'; } } - public record WordWithMeta(long wordId, long meta) {} + public record WordWithMeta(long wordId, long meta, GammaCodedSequence gcs) {} - public static WordWithMeta wm(long wordId, long meta) { - return new WordWithMeta(wordId, meta); + public static WordWithMeta wm(long wordId, long meta, int... positions) { + return new WordWithMeta(wordId, meta, GammaCodedSequence.generate(ByteBuffer.allocate(128), positions)); } IndexJournalReader createReader(EntryData... 
entries) throws IOException { @@ -71,7 +73,7 @@ public class TestJournalFactory { positions[i] = new GammaCodedSequence(new byte[1]); } - writer.put(new IndexJournalEntryHeader(entries.length, 0, entry.docId, entry.docMeta), + writer.put(new IndexJournalEntryHeader(entries.length, 0, 15, entry.docId, entry.docMeta), new IndexJournalEntryData(termIds, meta, positions)); } writer.close(); @@ -91,10 +93,10 @@ public class TestJournalFactory { for (int i = 0; i < entry.wordIds.length; i++) { termIds[i] = entry.wordIds[i].wordId; meta[i] = entry.wordIds[i].meta; - positions[i] = new GammaCodedSequence(new byte[1]); + positions[i] = Objects.requireNonNullElseGet(entry.wordIds[i].gcs, () -> new GammaCodedSequence(new byte[1])); } - writer.put(new IndexJournalEntryHeader(entries.length, 0, entry.docId, entry.docMeta), + writer.put(new IndexJournalEntryHeader(entries.length, 0, 15, entry.docId, entry.docMeta), new IndexJournalEntryData(termIds, meta, positions)); } writer.close(); diff --git a/code/index/java/nu/marginalia/index/IndexFactory.java b/code/index/java/nu/marginalia/index/IndexFactory.java index a1d2f5a5..38fed31e 100644 --- a/code/index/java/nu/marginalia/index/IndexFactory.java +++ b/code/index/java/nu/marginalia/index/IndexFactory.java @@ -4,11 +4,10 @@ import com.google.inject.Inject; import com.google.inject.Singleton; import nu.marginalia.IndexLocations; import nu.marginalia.index.index.CombinedIndexReader; +import nu.marginalia.index.positions.PositionsFileReader; import nu.marginalia.storage.FileStorageService; import nu.marginalia.index.forward.ForwardIndexFileNames; import nu.marginalia.index.forward.ForwardIndexReader; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.IOException; import java.nio.file.Files; @@ -40,17 +39,18 @@ public class IndexFactory { } public ReverseIndexReader getReverseIndexReader() throws IOException { - return new ReverseIndexReader("full", ReverseIndexFullFileNames.resolve(liveStorage, 
ReverseIndexFullFileNames.FileIdentifier.WORDS, ReverseIndexFullFileNames.FileVersion.CURRENT), - ReverseIndexFullFileNames.resolve(liveStorage, ReverseIndexFullFileNames.FileIdentifier.DOCS, ReverseIndexFullFileNames.FileVersion.CURRENT) + ReverseIndexFullFileNames.resolve(liveStorage, ReverseIndexFullFileNames.FileIdentifier.DOCS, ReverseIndexFullFileNames.FileVersion.CURRENT), + new PositionsFileReader(ReverseIndexFullFileNames.resolve(liveStorage, ReverseIndexFullFileNames.FileIdentifier.POSITIONS, ReverseIndexFullFileNames.FileVersion.CURRENT)) ); } public ReverseIndexReader getReverseIndexPrioReader() throws IOException { return new ReverseIndexReader("prio", ReverseIndexPrioFileNames.resolve(liveStorage, ReverseIndexPrioFileNames.FileIdentifier.WORDS, ReverseIndexPrioFileNames.FileVersion.CURRENT), - ReverseIndexPrioFileNames.resolve(liveStorage, ReverseIndexPrioFileNames.FileIdentifier.DOCS, ReverseIndexPrioFileNames.FileVersion.CURRENT) + ReverseIndexPrioFileNames.resolve(liveStorage, ReverseIndexPrioFileNames.FileIdentifier.DOCS, ReverseIndexPrioFileNames.FileVersion.CURRENT), + null ); } diff --git a/code/index/java/nu/marginalia/index/IndexGrpcService.java b/code/index/java/nu/marginalia/index/IndexGrpcService.java index 1c430014..ec78890c 100644 --- a/code/index/java/nu/marginalia/index/IndexGrpcService.java +++ b/code/index/java/nu/marginalia/index/IndexGrpcService.java @@ -281,10 +281,7 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase { awaitCompletion(); // Return the best results - return new SearchResultSet( - resultValuator.selectBestResults(parameters, - resultRankingContext, - resultHeap)); + return new SearchResultSet(resultValuator.selectBestResults(parameters, resultHeap)); } /** Wait for all tasks to complete */ diff --git a/code/index/java/nu/marginalia/index/index/CombinedIndexReader.java b/code/index/java/nu/marginalia/index/index/CombinedIndexReader.java index afc52094..5779b526 100644 --- 
a/code/index/java/nu/marginalia/index/index/CombinedIndexReader.java +++ b/code/index/java/nu/marginalia/index/index/CombinedIndexReader.java @@ -14,12 +14,13 @@ import nu.marginalia.index.query.IndexQueryBuilder; import nu.marginalia.index.query.filter.QueryFilterStepIf; import nu.marginalia.index.query.limit.SpecificationLimitType; import nu.marginalia.index.results.model.ids.CombinedDocIdList; -import nu.marginalia.index.results.model.ids.DocMetadataList; +import nu.marginalia.index.results.model.ids.TermMetadataList; import nu.marginalia.model.id.UrlIdCodec; import nu.marginalia.model.idx.DocumentMetadata; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.lang.foreign.Arena; import java.time.Duration; import java.util.ArrayList; import java.util.Collections; @@ -169,8 +170,11 @@ public class CombinedIndexReader { } /** Retrieves the term metadata for the specified word for the provided documents */ - public DocMetadataList getMetadata(long wordId, CombinedDocIdList docIds) { - return new DocMetadataList(reverseIndexFullReader.getTermMeta(wordId, docIds.array())); + public TermMetadataList getTermMetadata(Arena arena, + long wordId, + CombinedDocIdList docIds) + { + return new TermMetadataList(reverseIndexFullReader.getTermData(arena, wordId, docIds.array())); } /** Retrieves the document metadata for the specified document */ @@ -186,8 +190,12 @@ public class CombinedIndexReader { /** Retrieves the HTML features for the specified document */ public int getHtmlFeatures(long docId) { return forwardIndexReader.getHtmlFeatures(docId); + } /** Retrieves the HTML features for the specified document */ + public int getDocumentSize(long docId) { + return forwardIndexReader.getDocumentSize(docId); } + /** Close the indexes (this is not done immediately) * */ public void close() throws InterruptedException { diff --git a/code/index/java/nu/marginalia/index/results/IndexMetadataService.java 
b/code/index/java/nu/marginalia/index/results/IndexMetadataService.java index d068c0f4..4ee34b42 100644 --- a/code/index/java/nu/marginalia/index/results/IndexMetadataService.java +++ b/code/index/java/nu/marginalia/index/results/IndexMetadataService.java @@ -10,12 +10,13 @@ import nu.marginalia.index.index.StatefulIndex; import nu.marginalia.index.model.SearchTermsUtil; import nu.marginalia.index.results.model.QuerySearchTerms; import nu.marginalia.index.results.model.TermCoherenceGroupList; -import nu.marginalia.index.results.model.TermMetadataForCombinedDocumentIds; import nu.marginalia.index.results.model.ids.CombinedDocIdList; +import nu.marginalia.index.results.model.ids.TermMetadataList; import nu.marginalia.index.results.model.ids.TermIdList; +import java.lang.foreign.Arena; + import static nu.marginalia.index.results.model.TermCoherenceGroupList.TermCoherenceGroup; -import static nu.marginalia.index.results.model.TermMetadataForCombinedDocumentIds.DocumentsWithMetadata; public class IndexMetadataService { private final StatefulIndex statefulIndex; @@ -25,22 +26,19 @@ public class IndexMetadataService { this.statefulIndex = index; } - public TermMetadataForCombinedDocumentIds getTermMetadataForDocuments(CombinedDocIdList combinedIdsAll, - TermIdList termIdsList) + public Long2ObjectArrayMap + getTermMetadataForDocuments(Arena arena, CombinedDocIdList combinedIdsAll, TermIdList termIdsList) { var currentIndex = statefulIndex.get(); - Long2ObjectArrayMap termdocToMeta = + Long2ObjectArrayMap termdocToMeta = new Long2ObjectArrayMap<>(termIdsList.size()); for (long termId : termIdsList.array()) { - var metadata = currentIndex.getMetadata(termId, combinedIdsAll); - - termdocToMeta.put(termId, - new DocumentsWithMetadata(combinedIdsAll, metadata)); + termdocToMeta.put(termId, currentIndex.getTermMetadata(arena, termId, combinedIdsAll)); } - return new TermMetadataForCombinedDocumentIds(termdocToMeta); + return termdocToMeta; } public QuerySearchTerms 
getSearchTerms(CompiledQuery compiledQuery, SearchQuery searchQuery) { diff --git a/code/index/java/nu/marginalia/index/results/IndexResultValuationContext.java b/code/index/java/nu/marginalia/index/results/IndexResultValuationContext.java index 0fc4bdc1..3972c272 100644 --- a/code/index/java/nu/marginalia/index/results/IndexResultValuationContext.java +++ b/code/index/java/nu/marginalia/index/results/IndexResultValuationContext.java @@ -1,25 +1,22 @@ package nu.marginalia.index.results; import nu.marginalia.api.searchquery.model.compiled.*; -import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates; import nu.marginalia.api.searchquery.model.results.ResultRankingContext; import nu.marginalia.api.searchquery.model.results.SearchResultItem; -import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore; import nu.marginalia.index.index.CombinedIndexReader; import nu.marginalia.index.index.StatefulIndex; import nu.marginalia.index.model.SearchParameters; -import nu.marginalia.index.results.model.ids.CombinedDocIdList; import nu.marginalia.index.model.QueryParams; import nu.marginalia.index.results.model.QuerySearchTerms; -import nu.marginalia.index.results.model.TermMetadataForCombinedDocumentIds; import nu.marginalia.model.id.UrlIdCodec; import nu.marginalia.model.idx.WordFlags; -import nu.marginalia.model.idx.WordMetadata; import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.ranking.results.ResultValuator; +import nu.marginalia.sequence.GammaCodedSequence; import javax.annotation.Nullable; -import java.util.List; + +import static nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates.*; /** This class is responsible for calculating the score of a search result. 
* It holds the data required to perform the scoring, as there is strong @@ -28,94 +25,74 @@ public class IndexResultValuationContext { private final CombinedIndexReader index; private final QueryParams queryParams; - private final TermMetadataForCombinedDocumentIds termMetadataForCombinedDocumentIds; - private final QuerySearchTerms searchTerms; - private final ResultRankingContext rankingContext; private final ResultValuator searchResultValuator; private final CompiledQuery compiledQuery; - private final CompiledQueryLong compiledQueryIds; - public IndexResultValuationContext(IndexMetadataService metadataService, - ResultValuator searchResultValuator, - CombinedDocIdList ids, + public IndexResultValuationContext(ResultValuator searchResultValuator, StatefulIndex statefulIndex, ResultRankingContext rankingContext, - SearchParameters params - ) { + SearchParameters params) + { this.index = statefulIndex.get(); this.rankingContext = rankingContext; this.searchResultValuator = searchResultValuator; this.queryParams = params.queryParams; this.compiledQuery = params.compiledQuery; - this.compiledQueryIds = params.compiledQueryIds; - - this.searchTerms = metadataService.getSearchTerms(params.compiledQuery, params.query); - - this.termMetadataForCombinedDocumentIds = metadataService.getTermMetadataForDocuments(ids, - searchTerms.termIdsAll); } - private final long flagsFilterMask = - WordFlags.Title.asBit() | WordFlags.Subjects.asBit() | WordFlags.UrlDomain.asBit() | WordFlags.UrlPath.asBit() | WordFlags.ExternalLink.asBit(); + private final long flagsFilterMask = WordFlags.Title.asBit() | WordFlags.Subjects.asBit() | WordFlags.UrlDomain.asBit() | WordFlags.UrlPath.asBit() | WordFlags.ExternalLink.asBit(); @Nullable - public SearchResultItem calculatePreliminaryScore(long combinedId) { + public SearchResultItem calculatePreliminaryScore(long combinedId, + QuerySearchTerms searchTerms, + long[] wordFlags, + GammaCodedSequence[] positions) + { + + + // FIXME: Reconsider 
coherence logic with the new position data +// if (!searchTerms.coherences.test(termMetadataForCombinedDocumentIds, combinedId)) +// return null; + + CompiledQuery positionsQuery = compiledQuery.root.newQuery(positions); + CompiledQueryLong wordFlagsQuery = compiledQuery.root.newQuery(wordFlags); + int[] counts = new int[compiledQuery.size()]; + for (int i = 0; i < counts.length; i++) { + if (positions[i] != null) { + counts[i] = positions[i].valueCount(); + } + } + CompiledQueryInt positionsCountQuery = compiledQuery.root.newQuery(counts); + + // If the document is not relevant to the query, abort early to reduce allocations and + // avoid unnecessary calculations + if (testRelevance(wordFlagsQuery, positionsCountQuery)) { + return null; + } + long docId = UrlIdCodec.removeRank(combinedId); - - if (!searchTerms.coherences.test(termMetadataForCombinedDocumentIds, combinedId)) - return null; - long docMetadata = index.getDocumentMetadata(docId); int htmlFeatures = index.getHtmlFeatures(docId); - - SearchResultItem searchResult = new SearchResultItem(docId, - docMetadata, - htmlFeatures, - hasPrioTerm(combinedId)); - - long[] wordMetas = new long[compiledQuery.size()]; - SearchResultKeywordScore[] scores = new SearchResultKeywordScore[compiledQuery.size()]; - - for (int i = 0; i < wordMetas.length; i++) { - final long termId = compiledQueryIds.at(i); - final String term = compiledQuery.at(i); - - wordMetas[i] = termMetadataForCombinedDocumentIds.getTermMetadata(termId, combinedId); - scores[i] = new SearchResultKeywordScore(term, termId, wordMetas[i]); - } - - - // DANGER: IndexResultValuatorService assumes that searchResult.keywordScores has this specific order, as it needs - // to be able to re-construct its own CompiledQuery for re-ranking the results. This is - // a very flimsy assumption. 
- searchResult.keywordScores.addAll(List.of(scores)); - - CompiledQueryLong wordMetasQuery = new CompiledQueryLong(compiledQuery.root, new CqDataLong(wordMetas)); - - - boolean allSynthetic = CompiledQueryAggregates.booleanAggregate(wordMetasQuery, WordFlags.Synthetic::isPresent); - int flagsCount = CompiledQueryAggregates.intMaxMinAggregate(wordMetasQuery, wordMeta -> Long.bitCount(wordMeta & flagsFilterMask)); - int positionsCount = CompiledQueryAggregates.intMaxMinAggregate(wordMetasQuery, wordMeta -> Long.bitCount(WordMetadata.decodePositions(wordMeta))); - - if (!meetsQueryStrategyRequirements(wordMetasQuery, queryParams.queryStrategy())) { - return null; - } - - if (flagsCount == 0 && !allSynthetic && positionsCount == 0) - return null; + int docSize = index.getDocumentSize(docId); double score = searchResultValuator.calculateSearchResultValue( - wordMetasQuery, + wordFlagsQuery, + positionsCountQuery, + positionsQuery, docMetadata, htmlFeatures, - 5000, // use a dummy value here as it's not present in the index + docSize, rankingContext, null); - if (searchResult.hasPrioTerm) { + SearchResultItem searchResult = new SearchResultItem(docId, + docMetadata, + htmlFeatures); + + if (hasPrioTerm(searchTerms, positions)) { score = 0.75 * score; } @@ -124,13 +101,32 @@ public class IndexResultValuationContext { return searchResult; } - private boolean hasPrioTerm(long combinedId) { - for (var term : searchTerms.termIdsPrio.array()) { - if (termMetadataForCombinedDocumentIds.hasTermMeta(term, combinedId)) { + private boolean testRelevance(CompiledQueryLong wordFlagsQuery, CompiledQueryInt countsQuery) { + boolean allSynthetic = booleanAggregate(wordFlagsQuery, WordFlags.Synthetic::isPresent); + int flagsCount = intMaxMinAggregate(wordFlagsQuery, flags -> Long.bitCount(flags & flagsFilterMask)); + int positionsCount = intMaxMinAggregate(countsQuery, p -> p); + + if (!meetsQueryStrategyRequirements(wordFlagsQuery, queryParams.queryStrategy())) { + return true; + } + if 
(flagsCount == 0 && !allSynthetic && positionsCount == 0) { + return true; + } + + return false; + } + + private boolean hasPrioTerm(QuerySearchTerms searchTerms, GammaCodedSequence[] positions) { + var allTerms = searchTerms.termIdsAll; + var prioTerms = searchTerms.termIdsPrio; + + for (int i = 0; i < allTerms.size(); i++) { + if (positions[i] != null && prioTerms.contains(allTerms.at(i))) { return true; } } - return false; + + return false; } private boolean meetsQueryStrategyRequirements(CompiledQueryLong queryGraphScores, @@ -142,7 +138,7 @@ public class IndexResultValuationContext { return true; } - return CompiledQueryAggregates.booleanAggregate(queryGraphScores, + return booleanAggregate(queryGraphScores, docs -> meetsQueryStrategyRequirements(docs, queryParams.queryStrategy())); } diff --git a/code/index/java/nu/marginalia/index/results/IndexResultValuatorService.java b/code/index/java/nu/marginalia/index/results/IndexResultValuatorService.java index baecb564..fbe99cb1 100644 --- a/code/index/java/nu/marginalia/index/results/IndexResultValuatorService.java +++ b/code/index/java/nu/marginalia/index/results/IndexResultValuatorService.java @@ -7,8 +7,6 @@ import gnu.trove.list.array.TLongArrayList; import it.unimi.dsi.fastutil.longs.LongSet; import nu.marginalia.api.searchquery.model.compiled.CompiledQuery; import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong; -import nu.marginalia.api.searchquery.model.compiled.CqDataInt; -import nu.marginalia.api.searchquery.model.compiled.CqDataLong; import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates; import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem; import nu.marginalia.api.searchquery.model.results.ResultRankingContext; @@ -21,12 +19,13 @@ import nu.marginalia.linkdb.docs.DocumentDbReader; import nu.marginalia.linkdb.model.DocdbUrlDetail; import nu.marginalia.model.idx.WordMetadata; import nu.marginalia.ranking.results.ResultValuator; +import 
nu.marginalia.sequence.GammaCodedSequence; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.lang.foreign.Arena; import java.sql.SQLException; import java.util.*; -import java.util.function.Consumer; @Singleton public class IndexResultValuatorService { @@ -53,35 +52,53 @@ public class IndexResultValuatorService { ResultRankingContext rankingContext, CombinedDocIdList resultIds) { - final var evaluator = createValuationContext(params, rankingContext, resultIds); + IndexResultValuationContext evaluator = + new IndexResultValuationContext(resultValuator, statefulIndex, rankingContext, params); List results = new ArrayList<>(resultIds.size()); - for (long id : resultIds.array()) { - var score = evaluator.calculatePreliminaryScore(id); - if (score != null) { - results.add(score); + try (var arena = Arena.ofConfined()) { + // Batch-fetch the word metadata for the documents + + var searchTerms = metadataService.getSearchTerms(params.compiledQuery, params.query); + var termsForDocs = metadataService.getTermMetadataForDocuments(arena, resultIds, searchTerms.termIdsAll); + + // Prepare data for the document. We do this outside of the calculation function to avoid + // hash lookups in the inner loop, as it's very hot code and we don't want thrashing in there; + // out here we can rely on implicit array ordering to match up the data. 
+ + var ra = resultIds.array(); + long[] flags = new long[searchTerms.termIdsAll.size()]; + GammaCodedSequence[] positions = new GammaCodedSequence[searchTerms.termIdsAll.size()]; + + for (int i = 0; i < ra.length; i++) { + long id = ra[i]; + + // Prepare term-level data for the document + for (int ti = 0; ti < flags.length; ti++) { + long tid = searchTerms.termIdsAll.at(ti); + var tfd = termsForDocs.get(tid); + + assert tfd != null : "No term data for term " + ti; + + flags[ti] = tfd.flag(i); + positions[ti] = tfd.position(i); + } + + // Calculate the preliminary score + + var score = evaluator.calculatePreliminaryScore(id, searchTerms, flags, positions); + if (score != null) { + results.add(score); + } } + + return results; } - - return results; - } - - private IndexResultValuationContext createValuationContext(SearchParameters params, - ResultRankingContext rankingContext, - CombinedDocIdList resultIds) - { - return new IndexResultValuationContext(metadataService, - resultValuator, - resultIds, - statefulIndex, - rankingContext, - params); } public List selectBestResults(SearchParameters params, - ResultRankingContext rankingContext, Collection results) throws SQLException { var domainCountFilter = new IndexResultDomainDeduplicator(params.limitByDomain); @@ -101,14 +118,13 @@ public class IndexResultValuatorService { item.resultsFromDomain = domainCountFilter.getCount(item); } - return decorateAndRerank(resultsList, params.compiledQuery, rankingContext); + return decorateResults(resultsList, params.compiledQuery); } /** Decorate the result items with additional information from the link database * and calculate an updated ranking with the additional information */ - public List decorateAndRerank(List rawResults, - CompiledQuery compiledQuery, - ResultRankingContext rankingContext) + public List decorateResults(List rawResults, + CompiledQuery compiledQuery) throws SQLException { TLongList idsList = new TLongArrayList(rawResults.size()); @@ -131,42 +147,18 @@ 
public class IndexResultValuatorService { continue; } - // Reconstruct the compiledquery for re-valuation - // - // CAVEAT: This hinges on a very fragile that IndexResultValuationContext puts them in the same - // order as the data for the CompiledQuery. - long[] wordMetas = new long[compiledQuery.size()]; - - for (int i = 0; i < compiledQuery.size(); i++) { - var score = result.keywordScores.get(i); - wordMetas[i] = score.encodedWordMetadata(); - } - - CompiledQueryLong metaQuery = new CompiledQueryLong(compiledQuery.root, new CqDataLong(wordMetas)); - resultItems.add(createCombinedItem( result, - docData, - metaQuery, - rankingContext)); + docData)); } return resultItems; } private DecoratedSearchResultItem createCombinedItem(SearchResultItem result, - DocdbUrlDetail docData, - CompiledQueryLong wordMetas, - ResultRankingContext rankingContext) { + DocdbUrlDetail docData) { ResultRankingDetailsExtractor detailsExtractor = new ResultRankingDetailsExtractor(); - Consumer detailConsumer = rankingContext.params.exportDebugData ? detailsExtractor::set : null; - - double score = resultValuator.calculateSearchResultValue(wordMetas, - result.encodedDocMetadata, - result.htmlFeatures, - docData.wordsTotal(), - rankingContext, - detailConsumer); + // Consumer detailConsumer = rankingContext.params.exportDebugData ? 
detailsExtractor::set : null; return new DecoratedSearchResultItem( result, @@ -179,8 +171,8 @@ public class IndexResultValuatorService { docData.pubYear(), docData.dataHash(), docData.wordsTotal(), - bestPositions(wordMetas), - score, + 0L, //bestPositions(wordMetas), + result.getScore(), detailsExtractor.get() ); } diff --git a/code/index/java/nu/marginalia/index/results/model/TermMetadataForCombinedDocumentIds.java b/code/index/java/nu/marginalia/index/results/model/TermMetadataForCombinedDocumentIds.java index 3ef2f7ab..20069a55 100644 --- a/code/index/java/nu/marginalia/index/results/model/TermMetadataForCombinedDocumentIds.java +++ b/code/index/java/nu/marginalia/index/results/model/TermMetadataForCombinedDocumentIds.java @@ -1,26 +1,38 @@ package nu.marginalia.index.results.model; -import it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap; import it.unimi.dsi.fastutil.longs.Long2ObjectArrayMap; +import it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap; +import nu.marginalia.index.positions.TermData; import nu.marginalia.index.results.model.ids.CombinedDocIdList; -import nu.marginalia.index.results.model.ids.DocMetadataList; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import nu.marginalia.index.results.model.ids.TermMetadataList; +import nu.marginalia.sequence.GammaCodedSequence; + +import javax.annotation.Nullable; public class TermMetadataForCombinedDocumentIds { - private static final Logger logger = LoggerFactory.getLogger(TermMetadataForCombinedDocumentIds.class); private final Long2ObjectArrayMap termdocToMeta; public TermMetadataForCombinedDocumentIds(Long2ObjectArrayMap termdocToMeta) { this.termdocToMeta = termdocToMeta; } - public long getTermMetadata(long termId, long combinedId) { + public byte getTermMetadata(long termId, long combinedId) { var metaByCombinedId = termdocToMeta.get(termId); if (metaByCombinedId == null) { return 0; } - return metaByCombinedId.get(combinedId); + return metaByCombinedId.get(combinedId).flags(); + } + 
+ @Nullable + public GammaCodedSequence getPositions(long termId, long combinedId) { + var metaByCombinedId = termdocToMeta.get(termId); + + if (metaByCombinedId == null) { + return null; + } + + return metaByCombinedId.get(combinedId).positions(); } public boolean hasTermMeta(long termId, long combinedId) { @@ -30,16 +42,25 @@ public class TermMetadataForCombinedDocumentIds { return false; } - return metaByCombinedId.get(combinedId) != 0; + return metaByCombinedId.data().containsKey(combinedId); } - public record DocumentsWithMetadata(Long2LongOpenHashMap data) { - public DocumentsWithMetadata(CombinedDocIdList combinedDocIdsAll, DocMetadataList metadata) { - this(new Long2LongOpenHashMap(combinedDocIdsAll.array(), metadata.array())); + public record DocumentsWithMetadata(Long2ObjectOpenHashMap data) { + public DocumentsWithMetadata(CombinedDocIdList combinedDocIdsAll, TermMetadataList metadata) { + this(new Long2ObjectOpenHashMap<>(combinedDocIdsAll.size())); + + long[] ids = combinedDocIdsAll.array(); + TermData[] data = metadata.array(); + + for (int i = 0; i < combinedDocIdsAll.size(); i++) { + if (data[i] != null) { + this.data.put(ids[i], data[i]); + } + } } - public long get(long combinedId) { - return data.getOrDefault(combinedId, 0); + public TermData get(long combinedId) { + return data.get(combinedId); } } } diff --git a/code/index/java/nu/marginalia/index/results/model/ids/CombinedDocIdList.java b/code/index/java/nu/marginalia/index/results/model/ids/CombinedDocIdList.java index 17bd17a1..7845f14f 100644 --- a/code/index/java/nu/marginalia/index/results/model/ids/CombinedDocIdList.java +++ b/code/index/java/nu/marginalia/index/results/model/ids/CombinedDocIdList.java @@ -15,6 +15,10 @@ import java.util.stream.LongStream; public final class CombinedDocIdList { private final long[] data; + public CombinedDocIdList(long... 
data) { + this.data = Arrays.copyOf(data, data.length); + } + public CombinedDocIdList(LongArrayList data) { this.data = data.toLongArray(); } diff --git a/code/index/java/nu/marginalia/index/results/model/ids/DocMetadataList.java b/code/index/java/nu/marginalia/index/results/model/ids/DocMetadataList.java deleted file mode 100644 index 0104f89c..00000000 --- a/code/index/java/nu/marginalia/index/results/model/ids/DocMetadataList.java +++ /dev/null @@ -1,45 +0,0 @@ -package nu.marginalia.index.results.model.ids; - -import it.unimi.dsi.fastutil.longs.LongArrayList; - -import java.util.Arrays; -import java.util.Objects; -import java.util.stream.LongStream; - -public final class DocMetadataList { - private final long[] array; - - public DocMetadataList(long[] array) { - this.array = array; - } - - public DocMetadataList(LongArrayList list) { - this(list.toLongArray()); - } - - public int size() { - return array.length; - } - - public LongStream stream() { - return LongStream.of(array); - } - - public long[] array() { - return array; - } - - @Override - public boolean equals(Object obj) { - if (obj == this) return true; - if (obj == null || obj.getClass() != this.getClass()) return false; - var that = (DocMetadataList) obj; - return Arrays.equals(this.array, that.array); - } - - @Override - public int hashCode() { - return Arrays.hashCode(array); - } - -} diff --git a/code/index/java/nu/marginalia/index/results/model/ids/TermIdList.java b/code/index/java/nu/marginalia/index/results/model/ids/TermIdList.java index f25ab1b9..903fef9f 100644 --- a/code/index/java/nu/marginalia/index/results/model/ids/TermIdList.java +++ b/code/index/java/nu/marginalia/index/results/model/ids/TermIdList.java @@ -11,6 +11,7 @@ public final class TermIdList { public TermIdList(long[] array) { this.array = array; + Arrays.sort(this.array); } public TermIdList(LongArrayList list) { @@ -29,6 +30,15 @@ public final class TermIdList { return array; } + public long at(int i) { + return array[i]; + 
} + + public boolean contains(long id) { + // Implicitly sorted + return Arrays.binarySearch(array, id) >= 0; + } + @Override public boolean equals(Object obj) { if (obj == this) return true; diff --git a/code/index/java/nu/marginalia/index/results/model/ids/TermMetadataList.java b/code/index/java/nu/marginalia/index/results/model/ids/TermMetadataList.java new file mode 100644 index 00000000..dd7ebbcb --- /dev/null +++ b/code/index/java/nu/marginalia/index/results/model/ids/TermMetadataList.java @@ -0,0 +1,55 @@ +package nu.marginalia.index.results.model.ids; + +import nu.marginalia.index.positions.TermData; +import nu.marginalia.sequence.GammaCodedSequence; + +import javax.annotation.Nullable; +import java.util.Arrays; + +public final class TermMetadataList { + private final TermData[] array; + + public TermMetadataList(TermData[] array) { + this.array = array; + } + + public int size() { + return array.length; + } + + public long flag(int i) { + if (array[i] == null) + return 0; + + return array[i].flags(); + } + + /** Returns the position data for the given document index, + * may be null if the term is not in the document + */ + @Nullable + public GammaCodedSequence position(int i) { + if (array[i] == null) + return null; + + return array[i].positions(); + } + + public TermData[] array() { + return array; + } + + @Override + public boolean equals(Object obj) { + if (obj == this) return true; + if (obj == null || obj.getClass() != this.getClass()) return false; + var that = (TermMetadataList) obj; + return Arrays.equals(this.array, that.array); + } + + @Override + public int hashCode() { + return Arrays.hashCode(array); + } + +} diff --git a/code/index/java/nu/marginalia/ranking/results/ResultValuator.java b/code/index/java/nu/marginalia/ranking/results/ResultValuator.java index 1e026b40..ae84a11e 100644 --- a/code/index/java/nu/marginalia/ranking/results/ResultValuator.java +++ b/code/index/java/nu/marginalia/ranking/results/ResultValuator.java @@ -1,5 +1,7 @@ 
package nu.marginalia.ranking.results; +import nu.marginalia.api.searchquery.model.compiled.CompiledQuery; +import nu.marginalia.api.searchquery.model.compiled.CompiledQueryInt; import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong; import nu.marginalia.api.searchquery.model.results.ResultRankingContext; import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; @@ -14,6 +16,7 @@ import nu.marginalia.ranking.results.factors.*; import com.google.inject.Inject; import com.google.inject.Singleton; +import nu.marginalia.sequence.GammaCodedSequence; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -33,15 +36,15 @@ public class ResultValuator { this.termCoherenceFactor = termCoherenceFactor; } - public double calculateSearchResultValue(CompiledQueryLong wordMeta, - long documentMetadata, + public double calculateSearchResultValue(CompiledQueryLong wordFlagsQuery, + CompiledQueryInt positionsCountQuery, CompiledQuery positionsQuery, long documentMetadata, int features, int length, ResultRankingContext ctx, @Nullable Consumer detailsConsumer ) { - if (wordMeta.isEmpty()) + if (wordFlagsQuery.isEmpty()) return Double.MAX_VALUE; if (length < 0) { @@ -82,12 +85,11 @@ public class ResultValuator { + temporalBias + flagsPenalty; - double tcfOverlap = rankingParams.tcfOverlapWeight * termCoherenceFactor.calculateOverlap(wordMeta); - double tcfJaccard = rankingParams.tcfJaccardWeight * termCoherenceFactor.calculateAvgMutualJaccard(wordMeta, ctx); + // FIXME: need a weighting factor here + double tcfAvgDist = 25. 
/ termCoherenceFactor.calculateAvgMinDistance(positionsQuery, ctx); - double bM25F = rankingParams.bm25FullWeight * wordMeta.root.visit(Bm25FullGraphVisitor.forRegular(rankingParams.fullParams, wordMeta.data, length, ctx)); - double bM25N = rankingParams.bm25NgramWeight * wordMeta.root.visit(Bm25FullGraphVisitor.forNgrams(rankingParams.fullParams, wordMeta.data, length, ctx)); - double bM25P = rankingParams.bm25PrioWeight * wordMeta.root.visit(new Bm25PrioGraphVisitor(rankingParams.prioParams, wordMeta.data, ctx)); + double bM25F = rankingParams.bm25FullWeight * wordFlagsQuery.root.visit(new Bm25FullGraphVisitor(rankingParams.fullParams, positionsCountQuery.data, length, ctx)); + double bM25P = rankingParams.bm25PrioWeight * wordFlagsQuery.root.visit(new Bm25PrioGraphVisitor(rankingParams.prioParams, wordFlagsQuery.data, ctx)); double overallPartPositive = Math.max(0, overallPart); double overallPartNegative = -Math.min(0, overallPart); @@ -112,10 +114,10 @@ public class ResultValuator { temporalBias, flagsPenalty, overallPart, - tcfOverlap, - tcfJaccard, + 0, + 0, bM25F, - bM25N, + 0, // FIXME: Remove from model bM25P) ); @@ -125,8 +127,8 @@ public class ResultValuator { // Renormalize to 0...15, where 0 is the best possible score; // this is a historical artifact of the original ranking function double ret = normalize( - tcfOverlap + tcfJaccard - + bM25F + bM25P + bM25N + tcfAvgDist + + bM25F + bM25P + overallPartPositive, overallPartNegative); diff --git a/code/index/java/nu/marginalia/ranking/results/factors/Bm25FullGraphVisitor.java b/code/index/java/nu/marginalia/ranking/results/factors/Bm25FullGraphVisitor.java index 4105ed6b..88a592bb 100644 --- a/code/index/java/nu/marginalia/ranking/results/factors/Bm25FullGraphVisitor.java +++ b/code/index/java/nu/marginalia/ranking/results/factors/Bm25FullGraphVisitor.java @@ -13,7 +13,7 @@ import java.util.List; public class Bm25FullGraphVisitor implements CqExpression.DoubleVisitor { private static final long 
AVG_LENGTH = 5000; - private final CqDataLong wordMetaData; + private final CqDataInt counts; private final CqDataInt frequencies; private final Bm25Parameters bm25Parameters; @@ -22,31 +22,16 @@ public class Bm25FullGraphVisitor implements CqExpression.DoubleVisitor { private final BitSet mask; - private Bm25FullGraphVisitor(Bm25Parameters bm25Parameters, - CqDataLong wordMetaData, + public Bm25FullGraphVisitor(Bm25Parameters bm25Parameters, + CqDataInt counts, int length, - BitSet mask, ResultRankingContext ctx) { this.length = length; this.bm25Parameters = bm25Parameters; this.docCount = ctx.termFreqDocCount(); - this.wordMetaData = wordMetaData; + this.counts = counts; this.frequencies = ctx.fullCounts; - this.mask = mask; - } - - public static Bm25FullGraphVisitor forRegular(Bm25Parameters bm25Parameters, - CqDataLong wordMetaData, - int length, - ResultRankingContext ctx) { - return new Bm25FullGraphVisitor(bm25Parameters, wordMetaData, length, ctx.regularMask, ctx); - } - - public static Bm25FullGraphVisitor forNgrams(Bm25Parameters bm25Parameters, - CqDataLong wordMetaData, - int length, - ResultRankingContext ctx) { - return new Bm25FullGraphVisitor(bm25Parameters, wordMetaData, length, ctx.ngramsMask, ctx); + this.mask = ctx.regularMask; } @Override @@ -73,7 +58,7 @@ public class Bm25FullGraphVisitor implements CqExpression.DoubleVisitor { return 0; } - double count = Long.bitCount(WordMetadata.decodePositions(wordMetaData.get(idx))); + double count = counts.get(idx); int freq = frequencies.get(idx); diff --git a/code/index/java/nu/marginalia/ranking/results/factors/TermCoherenceFactor.java b/code/index/java/nu/marginalia/ranking/results/factors/TermCoherenceFactor.java index 3bda0580..2ebef7cd 100644 --- a/code/index/java/nu/marginalia/ranking/results/factors/TermCoherenceFactor.java +++ b/code/index/java/nu/marginalia/ranking/results/factors/TermCoherenceFactor.java @@ -1,66 +1,44 @@ package nu.marginalia.ranking.results.factors; -import 
nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong; -import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates; +import nu.marginalia.api.searchquery.model.compiled.CompiledQuery; import nu.marginalia.api.searchquery.model.results.ResultRankingContext; -import nu.marginalia.model.idx.WordMetadata; +import nu.marginalia.sequence.GammaCodedSequence; +import nu.marginalia.sequence.SequenceOperations; /** Rewards documents where terms appear frequently within the same sentences */ public class TermCoherenceFactor { - /** Calculate a factor that rewards the best total position overlap - * between the terms in the query. This is high when all the terms - * found in the same sentences. - */ - public double calculateOverlap(CompiledQueryLong wordMetadataQuery) { - if (wordMetadataQuery.size() < 2) - return 0; - - long mask = CompiledQueryAggregates.longBitmaskAggregate(wordMetadataQuery, - score -> score >>> WordMetadata.POSITIONS_SHIFT); - - return bitsSetFactor(mask); - } - - /** Calculate a factor that rewards the best average mutual Jaccard index - * between the terms in the query. This is high when the several terms are frequently - * found in the same sentences. 
- */ - public double calculateAvgMutualJaccard(CompiledQueryLong wordMetadataQuery, ResultRankingContext ctx) { + public double calculateAvgMinDistance(CompiledQuery positions, ResultRankingContext ctx) { double sum = 0; int cnt = 0; - for (int i = 0; i < wordMetadataQuery.size(); i++) { + for (int i = 0; i < positions.size(); i++) { // Skip terms that are not in the regular mask if (!ctx.regularMask.get(i)) continue; - long imask = WordMetadata.decodePositions(wordMetadataQuery.at(i)); + var posi = positions.at(i); // Skip terms that are not in the document - if (imask == 0L) + if (posi == null) continue; - for (int j = i + 1; j < wordMetadataQuery.size(); j++) { + for (int j = i + 1; j < positions.size(); j++) { // Skip terms that are not in the regular mask if (!ctx.regularMask.get(j)) continue; - long jmask = WordMetadata.decodePositions(wordMetadataQuery.at(j)); + var posj = positions.at(j); // Skip terms that are not in the document - if (jmask == 0L) + if (posj == null) continue; - long quot = Long.bitCount(imask & jmask); - long rem = Long.bitCount(imask | jmask); - - // rem is always > 0 because imask and jmask are not both 0 - - sum += quot/(double) rem; + int distance = SequenceOperations.minDistance(posi.iterator(), posj.iterator()); + sum += distance; cnt++; } } @@ -68,15 +46,8 @@ public class TermCoherenceFactor { if (cnt > 0) { return sum / cnt; } else { - return 0; + return 1000.; } } - double bitsSetFactor(long mask) { - final int bitsSetInMask = Long.bitCount(mask); - - return Math.pow(bitsSetInMask/(double) WordMetadata.POSITIONS_COUNT, 0.25); - } - - } \ No newline at end of file diff --git a/code/index/test/nu/marginalia/index/CombinedIndexReaderTest.java b/code/index/test/nu/marginalia/index/CombinedIndexReaderTest.java new file mode 100644 index 00000000..cd23261e --- /dev/null +++ b/code/index/test/nu/marginalia/index/CombinedIndexReaderTest.java @@ -0,0 +1,382 @@ +package nu.marginalia.index; + +import com.google.inject.Guice; +import 
com.google.inject.Inject; +import it.unimi.dsi.fastutil.ints.IntList; +import it.unimi.dsi.fastutil.longs.LongArrayList; +import it.unimi.dsi.fastutil.longs.LongList; +import nu.marginalia.IndexLocations; +import nu.marginalia.array.page.LongQueryBuffer; +import nu.marginalia.hash.MurmurHash3_128; +import nu.marginalia.index.construction.DocIdRewriter; +import nu.marginalia.index.construction.ReverseIndexConstructor; +import nu.marginalia.index.domainrankings.DomainRankings; +import nu.marginalia.index.forward.ForwardIndexConverter; +import nu.marginalia.index.forward.ForwardIndexFileNames; +import nu.marginalia.index.index.CombinedIndexReader; +import nu.marginalia.index.index.StatefulIndex; +import nu.marginalia.index.journal.model.IndexJournalEntryData; +import nu.marginalia.index.journal.model.IndexJournalEntryHeader; +import nu.marginalia.index.journal.reader.IndexJournalReader; +import nu.marginalia.index.journal.writer.IndexJournalWriter; +import nu.marginalia.index.positions.TermData; +import nu.marginalia.index.results.model.ids.CombinedDocIdList; +import nu.marginalia.linkdb.docs.DocumentDbReader; +import nu.marginalia.linkdb.docs.DocumentDbWriter; +import nu.marginalia.linkdb.model.DocdbUrlDetail; +import nu.marginalia.model.EdgeUrl; +import nu.marginalia.model.id.UrlIdCodec; +import nu.marginalia.model.idx.DocumentFlags; +import nu.marginalia.model.idx.DocumentMetadata; +import nu.marginalia.model.idx.WordFlags; +import nu.marginalia.model.idx.WordMetadata; +import nu.marginalia.process.control.FakeProcessHeartbeat; +import nu.marginalia.process.control.ProcessHeartbeat; +import nu.marginalia.sequence.GammaCodedSequence; +import nu.marginalia.service.control.ServiceHeartbeat; +import nu.marginalia.service.server.Initialization; +import nu.marginalia.storage.FileStorageService; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.parallel.Execution; + +import 
java.io.IOException; +import java.lang.foreign.Arena; +import java.net.URISyntaxException; +import java.nio.ByteBuffer; +import java.nio.file.Files; +import java.nio.file.Path; +import java.sql.SQLException; +import java.util.*; + +import static nu.marginalia.linkdb.LinkdbFileNames.DOCDB_FILE_NAME; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; + +@Execution(SAME_THREAD) +public class CombinedIndexReaderTest { + + @Inject + Initialization initialization; + + IndexQueryServiceIntegrationTestModule testModule; + + @Inject + StatefulIndex statefulIndex; + + @Inject + IndexJournalWriter indexJournalWriter; + + @Inject + FileStorageService fileStorageService; + + @Inject + DomainRankings domainRankings; + + @Inject + ProcessHeartbeat processHeartbeat; + @Inject + DocumentDbReader documentDbReader; + + @Inject + IndexFactory indexFactory; + + @BeforeEach + public void setUp() throws IOException { + + testModule = new IndexQueryServiceIntegrationTestModule(); + Guice.createInjector(testModule).injectMembers(this); + + initialization.setReady(); + } + + @AfterEach + public void tearDown() throws IOException { + testModule.cleanUp(); + } + + private final MockDocumentMeta anyMetadata = new MockDocumentMeta(0, new DocumentMetadata(2, 0, 14, EnumSet.noneOf(DocumentFlags.class))); + + @Test + public void testSimpleRetrieval() throws Exception { + new MockData().add( + d(1, 1), + anyMetadata, + w("hello", WordFlags.Title, 33, 55), + w("world", WordFlags.Subjects, 34) + ).load(); + + var reader = indexFactory.getCombinedIndexReader(); + var query = reader.findFullWord(kw("hello")).build(); + + var buffer = new LongQueryBuffer(32); + query.getMoreResults(buffer); + + assertEquals( + List.of(d(1, 1)), + decode(buffer) + ); + + var helloMeta = td(reader, kw("hello"), d(1, 1)); + assertEquals(helloMeta.flags(), WordFlags.Title.asBit()); + assertEquals(IntList.of(33, 55), 
helloMeta.positions().values()); + + var worldMeta = td(reader, kw("world"), d(1, 1)); + assertEquals(worldMeta.flags(), WordFlags.Subjects.asBit()); + assertEquals(IntList.of(34), worldMeta.positions().values()); + } + + TermData td(CombinedIndexReader reader, long wordId, MockDataDocument docId) { + return (reader.getTermMetadata(Arena.global(), wordId, new CombinedDocIdList(docId.docId())).array())[0]; + } + + + @Test + public void testUnionRetrieval() throws Exception { + new MockData() + .add( + d(1, 1), + anyMetadata, + w("hello", WordFlags.Title), + w("world", WordFlags.Title) + ) + .add( + d(1, 2), + anyMetadata, + w("world", WordFlags.Title) + ) + .add( + d(1, 3), + anyMetadata, + w("world", WordFlags.Title) + ) + .add( + d(2, 4), + anyMetadata, + w("hello", WordFlags.Title), + w("world", WordFlags.Title) + ) + .load(); + + var reader = indexFactory.getCombinedIndexReader(); + var query = reader + .findFullWord(kw("hello")) + .also(kw("world")) + .build(); + + var buffer = new LongQueryBuffer(32); + query.getMoreResults(buffer); + + assertEquals( + List.of(d(1, 1), d(2, 4)), + decode(buffer) + ); + } + + @Test + public void testNotFilterRetrieval() throws Exception { + new MockData() + .add( + d(1, 1), + anyMetadata, + w("hello", WordFlags.Title), + w("world", WordFlags.Title), + w("goodbye", WordFlags.Title) + ) + .add( + d(1, 2), + anyMetadata, + w("world", WordFlags.Title) + ) + .add( + d(1, 3), + anyMetadata, + w("world", WordFlags.Title) + ) + .add( + d(2, 4), + anyMetadata, + w("hello", WordFlags.Title), + w("world", WordFlags.Title) + ) + .load(); + + var reader = indexFactory.getCombinedIndexReader(); + var query = reader.findFullWord(kw("hello")) + .also(kw("world")) + .not(kw("goodbye")) + .build(); + + var buffer = new LongQueryBuffer(32); + query.getMoreResults(buffer); + + assertEquals( + List.of(d(2, 4)), + decode(buffer) + ); + } + + List decode(LongQueryBuffer buffer) { + List result = new ArrayList<>(); + for (int i = 0; i < buffer.size(); 
i++) { + result.add(new MockDataDocument(buffer.data.get(i))); + } + return result; + } + + private MockDataDocument d(int domainId, int ordinal) { + return new MockDataDocument(domainId, ordinal); + } + + private void constructIndex() throws IOException { + createForwardIndex(); + createFullReverseIndex(); + createPrioReverseIndex(); + } + + private void createFullReverseIndex() throws IOException { + + Path outputFileDocs = ReverseIndexFullFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexFullFileNames.FileIdentifier.DOCS, ReverseIndexFullFileNames.FileVersion.NEXT); + Path outputFileWords = ReverseIndexFullFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexFullFileNames.FileIdentifier.WORDS, ReverseIndexFullFileNames.FileVersion.NEXT); + Path outputFilePositions = ReverseIndexFullFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexFullFileNames.FileIdentifier.POSITIONS, ReverseIndexFullFileNames.FileVersion.NEXT); + + Path workDir = IndexLocations.getIndexConstructionArea(fileStorageService); + Path tmpDir = workDir.resolve("tmp"); + + if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); + + var constructor = + new ReverseIndexConstructor( + outputFileDocs, + outputFileWords, + outputFilePositions, + IndexJournalReader::singleFile, + DocIdRewriter.identity(), + tmpDir); + constructor.createReverseIndex(new FakeProcessHeartbeat(), "name", workDir); + } + + private void createPrioReverseIndex() throws IOException { + + Path outputFileDocs = ReverseIndexPrioFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexPrioFileNames.FileIdentifier.DOCS, ReverseIndexPrioFileNames.FileVersion.NEXT); + Path outputFileWords = ReverseIndexPrioFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexPrioFileNames.FileIdentifier.WORDS, ReverseIndexPrioFileNames.FileVersion.NEXT); + Path outputFilePositions = 
ReverseIndexPrioFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexPrioFileNames.FileIdentifier.POSITIONS, ReverseIndexPrioFileNames.FileVersion.NEXT); + Path workDir = IndexLocations.getIndexConstructionArea(fileStorageService); + Path tmpDir = workDir.resolve("tmp"); + + if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); + + var constructor = new ReverseIndexConstructor( + outputFileDocs, + outputFileWords, + outputFilePositions, + IndexJournalReader::singleFile, + DocIdRewriter.identity(), + tmpDir); + + constructor.createReverseIndex(new FakeProcessHeartbeat(), "name", workDir); + } + + private void createForwardIndex() throws IOException { + + Path workDir = IndexLocations.getIndexConstructionArea(fileStorageService); + Path outputFileDocsId = ForwardIndexFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ForwardIndexFileNames.FileIdentifier.DOC_ID, ForwardIndexFileNames.FileVersion.NEXT); + Path outputFileDocsData = ForwardIndexFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ForwardIndexFileNames.FileIdentifier.DOC_DATA, ForwardIndexFileNames.FileVersion.NEXT); + + ForwardIndexConverter converter = new ForwardIndexConverter(processHeartbeat, + IndexJournalReader.paging(workDir), + outputFileDocsId, + outputFileDocsData, + domainRankings + ); + + converter.convert(); + } + + MurmurHash3_128 hasher = new MurmurHash3_128(); + + long kw(String s) { + return hasher.hashKeyword(s); + } + + class MockData { + private final Map> allData = new HashMap<>(); + private final Map metaByDoc = new HashMap<>(); + + public MockData add(MockDataDocument document, + MockDocumentMeta meta, + MockDataKeyword... 
words) + { + long id = UrlIdCodec.encodeId(document.domainId, document.ordinal); + + allData.computeIfAbsent(id, l -> new ArrayList<>()).addAll(List.of(words)); + metaByDoc.put(id, meta); + + return this; + } + + void load() throws IOException, SQLException, URISyntaxException { + allData.forEach((doc, words) -> { + + var meta = metaByDoc.get(doc); + + var header = new IndexJournalEntryHeader( + doc, + meta.features, + 100, + meta.documentMetadata.encode() + ); + + String[] keywords = words.stream().map(w -> w.keyword).toArray(String[]::new); + long[] metadata = words.stream().map(w -> w.termMetadata).mapToLong(Long::longValue).toArray(); + var positions = words.stream().map(w -> w.positions).map(pos -> GammaCodedSequence.generate(ByteBuffer.allocate(1024), pos.toIntArray())).toArray(GammaCodedSequence[]::new); + + indexJournalWriter.put(header, + new IndexJournalEntryData(keywords, metadata, positions)); + }); + + var linkdbWriter = new DocumentDbWriter( + IndexLocations.getLinkdbLivePath(fileStorageService).resolve(DOCDB_FILE_NAME) + ); + for (Long key : allData.keySet()) { + linkdbWriter.add(new DocdbUrlDetail( + key, + new EdgeUrl("https://www.example.com"), + "test", + "test", + 0., + "HTML5", + 0, + null, + 0, + 5 + )); + } + linkdbWriter.close(); + + indexJournalWriter.close(); + constructIndex(); + documentDbReader.reconnect(); + statefulIndex.switchIndex(); + } + } + + record MockDataDocument(int domainId, int ordinal) { + public MockDataDocument(long encodedId) { + this(UrlIdCodec.getDomainId(encodedId), UrlIdCodec.getDocumentOrdinal(encodedId)); + } + + public long docId() { + return UrlIdCodec.encodeId(domainId, ordinal); + } + + } + record MockDocumentMeta(int features, DocumentMetadata documentMetadata) {} + record MockDataKeyword(String keyword, long termMetadata, IntList positions) {} + + MockDataKeyword w(String keyword, WordFlags flags, int... 
positions) { + return new MockDataKeyword(keyword, new WordMetadata(0L, EnumSet.of(flags)).encode(), IntList.of(positions)); + + } +} diff --git a/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationSmokeTest.java b/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationSmokeTest.java index 1af355f6..e5040157 100644 --- a/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationSmokeTest.java +++ b/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationSmokeTest.java @@ -13,7 +13,6 @@ import nu.marginalia.process.control.FakeProcessHeartbeat; import nu.marginalia.process.control.ProcessHeartbeat; import nu.marginalia.sequence.GammaCodedSequence; import nu.marginalia.storage.FileStorageService; -import nu.marginalia.hash.MurmurHash3_128; import nu.marginalia.index.construction.DocIdRewriter; import nu.marginalia.index.construction.ReverseIndexConstructor; import nu.marginalia.index.forward.ForwardIndexConverter; @@ -142,6 +141,53 @@ public class IndexQueryServiceIntegrationSmokeTest { Assertions.assertArrayEquals(ids, actual); } + @Test + public void testSimple() throws Exception { + var linkdbWriter = new DocumentDbWriter( + IndexLocations.getLinkdbLivePath(fileStorageService) + .resolve(DOCDB_FILE_NAME) + ); + for (int i = 1; i < 512; i++) { + loadData(linkdbWriter, i); + } + linkdbWriter.close(); + documentDbReader.reconnect(); + + indexJournalWriter.close(); + constructIndex(); + statefulIndex.switchIndex(); + + var rsp = queryService.justQuery( + SearchSpecification.builder() + .queryLimits(new QueryLimits(10, 10, Integer.MAX_VALUE, 4000)) + .queryStrategy(QueryStrategy.SENTENCE) + .year(SpecificationLimit.none()) + .quality(SpecificationLimit.none()) + .size(SpecificationLimit.none()) + .rank(SpecificationLimit.none()) + .rankingParams(ResultRankingParameters.sensibleDefaults()) + .domains(new ArrayList<>()) + .searchSetIdentifier("NONE") + .query( + SearchQuery.builder("2") + .include("2") + .build() + ).build() + ); + + 
int[] idxes = new int[] { 62, 222, 382, 60, 124, 220, 284, 380, 444, 122 }; + long[] ids = IntStream.of(idxes).mapToLong(Long::valueOf).toArray(); + long[] actual = rsp.results + .stream() + .mapToLong(i -> i.rawIndexResult.getDocumentId()) + .map(UrlIdCodec::getDocumentOrdinal) + .toArray(); + + System.out.println(Arrays.toString(actual)); + System.out.println(Arrays.toString(ids)); + Assertions.assertArrayEquals(ids, actual); + } + @Test public void testDomainQuery() throws Exception { @@ -297,7 +343,6 @@ public class IndexQueryServiceIntegrationSmokeTest { return UrlIdCodec.encodeId((32 - (id % 32)), id); } - MurmurHash3_128 hasher = new MurmurHash3_128(); @SneakyThrows public void loadData(DocumentDbWriter ldbw, int id) { int[] factors = IntStream @@ -305,22 +350,44 @@ public class IndexQueryServiceIntegrationSmokeTest { .filter(v -> (id % v) == 0) .toArray(); + System.out.println("id:" + id + " factors: " + Arrays.toString(factors)); + long fullId = fullId(id); - var header = new IndexJournalEntryHeader(factors.length, 0, fullId, new DocumentMetadata(0, 0, 0, 0, id % 5, id, id % 20, (byte) 0).encode()); - - long[] data = new long[factors.length * 2]; - for (int i = 0; i < factors.length; i++) { - data[2 * i] = hasher.hashNearlyASCII(Integer.toString(factors[i])); - data[2 * i + 1] = new WordMetadata(i, EnumSet.of(WordFlags.Title)).encode(); - } + var header = new IndexJournalEntryHeader(factors.length, 0, 100, fullId, new DocumentMetadata(0, 0, 0, 0, id % 5, id, id % 20, (byte) 0).encode()); ldbw.add(new DocdbUrlDetail( fullId, new EdgeUrl("https://www.example.com/"+id), "test", "test", 0., "HTML5", 0, null, 0, 10 )); - String[] keywords = IntStream.range(0, factors.length).mapToObj(Integer::toString).toArray(String[]::new); + String[] keywords = IntStream.of(factors).mapToObj(Integer::toString).toArray(String[]::new); + long[] metadata = new long[factors.length]; + for (int i = 0; i < factors.length; i++) { + metadata[i] = new WordMetadata(i, 
EnumSet.of(WordFlags.Title)).encode(); + } + GammaCodedSequence[] positions = new GammaCodedSequence[factors.length]; + ByteBuffer wa = ByteBuffer.allocate(32); + for (int i = 0; i < factors.length; i++) { + positions[i] = GammaCodedSequence.generate(wa, factors); + } + + indexJournalWriter.put(header, new IndexJournalEntryData(keywords, metadata, positions)); + } + + @SneakyThrows + public void loadDataWithDomain(DocumentDbWriter ldbw, int domain, int id) { + int[] factors = IntStream.rangeClosed(1, id).filter(v -> (id % v) == 0).toArray(); + long fullId = UrlIdCodec.encodeId(domain, id); + var header = new IndexJournalEntryHeader(factors.length, 0, 100, fullId, DocumentMetadata.defaultValue()); + + ldbw.add(new DocdbUrlDetail( + fullId, new EdgeUrl("https://www.example.com/"+id), + "test", "test", 0., "HTML5", 0, null, 0, 10 + )); + + + String[] keywords = IntStream.of(factors).mapToObj(Integer::toString).toArray(String[]::new); long[] metadata = new long[factors.length]; for (int i = 0; i < factors.length; i++) { metadata[i] = new WordMetadata(i, EnumSet.of(WordFlags.Title)).encode(); @@ -334,30 +401,4 @@ public class IndexQueryServiceIntegrationSmokeTest { indexJournalWriter.put(header, new IndexJournalEntryData(keywords, metadata, positions)); } - @SneakyThrows - public void loadDataWithDomain(DocumentDbWriter ldbw, int domain, int id) { - int[] factors = IntStream.rangeClosed(1, id).filter(v -> (id % v) == 0).toArray(); - long fullId = UrlIdCodec.encodeId(domain, id); - var header = new IndexJournalEntryHeader(factors.length, 0, fullId, DocumentMetadata.defaultValue()); - - ldbw.add(new DocdbUrlDetail( - fullId, new EdgeUrl("https://www.example.com/"+id), - "test", "test", 0., "HTML5", 0, null, 0, 10 - )); - - - String[] keywords = IntStream.range(0, factors.length).mapToObj(Integer::toString).toArray(String[]::new); - long[] metadata = new long[factors.length]; - for (int i = 0; i < factors.length; i++) { - metadata[i] = new WordMetadata(i, 
EnumSet.of(WordFlags.Title)).encode(); - } - GammaCodedSequence[] positions = new GammaCodedSequence[factors.length]; - ByteBuffer wa = ByteBuffer.allocate(16); - for (int i = 0; i < factors.length; i++) { - positions[i] = GammaCodedSequence.generate(wa, i); - } - - indexJournalWriter.put(header, new IndexJournalEntryData(keywords, metadata, positions)); - } - } diff --git a/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationTest.java b/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationTest.java index 9e9c3873..0251a471 100644 --- a/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationTest.java +++ b/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationTest.java @@ -565,6 +565,7 @@ public class IndexQueryServiceIntegrationTest { var header = new IndexJournalEntryHeader( doc, meta.features, + 100, meta.documentMetadata.encode() ); diff --git a/code/index/test/nu/marginalia/ranking/results/ResultValuatorTest.java b/code/index/test/nu/marginalia/ranking/results/ResultValuatorTest.java deleted file mode 100644 index 41906904..00000000 --- a/code/index/test/nu/marginalia/ranking/results/ResultValuatorTest.java +++ /dev/null @@ -1,100 +0,0 @@ -package nu.marginalia.ranking.results; - -import nu.marginalia.api.searchquery.model.compiled.CompiledQuery; -import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong; -import nu.marginalia.api.searchquery.model.compiled.CqDataInt; -import nu.marginalia.api.searchquery.model.results.ResultRankingContext; -import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; -import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore; -import nu.marginalia.model.idx.DocumentFlags; -import nu.marginalia.model.idx.WordFlags; -import nu.marginalia.model.crawl.PubDate; -import nu.marginalia.model.idx.DocumentMetadata; -import nu.marginalia.model.idx.WordMetadata; -import nu.marginalia.ranking.results.factors.*; -import 
nu.marginalia.term_frequency_dict.TermFrequencyDict; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.mockito.Mockito; - -import java.util.*; - -import static org.mockito.Mockito.when; - -class ResultValuatorTest { - - TermFrequencyDict dict; - ResultValuator valuator; - - @BeforeEach - public void setUp() { - - dict = Mockito.mock(TermFrequencyDict.class); - when(dict.docCount()).thenReturn(100_000); - - valuator = new ResultValuator( - new TermCoherenceFactor() - ); - - } - - CqDataInt frequencyData = new CqDataInt(new int[] { 10 }); - - CompiledQueryLong titleOnlyLowCountSet = CompiledQuery.just( - new SearchResultKeywordScore("bob", 1, - wordMetadata(Set.of(1), EnumSet.of(WordFlags.Title))) - ).mapToLong(SearchResultKeywordScore::encodedWordMetadata); - - CompiledQueryLong highCountNoTitleSet = CompiledQuery.just( - new SearchResultKeywordScore("bob", 1, - wordMetadata(Set.of(1,3,4,6,7,9,10,11,12,14,15,16), EnumSet.of(WordFlags.TfIdfHigh))) - ).mapToLong(SearchResultKeywordScore::encodedWordMetadata);; - - CompiledQueryLong highCountSubjectSet = CompiledQuery.just( - new SearchResultKeywordScore("bob", 1, - wordMetadata(Set.of(1,3,4,6,7,9,10,11,12,14,15,16), EnumSet.of(WordFlags.TfIdfHigh, WordFlags.Subjects))) - ).mapToLong(SearchResultKeywordScore::encodedWordMetadata);; - - - @Test - void evaluateTerms() { - - when(dict.getTermFreq("bob")).thenReturn(10); - ResultRankingContext context = new ResultRankingContext(100000, - ResultRankingParameters.sensibleDefaults(), - new BitSet(), - new BitSet(), - frequencyData, - frequencyData); - - long docMeta = docMetadata(0, 2010, 5, EnumSet.noneOf(DocumentFlags.class)); - int features = 0; - - double titleOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, docMeta, features, 10_000, context, null); - double titleLongOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, docMeta, features, 10_000, context, null); - double highCountNoTitle = 
valuator.calculateSearchResultValue(highCountNoTitleSet, docMeta, features, 10_000, context, null); - double highCountSubject = valuator.calculateSearchResultValue(highCountSubjectSet, docMeta, features, 10_000, context, null); - - System.out.println(titleOnlyLowCount); - System.out.println(titleLongOnlyLowCount); - System.out.println(highCountNoTitle); - System.out.println(highCountSubject); - } - - private long docMetadata(int topology, - int year, - int quality, - EnumSet flags) { - return new DocumentMetadata(topology, PubDate.toYearByte(year), quality, flags).encode(); - } - - private long wordMetadata(Set positions, Set wordFlags) { - long posBits = positions.stream() - .mapToLong(i -> ((1L << i) & 0xFF_FFFF_FFFF_FFFFL)) - .reduce((a,b) -> a|b) - .orElse(0L); - - return new WordMetadata(posBits, wordFlags).encode(); - } - -} \ No newline at end of file diff --git a/code/index/test/nu/marginalia/ranking/results/factors/TermCoherenceFactorTest.java b/code/index/test/nu/marginalia/ranking/results/factors/TermCoherenceFactorTest.java deleted file mode 100644 index 5d2b47c9..00000000 --- a/code/index/test/nu/marginalia/ranking/results/factors/TermCoherenceFactorTest.java +++ /dev/null @@ -1,107 +0,0 @@ -package nu.marginalia.ranking.results.factors; - -import nu.marginalia.api.searchquery.model.compiled.CompiledQuery; -import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates; -import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore; -import nu.marginalia.bbpc.BrailleBlockPunchCards; -import nu.marginalia.model.idx.WordMetadata; -import org.junit.jupiter.api.Test; - -import java.util.ArrayList; -import java.util.List; - -import static org.junit.jupiter.api.Assertions.*; - -class TermCoherenceFactorTest { - - TermCoherenceFactor termCoherenceFactor = new TermCoherenceFactor(); - @Test - public void testAllBitsSet() { - var allPositionsSet = createSet( - ~0L, - ~0L - ); - - long mask = 
CompiledQueryAggregates.longBitmaskAggregate( - allPositionsSet, - SearchResultKeywordScore::positions - ); - - assertEquals(1.0, termCoherenceFactor.bitsSetFactor(mask), 0.01); - - assertEquals(1.0, - termCoherenceFactor.calculateOverlap( - allPositionsSet.mapToLong(SearchResultKeywordScore::encodedWordMetadata) - ) - ); - - } - - @Test - public void testNoBitsSet() { - var allPositionsSet = createSet( - 0, 0 - ); - - long mask = CompiledQueryAggregates.longBitmaskAggregate(allPositionsSet, score -> score.positions() & WordMetadata.POSITIONS_MASK); - - assertEquals(0, termCoherenceFactor.bitsSetFactor(mask), 0.01); - - assertEquals(0, termCoherenceFactor.calculateOverlap(allPositionsSet.mapToLong(SearchResultKeywordScore::encodedWordMetadata))); - } - - @Test @SuppressWarnings("unchecked") - public void testLowPosMatches() { - var positions = createSet( - List.of(0, 1, 2, 3), List.of(0, 1, 2, 3) - ); - - long mask = CompiledQueryAggregates.longBitmaskAggregate(positions, score -> score.positions() & WordMetadata.POSITIONS_MASK); - printMask(mask); - - } - - @Test @SuppressWarnings("unchecked") - public void testHiPosMatches() { - var positions = createSet( - List.of(55, 54, 53, 52), List.of(55, 54, 53, 52) - ); - - long mask = CompiledQueryAggregates.longBitmaskAggregate(positions, score -> score.positions() & WordMetadata.POSITIONS_MASK); - printMask(mask); - } - - @Test - public void testBitMatchScaling() { - for (int i = 1; i < 48; i++) { - System.out.println(i + ":" + termCoherenceFactor.bitsSetFactor((1L << i) - 1)); - } - } - - void printMask(long mask) { - System.out.println(BrailleBlockPunchCards.printBits(mask, 48)); - } - - CompiledQuery createSet(List... maskPositions) { - long[] positions = new long[maskPositions.length]; - - for (int i = 0; i < maskPositions.length; i++) { - for (long pos : maskPositions[i]) { - positions[i] |= (1L< createSet(long... 
positionMasks) { - List keywords = new ArrayList<>(); - - for (int i = 0; i < positionMasks.length; i++) { - keywords.add(new SearchResultKeywordScore("", 0, - new WordMetadata(positionMasks[i] & WordMetadata.POSITIONS_MASK, (byte) 0).encode())); - } - - return CompiledQuery.just(keywords.toArray(SearchResultKeywordScore[]::new)); - } -} \ No newline at end of file diff --git a/code/libraries/coded-sequence/java/nu/marginalia/sequence/EliasGammaCodec.java b/code/libraries/coded-sequence/java/nu/marginalia/sequence/EliasGammaCodec.java index 335d57d8..87b2abd5 100644 --- a/code/libraries/coded-sequence/java/nu/marginalia/sequence/EliasGammaCodec.java +++ b/code/libraries/coded-sequence/java/nu/marginalia/sequence/EliasGammaCodec.java @@ -17,12 +17,13 @@ public class EliasGammaCodec implements IntIterator { private final BitReader reader; int rem = 0; - private int last = 0; + private int last; private int next = 0; - private EliasGammaCodec(ByteBuffer buffer) { + private EliasGammaCodec(ByteBuffer buffer, int zero) { reader = new BitReader(buffer); + last = zero; int bits = reader.takeWhileZero(); if (!reader.hasMore()) { @@ -33,9 +34,24 @@ public class EliasGammaCodec implements IntIterator { } } + public static int readCount(ByteBuffer buffer) { + var reader = new BitReader(buffer); + + if (reader.getCurrentValue() > 0) { + int bits = reader.takeWhileZero(); + return reader.get(bits); + } + else { + return 0; + } + } + /** Decode a sequence of integers from a ByteBuffer using the Elias Gamma code */ public static IntIterator decode(ByteBuffer buffer) { - return new EliasGammaCodec(buffer); + return new EliasGammaCodec(buffer, 0); + } + public static IntIterator decodeWithOffset(ByteBuffer buffer, int offset) { + return new EliasGammaCodec(buffer, offset); } /** Encode a sequence of integers into a ByteBuffer using the Elias Gamma code. 
diff --git a/code/libraries/coded-sequence/java/nu/marginalia/sequence/GammaCodedSequence.java b/code/libraries/coded-sequence/java/nu/marginalia/sequence/GammaCodedSequence.java index 58ff30d2..a2335fbf 100644 --- a/code/libraries/coded-sequence/java/nu/marginalia/sequence/GammaCodedSequence.java +++ b/code/libraries/coded-sequence/java/nu/marginalia/sequence/GammaCodedSequence.java @@ -16,6 +16,7 @@ import java.util.StringJoiner; * */ public class GammaCodedSequence implements BinarySerializable, Iterable { private final ByteBuffer raw; + int startPos = 0; int startLimit = 0; @@ -43,6 +44,12 @@ public class GammaCodedSequence implements BinarySerializable, Iterable startLimit = bytes.limit(); } + public GammaCodedSequence(ByteBuffer bytes, int startPos, int startLimit) { + this.raw = bytes; + this.startPos = startPos; + this.startLimit = startLimit; + } + public GammaCodedSequence(byte[] bytes) { raw = ByteBuffer.allocate(bytes.length); raw.put(bytes); @@ -72,6 +79,18 @@ public class GammaCodedSequence implements BinarySerializable, Iterable return EliasGammaCodec.decode(raw); } + /** Return an iterator over the sequence with a constant offset applied to each value. + * This is useful for comparing sequences with different offsets, and adds zero + * extra cost to the decoding process which is already based on adding + * relative differences. 
+ * */ + public IntIterator offsetIterator(int offset) { + raw.position(startPos); + raw.limit(startLimit); + + return EliasGammaCodec.decodeWithOffset(raw, offset); + } + public IntList values() { var intItr = iterator(); IntArrayList ret = new IntArrayList(8); @@ -81,18 +100,6 @@ public class GammaCodedSequence implements BinarySerializable, Iterable return ret; } - /** Decode the sequence into an IntList; - * this is a somewhat slow operation, - * iterating over the data directly more performant */ - public IntList decode() { - IntArrayList ret = new IntArrayList(8); - var iter = iterator(); - while (iter.hasNext()) { - ret.add(iter.nextInt()); - } - return ret; - } - public int hashCode() { return raw.hashCode(); } @@ -116,7 +123,11 @@ public class GammaCodedSequence implements BinarySerializable, Iterable return raw; } - public int size() { + public int bufferSize() { return raw.capacity(); } + + public int valueCount() { + return EliasGammaCodec.readCount(buffer()); + } } diff --git a/code/libraries/coded-sequence/java/nu/marginalia/sequence/SequenceOperations.java b/code/libraries/coded-sequence/java/nu/marginalia/sequence/SequenceOperations.java new file mode 100644 index 00000000..7a026862 --- /dev/null +++ b/code/libraries/coded-sequence/java/nu/marginalia/sequence/SequenceOperations.java @@ -0,0 +1,86 @@ +package nu.marginalia.sequence; + +import it.unimi.dsi.fastutil.ints.IntIterator; + +public class SequenceOperations { + + /** Return true if the sequences intersect, false otherwise. + * */ + public static boolean intersectSequences(IntIterator... 
sequences) { + + if (sequences.length <= 1) + return true; + + // Initialize values and find the maximum value + int[] values = new int[sequences.length]; + + for (int i = 0; i < sequences.length; i++) { + if (sequences[i].hasNext()) + values[i] = sequences[i].nextInt(); + else + return false; + } + + // Intersect the sequences by advancing all values smaller than the maximum seen so far + // until they are equal to the maximum value, or until the end of the sequence is reached + int max = Integer.MIN_VALUE; + int successes = 0; + for (int i = 0; successes < sequences.length; i = (i + 1) % sequences.length) + { + if (values[i] == max) { + successes++; + } else { + successes = 0; + + // Discard values until we reach the maximum value seen so far, + // or until the end of the sequence is reached + while (values[i] < max) { + if (sequences[i].hasNext()) + values[i] = sequences[i].nextInt(); + else + return false; + } + + // Update the maximum value, if necessary + max = Math.max(max, values[i]); + } + } + + return true; + } + + /** Return the minimum word distance between two sequences, or a negative value if either sequence is empty. 
+ * */ + public static int minDistance(IntIterator seqA, IntIterator seqB) + { + int minDistance = Integer.MAX_VALUE; + + if (!seqA.hasNext() || !seqB.hasNext()) + return -1; + + int a = seqA.nextInt(); + int b = seqB.nextInt(); + + while (true) { + int distance = Math.abs(a - b); + if (distance < minDistance) + minDistance = distance; + + if (a <= b) { + if (seqA.hasNext()) { + a = seqA.nextInt(); + } else { + break; + } + } else { + if (seqB.hasNext()) { + b = seqB.nextInt(); + } else { + break; + } + } + } + + return minDistance; + } +} diff --git a/code/libraries/coded-sequence/java/nu/marginalia/sequence/io/BitReader.java b/code/libraries/coded-sequence/java/nu/marginalia/sequence/io/BitReader.java index 08979f0d..61125d2e 100644 --- a/code/libraries/coded-sequence/java/nu/marginalia/sequence/io/BitReader.java +++ b/code/libraries/coded-sequence/java/nu/marginalia/sequence/io/BitReader.java @@ -20,6 +20,10 @@ public class BitReader { this.currentValue = 0; } + public long getCurrentValue() { + return currentValue; + } + /** Read the next bit from the buffer */ public boolean getBit() { if (bitPosition <= 0) { diff --git a/code/libraries/coded-sequence/test/nu/marginalia/sequence/SequenceOperationsTest.java b/code/libraries/coded-sequence/test/nu/marginalia/sequence/SequenceOperationsTest.java new file mode 100644 index 00000000..dbae6f29 --- /dev/null +++ b/code/libraries/coded-sequence/test/nu/marginalia/sequence/SequenceOperationsTest.java @@ -0,0 +1,75 @@ +package nu.marginalia.sequence; + +import it.unimi.dsi.fastutil.ints.IntIterator; +import org.junit.jupiter.api.Test; + +import java.nio.ByteBuffer; + +import static org.junit.jupiter.api.Assertions.*; + +class SequenceOperationsTest { + + @Test + void intersectSequencesSingle() { + ByteBuffer wa = ByteBuffer.allocate(1024); + GammaCodedSequence seq1 = GammaCodedSequence.generate(wa, 1); + + assertTrue(SequenceOperations.intersectSequences(seq1.iterator())); + } + + @Test + void 
intersectSequencesTrivialMatch() { + ByteBuffer wa = ByteBuffer.allocate(1024); + GammaCodedSequence seq1 = GammaCodedSequence.generate(wa, 1); + GammaCodedSequence seq2 = GammaCodedSequence.generate(wa, 1); + + assertTrue(SequenceOperations.intersectSequences(seq1.iterator(), seq2.iterator())); + } + + @Test + void intersectSequencesTrivialMismatch() { + ByteBuffer wa = ByteBuffer.allocate(1024); + GammaCodedSequence seq1 = GammaCodedSequence.generate(wa, 1); + GammaCodedSequence seq2 = GammaCodedSequence.generate(wa, 2); + + assertFalse(SequenceOperations.intersectSequences(seq1.iterator(), seq2.iterator())); + } + + @Test + void intersectSequencesOffsetMatch() { + ByteBuffer wa = ByteBuffer.allocate(1024); + GammaCodedSequence seq1 = GammaCodedSequence.generate(wa, 1); + GammaCodedSequence seq2 = GammaCodedSequence.generate(wa, 3); + + assertTrue(SequenceOperations.intersectSequences(seq1.offsetIterator(0), seq2.offsetIterator(-2))); + } + + @Test + void intersectSequencesDeepMatch() { + ByteBuffer wa = ByteBuffer.allocate(1024); + GammaCodedSequence seq1 = GammaCodedSequence.generate(wa, 1, 3, 4, 7, 8, 9, 11); + GammaCodedSequence seq2 = GammaCodedSequence.generate(wa, 2, 5, 8, 14); + + assertTrue(SequenceOperations.intersectSequences(seq1.iterator(), seq2.iterator())); + } + + @Test + void intersectSequencesDeepMatch3() { + ByteBuffer wa = ByteBuffer.allocate(1024); + GammaCodedSequence seq1 = GammaCodedSequence.generate(wa, 1, 3, 4, 7, 8, 9, 11); + GammaCodedSequence seq2 = GammaCodedSequence.generate(wa, 2, 5, 8, 14); + GammaCodedSequence seq3 = GammaCodedSequence.generate(wa, 1, 5, 8, 9); + + assertTrue(SequenceOperations.intersectSequences(seq1.iterator(), seq2.iterator(), seq3.iterator())); + } + + @Test + void intersectSequencesDeepMismatch() { + ByteBuffer wa = ByteBuffer.allocate(1024); + GammaCodedSequence seq1 = GammaCodedSequence.generate(wa, 1, 3, 4, 7, 8, 9, 11); + GammaCodedSequence seq2 = GammaCodedSequence.generate(wa, 2, 5, 14); + + 
assertFalse(SequenceOperations.intersectSequences(seq1.iterator(), seq2.iterator())); + } + +} \ No newline at end of file diff --git a/code/process-models/processed-data/java/nu/marginalia/model/processed/DocumentRecordKeywordsProjection.java b/code/process-models/processed-data/java/nu/marginalia/model/processed/DocumentRecordKeywordsProjection.java index c981f0da..5e98f96c 100644 --- a/code/process-models/processed-data/java/nu/marginalia/model/processed/DocumentRecordKeywordsProjection.java +++ b/code/process-models/processed-data/java/nu/marginalia/model/processed/DocumentRecordKeywordsProjection.java @@ -26,6 +26,8 @@ public class DocumentRecordKeywordsProjection { public int htmlFeatures; public long documentMetadata; + public int length; + public List words; public TLongList metas; public List positions; @@ -39,13 +41,14 @@ public class DocumentRecordKeywordsProjection { } public static Collection requiredColumns() { - return List.of("domain", "ordinal", "htmlFeatures", "word", "wordMeta", "documentMetadata"); + return List.of("domain", "ordinal", "htmlFeatures", "word", "wordMeta", "documentMetadata", "length"); } @SneakyThrows public DocumentRecordKeywordsProjection add(String heading, Object value) { switch (heading) { case "domain" -> domain = (String) value; + case "length" -> length = (Integer) value; case "ordinal" -> ordinal = (Integer) value; case "htmlFeatures" -> htmlFeatures = (Integer) value; case "documentMetadata" -> documentMetadata = (Long) value; diff --git a/code/processes/loading-process/java/nu/marginalia/loading/LoaderIndexJournalWriter.java b/code/processes/loading-process/java/nu/marginalia/loading/LoaderIndexJournalWriter.java index 9c87bab7..f523f8e7 100644 --- a/code/processes/loading-process/java/nu/marginalia/loading/LoaderIndexJournalWriter.java +++ b/code/processes/loading-process/java/nu/marginalia/loading/LoaderIndexJournalWriter.java @@ -6,12 +6,10 @@ import lombok.SneakyThrows; import nu.marginalia.IndexLocations; import 
nu.marginalia.index.journal.model.IndexJournalEntryData; import nu.marginalia.storage.FileStorageService; -import nu.marginalia.hash.MurmurHash3_128; import nu.marginalia.index.journal.model.IndexJournalEntryHeader; import nu.marginalia.index.journal.writer.IndexJournalWriterPagingImpl; import nu.marginalia.index.journal.writer.IndexJournalWriter; import nu.marginalia.keyword.model.DocumentKeywords; -import nu.marginalia.model.idx.DocumentMetadata; import nu.marginalia.index.journal.IndexJournalFileNames; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,18 +39,11 @@ public class LoaderIndexJournalWriter { indexWriter = new IndexJournalWriterPagingImpl(indexArea); } - public void putWords(long combinedId, - int features, - DocumentMetadata metadata, - DocumentKeywords wordSet) { - - putWords(combinedId, features, metadata.encode(), wordSet); - } - @SneakyThrows public void putWords(long combinedId, int features, long metadata, + int length, DocumentKeywords wordSet) { if (wordSet.isEmpty()) { @@ -65,7 +56,7 @@ public class LoaderIndexJournalWriter { return; } - var header = new IndexJournalEntryHeader(combinedId, features, metadata); + var header = new IndexJournalEntryHeader(combinedId, features, length, metadata); var data = new IndexJournalEntryData(wordSet.keywords, wordSet.metadata, wordSet.positions); indexWriter.put(header, data); diff --git a/code/processes/loading-process/java/nu/marginalia/loading/documents/KeywordLoaderService.java b/code/processes/loading-process/java/nu/marginalia/loading/documents/KeywordLoaderService.java index f69a891d..ab43bdd7 100644 --- a/code/processes/loading-process/java/nu/marginalia/loading/documents/KeywordLoaderService.java +++ b/code/processes/loading-process/java/nu/marginalia/loading/documents/KeywordLoaderService.java @@ -75,6 +75,7 @@ public class KeywordLoaderService { writer.putWords(combinedId, projection.htmlFeatures, projection.documentMetadata, + projection.length, words); } } \ No newline at end 
of file diff --git a/code/services-application/search-service/test/nu/marginalia/search/paperdoll/SearchServicePaperDoll.java b/code/services-application/search-service/test/nu/marginalia/search/paperdoll/SearchServicePaperDoll.java index 2a2cc003..be3fe0b7 100644 --- a/code/services-application/search-service/test/nu/marginalia/search/paperdoll/SearchServicePaperDoll.java +++ b/code/services-application/search-service/test/nu/marginalia/search/paperdoll/SearchServicePaperDoll.java @@ -91,7 +91,7 @@ public class SearchServicePaperDoll extends AbstractModule { long positions) { results.add(new DecoratedSearchResultItem( - new SearchResultItem(url.hashCode(), 2, 3, false), + new SearchResultItem(url.hashCode(), 2, 3), new EdgeUrl(url), title, description,