Mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git (synced 2025-02-24 05:18:58 +00:00)
(index) Integrate positions data with indexes WIP
This change integrates the new positions data with the forward and reverse indexes. The ranking code is still only partially re-written.
parent 9f982a0c3d
commit 36160988e2
@@ -5,8 +5,8 @@ import java.util.stream.IntStream;
 
 /** A compiled index service query */
 public class CompiledQueryInt {
-    private final CqExpression root;
-    private final CqDataInt data;
+    public final CqExpression root;
+    public final CqDataInt data;
 
     public CompiledQueryInt(CqExpression root, CqDataInt data) {
         this.root = root;
@@ -26,7 +26,7 @@ public class CompiledQueryInt {
         return IntStream.range(0, data.size());
     }
 
-    public long at(int index) {
+    public int at(int index) {
         return data.get(index);
     }
 
@@ -61,7 +61,8 @@ public class CompiledQueryParser {
 
         String[] cqData = new String[wordIds.size()];
         wordIds.forEach((w, i) -> cqData[i] = w);
-        return new CompiledQuery<>(root, new CqData<>(cqData));
+
+        return root.newQuery(cqData);
 
     }
 
@@ -8,6 +8,18 @@ import java.util.stream.Stream;
  *
  */
 public sealed interface CqExpression {
+    /** Create a new query for the provided data using this expression as the root */
+    default <T> CompiledQuery<T> newQuery(T[] data) {
+        return new CompiledQuery<>(this, data);
+    }
+    /** Create a new query for the provided data using this expression as the root */
+    default CompiledQueryInt newQuery(int[] data) {
+        return new CompiledQueryInt(this, new CqDataInt(data));
+    }
+    /** Create a new query for the provided data using this expression as the root */
+    default CompiledQueryLong newQuery(long[] data) {
+        return new CompiledQueryLong(this, new CqDataLong(data));
+    }
 
     Stream<Word> stream();
 
@@ -2,6 +2,7 @@ package nu.marginalia.api.searchquery.model.compiled.aggregate;
 
 import it.unimi.dsi.fastutil.longs.LongSet;
 import nu.marginalia.api.searchquery.model.compiled.CompiledQuery;
+import nu.marginalia.api.searchquery.model.compiled.CompiledQueryInt;
 import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
 
 import java.util.ArrayList;
@@ -36,7 +37,10 @@ public class CompiledQueryAggregates {
     public static <T> int intMaxMinAggregate(CompiledQuery<T> query, ToIntFunction<T> operator) {
         return query.root.visit(new CqIntMaxMinOperator(query, operator));
     }
+    /** Apply the operator to each leaf node, then return the highest minimum value found along any path */
+    public static <T> int intMaxMinAggregate(CompiledQueryInt query, IntUnaryOperator operator) {
+        return query.root.visit(new CqIntMaxMinOperator(query, operator));
+    }
     /** Apply the operator to each leaf node, then return the highest minimum value found along any path */
     public static int intMaxMinAggregate(CompiledQueryLong query, LongToIntFunction operator) {
         return query.root.visit(new CqIntMaxMinOperator(query, operator));
@@ -1,6 +1,7 @@
 package nu.marginalia.api.searchquery.model.compiled.aggregate;
 
 import nu.marginalia.api.searchquery.model.compiled.CompiledQuery;
+import nu.marginalia.api.searchquery.model.compiled.CompiledQueryInt;
 import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
 import nu.marginalia.api.searchquery.model.compiled.CqExpression;
 
@@ -21,7 +22,9 @@ public class CqIntMaxMinOperator implements CqExpression.IntVisitor {
     public CqIntMaxMinOperator(CompiledQueryLong query, LongToIntFunction operator) {
         this.operator = idx -> operator.applyAsInt(query.at(idx));
     }
+    public CqIntMaxMinOperator(CompiledQueryInt query, IntUnaryOperator operator) {
+        this.operator = idx -> operator.applyAsInt(query.at(idx));
+    }
     @Override
     public int onAnd(List<? extends CqExpression> parts) {
         int value = parts.getFirst().visit(this);
@@ -36,6 +36,10 @@ public class SearchQuery {
     @Deprecated // why does this exist?
     private double value = 0;
 
+    public static SearchQueryBuilder builder(String compiledQuery) {
+        return new SearchQueryBuilder(compiledQuery);
+    }
+
     public SearchQuery() {
         this.compiledQuery = "";
         this.searchTermsInclude = new ArrayList<>();
@@ -81,5 +85,45 @@ public class SearchQuery {
         return sb.toString();
     }
 
+    public static class SearchQueryBuilder {
+        private final String compiledQuery;
+        private List<String> searchTermsInclude = new ArrayList<>();
+        private List<String> searchTermsExclude = new ArrayList<>();
+        private List<String> searchTermsAdvice = new ArrayList<>();
+        private List<String> searchTermsPriority = new ArrayList<>();
+        private List<List<String>> searchTermCoherences = new ArrayList<>();
+
+        private SearchQueryBuilder(String compiledQuery) {
+            this.compiledQuery = compiledQuery;
+        }
+
+        public SearchQueryBuilder include(String... terms) {
+            searchTermsInclude.addAll(List.of(terms));
+            return this;
+        }
+
+        public SearchQueryBuilder exclude(String... terms) {
+            searchTermsExclude.addAll(List.of(terms));
+            return this;
+        }
+
+        public SearchQueryBuilder advice(String... terms) {
+            searchTermsAdvice.addAll(List.of(terms));
+            return this;
+        }
+
+        public SearchQueryBuilder priority(String... terms) {
+            searchTermsPriority.addAll(List.of(terms));
+            return this;
+        }
+
+        public SearchQueryBuilder coherences(String... coherences) {
+            searchTermCoherences.add(List.of(coherences));
+            return this;
+        }
+
+        public SearchQuery build() {
+            return new SearchQuery(compiledQuery, searchTermsInclude, searchTermsExclude, searchTermsAdvice, searchTermsPriority, searchTermCoherences);
+        }
+    }
 }
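Note: the builder added above is used fluently; a hypothetical usage sketch follows (the compiled query string and terms are made up for illustration, not part of the commit):

    SearchQuery query = SearchQuery.builder("example compiled query")
            .include("marginalia", "search")
            .exclude("spam")
            .priority("index")
            .build();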
@@ -32,13 +32,11 @@ public class SearchResultItem implements Comparable<SearchResultItem> {
 
     public SearchResultItem(long combinedId,
                             long encodedDocMetadata,
-                            int htmlFeatures,
-                            boolean hasPrioTerm) {
+                            int htmlFeatures) {
         this.combinedId = combinedId;
         this.encodedDocMetadata = encodedDocMetadata;
         this.keywordScores = new ArrayList<>();
         this.htmlFeatures = htmlFeatures;
-        this.hasPrioTerm = hasPrioTerm;
     }
 
 
@@ -83,8 +83,10 @@ public class ForwardIndexConverter {
                     int ranking = domainRankings.getRanking(domainId);
                     long meta = DocumentMetadata.encodeRank(pointer.documentMeta(), ranking);
 
+                    long features = pointer.documentFeatures() | ((long) pointer.documentSize() << 32L);
+
                     docFileData.set(entryOffset + ForwardIndexParameters.METADATA_OFFSET, meta);
-                    docFileData.set(entryOffset + ForwardIndexParameters.FEATURES_OFFSET, pointer.documentFeatures());
+                    docFileData.set(entryOffset + ForwardIndexParameters.FEATURES_OFFSET, features);
                 }
 
                 progress.progress(TaskSteps.FORCE);
@@ -82,9 +82,19 @@ public class ForwardIndexReader {
         long offset = idxForDoc(docId);
         if (offset < 0) return 0;
 
-        return (int) data.get(ENTRY_SIZE * offset + FEATURES_OFFSET);
+        return (int) (data.get(ENTRY_SIZE * offset + FEATURES_OFFSET) & 0xFFFF_FFFFL);
     }
 
+    public int getDocumentSize(long docId) {
+        assert UrlIdCodec.getRank(docId) == 0 : "Forward Index Reader fed dirty reverse index id";
+
+        long offset = idxForDoc(docId);
+        if (offset < 0) return 0;
+
+        return (int) (data.get(ENTRY_SIZE * offset + FEATURES_OFFSET) >>> 32L);
+    }
+
+
     private int idxForDoc(long docId) {
         assert UrlIdCodec.getRank(docId) == 0 : "Forward Index Reader fed dirty reverse index id";
 
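Note on the packed features/size word above: the converter stores the document features in the low 32 bits and the document size in the high 32 bits of the FEATURES_OFFSET slot, and the two reader accessors unpack that same long. A minimal sketch of the round trip (illustrative only, not part of the diff):

    long packed  = documentFeatures | ((long) documentSize << 32L);
    int features = (int) (packed & 0xFFFF_FFFFL); // what getDocumentFeatures() returns
    int size     = (int) (packed >>> 32L);        // what getDocumentSize() returns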
@@ -79,6 +79,7 @@ class ForwardIndexConverterTest {
             writer.put(
                     new IndexJournalEntryHeader(createId(id, id/20),
                             id%3,
+                            15,
                             (id % 5)),
                     new IndexJournalEntryData(
                             new String[]{},
@@ -17,14 +17,17 @@ import nu.marginalia.model.idx.DocumentMetadata;
  */
 public record IndexJournalEntryHeader(int entrySize,
                                       int documentFeatures,
+                                      int documentSize,
                                       long combinedId,
                                       long documentMeta) {
 
     public IndexJournalEntryHeader(long combinedId,
                                    int documentFeatures,
+                                   int documentSize,
                                    long documentMeta) {
         this(-1,
              documentFeatures,
+             documentSize,
              combinedId,
              documentMeta);
     }
@@ -28,12 +28,17 @@ public class IndexJournalReadEntry implements Iterable<IndexJournalEntryTermData
     public static IndexJournalReadEntry read(DataInputStream inputStream) throws IOException {
 
         final long sizeBlock = inputStream.readLong();
+        final int entrySize = (int) (sizeBlock >>> 48L);
+        final int docSize = (int) ((sizeBlock >>> 32L) & 0xFFFFL);
+        final int docFeatures = (int) (sizeBlock & 0xFFFF_FFFFL);
         final long docId = inputStream.readLong();
         final long meta = inputStream.readLong();
 
 
         var header = new IndexJournalEntryHeader(
-                (int) (sizeBlock >>> 32L),
-                (int) (sizeBlock & 0xFFFF_FFFFL),
+                entrySize,
+                docFeatures,
+                docSize,
                 docId,
                 meta);
 
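Note: per the reader above, the first 8-byte block of a journal document header now carries three fields: the entry size in bits 48-63, the document size in bits 32-47, and the document features in bits 0-31. A sketch of the packing the writer performs (illustrative only, not part of the diff):

    long sizeBlock = ((long) entrySize << 48)
                   | ((long) (docSize & 0xFFFF) << 32)
                   | (docFeatures & 0xFFFF_FFFFL);
    // entrySize   == (int) (sizeBlock >>> 48L)
    // docSize     == (int) ((sizeBlock >>> 32L) & 0xFFFFL)
    // docFeatures == (int) (sizeBlock & 0xFFFF_FFFFL)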
@@ -57,6 +62,10 @@ public class IndexJournalReadEntry implements Iterable<IndexJournalEntryTermData
         return header.documentFeatures();
     }
 
+    public int documentSize() {
+        return header.documentSize();
+    }
+
     public int domainId() {
         return UrlIdCodec.getDomainId(docId());
     }
@@ -88,7 +97,7 @@ class TermDataIterator implements Iterator<IndexJournalEntryTermData> {
     public IndexJournalEntryTermData next() {
         // read the metadata for the term
         long termId = buffer.getLong();
-        long meta = buffer.getLong();
+        long meta = buffer.getShort();
 
         // read the size of the sequence data
         int size = buffer.get() & 0xFF;
@@ -13,7 +13,7 @@ public interface IndexJournalReader {
     int FILE_HEADER_SIZE_BYTES = 8 * FILE_HEADER_SIZE_LONGS;
 
     int DOCUMENT_HEADER_SIZE_BYTES = 24;
-    int TERM_HEADER_SIZE_BYTES = 17;
+    int TERM_HEADER_SIZE_BYTES = 11;
 
     /** Create a reader for a single file. */
     static IndexJournalReader singleFile(Path fileName) throws IOException {
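Note: the new term header size follows from the writer changes later in this diff, where a term record is written as an 8-byte term id, a 2-byte metadata short and a 1-byte positions-size byte, i.e. 8 + 2 + 1 = 11 bytes, down from 8 + 8 + 1 = 17 bytes when the metadata was a full long.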
@@ -97,6 +97,9 @@ class SingleFileJournalPointer implements IndexJournalPointer {
     @Override
     public int documentFeatures() { return entry.documentFeatures(); }
 
+    @Override
+    public int documentSize() { return entry.documentSize(); }
+
     /** Return an iterator over the terms in the current document.
      * This iterator is not valid after calling nextDocument().
      */
@@ -42,6 +42,8 @@ public interface IndexJournalPointer extends Iterable<IndexJournalEntryTermData>
      */
     int documentFeatures();
 
+    int documentSize();
+
     /** Concatenate a number of journal pointers */
     static IndexJournalPointer concatenate(IndexJournalPointer... pointers) {
         if (pointers.length == 1)
@@ -94,6 +96,11 @@ class JoiningJournalPointer implements IndexJournalPointer {
         return pointers[pIndex].documentFeatures();
     }
 
+    @Override
+    public int documentSize() {
+        return pointers[pIndex].documentSize();
+    }
+
     @NotNull
     @Override
     public Iterator<IndexJournalEntryTermData> iterator() {
@@ -146,6 +153,12 @@ class FilteringJournalPointer implements IndexJournalPointer {
         return base.documentFeatures();
     }
 
+
+    @Override
+    public int documentSize() {
+        return base.documentSize();
+    }
+
     @NotNull
     @Override
     public Iterator<IndexJournalEntryTermData> iterator() {
@@ -2,7 +2,6 @@ package nu.marginalia.index.journal.writer;
 
 import nu.marginalia.index.journal.model.IndexJournalEntryData;
 import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
-import nu.marginalia.index.journal.model.IndexJournalEntryTermData;
 
 import java.io.IOException;
 
@@ -81,12 +81,6 @@ public class IndexJournalWriterSingleFileImpl implements IndexJournalWriter{
     public int put(IndexJournalEntryHeader header,
                    IndexJournalEntryData data)
     {
-        if (dataBuffer.capacity() - dataBuffer.position() < 3*8) {
-            dataBuffer.flip();
-            compressingStream.compress(dataBuffer);
-            dataBuffer.clear();
-        }
-
        final long[] keywords = data.termIds();
        final long[] metadata = data.metadata();
        final var positions = data.positions();
@@ -94,16 +88,30 @@
        int recordSize = 0; // document header size is 3 longs
        for (int i = 0; i < keywords.length; i++) {
            // term header size is 2 longs
-           recordSize += IndexJournalReader.TERM_HEADER_SIZE_BYTES + positions[i].size();
+           recordSize += IndexJournalReader.TERM_HEADER_SIZE_BYTES + positions[i].bufferSize();
        }
 
-       dataBuffer.putInt(recordSize);
+       if (recordSize > Short.MAX_VALUE) {
+           // This should never happen, but if it does, we should log it and deal with it in a way that doesn't corrupt the file
+           // (32 KB is *a lot* of data for a single document, larger than the uncompressed HTML of most documents)
+           logger.error("Omitting entry: Record size {} exceeds maximum representable size of {}", recordSize, Short.MAX_VALUE);
+           return 0;
+       }
+
+       if (dataBuffer.capacity() - dataBuffer.position() < 3*8) {
+           dataBuffer.flip();
+           compressingStream.compress(dataBuffer);
+           dataBuffer.clear();
+       }
+
+       dataBuffer.putShort((short) recordSize);
+       dataBuffer.putShort((short) Math.clamp(0, header.documentSize(), Short.MAX_VALUE));
        dataBuffer.putInt(header.documentFeatures());
        dataBuffer.putLong(header.combinedId());
        dataBuffer.putLong(header.documentMeta());
 
        for (int i = 0; i < keywords.length; i++) {
-           int requiredSize = IndexJournalReader.TERM_HEADER_SIZE_BYTES + positions[i].size();
+           int requiredSize = IndexJournalReader.TERM_HEADER_SIZE_BYTES + positions[i].bufferSize();
 
            if (dataBuffer.capacity() - dataBuffer.position() < requiredSize) {
                dataBuffer.flip();
@@ -112,8 +120,8 @@
            }
 
            dataBuffer.putLong(keywords[i]);
-           dataBuffer.putLong(metadata[i]);
-           dataBuffer.put((byte) positions[i].size());
+           dataBuffer.putShort((short) metadata[i]);
+           dataBuffer.put((byte) positions[i].bufferSize());
            dataBuffer.put(positions[i].buffer());
        }
 
@@ -1,6 +1,8 @@
 package nu.marginalia.index.journal;
 
 import it.unimi.dsi.fastutil.ints.IntList;
+import it.unimi.dsi.fastutil.longs.LongArrayList;
+import it.unimi.dsi.fastutil.longs.LongList;
 import nu.marginalia.hash.MurmurHash3_128;
 import nu.marginalia.index.journal.model.IndexJournalEntryData;
 import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
@@ -8,6 +10,11 @@ import nu.marginalia.index.journal.model.IndexJournalEntryTermData;
 import nu.marginalia.index.journal.reader.IndexJournalReaderPagingImpl;
 import nu.marginalia.index.journal.writer.IndexJournalWriterSingleFileImpl;
 import nu.marginalia.index.journal.reader.IndexJournalReaderSingleFile;
+import nu.marginalia.model.EdgeUrl;
+import nu.marginalia.model.id.UrlIdCodec;
+import nu.marginalia.model.idx.DocumentMetadata;
+import nu.marginalia.model.idx.WordFlags;
+import nu.marginalia.model.idx.WordMetadata;
 import nu.marginalia.sequence.GammaCodedSequence;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.Assertions;
@@ -18,8 +25,9 @@ import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.util.Iterator;
-import java.util.List;
+import java.util.*;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
 
 import static org.junit.jupiter.api.Assertions.*;
 
@@ -52,7 +60,7 @@ public class IndexJournalWriterTest {
     public void testSingleFile() {
         try (var writer = new IndexJournalWriterSingleFileImpl(tempFile)) {
             // Write two documents with two terms each
-            writer.put(new IndexJournalEntryHeader(11, 22, 33),
+            writer.put(new IndexJournalEntryHeader(11, 22, 10, 33),
                     new IndexJournalEntryData(
                             new String[]{"word1", "word2"},
                             new long[]{44, 55},
@@ -61,7 +69,7 @@
                             gcs(2, 4, 6),
                     })
             );
-            writer.put(new IndexJournalEntryHeader(12, 23, 34),
+            writer.put(new IndexJournalEntryHeader(12, 23, 11, 34),
                     new IndexJournalEntryData(
                             new String[]{"word1", "word2"},
                             new long[]{45, 56},
@@ -90,6 +98,7 @@
             assertEquals(11, ptr.documentId());
             assertEquals(22, ptr.documentFeatures());
             assertEquals(33, ptr.documentMeta());
+            assertEquals(10, ptr.documentSize());
 
             iter = ptr.iterator();
 
@@ -116,6 +125,7 @@
             assertEquals(12, ptr.documentId());
             assertEquals(23, ptr.documentFeatures());
             assertEquals(34, ptr.documentMeta());
+            assertEquals(11, ptr.documentSize());
 
             iter = ptr.iterator();
             // Term 1
@@ -147,7 +157,7 @@
     @Test
     public void testMultiFile() {
         try (var writer = new IndexJournalWriterSingleFileImpl(tempFile)) {
-            writer.put(new IndexJournalEntryHeader(11, 22, 33),
+            writer.put(new IndexJournalEntryHeader(11, 22, 10, 33),
                     new IndexJournalEntryData(
                             new String[]{"word1", "word2"},
                             new long[]{44, 55},
@@ -162,7 +172,7 @@
         }
 
         try (var writer = new IndexJournalWriterSingleFileImpl(tempFile2)) {
-            writer.put(new IndexJournalEntryHeader(12, 23, 34),
+            writer.put(new IndexJournalEntryHeader(12, 23, 11, 34),
                     new IndexJournalEntryData(
                             new String[]{"word1", "word2"},
                             new long[]{45, 56},
@@ -191,6 +201,7 @@
             assertEquals(11, ptr.documentId());
             assertEquals(22, ptr.documentFeatures());
             assertEquals(33, ptr.documentMeta());
+            assertEquals(10, ptr.documentSize());
 
             iter = ptr.iterator();
 
@@ -217,6 +228,7 @@
             assertEquals(12, ptr.documentId());
             assertEquals(23, ptr.documentFeatures());
             assertEquals(34, ptr.documentMeta());
+            assertEquals(11, ptr.documentSize());
 
             iter = ptr.iterator();
             // Term 1
@@ -249,7 +261,7 @@
     public void testSingleFileIterTwice() {
         try (var writer = new IndexJournalWriterSingleFileImpl(tempFile)) {
             // Write two documents with two terms each
-            writer.put(new IndexJournalEntryHeader(11, 22, 33),
+            writer.put(new IndexJournalEntryHeader(11, 22, 10, 33),
                     new IndexJournalEntryData(
                             new String[]{"word1", "word2"},
                             new long[]{44, 55},
@@ -277,6 +289,7 @@
             assertTrue(ptr.nextDocument());
             assertEquals(11, ptr.documentId());
             assertEquals(22, ptr.documentFeatures());
+            assertEquals(10, ptr.documentSize());
             assertEquals(33, ptr.documentMeta());
 
             iter = ptr.iterator();
@@ -307,7 +320,7 @@
     public void testFiltered() {
         try (var writer = new IndexJournalWriterSingleFileImpl(tempFile)) {
             // Write two documents with two terms each
-            writer.put(new IndexJournalEntryHeader(11, 22, 33),
+            writer.put(new IndexJournalEntryHeader(11, 22, 10, 33),
                     new IndexJournalEntryData(
                             new String[]{"word1", "word2"},
                             new long[]{44, 55},
@@ -316,7 +329,7 @@
                             gcs(2, 4, 6),
                     })
             );
-            writer.put(new IndexJournalEntryHeader(12, 23, 34),
+            writer.put(new IndexJournalEntryHeader(12, 23, 11, 34),
                     new IndexJournalEntryData(
                             new String[]{"word1", "word2"},
                             new long[]{45, 56},
@@ -344,6 +357,7 @@
             assertEquals(12, ptr.documentId());
             assertEquals(23, ptr.documentFeatures());
             assertEquals(34, ptr.documentMeta());
+            assertEquals(11, ptr.documentSize());
 
             iter = ptr.iterator();
             // Term 1
@@ -364,4 +378,72 @@
             }
         }
 
+    @Test
+    public void testIntegrationScenario() throws IOException {
+        Map<Long, Integer> wordMap = new HashMap<>();
+        for (int i = 0; i < 512; i++) {
+            wordMap.put(hasher.hashKeyword(Integer.toString(i)), i);
+        }
+        try (var writer = new IndexJournalWriterSingleFileImpl(tempFile)) {
+            for (int idc = 1; idc < 512; idc++) {
+                int id = idc;
+                int[] factors = IntStream
+                        .rangeClosed(1, id)
+                        .filter(v -> (id % v) == 0)
+                        .toArray();
+
+                System.out.println("id:" + id + " factors: " + Arrays.toString(factors));
+
+                long fullId = UrlIdCodec.encodeId((32 - (id % 32)), id);
+
+                var header = new IndexJournalEntryHeader(factors.length, 0, 100, fullId, new DocumentMetadata(0, 0, 0, 0, id % 5, id, id % 20, (byte) 0).encode());
+
+                String[] keywords = IntStream.of(factors).mapToObj(Integer::toString).toArray(String[]::new);
+                long[] metadata = new long[factors.length];
+                for (int i = 0; i < factors.length; i++) {
+                    metadata[i] = new WordMetadata(i, EnumSet.of(WordFlags.Title)).encode();
+                }
+                GammaCodedSequence[] positions = new GammaCodedSequence[factors.length];
+                ByteBuffer wa = ByteBuffer.allocate(16);
+                for (int i = 0; i < factors.length; i++) {
+                    positions[i] = GammaCodedSequence.generate(wa, i + 1);
+                }
+
+                writer.put(header, new IndexJournalEntryData(keywords, metadata, positions));
+            }
+        }
+
+        try (var ptr = new IndexJournalReaderSingleFile(tempFile).newPointer()) {
+            while (ptr.nextDocument()) {
+                int ordinal = UrlIdCodec.getDocumentOrdinal(ptr.documentId());
+                System.out.println(ordinal);
+
+                var expectedFactors =
+                        new LongArrayList(IntStream
+                                .rangeClosed(1, ordinal)
+                                .filter(v -> (ordinal % v) == 0)
+                                .mapToObj(Integer::toString)
+                                .mapToLong(hasher::hashKeyword)
+                                .toArray());
+
+                LongList foundIds = new LongArrayList();
+
+                var iter = ptr.iterator();
+                while (iter.hasNext()) {
+                    var termData = iter.next();
+                    foundIds.add(termData.termId());
+                }
+
+                if (!expectedFactors.equals(foundIds)) {
+                    System.out.println("Found: ");
+                    System.out.println(foundIds.stream().map(fac -> wordMap.getOrDefault(fac, -1)).map(Objects::toString).collect(Collectors.joining(",")));
+                    System.out.println("Expected: ");
+                    System.out.println(expectedFactors.stream().map(fac -> wordMap.getOrDefault(fac, -1)).map(Objects::toString).collect(Collectors.joining(",")));
+                    fail();
+                }
+                assertEquals(expectedFactors, foundIds);
+            }
+        }
+    }
+
 }
|
@ -3,6 +3,8 @@ package nu.marginalia.index;
|
|||||||
import nu.marginalia.array.LongArray;
|
import nu.marginalia.array.LongArray;
|
||||||
import nu.marginalia.array.LongArrayFactory;
|
import nu.marginalia.array.LongArrayFactory;
|
||||||
import nu.marginalia.btree.BTreeReader;
|
import nu.marginalia.btree.BTreeReader;
|
||||||
|
import nu.marginalia.index.positions.TermData;
|
||||||
|
import nu.marginalia.index.positions.PositionsFileReader;
|
||||||
import nu.marginalia.index.query.EmptyEntrySource;
|
import nu.marginalia.index.query.EmptyEntrySource;
|
||||||
import nu.marginalia.index.query.EntrySource;
|
import nu.marginalia.index.query.EntrySource;
|
||||||
import nu.marginalia.index.query.ReverseIndexRejectFilter;
|
import nu.marginalia.index.query.ReverseIndexRejectFilter;
|
||||||
@ -14,9 +16,9 @@ import org.slf4j.Logger;
|
|||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.lang.foreign.Arena;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.concurrent.Executors;
|
import java.util.concurrent.Executors;
|
||||||
|
|
||||||
public class ReverseIndexReader {
|
public class ReverseIndexReader {
|
||||||
@ -27,9 +29,16 @@ public class ReverseIndexReader {
|
|||||||
private final BTreeReader wordsBTreeReader;
|
private final BTreeReader wordsBTreeReader;
|
||||||
private final String name;
|
private final String name;
|
||||||
|
|
||||||
public ReverseIndexReader(String name, Path words, Path documents) throws IOException {
|
private final PositionsFileReader positionsFileReader;
|
||||||
|
|
||||||
|
public ReverseIndexReader(String name,
|
||||||
|
Path words,
|
||||||
|
Path documents,
|
||||||
|
PositionsFileReader positionsFileReader) throws IOException {
|
||||||
this.name = name;
|
this.name = name;
|
||||||
|
|
||||||
|
this.positionsFileReader = positionsFileReader;
|
||||||
|
|
||||||
if (!Files.exists(words) || !Files.exists(documents)) {
|
if (!Files.exists(words) || !Files.exists(documents)) {
|
||||||
this.words = null;
|
this.words = null;
|
||||||
this.documents = null;
|
this.documents = null;
|
||||||
@ -133,31 +142,29 @@ public class ReverseIndexReader {
|
|||||||
offset);
|
offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
public long[] getTermMeta(long termId, long[] docIds) {
|
public TermData[] getTermData(Arena arena,
|
||||||
|
long termId,
|
||||||
|
long[] docIds)
|
||||||
|
{
|
||||||
|
var ret = new TermData[docIds.length];
|
||||||
|
|
||||||
long offset = wordOffset(termId);
|
long offset = wordOffset(termId);
|
||||||
|
|
||||||
if (offset < 0) {
|
if (offset < 0) {
|
||||||
// This is likely a bug in the code, but we can't throw an exception here
|
// This is likely a bug in the code, but we can't throw an exception here
|
||||||
logger.debug("Missing offset for word {}", termId);
|
logger.debug("Missing offset for word {}", termId);
|
||||||
return new long[docIds.length];
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert isUniqueAndSorted(docIds) : "The input array docIds is assumed to be unique and sorted, was " + Arrays.toString(docIds);
|
|
||||||
|
|
||||||
var reader = createReaderNew(offset);
|
var reader = createReaderNew(offset);
|
||||||
return reader.queryData(docIds, 1);
|
|
||||||
|
// Read the size and offset of the position data
|
||||||
|
var offsets = reader.queryData(docIds, 1);
|
||||||
|
|
||||||
|
for (int i = 0; i < docIds.length; i++) {
|
||||||
|
ret[i] = positionsFileReader.getTermData(arena, offsets[i]);
|
||||||
}
|
}
|
||||||
|
return ret;
|
||||||
private boolean isUniqueAndSorted(long[] ids) {
|
|
||||||
if (ids.length == 0)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
for (int i = 1; i < ids.length; i++) {
|
|
||||||
if(ids[i] <= ids[i-1])
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() {
|
public void close() {
|
||||||
@ -166,5 +173,14 @@ public class ReverseIndexReader {
|
|||||||
|
|
||||||
if (words != null)
|
if (words != null)
|
||||||
words.close();
|
words.close();
|
||||||
|
|
||||||
|
if (positionsFileReader != null) {
|
||||||
|
try {
|
||||||
|
positionsFileReader.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
logger.error("Failed to close positions file reader", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
package nu.marginalia.index.construction;
|
package nu.marginalia.index.construction;
|
||||||
|
|
||||||
|
import nu.marginalia.index.positions.PositionCodec;
|
||||||
import nu.marginalia.sequence.GammaCodedSequence;
|
import nu.marginalia.sequence.GammaCodedSequence;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
@ -38,7 +39,7 @@ public class PositionsFileConstructor implements AutoCloseable {
|
|||||||
/** Add a term to the positions file
|
/** Add a term to the positions file
|
||||||
* @param termMeta the term metadata
|
* @param termMeta the term metadata
|
||||||
* @param positions the positions of the term
|
* @param positions the positions of the term
|
||||||
* @return the offset of the term in the file
|
* @return the offset of the term in the file, with the size of the data in the highest byte
|
||||||
*/
|
*/
|
||||||
public long add(byte termMeta, GammaCodedSequence positions) throws IOException {
|
public long add(byte termMeta, GammaCodedSequence positions) throws IOException {
|
||||||
synchronized (file) {
|
synchronized (file) {
|
||||||
@ -53,12 +54,20 @@ public class PositionsFileConstructor implements AutoCloseable {
|
|||||||
workBuffer.put(termMeta);
|
workBuffer.put(termMeta);
|
||||||
workBuffer.put(positionBuffer);
|
workBuffer.put(positionBuffer);
|
||||||
|
|
||||||
|
long ret = PositionCodec.encode(size, offset);
|
||||||
|
|
||||||
offset += size;
|
offset += size;
|
||||||
return offset;
|
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
|
while (workBuffer.position() < workBuffer.limit()) {
|
||||||
|
workBuffer.flip();
|
||||||
|
channel.write(workBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
channel.force(false);
|
channel.force(false);
|
||||||
channel.close();
|
channel.close();
|
||||||
}
|
}
|
||||||
|
@ -7,7 +7,6 @@ import org.slf4j.Logger;
|
|||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
|
@ -21,12 +21,14 @@ import java.util.concurrent.TimeUnit;
|
|||||||
* the associated ReversePreindexWordSegments data
|
* the associated ReversePreindexWordSegments data
|
||||||
*/
|
*/
|
||||||
public class ReversePreindexDocuments {
|
public class ReversePreindexDocuments {
|
||||||
private static PositionsFileConstructor positionsFileConstructor;
|
|
||||||
final Path file;
|
|
||||||
public final LongArray documents;
|
public final LongArray documents;
|
||||||
|
|
||||||
|
private static PositionsFileConstructor positionsFileConstructor;
|
||||||
private static final int RECORD_SIZE_LONGS = 2;
|
private static final int RECORD_SIZE_LONGS = 2;
|
||||||
private static final Logger logger = LoggerFactory.getLogger(ReversePreindexDocuments.class);
|
private static final Logger logger = LoggerFactory.getLogger(ReversePreindexDocuments.class);
|
||||||
|
|
||||||
|
public final Path file;
|
||||||
|
|
||||||
public ReversePreindexDocuments(LongArray documents, Path file) {
|
public ReversePreindexDocuments(LongArray documents, Path file) {
|
||||||
this.documents = documents;
|
this.documents = documents;
|
||||||
this.file = file;
|
this.file = file;
|
||||||
@ -70,22 +72,25 @@ public class ReversePreindexDocuments {
|
|||||||
|
|
||||||
long fileSizeLongs = RECORD_SIZE_LONGS * segments.totalSize();
|
long fileSizeLongs = RECORD_SIZE_LONGS * segments.totalSize();
|
||||||
|
|
||||||
try (RandomFileAssembler assembly = RandomFileAssembler.create(workDir, fileSizeLongs)) {
|
try (var assembly = RandomFileAssembler.create(workDir, fileSizeLongs);
|
||||||
|
var pointer = reader.newPointer())
|
||||||
|
{
|
||||||
|
|
||||||
var offsetMap = segments.asMap(RECORD_SIZE_LONGS);
|
var offsetMap = segments.asMap(RECORD_SIZE_LONGS);
|
||||||
offsetMap.defaultReturnValue(0);
|
offsetMap.defaultReturnValue(0);
|
||||||
|
|
||||||
var pointer = reader.newPointer();
|
|
||||||
while (pointer.nextDocument()) {
|
while (pointer.nextDocument()) {
|
||||||
long rankEncodedId = docIdRewriter.rewriteDocId(pointer.documentId());
|
long rankEncodedId = docIdRewriter.rewriteDocId(pointer.documentId());
|
||||||
for (var termData : pointer) {
|
for (var termData : pointer) {
|
||||||
long termId = termData.termId();
|
long termId = termData.termId();
|
||||||
|
|
||||||
long offset = offsetMap.addTo(termId, RECORD_SIZE_LONGS);
|
long offset = offsetMap.addTo(termId, RECORD_SIZE_LONGS);
|
||||||
long posOffset = positionsFileConstructor.add((byte) termData.metadata(), termData.positions());
|
|
||||||
|
// write position data to the positions file and get the offset
|
||||||
|
long encodedPosOffset = positionsFileConstructor.add((byte) termData.metadata(), termData.positions());
|
||||||
|
|
||||||
assembly.put(offset + 0, rankEncodedId);
|
assembly.put(offset + 0, rankEncodedId);
|
||||||
assembly.put(offset + 1, posOffset);
|
assembly.put(offset + 1, encodedPosOffset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -0,0 +1,25 @@
|
|||||||
|
package nu.marginalia.index.positions;
|
||||||
|
|
||||||
|
/** A utility class for encoding and decoding position data offsets,
|
||||||
|
* the data is encoded by using the highest 16 bits to store the offset,
|
||||||
|
* and the remaining 48 bits to store the size of the data.
|
||||||
|
* <p></p>
|
||||||
|
* This lets us address 256 TB of data, with up to 64 KB of position data for each term,
|
||||||
|
* which is ample headroom for both the size of the data and the number of positions.
|
||||||
|
* */
|
||||||
|
public class PositionCodec {
|
||||||
|
|
||||||
|
public static long encode(int length, long offset) {
|
||||||
|
assert decodeSize(offset) == 0 : "Offset must be less than 2^48";
|
||||||
|
|
||||||
|
return (long) length << 48 | offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static int decodeSize(long sizeEncodedOffset) {
|
||||||
|
return (int) ((sizeEncodedOffset & 0xFFFF_0000_0000_0000L) >>> 48);
|
||||||
|
}
|
||||||
|
public static long decodeOffset(long sizeEncodedOffset) {
|
||||||
|
return sizeEncodedOffset & 0x0000_FFFF_FFFF_FFFFL;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
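Note: a worked example of the codec above, with illustrative values. The code stores the size in the highest 16 bits and the offset in the low 48 bits, while the class comment states it the other way around:

    long key    = PositionCodec.encode(3, 1024L);  // 3 bytes of position data at offset 1024
    // key == (3L << 48) | 1024L
    int  size   = PositionCodec.decodeSize(key);   // 3
    long offset = PositionCodec.decodeOffset(key); // 1024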
@@ -0,0 +1,39 @@
+package nu.marginalia.index.positions;
+
+import java.io.IOException;
+import java.lang.foreign.Arena;
+import java.nio.channels.FileChannel;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+
+public class PositionsFileReader implements AutoCloseable {
+    private final FileChannel positions;
+
+    public PositionsFileReader(Path positionsFile) throws IOException {
+        this.positions = FileChannel.open(positionsFile, StandardOpenOption.READ);
+    }
+
+    /** Get the positions for a term in the index, as pointed out by the encoded offset;
+     * intermediate buffers are allocated from the provided arena allocator. */
+    public TermData getTermData(Arena arena, long sizeEncodedOffset) {
+        int length = PositionCodec.decodeSize(sizeEncodedOffset);
+        long offset = PositionCodec.decodeOffset(sizeEncodedOffset);
+
+        var segment = arena.allocate(length);
+        var buffer = segment.asByteBuffer();
+
+        try {
+            positions.read(buffer, offset);
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+
+        return new TermData(buffer);
+    }
+
+    @Override
+    public void close() throws IOException {
+        positions.close();
+    }
+
+}
@@ -0,0 +1,21 @@
+package nu.marginalia.index.positions;
+
+import nu.marginalia.sequence.GammaCodedSequence;
+
+import java.nio.ByteBuffer;
+
+public class TermData {
+    private final ByteBuffer buffer;
+
+    public TermData(ByteBuffer buffer) {
+        this.buffer = buffer;
+    }
+
+    public byte flags() {
+        return buffer.get(0);
+    }
+
+    public GammaCodedSequence positions() {
+        return new GammaCodedSequence(buffer, 1, buffer.capacity());
+    }
+}
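Note: the buffer wrapped by TermData is the record written by PositionsFileConstructor, so the assumed layout is:

    // [ 1 byte term flags ][ n bytes gamma-coded positions ]

which is why flags() reads byte 0 and positions() decodes the buffer starting at offset 1.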
@@ -0,0 +1,63 @@
+package nu.marginalia.index;
+
+import it.unimi.dsi.fastutil.ints.IntList;
+import nu.marginalia.index.construction.PositionsFileConstructor;
+import nu.marginalia.index.positions.TermData;
+import nu.marginalia.index.positions.PositionsFileReader;
+import nu.marginalia.sequence.GammaCodedSequence;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.lang.foreign.Arena;
+import java.nio.ByteBuffer;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+class PositionsFileReaderTest {
+
+    Path file;
+
+    @BeforeEach
+    void setUp() throws IOException {
+        file = Files.createTempFile("positions", "dat");
+    }
+    @AfterEach
+    void tearDown() throws IOException {
+        Files.delete(file);
+    }
+
+    @Test
+    void getTermData() throws IOException {
+        ByteBuffer workArea = ByteBuffer.allocate(8192);
+        long key1, key2, key3;
+        try (PositionsFileConstructor constructor = new PositionsFileConstructor(file)) {
+            key1 = constructor.add((byte) 43, GammaCodedSequence.generate(workArea, 1, 2, 3));
+            key2 = constructor.add((byte) 51, GammaCodedSequence.generate(workArea, 2, 3, 5, 1000, 5000, 20241));
+            key3 = constructor.add((byte) 61, GammaCodedSequence.generate(workArea, 3, 5, 7));
+        }
+
+        System.out.println("key1: " + Long.toHexString(key1));
+        System.out.println("key2: " + Long.toHexString(key2));
+        System.out.println("key3: " + Long.toHexString(key3));
+
+        try (Arena arena = Arena.ofConfined();
+             PositionsFileReader reader = new PositionsFileReader(file))
+        {
+            TermData data1 = reader.getTermData(arena, key1);
+            assertEquals(43, data1.flags());
+            assertEquals(IntList.of( 1, 2, 3), data1.positions().values());
+
+            TermData data2 = reader.getTermData(arena, key2);
+            assertEquals(51, data2.flags());
+            assertEquals(IntList.of(2, 3, 5, 1000, 5000, 20241), data2.positions().values());
+
+            TermData data3 = reader.getTermData(arena, key3);
+            assertEquals(61, data3.flags());
+            assertEquals(IntList.of(3, 5, 7), data3.positions().values());
+        }
+    }
+}
@@ -1,17 +1,19 @@
 package nu.marginalia.index;
 
+import it.unimi.dsi.fastutil.ints.IntList;
 import nu.marginalia.array.page.LongQueryBuffer;
 import nu.marginalia.index.construction.DocIdRewriter;
 import nu.marginalia.index.construction.PositionsFileConstructor;
 import nu.marginalia.index.construction.ReversePreindex;
 import nu.marginalia.index.construction.TestJournalFactory;
 import nu.marginalia.index.construction.TestJournalFactory.EntryDataWithWordMeta;
+import nu.marginalia.index.positions.PositionsFileReader;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
-import org.mockito.Mockito;
 
 import java.io.IOException;
+import java.lang.foreign.Arena;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.ArrayList;
@@ -47,13 +49,18 @@ class ReverseIndexReaderTest {
     public void testSimple() throws IOException {
 
         var indexReader = createIndex(
-                new EntryDataWithWordMeta(100, 101, wm(50, 51))
+                new EntryDataWithWordMeta(100, 101, wm(50, 51, 1, 3, 5))
         );
 
         assertEquals(1, indexReader.numDocuments(50));
 
-        long[] meta = indexReader.getTermMeta(50, new long[] { 100 });
-        assertArrayEquals(new long[] { 51 }, meta);
+        var positions = indexReader.getTermData(Arena.global(), 50, new long[] { 100 });
+
+        assertEquals(1, positions.length);
+        assertNotNull(positions[0]);
+        assertEquals((byte) 51, positions[0].flags());
+        assertEquals(IntList.of(1, 3, 5), positions[0].positions().values());
+
         assertArrayEquals(new long[] { 100 }, readEntries(indexReader, 50));
     }
 
@@ -69,13 +76,8 @@ class ReverseIndexReaderTest {
         assertEquals(2, indexReader.numDocuments(51));
         assertEquals(1, indexReader.numDocuments(52));
 
-        assertArrayEquals(new long[] { 51 }, indexReader.getTermMeta(50, new long[] { 100 }));
         assertArrayEquals(new long[] { 100 }, readEntries(indexReader, 50));
-
-        assertArrayEquals(new long[] { 52, 53 }, indexReader.getTermMeta(51, new long[] { 100, 101 }));
         assertArrayEquals(new long[] { 100, 101 }, readEntries(indexReader, 51));
-
-        assertArrayEquals(new long[] { 54 }, indexReader.getTermMeta(52, new long[] { 101 }));
         assertArrayEquals(new long[] { 101 }, readEntries(indexReader, 52));
 
     }
@@ -91,18 +93,20 @@ class ReverseIndexReaderTest {
 
     private ReverseIndexReader createIndex(EntryDataWithWordMeta... scenario) throws IOException {
         var reader = journalFactory.createReader(scenario);
-        var preindex = ReversePreindex.constructPreindex(reader,
-                Mockito.mock(PositionsFileConstructor.class),
-                DocIdRewriter.identity(), tempDir);
 
-
+        Path posFile = tempDir.resolve("positions.dat");
         Path docsFile = tempDir.resolve("docs.dat");
         Path wordsFile = tempDir.resolve("words.dat");
 
+        try (var positionsFileConstructor = new PositionsFileConstructor(posFile)) {
+            var preindex = ReversePreindex.constructPreindex(reader,
+                    positionsFileConstructor,
+                    DocIdRewriter.identity(), tempDir);
             preindex.finalizeIndex(docsFile, wordsFile);
             preindex.delete();
+        }
 
-        return new ReverseIndexReader("test", wordsFile, docsFile);
+        return new ReverseIndexReader("test", wordsFile, docsFile, new PositionsFileReader(posFile));
 
     }
 }
@ -155,15 +155,15 @@ class ReversePreindexDocsTest {
|
|||||||
if (wordId != that.wordId) return false;
|
if (wordId != that.wordId) return false;
|
||||||
if (start != that.start) return false;
|
if (start != that.start) return false;
|
||||||
if (end != that.end) return false;
|
if (end != that.end) return false;
|
||||||
return Arrays.equals(data, that.data);
|
return data[0] == that.data[0]; //Arrays.equals(data, that.data);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
int result = (int) (wordId ^ (wordId >>> 32));
|
int result = Long.hashCode(wordId);
|
||||||
result = 31 * result + (int) (start ^ (start >>> 32));
|
result = 31 * result + Long.hashCode(start);
|
||||||
result = 31 * result + (int) (end ^ (end >>> 32));
|
result = 31 * result + Long.hashCode(end);
|
||||||
result = 31 * result + Arrays.hashCode(data);
|
result = 31 * result + Long.hashCode(data[0]);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -79,9 +79,7 @@ class ReversePreindexFinalizeTest {
|
|||||||
assertEquals(1, wordsHeader.numEntries());
|
assertEquals(1, wordsHeader.numEntries());
|
||||||
|
|
||||||
assertEquals(100, docsArray.get(docsHeader.dataOffsetLongs() + 0));
|
assertEquals(100, docsArray.get(docsHeader.dataOffsetLongs() + 0));
|
||||||
assertEquals(51, docsArray.get(docsHeader.dataOffsetLongs() + 1));
|
|
||||||
assertEquals(50, wordsArray.get(wordsHeader.dataOffsetLongs()));
|
assertEquals(50, wordsArray.get(wordsHeader.dataOffsetLongs()));
|
||||||
assertEquals(0, wordsArray.get(wordsHeader.dataOffsetLongs() + 1));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -122,9 +120,7 @@ class ReversePreindexFinalizeTest {
|
|||||||
long offset2 = wordsArray.get(wordsHeader.dataOffsetLongs() + 3);
|
long offset2 = wordsArray.get(wordsHeader.dataOffsetLongs() + 3);
|
||||||
|
|
||||||
assertEquals(50, wordsArray.get(wordsHeader.dataOffsetLongs()));
|
assertEquals(50, wordsArray.get(wordsHeader.dataOffsetLongs()));
|
||||||
assertEquals(0, wordsArray.get(wordsHeader.dataOffsetLongs() + 1));
|
|
||||||
assertEquals(50, wordsArray.get(wordsHeader.dataOffsetLongs()));
|
assertEquals(50, wordsArray.get(wordsHeader.dataOffsetLongs()));
|
||||||
assertEquals(0, wordsArray.get(wordsHeader.dataOffsetLongs() + 1));
|
|
||||||
|
|
||||||
BTreeHeader docsHeader;
|
BTreeHeader docsHeader;
|
||||||
|
|
||||||
@ -133,13 +129,11 @@ class ReversePreindexFinalizeTest {
|
|||||||
assertEquals(1, docsHeader.numEntries());
|
assertEquals(1, docsHeader.numEntries());
|
||||||
|
|
||||||
assertEquals(100, docsArray.get(docsHeader.dataOffsetLongs() + 0));
|
assertEquals(100, docsArray.get(docsHeader.dataOffsetLongs() + 0));
|
||||||
assertEquals(51, docsArray.get(docsHeader.dataOffsetLongs() + 1));
|
|
||||||
|
|
||||||
docsHeader = new BTreeHeader(docsArray, offset2);
|
docsHeader = new BTreeHeader(docsArray, offset2);
|
||||||
System.out.println(docsHeader);
|
System.out.println(docsHeader);
|
||||||
assertEquals(1, docsHeader.numEntries());
|
assertEquals(1, docsHeader.numEntries());
|
||||||
|
|
||||||
assertEquals(101, docsArray.get(docsHeader.dataOffsetLongs() + 0));
|
assertEquals(101, docsArray.get(docsHeader.dataOffsetLongs() + 0));
|
||||||
assertEquals(52, docsArray.get(docsHeader.dataOffsetLongs() + 1));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -8,11 +8,13 @@ import nu.marginalia.index.journal.writer.IndexJournalWriterSingleFileImpl;
|
|||||||
import nu.marginalia.sequence.GammaCodedSequence;

import java.io.IOException;
+import java.nio.ByteBuffer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
+import java.util.Objects;

public class TestJournalFactory {
    Path tempDir = Files.createTempDirectory("journal");
@ -50,10 +52,10 @@ public class TestJournalFactory {
                    '}';
        }
    }
-   public record WordWithMeta(long wordId, long meta) {}
+   public record WordWithMeta(long wordId, long meta, GammaCodedSequence gcs) {}

-   public static WordWithMeta wm(long wordId, long meta) {
-       return new WordWithMeta(wordId, meta);
+   public static WordWithMeta wm(long wordId, long meta, int... positions) {
+       return new WordWithMeta(wordId, meta, GammaCodedSequence.generate(ByteBuffer.allocate(128), positions));
    }

    IndexJournalReader createReader(EntryData... entries) throws IOException {
@ -71,7 +73,7 @@ public class TestJournalFactory {
|
|||||||
                positions[i] = new GammaCodedSequence(new byte[1]);
            }

-           writer.put(new IndexJournalEntryHeader(entries.length, 0, entry.docId, entry.docMeta),
+           writer.put(new IndexJournalEntryHeader(entries.length, 0, 15, entry.docId, entry.docMeta),
                    new IndexJournalEntryData(termIds, meta, positions));
        }
        writer.close();
@ -91,10 +93,10 @@ public class TestJournalFactory {
            for (int i = 0; i < entry.wordIds.length; i++) {
                termIds[i] = entry.wordIds[i].wordId;
                meta[i] = entry.wordIds[i].meta;
-               positions[i] = new GammaCodedSequence(new byte[1]);
+               positions[i] = Objects.requireNonNullElseGet(entry.wordIds[i].gcs, () -> new GammaCodedSequence(new byte[1]));
            }

-           writer.put(new IndexJournalEntryHeader(entries.length, 0, entry.docId, entry.docMeta),
+           writer.put(new IndexJournalEntryHeader(entries.length, 0, 15, entry.docId, entry.docMeta),
                    new IndexJournalEntryData(termIds, meta, positions));
        }
        writer.close();
|
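As an aside, a sketch of how a test might exercise the new positions parameter of wm(); this is illustrative only and assumes GammaCodedSequence.generate() and valueCount() behave as they are used elsewhere in this change:

    // Illustrative only: term 50 with metadata 51 and word positions 3, 7 and 12.
    WordWithMeta w = wm(50, 51, 3, 7, 12);
    GammaCodedSequence positions = w.gcs();
    int n = positions.valueCount();   // expected to be 3 if generate() encodes every value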
@ -4,11 +4,10 @@ import com.google.inject.Inject;
|
|||||||
import com.google.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
import nu.marginalia.IndexLocations;
|
import nu.marginalia.IndexLocations;
|
||||||
import nu.marginalia.index.index.CombinedIndexReader;
|
import nu.marginalia.index.index.CombinedIndexReader;
|
||||||
|
import nu.marginalia.index.positions.PositionsFileReader;
|
||||||
import nu.marginalia.storage.FileStorageService;
|
import nu.marginalia.storage.FileStorageService;
|
||||||
import nu.marginalia.index.forward.ForwardIndexFileNames;
|
import nu.marginalia.index.forward.ForwardIndexFileNames;
|
||||||
import nu.marginalia.index.forward.ForwardIndexReader;
|
import nu.marginalia.index.forward.ForwardIndexReader;
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
@ -40,17 +39,18 @@ public class IndexFactory {
|
|||||||
    }

    public ReverseIndexReader getReverseIndexReader() throws IOException {

        return new ReverseIndexReader("full",
                ReverseIndexFullFileNames.resolve(liveStorage, ReverseIndexFullFileNames.FileIdentifier.WORDS, ReverseIndexFullFileNames.FileVersion.CURRENT),
-               ReverseIndexFullFileNames.resolve(liveStorage, ReverseIndexFullFileNames.FileIdentifier.DOCS, ReverseIndexFullFileNames.FileVersion.CURRENT)
+               ReverseIndexFullFileNames.resolve(liveStorage, ReverseIndexFullFileNames.FileIdentifier.DOCS, ReverseIndexFullFileNames.FileVersion.CURRENT),
+               new PositionsFileReader(ReverseIndexFullFileNames.resolve(liveStorage, ReverseIndexFullFileNames.FileIdentifier.POSITIONS, ReverseIndexFullFileNames.FileVersion.CURRENT))
        );
    }

    public ReverseIndexReader getReverseIndexPrioReader() throws IOException {
        return new ReverseIndexReader("prio",
                ReverseIndexPrioFileNames.resolve(liveStorage, ReverseIndexPrioFileNames.FileIdentifier.WORDS, ReverseIndexPrioFileNames.FileVersion.CURRENT),
-               ReverseIndexPrioFileNames.resolve(liveStorage, ReverseIndexPrioFileNames.FileIdentifier.DOCS, ReverseIndexPrioFileNames.FileVersion.CURRENT)
+               ReverseIndexPrioFileNames.resolve(liveStorage, ReverseIndexPrioFileNames.FileIdentifier.DOCS, ReverseIndexPrioFileNames.FileVersion.CURRENT),
+               null
        );
    }

|
@ -281,10 +281,7 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
|
|||||||
            awaitCompletion();

            // Return the best results
-           return new SearchResultSet(
-                   resultValuator.selectBestResults(parameters,
-                           resultRankingContext,
-                           resultHeap));
+           return new SearchResultSet(resultValuator.selectBestResults(parameters, resultHeap));
        }

        /** Wait for all tasks to complete */
|
@ -14,12 +14,13 @@ import nu.marginalia.index.query.IndexQueryBuilder;
|
|||||||
import nu.marginalia.index.query.filter.QueryFilterStepIf;
|
import nu.marginalia.index.query.filter.QueryFilterStepIf;
|
||||||
import nu.marginalia.index.query.limit.SpecificationLimitType;
|
import nu.marginalia.index.query.limit.SpecificationLimitType;
|
||||||
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
|
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
|
||||||
import nu.marginalia.index.results.model.ids.DocMetadataList;
|
import nu.marginalia.index.results.model.ids.TermMetadataList;
|
||||||
import nu.marginalia.model.id.UrlIdCodec;
|
import nu.marginalia.model.id.UrlIdCodec;
|
||||||
import nu.marginalia.model.idx.DocumentMetadata;
|
import nu.marginalia.model.idx.DocumentMetadata;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.lang.foreign.Arena;
|
||||||
import java.time.Duration;
|
import java.time.Duration;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
@ -169,8 +170,11 @@ public class CombinedIndexReader {
|
|||||||
    }

    /** Retrieves the term metadata for the specified word for the provided documents */
-   public DocMetadataList getMetadata(long wordId, CombinedDocIdList docIds) {
-       return new DocMetadataList(reverseIndexFullReader.getTermMeta(wordId, docIds.array()));
+   public TermMetadataList getTermMetadata(Arena arena,
+                                           long wordId,
+                                           CombinedDocIdList docIds)
+   {
+       return new TermMetadataList(reverseIndexFullReader.getTermData(arena, wordId, docIds.array()));
    }

    /** Retrieves the document metadata for the specified document */
@ -186,8 +190,12 @@ public class CombinedIndexReader {
|
|||||||
    /** Retrieves the HTML features for the specified document */
    public int getHtmlFeatures(long docId) {
        return forwardIndexReader.getHtmlFeatures(docId);
+   } /** Retrieves the HTML features for the specified document */
+   public int getDocumentSize(long docId) {
+       return forwardIndexReader.getDocumentSize(docId);
    }

    /** Close the indexes (this is not done immediately)
     * */
    public void close() throws InterruptedException {
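For orientation, a sketch of the calling pattern the new getTermMetadata signature implies, mirroring how it is used further down in this change; the names index, wordId and docIds are placeholders, and the arena-scoped lifetime of the returned data is an assumption based on the Arena parameter:

    // Illustrative only: fetch per-document term data inside a confined arena and read
    // it out before the arena is closed.
    try (var arena = Arena.ofConfined()) {
        TermMetadataList list = index.getTermMetadata(arena, wordId, docIds);
        for (int i = 0; i < docIds.size(); i++) {
            long flags = list.flag(i);                        // 0 when the term is absent
            GammaCodedSequence positions = list.position(i);  // null when the term is absent
        }
    }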
@ -10,12 +10,13 @@ import nu.marginalia.index.index.StatefulIndex;
|
|||||||
import nu.marginalia.index.model.SearchTermsUtil;
|
import nu.marginalia.index.model.SearchTermsUtil;
|
||||||
import nu.marginalia.index.results.model.QuerySearchTerms;
|
import nu.marginalia.index.results.model.QuerySearchTerms;
|
||||||
import nu.marginalia.index.results.model.TermCoherenceGroupList;
|
import nu.marginalia.index.results.model.TermCoherenceGroupList;
|
||||||
import nu.marginalia.index.results.model.TermMetadataForCombinedDocumentIds;
|
|
||||||
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
|
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
|
||||||
|
import nu.marginalia.index.results.model.ids.TermMetadataList;
|
||||||
import nu.marginalia.index.results.model.ids.TermIdList;
|
import nu.marginalia.index.results.model.ids.TermIdList;
|
||||||
|
|
||||||
|
import java.lang.foreign.Arena;
|
||||||
|
|
||||||
import static nu.marginalia.index.results.model.TermCoherenceGroupList.TermCoherenceGroup;
|
import static nu.marginalia.index.results.model.TermCoherenceGroupList.TermCoherenceGroup;
|
||||||
import static nu.marginalia.index.results.model.TermMetadataForCombinedDocumentIds.DocumentsWithMetadata;
|
|
||||||
|
|
||||||
public class IndexMetadataService {
|
public class IndexMetadataService {
|
||||||
private final StatefulIndex statefulIndex;
|
private final StatefulIndex statefulIndex;
|
||||||
@ -25,22 +26,19 @@ public class IndexMetadataService {
|
|||||||
        this.statefulIndex = index;
    }

-   public TermMetadataForCombinedDocumentIds getTermMetadataForDocuments(CombinedDocIdList combinedIdsAll,
-                                                                         TermIdList termIdsList)
+   public Long2ObjectArrayMap<TermMetadataList>
+   getTermMetadataForDocuments(Arena arena, CombinedDocIdList combinedIdsAll, TermIdList termIdsList)
    {
        var currentIndex = statefulIndex.get();

-       Long2ObjectArrayMap<DocumentsWithMetadata> termdocToMeta =
+       Long2ObjectArrayMap<TermMetadataList> termdocToMeta =
                new Long2ObjectArrayMap<>(termIdsList.size());

        for (long termId : termIdsList.array()) {
-           var metadata = currentIndex.getMetadata(termId, combinedIdsAll);
-
-           termdocToMeta.put(termId,
-                   new DocumentsWithMetadata(combinedIdsAll, metadata));
+           termdocToMeta.put(termId, currentIndex.getTermMetadata(arena, termId, combinedIdsAll));
        }

-       return new TermMetadataForCombinedDocumentIds(termdocToMeta);
+       return termdocToMeta;
    }

    public QuerySearchTerms getSearchTerms(CompiledQuery<String> compiledQuery, SearchQuery searchQuery) {
@ -1,25 +1,22 @@
|
|||||||
package nu.marginalia.index.results;
|
package nu.marginalia.index.results;
|
||||||
|
|
||||||
import nu.marginalia.api.searchquery.model.compiled.*;
|
import nu.marginalia.api.searchquery.model.compiled.*;
|
||||||
import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates;
|
|
||||||
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
|
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
|
||||||
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
|
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
|
||||||
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
|
|
||||||
import nu.marginalia.index.index.CombinedIndexReader;
|
import nu.marginalia.index.index.CombinedIndexReader;
|
||||||
import nu.marginalia.index.index.StatefulIndex;
|
import nu.marginalia.index.index.StatefulIndex;
|
||||||
import nu.marginalia.index.model.SearchParameters;
|
import nu.marginalia.index.model.SearchParameters;
|
||||||
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
|
|
||||||
import nu.marginalia.index.model.QueryParams;
|
import nu.marginalia.index.model.QueryParams;
|
||||||
import nu.marginalia.index.results.model.QuerySearchTerms;
|
import nu.marginalia.index.results.model.QuerySearchTerms;
|
||||||
import nu.marginalia.index.results.model.TermMetadataForCombinedDocumentIds;
|
|
||||||
import nu.marginalia.model.id.UrlIdCodec;
|
import nu.marginalia.model.id.UrlIdCodec;
|
||||||
import nu.marginalia.model.idx.WordFlags;
|
import nu.marginalia.model.idx.WordFlags;
|
||||||
import nu.marginalia.model.idx.WordMetadata;
|
|
||||||
import nu.marginalia.index.query.limit.QueryStrategy;
|
import nu.marginalia.index.query.limit.QueryStrategy;
|
||||||
import nu.marginalia.ranking.results.ResultValuator;
|
import nu.marginalia.ranking.results.ResultValuator;
|
||||||
|
import nu.marginalia.sequence.GammaCodedSequence;
|
||||||
|
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
import java.util.List;
|
|
||||||
|
import static nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates.*;
|
||||||
|
|
||||||
/** This class is responsible for calculating the score of a search result.
|
/** This class is responsible for calculating the score of a search result.
|
||||||
* It holds the data required to perform the scoring, as there is strong
|
* It holds the data required to perform the scoring, as there is strong
|
||||||
@ -28,94 +25,74 @@ public class IndexResultValuationContext {
|
|||||||
private final CombinedIndexReader index;
|
private final CombinedIndexReader index;
|
||||||
private final QueryParams queryParams;
|
private final QueryParams queryParams;
|
||||||
|
|
||||||
private final TermMetadataForCombinedDocumentIds termMetadataForCombinedDocumentIds;
|
|
||||||
private final QuerySearchTerms searchTerms;
|
|
||||||
|
|
||||||
private final ResultRankingContext rankingContext;
|
private final ResultRankingContext rankingContext;
|
||||||
private final ResultValuator searchResultValuator;
|
private final ResultValuator searchResultValuator;
|
||||||
private final CompiledQuery<String> compiledQuery;
|
private final CompiledQuery<String> compiledQuery;
|
||||||
private final CompiledQueryLong compiledQueryIds;
|
|
||||||
|
|
||||||
public IndexResultValuationContext(IndexMetadataService metadataService,
|
public IndexResultValuationContext(ResultValuator searchResultValuator,
|
||||||
ResultValuator searchResultValuator,
|
|
||||||
CombinedDocIdList ids,
|
|
||||||
StatefulIndex statefulIndex,
|
StatefulIndex statefulIndex,
|
||||||
ResultRankingContext rankingContext,
|
ResultRankingContext rankingContext,
|
||||||
SearchParameters params
|
SearchParameters params)
|
||||||
) {
|
{
|
||||||
this.index = statefulIndex.get();
|
this.index = statefulIndex.get();
|
||||||
this.rankingContext = rankingContext;
|
this.rankingContext = rankingContext;
|
||||||
this.searchResultValuator = searchResultValuator;
|
this.searchResultValuator = searchResultValuator;
|
||||||
|
|
||||||
this.queryParams = params.queryParams;
|
this.queryParams = params.queryParams;
|
||||||
this.compiledQuery = params.compiledQuery;
|
this.compiledQuery = params.compiledQuery;
|
||||||
this.compiledQueryIds = params.compiledQueryIds;
|
|
||||||
|
|
||||||
this.searchTerms = metadataService.getSearchTerms(params.compiledQuery, params.query);
|
|
||||||
|
|
||||||
this.termMetadataForCombinedDocumentIds = metadataService.getTermMetadataForDocuments(ids,
|
|
||||||
searchTerms.termIdsAll);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private final long flagsFilterMask =
|
private final long flagsFilterMask = WordFlags.Title.asBit() | WordFlags.Subjects.asBit() | WordFlags.UrlDomain.asBit() | WordFlags.UrlPath.asBit() | WordFlags.ExternalLink.asBit();
|
||||||
WordFlags.Title.asBit() | WordFlags.Subjects.asBit() | WordFlags.UrlDomain.asBit() | WordFlags.UrlPath.asBit() | WordFlags.ExternalLink.asBit();
|
|
||||||
|
|
||||||
@Nullable
|
@Nullable
|
||||||
public SearchResultItem calculatePreliminaryScore(long combinedId) {
|
public SearchResultItem calculatePreliminaryScore(long combinedId,
|
||||||
|
QuerySearchTerms searchTerms,
|
||||||
|
long[] wordFlags,
|
||||||
|
GammaCodedSequence[] positions)
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
// FIXME: Reconsider coherence logic with the new position data
|
||||||
|
// if (!searchTerms.coherences.test(termMetadataForCombinedDocumentIds, combinedId))
|
||||||
|
// return null;
|
||||||
|
|
||||||
|
CompiledQuery<GammaCodedSequence> positionsQuery = compiledQuery.root.newQuery(positions);
|
||||||
|
CompiledQueryLong wordFlagsQuery = compiledQuery.root.newQuery(wordFlags);
|
||||||
|
int[] counts = new int[compiledQuery.size()];
|
||||||
|
for (int i = 0; i < counts.length; i++) {
|
||||||
|
if (positions[i] != null) {
|
||||||
|
counts[i] = positions[i].valueCount();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
CompiledQueryInt positionsCountQuery = compiledQuery.root.newQuery(counts);
|
||||||
|
|
||||||
|
// If the document is not relevant to the query, abort early to reduce allocations and
|
||||||
|
// avoid unnecessary calculations
|
||||||
|
if (testRelevance(wordFlagsQuery, positionsCountQuery)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
long docId = UrlIdCodec.removeRank(combinedId);
|
long docId = UrlIdCodec.removeRank(combinedId);
|
||||||
|
|
||||||
if (!searchTerms.coherences.test(termMetadataForCombinedDocumentIds, combinedId))
|
|
||||||
return null;
|
|
||||||
|
|
||||||
long docMetadata = index.getDocumentMetadata(docId);
|
long docMetadata = index.getDocumentMetadata(docId);
|
||||||
int htmlFeatures = index.getHtmlFeatures(docId);
|
int htmlFeatures = index.getHtmlFeatures(docId);
|
||||||
|
int docSize = index.getDocumentSize(docId);
|
||||||
SearchResultItem searchResult = new SearchResultItem(docId,
|
|
||||||
docMetadata,
|
|
||||||
htmlFeatures,
|
|
||||||
hasPrioTerm(combinedId));
|
|
||||||
|
|
||||||
long[] wordMetas = new long[compiledQuery.size()];
|
|
||||||
SearchResultKeywordScore[] scores = new SearchResultKeywordScore[compiledQuery.size()];
|
|
||||||
|
|
||||||
for (int i = 0; i < wordMetas.length; i++) {
|
|
||||||
final long termId = compiledQueryIds.at(i);
|
|
||||||
final String term = compiledQuery.at(i);
|
|
||||||
|
|
||||||
wordMetas[i] = termMetadataForCombinedDocumentIds.getTermMetadata(termId, combinedId);
|
|
||||||
scores[i] = new SearchResultKeywordScore(term, termId, wordMetas[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// DANGER: IndexResultValuatorService assumes that searchResult.keywordScores has this specific order, as it needs
|
|
||||||
// to be able to re-construct its own CompiledQuery<SearchResultKeywordScore> for re-ranking the results. This is
|
|
||||||
// a very flimsy assumption.
|
|
||||||
searchResult.keywordScores.addAll(List.of(scores));
|
|
||||||
|
|
||||||
CompiledQueryLong wordMetasQuery = new CompiledQueryLong(compiledQuery.root, new CqDataLong(wordMetas));
|
|
||||||
|
|
||||||
|
|
||||||
boolean allSynthetic = CompiledQueryAggregates.booleanAggregate(wordMetasQuery, WordFlags.Synthetic::isPresent);
|
|
||||||
int flagsCount = CompiledQueryAggregates.intMaxMinAggregate(wordMetasQuery, wordMeta -> Long.bitCount(wordMeta & flagsFilterMask));
|
|
||||||
int positionsCount = CompiledQueryAggregates.intMaxMinAggregate(wordMetasQuery, wordMeta -> Long.bitCount(WordMetadata.decodePositions(wordMeta)));
|
|
||||||
|
|
||||||
if (!meetsQueryStrategyRequirements(wordMetasQuery, queryParams.queryStrategy())) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (flagsCount == 0 && !allSynthetic && positionsCount == 0)
|
|
||||||
return null;
|
|
||||||
|
|
||||||
double score = searchResultValuator.calculateSearchResultValue(
|
double score = searchResultValuator.calculateSearchResultValue(
|
||||||
wordMetasQuery,
|
wordFlagsQuery,
|
||||||
|
positionsCountQuery,
|
||||||
|
positionsQuery,
|
||||||
docMetadata,
|
docMetadata,
|
||||||
htmlFeatures,
|
htmlFeatures,
|
||||||
5000, // use a dummy value here as it's not present in the index
|
docSize,
|
||||||
rankingContext,
|
rankingContext,
|
||||||
null);
|
null);
|
||||||
|
|
||||||
if (searchResult.hasPrioTerm) {
|
SearchResultItem searchResult = new SearchResultItem(docId,
|
||||||
|
docMetadata,
|
||||||
|
htmlFeatures);
|
||||||
|
|
||||||
|
if (hasPrioTerm(searchTerms, positions)) {
|
||||||
score = 0.75 * score;
|
score = 0.75 * score;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -124,12 +101,31 @@ public class IndexResultValuationContext {
|
|||||||
return searchResult;
|
return searchResult;
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean hasPrioTerm(long combinedId) {
|
private boolean testRelevance(CompiledQueryLong wordFlagsQuery, CompiledQueryInt countsQuery) {
|
||||||
for (var term : searchTerms.termIdsPrio.array()) {
|
boolean allSynthetic = booleanAggregate(wordFlagsQuery, WordFlags.Synthetic::isPresent);
|
||||||
if (termMetadataForCombinedDocumentIds.hasTermMeta(term, combinedId)) {
|
int flagsCount = intMaxMinAggregate(wordFlagsQuery, flags -> Long.bitCount(flags & flagsFilterMask));
|
||||||
|
int positionsCount = intMaxMinAggregate(countsQuery, p -> p);
|
||||||
|
|
||||||
|
if (!meetsQueryStrategyRequirements(wordFlagsQuery, queryParams.queryStrategy())) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (flagsCount == 0 && !allSynthetic && positionsCount == 0) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean hasPrioTerm(QuerySearchTerms searchTerms, GammaCodedSequence[] positions) {
|
||||||
|
var allTerms = searchTerms.termIdsAll;
|
||||||
|
var prioTerms = searchTerms.termIdsPrio;
|
||||||
|
|
||||||
|
for (int i = 0; i < allTerms.size(); i++) {
|
||||||
|
if (positions[i] != null && prioTerms.contains(allTerms.at(i))) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -142,7 +138,7 @@ public class IndexResultValuationContext {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
return CompiledQueryAggregates.booleanAggregate(queryGraphScores,
|
return booleanAggregate(queryGraphScores,
|
||||||
docs -> meetsQueryStrategyRequirements(docs, queryParams.queryStrategy()));
|
docs -> meetsQueryStrategyRequirements(docs, queryParams.queryStrategy()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7,8 +7,6 @@ import gnu.trove.list.array.TLongArrayList;
|
|||||||
import it.unimi.dsi.fastutil.longs.LongSet;
|
import it.unimi.dsi.fastutil.longs.LongSet;
|
||||||
import nu.marginalia.api.searchquery.model.compiled.CompiledQuery;
|
import nu.marginalia.api.searchquery.model.compiled.CompiledQuery;
|
||||||
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
|
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
|
||||||
import nu.marginalia.api.searchquery.model.compiled.CqDataInt;
|
|
||||||
import nu.marginalia.api.searchquery.model.compiled.CqDataLong;
|
|
||||||
import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates;
|
import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates;
|
||||||
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
|
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
|
||||||
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
|
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
|
||||||
@ -21,12 +19,13 @@ import nu.marginalia.linkdb.docs.DocumentDbReader;
|
|||||||
import nu.marginalia.linkdb.model.DocdbUrlDetail;
|
import nu.marginalia.linkdb.model.DocdbUrlDetail;
|
||||||
import nu.marginalia.model.idx.WordMetadata;
|
import nu.marginalia.model.idx.WordMetadata;
|
||||||
import nu.marginalia.ranking.results.ResultValuator;
|
import nu.marginalia.ranking.results.ResultValuator;
|
||||||
|
import nu.marginalia.sequence.GammaCodedSequence;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.lang.foreign.Arena;
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.function.Consumer;
|
|
||||||
|
|
||||||
@Singleton
|
@Singleton
|
||||||
public class IndexResultValuatorService {
|
public class IndexResultValuatorService {
|
||||||
@ -53,12 +52,42 @@ public class IndexResultValuatorService {
|
|||||||
ResultRankingContext rankingContext,
|
ResultRankingContext rankingContext,
|
||||||
CombinedDocIdList resultIds)
|
CombinedDocIdList resultIds)
|
||||||
{
|
{
|
||||||
final var evaluator = createValuationContext(params, rankingContext, resultIds);
|
IndexResultValuationContext evaluator =
|
||||||
|
new IndexResultValuationContext(resultValuator, statefulIndex, rankingContext, params);
|
||||||
|
|
||||||
List<SearchResultItem> results = new ArrayList<>(resultIds.size());
|
List<SearchResultItem> results = new ArrayList<>(resultIds.size());
|
||||||
|
|
||||||
for (long id : resultIds.array()) {
|
try (var arena = Arena.ofConfined()) {
|
||||||
var score = evaluator.calculatePreliminaryScore(id);
|
// Batch-fetch the word metadata for the documents
|
||||||
|
|
||||||
|
var searchTerms = metadataService.getSearchTerms(params.compiledQuery, params.query);
|
||||||
|
var termsForDocs = metadataService.getTermMetadataForDocuments(arena, resultIds, searchTerms.termIdsAll);
|
||||||
|
|
||||||
|
// Prepare data for the document. We do this outside of the calculation function to avoid
|
||||||
|
// hash lookups in the inner loop, as it's very hot code and we don't want thrashing in there;
|
||||||
|
// out here we can rely on implicit array ordering to match up the data.
|
||||||
|
|
||||||
|
var ra = resultIds.array();
|
||||||
|
long[] flags = new long[searchTerms.termIdsAll.size()];
|
||||||
|
GammaCodedSequence[] positions = new GammaCodedSequence[searchTerms.termIdsAll.size()];
|
||||||
|
|
||||||
|
for (int i = 0; i < ra.length; i++) {
|
||||||
|
long id = ra[i];
|
||||||
|
|
||||||
|
// Prepare term-level data for the document
|
||||||
|
for (int ti = 0; ti < flags.length; ti++) {
|
||||||
|
long tid = searchTerms.termIdsAll.at(ti);
|
||||||
|
var tfd = termsForDocs.get(tid);
|
||||||
|
|
||||||
|
assert tfd != null : "No term data for term " + ti;
|
||||||
|
|
||||||
|
flags[ti] = tfd.flag(i);
|
||||||
|
positions[ti] = tfd.position(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate the preliminary score
|
||||||
|
|
||||||
|
var score = evaluator.calculatePreliminaryScore(id, searchTerms, flags, positions);
|
||||||
if (score != null) {
|
if (score != null) {
|
||||||
results.add(score);
|
results.add(score);
|
||||||
}
|
}
|
||||||
@ -66,22 +95,10 @@ public class IndexResultValuatorService {
|
|||||||
|
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
private IndexResultValuationContext createValuationContext(SearchParameters params,
|
|
||||||
ResultRankingContext rankingContext,
|
|
||||||
CombinedDocIdList resultIds)
|
|
||||||
{
|
|
||||||
return new IndexResultValuationContext(metadataService,
|
|
||||||
resultValuator,
|
|
||||||
resultIds,
|
|
||||||
statefulIndex,
|
|
||||||
rankingContext,
|
|
||||||
params);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public List<DecoratedSearchResultItem> selectBestResults(SearchParameters params,
|
public List<DecoratedSearchResultItem> selectBestResults(SearchParameters params,
|
||||||
ResultRankingContext rankingContext,
|
|
||||||
Collection<SearchResultItem> results) throws SQLException {
|
Collection<SearchResultItem> results) throws SQLException {
|
||||||
|
|
||||||
var domainCountFilter = new IndexResultDomainDeduplicator(params.limitByDomain);
|
var domainCountFilter = new IndexResultDomainDeduplicator(params.limitByDomain);
|
||||||
@ -101,14 +118,13 @@ public class IndexResultValuatorService {
|
|||||||
item.resultsFromDomain = domainCountFilter.getCount(item);
|
item.resultsFromDomain = domainCountFilter.getCount(item);
|
||||||
}
|
}
|
||||||
|
|
||||||
return decorateAndRerank(resultsList, params.compiledQuery, rankingContext);
|
return decorateResults(resultsList, params.compiledQuery);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Decorate the result items with additional information from the link database
|
/** Decorate the result items with additional information from the link database
|
||||||
* and calculate an updated ranking with the additional information */
|
* and calculate an updated ranking with the additional information */
|
||||||
public List<DecoratedSearchResultItem> decorateAndRerank(List<SearchResultItem> rawResults,
|
public List<DecoratedSearchResultItem> decorateResults(List<SearchResultItem> rawResults,
|
||||||
CompiledQuery<String> compiledQuery,
|
CompiledQuery<String> compiledQuery)
|
||||||
ResultRankingContext rankingContext)
|
|
||||||
throws SQLException
|
throws SQLException
|
||||||
{
|
{
|
||||||
TLongList idsList = new TLongArrayList(rawResults.size());
|
TLongList idsList = new TLongArrayList(rawResults.size());
|
||||||
@ -131,42 +147,18 @@ public class IndexResultValuatorService {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reconstruct the compiledquery for re-valuation
|
|
||||||
//
|
|
||||||
// CAVEAT: This hinges on a very fragile that IndexResultValuationContext puts them in the same
|
|
||||||
// order as the data for the CompiledQuery<String>.
|
|
||||||
long[] wordMetas = new long[compiledQuery.size()];
|
|
||||||
|
|
||||||
for (int i = 0; i < compiledQuery.size(); i++) {
|
|
||||||
var score = result.keywordScores.get(i);
|
|
||||||
wordMetas[i] = score.encodedWordMetadata();
|
|
||||||
}
|
|
||||||
|
|
||||||
CompiledQueryLong metaQuery = new CompiledQueryLong(compiledQuery.root, new CqDataLong(wordMetas));
|
|
||||||
|
|
||||||
resultItems.add(createCombinedItem(
|
resultItems.add(createCombinedItem(
|
||||||
result,
|
result,
|
||||||
docData,
|
docData));
|
||||||
metaQuery,
|
|
||||||
rankingContext));
|
|
||||||
}
|
}
|
||||||
return resultItems;
|
return resultItems;
|
||||||
}
|
}
|
||||||
|
|
||||||
private DecoratedSearchResultItem createCombinedItem(SearchResultItem result,
|
private DecoratedSearchResultItem createCombinedItem(SearchResultItem result,
|
||||||
DocdbUrlDetail docData,
|
DocdbUrlDetail docData) {
|
||||||
CompiledQueryLong wordMetas,
|
|
||||||
ResultRankingContext rankingContext) {
|
|
||||||
|
|
||||||
ResultRankingDetailsExtractor detailsExtractor = new ResultRankingDetailsExtractor();
|
ResultRankingDetailsExtractor detailsExtractor = new ResultRankingDetailsExtractor();
|
||||||
Consumer<ResultRankingDetails> detailConsumer = rankingContext.params.exportDebugData ? detailsExtractor::set : null;
|
// Consumer<ResultRankingDetails> detailConsumer = rankingContext.params.exportDebugData ? detailsExtractor::set : null;
|
||||||
|
|
||||||
double score = resultValuator.calculateSearchResultValue(wordMetas,
|
|
||||||
result.encodedDocMetadata,
|
|
||||||
result.htmlFeatures,
|
|
||||||
docData.wordsTotal(),
|
|
||||||
rankingContext,
|
|
||||||
detailConsumer);
|
|
||||||
|
|
||||||
return new DecoratedSearchResultItem(
|
return new DecoratedSearchResultItem(
|
||||||
result,
|
result,
|
||||||
@ -179,8 +171,8 @@ public class IndexResultValuatorService {
|
|||||||
docData.pubYear(),
|
docData.pubYear(),
|
||||||
docData.dataHash(),
|
docData.dataHash(),
|
||||||
docData.wordsTotal(),
|
docData.wordsTotal(),
|
||||||
bestPositions(wordMetas),
|
0L, //bestPositions(wordMetas),
|
||||||
score,
|
result.getScore(),
|
||||||
detailsExtractor.get()
|
detailsExtractor.get()
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -1,26 +1,38 @@
|
|||||||
package nu.marginalia.index.results.model;
|
package nu.marginalia.index.results.model;
|
||||||
|
|
||||||
import it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap;
|
|
||||||
import it.unimi.dsi.fastutil.longs.Long2ObjectArrayMap;
|
import it.unimi.dsi.fastutil.longs.Long2ObjectArrayMap;
|
||||||
|
import it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap;
|
||||||
|
import nu.marginalia.index.positions.TermData;
|
||||||
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
|
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
|
||||||
import nu.marginalia.index.results.model.ids.DocMetadataList;
|
import nu.marginalia.index.results.model.ids.TermMetadataList;
|
||||||
import org.slf4j.Logger;
|
import nu.marginalia.sequence.GammaCodedSequence;
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
import javax.annotation.Nullable;
|
||||||
|
|
||||||
public class TermMetadataForCombinedDocumentIds {
|
public class TermMetadataForCombinedDocumentIds {
|
||||||
private static final Logger logger = LoggerFactory.getLogger(TermMetadataForCombinedDocumentIds.class);
|
|
||||||
private final Long2ObjectArrayMap<DocumentsWithMetadata> termdocToMeta;
|
private final Long2ObjectArrayMap<DocumentsWithMetadata> termdocToMeta;
|
||||||
|
|
||||||
public TermMetadataForCombinedDocumentIds(Long2ObjectArrayMap<DocumentsWithMetadata> termdocToMeta) {
|
public TermMetadataForCombinedDocumentIds(Long2ObjectArrayMap<DocumentsWithMetadata> termdocToMeta) {
|
||||||
this.termdocToMeta = termdocToMeta;
|
this.termdocToMeta = termdocToMeta;
|
||||||
}
|
}
|
||||||
|
|
||||||
public long getTermMetadata(long termId, long combinedId) {
|
public byte getTermMetadata(long termId, long combinedId) {
|
||||||
var metaByCombinedId = termdocToMeta.get(termId);
|
var metaByCombinedId = termdocToMeta.get(termId);
|
||||||
if (metaByCombinedId == null) {
|
if (metaByCombinedId == null) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
return metaByCombinedId.get(combinedId);
|
return metaByCombinedId.get(combinedId).flags();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Nullable
|
||||||
|
public GammaCodedSequence getPositions(long termId, long combinedId) {
|
||||||
|
var metaByCombinedId = termdocToMeta.get(termId);
|
||||||
|
|
||||||
|
if (metaByCombinedId == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return metaByCombinedId.get(combinedId).positions();
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasTermMeta(long termId, long combinedId) {
|
public boolean hasTermMeta(long termId, long combinedId) {
|
||||||
@ -30,16 +42,25 @@ public class TermMetadataForCombinedDocumentIds {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return metaByCombinedId.get(combinedId) != 0;
|
return metaByCombinedId.data().containsKey(combinedId);
|
||||||
}
|
}
|
||||||
|
|
||||||
public record DocumentsWithMetadata(Long2LongOpenHashMap data) {
|
public record DocumentsWithMetadata(Long2ObjectOpenHashMap<TermData> data) {
|
||||||
public DocumentsWithMetadata(CombinedDocIdList combinedDocIdsAll, DocMetadataList metadata) {
|
public DocumentsWithMetadata(CombinedDocIdList combinedDocIdsAll, TermMetadataList metadata) {
|
||||||
this(new Long2LongOpenHashMap(combinedDocIdsAll.array(), metadata.array()));
|
this(new Long2ObjectOpenHashMap<>(combinedDocIdsAll.size()));
|
||||||
|
|
||||||
|
long[] ids = combinedDocIdsAll.array();
|
||||||
|
TermData[] data = metadata.array();
|
||||||
|
|
||||||
|
for (int i = 0; i < combinedDocIdsAll.size(); i++) {
|
||||||
|
if (data[i] != null) {
|
||||||
|
this.data.put(ids[i], data[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public long get(long combinedId) {
|
public TermData get(long combinedId) {
|
||||||
return data.getOrDefault(combinedId, 0);
|
return data.get(combinedId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -15,6 +15,10 @@ import java.util.stream.LongStream;
|
|||||||
public final class CombinedDocIdList {
|
public final class CombinedDocIdList {
|
||||||
private final long[] data;
|
private final long[] data;
|
||||||
|
|
||||||
|
public CombinedDocIdList(long... data) {
|
||||||
|
this.data = Arrays.copyOf(data, data.length);
|
||||||
|
}
|
||||||
|
|
||||||
public CombinedDocIdList(LongArrayList data) {
|
public CombinedDocIdList(LongArrayList data) {
|
||||||
this.data = data.toLongArray();
|
this.data = data.toLongArray();
|
||||||
}
|
}
|
||||||
|
@ -1,45 +0,0 @@
|
|||||||
package nu.marginalia.index.results.model.ids;
|
|
||||||
|
|
||||||
import it.unimi.dsi.fastutil.longs.LongArrayList;
|
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.stream.LongStream;
|
|
||||||
|
|
||||||
public final class DocMetadataList {
|
|
||||||
private final long[] array;
|
|
||||||
|
|
||||||
public DocMetadataList(long[] array) {
|
|
||||||
this.array = array;
|
|
||||||
}
|
|
||||||
|
|
||||||
public DocMetadataList(LongArrayList list) {
|
|
||||||
this(list.toLongArray());
|
|
||||||
}
|
|
||||||
|
|
||||||
public int size() {
|
|
||||||
return array.length;
|
|
||||||
}
|
|
||||||
|
|
||||||
public LongStream stream() {
|
|
||||||
return LongStream.of(array);
|
|
||||||
}
|
|
||||||
|
|
||||||
public long[] array() {
|
|
||||||
return array;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean equals(Object obj) {
|
|
||||||
if (obj == this) return true;
|
|
||||||
if (obj == null || obj.getClass() != this.getClass()) return false;
|
|
||||||
var that = (DocMetadataList) obj;
|
|
||||||
return Arrays.equals(this.array, that.array);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
return Arrays.hashCode(array);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -11,6 +11,7 @@ public final class TermIdList {
|
|||||||
|
|
||||||
public TermIdList(long[] array) {
|
public TermIdList(long[] array) {
|
||||||
this.array = array;
|
this.array = array;
|
||||||
|
Arrays.sort(this.array);
|
||||||
}
|
}
|
||||||
|
|
||||||
public TermIdList(LongArrayList list) {
|
public TermIdList(LongArrayList list) {
|
||||||
@ -29,6 +30,15 @@ public final class TermIdList {
|
|||||||
return array;
|
return array;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public long at(int i) {
|
||||||
|
return array[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean contains(long id) {
|
||||||
|
// Implicitly sorted
|
||||||
|
return Arrays.binarySearch(array, id) >= 0;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object obj) {
|
public boolean equals(Object obj) {
|
||||||
if (obj == this) return true;
|
if (obj == this) return true;
|
||||||
|
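Illustrative only: because the TermIdList constructor above now sorts the backing array, positional access reflects sorted order and contains() can rely on binary search, e.g.:

    TermIdList ids = new TermIdList(new long[] { 30L, 10L, 20L });
    long first = ids.at(0);      // 10 after sorting
    boolean a = ids.contains(20L);  // true, via Arrays.binarySearch
    boolean b = ids.contains(40L);  // false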
@ -0,0 +1,55 @@
|
|||||||
|
package nu.marginalia.index.results.model.ids;
|
||||||
|
|
||||||
|
import nu.marginalia.index.positions.TermData;
|
||||||
|
import nu.marginalia.sequence.GammaCodedSequence;
|
||||||
|
|
||||||
|
import javax.annotation.Nullable;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
public final class TermMetadataList {
|
||||||
|
private final TermData[] array;
|
||||||
|
|
||||||
|
public TermMetadataList(TermData[] array) {
|
||||||
|
this.array = array;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int size() {
|
||||||
|
return array.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long flag(int i) {
|
||||||
|
if (array[i] == null)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return array[i].flags();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the position data for the given document index,
|
||||||
|
* may be null if the term is not in the document
|
||||||
|
*/
|
||||||
|
@Nullable
|
||||||
|
public GammaCodedSequence position(int i) {
|
||||||
|
if (array[i] == null)
|
||||||
|
return null;
|
||||||
|
|
||||||
|
return array[i].positions();
|
||||||
|
}
|
||||||
|
|
||||||
|
public TermData[] array() {
|
||||||
|
return array;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
if (obj == this) return true;
|
||||||
|
if (obj == null || obj.getClass() != this.getClass()) return false;
|
||||||
|
var that = (TermMetadataList) obj;
|
||||||
|
return Arrays.equals(this.array, that.array);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return Arrays.hashCode(array);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -1,5 +1,7 @@
|
|||||||
package nu.marginalia.ranking.results;
|
package nu.marginalia.ranking.results;
|
||||||
|
|
||||||
|
import nu.marginalia.api.searchquery.model.compiled.CompiledQuery;
|
||||||
|
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryInt;
|
||||||
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
|
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
|
||||||
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
|
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
|
||||||
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
||||||
@ -14,6 +16,7 @@ import nu.marginalia.ranking.results.factors.*;
|
|||||||
|
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.google.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
|
import nu.marginalia.sequence.GammaCodedSequence;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -33,15 +36,15 @@ public class ResultValuator {
|
|||||||
this.termCoherenceFactor = termCoherenceFactor;
|
this.termCoherenceFactor = termCoherenceFactor;
|
||||||
}
|
}
|
||||||
|
|
||||||
public double calculateSearchResultValue(CompiledQueryLong wordMeta,
|
public double calculateSearchResultValue(CompiledQueryLong wordFlagsQuery,
|
||||||
long documentMetadata,
|
CompiledQueryInt positionsCountQuery, CompiledQuery<GammaCodedSequence> positionsQuery, long documentMetadata,
|
||||||
int features,
|
int features,
|
||||||
int length,
|
int length,
|
||||||
ResultRankingContext ctx,
|
ResultRankingContext ctx,
|
||||||
@Nullable Consumer<ResultRankingDetails> detailsConsumer
|
@Nullable Consumer<ResultRankingDetails> detailsConsumer
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
if (wordMeta.isEmpty())
|
if (wordFlagsQuery.isEmpty())
|
||||||
return Double.MAX_VALUE;
|
return Double.MAX_VALUE;
|
||||||
|
|
||||||
if (length < 0) {
|
if (length < 0) {
|
||||||
@ -82,12 +85,11 @@ public class ResultValuator {
|
|||||||
+ temporalBias
|
+ temporalBias
|
||||||
+ flagsPenalty;
|
+ flagsPenalty;
|
||||||
|
|
||||||
double tcfOverlap = rankingParams.tcfOverlapWeight * termCoherenceFactor.calculateOverlap(wordMeta);
|
// FIXME: need a weighting factor here
|
||||||
double tcfJaccard = rankingParams.tcfJaccardWeight * termCoherenceFactor.calculateAvgMutualJaccard(wordMeta, ctx);
|
double tcfAvgDist = 25. / termCoherenceFactor.calculateAvgMinDistance(positionsQuery, ctx);
|
||||||
|
|
||||||
double bM25F = rankingParams.bm25FullWeight * wordMeta.root.visit(Bm25FullGraphVisitor.forRegular(rankingParams.fullParams, wordMeta.data, length, ctx));
|
double bM25F = rankingParams.bm25FullWeight * wordFlagsQuery.root.visit(new Bm25FullGraphVisitor(rankingParams.fullParams, positionsCountQuery.data, length, ctx));
|
||||||
double bM25N = rankingParams.bm25NgramWeight * wordMeta.root.visit(Bm25FullGraphVisitor.forNgrams(rankingParams.fullParams, wordMeta.data, length, ctx));
|
double bM25P = rankingParams.bm25PrioWeight * wordFlagsQuery.root.visit(new Bm25PrioGraphVisitor(rankingParams.prioParams, wordFlagsQuery.data, ctx));
|
||||||
double bM25P = rankingParams.bm25PrioWeight * wordMeta.root.visit(new Bm25PrioGraphVisitor(rankingParams.prioParams, wordMeta.data, ctx));
|
|
||||||
|
|
||||||
double overallPartPositive = Math.max(0, overallPart);
|
double overallPartPositive = Math.max(0, overallPart);
|
||||||
double overallPartNegative = -Math.min(0, overallPart);
|
double overallPartNegative = -Math.min(0, overallPart);
|
||||||
@ -112,10 +114,10 @@ public class ResultValuator {
|
|||||||
temporalBias,
|
temporalBias,
|
||||||
flagsPenalty,
|
flagsPenalty,
|
||||||
overallPart,
|
overallPart,
|
||||||
tcfOverlap,
|
0,
|
||||||
tcfJaccard,
|
0,
|
||||||
bM25F,
|
bM25F,
|
||||||
bM25N,
|
0, // FIXME: Remove from model
|
||||||
bM25P)
|
bM25P)
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -125,8 +127,8 @@ public class ResultValuator {
|
|||||||
// Renormalize to 0...15, where 0 is the best possible score;
|
// Renormalize to 0...15, where 0 is the best possible score;
|
||||||
// this is a historical artifact of the original ranking function
|
// this is a historical artifact of the original ranking function
|
||||||
double ret = normalize(
|
double ret = normalize(
|
||||||
tcfOverlap + tcfJaccard
|
tcfAvgDist
|
||||||
+ bM25F + bM25P + bM25N
|
+ bM25F + bM25P
|
||||||
+ overallPartPositive,
|
+ overallPartPositive,
|
||||||
overallPartNegative);
|
overallPartNegative);
|
||||||
|
|
||||||
|
@ -13,7 +13,7 @@ import java.util.List;
|
|||||||
public class Bm25FullGraphVisitor implements CqExpression.DoubleVisitor {
|
public class Bm25FullGraphVisitor implements CqExpression.DoubleVisitor {
|
||||||
private static final long AVG_LENGTH = 5000;
|
private static final long AVG_LENGTH = 5000;
|
||||||
|
|
||||||
private final CqDataLong wordMetaData;
|
private final CqDataInt counts;
|
||||||
private final CqDataInt frequencies;
|
private final CqDataInt frequencies;
|
||||||
private final Bm25Parameters bm25Parameters;
|
private final Bm25Parameters bm25Parameters;
|
||||||
|
|
||||||
@ -22,31 +22,16 @@ public class Bm25FullGraphVisitor implements CqExpression.DoubleVisitor {
|
|||||||
|
|
||||||
private final BitSet mask;
|
private final BitSet mask;
|
||||||
|
|
||||||
private Bm25FullGraphVisitor(Bm25Parameters bm25Parameters,
|
public Bm25FullGraphVisitor(Bm25Parameters bm25Parameters,
|
||||||
CqDataLong wordMetaData,
|
CqDataInt counts,
|
||||||
int length,
|
int length,
|
||||||
BitSet mask,
|
|
||||||
ResultRankingContext ctx) {
|
ResultRankingContext ctx) {
|
||||||
this.length = length;
|
this.length = length;
|
||||||
this.bm25Parameters = bm25Parameters;
|
this.bm25Parameters = bm25Parameters;
|
||||||
this.docCount = ctx.termFreqDocCount();
|
this.docCount = ctx.termFreqDocCount();
|
||||||
this.wordMetaData = wordMetaData;
|
this.counts = counts;
|
||||||
this.frequencies = ctx.fullCounts;
|
this.frequencies = ctx.fullCounts;
|
||||||
this.mask = mask;
|
this.mask = ctx.regularMask;
|
||||||
}
|
|
||||||
|
|
||||||
public static Bm25FullGraphVisitor forRegular(Bm25Parameters bm25Parameters,
|
|
||||||
CqDataLong wordMetaData,
|
|
||||||
int length,
|
|
||||||
ResultRankingContext ctx) {
|
|
||||||
return new Bm25FullGraphVisitor(bm25Parameters, wordMetaData, length, ctx.regularMask, ctx);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Bm25FullGraphVisitor forNgrams(Bm25Parameters bm25Parameters,
|
|
||||||
CqDataLong wordMetaData,
|
|
||||||
int length,
|
|
||||||
ResultRankingContext ctx) {
|
|
||||||
return new Bm25FullGraphVisitor(bm25Parameters, wordMetaData, length, ctx.ngramsMask, ctx);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -73,7 +58,7 @@ public class Bm25FullGraphVisitor implements CqExpression.DoubleVisitor {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
double count = Long.bitCount(WordMetadata.decodePositions(wordMetaData.get(idx)));
|
double count = counts.get(idx);
|
||||||
|
|
||||||
int freq = frequencies.get(idx);
|
int freq = frequencies.get(idx);
|
||||||
|
|
||||||
|
@ -1,66 +1,44 @@
|
|||||||
package nu.marginalia.ranking.results.factors;
|
package nu.marginalia.ranking.results.factors;
|
||||||
|
|
||||||
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
|
import nu.marginalia.api.searchquery.model.compiled.CompiledQuery;
|
||||||
import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates;
|
|
||||||
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
|
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
|
||||||
import nu.marginalia.model.idx.WordMetadata;
|
import nu.marginalia.sequence.GammaCodedSequence;
|
||||||
|
import nu.marginalia.sequence.SequenceOperations;
|
||||||
|
|
||||||
/** Rewards documents where terms appear frequently within the same sentences
|
/** Rewards documents where terms appear frequently within the same sentences
|
||||||
*/
|
*/
|
||||||
public class TermCoherenceFactor {
|
public class TermCoherenceFactor {
|
||||||
|
|
||||||
/** Calculate a factor that rewards the best total position overlap
|
public double calculateAvgMinDistance(CompiledQuery<GammaCodedSequence> positions, ResultRankingContext ctx) {
|
||||||
* between the terms in the query. This is high when all the terms
|
|
||||||
* found in the same sentences.
|
|
||||||
*/
|
|
||||||
public double calculateOverlap(CompiledQueryLong wordMetadataQuery) {
|
|
||||||
if (wordMetadataQuery.size() < 2)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
long mask = CompiledQueryAggregates.longBitmaskAggregate(wordMetadataQuery,
|
|
||||||
score -> score >>> WordMetadata.POSITIONS_SHIFT);
|
|
||||||
|
|
||||||
return bitsSetFactor(mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Calculate a factor that rewards the best average mutual Jaccard index
|
|
||||||
* between the terms in the query. This is high when the several terms are frequently
|
|
||||||
* found in the same sentences.
|
|
||||||
*/
|
|
||||||
public double calculateAvgMutualJaccard(CompiledQueryLong wordMetadataQuery, ResultRankingContext ctx) {
|
|
||||||
double sum = 0;
|
double sum = 0;
|
||||||
int cnt = 0;
|
int cnt = 0;
|
||||||
|
|
||||||
for (int i = 0; i < wordMetadataQuery.size(); i++) {
|
for (int i = 0; i < positions.size(); i++) {
|
||||||
|
|
||||||
// Skip terms that are not in the regular mask
|
// Skip terms that are not in the regular mask
|
||||||
if (!ctx.regularMask.get(i))
|
if (!ctx.regularMask.get(i))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
long imask = WordMetadata.decodePositions(wordMetadataQuery.at(i));
|
var posi = positions.at(i);
|
||||||
|
|
||||||
// Skip terms that are not in the document
|
// Skip terms that are not in the document
|
||||||
if (imask == 0L)
|
if (posi == null)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
for (int j = i + 1; j < wordMetadataQuery.size(); j++) {
|
for (int j = i + 1; j < positions.size(); j++) {
|
||||||
|
|
||||||
// Skip terms that are not in the regular mask
|
// Skip terms that are not in the regular mask
|
||||||
if (!ctx.regularMask.get(j))
|
if (!ctx.regularMask.get(j))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
long jmask = WordMetadata.decodePositions(wordMetadataQuery.at(j));
|
var posj = positions.at(j);
|
||||||
|
|
||||||
// Skip terms that are not in the document
|
// Skip terms that are not in the document
|
||||||
if (jmask == 0L)
|
if (posj == null)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
long quot = Long.bitCount(imask & jmask);
|
int distance = SequenceOperations.minDistance(posi.iterator(), posj.iterator());
|
||||||
long rem = Long.bitCount(imask | jmask);
|
sum += distance;
|
||||||
|
|
||||||
// rem is always > 0 because imask and jmask are not both 0
|
|
||||||
|
|
||||||
sum += quot/(double) rem;
|
|
||||||
cnt++;
|
cnt++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -68,15 +46,8 @@ public class TermCoherenceFactor {
|
|||||||
if (cnt > 0) {
|
if (cnt > 0) {
|
||||||
return sum / cnt;
|
return sum / cnt;
|
||||||
} else {
|
} else {
|
||||||
return 0;
|
return 1000.;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
double bitsSetFactor(long mask) {
|
|
||||||
final int bitsSetInMask = Long.bitCount(mask);
|
|
||||||
|
|
||||||
return Math.pow(bitsSetInMask/(double) WordMetadata.POSITIONS_COUNT, 0.25);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
code/index/test/nu/marginalia/index/CombinedIndexReaderTest.java (new file, 382 lines)
@ -0,0 +1,382 @@
package nu.marginalia.index;

import com.google.inject.Guice;
import com.google.inject.Inject;
import it.unimi.dsi.fastutil.ints.IntList;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongList;
import nu.marginalia.IndexLocations;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.hash.MurmurHash3_128;
import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.ReverseIndexConstructor;
import nu.marginalia.index.domainrankings.DomainRankings;
import nu.marginalia.index.forward.ForwardIndexConverter;
import nu.marginalia.index.forward.ForwardIndexFileNames;
import nu.marginalia.index.index.CombinedIndexReader;
import nu.marginalia.index.index.StatefulIndex;
import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
import nu.marginalia.index.journal.reader.IndexJournalReader;
import nu.marginalia.index.journal.writer.IndexJournalWriter;
import nu.marginalia.index.positions.TermData;
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
import nu.marginalia.linkdb.docs.DocumentDbReader;
import nu.marginalia.linkdb.docs.DocumentDbWriter;
import nu.marginalia.linkdb.model.DocdbUrlDetail;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.model.id.UrlIdCodec;
import nu.marginalia.model.idx.DocumentFlags;
import nu.marginalia.model.idx.DocumentMetadata;
import nu.marginalia.model.idx.WordFlags;
import nu.marginalia.model.idx.WordMetadata;
import nu.marginalia.process.control.FakeProcessHeartbeat;
import nu.marginalia.process.control.ProcessHeartbeat;
import nu.marginalia.sequence.GammaCodedSequence;
import nu.marginalia.service.control.ServiceHeartbeat;
import nu.marginalia.service.server.Initialization;
import nu.marginalia.storage.FileStorageService;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.parallel.Execution;

import java.io.IOException;
import java.lang.foreign.Arena;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.SQLException;
import java.util.*;

import static nu.marginalia.linkdb.LinkdbFileNames.DOCDB_FILE_NAME;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;

@Execution(SAME_THREAD)
public class CombinedIndexReaderTest {

    @Inject
    Initialization initialization;

    IndexQueryServiceIntegrationTestModule testModule;

    @Inject
    StatefulIndex statefulIndex;

    @Inject
    IndexJournalWriter indexJournalWriter;

    @Inject
    FileStorageService fileStorageService;

    @Inject
    DomainRankings domainRankings;

    @Inject
    ProcessHeartbeat processHeartbeat;
    @Inject
    DocumentDbReader documentDbReader;

    @Inject
    IndexFactory indexFactory;

    @BeforeEach
    public void setUp() throws IOException {

        testModule = new IndexQueryServiceIntegrationTestModule();
        Guice.createInjector(testModule).injectMembers(this);

        initialization.setReady();
    }

    @AfterEach
    public void tearDown() throws IOException {
        testModule.cleanUp();
    }

    private final MockDocumentMeta anyMetadata = new MockDocumentMeta(0, new DocumentMetadata(2, 0, 14, EnumSet.noneOf(DocumentFlags.class)));

    @Test
    public void testSimpleRetrieval() throws Exception {
        new MockData().add(
                d(1, 1),
                anyMetadata,
                w("hello", WordFlags.Title, 33, 55),
                w("world", WordFlags.Subjects, 34)
        ).load();

        var reader = indexFactory.getCombinedIndexReader();
        var query = reader.findFullWord(kw("hello")).build();

        var buffer = new LongQueryBuffer(32);
        query.getMoreResults(buffer);

        assertEquals(
                List.of(d(1, 1)),
                decode(buffer)
        );

        var helloMeta = td(reader, kw("hello"), d(1, 1));
        assertEquals(helloMeta.flags(), WordFlags.Title.asBit());
        assertEquals(IntList.of(33, 55), helloMeta.positions().values());

        var worldMeta = td(reader, kw("world"), d(1, 1));
        assertEquals(worldMeta.flags(), WordFlags.Subjects.asBit());
        assertEquals(IntList.of(34), worldMeta.positions().values());
    }

    TermData td(CombinedIndexReader reader, long wordId, MockDataDocument docId) {
        return (reader.getTermMetadata(Arena.global(), wordId, new CombinedDocIdList(docId.docId())).array())[0];
    }

    @Test
    public void testUnionRetrieval() throws Exception {
        new MockData()
                .add(
                        d(1, 1),
                        anyMetadata,
                        w("hello", WordFlags.Title),
                        w("world", WordFlags.Title)
                )
                .add(
                        d(1, 2),
                        anyMetadata,
                        w("world", WordFlags.Title)
                )
                .add(
                        d(1, 3),
                        anyMetadata,
                        w("world", WordFlags.Title)
                )
                .add(
                        d(2, 4),
                        anyMetadata,
                        w("hello", WordFlags.Title),
                        w("world", WordFlags.Title)
                )
                .load();

        var reader = indexFactory.getCombinedIndexReader();
        var query = reader
                .findFullWord(kw("hello"))
                .also(kw("world"))
                .build();

        var buffer = new LongQueryBuffer(32);
        query.getMoreResults(buffer);

        assertEquals(
                List.of(d(1, 1), d(2, 4)),
                decode(buffer)
        );
    }

    @Test
    public void testNotFilterRetrieval() throws Exception {
        new MockData()
                .add(
                        d(1, 1),
                        anyMetadata,
                        w("hello", WordFlags.Title),
                        w("world", WordFlags.Title),
                        w("goodbye", WordFlags.Title)
                )
                .add(
                        d(1, 2),
                        anyMetadata,
                        w("world", WordFlags.Title)
                )
                .add(
                        d(1, 3),
                        anyMetadata,
                        w("world", WordFlags.Title)
                )
                .add(
                        d(2, 4),
                        anyMetadata,
                        w("hello", WordFlags.Title),
                        w("world", WordFlags.Title)
                )
                .load();

        var reader = indexFactory.getCombinedIndexReader();
        var query = reader.findFullWord(kw("hello"))
                .also(kw("world"))
                .not(kw("goodbye"))
                .build();

        var buffer = new LongQueryBuffer(32);
        query.getMoreResults(buffer);

        assertEquals(
                List.of(d(2, 4)),
                decode(buffer)
        );
    }

    List<MockDataDocument> decode(LongQueryBuffer buffer) {
        List<MockDataDocument> result = new ArrayList<>();
        for (int i = 0; i < buffer.size(); i++) {
            result.add(new MockDataDocument(buffer.data.get(i)));
        }
        return result;
    }

    private MockDataDocument d(int domainId, int ordinal) {
        return new MockDataDocument(domainId, ordinal);
    }

    private void constructIndex() throws IOException {
        createForwardIndex();
        createFullReverseIndex();
        createPrioReverseIndex();
    }

    private void createFullReverseIndex() throws IOException {

        Path outputFileDocs = ReverseIndexFullFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexFullFileNames.FileIdentifier.DOCS, ReverseIndexFullFileNames.FileVersion.NEXT);
        Path outputFileWords = ReverseIndexFullFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexFullFileNames.FileIdentifier.WORDS, ReverseIndexFullFileNames.FileVersion.NEXT);
        Path outputFilePositions = ReverseIndexFullFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexFullFileNames.FileIdentifier.POSITIONS, ReverseIndexFullFileNames.FileVersion.NEXT);

        Path workDir = IndexLocations.getIndexConstructionArea(fileStorageService);
        Path tmpDir = workDir.resolve("tmp");

        if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);

        var constructor =
                new ReverseIndexConstructor(
                        outputFileDocs,
                        outputFileWords,
                        outputFilePositions,
                        IndexJournalReader::singleFile,
                        DocIdRewriter.identity(),
                        tmpDir);
        constructor.createReverseIndex(new FakeProcessHeartbeat(), "name", workDir);
    }

    private void createPrioReverseIndex() throws IOException {

        Path outputFileDocs = ReverseIndexPrioFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexPrioFileNames.FileIdentifier.DOCS, ReverseIndexPrioFileNames.FileVersion.NEXT);
        Path outputFileWords = ReverseIndexPrioFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexPrioFileNames.FileIdentifier.WORDS, ReverseIndexPrioFileNames.FileVersion.NEXT);
        Path outputFilePositions = ReverseIndexPrioFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexPrioFileNames.FileIdentifier.POSITIONS, ReverseIndexPrioFileNames.FileVersion.NEXT);
        Path workDir = IndexLocations.getIndexConstructionArea(fileStorageService);
        Path tmpDir = workDir.resolve("tmp");

        if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);

        var constructor = new ReverseIndexConstructor(
                outputFileDocs,
                outputFileWords,
                outputFilePositions,
                IndexJournalReader::singleFile,
                DocIdRewriter.identity(),
                tmpDir);

        constructor.createReverseIndex(new FakeProcessHeartbeat(), "name", workDir);
    }

    private void createForwardIndex() throws IOException {

        Path workDir = IndexLocations.getIndexConstructionArea(fileStorageService);
        Path outputFileDocsId = ForwardIndexFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ForwardIndexFileNames.FileIdentifier.DOC_ID, ForwardIndexFileNames.FileVersion.NEXT);
        Path outputFileDocsData = ForwardIndexFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ForwardIndexFileNames.FileIdentifier.DOC_DATA, ForwardIndexFileNames.FileVersion.NEXT);

        ForwardIndexConverter converter = new ForwardIndexConverter(processHeartbeat,
                IndexJournalReader.paging(workDir),
                outputFileDocsId,
                outputFileDocsData,
                domainRankings
        );

        converter.convert();
    }

    MurmurHash3_128 hasher = new MurmurHash3_128();

    long kw(String s) {
        return hasher.hashKeyword(s);
    }

    class MockData {
        private final Map<Long, List<MockDataKeyword>> allData = new HashMap<>();
        private final Map<Long, MockDocumentMeta> metaByDoc = new HashMap<>();

        public MockData add(MockDataDocument document,
                            MockDocumentMeta meta,
                            MockDataKeyword... words)
        {
            long id = UrlIdCodec.encodeId(document.domainId, document.ordinal);

            allData.computeIfAbsent(id, l -> new ArrayList<>()).addAll(List.of(words));
            metaByDoc.put(id, meta);

            return this;
        }

        void load() throws IOException, SQLException, URISyntaxException {
            allData.forEach((doc, words) -> {

                var meta = metaByDoc.get(doc);

                var header = new IndexJournalEntryHeader(
                        doc,
                        meta.features,
                        100,
                        meta.documentMetadata.encode()
                );

                String[] keywords = words.stream().map(w -> w.keyword).toArray(String[]::new);
                long[] metadata = words.stream().map(w -> w.termMetadata).mapToLong(Long::longValue).toArray();
                var positions = words.stream().map(w -> w.positions).map(pos -> GammaCodedSequence.generate(ByteBuffer.allocate(1024), pos.toIntArray())).toArray(GammaCodedSequence[]::new);

                indexJournalWriter.put(header,
                        new IndexJournalEntryData(keywords, metadata, positions));
            });

            var linkdbWriter = new DocumentDbWriter(
                    IndexLocations.getLinkdbLivePath(fileStorageService).resolve(DOCDB_FILE_NAME)
            );
            for (Long key : allData.keySet()) {
                linkdbWriter.add(new DocdbUrlDetail(
                        key,
                        new EdgeUrl("https://www.example.com"),
                        "test",
                        "test",
                        0.,
                        "HTML5",
                        0,
                        null,
                        0,
                        5
                ));
            }
            linkdbWriter.close();

            indexJournalWriter.close();
            constructIndex();
            documentDbReader.reconnect();
            statefulIndex.switchIndex();
        }
    }

    record MockDataDocument(int domainId, int ordinal) {
        public MockDataDocument(long encodedId) {
            this(UrlIdCodec.getDomainId(encodedId), UrlIdCodec.getDocumentOrdinal(encodedId));
        }

        public long docId() {
            return UrlIdCodec.encodeId(domainId, ordinal);
        }

    }
    record MockDocumentMeta(int features, DocumentMetadata documentMetadata) {}
    record MockDataKeyword(String keyword, long termMetadata, IntList positions) {}

    MockDataKeyword w(String keyword, WordFlags flags, int... positions) {
        return new MockDataKeyword(keyword, new WordMetadata(0L, EnumSet.of(flags)).encode(), IntList.of(positions));
    }
}
@ -13,7 +13,6 @@ import nu.marginalia.process.control.FakeProcessHeartbeat;
 import nu.marginalia.process.control.ProcessHeartbeat;
 import nu.marginalia.sequence.GammaCodedSequence;
 import nu.marginalia.storage.FileStorageService;
-import nu.marginalia.hash.MurmurHash3_128;
 import nu.marginalia.index.construction.DocIdRewriter;
 import nu.marginalia.index.construction.ReverseIndexConstructor;
 import nu.marginalia.index.forward.ForwardIndexConverter;
@ -142,6 +141,53 @@ public class IndexQueryServiceIntegrationSmokeTest {
         Assertions.assertArrayEquals(ids, actual);
     }
 
+    @Test
+    public void testSimple() throws Exception {
+        var linkdbWriter = new DocumentDbWriter(
+                IndexLocations.getLinkdbLivePath(fileStorageService)
+                        .resolve(DOCDB_FILE_NAME)
+        );
+        for (int i = 1; i < 512; i++) {
+            loadData(linkdbWriter, i);
+        }
+        linkdbWriter.close();
+        documentDbReader.reconnect();
+
+        indexJournalWriter.close();
+        constructIndex();
+        statefulIndex.switchIndex();
+
+        var rsp = queryService.justQuery(
+                SearchSpecification.builder()
+                        .queryLimits(new QueryLimits(10, 10, Integer.MAX_VALUE, 4000))
+                        .queryStrategy(QueryStrategy.SENTENCE)
+                        .year(SpecificationLimit.none())
+                        .quality(SpecificationLimit.none())
+                        .size(SpecificationLimit.none())
+                        .rank(SpecificationLimit.none())
+                        .rankingParams(ResultRankingParameters.sensibleDefaults())
+                        .domains(new ArrayList<>())
+                        .searchSetIdentifier("NONE")
+                        .query(
+                                SearchQuery.builder("2")
+                                        .include("2")
+                                        .build()
+                        ).build()
+        );
+
+        int[] idxes = new int[] { 62, 222, 382, 60, 124, 220, 284, 380, 444, 122 };
+        long[] ids = IntStream.of(idxes).mapToLong(Long::valueOf).toArray();
+        long[] actual = rsp.results
+                .stream()
+                .mapToLong(i -> i.rawIndexResult.getDocumentId())
+                .map(UrlIdCodec::getDocumentOrdinal)
+                .toArray();
+
+        System.out.println(Arrays.toString(actual));
+        System.out.println(Arrays.toString(ids));
+        Assertions.assertArrayEquals(ids, actual);
+    }
+
     @Test
     public void testDomainQuery() throws Exception {
 
@ -297,7 +343,6 @@ public class IndexQueryServiceIntegrationSmokeTest {
         return UrlIdCodec.encodeId((32 - (id % 32)), id);
     }
 
-    MurmurHash3_128 hasher = new MurmurHash3_128();
     @SneakyThrows
     public void loadData(DocumentDbWriter ldbw, int id) {
         int[] factors = IntStream
@ -305,22 +350,44 @@ public class IndexQueryServiceIntegrationSmokeTest {
                 .filter(v -> (id % v) == 0)
                 .toArray();
+
+        System.out.println("id:" + id + " factors: " + Arrays.toString(factors));
+
         long fullId = fullId(id);
 
-        var header = new IndexJournalEntryHeader(factors.length, 0, fullId, new DocumentMetadata(0, 0, 0, 0, id % 5, id, id % 20, (byte) 0).encode());
+        var header = new IndexJournalEntryHeader(factors.length, 0, 100, fullId, new DocumentMetadata(0, 0, 0, 0, id % 5, id, id % 20, (byte) 0).encode());
 
-        long[] data = new long[factors.length * 2];
-        for (int i = 0; i < factors.length; i++) {
-            data[2 * i] = hasher.hashNearlyASCII(Integer.toString(factors[i]));
-            data[2 * i + 1] = new WordMetadata(i, EnumSet.of(WordFlags.Title)).encode();
-        }
 
         ldbw.add(new DocdbUrlDetail(
                 fullId, new EdgeUrl("https://www.example.com/"+id),
                 "test", "test", 0., "HTML5", 0, null, 0, 10
         ));
 
-        String[] keywords = IntStream.range(0, factors.length).mapToObj(Integer::toString).toArray(String[]::new);
+        String[] keywords = IntStream.of(factors).mapToObj(Integer::toString).toArray(String[]::new);
+        long[] metadata = new long[factors.length];
+        for (int i = 0; i < factors.length; i++) {
+            metadata[i] = new WordMetadata(i, EnumSet.of(WordFlags.Title)).encode();
+        }
+        GammaCodedSequence[] positions = new GammaCodedSequence[factors.length];
+        ByteBuffer wa = ByteBuffer.allocate(32);
+        for (int i = 0; i < factors.length; i++) {
+            positions[i] = GammaCodedSequence.generate(wa, factors);
+        }
+
+        indexJournalWriter.put(header, new IndexJournalEntryData(keywords, metadata, positions));
+    }
+
+    @SneakyThrows
+    public void loadDataWithDomain(DocumentDbWriter ldbw, int domain, int id) {
+        int[] factors = IntStream.rangeClosed(1, id).filter(v -> (id % v) == 0).toArray();
+        long fullId = UrlIdCodec.encodeId(domain, id);
+        var header = new IndexJournalEntryHeader(factors.length, 0, 100, fullId, DocumentMetadata.defaultValue());
+
+        ldbw.add(new DocdbUrlDetail(
+                fullId, new EdgeUrl("https://www.example.com/"+id),
+                "test", "test", 0., "HTML5", 0, null, 0, 10
+        ));
+
+        String[] keywords = IntStream.of(factors).mapToObj(Integer::toString).toArray(String[]::new);
         long[] metadata = new long[factors.length];
         for (int i = 0; i < factors.length; i++) {
             metadata[i] = new WordMetadata(i, EnumSet.of(WordFlags.Title)).encode();
@ -334,30 +401,4 @@ public class IndexQueryServiceIntegrationSmokeTest {
         indexJournalWriter.put(header, new IndexJournalEntryData(keywords, metadata, positions));
     }
 
-    @SneakyThrows
-    public void loadDataWithDomain(DocumentDbWriter ldbw, int domain, int id) {
-        int[] factors = IntStream.rangeClosed(1, id).filter(v -> (id % v) == 0).toArray();
-        long fullId = UrlIdCodec.encodeId(domain, id);
-        var header = new IndexJournalEntryHeader(factors.length, 0, fullId, DocumentMetadata.defaultValue());
-
-        ldbw.add(new DocdbUrlDetail(
-                fullId, new EdgeUrl("https://www.example.com/"+id),
-                "test", "test", 0., "HTML5", 0, null, 0, 10
-        ));
-
-        String[] keywords = IntStream.range(0, factors.length).mapToObj(Integer::toString).toArray(String[]::new);
-        long[] metadata = new long[factors.length];
-        for (int i = 0; i < factors.length; i++) {
-            metadata[i] = new WordMetadata(i, EnumSet.of(WordFlags.Title)).encode();
-        }
-        GammaCodedSequence[] positions = new GammaCodedSequence[factors.length];
-        ByteBuffer wa = ByteBuffer.allocate(16);
-        for (int i = 0; i < factors.length; i++) {
-            positions[i] = GammaCodedSequence.generate(wa, i);
-        }
-
-        indexJournalWriter.put(header, new IndexJournalEntryData(keywords, metadata, positions));
-    }
-
 }
@ -565,6 +565,7 @@ public class IndexQueryServiceIntegrationTest {
         var header = new IndexJournalEntryHeader(
                 doc,
                 meta.features,
+                100,
                 meta.documentMetadata.encode()
         );
 
@ -1,100 +0,0 @@ (deleted file)
package nu.marginalia.ranking.results;

import nu.marginalia.api.searchquery.model.compiled.CompiledQuery;
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
import nu.marginalia.api.searchquery.model.compiled.CqDataInt;
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
import nu.marginalia.model.idx.DocumentFlags;
import nu.marginalia.model.idx.WordFlags;
import nu.marginalia.model.crawl.PubDate;
import nu.marginalia.model.idx.DocumentMetadata;
import nu.marginalia.model.idx.WordMetadata;
import nu.marginalia.ranking.results.factors.*;
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;

import java.util.*;

import static org.mockito.Mockito.when;

class ResultValuatorTest {

    TermFrequencyDict dict;
    ResultValuator valuator;

    @BeforeEach
    public void setUp() {

        dict = Mockito.mock(TermFrequencyDict.class);
        when(dict.docCount()).thenReturn(100_000);

        valuator = new ResultValuator(
                new TermCoherenceFactor()
        );

    }

    CqDataInt frequencyData = new CqDataInt(new int[] { 10 });

    CompiledQueryLong titleOnlyLowCountSet = CompiledQuery.just(
            new SearchResultKeywordScore("bob", 1,
                    wordMetadata(Set.of(1), EnumSet.of(WordFlags.Title)))
    ).mapToLong(SearchResultKeywordScore::encodedWordMetadata);

    CompiledQueryLong highCountNoTitleSet = CompiledQuery.just(
            new SearchResultKeywordScore("bob", 1,
                    wordMetadata(Set.of(1,3,4,6,7,9,10,11,12,14,15,16), EnumSet.of(WordFlags.TfIdfHigh)))
    ).mapToLong(SearchResultKeywordScore::encodedWordMetadata);;

    CompiledQueryLong highCountSubjectSet = CompiledQuery.just(
            new SearchResultKeywordScore("bob", 1,
                    wordMetadata(Set.of(1,3,4,6,7,9,10,11,12,14,15,16), EnumSet.of(WordFlags.TfIdfHigh, WordFlags.Subjects)))
    ).mapToLong(SearchResultKeywordScore::encodedWordMetadata);;


    @Test
    void evaluateTerms() {

        when(dict.getTermFreq("bob")).thenReturn(10);
        ResultRankingContext context = new ResultRankingContext(100000,
                ResultRankingParameters.sensibleDefaults(),
                new BitSet(),
                new BitSet(),
                frequencyData,
                frequencyData);

        long docMeta = docMetadata(0, 2010, 5, EnumSet.noneOf(DocumentFlags.class));
        int features = 0;

        double titleOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, docMeta, features, 10_000, context, null);
        double titleLongOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, docMeta, features, 10_000, context, null);
        double highCountNoTitle = valuator.calculateSearchResultValue(highCountNoTitleSet, docMeta, features, 10_000, context, null);
        double highCountSubject = valuator.calculateSearchResultValue(highCountSubjectSet, docMeta, features, 10_000, context, null);

        System.out.println(titleOnlyLowCount);
        System.out.println(titleLongOnlyLowCount);
        System.out.println(highCountNoTitle);
        System.out.println(highCountSubject);
    }

    private long docMetadata(int topology,
                             int year,
                             int quality,
                             EnumSet<DocumentFlags> flags) {
        return new DocumentMetadata(topology, PubDate.toYearByte(year), quality, flags).encode();
    }

    private long wordMetadata(Set<Integer> positions, Set<WordFlags> wordFlags) {
        long posBits = positions.stream()
                .mapToLong(i -> ((1L << i) & 0xFF_FFFF_FFFF_FFFFL))
                .reduce((a,b) -> a|b)
                .orElse(0L);

        return new WordMetadata(posBits, wordFlags).encode();
    }

}
@ -1,107 +0,0 @@ (deleted file)
package nu.marginalia.ranking.results.factors;

import nu.marginalia.api.searchquery.model.compiled.CompiledQuery;
import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates;
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
import nu.marginalia.bbpc.BrailleBlockPunchCards;
import nu.marginalia.model.idx.WordMetadata;
import org.junit.jupiter.api.Test;

import java.util.ArrayList;
import java.util.List;

import static org.junit.jupiter.api.Assertions.*;

class TermCoherenceFactorTest {

    TermCoherenceFactor termCoherenceFactor = new TermCoherenceFactor();
    @Test
    public void testAllBitsSet() {
        var allPositionsSet = createSet(
                ~0L,
                ~0L
        );

        long mask = CompiledQueryAggregates.longBitmaskAggregate(
                allPositionsSet,
                SearchResultKeywordScore::positions
        );

        assertEquals(1.0, termCoherenceFactor.bitsSetFactor(mask), 0.01);

        assertEquals(1.0,
                termCoherenceFactor.calculateOverlap(
                        allPositionsSet.mapToLong(SearchResultKeywordScore::encodedWordMetadata)
                )
        );

    }

    @Test
    public void testNoBitsSet() {
        var allPositionsSet = createSet(
                0, 0
        );

        long mask = CompiledQueryAggregates.longBitmaskAggregate(allPositionsSet, score -> score.positions() & WordMetadata.POSITIONS_MASK);

        assertEquals(0, termCoherenceFactor.bitsSetFactor(mask), 0.01);

        assertEquals(0, termCoherenceFactor.calculateOverlap(allPositionsSet.mapToLong(SearchResultKeywordScore::encodedWordMetadata)));
    }

    @Test @SuppressWarnings("unchecked")
    public void testLowPosMatches() {
        var positions = createSet(
                List.of(0, 1, 2, 3), List.of(0, 1, 2, 3)
        );

        long mask = CompiledQueryAggregates.longBitmaskAggregate(positions, score -> score.positions() & WordMetadata.POSITIONS_MASK);
        printMask(mask);

    }

    @Test @SuppressWarnings("unchecked")
    public void testHiPosMatches() {
        var positions = createSet(
                List.of(55, 54, 53, 52), List.of(55, 54, 53, 52)
        );

        long mask = CompiledQueryAggregates.longBitmaskAggregate(positions, score -> score.positions() & WordMetadata.POSITIONS_MASK);
        printMask(mask);
    }

    @Test
    public void testBitMatchScaling() {
        for (int i = 1; i < 48; i++) {
            System.out.println(i + ":" + termCoherenceFactor.bitsSetFactor((1L << i) - 1));
        }
    }

    void printMask(long mask) {
        System.out.println(BrailleBlockPunchCards.printBits(mask, 48));
    }

    CompiledQuery<SearchResultKeywordScore> createSet(List<Integer>... maskPositions) {
        long[] positions = new long[maskPositions.length];

        for (int i = 0; i < maskPositions.length; i++) {
            for (long pos : maskPositions[i]) {
                positions[i] |= (1L<<pos);
            }
        }

        return createSet(positions);
    }

    CompiledQuery<SearchResultKeywordScore> createSet(long... positionMasks) {
        List<SearchResultKeywordScore> keywords = new ArrayList<>();

        for (int i = 0; i < positionMasks.length; i++) {
            keywords.add(new SearchResultKeywordScore("", 0,
                    new WordMetadata(positionMasks[i] & WordMetadata.POSITIONS_MASK, (byte) 0).encode()));
        }

        return CompiledQuery.just(keywords.toArray(SearchResultKeywordScore[]::new));
    }
}
@ -17,12 +17,13 @@ public class EliasGammaCodec implements IntIterator {
 
     private final BitReader reader;
     int rem = 0;
-    private int last = 0;
+    private int last;
     private int next = 0;
 
-    private EliasGammaCodec(ByteBuffer buffer) {
+    private EliasGammaCodec(ByteBuffer buffer, int zero) {
         reader = new BitReader(buffer);
 
+        last = zero;
         int bits = reader.takeWhileZero();
 
         if (!reader.hasMore()) {
@ -33,9 +34,24 @@ public class EliasGammaCodec implements IntIterator {
         }
     }
 
+    public static int readCount(ByteBuffer buffer) {
+        var reader = new BitReader(buffer);
+
+        if (reader.getCurrentValue() > 0) {
+            int bits = reader.takeWhileZero();
+            return reader.get(bits);
+        }
+        else {
+            return 0;
+        }
+    }
+
     /** Decode a sequence of integers from a ByteBuffer using the Elias Gamma code */
     public static IntIterator decode(ByteBuffer buffer) {
-        return new EliasGammaCodec(buffer);
+        return new EliasGammaCodec(buffer, 0);
+    }
+    public static IntIterator decodeWithOffset(ByteBuffer buffer, int offset) {
+        return new EliasGammaCodec(buffer, offset);
     }
 
     /** Encode a sequence of integers into a ByteBuffer using the Elias Gamma code.
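As background on the codec these changes extend (a textbook illustration, not the project's exact bit-level conventions): Elias gamma writes a positive integer as a unary run of zeros giving its length, followed by the value's binary digits. That prefix structure is what lets the new readCount apparently read just a leading count field, and lets the new zero constructor argument shift the decoded values without re-reading the stream. A minimal sketch of the classic code over bit strings:

class EliasGammaSketch {
    // Encode n >= 1 as (bitLength - 1) zeros followed by n's binary digits, e.g. 5 -> "00101".
    static String encode(int n) {
        String bin = Integer.toBinaryString(n);
        return "0".repeat(bin.length() - 1) + bin;
    }

    // Count the leading zeros, then interpret the next zeros + 1 bits as the value.
    static int decode(String bits) {
        int zeros = 0;
        while (bits.charAt(zeros) == '0') zeros++;
        return Integer.parseInt(bits.substring(zeros, 2 * zeros + 1), 2);
    }

    public static void main(String[] args) {
        System.out.println(encode(5));           // 00101
        System.out.println(decode(encode(12)));  // 12
    }
}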
@ -16,6 +16,7 @@ import java.util.StringJoiner;
  * */
 public class GammaCodedSequence implements BinarySerializable, Iterable<Integer> {
     private final ByteBuffer raw;
 
     int startPos = 0;
     int startLimit = 0;
 
@ -43,6 +44,12 @@ public class GammaCodedSequence implements BinarySerializable, Iterable<Integer>
         startLimit = bytes.limit();
     }
 
+    public GammaCodedSequence(ByteBuffer bytes, int startPos, int startLimit) {
+        this.raw = bytes;
+        this.startPos = startPos;
+        this.startLimit = startLimit;
+    }
+
     public GammaCodedSequence(byte[] bytes) {
         raw = ByteBuffer.allocate(bytes.length);
         raw.put(bytes);
@ -72,6 +79,18 @@ public class GammaCodedSequence implements BinarySerializable, Iterable<Integer>
         return EliasGammaCodec.decode(raw);
     }
 
+    /** Return an iterator over the sequence with a constant offset applied to each value.
+     * This is useful for comparing sequences with different offsets, and adds zero
+     * extra cost to the decoding process which is already based on adding
+     * relative differences.
+     * */
+    public IntIterator offsetIterator(int offset) {
+        raw.position(startPos);
+        raw.limit(startLimit);
+
+        return EliasGammaCodec.decodeWithOffset(raw, offset);
+    }
+
     public IntList values() {
         var intItr = iterator();
         IntArrayList ret = new IntArrayList(8);
@ -81,18 +100,6 @@ public class GammaCodedSequence implements BinarySerializable, Iterable<Integer>
         return ret;
     }
 
-    /** Decode the sequence into an IntList;
-     * this is a somewhat slow operation,
-     * iterating over the data directly more performant */
-    public IntList decode() {
-        IntArrayList ret = new IntArrayList(8);
-        var iter = iterator();
-        while (iter.hasNext()) {
-            ret.add(iter.nextInt());
-        }
-        return ret;
-    }
-
     public int hashCode() {
         return raw.hashCode();
     }
@ -116,7 +123,11 @@ public class GammaCodedSequence implements BinarySerializable, Iterable<Integer>
         return raw;
     }
 
-    public int size() {
+    public int bufferSize() {
         return raw.capacity();
     }
 
+    public int valueCount() {
+        return EliasGammaCodec.readCount(buffer());
+    }
 }
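A hypothetical usage sketch of the new offsetIterator (class name and position values invented here; the pattern mirrors intersectSequencesOffsetMatch in the test added later in this commit): shifting one term's positions by a constant turns an adjacency check into a plain intersection.

import java.nio.ByteBuffer;

import nu.marginalia.sequence.GammaCodedSequence;
import nu.marginalia.sequence.SequenceOperations;

class OffsetIteratorSketch {
    public static void main(String[] args) {
        ByteBuffer work = ByteBuffer.allocate(256);

        // "hello" at word positions 3 and 17, "world" at 4 and 29
        GammaCodedSequence hello = GammaCodedSequence.generate(work, 3, 17);
        GammaCodedSequence world = GammaCodedSequence.generate(work, 4, 29);

        // Shift "world" back by one position; 4 - 1 == 3, which "hello" also contains,
        // so the phrase "hello world" registers as an ordinary intersection.
        boolean phraseMatch = SequenceOperations.intersectSequences(
                hello.offsetIterator(0),
                world.offsetIterator(-1));

        System.out.println(phraseMatch); // expected: true
    }
}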
@ -0,0 +1,86 @@ (new file)
package nu.marginalia.sequence;

import it.unimi.dsi.fastutil.ints.IntIterator;

public class SequenceOperations {

    /** Return true if the sequences intersect, false otherwise.
     * */
    public static boolean intersectSequences(IntIterator... sequences) {

        if (sequences.length <= 1)
            return true;

        // Initialize values and find the maximum value
        int[] values = new int[sequences.length];

        for (int i = 0; i < sequences.length; i++) {
            if (sequences[i].hasNext())
                values[i] = sequences[i].nextInt();
            else
                return false;
        }

        // Intersect the sequences by advancing all values smaller than the maximum seen so far
        // until they are equal to the maximum value, or until the end of the sequence is reached
        int max = Integer.MIN_VALUE;
        int successes = 0;
        for (int i = 0; successes < sequences.length; i = (i + 1) % sequences.length)
        {
            if (values[i] == max) {
                successes++;
            } else {
                successes = 0;

                // Discard values until we reach the maximum value seen so far,
                // or until the end of the sequence is reached
                while (values[i] < max) {
                    if (sequences[i].hasNext())
                        values[i] = sequences[i].nextInt();
                    else
                        return false;
                }

                // Update the maximum value, if necessary
                max = Math.max(max, values[i]);
            }
        }

        return true;
    }

    /** Return the minimum word distance between two sequences, or a negative value if either sequence is empty.
     * */
    public static int minDistance(IntIterator seqA, IntIterator seqB)
    {
        int minDistance = Integer.MAX_VALUE;

        if (!seqA.hasNext() || !seqB.hasNext())
            return -1;

        int a = seqA.nextInt();
        int b = seqB.nextInt();

        while (true) {
            int distance = Math.abs(a - b);
            if (distance < minDistance)
                minDistance = distance;

            if (a <= b) {
                if (seqA.hasNext()) {
                    a = seqA.nextInt();
                } else {
                    break;
                }
            } else {
                if (seqB.hasNext()) {
                    b = seqB.nextInt();
                } else {
                    break;
                }
            }
        }

        return minDistance;
    }
}
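A companion sketch for minDistance (again with invented values): it walks the two sorted position lists in a single merge-like pass and reports how close the two terms ever get, which is the quantity the reworked TermCoherenceFactor code earlier in this commit averages over term pairs.

import java.nio.ByteBuffer;

import nu.marginalia.sequence.GammaCodedSequence;
import nu.marginalia.sequence.SequenceOperations;

class MinDistanceSketch {
    public static void main(String[] args) {
        ByteBuffer work = ByteBuffer.allocate(256);

        GammaCodedSequence a = GammaCodedSequence.generate(work, 1, 9, 40);
        GammaCodedSequence b = GammaCodedSequence.generate(work, 12, 44);

        // Closest pair across the two lists is (9, 12), so this prints 3; if either
        // sequence were empty, minDistance returns a negative sentinel instead.
        int proximity = SequenceOperations.minDistance(a.iterator(), b.iterator());
        System.out.println(proximity);
    }
}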
@ -20,6 +20,10 @@ public class BitReader {
         this.currentValue = 0;
     }
 
+    public long getCurrentValue() {
+        return currentValue;
+    }
+
     /** Read the next bit from the buffer */
     public boolean getBit() {
         if (bitPosition <= 0) {
@ -0,0 +1,75 @@ (new file)
package nu.marginalia.sequence;

import it.unimi.dsi.fastutil.ints.IntIterator;
import org.junit.jupiter.api.Test;

import java.nio.ByteBuffer;

import static org.junit.jupiter.api.Assertions.*;

class SequenceOperationsTest {

    @Test
    void intersectSequencesSingle() {
        ByteBuffer wa = ByteBuffer.allocate(1024);
        GammaCodedSequence seq1 = GammaCodedSequence.generate(wa, 1);

        assertTrue(SequenceOperations.intersectSequences(seq1.iterator()));
    }

    @Test
    void intersectSequencesTrivialMatch() {
        ByteBuffer wa = ByteBuffer.allocate(1024);
        GammaCodedSequence seq1 = GammaCodedSequence.generate(wa, 1);
        GammaCodedSequence seq2 = GammaCodedSequence.generate(wa, 1);

        assertTrue(SequenceOperations.intersectSequences(seq1.iterator(), seq2.iterator()));
    }

    @Test
    void intersectSequencesTrivialMismatch() {
        ByteBuffer wa = ByteBuffer.allocate(1024);
        GammaCodedSequence seq1 = GammaCodedSequence.generate(wa, 1);
        GammaCodedSequence seq2 = GammaCodedSequence.generate(wa, 2);

        assertFalse(SequenceOperations.intersectSequences(seq1.iterator(), seq2.iterator()));
    }

    @Test
    void intersectSequencesOffsetMatch() {
        ByteBuffer wa = ByteBuffer.allocate(1024);
        GammaCodedSequence seq1 = GammaCodedSequence.generate(wa, 1);
        GammaCodedSequence seq2 = GammaCodedSequence.generate(wa, 3);

        assertTrue(SequenceOperations.intersectSequences(seq1.offsetIterator(0), seq2.offsetIterator(-2)));
    }

    @Test
    void intersectSequencesDeepMatch() {
        ByteBuffer wa = ByteBuffer.allocate(1024);
        GammaCodedSequence seq1 = GammaCodedSequence.generate(wa, 1, 3, 4, 7, 8, 9, 11);
        GammaCodedSequence seq2 = GammaCodedSequence.generate(wa, 2, 5, 8, 14);

        assertTrue(SequenceOperations.intersectSequences(seq1.iterator(), seq2.iterator()));
    }

    @Test
    void intersectSequencesDeepMatch3() {
        ByteBuffer wa = ByteBuffer.allocate(1024);
        GammaCodedSequence seq1 = GammaCodedSequence.generate(wa, 1, 3, 4, 7, 8, 9, 11);
        GammaCodedSequence seq2 = GammaCodedSequence.generate(wa, 2, 5, 8, 14);
        GammaCodedSequence seq3 = GammaCodedSequence.generate(wa, 1, 5, 8, 9);

        assertTrue(SequenceOperations.intersectSequences(seq1.iterator(), seq2.iterator(), seq3.iterator()));
    }

    @Test
    void intersectSequencesDeepMismatch() {
        ByteBuffer wa = ByteBuffer.allocate(1024);
        GammaCodedSequence seq1 = GammaCodedSequence.generate(wa, 1, 3, 4, 7, 8, 9, 11);
        GammaCodedSequence seq2 = GammaCodedSequence.generate(wa, 2, 5, 14);

        assertFalse(SequenceOperations.intersectSequences(seq1.iterator(), seq2.iterator()));
    }

}
@ -26,6 +26,8 @@ public class DocumentRecordKeywordsProjection {
     public int htmlFeatures;
     public long documentMetadata;
 
+    public int length;
+
     public List<String> words;
     public TLongList metas;
     public List<GammaCodedSequence> positions;
@ -39,13 +41,14 @@ public class DocumentRecordKeywordsProjection {
     }
 
     public static Collection<String> requiredColumns() {
-        return List.of("domain", "ordinal", "htmlFeatures", "word", "wordMeta", "documentMetadata");
+        return List.of("domain", "ordinal", "htmlFeatures", "word", "wordMeta", "documentMetadata", "length");
     }
 
     @SneakyThrows
     public DocumentRecordKeywordsProjection add(String heading, Object value) {
         switch (heading) {
             case "domain" -> domain = (String) value;
+            case "length" -> length = (Integer) value;
             case "ordinal" -> ordinal = (Integer) value;
             case "htmlFeatures" -> htmlFeatures = (Integer) value;
             case "documentMetadata" -> documentMetadata = (Long) value;
@ -6,12 +6,10 @@ import lombok.SneakyThrows;
 import nu.marginalia.IndexLocations;
 import nu.marginalia.index.journal.model.IndexJournalEntryData;
 import nu.marginalia.storage.FileStorageService;
-import nu.marginalia.hash.MurmurHash3_128;
 import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
 import nu.marginalia.index.journal.writer.IndexJournalWriterPagingImpl;
 import nu.marginalia.index.journal.writer.IndexJournalWriter;
 import nu.marginalia.keyword.model.DocumentKeywords;
-import nu.marginalia.model.idx.DocumentMetadata;
 import nu.marginalia.index.journal.IndexJournalFileNames;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@ -41,18 +39,11 @@ public class LoaderIndexJournalWriter {
         indexWriter = new IndexJournalWriterPagingImpl(indexArea);
     }
 
-    public void putWords(long combinedId,
-                         int features,
-                         DocumentMetadata metadata,
-                         DocumentKeywords wordSet) {
-
-        putWords(combinedId, features, metadata.encode(), wordSet);
-    }
-
     @SneakyThrows
     public void putWords(long combinedId,
                          int features,
                          long metadata,
+                         int length,
                          DocumentKeywords wordSet) {
 
         if (wordSet.isEmpty()) {
@ -65,7 +56,7 @@ public class LoaderIndexJournalWriter {
             return;
         }
 
-        var header = new IndexJournalEntryHeader(combinedId, features, metadata);
+        var header = new IndexJournalEntryHeader(combinedId, features, length, metadata);
         var data = new IndexJournalEntryData(wordSet.keywords, wordSet.metadata, wordSet.positions);
 
         indexWriter.put(header, data);
@ -75,6 +75,7 @@ public class KeywordLoaderService {
         writer.putWords(combinedId,
                 projection.htmlFeatures,
                 projection.documentMetadata,
+                projection.length,
                 words);
     }
 }
@ -91,7 +91,7 @@ public class SearchServicePaperDoll extends AbstractModule {
                            long positions)
     {
         results.add(new DecoratedSearchResultItem(
-                new SearchResultItem(url.hashCode(), 2, 3, false),
+                new SearchResultItem(url.hashCode(), 2, 3),
                 new EdgeUrl(url),
                 title,
                 description,