Mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git, synced 2025-02-23 13:09:00 +00:00
(index, WIP) Position data partially integrated with forward and reverse indexes.
There's no graceful way of doing this in small commits, so it's being pushed as one large commit to avoid the risk of data loss.
This commit is contained in:
parent
9b922af075
commit
4a8afa6b9f
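For orientation before the diff: the journal's new per-term record layout, as implied by TERM_HEADER_SIZE_BYTES = 17 and the TermDataIterator further down, is an 8-byte term id, an 8-byte metadata word, a 1-byte length, and then that many bytes of gamma-coded positions. The decoder sketch below is illustrative only and not part of the commit; the class and record names are made up.

import java.nio.ByteBuffer;

// Illustrative decoder for one term record, mirroring TermDataIterator.next():
// [ termId: long ][ metadata: long ][ size: unsigned byte ][ size bytes of gamma-coded positions ]
class TermRecordSketch {
    record TermRecord(long termId, long metadata, byte[] gammaCodedPositions) {}

    static TermRecord readOne(ByteBuffer buffer) {
        long termId = buffer.getLong();      // 8 bytes
        long metadata = buffer.getLong();    // 8 bytes
        int size = buffer.get() & 0xFF;      // 1 byte; 8 + 8 + 1 = TERM_HEADER_SIZE_BYTES
        byte[] positions = new byte[size];   // gamma-coded positions payload
        buffer.get(positions);
        return new TermRecord(termId, metadata, positions);
    }
}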
@ -1,6 +1,5 @@
|
||||
package nu.marginalia.keyword.model;
|
||||
|
||||
import nu.marginalia.model.idx.WordMetadata;
|
||||
import nu.marginalia.sequence.GammaCodedSequence;
|
||||
|
||||
import java.io.Serial;
|
||||
@ -26,26 +25,6 @@ public final class DocumentKeywords implements Serializable {
|
||||
assert keywords.length == metadata.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append(getClass().getSimpleName());
|
||||
sb.append('[');
|
||||
var pointer = newPointer();
|
||||
while (pointer.advancePointer()) {
|
||||
sb.append("\n\t ");
|
||||
|
||||
long metadata = pointer.getMetadata();
|
||||
String keyword = pointer.getKeyword();
|
||||
sb.append(keyword);
|
||||
|
||||
if (metadata != 0) {
|
||||
sb.append("/").append(new WordMetadata(metadata));
|
||||
}
|
||||
}
|
||||
return sb.append("\n]").toString();
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return keywords.length == 0;
|
||||
}
|
||||
@ -54,11 +33,6 @@ public final class DocumentKeywords implements Serializable {
|
||||
return keywords.length;
|
||||
}
|
||||
|
||||
/** Return a pointer for traversing this structure */
|
||||
public DocumentKeywordsPointer newPointer() {
|
||||
return new DocumentKeywordsPointer(this);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -41,7 +41,7 @@ public class DocumentKeywordsBuilder {
|
||||
|
||||
meta[i] = entry.getLongValue();
|
||||
wordArray[i] = entry.getKey();
|
||||
positions[i] = GammaCodedSequence.generate(workArea, wordToPos.get(entry.getKey()));
|
||||
positions[i] = GammaCodedSequence.generate(workArea, wordToPos.getOrDefault(entry.getKey(), IntList.of()));
|
||||
}
|
||||
|
||||
return new DocumentKeywords(wordArray, meta, positions);
|
||||
|
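The builder above now guards against missing position lists with wordToPos.getOrDefault(entry.getKey(), IntList.of()) before handing them to GammaCodedSequence.generate(workArea, ...). The diff does not spell out the coding itself; purely for intuition, here is a hypothetical sketch of classic Elias gamma coding applied to a gap-coded position list (the real GammaCodedSequence format may differ in its details):

// Hypothetical illustration of Elias gamma coding, emitted as a bit string for readability.
class EliasGammaSketch {
    // gamma(x) for x >= 1: (bit length - 1) zero bits, then the binary form of x.
    static String gamma(int x) {
        String bin = Integer.toBinaryString(x);
        return "0".repeat(bin.length() - 1) + bin;
    }

    public static void main(String[] args) {
        int[] positions = {1, 3, 5};        // sorted positions of a word in a document
        StringBuilder bits = new StringBuilder();
        int prev = 0;
        for (int p : positions) {
            bits.append(gamma(p - prev));   // encode the gap to the previous position
            prev = p;
        }
        System.out.println(bits);           // small gaps compress to few bits
    }
}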
@ -1,48 +0,0 @@
|
||||
package nu.marginalia.keyword.model;
|
||||
|
||||
import nu.marginalia.sequence.GammaCodedSequence;
|
||||
|
||||
/** Pointer into a {@see DocumentKeywords}. It starts out before the first position;
* advance it forward with advancePointer().
|
||||
* */
|
||||
public class DocumentKeywordsPointer {
|
||||
private int pos = -1;
|
||||
|
||||
private final DocumentKeywords keywords;
|
||||
|
||||
DocumentKeywordsPointer(DocumentKeywords keywords) {
|
||||
this.keywords = keywords;
|
||||
}
|
||||
|
||||
/** Number of positions remaining */
|
||||
public int remaining() {
|
||||
return keywords.size() - Math.max(0, pos);
|
||||
}
|
||||
|
||||
/** Return the keyword associated with the current position */
|
||||
public String getKeyword() {
|
||||
return keywords.keywords[pos];
|
||||
}
|
||||
|
||||
/** Return the metadata associated with the current position */
|
||||
public long getMetadata() {
|
||||
return keywords.metadata[pos];
|
||||
}
|
||||
|
||||
/** Return the positions associated with the current position */
|
||||
public GammaCodedSequence getPositions() {
|
||||
return keywords.positions[pos];
|
||||
}
|
||||
|
||||
/** Advance the current position,
|
||||
* returns false if this was the
|
||||
* last position */
|
||||
public boolean advancePointer() {
|
||||
return ++pos < keywords.size();
|
||||
}
|
||||
|
||||
/** Returns true unless the pointer is beyond the last position in the keyword set */
|
||||
public boolean hasMore() {
|
||||
return pos + 1 < keywords.size();
|
||||
}
|
||||
}
|
@ -92,26 +92,17 @@ class DocumentKeywordExtractorTest {
|
||||
);
|
||||
|
||||
var keywordsBuilt = keywords.build();
|
||||
var ptr = keywordsBuilt.newPointer();
|
||||
|
||||
Map<String, WordMetadata> flags = new HashMap<>();
|
||||
Map<String, GammaCodedSequence> positions = new HashMap<>();
|
||||
|
||||
ByteBuffer work = ByteBuffer.allocate(1024);
|
||||
for (int i = 0; i < keywordsBuilt.size(); i++) {
|
||||
String keyword = keywordsBuilt.keywords[i];
|
||||
long metadata = keywordsBuilt.metadata[i];
|
||||
|
||||
while (ptr.advancePointer()) {
|
||||
System.out.println(ptr.getKeyword() + " " + ptr.getMetadata() + " " + ptr.getPositions());
|
||||
|
||||
int[] vals = ptr.getPositions().decode().toIntArray();
|
||||
for (int i = 0; i < vals.length; i++) {
|
||||
vals[i] = vals[i] + 1;
|
||||
}
|
||||
var out = EliasGammaCodec.encode(work, vals);
|
||||
System.out.println(out.capacity() + "/" + vals.length * 4);
|
||||
|
||||
if (Set.of("dirty", "blues").contains(ptr.getKeyword())) {
|
||||
flags.put(ptr.getKeyword(), new WordMetadata(ptr.getMetadata()));
|
||||
positions.put(ptr.getKeyword(), ptr.getPositions());
|
||||
if (Set.of("dirty", "blues").contains(keyword)) {
|
||||
flags.put(keyword, new WordMetadata(metadata));
|
||||
positions.put(keyword, keywordsBuilt.positions[i]);
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -15,12 +15,14 @@ dependencies {
|
||||
implementation 'org.jgrapht:jgrapht-core:1.5.2'
|
||||
|
||||
implementation project(':third-party:commons-codec')
|
||||
implementation project(':third-party:parquet-floor')
|
||||
|
||||
implementation project(':code:index:api')
|
||||
implementation project(':code:functions:link-graph:api')
|
||||
|
||||
implementation project(':code:libraries:array')
|
||||
implementation project(':code:libraries:btree')
|
||||
implementation project(':code:libraries:coded-sequence')
|
||||
|
||||
implementation project(':code:common:db')
|
||||
implementation project(':code:common:config')
|
||||
|
@ -15,6 +15,7 @@ apply from: "$rootProject.projectDir/srcsets.gradle"
|
||||
dependencies {
|
||||
implementation project(':code:libraries:array')
|
||||
implementation project(':code:libraries:btree')
|
||||
implementation project(':code:libraries:coded-sequence')
|
||||
implementation project(':code:index:query')
|
||||
implementation project(':code:index:index-journal')
|
||||
implementation project(':code:common:model')
|
||||
|
@ -2,12 +2,14 @@ package nu.marginalia.index.forward;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.index.domainrankings.DomainRankings;
|
||||
import nu.marginalia.index.journal.model.IndexJournalEntry;
|
||||
import nu.marginalia.index.journal.model.IndexJournalEntryData;
|
||||
import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
|
||||
import nu.marginalia.index.journal.reader.IndexJournalReaderSingleFile;
|
||||
import nu.marginalia.index.journal.writer.IndexJournalWriter;
|
||||
import nu.marginalia.index.journal.writer.IndexJournalWriterSingleFileImpl;
|
||||
import nu.marginalia.model.id.UrlIdCodec;
|
||||
import nu.marginalia.process.control.FakeProcessHeartbeat;
|
||||
import nu.marginalia.sequence.GammaCodedSequence;
|
||||
import nu.marginalia.test.TestUtil;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
@ -69,40 +71,40 @@ class ForwardIndexConverterTest {
|
||||
TestUtil.clearTempDir(dataDir);
|
||||
}
|
||||
|
||||
public int[] getFactorsI(int id) {
|
||||
return IntStream.rangeClosed(1, id).filter(v -> (id % v) == 0).toArray();
|
||||
}
|
||||
|
||||
long createId(long url, long domain) {
|
||||
return UrlIdCodec.encodeId((int) domain, (int) url);
|
||||
}
|
||||
|
||||
public void createEntry(IndexJournalWriter writer, int id) {
|
||||
int[] factors = getFactorsI(id);
|
||||
|
||||
var entryBuilder = IndexJournalEntry.builder(createId(id, id/20), id%5);
|
||||
|
||||
for (int i = 0; i+1 < factors.length; i+=2) {
|
||||
entryBuilder.add(factors[i], -factors[i+1]);
|
||||
}
|
||||
|
||||
writer.put(entryBuilder.build());
|
||||
writer.put(
|
||||
new IndexJournalEntryHeader(createId(id, id/20),
|
||||
id%3,
|
||||
(id % 5)),
|
||||
new IndexJournalEntryData(
|
||||
new String[]{},
|
||||
new long[]{},
|
||||
new GammaCodedSequence[]{}
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
void testForwardIndex() throws IOException {
|
||||
|
||||
new ForwardIndexConverter(new FakeProcessHeartbeat(), new IndexJournalReaderSingleFile(indexFile), docsFileId, docsFileData, new DomainRankings()).convert();
|
||||
new ForwardIndexConverter(new FakeProcessHeartbeat(),
|
||||
new IndexJournalReaderSingleFile(indexFile),
|
||||
docsFileId,
|
||||
docsFileData,
|
||||
new DomainRankings()).convert();
|
||||
|
||||
var forwardReader = new ForwardIndexReader(docsFileId, docsFileData);
|
||||
|
||||
for (int i = 36; i < workSetSize; i++) {
|
||||
long docId = createId(i, i/20);
|
||||
assertEquals(0x00FF000000000000L | (i % 5), forwardReader.getDocMeta(docId));
|
||||
assertEquals((i % 3), forwardReader.getHtmlFeatures(docId));
|
||||
assertEquals(i/20, UrlIdCodec.getDomainId(docId));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -13,8 +13,11 @@ java {
|
||||
apply from: "$rootProject.projectDir/srcsets.gradle"
|
||||
|
||||
dependencies {
|
||||
implementation project(':code:libraries:coded-sequence')
|
||||
implementation project(':code:libraries:array')
|
||||
implementation project(':code:common:model')
|
||||
implementation project(':third-party:parquet-floor')
|
||||
implementation project(':third-party:commons-codec')
|
||||
|
||||
implementation libs.bundles.slf4j
|
||||
|
||||
@ -23,6 +26,7 @@ dependencies {
|
||||
implementation libs.guava
|
||||
implementation libs.trove
|
||||
implementation libs.zstd
|
||||
implementation libs.fastutil
|
||||
implementation libs.commons.lang3
|
||||
implementation libs.roaringbitmap
|
||||
|
||||
|
@ -1,27 +0,0 @@
|
||||
package nu.marginalia.index.journal.model;
|
||||
|
||||
import nu.marginalia.model.id.UrlIdCodec;
|
||||
|
||||
/** An entry in the index journal.
|
||||
*
|
||||
* @param header the header of the entry, containing document level data
|
||||
* @param data the data of the entry, containing keyword level data
|
||||
*
|
||||
* @see IndexJournalEntryHeader
|
||||
* @see IndexJournalEntryData
|
||||
*/
|
||||
public record IndexJournalEntry(IndexJournalEntryHeader header, IndexJournalEntryData data) {
|
||||
|
||||
public static IndexJournalEntryBuilder builder(long documentId, long documentMeta) {
|
||||
return new IndexJournalEntryBuilder(0, documentId, documentMeta);
|
||||
}
|
||||
|
||||
public static IndexJournalEntryBuilder builder(int domainId,
|
||||
int urlId,
|
||||
long documentMeta) {
|
||||
|
||||
|
||||
return builder(UrlIdCodec.encodeId(domainId, urlId), documentMeta);
|
||||
}
|
||||
|
||||
}
|
@ -1,37 +0,0 @@
|
||||
package nu.marginalia.index.journal.model;
|
||||
|
||||
import gnu.trove.list.array.TLongArrayList;
|
||||
|
||||
public class IndexJournalEntryBuilder {
|
||||
private final long documentId;
|
||||
private final int documentFeatures;
|
||||
private final long documentMeta;
|
||||
private final TLongArrayList items = new TLongArrayList();
|
||||
|
||||
public IndexJournalEntryBuilder(
|
||||
int documentFeatures,
|
||||
long documentId,
|
||||
long documentMeta) {
|
||||
this.documentFeatures = documentFeatures;
|
||||
this.documentId = documentId;
|
||||
this.documentMeta = documentMeta;
|
||||
}
|
||||
|
||||
public IndexJournalEntryBuilder add(long wordId, long metadata) {
|
||||
|
||||
items.add(wordId);
|
||||
items.add(metadata);
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
public IndexJournalEntry build() {
|
||||
return new IndexJournalEntry(
|
||||
new IndexJournalEntryHeader(items.size(),
|
||||
documentFeatures,
|
||||
documentId,
|
||||
documentMeta),
|
||||
new IndexJournalEntryData(items.toArray())
|
||||
);
|
||||
}
|
||||
}
|
@ -1,77 +1,36 @@
|
||||
package nu.marginalia.index.journal.model;
|
||||
|
||||
import nu.marginalia.index.journal.reader.IndexJournalReader;
|
||||
import nu.marginalia.model.idx.WordMetadata;
|
||||
import nu.marginalia.hash.MurmurHash3_128;
|
||||
import nu.marginalia.sequence.GammaCodedSequence;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
public record IndexJournalEntryData(long[] termIds,
|
||||
long[] metadata,
|
||||
GammaCodedSequence[] positions) {
|
||||
|
||||
/** The keyword data of an index journal entry.
|
||||
* The data itself is an interleaved array of
|
||||
* word ids and metadata.
|
||||
* <p>
|
||||
* Odd entries are term ids, even entries are encoded WordMetadata records.
|
||||
* </p>
|
||||
* <p>The civilized way of reading the journal data is to use an IndexJournalReader</p>
|
||||
*
|
||||
* @see WordMetadata
|
||||
* @see IndexJournalReader
|
||||
*/
|
||||
public class IndexJournalEntryData implements Iterable<IndexJournalEntryData.Record> {
|
||||
private final int size;
|
||||
public final long[] underlyingArray;
|
||||
|
||||
public static final int MAX_LENGTH = 1000;
|
||||
public static final int ENTRY_SIZE = 2;
|
||||
|
||||
public IndexJournalEntryData(long[] underlyingArray) {
|
||||
this.size = underlyingArray.length;
|
||||
this.underlyingArray = underlyingArray;
|
||||
public IndexJournalEntryData {
|
||||
assert termIds.length == metadata.length;
|
||||
assert termIds.length == positions.length;
|
||||
}
|
||||
|
||||
public IndexJournalEntryData(int size, long[] underlyingArray) {
|
||||
this.size = size;
|
||||
this.underlyingArray = underlyingArray;
|
||||
public IndexJournalEntryData(String[] keywords,
|
||||
long[] metadata,
|
||||
GammaCodedSequence[] positions)
|
||||
{
|
||||
this(termIds(keywords), metadata, positions);
|
||||
}
|
||||
|
||||
public long get(int idx) {
|
||||
if (idx >= size)
|
||||
throw new ArrayIndexOutOfBoundsException(idx + " vs " + size);
|
||||
return underlyingArray[idx];
|
||||
}
|
||||
private static final MurmurHash3_128 hash = new MurmurHash3_128();
|
||||
|
||||
public int size() {
|
||||
return size;
|
||||
}
|
||||
public long[] toArray() {
|
||||
if (size == underlyingArray.length)
|
||||
return underlyingArray;
|
||||
else
|
||||
return Arrays.copyOf(underlyingArray, size);
|
||||
return termIds.length;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s[%s]", getClass().getSimpleName(), Arrays.toString(toArray()));
|
||||
}
|
||||
|
||||
public Iterator<Record> iterator() {
|
||||
return new EntryIterator();
|
||||
}
|
||||
|
||||
private class EntryIterator implements Iterator<Record> {
|
||||
int pos = -ENTRY_SIZE;
|
||||
|
||||
public boolean hasNext() {
|
||||
return pos + 2*ENTRY_SIZE - 1 < size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Record next() {
|
||||
pos+=ENTRY_SIZE;
|
||||
|
||||
return new Record(underlyingArray[pos], underlyingArray[pos+1]);
|
||||
private static long[] termIds(String[] keywords) {
|
||||
long[] termIds = new long[keywords.length];
|
||||
for (int i = 0; i < keywords.length; i++) {
|
||||
termIds[i] = hash.hashKeyword(keywords[i]);
|
||||
}
|
||||
return termIds;
|
||||
}
|
||||
|
||||
public record Record(long wordId, long metadata) {}
|
||||
}
|
||||
|
@ -0,0 +1,20 @@
|
||||
package nu.marginalia.index.journal.model;
|
||||
|
||||
import nu.marginalia.sequence.GammaCodedSequence;
|
||||
|
||||
/** Data corresponding to a term in a document in the index journal.
|
||||
*
|
||||
* @param termId the id of the term
|
||||
* @param metadata the metadata of the term
|
||||
* @param positions the positions of the word in the document, gamma coded
|
||||
*
|
||||
* @see GammaCodedSequence
|
||||
*/
|
||||
public record IndexJournalEntryTermData(
|
||||
long termId,
|
||||
long metadata,
|
||||
GammaCodedSequence positions)
|
||||
{
|
||||
|
||||
|
||||
}
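A hypothetical consumer of such a record; positions().values() is the accessor the tests later in this diff use to get the decoded positions back as an IntList:

import it.unimi.dsi.fastutil.ints.IntList;
import nu.marginalia.index.journal.model.IndexJournalEntryTermData;

// Illustrative only: dump one term record from the journal.
class TermDataPrinter {
    static void print(IndexJournalEntryTermData termData) {
        IntList positions = termData.positions().values();  // decode the gamma-coded positions
        System.out.println("termId=" + termData.termId()
                + " meta=" + termData.metadata()
                + " positions=" + positions);
    }
}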
|
@ -1,35 +1,29 @@
|
||||
package nu.marginalia.index.journal.reader;
|
||||
|
||||
import nu.marginalia.index.journal.model.IndexJournalEntryData;
|
||||
import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
|
||||
import nu.marginalia.index.journal.model.IndexJournalEntryTermData;
|
||||
import nu.marginalia.model.id.UrlIdCodec;
|
||||
import nu.marginalia.sequence.GammaCodedSequence;
|
||||
|
||||
import java.io.DataInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.LongBuffer;
|
||||
import java.util.Iterator;
|
||||
|
||||
public class IndexJournalReadEntry {
|
||||
public class IndexJournalReadEntry implements Iterable<IndexJournalEntryTermData> {
|
||||
public final IndexJournalEntryHeader header;
|
||||
|
||||
private final long[] buffer;
|
||||
private final ByteBuffer buffer;
|
||||
private final int initialPos;
|
||||
|
||||
public IndexJournalReadEntry(IndexJournalEntryHeader header, long[] buffer) {
|
||||
public IndexJournalReadEntry(IndexJournalEntryHeader header, ByteBuffer buffer) {
|
||||
this.header = header;
|
||||
this.buffer = buffer;
|
||||
this.initialPos = buffer.position();
|
||||
}
|
||||
|
||||
|
||||
record WorkArea(byte[] bytes, LongBuffer buffer) {
|
||||
WorkArea(byte[] bytes) {
|
||||
this(bytes, ByteBuffer.wrap(bytes).asLongBuffer());
|
||||
}
|
||||
WorkArea() {
|
||||
this(new byte[8*65536]);
|
||||
}
|
||||
}
|
||||
|
||||
static ThreadLocal<WorkArea> pool = ThreadLocal.withInitial(WorkArea::new);
|
||||
static ThreadLocal<ByteBuffer> pool = ThreadLocal.withInitial(() -> ByteBuffer.allocate(8*65536));
|
||||
|
||||
public static IndexJournalReadEntry read(DataInputStream inputStream) throws IOException {
|
||||
|
||||
@ -44,13 +38,11 @@ public class IndexJournalReadEntry {
|
||||
meta);
|
||||
|
||||
var workArea = pool.get();
|
||||
inputStream.readFully(workArea.bytes, 0, 8 * header.entrySize());
|
||||
|
||||
long[] out = new long[header.entrySize()];
|
||||
workArea.buffer.get(0, out, 0, out.length);
|
||||
|
||||
return new IndexJournalReadEntry(header, out);
|
||||
inputStream.readFully(workArea.array(), 0, header.entrySize());
|
||||
workArea.position(0);
|
||||
workArea.limit(header.entrySize());
|
||||
|
||||
return new IndexJournalReadEntry(header, workArea);
|
||||
}
|
||||
|
||||
public long docId() {
|
||||
@ -61,12 +53,54 @@ public class IndexJournalReadEntry {
|
||||
return header.documentMeta();
|
||||
}
|
||||
|
||||
public int documentFeatures() {
|
||||
return header.documentFeatures();
|
||||
}
|
||||
|
||||
public int domainId() {
|
||||
return UrlIdCodec.getDomainId(docId());
|
||||
}
|
||||
|
||||
public IndexJournalEntryData readEntry() {
|
||||
return new IndexJournalEntryData(header.entrySize(), buffer);
|
||||
public void reset() {
|
||||
buffer.position(initialPos);
|
||||
}
|
||||
|
||||
public Iterator<IndexJournalEntryTermData> iterator() {
|
||||
return new TermDataIterator(buffer, initialPos);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class TermDataIterator implements Iterator<IndexJournalEntryTermData> {
|
||||
private final ByteBuffer buffer;
|
||||
|
||||
TermDataIterator(ByteBuffer buffer, int initialPos) {
|
||||
this.buffer = buffer;
|
||||
this.buffer.position(initialPos);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return buffer.position() < buffer.limit();
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexJournalEntryTermData next() {
|
||||
// read the metadata for the term
|
||||
long termId = buffer.getLong();
|
||||
long meta = buffer.getLong();
|
||||
|
||||
// read the size of the sequence data
|
||||
int size = buffer.get() & 0xFF;
|
||||
|
||||
// slice the buffer to get the sequence data
|
||||
var slice = buffer.slice(buffer.position(), size);
|
||||
var sequence = new GammaCodedSequence(slice);
|
||||
|
||||
// advance the buffer position to the next term
|
||||
buffer.position(buffer.position() + size);
|
||||
|
||||
return new IndexJournalEntryTermData(termId, meta, sequence);
|
||||
}
|
||||
|
||||
}
|
@ -12,6 +12,9 @@ public interface IndexJournalReader {
|
||||
int FILE_HEADER_SIZE_LONGS = 2;
|
||||
int FILE_HEADER_SIZE_BYTES = 8 * FILE_HEADER_SIZE_LONGS;
|
||||
|
||||
int DOCUMENT_HEADER_SIZE_BYTES = 24;
|
||||
int TERM_HEADER_SIZE_BYTES = 17;
|
||||
|
||||
/** Create a reader for a single file. */
|
||||
static IndexJournalReader singleFile(Path fileName) throws IOException {
|
||||
return new IndexJournalReaderSingleFile(fileName);
|
||||
@ -25,22 +28,23 @@ public interface IndexJournalReader {
|
||||
default void forEachWordId(LongConsumer consumer) {
|
||||
var ptr = this.newPointer();
|
||||
while (ptr.nextDocument()) {
|
||||
while (ptr.nextRecord()) {
|
||||
consumer.accept(ptr.wordId());
|
||||
for (var termData : ptr) {
|
||||
consumer.accept(termData.termId());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
default void forEachDocId(LongConsumer consumer) {
|
||||
var ptr = this.newPointer();
|
||||
while (ptr.nextDocument()) {
|
||||
consumer.accept(ptr.documentId());
|
||||
default void forEachDocId(LongConsumer consumer) throws IOException {
|
||||
try (var ptr = this.newPointer()) {
|
||||
while (ptr.nextDocument()) {
|
||||
consumer.accept(ptr.documentId());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Create a new pointer to the journal. The IndexJournalPointer is
|
||||
* a two-tiered iterator that allows both iteration over document records
|
||||
* and their keywords
|
||||
* and the terms within each document.
|
||||
*/
|
||||
IndexJournalPointer newPointer();
|
||||
|
||||
|
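A hypothetical caller of the reworked pointer API, mirroring the forEach* defaults above: nextDocument() advances the outer document tier, and the pointer itself is now Iterable over the current document's term data (the dump helper is made up):

import nu.marginalia.index.journal.reader.IndexJournalReader;

import java.io.IOException;

// Illustrative traversal of the two-tiered journal pointer.
class JournalDumpSketch {
    static void dump(IndexJournalReader reader) throws IOException {
        try (var ptr = reader.newPointer()) {
            while (ptr.nextDocument()) {                 // outer tier: documents
                for (var termData : ptr) {               // inner tier: terms of this document
                    System.out.println(ptr.documentId() + " " + termData.termId());
                }
            }
        }
    }
}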
@ -16,12 +16,15 @@ public class IndexJournalReaderPagingImpl implements IndexJournalReader {
|
||||
private final List<IndexJournalReader> readers;
|
||||
|
||||
public IndexJournalReaderPagingImpl(Path baseDir) throws IOException {
|
||||
var inputFiles = IndexJournalFileNames.findJournalFiles(baseDir);
|
||||
if (inputFiles.isEmpty())
|
||||
this(IndexJournalFileNames.findJournalFiles(baseDir));
|
||||
|
||||
if (readers.isEmpty())
|
||||
logger.warn("Creating paging index journal file in {}, found no inputs!", baseDir);
|
||||
else
|
||||
logger.info("Creating paging index journal reader for {} inputs", inputFiles.size());
|
||||
logger.info("Creating paging index journal reader for {} inputs", readers.size());
|
||||
}
|
||||
|
||||
public IndexJournalReaderPagingImpl(List<Path> inputFiles) throws IOException {
|
||||
this.readers = new ArrayList<>(inputFiles.size());
|
||||
|
||||
for (var inputFile : inputFiles) {
|
||||
|
@ -2,18 +2,20 @@ package nu.marginalia.index.journal.reader;
|
||||
|
||||
import com.github.luben.zstd.ZstdInputStream;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.index.journal.model.IndexJournalEntryData;
|
||||
import nu.marginalia.index.journal.model.IndexJournalEntryTermData;
|
||||
import nu.marginalia.index.journal.model.IndexJournalFileHeader;
|
||||
import nu.marginalia.index.journal.reader.pointer.IndexJournalPointer;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.Iterator;
|
||||
|
||||
public class IndexJournalReaderSingleFile implements IndexJournalReader {
|
||||
|
||||
private Path journalFile;
|
||||
private final Path journalFile;
|
||||
public final IndexJournalFileHeader fileHeader;
|
||||
|
||||
@Override
|
||||
@ -58,8 +60,6 @@ class SingleFileJournalPointer implements IndexJournalPointer {
|
||||
private final IndexJournalFileHeader fileHeader;
|
||||
private final DataInputStream dataInputStream;
|
||||
private IndexJournalReadEntry entry;
|
||||
private IndexJournalEntryData entryData;
|
||||
private int recordIdx = -2;
|
||||
private int docIdx = -1;
|
||||
|
||||
public SingleFileJournalPointer(
|
||||
@ -73,9 +73,6 @@ class SingleFileJournalPointer implements IndexJournalPointer {
|
||||
@SneakyThrows
|
||||
@Override
|
||||
public boolean nextDocument() {
|
||||
recordIdx = -2;
|
||||
entryData = null;
|
||||
|
||||
if (++docIdx < fileHeader.fileSizeRecords()) {
|
||||
entry = IndexJournalReadEntry.read(dataInputStream);
|
||||
return true;
|
||||
@ -86,19 +83,6 @@ class SingleFileJournalPointer implements IndexJournalPointer {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean nextRecord() {
|
||||
if (entryData == null) {
|
||||
entryData = entry.readEntry();
|
||||
}
|
||||
|
||||
recordIdx += 2;
|
||||
if (recordIdx < entryData.size()) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long documentId() {
|
||||
return entry.docId();
|
||||
@ -109,22 +93,21 @@ class SingleFileJournalPointer implements IndexJournalPointer {
|
||||
return entry.docMeta();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public long wordId() {
|
||||
return entryData.get(recordIdx);
|
||||
public int documentFeatures() { return entry.documentFeatures(); }
|
||||
|
||||
/** Return an iterator over the terms in the current document.
|
||||
* This iterator is not valid after calling nextDocument().
|
||||
*/
|
||||
@NotNull
|
||||
@Override
|
||||
public Iterator<IndexJournalEntryTermData> iterator() {
|
||||
return entry.iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long wordMeta() {
|
||||
return entryData.get(recordIdx + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int documentFeatures() {
|
||||
if (entryData == null) {
|
||||
entryData = entry.readEntry();
|
||||
}
|
||||
|
||||
return entry.header.documentFeatures();
|
||||
public void close() throws IOException {
|
||||
dataInputStream.close();
|
||||
}
|
||||
}
|
@ -1,5 +1,10 @@
|
||||
package nu.marginalia.index.journal.reader.pointer;
|
||||
|
||||
import nu.marginalia.index.journal.model.IndexJournalEntryTermData;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.function.LongPredicate;
|
||||
|
||||
/**
|
||||
@ -13,7 +18,7 @@ import java.util.function.LongPredicate;
|
||||
* nextDocument() will move the pointer from doc1 to doc2;<br>
|
||||
* nextRecord() will move the pointer from word1 to word2...<br>
|
||||
*/
|
||||
public interface IndexJournalPointer {
|
||||
public interface IndexJournalPointer extends Iterable<IndexJournalEntryTermData>, AutoCloseable {
|
||||
/**
|
||||
* Advance to the next document in the journal,
|
||||
* returning true if such a document exists.
|
||||
@ -22,11 +27,6 @@ public interface IndexJournalPointer {
|
||||
*/
|
||||
boolean nextDocument();
|
||||
|
||||
/**
|
||||
* Advance to the next record in the journal
|
||||
*/
|
||||
boolean nextRecord();
|
||||
|
||||
/**
|
||||
* Get the id associated with the current document
|
||||
*/
|
||||
@ -37,16 +37,6 @@ public interface IndexJournalPointer {
|
||||
*/
|
||||
long documentMeta();
|
||||
|
||||
/**
|
||||
* Get the wordId associated with the current record
|
||||
*/
|
||||
long wordId();
|
||||
|
||||
/**
|
||||
* Get the termMeta associated with the current record
|
||||
*/
|
||||
long wordMeta();
|
||||
|
||||
/**
|
||||
* Get the documentFeatures associated with the current record
|
||||
*/
|
||||
@ -64,6 +54,8 @@ public interface IndexJournalPointer {
|
||||
default IndexJournalPointer filterWordMeta(LongPredicate filter) {
|
||||
return new FilteringJournalPointer(this, filter);
|
||||
}
|
||||
|
||||
void close() throws IOException;
|
||||
}
|
||||
|
||||
class JoiningJournalPointer implements IndexJournalPointer {
|
||||
@ -86,11 +78,6 @@ class JoiningJournalPointer implements IndexJournalPointer {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean nextRecord() {
|
||||
return pointers[pIndex].nextRecord();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long documentId() {
|
||||
return pointers[pIndex].documentId();
|
||||
@ -101,20 +88,28 @@ class JoiningJournalPointer implements IndexJournalPointer {
|
||||
return pointers[pIndex].documentMeta();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long wordId() {
|
||||
return pointers[pIndex].wordId();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long wordMeta() {
|
||||
return pointers[pIndex].wordMeta();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int documentFeatures() {
|
||||
return pointers[pIndex].documentFeatures();
|
||||
}
|
||||
|
||||
@NotNull
|
||||
@Override
|
||||
public Iterator<IndexJournalEntryTermData> iterator() {
|
||||
return pointers[pIndex].iterator();
|
||||
}
|
||||
|
||||
public void close() {
|
||||
for (var p : pointers) {
|
||||
try {
|
||||
p.close();
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
class FilteringJournalPointer implements IndexJournalPointer {
|
||||
@ -128,14 +123,10 @@ class FilteringJournalPointer implements IndexJournalPointer {
|
||||
|
||||
@Override
|
||||
public boolean nextDocument() {
|
||||
return base.nextDocument();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean nextRecord() {
|
||||
while (base.nextRecord()) {
|
||||
if (filter.test(wordMeta()))
|
||||
while (base.nextDocument()) {
|
||||
if (iterator().hasNext()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -150,18 +141,49 @@ class FilteringJournalPointer implements IndexJournalPointer {
|
||||
return base.documentMeta();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long wordId() {
|
||||
return base.wordId();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long wordMeta() {
|
||||
return base.wordMeta();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int documentFeatures() {
|
||||
return base.documentFeatures();
|
||||
}
|
||||
|
||||
@NotNull
|
||||
@Override
|
||||
public Iterator<IndexJournalEntryTermData> iterator() {
|
||||
|
||||
return new Iterator<>() {
|
||||
private final Iterator<IndexJournalEntryTermData> baseIter = base.iterator();
|
||||
private IndexJournalEntryTermData value = null;
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
if (value != null) {
|
||||
return true;
|
||||
}
|
||||
while (baseIter.hasNext()) {
|
||||
value = baseIter.next();
|
||||
if (filter.test(value.metadata())) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
value = null;
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexJournalEntryTermData next() {
|
||||
if (hasNext()) {
|
||||
var ret = value;
|
||||
value = null;
|
||||
return ret;
|
||||
} else {
|
||||
throw new IllegalStateException("No more elements");
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
base.close();
|
||||
}
|
||||
}
|
@ -1,8 +1,8 @@
|
||||
package nu.marginalia.index.journal.writer;
|
||||
|
||||
import nu.marginalia.index.journal.model.IndexJournalEntry;
|
||||
import nu.marginalia.index.journal.model.IndexJournalEntryData;
|
||||
import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
|
||||
import nu.marginalia.index.journal.model.IndexJournalEntryTermData;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
@ -12,18 +12,7 @@ import java.io.IOException;
|
||||
* @see IndexJournalWriterPagingImpl
|
||||
*/
|
||||
public interface IndexJournalWriter extends AutoCloseable {
|
||||
/** Write an entry to the journal.
|
||||
*
|
||||
* @param header the header of the entry
|
||||
* @param entry the data of the entry
|
||||
*
|
||||
* @return the number of bytes written
|
||||
*/
|
||||
int put(IndexJournalEntryHeader header, IndexJournalEntryData entry);
|
||||
default int put(IndexJournalEntry entry) {
|
||||
return put(entry.header(), entry.data());
|
||||
}
|
||||
|
||||
void close() throws IOException;
|
||||
|
||||
int put(IndexJournalEntryHeader header, IndexJournalEntryData data);
|
||||
}
|
||||
|
@ -49,13 +49,14 @@ public class IndexJournalWriterPagingImpl implements IndexJournalWriter {
|
||||
|
||||
@Override
|
||||
@SneakyThrows
|
||||
public int put(IndexJournalEntryHeader header, IndexJournalEntryData entry) {
|
||||
public int put(IndexJournalEntryHeader header, IndexJournalEntryData data)
|
||||
{
|
||||
if (bytesWritten >= sizeLimitBytes) {
|
||||
bytesWritten = 0;
|
||||
switchToNextWriter();
|
||||
}
|
||||
|
||||
int writtenNow = currentWriter.put(header, entry);
|
||||
int writtenNow = currentWriter.put(header, data);
|
||||
bytesWritten += writtenNow;
|
||||
|
||||
return writtenNow;
|
||||
|
@ -2,8 +2,9 @@ package nu.marginalia.index.journal.writer;
|
||||
|
||||
import com.github.luben.zstd.ZstdDirectBufferCompressingStream;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.index.journal.model.IndexJournalEntryData;
|
||||
import nu.marginalia.hash.MurmurHash3_128;
|
||||
import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
|
||||
import nu.marginalia.index.journal.model.IndexJournalEntryData;
|
||||
import nu.marginalia.index.journal.reader.IndexJournalReader;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
@ -22,6 +23,8 @@ public class IndexJournalWriterSingleFileImpl implements IndexJournalWriter{
|
||||
private static final int ZSTD_BUFFER_SIZE = 8192;
|
||||
private static final int DATA_BUFFER_SIZE = 8192;
|
||||
|
||||
private final MurmurHash3_128 hasher = new MurmurHash3_128();
|
||||
|
||||
private final ByteBuffer dataBuffer = ByteBuffer.allocateDirect(DATA_BUFFER_SIZE);
|
||||
|
||||
private final ZstdDirectBufferCompressingStream compressingStream;
|
||||
@ -75,36 +78,48 @@ public class IndexJournalWriterSingleFileImpl implements IndexJournalWriter{
|
||||
|
||||
@Override
|
||||
@SneakyThrows
|
||||
public int put(IndexJournalEntryHeader header, IndexJournalEntryData entry) {
|
||||
public int put(IndexJournalEntryHeader header,
|
||||
IndexJournalEntryData data)
|
||||
{
|
||||
if (dataBuffer.capacity() - dataBuffer.position() < 3*8) {
|
||||
dataBuffer.flip();
|
||||
compressingStream.compress(dataBuffer);
|
||||
dataBuffer.clear();
|
||||
}
|
||||
|
||||
dataBuffer.putInt(entry.size());
|
||||
final long[] keywords = data.termIds();
|
||||
final long[] metadata = data.metadata();
|
||||
final var positions = data.positions();
|
||||
|
||||
int recordSize = 0; // document header size is 3 longs
|
||||
for (int i = 0; i < keywords.length; i++) {
|
||||
// term header: termId + metadata (2 longs) plus a 1-byte positions length = TERM_HEADER_SIZE_BYTES
|
||||
recordSize += IndexJournalReader.TERM_HEADER_SIZE_BYTES + positions[i].size();
|
||||
}
|
||||
|
||||
dataBuffer.putInt(recordSize);
|
||||
dataBuffer.putInt(header.documentFeatures());
|
||||
dataBuffer.putLong(header.combinedId());
|
||||
dataBuffer.putLong(header.documentMeta());
|
||||
|
||||
for (int i = 0; i < entry.size(); ) {
|
||||
int remaining = (dataBuffer.capacity() - dataBuffer.position()) / 8;
|
||||
if (remaining <= 0) {
|
||||
for (int i = 0; i < keywords.length; i++) {
|
||||
int requiredSize = IndexJournalReader.TERM_HEADER_SIZE_BYTES + positions[i].size();
|
||||
|
||||
if (dataBuffer.capacity() - dataBuffer.position() < requiredSize) {
|
||||
dataBuffer.flip();
|
||||
compressingStream.compress(dataBuffer);
|
||||
dataBuffer.clear();
|
||||
}
|
||||
else while (remaining-- > 0 && i < entry.size()) {
|
||||
|
||||
dataBuffer.putLong(entry.underlyingArray[i++]);
|
||||
}
|
||||
dataBuffer.putLong(keywords[i]);
|
||||
dataBuffer.putLong(metadata[i]);
|
||||
dataBuffer.put((byte) positions[i].size());
|
||||
dataBuffer.put(positions[i].buffer());
|
||||
}
|
||||
|
||||
numEntries++;
|
||||
|
||||
final int bytesWritten = 8 * ( /*header = 3 longs */ 3 + entry.size());
|
||||
|
||||
return bytesWritten;
|
||||
return recordSize;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
@ -121,7 +136,7 @@ public class IndexJournalWriterSingleFileImpl implements IndexJournalWriter{
|
||||
|
||||
|
||||
// Finalize the file by writing a header in the beginning
|
||||
ByteBuffer header = ByteBuffer.allocate(16);
|
||||
ByteBuffer header = ByteBuffer.allocate(IndexJournalReader.FILE_HEADER_SIZE_BYTES);
|
||||
header.putLong(numEntries);
|
||||
header.putLong(0); // reserved for future use
|
||||
header.flip();
|
||||
|
@ -1,6 +1,5 @@
|
||||
package nu.marginalia.index.journal;
|
||||
|
||||
import nu.marginalia.index.journal.model.IndexJournalEntry;
|
||||
import nu.marginalia.index.journal.reader.IndexJournalReader;
|
||||
import nu.marginalia.index.journal.reader.IndexJournalReaderSingleFile;
|
||||
import nu.marginalia.index.journal.writer.IndexJournalWriterSingleFileImpl;
|
||||
@ -18,52 +17,52 @@ import java.util.List;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
public class IndexJournalTest {
|
||||
Path tempFile;
|
||||
IndexJournalReader reader;
|
||||
|
||||
long firstDocId = UrlIdCodec.encodeId(44, 10);
|
||||
long secondDocId = UrlIdCodec.encodeId(43, 15);
|
||||
|
||||
@BeforeEach
|
||||
public void setUp() throws IOException {
|
||||
tempFile = Files.createTempFile(getClass().getSimpleName(), ".dat");
|
||||
|
||||
var journalWriter = new IndexJournalWriterSingleFileImpl( tempFile);
|
||||
journalWriter.put(IndexJournalEntry.builder(44, 10, 55)
|
||||
.add(1, 2)
|
||||
.add(2, 3)
|
||||
.add(3, 4)
|
||||
.add(5, 6).build());
|
||||
|
||||
journalWriter.put(IndexJournalEntry.builder(43, 15, 10)
|
||||
.add(5, 5)
|
||||
.add(6, 6)
|
||||
.build());
|
||||
journalWriter.close();
|
||||
|
||||
reader = new IndexJournalReaderSingleFile(tempFile);
|
||||
}
|
||||
@AfterEach
|
||||
public void tearDown() throws IOException {
|
||||
Files.delete(tempFile);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void forEachDocId() {
|
||||
List<Long> expected = List.of(firstDocId, secondDocId);
|
||||
List<Long> actual = new ArrayList<>();
|
||||
|
||||
reader.forEachDocId(actual::add);
|
||||
assertEquals(expected, actual);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void forEachWordId() {
|
||||
List<Integer> expected = List.of(1, 2, 3, 5, 5 ,6);
|
||||
List<Integer> actual = new ArrayList<>();
|
||||
|
||||
reader.forEachWordId(i -> actual.add((int) i));
|
||||
assertEquals(expected, actual);
|
||||
}
|
||||
// Path tempFile;
|
||||
// IndexJournalReader reader;
|
||||
//
|
||||
// long firstDocId = UrlIdCodec.encodeId(44, 10);
|
||||
// long secondDocId = UrlIdCodec.encodeId(43, 15);
|
||||
//
|
||||
// @BeforeEach
|
||||
// public void setUp() throws IOException {
|
||||
// tempFile = Files.createTempFile(getClass().getSimpleName(), ".dat");
|
||||
//
|
||||
// var journalWriter = new IndexJournalWriterSingleFileImpl( tempFile);
|
||||
// journalWriter.put(IndexJournalEntry.builder(44, 10, 55)
|
||||
// .add(1, 2)
|
||||
// .add(2, 3)
|
||||
// .add(3, 4)
|
||||
// .add(5, 6).build());
|
||||
//
|
||||
// journalWriter.put(IndexJournalEntry.builder(43, 15, 10)
|
||||
// .add(5, 5)
|
||||
// .add(6, 6)
|
||||
// .build());
|
||||
// journalWriter.close();
|
||||
//
|
||||
// reader = new IndexJournalReaderSingleFile(tempFile);
|
||||
// }
|
||||
// @AfterEach
|
||||
// public void tearDown() throws IOException {
|
||||
// Files.delete(tempFile);
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void forEachDocId() {
|
||||
// List<Long> expected = List.of(firstDocId, secondDocId);
|
||||
// List<Long> actual = new ArrayList<>();
|
||||
//
|
||||
// reader.forEachDocId(actual::add);
|
||||
// assertEquals(expected, actual);
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void forEachWordId() {
|
||||
// List<Integer> expected = List.of(1, 2, 3, 5, 5 ,6);
|
||||
// List<Integer> actual = new ArrayList<>();
|
||||
//
|
||||
// reader.forEachWordId(i -> actual.add((int) i));
|
||||
// assertEquals(expected, actual);
|
||||
// }
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,367 @@
|
||||
package nu.marginalia.index.journal;
|
||||
|
||||
import it.unimi.dsi.fastutil.ints.IntList;
|
||||
import nu.marginalia.hash.MurmurHash3_128;
|
||||
import nu.marginalia.index.journal.model.IndexJournalEntryData;
|
||||
import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
|
||||
import nu.marginalia.index.journal.model.IndexJournalEntryTermData;
|
||||
import nu.marginalia.index.journal.reader.IndexJournalReaderPagingImpl;
|
||||
import nu.marginalia.index.journal.writer.IndexJournalWriterSingleFileImpl;
|
||||
import nu.marginalia.index.journal.reader.IndexJournalReaderSingleFile;
|
||||
import nu.marginalia.sequence.GammaCodedSequence;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
public class IndexJournalWriterTest {
|
||||
Path tempFile;
|
||||
Path tempFile2;
|
||||
ByteBuffer workArea = ByteBuffer.allocate(1024);
|
||||
|
||||
@BeforeEach
|
||||
public void setUp() throws IOException {
|
||||
tempFile = Files.createTempFile(getClass().getSimpleName(), ".dat");
|
||||
tempFile2 = Files.createTempFile(getClass().getSimpleName(), ".dat");
|
||||
}
|
||||
@AfterEach
|
||||
public void tearDown() throws IOException {
|
||||
Files.delete(tempFile);
|
||||
Files.delete(tempFile2);
|
||||
}
|
||||
|
||||
private GammaCodedSequence gcs(int... values) {
|
||||
return GammaCodedSequence.generate(workArea, values);
|
||||
}
|
||||
|
||||
static MurmurHash3_128 hasher = new MurmurHash3_128();
|
||||
static long wordId(String str) {
|
||||
return hasher.hashKeyword(str);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSingleFile() {
|
||||
try (var writer = new IndexJournalWriterSingleFileImpl(tempFile)) {
|
||||
// Write two documents with two terms each
|
||||
writer.put(new IndexJournalEntryHeader(11, 22, 33),
|
||||
new IndexJournalEntryData(
|
||||
new String[]{"word1", "word2"},
|
||||
new long[]{44, 55},
|
||||
new GammaCodedSequence[]{
|
||||
gcs(1, 3, 5),
|
||||
gcs(2, 4, 6),
|
||||
})
|
||||
);
|
||||
writer.put(new IndexJournalEntryHeader(12, 23, 34),
|
||||
new IndexJournalEntryData(
|
||||
new String[]{"word1", "word2"},
|
||||
new long[]{45, 56},
|
||||
new GammaCodedSequence[]{
|
||||
gcs(2, 4, 6),
|
||||
gcs(3, 5, 7),
|
||||
})
|
||||
);
|
||||
}
|
||||
catch (IOException ex) {
|
||||
Assertions.fail(ex);
|
||||
}
|
||||
|
||||
// Read the journal back
|
||||
|
||||
try {
|
||||
var reader = new IndexJournalReaderSingleFile(tempFile);
|
||||
|
||||
Iterator<IndexJournalEntryTermData> iter;
|
||||
IndexJournalEntryTermData termData;
|
||||
|
||||
try (var ptr = reader.newPointer()) {
|
||||
|
||||
/** DOCUMENT 1 */
|
||||
assertTrue(ptr.nextDocument());
|
||||
assertEquals(11, ptr.documentId());
|
||||
assertEquals(22, ptr.documentFeatures());
|
||||
assertEquals(33, ptr.documentMeta());
|
||||
|
||||
iter = ptr.iterator();
|
||||
|
||||
// Term 1
|
||||
assertTrue(iter.hasNext());
|
||||
termData = iter.next();
|
||||
assertEquals(wordId("word1"), termData.termId());
|
||||
assertEquals(44, termData.metadata());
|
||||
assertEquals(IntList.of(1, 3, 5), termData.positions().values());
|
||||
|
||||
// Term 2
|
||||
assertTrue(iter.hasNext());
|
||||
termData = iter.next();
|
||||
assertEquals(wordId("word2"), termData.termId());
|
||||
assertEquals(55, termData.metadata());
|
||||
assertEquals(IntList.of(2, 4, 6), termData.positions().values());
|
||||
|
||||
// No more terms
|
||||
|
||||
assertFalse(iter.hasNext());
|
||||
|
||||
/** DOCUMENT 2 */
|
||||
assertTrue(ptr.nextDocument());
|
||||
assertEquals(12, ptr.documentId());
|
||||
assertEquals(23, ptr.documentFeatures());
|
||||
assertEquals(34, ptr.documentMeta());
|
||||
|
||||
iter = ptr.iterator();
|
||||
// Term 1
|
||||
assertTrue(iter.hasNext());
|
||||
termData = iter.next();
|
||||
assertEquals(wordId("word1"), termData.termId());
|
||||
assertEquals(45, termData.metadata());
|
||||
assertEquals(IntList.of(2, 4, 6), termData.positions().values());
|
||||
|
||||
// Term 2
|
||||
assertTrue(iter.hasNext());
|
||||
termData = iter.next();
|
||||
assertEquals(wordId("word2"), termData.termId());
|
||||
assertEquals(56, termData.metadata());
|
||||
assertEquals(IntList.of(3, 5, 7), termData.positions().values());
|
||||
|
||||
// No more terms
|
||||
assertFalse(iter.hasNext());
|
||||
|
||||
// No more documents
|
||||
assertFalse(ptr.nextDocument());
|
||||
}
|
||||
}
|
||||
catch (IOException ex) {
|
||||
Assertions.fail(ex);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultiFile() {
|
||||
try (var writer = new IndexJournalWriterSingleFileImpl(tempFile)) {
|
||||
writer.put(new IndexJournalEntryHeader(11, 22, 33),
|
||||
new IndexJournalEntryData(
|
||||
new String[]{"word1", "word2"},
|
||||
new long[]{44, 55},
|
||||
new GammaCodedSequence[]{
|
||||
gcs(1, 3, 5),
|
||||
gcs(2, 4, 6),
|
||||
})
|
||||
);
|
||||
}
|
||||
catch (IOException ex) {
|
||||
Assertions.fail(ex);
|
||||
}
|
||||
|
||||
try (var writer = new IndexJournalWriterSingleFileImpl(tempFile2)) {
|
||||
writer.put(new IndexJournalEntryHeader(12, 23, 34),
|
||||
new IndexJournalEntryData(
|
||||
new String[]{"word1", "word2"},
|
||||
new long[]{45, 56},
|
||||
new GammaCodedSequence[]{
|
||||
gcs(2, 4, 6),
|
||||
gcs(3, 5, 7),
|
||||
})
|
||||
);
|
||||
}
|
||||
catch (IOException ex) {
|
||||
Assertions.fail(ex);
|
||||
}
|
||||
|
||||
// Read the journal back
|
||||
|
||||
try {
|
||||
var reader = new IndexJournalReaderPagingImpl(List.of(tempFile, tempFile2));
|
||||
|
||||
Iterator<IndexJournalEntryTermData> iter;
|
||||
IndexJournalEntryTermData termData;
|
||||
|
||||
try (var ptr = reader.newPointer()) {
|
||||
|
||||
/** DOCUMENT 1 */
|
||||
assertTrue(ptr.nextDocument());
|
||||
assertEquals(11, ptr.documentId());
|
||||
assertEquals(22, ptr.documentFeatures());
|
||||
assertEquals(33, ptr.documentMeta());
|
||||
|
||||
iter = ptr.iterator();
|
||||
|
||||
// Term 1
|
||||
assertTrue(iter.hasNext());
|
||||
termData = iter.next();
|
||||
assertEquals(wordId("word1"), termData.termId());
|
||||
assertEquals(44, termData.metadata());
|
||||
assertEquals(IntList.of(1, 3, 5), termData.positions().values());
|
||||
|
||||
// Term 2
|
||||
assertTrue(iter.hasNext());
|
||||
termData = iter.next();
|
||||
assertEquals(wordId("word2"), termData.termId());
|
||||
assertEquals(55, termData.metadata());
|
||||
assertEquals(IntList.of(2, 4, 6), termData.positions().values());
|
||||
|
||||
// No more terms
|
||||
|
||||
assertFalse(iter.hasNext());
|
||||
|
||||
/** DOCUMENT 2 */
|
||||
assertTrue(ptr.nextDocument());
|
||||
assertEquals(12, ptr.documentId());
|
||||
assertEquals(23, ptr.documentFeatures());
|
||||
assertEquals(34, ptr.documentMeta());
|
||||
|
||||
iter = ptr.iterator();
|
||||
// Term 1
|
||||
assertTrue(iter.hasNext());
|
||||
termData = iter.next();
|
||||
assertEquals(wordId("word1"), termData.termId());
|
||||
assertEquals(45, termData.metadata());
|
||||
assertEquals(IntList.of(2, 4, 6), termData.positions().values());
|
||||
|
||||
// Term 2
|
||||
assertTrue(iter.hasNext());
|
||||
termData = iter.next();
|
||||
assertEquals(wordId("word2"), termData.termId());
|
||||
assertEquals(56, termData.metadata());
|
||||
assertEquals(IntList.of(3, 5, 7), termData.positions().values());
|
||||
|
||||
// No more terms
|
||||
assertFalse(iter.hasNext());
|
||||
|
||||
// No more documents
|
||||
assertFalse(ptr.nextDocument());
|
||||
}
|
||||
}
|
||||
catch (IOException ex) {
|
||||
Assertions.fail(ex);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSingleFileIterTwice() {
|
||||
try (var writer = new IndexJournalWriterSingleFileImpl(tempFile)) {
|
||||
// Write two documents with two terms each
|
||||
writer.put(new IndexJournalEntryHeader(11, 22, 33),
|
||||
new IndexJournalEntryData(
|
||||
new String[]{"word1", "word2"},
|
||||
new long[]{44, 55},
|
||||
new GammaCodedSequence[]{
|
||||
gcs(1, 3, 5),
|
||||
gcs(2, 4, 6),
|
||||
})
|
||||
);
|
||||
}
|
||||
catch (IOException ex) {
|
||||
Assertions.fail(ex);
|
||||
}
|
||||
|
||||
// Read the journal back
|
||||
|
||||
try {
|
||||
var reader = new IndexJournalReaderSingleFile(tempFile);
|
||||
|
||||
Iterator<IndexJournalEntryTermData> iter;
|
||||
IndexJournalEntryTermData termData;
|
||||
|
||||
try (var ptr = reader.newPointer()) {
|
||||
|
||||
/** DOCUMENT 1 */
|
||||
assertTrue(ptr.nextDocument());
|
||||
assertEquals(11, ptr.documentId());
|
||||
assertEquals(22, ptr.documentFeatures());
|
||||
assertEquals(33, ptr.documentMeta());
|
||||
|
||||
iter = ptr.iterator();
|
||||
// Term 1
|
||||
assertTrue(iter.hasNext());
|
||||
termData = iter.next();
|
||||
assertEquals(wordId("word1"), termData.termId());
|
||||
assertEquals(44, termData.metadata());
|
||||
assertEquals(IntList.of(1, 3, 5), termData.positions().values());
|
||||
|
||||
// Ensure we can iterate again over the same document without persisting state or closing the pointer
|
||||
|
||||
iter = ptr.iterator();
|
||||
// Term 1
|
||||
assertTrue(iter.hasNext());
|
||||
termData = iter.next();
|
||||
assertEquals(wordId("word1"), termData.termId());
|
||||
assertEquals(44, termData.metadata());
|
||||
assertEquals(IntList.of(1, 3, 5), termData.positions().values());
|
||||
}
|
||||
}
|
||||
catch (IOException ex) {
|
||||
Assertions.fail(ex);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFiltered() {
|
||||
try (var writer = new IndexJournalWriterSingleFileImpl(tempFile)) {
|
||||
// Write two documents with two terms each
|
||||
writer.put(new IndexJournalEntryHeader(11, 22, 33),
|
||||
new IndexJournalEntryData(
|
||||
new String[]{"word1", "word2"},
|
||||
new long[]{44, 55},
|
||||
new GammaCodedSequence[]{
|
||||
gcs(1, 3, 5),
|
||||
gcs(2, 4, 6),
|
||||
})
|
||||
);
|
||||
writer.put(new IndexJournalEntryHeader(12, 23, 34),
|
||||
new IndexJournalEntryData(
|
||||
new String[]{"word1", "word2"},
|
||||
new long[]{45, 56},
|
||||
new GammaCodedSequence[]{
|
||||
gcs(2, 4, 6),
|
||||
gcs(3, 5, 7),
|
||||
}
|
||||
));
|
||||
}
|
||||
catch (IOException ex) {
|
||||
Assertions.fail(ex);
|
||||
}
|
||||
|
||||
// Read the journal back
|
||||
|
||||
try {
|
||||
var reader = new IndexJournalReaderSingleFile(tempFile).filtering(meta -> meta == 45);
|
||||
|
||||
Iterator<IndexJournalEntryTermData> iter;
|
||||
IndexJournalEntryTermData termData;
|
||||
|
||||
try (var ptr = reader.newPointer()) {
|
||||
/** DOCUMENT 2 */
|
||||
assertTrue(ptr.nextDocument());
|
||||
assertEquals(12, ptr.documentId());
|
||||
assertEquals(23, ptr.documentFeatures());
|
||||
assertEquals(34, ptr.documentMeta());
|
||||
|
||||
iter = ptr.iterator();
|
||||
// Term 1
|
||||
assertTrue(iter.hasNext());
|
||||
termData = iter.next();
|
||||
assertEquals(wordId("word1"), termData.termId());
|
||||
assertEquals(45, termData.metadata());
|
||||
assertEquals(IntList.of(2, 4, 6), termData.positions().values());
|
||||
|
||||
// No more terms
|
||||
assertFalse(iter.hasNext());
|
||||
// No more documents
|
||||
assertFalse(ptr.nextDocument());
|
||||
}
|
||||
}
|
||||
catch (IOException ex) {
|
||||
Assertions.fail(ex);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -9,125 +9,125 @@ import java.util.ArrayList;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
class IndexJournalPointerTest {
|
||||
|
||||
@Test
|
||||
public void concatenate() {
|
||||
MockPointer left = new MockPointer(
|
||||
List.of(new MockDocument(1, 2, 3, List.of(
|
||||
new MockRecord(4, 5),
|
||||
new MockRecord(6, 7))
|
||||
))
|
||||
);
|
||||
|
||||
MockPointer right = new MockPointer(
|
||||
List.of(new MockDocument(8, 9, 10, List.of(
|
||||
new MockRecord(11, 12),
|
||||
new MockRecord(13, 14))
|
||||
))
|
||||
);
|
||||
|
||||
IndexJournalPointer concatenated = IndexJournalPointer.concatenate(left, right);
|
||||
List<Long> docIdsSeq = new ArrayList<>();
|
||||
List<Long> wordIdsSeq = new ArrayList<>();
|
||||
while (concatenated.nextDocument()) {
|
||||
docIdsSeq.add(concatenated.documentId());
|
||||
while (concatenated.nextRecord()) {
|
||||
wordIdsSeq.add(concatenated.wordId());
|
||||
}
|
||||
}
|
||||
|
||||
assertEquals(docIdsSeq, List.of(1L, 8L));
|
||||
assertEquals(wordIdsSeq, List.of(4L, 6L, 11L, 13L));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void filter() {
|
||||
MockPointer left = new MockPointer(
|
||||
List.of(new MockDocument(1, 2, 3, List.of(
|
||||
new MockRecord(1, 1),
|
||||
new MockRecord(2, 2),
|
||||
new MockRecord(3, 3),
|
||||
new MockRecord(4, 4),
|
||||
new MockRecord(5, 5)
|
||||
)
|
||||
), new MockDocument(2, 2, 3, List.of(
|
||||
new MockRecord(1, 1),
|
||||
new MockRecord(3, 3),
|
||||
new MockRecord(5, 5)
|
||||
)
|
||||
))
|
||||
|
||||
);
|
||||
var filtered = left.filterWordMeta(meta -> (meta % 2) == 0);
|
||||
|
||||
List<Long> docIdsSeq = new ArrayList<>();
|
||||
List<Long> wordIdsSeq = new ArrayList<>();
|
||||
while (filtered.nextDocument()) {
|
||||
docIdsSeq.add(filtered.documentId());
|
||||
while (filtered.nextRecord()) {
|
||||
wordIdsSeq.add(filtered.wordId());
|
||||
}
|
||||
}
|
||||
|
||||
assertEquals(docIdsSeq, List.of(1L, 2L));
|
||||
assertEquals(wordIdsSeq, List.of(2L, 4L));
|
||||
}
|
||||
|
||||
class MockPointer implements IndexJournalPointer {
|
||||
private final List<MockDocument> documents;
|
||||
|
||||
int di = -1;
|
||||
int ri;
|
||||
|
||||
public MockPointer(Collection<MockDocument> documents) {
|
||||
this.documents = new ArrayList<>(documents);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean nextDocument() {
|
||||
if (++di < documents.size()) {
|
||||
ri = -1;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean nextRecord() {
|
||||
if (++ri < documents.get(di).records.size()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long documentId() {
|
||||
return documents.get(di).docId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long documentMeta() {
|
||||
return documents.get(di).docMeta;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long wordId() {
|
||||
return documents.get(di).records.get(ri).wordId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long wordMeta() {
|
||||
return documents.get(di).records.get(ri).wordMeta;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int documentFeatures() {
|
||||
return documents.get(di).docFeatures;
|
||||
}
|
||||
}
|
||||
|
||||
record MockDocument(long docId, long docMeta, int docFeatures, List<MockRecord> records) {}
|
||||
record MockRecord(long wordId, long wordMeta) {}
|
||||
//
// @Test
// public void concatenate() {
// MockPointer left = new MockPointer(
// List.of(new MockDocument(1, 2, 3, List.of(
// new MockRecord(4, 5),
// new MockRecord(6, 7))
// ))
// );
//
// MockPointer right = new MockPointer(
// List.of(new MockDocument(8, 9, 10, List.of(
// new MockRecord(11, 12),
// new MockRecord(13, 14))
// ))
// );
//
// IndexJournalPointer concatenated = IndexJournalPointer.concatenate(left, right);
// List<Long> docIdsSeq = new ArrayList<>();
// List<Long> wordIdsSeq = new ArrayList<>();
// while (concatenated.nextDocument()) {
// docIdsSeq.add(concatenated.documentId());
// while (concatenated.nextRecord()) {
// wordIdsSeq.add(concatenated.termId());
// }
// }
//
// assertEquals(docIdsSeq, List.of(1L, 8L));
// assertEquals(wordIdsSeq, List.of(4L, 6L, 11L, 13L));
// }
//
// @Test
// public void filter() {
// MockPointer left = new MockPointer(
// List.of(new MockDocument(1, 2, 3, List.of(
// new MockRecord(1, 1),
// new MockRecord(2, 2),
// new MockRecord(3, 3),
// new MockRecord(4, 4),
// new MockRecord(5, 5)
// )
// ), new MockDocument(2, 2, 3, List.of(
// new MockRecord(1, 1),
// new MockRecord(3, 3),
// new MockRecord(5, 5)
// )
// ))
//
// );
// var filtered = left.filterWordMeta(meta -> (meta % 2) == 0);
//
// List<Long> docIdsSeq = new ArrayList<>();
// List<Long> wordIdsSeq = new ArrayList<>();
// while (filtered.nextDocument()) {
// docIdsSeq.add(filtered.documentId());
// while (filtered.nextRecord()) {
// wordIdsSeq.add(filtered.termId());
// }
// }
//
// assertEquals(docIdsSeq, List.of(1L, 2L));
// assertEquals(wordIdsSeq, List.of(2L, 4L));
// }
//
// class MockPointer implements IndexJournalPointer {
// private final List<MockDocument> documents;
//
// int di = -1;
// int ri;
//
// public MockPointer(Collection<MockDocument> documents) {
// this.documents = new ArrayList<>(documents);
// }
//
// @Override
// public boolean nextDocument() {
// if (++di < documents.size()) {
// ri = -1;
// return true;
// }
//
// return false;
// }
//
// @Override
// public boolean nextRecord() {
// if (++ri < documents.get(di).records.size()) {
// return true;
// }
//
// return false;
// }
//
// @Override
// public long documentId() {
// return documents.get(di).docId;
// }
//
// @Override
// public long documentMeta() {
// return documents.get(di).docMeta;
// }
//
// @Override
// public long termId() {
// return documents.get(di).records.get(ri).termId;
// }
//
// @Override
// public long wordMeta() {
// return documents.get(di).records.get(ri).wordMeta;
// }
//
// @Override
// public int documentFeatures() {
// return documents.get(di).docFeatures;
// }
// }
//
// record MockDocument(long docId, long docMeta, int docFeatures, List<MockRecord> records) {}
// record MockRecord(long termId, long wordMeta) {}
}
@@ -16,12 +16,16 @@ apply from: "$rootProject.projectDir/srcsets.gradle"

dependencies {
implementation project(':code:libraries:array')
implementation project(':code:libraries:btree')
implementation project(':code:libraries:coded-sequence')
implementation project(':code:libraries:random-write-funnel')
implementation project(':code:index:query')
implementation project(':code:index:index-journal')
implementation project(':code:common:model')
implementation project(':code:common:process')

implementation project(':third-party:parquet-floor')
implementation project(':third-party:commons-codec')


implementation libs.bundles.slf4j

@@ -0,0 +1,51 @@
package nu.marginalia.index.construction;

import nu.marginalia.sequence.GammaCodedSequence;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

public class PositionsFileConstructor implements AutoCloseable {
private final Path file;
private final FileChannel channel;

private long offset;
private final ByteBuffer workBuffer = ByteBuffer.allocate(8192);

public PositionsFileConstructor(Path file) throws IOException {
this.file = file;

channel = FileChannel.open(file, StandardOpenOption.CREATE, StandardOpenOption.WRITE);
}

/** Add a term to the positions file
* @param termMeta the term metadata
* @param positions the positions of the term
* @return the offset of the term in the file
*/
public long add(byte termMeta, GammaCodedSequence positions) throws IOException {
synchronized (file) {
var positionBuffer = positions.buffer();
int size = 1 + positionBuffer.remaining();

if (workBuffer.remaining() < size) {
workBuffer.flip();
channel.write(workBuffer);
workBuffer.clear();
}
workBuffer.put(termMeta);
workBuffer.put(positionBuffer);

offset += size;
return offset;
}
}

public void close() throws IOException {
channel.force(false);
channel.close();
}
}
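Each entry written here is one term-metadata byte followed by the gamma-coded positions, located via the offset bookkeeping above. A hypothetical reader for a single entry might look like the sketch below; PositionsFileReader and the explicit length parameter are assumptions for illustration, not part of this commit.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;

class PositionsFileReader {
    /** Read back one entry written by PositionsFileConstructor.add();
     * the entry length is assumed to be known to the caller,
     * e.g. from the offset of the following entry. */
    static GammaCodedSequence readEntry(FileChannel channel, long offset, int length) throws IOException {
        ByteBuffer buf = ByteBuffer.allocate(length);
        channel.read(buf, offset);
        buf.flip();
        byte termMeta = buf.get();          // metadata byte, written first by add()
        return new GammaCodedSequence(buf); // remaining bytes hold the coded positions
    }
}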
@@ -7,6 +7,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.atomic.AtomicInteger;

@@ -48,18 +49,22 @@ public class ReverseIndexConstructor {
return;
}

Path positionsFile = tmpDir.resolve("positions.dat");
Files.deleteIfExists(positionsFile);
try (var heartbeat = processHeartbeat.createProcessTaskHeartbeat(CreateReverseIndexSteps.class, processName)) {

heartbeat.progress(CreateReverseIndexSteps.CONSTRUCT);

try (var preindexHeartbeat = processHeartbeat.createAdHocTaskHeartbeat("constructPreindexes")) {
try (var preindexHeartbeat = processHeartbeat.createAdHocTaskHeartbeat("constructPreindexes");
PositionsFileConstructor posConstructor = new PositionsFileConstructor(positionsFile);
) {

AtomicInteger progress = new AtomicInteger(0);
inputs
.parallelStream()
.map(in -> {
preindexHeartbeat.progress("PREINDEX/MERGE", progress.incrementAndGet(), inputs.size());
return construct(in);
return construct(in, posConstructor);
})
.reduce(this::merge)
.ifPresent((index) -> {
@@ -73,9 +78,9 @@ public class ReverseIndexConstructor {
}

@SneakyThrows
private ReversePreindexReference construct(Path input) {
private ReversePreindexReference construct(Path input, PositionsFileConstructor positionsFileConstructor) {
return ReversePreindex
.constructPreindex(readerSource.construct(input), docIdRewriter, tmpDir)
.constructPreindex(readerSource.construct(input), positionsFileConstructor, docIdRewriter, tmpDir)
.closeToReference();
}

@@ -40,6 +40,7 @@ public class ReversePreindex {
* will have randomly assigned names.
*/
public static ReversePreindex constructPreindex(IndexJournalReader reader,
PositionsFileConstructor positionsFileConstructor,
DocIdRewriter docIdRewriter,
Path workDir) throws IOException
{
@@ -48,7 +49,7 @@ public class ReversePreindex {
Path docsFile = Files.createTempFile(workDir, "docs", ".dat");

var segments = ReversePreindexWordSegments.construct(reader, segmentWordsFile, segmentCountsFile);
var docs = ReversePreindexDocuments.construct(docsFile, workDir, reader, docIdRewriter, segments);
var docs = ReversePreindexDocuments.construct(docsFile, workDir, reader, docIdRewriter, positionsFileConstructor, segments);
return new ReversePreindex(segments, docs);
}

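For orientation, a call site with the new signature might look like the sketch below; journalReader, posConstructor and workDir are placeholders, and in practice a single PositionsFileConstructor is shared across all inputs as in ReverseIndexConstructor above.

// A minimal sketch of the revised constructPreindex() call, under the
// assumptions stated above.
ReversePreindex preindex = ReversePreindex.constructPreindex(
        journalReader,              // an IndexJournalReader for one journal file
        posConstructor,             // shared positions file writer
        DocIdRewriter.identity(),
        workDir);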
@@ -21,6 +21,7 @@ import java.util.concurrent.TimeUnit;
* the associated ReversePreindexWordSegments data
*/
public class ReversePreindexDocuments {
private static PositionsFileConstructor positionsFileConstructor;
final Path file;
public final LongArray documents;
private static final int RECORD_SIZE_LONGS = 2;
@@ -36,7 +37,9 @@ public class ReversePreindexDocuments {
Path workDir,
IndexJournalReader reader,
DocIdRewriter docIdRewriter,
PositionsFileConstructor positionsFileConstructor,
ReversePreindexWordSegments segments) throws IOException {
ReversePreindexDocuments.positionsFileConstructor = positionsFileConstructor;

createUnsortedDocsFile(docsFile, workDir, reader, segments, docIdRewriter);

@@ -75,14 +78,14 @@ public class ReversePreindexDocuments {
var pointer = reader.newPointer();
while (pointer.nextDocument()) {
long rankEncodedId = docIdRewriter.rewriteDocId(pointer.documentId());
while (pointer.nextRecord()) {
long wordId = pointer.wordId();
long wordMeta = pointer.wordMeta();
for (var termData : pointer) {
long termId = termData.termId();

long offset = offsetMap.addTo(wordId, RECORD_SIZE_LONGS);
long offset = offsetMap.addTo(termId, RECORD_SIZE_LONGS);
long posOffset = positionsFileConstructor.add((byte) termData.metadata(), termData.positions());

assembly.put(offset + 0, rankEncodedId);
assembly.put(offset + 1, wordMeta);
assembly.put(offset + 1, posOffset);
}
}

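The net effect is that each two-long record now pairs the rank-encoded document id with an offset into the positions file instead of the term metadata word. A rough sketch of walking the finished array, assuming LongArray exposes size() and get(long) as used elsewhere in the codebase:

LongArray documents = docs.documents;
for (long i = 0; i < documents.size(); i += 2) {
    long rankEncodedId   = documents.get(i);     // rewritten, rank-encoded document id
    long positionsOffset = documents.get(i + 1); // where the term meta byte + positions were written
    // resolve the gamma-coded positions from the positions file at positionsOffset
}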
@@ -12,7 +12,7 @@ import java.nio.file.Files;
import java.nio.file.Path;

/** A pair of file-backed arrays of sorted wordIds
* and the count of documents associated with each wordId.
* and the count of documents associated with each termId.
*/
public class ReversePreindexWordSegments {
public final LongArray wordIds;
@@ -34,7 +34,7 @@ public class ReversePreindexWordSegments {
this.countsFile = countsFile;
}

/** Returns a long-long hash map where each key is a wordId,
/** Returns a long-long hash map where each key is a termId,
* and each value is the start offset of the data.
*/
public Long2LongOpenHashMap asMap(int recordSize) {
@@ -188,7 +188,7 @@ public class ReversePreindexWordSegments {

if (i == fileSize) {
// We've reached the end of the iteration and there is no
// "next" wordId to fetch
// "next" termId to fetch
wordId = Long.MIN_VALUE;
return false;
}

@@ -2,12 +2,14 @@ package nu.marginalia.index;

import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.PositionsFileConstructor;
import nu.marginalia.index.construction.ReversePreindex;
import nu.marginalia.index.construction.TestJournalFactory;
import nu.marginalia.index.construction.TestJournalFactory.EntryDataWithWordMeta;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;

import java.io.IOException;
import java.nio.file.Files;
@@ -89,7 +91,9 @@ class ReverseIndexReaderTest {

private ReverseIndexReader createIndex(EntryDataWithWordMeta... scenario) throws IOException {
var reader = journalFactory.createReader(scenario);
var preindex = ReversePreindex.constructPreindex(reader, DocIdRewriter.identity(), tempDir);
var preindex = ReversePreindex.constructPreindex(reader,
Mockito.mock(PositionsFileConstructor.class),
DocIdRewriter.identity(), tempDir);


Path docsFile = tempDir.resolve("docs.dat");
@@ -100,6 +100,7 @@ class ReversePreindexDocsTest {

assertEquals(expected, actual);
}

@Test
public void testDocs2() throws IOException {
var reader = journalFactory.createReader(
@@ -108,7 +109,7 @@ class ReversePreindexDocsTest {
);

var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
var docs = ReversePreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(), segments);
var docs = ReversePreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(), segments);

List<TestSegmentData> expected = List.of(
new TestSegmentData(-100, 0, 4, new long[] { -0xF00BA3L, 0, 0xF00BA4L, 0 }),
@@ -5,6 +5,7 @@ import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
import nu.marginalia.index.journal.reader.IndexJournalReader;
import nu.marginalia.index.journal.reader.IndexJournalReaderSingleFile;
import nu.marginalia.index.journal.writer.IndexJournalWriterSingleFileImpl;
import nu.marginalia.sequence.GammaCodedSequence;

import java.io.IOException;
import java.nio.file.Files;
@@ -60,12 +61,18 @@ public class TestJournalFactory {

var writer = new IndexJournalWriterSingleFileImpl(jf);
for (var entry : entries) {
long[] data = new long[entry.wordIds.length * 2];
for (int i = 0; i < entry.wordIds.length; i++)
data[i*2] = entry.wordIds[i];
long[] termIds = new long[entry.wordIds.length];
long[] meta = new long[entry.wordIds.length];

GammaCodedSequence[] positions = new GammaCodedSequence[entry.wordIds.length];
for (int i = 0; i < entry.wordIds.length; i++) {
termIds[i] = entry.wordIds[i];
meta[i] = 0;
positions[i] = new GammaCodedSequence(new byte[1]);
}

writer.put(new IndexJournalEntryHeader(entries.length, 0, entry.docId, entry.docMeta),
new IndexJournalEntryData(data));
new IndexJournalEntryData(termIds, meta, positions));
}
writer.close();
var ret = new IndexJournalReaderSingleFile(jf);
@@ -77,14 +84,18 @@ public class TestJournalFactory {

var writer = new IndexJournalWriterSingleFileImpl(jf);
for (var entry : entries) {
long[] data = new long[entry.wordIds.length * 2];

long[] termIds = new long[entry.wordIds.length];
long[] meta = new long[entry.wordIds.length];
GammaCodedSequence[] positions = new GammaCodedSequence[entry.wordIds.length];
for (int i = 0; i < entry.wordIds.length; i++) {
data[i * 2] = entry.wordIds[i].wordId;
data[i * 2 + 1] = entry.wordIds[i].meta;
termIds[i] = entry.wordIds[i].wordId;
meta[i] = entry.wordIds[i].meta;
positions[i] = new GammaCodedSequence(new byte[1]);
}

writer.put(new IndexJournalEntryHeader(entries.length, 0, entry.docId, entry.docMeta),
new IndexJournalEntryData(data));
new IndexJournalEntryData(termIds, meta, positions));
}
writer.close();
var ret = new IndexJournalReaderSingleFile(jf);

@@ -8,15 +8,16 @@ import nu.marginalia.api.searchquery.model.query.SearchSpecification;
import nu.marginalia.api.searchquery.model.query.SearchQuery;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.index.index.StatefulIndex;
import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.process.control.FakeProcessHeartbeat;
import nu.marginalia.process.control.ProcessHeartbeat;
import nu.marginalia.sequence.GammaCodedSequence;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.hash.MurmurHash3_128;
import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.ReverseIndexConstructor;
import nu.marginalia.index.forward.ForwardIndexConverter;
import nu.marginalia.index.forward.ForwardIndexFileNames;
import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
import nu.marginalia.index.journal.reader.IndexJournalReader;
import nu.marginalia.index.journal.writer.IndexJournalWriter;
@@ -41,6 +42,7 @@ import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.parallel.Execution;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.SQLException;
@@ -300,7 +302,18 @@ public class IndexQueryServiceIntegrationSmokeTest {
"test", "test", 0., "HTML5", 0, null, 0, 10
));

indexJournalWriter.put(header, new IndexJournalEntryData(data));
String[] keywords = IntStream.range(0, factors.length).mapToObj(Integer::toString).toArray(String[]::new);
long[] metadata = new long[factors.length];
for (int i = 0; i < factors.length; i++) {
metadata[i] = new WordMetadata(i, EnumSet.of(WordFlags.Title)).encode();
}
GammaCodedSequence[] positions = new GammaCodedSequence[factors.length];
ByteBuffer wa = ByteBuffer.allocate(16);
for (int i = 0; i < factors.length; i++) {
positions[i] = GammaCodedSequence.generate(wa, i + 1);
}

indexJournalWriter.put(header, new IndexJournalEntryData(keywords, metadata, positions));
}

@SneakyThrows
@@ -309,19 +322,24 @@ public class IndexQueryServiceIntegrationSmokeTest {
long fullId = UrlIdCodec.encodeId(domain, id);
var header = new IndexJournalEntryHeader(factors.length, 0, fullId, DocumentMetadata.defaultValue());

long[] data = new long[factors.length*2];
for (int i = 0; i < factors.length; i++) {
data[2*i] = hasher.hashNearlyASCII(Integer.toString(factors[i]));
data[2*i + 1] = new WordMetadata(i, EnumSet.of(WordFlags.Title)).encode();
}

ldbw.add(new DocdbUrlDetail(
fullId, new EdgeUrl("https://www.example.com/"+id),
"test", "test", 0., "HTML5", 0, null, 0, 10
));


indexJournalWriter.put(header, new IndexJournalEntryData(data));
String[] keywords = IntStream.range(0, factors.length).mapToObj(Integer::toString).toArray(String[]::new);
long[] metadata = new long[factors.length];
for (int i = 0; i < factors.length; i++) {
metadata[i] = new WordMetadata(i, EnumSet.of(WordFlags.Title)).encode();
}
GammaCodedSequence[] positions = new GammaCodedSequence[factors.length];
ByteBuffer wa = ByteBuffer.allocate(16);
for (int i = 0; i < factors.length; i++) {
positions[i] = GammaCodedSequence.generate(wa, i);
}

indexJournalWriter.put(header, new IndexJournalEntryData(keywords, metadata, positions));
}

}
@@ -7,13 +7,14 @@ import nu.marginalia.api.searchquery.model.query.SearchSpecification;
import nu.marginalia.api.searchquery.model.query.SearchQuery;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.index.index.StatefulIndex;
import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.sequence.GammaCodedSequence;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.hash.MurmurHash3_128;
import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.ReverseIndexConstructor;
import nu.marginalia.index.forward.ForwardIndexConverter;
import nu.marginalia.index.forward.ForwardIndexFileNames;
import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
import nu.marginalia.index.journal.reader.IndexJournalReader;
import nu.marginalia.index.journal.writer.IndexJournalWriter;
@@ -44,6 +45,7 @@ import org.junit.jupiter.api.parallel.Execution;

import javax.annotation.CheckReturnValue;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.SQLException;
@@ -549,13 +551,13 @@ public class IndexQueryServiceIntegrationTest {
meta.documentMetadata.encode()
);

long[] dataArray = new long[words.size() * 2];
for (int i = 0; i < words.size(); i++) {
dataArray[2*i] = hasher.hashNearlyASCII(words.get(i).keyword);
dataArray[2*i+1] = words.get(i).termMetadata;
}
var entry = new IndexJournalEntryData(dataArray);
indexJournalWriter.put(header, entry);
String[] keywords = words.stream().map(w -> w.keyword).toArray(String[]::new);
long[] metadata = words.stream().map(w -> w.termMetadata).mapToLong(Long::longValue).toArray();
GammaCodedSequence[] positions = new GammaCodedSequence[words.size()]; // FIXME: positions?
Arrays.setAll(positions, i -> new GammaCodedSequence(ByteBuffer.allocate(1)));

indexJournalWriter.put(header,
new IndexJournalEntryData(keywords, metadata, positions));
});

var linkdbWriter = new DocumentDbWriter(
@@ -7,18 +7,30 @@ import nu.marginalia.sequence.io.BitWriter;

import java.nio.ByteBuffer;

/** Implement coding and decoding of sequences of integers using the Elias Gamma code
*
* https://en.wikipedia.org/wiki/Elias_gamma_coding
/** Implement coding and decoding of sequences of integers using the Elias Gamma code.
* The sequence is prefixed by the number of integers in the sequence, then the delta between
* each integer in the sequence is encoded using the Elias Gamma code.
* <p></p>
* <a href="https://en.wikipedia.org/wiki/Elias_gamma_coding">https://en.wikipedia.org/wiki/Elias_gamma_coding</a>
* */
public class EliasGammaCodec implements IntIterator {

private final BitReader reader;
int rem = 0;
private int last = 0;
private int next = 0;

private EliasGammaCodec(ByteBuffer buffer) {
reader = new BitReader(buffer);

int bits = reader.takeWhileZero();

if (!reader.hasMore()) {
rem = 0;
}
else {
rem = reader.get(bits);
}
}

/** Decode a sequence of integers from a ByteBuffer using the Elias Gamma code */
@@ -31,7 +43,13 @@ public class EliasGammaCodec implements IntIterator {
* or equal to zero.
*/
public static ByteBuffer encode(ByteBuffer workArea, IntList sequence) {
if (sequence.isEmpty())
return ByteBuffer.allocate(0);

var writer = new BitWriter(workArea);

writer.putGammaCoded(sequence.size());

int last = 0;

for (var iter = sequence.iterator(); iter.hasNext(); ) {
@@ -42,9 +60,7 @@ public class EliasGammaCodec implements IntIterator {
// can't encode zeroes
assert delta > 0 : "Sequence must be strictly increasing and may not contain zeroes or negative values";

int bits = Integer.numberOfTrailingZeros(Integer.highestOneBit(delta));
writer.put(0, bits + 1);
writer.put(delta, bits + 1);
writer.putGammaCoded(delta);
}

return writer.finish();
@@ -60,16 +76,13 @@ public class EliasGammaCodec implements IntIterator {

@Override
public boolean hasNext() {
if (next > 0)
return true;
if (!reader.hasMore())
return false;
if (next > 0) return true;
if (!reader.hasMore() || --rem < 0) return false;

int bits = reader.takeWhileZero();

if (!reader.hasMore()) {
return false;
}
if (!reader.hasMore()) return false;

int delta = reader.get(bits);
last += delta;
next = last;

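A small round-trip sketch of the revised codec (IntList and IntIterator are the fastutil types used throughout; the work-area size and values are arbitrary, and the input must be strictly increasing and positive):

ByteBuffer workArea = ByteBuffer.allocate(64);
ByteBuffer encoded = EliasGammaCodec.encode(workArea, IntList.of(1, 3, 10));

// The buffer begins with the gamma-coded element count (3),
// followed by the gamma-coded deltas 1, 2 and 7.
IntIterator iter = EliasGammaCodec.decode(encoded);
while (iter.hasNext()) {
    System.out.println(iter.nextInt()); // prints 1, 3, 10
}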
@@ -16,6 +16,8 @@ import java.util.StringJoiner;
* */
public class GammaCodedSequence implements BinarySerializable, Iterable<Integer> {
private final ByteBuffer raw;
int startPos = 0;
int startLimit = 0;

/** Create a new GammaCodedSequence from a sequence of integers.
*
@@ -37,12 +39,16 @@ public class GammaCodedSequence implements BinarySerializable, Iterable<Integer>

public GammaCodedSequence(ByteBuffer bytes) {
this.raw = bytes;
startPos = bytes.position();
startLimit = bytes.limit();
}

public GammaCodedSequence(byte[] bytes) {
raw = ByteBuffer.allocate(bytes.length);
raw.put(bytes);
raw.clear();
startPos = 0;
startLimit = bytes.length;
}

/** Return the raw bytes of the sequence. */
@@ -52,21 +58,29 @@ public class GammaCodedSequence implements BinarySerializable, Iterable<Integer>
return raw.array();
}
else {
raw.clear();

byte[] bytes = new byte[raw.capacity()];
raw.get(bytes, 0, bytes.length);
raw.get(0, bytes, 0, bytes.length);
return bytes;
}
}

@Override
public IntIterator iterator() {
raw.clear();
raw.position(startPos);
raw.limit(startLimit);

return EliasGammaCodec.decode(raw);
}

public IntList values() {
var intItr = iterator();
IntArrayList ret = new IntArrayList(8);
while (intItr.hasNext()) {
ret.add(intItr.nextInt());
}
return ret;
}

/** Decode the sequence into an IntList;
* this is a somewhat slow operation,
* iterating over the data directly is more performant */
@@ -94,4 +108,15 @@ public class GammaCodedSequence implements BinarySerializable, Iterable<Integer>
}
return sj.toString();
}

public ByteBuffer buffer() {
raw.position(startPos);
raw.limit(startLimit);

return raw;
}

public int size() {
return raw.capacity();
}
}

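A usage sketch of the wrapper with the new accessors, mirroring how the tests below build position lists (buffer sizes are arbitrary):

ByteBuffer workArea = ByteBuffer.allocate(256);
GammaCodedSequence positions = GammaCodedSequence.generate(workArea, 1, 3, 5);

IntList decoded = positions.values();  // [1, 3, 5]
ByteBuffer coded = positions.buffer(); // positioned over the coded bytes, e.g. for PositionsFileConstructor.add()
int storedBytes = positions.size();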
@@ -78,7 +78,7 @@ public class BitReader {

int result = 0;

for (;;) {
do {
// Ensure we have bits to read
if (bitPosition <= 0) {
if (underlying.hasRemaining())
@@ -96,10 +96,8 @@ public class BitReader {
// Subtract the number of bits read from the current position
bitPosition -= zeroes;

// If bitPosition isn't zero, we've found a 1 and can stop
if (bitPosition > 0)
break;
}
// If bit position is positive, we've found a 1 and can stop
} while (bitPosition <= 0);

return result;
}
@@ -72,6 +72,17 @@ public class BitWriter {
}
}

/** Write the provided value in a gamma-coded format,
* e.g. by first finding the number of significant bits,
* then writing that many zeroes, then the bits themselves
*/
public void putGammaCoded(int value) {
int bits = 1 + Integer.numberOfTrailingZeros(Integer.highestOneBit(value));

put(0, bits);
put(value, bits);
}

public ByteBuffer finish() {
finishLastByte();

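A worked example of the layout putGammaCoded produces, read back with BitReader: the value 5 has three significant bits, so the writer emits three zero bits followed by 101. (A sketch, not taken from the test suite.)

var writer = new BitWriter(ByteBuffer.allocate(16));
writer.putGammaCoded(5);            // writes 000, then 101
ByteBuffer buffer = writer.finish();

var reader = new BitReader(buffer);
int bits = reader.takeWhileZero();  // 3, the length of the zero prefix
int value = reader.get(bits);       // reads the three value bits -> 5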
@@ -115,16 +115,17 @@ class BitReaderTest {
}

@Test
public void testTakeWhileZeroOverInt32() {
public void testTakeWhileZeroOverInt64() {
var writer = new BitWriter(ByteBuffer.allocate(1024));
writer.put(0, 32);
writer.put(0, 32);
writer.put(0, 2);
writer.putBit(true);
var buffer = writer.finish();

var reader = new BitReader(buffer);
int val = reader.takeWhileZero();
assertEquals(34, val);
assertEquals(66, val);
assertTrue(reader.getBit());
}
}
@@ -4,9 +4,9 @@ import com.google.inject.Inject;
import com.google.inject.Singleton;
import lombok.SneakyThrows;
import nu.marginalia.IndexLocations;
import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.hash.MurmurHash3_128;
import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
import nu.marginalia.index.journal.writer.IndexJournalWriterPagingImpl;
import nu.marginalia.index.journal.writer.IndexJournalWriter;
@@ -18,9 +18,7 @@ import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.nio.file.Files;
import java.sql.SQLException;

import static nu.marginalia.index.journal.model.IndexJournalEntryData.MAX_LENGTH;

@Singleton
public class LoaderIndexJournalWriter {
@@ -28,12 +26,11 @@ public class LoaderIndexJournalWriter {
private final IndexJournalWriter indexWriter;
private static final Logger logger = LoggerFactory.getLogger(LoaderIndexJournalWriter.class);

private final MurmurHash3_128 hasher = new MurmurHash3_128();
private final long[] buffer = new long[MAX_LENGTH * 2];
private final long[] buffer = new long[65536];


@Inject
public LoaderIndexJournalWriter(FileStorageService fileStorageService) throws IOException, SQLException {
public LoaderIndexJournalWriter(FileStorageService fileStorageService) throws IOException {
var indexArea = IndexLocations.getIndexConstructionArea(fileStorageService);

var existingIndexFiles = IndexJournalFileNames.findJournalFiles(indexArea);
@@ -68,26 +65,10 @@ public class LoaderIndexJournalWriter {
return;
}

var pointer = wordSet.newPointer();

while (pointer.hasMore()) {
int i = 0;

while (i < buffer.length
&& pointer.advancePointer())
{
final long hashedKeyword = hasher.hashKeyword(pointer.getKeyword());

buffer[i++] = hashedKeyword;
buffer[i++] = pointer.getMetadata();
}

var entry = new IndexJournalEntryData(i, buffer);
var header = new IndexJournalEntryHeader(combinedId, features, metadata);

indexWriter.put(header, entry);
}
var header = new IndexJournalEntryHeader(combinedId, features, metadata);
var data = new IndexJournalEntryData(wordSet.keywords, wordSet.metadata, wordSet.positions);

indexWriter.put(header, data);
}

public void close() throws Exception {
@@ -1,87 +0,0 @@
package nu.marginalia.loading.loader;

import nu.marginalia.sequence.GammaCodedSequence;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageBase;
import nu.marginalia.storage.model.FileStorageBaseType;
import nu.marginalia.index.journal.reader.IndexJournalReaderSingleFile;
import nu.marginalia.keyword.model.DocumentKeywords;
import nu.marginalia.loading.LoaderIndexJournalWriter;
import nu.marginalia.model.idx.DocumentMetadata;
import nu.marginalia.index.journal.IndexJournalFileNames;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.LongStream;

import static org.junit.jupiter.api.Assertions.*;

class LoaderIndexJournalWriterTest {

Path tempDir;
LoaderIndexJournalWriter writer;
@BeforeEach
public void setUp() throws IOException, SQLException {
tempDir = Files.createTempDirectory(getClass().getSimpleName());
FileStorageService storageService = Mockito.mock(FileStorageService.class);

Mockito.when(storageService.getStorageBase(FileStorageBaseType.CURRENT)).thenReturn(new FileStorageBase(null, null, 1,null, tempDir.toString()));

writer = new LoaderIndexJournalWriter(storageService);
}

@AfterEach
public void tearDown() throws Exception {
writer.close();
List<Path> junk = Files.list(tempDir.resolve("iw")).toList();
for (var item : junk)
Files.delete(item);
Files.delete(tempDir.resolve("iw"));
Files.delete(tempDir);
}

@Test
public void testBreakup() throws Exception {
String[] keywords = new String[2000];
long[] metadata = new long[2000];
GammaCodedSequence[] positions = new GammaCodedSequence[2000];
ByteBuffer workArea = ByteBuffer.allocate(1024);
for (int i = 0; i < 2000; i++) {
keywords[i] = Integer.toString(i);
metadata[i] = i+1;
positions[i] = GammaCodedSequence.generate(workArea, 1, 2, 3);
}
DocumentKeywords words = new DocumentKeywords(keywords, metadata, positions);
writer.putWords(1, 0, new DocumentMetadata(0),
words);

writer.close();

List<Path> journalFiles = IndexJournalFileNames.findJournalFiles(tempDir.resolve("iw"));
assertEquals(1, journalFiles.size());

var reader = new IndexJournalReaderSingleFile(journalFiles.get(0));
List<Long> docIds = new ArrayList<>();
reader.forEachDocId(docIds::add);
assertEquals(List.of(1L, 1L), docIds);

List<Long> metas = new ArrayList<Long>();
var ptr = reader.newPointer();
while (ptr.nextDocument()) {
while (ptr.nextRecord()) {
metas.add(ptr.wordMeta());
}
}

assertEquals(LongStream.of(metadata).boxed().toList(), metas);
}
}
@@ -33,6 +33,7 @@ public class SearchMain extends MainClass {
new ServiceDiscoveryModule(),
new DatabaseModule(false)
);


// Orchestrate the boot order for the services
var registry = injector.getInstance(ServiceRegistryIf.class);