Merge pull request #42 from MarginaliaSearch/no-downtime-upgrades

Zero downtime upgrades, merge-based index construction
Viktor 2023-08-29 17:05:48 +02:00 committed by GitHub
commit bdcbfb11a8
243 changed files with 5601 additions and 5213 deletions

View File

@ -13,6 +13,10 @@ tasks.register('dist', Copy) {
from subprojects.collect { it.tasks.withType(Tar) }
into "$buildDir/dist"
// For local development, each process that is to be triggerable
// from the control-service needs to go here so it ends up somewhere
// the control-service can find it
doLast {
copy {
from tarTree("$buildDir/dist/converter-process.tar")
@ -34,10 +38,18 @@ tasks.register('dist', Copy) {
from tarTree("$buildDir/dist/crawl-job-extractor-process.tar")
into "$projectDir/run/dist/"
}
copy {
from tarTree("$buildDir/dist/index-construction-process.tar")
into "$projectDir/run/dist/"
}
}
}
idea {
module {
// Exclude these directories from being indexed by IntelliJ
// as they tend to bring the IDE to its knees and exhaust
// the available inotify watches in a hurry
excludeDirs.add(file("$projectDir/run/backup"))
excludeDirs.add(file("$projectDir/run/model"))
excludeDirs.add(file("$projectDir/run/dist"))
excludeDirs.add(file("$projectDir/run/samples"))

View File

@ -3,8 +3,6 @@ package nu.marginalia.index.client;
public class IndexMqEndpoints {
public static final String INDEX_IS_BLOCKED = "INDEX-IS-BLOCKED";
public static final String INDEX_REPARTITION = "INDEX-REPARTITION";
public static final String INDEX_RELOAD_LEXICON = "INDEX-RELOAD-LEXICON";
public static final String INDEX_REINDEX = "INDEX-REINDEX";
public static final String SWITCH_INDEX = "SWITCH-INDEX";
}

View File

@ -2,16 +2,17 @@ package nu.marginalia.index.client.model.results;
import lombok.AllArgsConstructor;
import lombok.Getter;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.model.id.EdgeId;
import nu.marginalia.model.id.UrlIdCodec;
import org.jetbrains.annotations.NotNull;
import java.util.ArrayList;
import java.util.List;
/** Represents a document matching a search query */
@AllArgsConstructor @Getter
public class SearchResultItem {
/** Encoded ID that contains both the URL id and its ranking */
public class SearchResultItem implements Comparable<SearchResultItem> {
/** Encoded ID that contains both the URL id and its ranking. This is
* probably not what you want, use getDocumentId() instead */
public final long combinedId;
/** How did the subqueries match against the document ? */
@ -20,20 +21,18 @@ public class SearchResultItem {
/** How many other potential results existed in the same domain */
public int resultsFromDomain;
public SearchResultItem(long val) {
this.combinedId = val;
public SearchResultItem(long combinedId) {
this.combinedId = combinedId;
this.keywordScores = new ArrayList<>(16);
}
public EdgeId<EdgeUrl> getUrlId() {
return new EdgeId<>(getUrlIdInt());
public long getDocumentId() {
return UrlIdCodec.removeRank(combinedId);
}
public int getUrlIdInt() {
return (int)(combinedId & 0xFFFF_FFFFL);
}
public int getRanking() {
return (int)(combinedId >>> 32);
return UrlIdCodec.getRank(combinedId);
}
/* Used for evaluation */
@ -45,20 +44,16 @@ public class SearchResultItem {
return scoreValue;
}
private transient int domainId = Integer.MIN_VALUE;
public void setDomainId(int domainId) {
this.domainId = domainId;
}
public int getDomainId() {
return this.domainId;
return UrlIdCodec.getDomainId(this.combinedId);
}
public int hashCode() {
return getUrlIdInt();
return Long.hashCode(combinedId);
}
public String toString() {
return getClass().getSimpleName() + "[ url= " + getUrlId() + ", rank=" + getRanking() + "]";
return getClass().getSimpleName() + "[ url= " + getDocumentId() + ", rank=" + getRanking() + "]";
}
public boolean equals(Object other) {
@ -67,18 +62,18 @@ public class SearchResultItem {
if (other == this)
return true;
if (other instanceof SearchResultItem o) {
return o.getUrlIdInt() == getUrlIdInt();
return o.getDocumentId() == getDocumentId();
}
return false;
}
public long deduplicationKey() {
final int domainId = getDomainId();
@Override
public int compareTo(@NotNull SearchResultItem o) {
// this looks like a bug, but we actually want this in a reversed order
int diff = o.getScore().compareTo(getScore());
if (diff != 0)
return diff;
if (domainId == Integer.MAX_VALUE || domainId == Integer.MIN_VALUE) {
return 0;
}
return domainId;
return Long.compare(this.combinedId, o.combinedId);
}
}
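With getUrlId()/getUrlIdInt() gone, callers read the encoded id through the UrlIdCodec-backed accessors instead. A minimal sketch of the new call pattern (everything other than the accessors shown in the diff is illustrative):
void printResult(SearchResultItem item) {
    long documentId = item.getDocumentId();  // combinedId with the rank bits masked out
    int domainId    = item.getDomainId();    // decoded from combinedId via UrlIdCodec
    int rank        = item.getRanking();     // decoded from combinedId via UrlIdCodec
    System.out.println(documentId + " (domain " + domainId + ", rank " + rank + ")");
}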

View File

@ -6,7 +6,6 @@ import static java.lang.Boolean.compare;
import static java.lang.Double.compare;
public record SearchResultPreliminaryScore(
boolean disqualified,
boolean hasPriorityTerm,
double searchRankingScore)
implements Comparable<SearchResultPreliminaryScore>
@ -25,7 +24,4 @@ public record SearchResultPreliminaryScore(
return PREFER_LOW * compare(searchRankingScore, other.searchRankingScore);
}
public boolean isDisqualified() {
return disqualified;
}
}

View File

@ -4,4 +4,6 @@ public class ProcessInboxNames {
public static final String CONVERTER_INBOX = "converter";
public static final String LOADER_INBOX = "loader";
public static final String CRAWLER_INBOX = "crawler";
public static final String INDEX_CONSTRUCTOR_INBOX = "index_constructor";
}

View File

@ -0,0 +1,5 @@
package nu.marginalia.mqapi.index;
public record CreateIndexRequest(IndexName indexName)
{
}

View File

@ -0,0 +1,7 @@
package nu.marginalia.mqapi.index;
public enum IndexName {
FORWARD,
REVERSE_FULL,
REVERSE_PRIO
}

View File

@ -3,4 +3,5 @@ package nu.marginalia.search.client;
public class SearchMqEndpoints {
/** Flushes the URL caches, run if significant changes have occurred in the URLs database */
public static final String FLUSH_CACHES = "FLUSH_CACHES";
public static final String SWITCH_LINKDB = "SWITCH_LINKDB";
}

View File

@ -9,16 +9,16 @@ import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import lombok.SneakyThrows;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.id.EdgeId;
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.OptionalInt;
@Singleton
public class DbDomainQueries {
private final HikariDataSource dataSource;
private final Cache<EdgeDomain, EdgeId<EdgeDomain>> domainIdCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
private final Cache<EdgeDomain, Integer> domainIdCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
@Inject
public DbDomainQueries(HikariDataSource dataSource)
@ -28,7 +28,7 @@ public class DbDomainQueries {
@SneakyThrows
public EdgeId<EdgeDomain> getDomainId(EdgeDomain domain) {
public Integer getDomainId(EdgeDomain domain) {
try (var connection = dataSource.getConnection()) {
return domainIdCache.get(domain, () -> {
@ -36,7 +36,7 @@ public class DbDomainQueries {
stmt.setString(1, domain.toString());
var rsp = stmt.executeQuery();
if (rsp.next()) {
return new EdgeId<>(rsp.getInt(1));
return rsp.getInt(1);
}
}
throw new NoSuchElementException();
@ -48,12 +48,12 @@ public class DbDomainQueries {
}
@SneakyThrows
public Optional<EdgeId<EdgeDomain>> tryGetDomainId(EdgeDomain domain) {
public OptionalInt tryGetDomainId(EdgeDomain domain) {
var maybe = Optional.ofNullable(domainIdCache.getIfPresent(domain));
if (maybe.isPresent())
return maybe;
Integer maybeId = domainIdCache.getIfPresent(domain);
if (maybeId != null) {
return OptionalInt.of(maybeId);
}
try (var connection = dataSource.getConnection()) {
@ -61,25 +61,25 @@ public class DbDomainQueries {
stmt.setString(1, domain.toString());
var rsp = stmt.executeQuery();
if (rsp.next()) {
var id = new EdgeId<EdgeDomain>(rsp.getInt(1));
var id = rsp.getInt(1);
domainIdCache.put(domain, id);
return Optional.of(id);
return OptionalInt.of(id);
}
}
return Optional.empty();
return OptionalInt.empty();
}
catch (UncheckedExecutionException ex) {
return Optional.empty();
return OptionalInt.empty();
}
}
@SneakyThrows
public Optional<EdgeDomain> getDomain(EdgeId<EdgeDomain> id) {
public Optional<EdgeDomain> getDomain(int id) {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement("SELECT DOMAIN_NAME FROM EC_DOMAIN WHERE ID=?")) {
stmt.setInt(1, id.id());
stmt.setInt(1, id);
var rsp = stmt.executeQuery();
if (rsp.next()) {
return Optional.of(new EdgeDomain(rsp.getString(1)));

View File

@ -2,15 +2,10 @@ package nu.marginalia.db;
import com.google.inject.ImplementedBy;
import gnu.trove.set.hash.TIntHashSet;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.id.EdgeId;
@ImplementedBy(DomainBlacklistImpl.class)
public interface DomainBlacklist {
boolean isBlacklisted(int domainId);
default boolean isBlacklisted(EdgeId<EdgeDomain> domainId) {
return isBlacklisted(domainId.id());
}
default TIntHashSet getSpamDomains() {
return new TIntHashSet();
}

View File

@ -1,13 +1,14 @@
package nu.marginalia.db;
import com.zaxxer.hikari.HikariDataSource;
import gnu.trove.list.TIntList;
import gnu.trove.list.array.TIntArrayList;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.id.EdgeIdList;
import org.slf4j.LoggerFactory;
import org.slf4j.Logger;
import javax.inject.Inject;
import javax.inject.Singleton;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
@ -58,10 +59,10 @@ public class DomainTypes {
return ret;
}
/** Retrieve the EdgeId of all domains of a certain type,
/** Retrieve the domain id of all domains of a certain type,
* ignoring entries that are not in the EC_DOMAIN table */
public EdgeIdList<EdgeDomain> getKnownDomainsByType(Type type) {
EdgeIdList<EdgeDomain> ret = new EdgeIdList<>();
public TIntList getKnownDomainsByType(Type type) {
TIntList ret = new TIntArrayList();
try (var conn = dataSource.getConnection();
var stmt = conn.prepareStatement("""

View File

@ -5,8 +5,8 @@ import nu.marginalia.db.storage.model.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.inject.Inject;
import javax.inject.Singleton;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;

View File

@ -5,9 +5,9 @@ public enum FileStorageType {
CRAWL_DATA,
PROCESSED_DATA,
INDEX_STAGING,
LEXICON_STAGING,
LINKDB_STAGING,
LINKDB_LIVE,
INDEX_LIVE,
LEXICON_LIVE,
BACKUP,
EXPORT,
SEARCH_SETS

View File

@ -0,0 +1,9 @@
ALTER TABLE FILE_STORAGE MODIFY COLUMN TYPE ENUM ('CRAWL_SPEC', 'CRAWL_DATA', 'PROCESSED_DATA', 'INDEX_STAGING', 'LEXICON_STAGING', 'INDEX_LIVE', 'LEXICON_LIVE', 'SEARCH_SETS', 'BACKUP', 'EXPORT', 'LINKDB_LIVE', 'LINKDB_STAGING') NOT NULL;
INSERT IGNORE INTO FILE_STORAGE(BASE_ID, PATH, DESCRIPTION, TYPE)
SELECT ID, 'ldbr', "Linkdb Current", 'LINKDB_LIVE'
FROM FILE_STORAGE_BASE WHERE NAME='Index Storage';
INSERT IGNORE INTO FILE_STORAGE(BASE_ID, PATH, DESCRIPTION, TYPE)
SELECT ID, 'ldbw', "Linkdb Staging Area", 'LINKDB_STAGING'
FROM FILE_STORAGE_BASE WHERE NAME='Index Storage';

View File

@ -0,0 +1,3 @@
DROP VIEW EC_URL_VIEW;
DROP TABLE EC_PAGE_DATA;
DROP TABLE EC_URL;

View File

@ -0,0 +1,3 @@
INSERT IGNORE INTO FILE_STORAGE_BASE(NAME, PATH, TYPE, PERMIT_TEMP)
VALUES
('Backup Storage', '/backup', 'BACKUP', true);

View File

@ -0,0 +1 @@
DELETE FROM FILE_STORAGE WHERE TYPE IN ('LEXICON_STAGING', 'LEXICON_LIVE');

View File

@ -0,0 +1,56 @@
plugins {
id 'java'
id "io.freefair.lombok" version "8.2.2"
id 'jvm-test-suite'
}
java {
toolchain {
languageVersion.set(JavaLanguageVersion.of(20))
}
}
configurations {
flywayMigration.extendsFrom(implementation)
}
dependencies {
implementation project(':code:common:model')
implementation libs.lombok
annotationProcessor libs.lombok
implementation libs.bundles.slf4j
implementation libs.guice
implementation libs.bundles.gson
implementation libs.notnull
implementation libs.sqlite
implementation libs.commons.lang3
implementation libs.trove
testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit
testImplementation libs.mockito
testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4')
testImplementation 'org.testcontainers:mariadb:1.17.4'
testImplementation 'org.testcontainers:junit-jupiter:1.17.4'
}
test {
maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
maxHeapSize = "8G"
useJUnitPlatform()
}
task fastTests(type: Test) {
maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
maxHeapSize = "8G"
useJUnitPlatform {
excludeTags "slow"
}
}

View File

@ -0,0 +1,102 @@
package nu.marginalia.linkdb;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.google.inject.name.Named;
import gnu.trove.list.TLongList;
import nu.marginalia.linkdb.model.LdbUrlDetail;
import nu.marginalia.model.EdgeUrl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
@Singleton
public class LinkdbReader {
private Path dbFile;
private volatile Connection connection;
private final Logger logger = LoggerFactory.getLogger(getClass());
@Inject
public LinkdbReader(@Named("linkdb-file") Path dbFile) throws SQLException {
this.dbFile = dbFile;
if (Files.exists(dbFile)) {
try {
connection = createConnection();
}
catch (SQLException ex) {
connection = null;
logger.error("Failed to load linkdb file", ex);
}
}
else {
logger.warn("No linkdb file {}", dbFile);
}
}
private Connection createConnection() throws SQLException {
String connStr = "jdbc:sqlite:" + dbFile.toString();
return DriverManager.getConnection(connStr);
}
public void switchInput(Path newDbFile) throws IOException, SQLException {
if (connection != null) {
connection.close();
}
Files.move(newDbFile, dbFile, StandardCopyOption.REPLACE_EXISTING);
connection = createConnection();
}
public List<LdbUrlDetail> getUrlDetails(TLongList ids) throws SQLException {
List<LdbUrlDetail> ret = new ArrayList<>(ids.size());
if (connection == null ||
connection.isClosed())
{
throw new RuntimeException("URL query temporarily unavailable due to database switch");
}
try (var stmt = connection.prepareStatement("""
SELECT ID, URL, TITLE, DESCRIPTION, WORDS_TOTAL, FORMAT, FEATURES, DATA_HASH, QUALITY, PUB_YEAR
FROM DOCUMENT WHERE ID = ?
""")) {
for (int i = 0; i < ids.size(); i++) {
long id = ids.get(i);
stmt.setLong(1, id);
var rs = stmt.executeQuery();
if (rs.next()) {
var url = new EdgeUrl(rs.getString("URL"));
ret.add(new LdbUrlDetail(
rs.getLong("ID"),
url,
rs.getString("TITLE"),
rs.getString("DESCRIPTION"),
rs.getDouble("QUALITY"),
rs.getString("FORMAT"),
rs.getInt("FEATURES"),
rs.getInt("PUB_YEAR"),
rs.getLong("DATA_HASH"),
rs.getInt("WORDS_TOTAL")
));
}
}
} catch (URISyntaxException e) {
throw new RuntimeException(e);
}
return ret;
}
}
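The switchInput() call is what enables the zero-downtime swap: a staging database is built elsewhere, then moved over the live file while the reader briefly reports itself unavailable. A minimal sketch, assuming hypothetical file locations (the 'ldbr'/'ldbw' directory names follow the migration above; the file name and paths are assumptions, and exception handling is omitted):
// Sketch only: swapping in a freshly built linkdb without restarting the service.
// Queries issued mid-swap see the "temporarily unavailable" RuntimeException.
Path liveDb   = Path.of("/index/ldbr/links.db");   // hypothetical live path
Path stagedDb = Path.of("/index/ldbw/links.db");   // hypothetical staging path
LinkdbReader reader = new LinkdbReader(liveDb);
reader.switchInput(stagedDb);  // closes the old connection, moves the file over the live db, reconnects
var details = reader.getUrlDetails(new TLongArrayList(new long[] { 1L }));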

View File

@ -0,0 +1,64 @@
package nu.marginalia.linkdb;
import nu.marginalia.linkdb.model.UrlStatus;
import java.io.IOException;
import java.nio.file.Path;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Types;
import java.util.List;
public class LinkdbStatusWriter {
private final Connection connection;
public LinkdbStatusWriter(Path outputFile) throws SQLException {
String connStr = "jdbc:sqlite:" + outputFile.toString();
connection = DriverManager.getConnection(connStr);
try (var stream = ClassLoader.getSystemResourceAsStream("db/linkdb-status.sql");
var stmt = connection.createStatement()
) {
var sql = new String(stream.readAllBytes());
stmt.executeUpdate(sql);
// Disable synchronous writing as this is a one-off operation with no recovery
stmt.execute("PRAGMA synchronous = OFF");
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public void add(List<UrlStatus> statuses) throws SQLException {
try (var stmt = connection.prepareStatement("""
INSERT INTO STATUS(ID, URL, STATUS, DESCRIPTION)
VALUES (?, ?, ?, ?)
""")) {
int count = 0;
for (var status : statuses) {
stmt.setLong(1, status.id());
stmt.setString(2, status.url().toString());
stmt.setString(3, status.status());
if (status.description() == null) {
stmt.setNull(4, Types.VARCHAR);
} else {
stmt.setString(4, status.description());
}
stmt.addBatch();
if (++count > 1000) {
count = 0;
stmt.executeBatch();
}
}
if (count != 0) {
stmt.executeBatch();
}
}
}
public void close() throws SQLException {
connection.close();
}
}

View File

@ -0,0 +1,80 @@
package nu.marginalia.linkdb;
import nu.marginalia.linkdb.model.LdbUrlDetail;
import java.io.IOException;
import java.nio.file.Path;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Types;
import java.util.List;
public class LinkdbWriter {
private final Connection connection;
public LinkdbWriter(Path outputFile) throws SQLException {
String connStr = "jdbc:sqlite:" + outputFile.toString();
connection = DriverManager.getConnection(connStr);
try (var stream = ClassLoader.getSystemResourceAsStream("db/linkdb-document.sql");
var stmt = connection.createStatement()
) {
var sql = new String(stream.readAllBytes());
stmt.executeUpdate(sql);
// Disable synchronous writing as this is a one-off operation with no recovery
stmt.execute("PRAGMA synchronous = OFF");
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public void add(LdbUrlDetail ldbUrlDetail) throws SQLException {
add(List.of(ldbUrlDetail));
}
public void add(List<LdbUrlDetail> ldbUrlDetail) throws SQLException {
try (var stmt = connection.prepareStatement("""
INSERT OR IGNORE INTO DOCUMENT(ID, URL, TITLE, DESCRIPTION, WORDS_TOTAL, FORMAT, FEATURES, DATA_HASH, QUALITY, PUB_YEAR)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""")) {
int i = 0;
for (var document : ldbUrlDetail) {
var url = document.url();
stmt.setLong(1, document.urlId());
stmt.setString(2, url.toString());
stmt.setString(3, document.title());
stmt.setString(4, document.description());
stmt.setInt(5, document.wordsTotal());
stmt.setString(6, document.format());
stmt.setInt(7, document.features());
stmt.setLong(8, document.dataHash());
stmt.setDouble(9, document.urlQuality());
if (document.pubYear() == null) {
stmt.setNull(10, Types.INTEGER);
} else {
stmt.setInt(10, document.pubYear());
}
stmt.addBatch();
if (++i > 1000) {
stmt.executeBatch();
i = 0;
}
}
if (i != 0) stmt.executeBatch();
}
}
public void close() throws SQLException {
connection.close();
}
}

View File

@ -0,0 +1,18 @@
package nu.marginalia.linkdb.model;
import nu.marginalia.model.EdgeUrl;
public record LdbUrlDetail(long urlId,
EdgeUrl url,
String title,
String description,
double urlQuality,
String format,
int features,
Integer pubYear,
long dataHash,
int wordsTotal
)
{
}

View File

@ -0,0 +1,24 @@
package nu.marginalia.linkdb.model;
public enum UrlProtocol {
HTTP,
HTTPS;
public static int encode(String str) {
if ("http".equalsIgnoreCase(str)) {
return HTTP.ordinal();
}
else if ("https".equalsIgnoreCase(str)) {
return HTTPS.ordinal();
}
throw new IllegalArgumentException(str);
}
public static String decode(int ordinal) {
return switch (values()[ordinal]) {
case HTTP -> "http";
case HTTPS -> "https";
};
};
}

View File

@ -0,0 +1,8 @@
package nu.marginalia.linkdb.model;
import nu.marginalia.model.EdgeUrl;
import javax.annotation.Nullable;
public record UrlStatus(long id, EdgeUrl url, String status, @Nullable String description) {
}

View File

@ -0,0 +1,17 @@
CREATE TABLE DOCUMENT (
ID INT8 PRIMARY KEY,
URL TEXT,
STATE INT,
TITLE TEXT NOT NULL,
DESCRIPTION TEXT NOT NULL,
WORDS_TOTAL INTEGER NOT NULL,
FORMAT TEXT NOT NULL,
FEATURES INTEGER NOT NULL,
DATA_HASH INTEGER NOT NULL,
QUALITY REAL NOT NULL,
PUB_YEAR INTEGER NOT NULL
);

View File

@ -0,0 +1,6 @@
CREATE TABLE STATUS (
ID INT8 PRIMARY KEY,
URL TEXT,
STATUS TEXT NOT NULL,
DESCRIPTION TEXT
);

View File

@ -0,0 +1,33 @@
package nu.marginalia.linkdb;
import nu.marginalia.linkdb.model.UrlStatus;
import nu.marginalia.model.EdgeUrl;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.SQLException;
import java.util.List;
public class LinkdbStatusWriterTest {
@Test
public void testCreate() throws IOException {
Path tempPath = Files.createTempFile("linkdb-status", ".db");
try {
var writer = new LinkdbStatusWriter(tempPath);
writer.add(List.of(
new UrlStatus(5, new EdgeUrl("https://www.marginalia.nu/x"), "y", null),
new UrlStatus(6, new EdgeUrl("https://www.marginalia.nu/y"), "y", "z")
));
writer.close();
} catch (SQLException e) {
throw new RuntimeException(e);
} catch (URISyntaxException e) {
throw new RuntimeException(e);
} finally {
Files.deleteIfExists(tempPath);
}
}
}

View File

@ -0,0 +1,42 @@
package nu.marginalia.linkdb;
import gnu.trove.list.array.TLongArrayList;
import nu.marginalia.linkdb.model.LdbUrlDetail;
import nu.marginalia.model.EdgeDomain;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.SQLException;
public class LinkdbWriterTest {
@Test
public void testCreate() throws IOException {
Path tempPath = Files.createTempFile("linkdb", ".db");
try {
var writer = new LinkdbWriter(tempPath);
writer.add(new LdbUrlDetail(
1,
new nu.marginalia.model.EdgeUrl("http", new EdgeDomain("example.com"), null, "/", null),
"Test",
"This is a test",
-4.,
"XHTML",
5,
2020,
0xF00BA3,
444
));
writer.close();
var reader = new LinkdbReader(tempPath);
var deets = reader.getUrlDetails(new TLongArrayList(new long[]{1}));
System.out.println(deets);
} catch (SQLException e) {
throw new RuntimeException(e);
} finally {
Files.deleteIfExists(tempPath);
}
}
}

View File

@ -6,7 +6,6 @@ import nu.marginalia.bigstring.BigString;
import nu.marginalia.bigstring.CompressedBigString;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.model.id.EdgeId;
import java.net.URISyntaxException;
@ -24,8 +23,6 @@ public class GsonFactory {
}
})
.registerTypeAdapter(EdgeDomain.class, (JsonDeserializer<EdgeDomain>) (json, typeOfT, context) -> new EdgeDomain(json.getAsString()))
.registerTypeAdapter(EdgeId.class, (JsonDeserializer<EdgeId<?>>) (json, typeOfT, context) -> new EdgeId<>(json.getAsInt()))
.registerTypeAdapter(EdgeId.class, (JsonSerializer<EdgeId<?>>) (src, typeOfSrc, context) -> new JsonPrimitive(src.id()))
.registerTypeAdapter(BigString.class, (JsonDeserializer<BigString>) (json, typeOfT, context) -> BigString.encode(json.getAsString()))
.registerTypeAdapter(BigString.class, (JsonSerializer<BigString>) (src, typeOfT, context) -> new JsonPrimitive(src.decode()))
.registerTypeAdapter(CompressedBigString.class, (JsonSerializer<BigString>) (src, typeOfT, context) -> new JsonPrimitive(src.decode()))

View File

@ -1,10 +0,0 @@
package nu.marginalia.model.id;
/**
* This exists entirely for strengthening the typing of IDs
*
* @param <T>
*/
public record EdgeId<T>(int id) {
}

View File

@ -1,34 +0,0 @@
package nu.marginalia.model.id;
import java.util.Arrays;
import java.util.stream.IntStream;
public record EdgeIdArray<T> (int... values) implements EdgeIdCollection<T> {
public static <T> EdgeIdArray<T> gather(IntStream stream) {
return new EdgeIdArray<>(stream.toArray());
}
@Override
public int[] values() {
return values;
}
@Override
public boolean isEmpty() {
return values.length == 0;
}
@Override
public int size() {
return values.length;
}
public int get(int idx) {
return values[idx];
}
public void sort() {
Arrays.sort(values);
}
}

View File

@ -1,28 +0,0 @@
package nu.marginalia.model.id;
import java.util.Arrays;
import java.util.Iterator;
import java.util.stream.IntStream;
public interface EdgeIdCollection<T> extends Iterable<EdgeId<T>> {
int size();
boolean isEmpty();
int[] values();
default IntStream stream() {
return Arrays.stream(values());
}
default Iterator<EdgeId<T>> iterator() {
return Arrays.stream(values()).mapToObj(EdgeId<T>::new).iterator();
}
default EdgeIdArray<T> asArray() {
return new EdgeIdArray<>(values());
}
default EdgeIdList<T> asList() {
return new EdgeIdList<>(values());
}
default EdgeIdSet<T> asSet() {
return new EdgeIdSet<>(values());
}
}

View File

@ -1,12 +0,0 @@
package nu.marginalia.model.id;
import gnu.trove.TIntCollection;
public interface EdgeIdCollectionMutable<T> {
TIntCollection underlyingCollection();
default void addAll(EdgeIdArray<T> other) { underlyingCollection().addAll(other.values()); }
default void addAll(EdgeIdList<T> other) { underlyingCollection().addAll(other.list()); }
default void addAll(EdgeIdCollection<T> other) { underlyingCollection().addAll(other.values()); }
}

View File

@ -1,48 +0,0 @@
package nu.marginalia.model.id;
import gnu.trove.TIntCollection;
import gnu.trove.list.array.TIntArrayList;
import java.util.stream.IntStream;
public record EdgeIdList<T> (TIntArrayList list) implements
EdgeIdCollection<T>,
EdgeIdCollectionMutable<T> {
public EdgeIdList(int... values) { this(new TIntArrayList(values)); }
public static <T> EdgeIdList<T> gather(IntStream stream) {
return stream.collect(EdgeIdList::new, EdgeIdList::add, EdgeIdList::addAll);
}
@Override
public int[] values() {
return list.toArray();
}
@Override
public boolean isEmpty() {
return list.isEmpty();
}
@Override
public int size() {
return list.size();
}
public int get(int idx) {
return list.get(idx);
}
public void add(int id) {
list.add(id);
}
public void sort() {
list.sort();
}
@Override
public TIntCollection underlyingCollection() {
return list;
}
}

View File

@ -1,52 +0,0 @@
package nu.marginalia.model.id;
import gnu.trove.TIntCollection;
import gnu.trove.set.hash.TIntHashSet;
import java.util.stream.IntStream;
public record EdgeIdSet<T> (TIntHashSet set) implements EdgeIdCollection<T>, EdgeIdCollectionMutable<T> {
public EdgeIdSet(int... values) {
this(new TIntHashSet(values.length, 0.5f, -1));
set.addAll(values);
}
public EdgeIdSet(int initialCapacity, float loadFactor) {
this(new TIntHashSet(initialCapacity, loadFactor, -1));
}
@Override
public TIntCollection underlyingCollection() {
return set;
}
public static <T> EdgeIdSet<T> gather(IntStream stream) {
return new EdgeIdSet<>(stream.toArray());
}
@Override
public int[] values() {
return set.toArray();
}
@Override
public boolean isEmpty() {
return set.isEmpty();
}
@Override
public int size() {
return set.size();
}
public boolean contains(int id) {
return set.contains(id);
}
public boolean add(int id) {
return set.add(id);
}
public boolean remove(int id) { return set.remove(id); }
}

View File

@ -0,0 +1,78 @@
package nu.marginalia.model.id;
/** URL id encoding scheme, including an optional ranking part that's used in the indices and washed away
* outside. The ranking part is put in the highest bits so that when we sort the documents by id, they're
* actually sorted by rank. Next is the domain id part, which keeps documents from the same domain clustered.
* Finally is the document ordinal part, which is a non-unique sequence number for within the current set of
* documents loaded. The same ID may be re-used over time as a new index is loaded.
* <p></p>
* <table>
* <tr><th>Part</th><th>Bits</th><th>Cardinality</th></tr>
* <tr>
* <td>rank</td><td>6 bits</td><td>64</td>
* </tr>
* <tr>
* <td>domain</td><td>31 bits</td><td>2 billion</td>
* </tr>
* <tr>
* <td>document</td><td>26 bits</td><td>67 million</td>
* </tr>
* </table>
* <p></p>
* Most significant bit is unused for now because I'm not routing Long.compareUnsigned() all over the codebase.
* <i>If</i> we end up needing more domains, we'll cross that bridge when we come to it.
*
* <h2>Coding Scheme</h2>
* <code><pre>
* [ | rank | domain | url ]
* 0 1 7 38 64
* </pre></code>
*/
public class UrlIdCodec {
private static final long RANK_MASK = 0xFE00_0000_0000_0000L;
private static final int DOCORD_MASK = 0x03FF_FFFF;
/** Encode a URL id without a ranking element */
public static long encodeId(int domainId, int documentOrdinal) {
domainId &= 0x7FFF_FFFF;
documentOrdinal &= 0x03FF_FFFF;
return ((long) domainId << 26) | documentOrdinal;
}
/** Add a ranking element to an existing combined URL id.
*
* @param rank [0,1] the importance of the domain, low is good
* @param urlId
*/
public static long addRank(float rank, long urlId) {
long rankPart = (int)(rank * (1<<6));
if (rankPart >= 64) rankPart = 63;
if (rankPart < 0) rankPart = 0;
return (urlId&(~RANK_MASK)) | (rankPart << 57);
}
/** Extract the domain component from this URL id */
public static int getDomainId(long combinedId) {
return (int) ((combinedId >>> 26) & 0x7FFF_FFFFL);
}
/** Extract the document ordinal component from this URL id */
public static int getDocumentOrdinal(long combinedId) {
return (int) (combinedId & DOCORD_MASK);
}
/** Extract the rank component from this URL id */
public static int getRank(long combinedId) {
return (int) (combinedId >>> 57);
}
/** Mask out the ranking element from this URL id */
public static long removeRank(long combinedId) {
return combinedId & ~RANK_MASK;
}
}
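A worked example of the layout above, with illustrative values (not from the patch):
long id     = UrlIdCodec.encodeId(5, 3);       // (5L << 26) | 3        = 0x0000_0000_1400_0003
long ranked = UrlIdCodec.addRank(0.5f, id);    // rank 32 in bits 57-62 = 0x4000_0000_1400_0003
UrlIdCodec.getDomainId(ranked);        // 5
UrlIdCodec.getDocumentOrdinal(ranked); // 3
UrlIdCodec.getRank(ranked);            // 32
UrlIdCodec.removeRank(ranked) == id;   // true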

View File

@ -0,0 +1,41 @@
package nu.marginalia.model.id;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
class UrlIdCodecTest {
@Test
public void testDocumentBounds() {
long encoded = UrlIdCodec.encodeId(0, ~0);
assertEquals(0, UrlIdCodec.getDomainId(encoded));
}
@Test
public void testDomainBounds() {
long encoded = UrlIdCodec.encodeId(~0, 0);
assertEquals(0x7FFF_FFFF, UrlIdCodec.getDomainId(encoded));
assertEquals(0, UrlIdCodec.getRank(encoded));
assertEquals(0, UrlIdCodec.getDocumentOrdinal(encoded));
}
@Test
public void testRankBoundsAdd() {
long encoded = UrlIdCodec.encodeId(0, 0);
encoded = UrlIdCodec.addRank(1.f, encoded);
assertEquals(0, UrlIdCodec.getDomainId(encoded));
assertEquals(63, UrlIdCodec.getRank(encoded));
assertEquals(0, UrlIdCodec.getDocumentOrdinal(encoded));
}
@Test
public void testRemoveRank() {
long encoded = UrlIdCodec.encodeId(0x7FFF_FFFF, ~0);
encoded = UrlIdCodec.addRank(1.f, encoded);
encoded = UrlIdCodec.removeRank(encoded);
assertEquals(0x7FFF_FFFFL, UrlIdCodec.getDomainId(encoded));
assertEquals(0, UrlIdCodec.getRank(encoded));
assertEquals(0x03FF_FFFF, UrlIdCodec.getDocumentOrdinal(encoded));
}
}

View File

@ -0,0 +1,35 @@
package nu.marginalia.process.control;
/** Dummy implementation of ProcessHeartbeat that does nothing */
public class FakeProcessHeartbeat implements ProcessHeartbeat {
@Override
public <T extends Enum<T>> ProcessTaskHeartbeat<T> createProcessTaskHeartbeat(Class<T> steps, String processName) {
return new ProcessTaskHeartbeat<>() {
@Override
public void progress(T step) {}
@Override
public void shutDown() {}
@Override
public void close() {}
};
}
@Override
public ProcessAdHocTaskHeartbeat createAdHocTaskHeartbeat(String processName) {
return new ProcessAdHocTaskHeartbeat() {
@Override
public void progress(String step, int progress, int total) {}
@Override
public void close() {}
};
}
@Override
public void setProgress(double progress) {}
}

View File

@ -0,0 +1,7 @@
package nu.marginalia.process.control;
public interface ProcessAdHocTaskHeartbeat extends AutoCloseable {
void progress(String step, int progress, int total);
void close();
}
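A minimal usage sketch for the ad-hoc variant; the injected ProcessHeartbeat, the task name, and the work-item list are assumptions for illustration:
// Sketch only: reporting progress over an arbitrary collection of work items.
try (ProcessAdHocTaskHeartbeat heartbeat = processHeartbeat.createAdHocTaskHeartbeat("exampleTask")) {
    for (int i = 0; i < items.size(); i++) {
        heartbeat.progress(items.get(i).toString(), i + 1, items.size());
        // ... process items.get(i) ...
    }
}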

View File

@ -0,0 +1,187 @@
package nu.marginalia.process.control;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.ProcessConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.SQLException;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
/** This object sends a heartbeat to the database every few seconds,
 * updating with the progress of a task within a service. Unlike the enum-based
 * ProcessTaskHeartbeatImpl, progress is reported ad hoc as a step name together
 * with the current step number and the total step count.
 */
public class ProcessAdHocTaskHeartbeatImpl implements AutoCloseable, ProcessAdHocTaskHeartbeat {
private final Logger logger = LoggerFactory.getLogger(ProcessAdHocTaskHeartbeatImpl.class);
private final String taskName;
private final String taskBase;
private final String instanceUUID;
private final HikariDataSource dataSource;
private final Thread runnerThread;
private final int heartbeatInterval = Integer.getInteger("mcp.heartbeat.interval", 1);
private final String serviceInstanceUUID;
private int progress;
private volatile boolean running = false;
private volatile String step = "-";
ProcessAdHocTaskHeartbeatImpl(ProcessConfiguration configuration,
String taskName,
HikariDataSource dataSource)
{
this.taskName = configuration.processName() + "." + taskName + ":" + configuration.node();
this.taskBase = configuration.processName() + "." + taskName;
this.dataSource = dataSource;
this.instanceUUID = UUID.randomUUID().toString();
this.serviceInstanceUUID = configuration.instanceUuid().toString();
heartbeatInit();
runnerThread = new Thread(this::run);
runnerThread.start();
}
/** Update the progress of the task. This is a fast function that doesn't block;
* the actual update is done in a separate thread.
*
* @param step The name of the current step in the task.
* @param stepProgress How many steps have been completed so far.
* @param stepCount The total number of steps in the task.
*/
@Override
public void progress(String step, int stepProgress, int stepCount) {
this.step = step;
// progress is reported as a percentage of the total step count
this.progress = (int) Math.round(100. * stepProgress / (double) stepCount);
logger.info("ProcessTask {} progress: {}%", taskBase, progress);
}
public void shutDown() {
if (!running)
return;
running = false;
try {
runnerThread.join();
heartbeatStop();
}
catch (InterruptedException|SQLException ex) {
logger.warn("ProcessHeartbeat shutdown failed", ex);
}
}
private void run() {
if (!running)
running = true;
else
return;
try {
while (running) {
try {
heartbeatUpdate();
}
catch (SQLException ex) {
logger.warn("ProcessHeartbeat failed to update", ex);
}
TimeUnit.SECONDS.sleep(heartbeatInterval);
}
}
catch (InterruptedException ex) {
logger.error("ProcessHeartbeat caught irrecoverable exception, killing service", ex);
System.exit(255);
}
}
private void heartbeatInit() {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
INSERT INTO TASK_HEARTBEAT (TASK_NAME, TASK_BASE, INSTANCE, SERVICE_INSTANCE, HEARTBEAT_TIME, STATUS)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP(6), 'STARTING')
ON DUPLICATE KEY UPDATE
INSTANCE = ?,
SERVICE_INSTANCE = ?,
HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
STATUS = 'STARTING'
"""
))
{
stmt.setString(1, taskName);
stmt.setString(2, taskBase);
stmt.setString(3, instanceUUID);
stmt.setString(4, serviceInstanceUUID);
stmt.setString(5, instanceUUID);
stmt.setString(6, serviceInstanceUUID);
stmt.executeUpdate();
}
}
catch (SQLException ex) {
logger.error("ProcessHeartbeat failed to initialize", ex);
throw new RuntimeException(ex);
}
}
private void heartbeatUpdate() throws SQLException {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
UPDATE TASK_HEARTBEAT
SET HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
STATUS = 'RUNNING',
PROGRESS = ?,
STAGE_NAME = ?
WHERE INSTANCE = ?
""")
)
{
stmt.setInt(1, progress);
stmt.setString(2, step);
stmt.setString(3, instanceUUID);
stmt.executeUpdate();
}
}
}
private void heartbeatStop() throws SQLException {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
UPDATE TASK_HEARTBEAT
SET HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
STATUS='STOPPED',
PROGRESS = ?,
STAGE_NAME = ?
WHERE INSTANCE = ?
""")
)
{
stmt.setInt(1, progress);
stmt.setString( 2, step);
stmt.setString( 3, instanceUUID);
stmt.executeUpdate();
}
}
}
@Override
public void close() {
shutDown();
}
}

View File

@ -1,155 +1,11 @@
package nu.marginalia.process.control;
import com.google.inject.ImplementedBy;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.ProcessConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ImplementedBy(ProcessHeartbeatImpl.class)
public interface ProcessHeartbeat {
<T extends Enum<T>> ProcessTaskHeartbeat<T> createProcessTaskHeartbeat(Class<T> steps, String processName);
ProcessAdHocTaskHeartbeat createAdHocTaskHeartbeat(String processName);
import java.sql.SQLException;
import java.util.concurrent.TimeUnit;
/** This service sends a heartbeat to the database every 5 seconds.
*/
@Singleton
public class ProcessHeartbeat {
private final Logger logger = LoggerFactory.getLogger(ProcessHeartbeat.class);
private final String processName;
private final String processBase;
private final String instanceUUID;
private final HikariDataSource dataSource;
private final Thread runnerThread;
private final int heartbeatInterval = Integer.getInteger("mcp.heartbeat.interval", 1);
private volatile boolean running = false;
private volatile int progress = -1;
@Inject
public ProcessHeartbeat(ProcessConfiguration configuration,
HikariDataSource dataSource)
{
this.processName = configuration.processName() + ":" + configuration.node();
this.processBase = configuration.processName();
this.dataSource = dataSource;
this.instanceUUID = configuration.instanceUuid().toString();
runnerThread = new Thread(this::run);
Runtime.getRuntime().addShutdownHook(new Thread(this::shutDown));
}
public void setProgress(double progress) {
this.progress = (int) (progress * 100);
}
public void start() {
if (!running) {
runnerThread.start();
}
}
public void shutDown() {
if (!running)
return;
running = false;
try {
runnerThread.join();
heartbeatStop();
}
catch (InterruptedException|SQLException ex) {
logger.warn("ServiceHeartbeat shutdown failed", ex);
}
}
private void run() {
if (!running)
running = true;
else
return;
try {
heartbeatInit();
while (running) {
try {
heartbeatUpdate();
}
catch (SQLException ex) {
logger.warn("ServiceHeartbeat failed to update", ex);
}
TimeUnit.SECONDS.sleep(heartbeatInterval);
}
}
catch (InterruptedException|SQLException ex) {
logger.error("ServiceHeartbeat caught irrecoverable exception, killing service", ex);
System.exit(255);
}
}
private void heartbeatInit() throws SQLException {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
INSERT INTO PROCESS_HEARTBEAT (PROCESS_NAME, PROCESS_BASE, INSTANCE, HEARTBEAT_TIME, STATUS)
VALUES (?, ?, ?, CURRENT_TIMESTAMP(6), 'STARTING')
ON DUPLICATE KEY UPDATE
INSTANCE = ?,
HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
STATUS = 'STARTING'
"""
))
{
stmt.setString(1, processName);
stmt.setString(2, processBase);
stmt.setString(3, instanceUUID);
stmt.setString(4, instanceUUID);
stmt.executeUpdate();
}
}
}
private void heartbeatUpdate() throws SQLException {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
UPDATE PROCESS_HEARTBEAT
SET HEARTBEAT_TIME = CURRENT_TIMESTAMP(6), STATUS = 'RUNNING', PROGRESS = ?
WHERE INSTANCE = ?
""")
)
{
stmt.setInt(1, progress);
stmt.setString(2, instanceUUID);
stmt.executeUpdate();
}
}
}
private void heartbeatStop() throws SQLException {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
UPDATE PROCESS_HEARTBEAT
SET HEARTBEAT_TIME = CURRENT_TIMESTAMP(6), STATUS='STOPPED', PROGRESS=?
WHERE INSTANCE = ?
""")
)
{
stmt.setInt(1, progress);
stmt.setString( 2, instanceUUID);
stmt.executeUpdate();
}
}
}
void setProgress(double progress);
}

View File

@ -0,0 +1,170 @@
package nu.marginalia.process.control;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.ProcessConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.SQLException;
import java.util.concurrent.TimeUnit;
/** This service sends a heartbeat to the database every 5 seconds.
*/
@Singleton
public class ProcessHeartbeatImpl implements ProcessHeartbeat {
private final Logger logger = LoggerFactory.getLogger(ProcessHeartbeatImpl.class);
private final String processName;
private final String processBase;
private final String instanceUUID;
@org.jetbrains.annotations.NotNull
private final ProcessConfiguration configuration;
private final HikariDataSource dataSource;
private final Thread runnerThread;
private final int heartbeatInterval = Integer.getInteger("mcp.heartbeat.interval", 1);
private volatile boolean running = false;
private volatile int progress = -1;
@Inject
public ProcessHeartbeatImpl(ProcessConfiguration configuration,
HikariDataSource dataSource)
{
this.processName = configuration.processName() + ":" + configuration.node();
this.processBase = configuration.processName();
this.configuration = configuration;
this.dataSource = dataSource;
this.instanceUUID = configuration.instanceUuid().toString();
runnerThread = new Thread(this::run);
Runtime.getRuntime().addShutdownHook(new Thread(this::shutDown));
}
@Override
public <T extends Enum<T>> ProcessTaskHeartbeat<T> createProcessTaskHeartbeat(Class<T> steps, String processName) {
return new ProcessTaskHeartbeatImpl<>(steps, configuration, processName, dataSource);
}
@Override
public ProcessAdHocTaskHeartbeat createAdHocTaskHeartbeat(String processName) {
return new ProcessAdHocTaskHeartbeatImpl(configuration, processName, dataSource);
}
@Override
public void setProgress(double progress) {
this.progress = (int) (progress * 100);
}
public void start() {
if (!running) {
runnerThread.start();
}
}
public void shutDown() {
if (!running)
return;
running = false;
try {
runnerThread.join();
heartbeatStop();
}
catch (InterruptedException|SQLException ex) {
logger.warn("ServiceHeartbeat shutdown failed", ex);
}
}
private void run() {
if (!running)
running = true;
else
return;
try {
heartbeatInit();
while (running) {
try {
heartbeatUpdate();
}
catch (SQLException ex) {
logger.warn("ServiceHeartbeat failed to update", ex);
}
TimeUnit.SECONDS.sleep(heartbeatInterval);
}
}
catch (InterruptedException|SQLException ex) {
logger.error("ServiceHeartbeat caught irrecoverable exception, killing service", ex);
System.exit(255);
}
}
private void heartbeatInit() throws SQLException {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
INSERT INTO PROCESS_HEARTBEAT (PROCESS_NAME, PROCESS_BASE, INSTANCE, HEARTBEAT_TIME, STATUS)
VALUES (?, ?, ?, CURRENT_TIMESTAMP(6), 'STARTING')
ON DUPLICATE KEY UPDATE
INSTANCE = ?,
HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
STATUS = 'STARTING'
"""
))
{
stmt.setString(1, processName);
stmt.setString(2, processBase);
stmt.setString(3, instanceUUID);
stmt.setString(4, instanceUUID);
stmt.executeUpdate();
}
}
}
private void heartbeatUpdate() throws SQLException {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
UPDATE PROCESS_HEARTBEAT
SET HEARTBEAT_TIME = CURRENT_TIMESTAMP(6), STATUS = 'RUNNING', PROGRESS = ?
WHERE INSTANCE = ?
""")
)
{
stmt.setInt(1, progress);
stmt.setString(2, instanceUUID);
stmt.executeUpdate();
}
}
}
private void heartbeatStop() throws SQLException {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
UPDATE PROCESS_HEARTBEAT
SET HEARTBEAT_TIME = CURRENT_TIMESTAMP(6), STATUS='STOPPED', PROGRESS=?
WHERE INSTANCE = ?
""")
)
{
stmt.setInt(1, progress);
stmt.setString( 2, instanceUUID);
stmt.executeUpdate();
}
}
}
}

View File

@ -0,0 +1,9 @@
package nu.marginalia.process.control;
public interface ProcessTaskHeartbeat<T extends Enum<T>> extends AutoCloseable {
void progress(T step);
void shutDown();
void close();
}

View File

@ -0,0 +1,192 @@
package nu.marginalia.process.control;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.ProcessConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.SQLException;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
/** This object sends a heartbeat to the database every few seconds,
* updating with the progress of a task within a service. Progress is tracked by providing
* enumerations corresponding to the steps in the task. It's important they're arranged in the same
* order as the steps in the task in order to get an accurate progress tracking.
*/
public class ProcessTaskHeartbeatImpl<T extends Enum<T>> implements AutoCloseable, ProcessTaskHeartbeat<T> {
private final Logger logger = LoggerFactory.getLogger(ProcessTaskHeartbeatImpl.class);
private final String taskName;
private final String taskBase;
private final String instanceUUID;
private final HikariDataSource dataSource;
private final Thread runnerThread;
private final int heartbeatInterval = Integer.getInteger("mcp.heartbeat.interval", 1);
private final String serviceInstanceUUID;
private final int stepCount;
private volatile boolean running = false;
private volatile int stepNum = 0;
private volatile String step = "-";
ProcessTaskHeartbeatImpl(Class<T> stepClass,
ProcessConfiguration configuration,
String taskName,
HikariDataSource dataSource)
{
this.taskName = configuration.processName() + "." + taskName + ":" + configuration.node();
this.taskBase = configuration.processName() + "." + taskName;
this.dataSource = dataSource;
this.instanceUUID = UUID.randomUUID().toString();
this.serviceInstanceUUID = configuration.instanceUuid().toString();
this.stepCount = stepClass.getEnumConstants().length;
heartbeatInit();
runnerThread = new Thread(this::run);
runnerThread.start();
}
/** Update the progress of the task. This is a fast function that doesn't block;
* the actual update is done in a separate thread.
*
* @param step The current step in the task.
*/
@Override
public void progress(T step) {
this.step = step.name();
// off by one since we calculate the progress based on the number of steps,
// and Enum.ordinal() is zero-based (so the 5th step in a 5 step task is 4, not 5; resulting in the
// final progress being 80% and not 100%)
this.stepNum = 1 + step.ordinal();
logger.info("ProcessTask {} progress: {}", taskBase, step.name());
}
@Override
public void shutDown() {
if (!running)
return;
running = false;
try {
runnerThread.join();
heartbeatStop();
}
catch (InterruptedException|SQLException ex) {
logger.warn("ProcessHeartbeat shutdown failed", ex);
}
}
private void run() {
if (!running)
running = true;
else
return;
try {
while (running) {
try {
heartbeatUpdate();
}
catch (SQLException ex) {
logger.warn("ProcessHeartbeat failed to update", ex);
}
TimeUnit.SECONDS.sleep(heartbeatInterval);
}
}
catch (InterruptedException ex) {
logger.error("ProcessHeartbeat caught irrecoverable exception, killing service", ex);
System.exit(255);
}
}
private void heartbeatInit() {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
INSERT INTO TASK_HEARTBEAT (TASK_NAME, TASK_BASE, INSTANCE, SERVICE_INSTANCE, HEARTBEAT_TIME, STATUS)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP(6), 'STARTING')
ON DUPLICATE KEY UPDATE
INSTANCE = ?,
SERVICE_INSTANCE = ?,
HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
STATUS = 'STARTING'
"""
))
{
stmt.setString(1, taskName);
stmt.setString(2, taskBase);
stmt.setString(3, instanceUUID);
stmt.setString(4, serviceInstanceUUID);
stmt.setString(5, instanceUUID);
stmt.setString(6, serviceInstanceUUID);
stmt.executeUpdate();
}
}
catch (SQLException ex) {
logger.error("ProcessHeartbeat failed to initialize", ex);
throw new RuntimeException(ex);
}
}
private void heartbeatUpdate() throws SQLException {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
UPDATE TASK_HEARTBEAT
SET HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
STATUS = 'RUNNING',
PROGRESS = ?,
STAGE_NAME = ?
WHERE INSTANCE = ?
""")
)
{
stmt.setInt(1, (int) Math.round(100 * stepNum / (double) stepCount));
stmt.setString(2, step);
stmt.setString(3, instanceUUID);
stmt.executeUpdate();
}
}
}
private void heartbeatStop() throws SQLException {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
UPDATE TASK_HEARTBEAT
SET HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
STATUS='STOPPED',
PROGRESS = ?,
STAGE_NAME = ?
WHERE INSTANCE = ?
""")
)
{
stmt.setInt(1, (int) Math.round(100 * stepNum / (double) stepCount));
stmt.setString( 2, step);
stmt.setString( 3, instanceUUID);
stmt.executeUpdate();
}
}
}
@Override
public void close() {
shutDown();
}
}
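A minimal usage sketch for the enum-based variant, assuming a hypothetical ConvertSteps enum; each step's ordinal is translated into a percentage as described in the progress() comment above:
// Sketch only: the enum constants define both the step order and the total step count.
enum ConvertSteps { READ, PROCESS, WRITE }   // hypothetical steps
void runTask(ProcessHeartbeat processHeartbeat) {
    try (ProcessTaskHeartbeat<ConvertSteps> heartbeat =
             processHeartbeat.createProcessTaskHeartbeat(ConvertSteps.class, "exampleTask")) {
        heartbeat.progress(ConvertSteps.READ);     // reported as 33%
        heartbeat.progress(ConvertSteps.PROCESS);  // reported as 67%
        heartbeat.progress(ConvertSteps.WRITE);    // reported as 100%
    }
}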

View File

@ -0,0 +1,14 @@
package nu.marginalia.service.control;
/** Dummy implementation of ServiceHeartbeat that does nothing */
public class FakeServiceHeartbeat implements ServiceHeartbeat {
@Override
public <T extends Enum<T>> ServiceTaskHeartbeat<T> createServiceTaskHeartbeat(Class<T> steps, String processName) {
return new ServiceTaskHeartbeat<T>() {
@Override
public void progress(T step) {}
@Override
public void close() {}
};
}
}

View File

@ -1,157 +1,8 @@
package nu.marginalia.service.control;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.service.module.ServiceConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.SQLException;
import java.util.concurrent.TimeUnit;
/** This service sends a heartbeat to the database every 5 seconds,
* updating the control service with the liveness information for the service.
*/
@Singleton
public class ServiceHeartbeat {
private final Logger logger = LoggerFactory.getLogger(ServiceHeartbeat.class);
private final String serviceName;
private final String serviceBase;
private final String instanceUUID;
private final ServiceConfiguration configuration;
private final ServiceEventLog eventLog;
private final HikariDataSource dataSource;
private final Thread runnerThread;
private final int heartbeatInterval = Integer.getInteger("mcp.heartbeat.interval", 5);
private volatile boolean running = false;
@Inject
public ServiceHeartbeat(ServiceConfiguration configuration,
ServiceEventLog eventLog,
HikariDataSource dataSource)
{
this.serviceName = configuration.serviceName() + ":" + configuration.node();
this.serviceBase = configuration.serviceName();
this.configuration = configuration;
this.eventLog = eventLog;
this.dataSource = dataSource;
this.instanceUUID = configuration.instanceUuid().toString();
runnerThread = new Thread(this::run);
Runtime.getRuntime().addShutdownHook(new Thread(this::shutDown));
}
public <T extends Enum<T>> ServiceTaskHeartbeat<T> createServiceTaskHeartbeat(Class<T> steps, String processName) {
return new ServiceTaskHeartbeat<>(steps, configuration, processName, eventLog, dataSource);
}
public void start() {
if (!running) {
runnerThread.start();
}
}
public void shutDown() {
if (!running)
return;
running = false;
try {
runnerThread.join();
heartbeatStop();
}
catch (InterruptedException|SQLException ex) {
logger.warn("ServiceHeartbeat shutdown failed", ex);
}
}
private void run() {
if (!running)
running = true;
else
return;
try {
heartbeatInit();
while (running) {
try {
heartbeatUpdate();
}
catch (SQLException ex) {
logger.warn("ServiceHeartbeat failed to update", ex);
}
TimeUnit.SECONDS.sleep(heartbeatInterval);
}
}
catch (InterruptedException|SQLException ex) {
logger.error("ServiceHeartbeat caught irrecoverable exception, killing service", ex);
System.exit(255);
}
}
private void heartbeatInit() throws SQLException {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
INSERT INTO SERVICE_HEARTBEAT (SERVICE_NAME, SERVICE_BASE, INSTANCE, HEARTBEAT_TIME, ALIVE)
VALUES (?, ?, ?, CURRENT_TIMESTAMP(6), 1)
ON DUPLICATE KEY UPDATE
INSTANCE = ?,
HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
ALIVE = 1
"""
))
{
stmt.setString(1, serviceName);
stmt.setString(2, serviceBase);
stmt.setString(3, instanceUUID);
stmt.setString(4, instanceUUID);
stmt.executeUpdate();
}
}
}
private void heartbeatUpdate() throws SQLException {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
UPDATE SERVICE_HEARTBEAT
SET HEARTBEAT_TIME = CURRENT_TIMESTAMP(6)
WHERE INSTANCE = ? AND ALIVE = 1
""")
)
{
stmt.setString(1, instanceUUID);
stmt.executeUpdate();
}
}
}
private void heartbeatStop() throws SQLException {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
UPDATE SERVICE_HEARTBEAT
SET HEARTBEAT_TIME = CURRENT_TIMESTAMP(6), ALIVE = 0
WHERE INSTANCE = ?
""")
)
{
stmt.setString(1, instanceUUID);
stmt.executeUpdate();
}
}
}
import com.google.inject.ImplementedBy;
@ImplementedBy(ServiceHeartbeatImpl.class)
public interface ServiceHeartbeat {
<T extends Enum<T>> ServiceTaskHeartbeat<T> createServiceTaskHeartbeat(Class<T> steps, String processName);
}

View File

@ -0,0 +1,158 @@
package nu.marginalia.service.control;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.service.module.ServiceConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.SQLException;
import java.util.concurrent.TimeUnit;
/** This service sends a heartbeat to the database every 5 seconds,
* updating the control service with the liveness information for the service.
*/
@Singleton
public class ServiceHeartbeatImpl implements ServiceHeartbeat {
private final Logger logger = LoggerFactory.getLogger(ServiceHeartbeatImpl.class);
private final String serviceName;
private final String serviceBase;
private final String instanceUUID;
private final ServiceConfiguration configuration;
private final ServiceEventLog eventLog;
private final HikariDataSource dataSource;
private final Thread runnerThread;
private final int heartbeatInterval = Integer.getInteger("mcp.heartbeat.interval", 5);
private volatile boolean running = false;
@Inject
public ServiceHeartbeatImpl(ServiceConfiguration configuration,
ServiceEventLog eventLog,
HikariDataSource dataSource)
{
this.serviceName = configuration.serviceName() + ":" + configuration.node();
this.serviceBase = configuration.serviceName();
this.configuration = configuration;
this.eventLog = eventLog;
this.dataSource = dataSource;
this.instanceUUID = configuration.instanceUuid().toString();
runnerThread = new Thread(this::run);
Runtime.getRuntime().addShutdownHook(new Thread(this::shutDown));
}
@Override
public <T extends Enum<T>> ServiceTaskHeartbeat<T> createServiceTaskHeartbeat(Class<T> steps, String processName) {
return new ServiceTaskHeartbeatImpl<>(steps, configuration, processName, eventLog, dataSource);
}
public void start() {
if (!running) {
runnerThread.start();
}
}
public void shutDown() {
if (!running)
return;
running = false;
try {
runnerThread.join();
heartbeatStop();
}
catch (InterruptedException|SQLException ex) {
logger.warn("ServiceHeartbeat shutdown failed", ex);
}
}
private void run() {
if (!running)
running = true;
else
return;
try {
heartbeatInit();
while (running) {
try {
heartbeatUpdate();
}
catch (SQLException ex) {
logger.warn("ServiceHeartbeat failed to update", ex);
}
TimeUnit.SECONDS.sleep(heartbeatInterval);
}
}
catch (InterruptedException|SQLException ex) {
logger.error("ServiceHeartbeat caught irrecoverable exception, killing service", ex);
System.exit(255);
}
}
private void heartbeatInit() throws SQLException {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
INSERT INTO SERVICE_HEARTBEAT (SERVICE_NAME, SERVICE_BASE, INSTANCE, HEARTBEAT_TIME, ALIVE)
VALUES (?, ?, ?, CURRENT_TIMESTAMP(6), 1)
ON DUPLICATE KEY UPDATE
INSTANCE = ?,
HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
ALIVE = 1
"""
))
{
stmt.setString(1, serviceName);
stmt.setString(2, serviceBase);
stmt.setString(3, instanceUUID);
stmt.setString(4, instanceUUID);
stmt.executeUpdate();
}
}
}
private void heartbeatUpdate() throws SQLException {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
UPDATE SERVICE_HEARTBEAT
SET HEARTBEAT_TIME = CURRENT_TIMESTAMP(6)
WHERE INSTANCE = ? AND ALIVE = 1
""")
)
{
stmt.setString(1, instanceUUID);
stmt.executeUpdate();
}
}
}
private void heartbeatStop() throws SQLException {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
UPDATE SERVICE_HEARTBEAT
SET HEARTBEAT_TIME = CURRENT_TIMESTAMP(6), ALIVE = 0
WHERE INSTANCE = ?
""")
)
{
stmt.setString(1, instanceUUID);
stmt.executeUpdate();
}
}
}
}

View File

@ -1,196 +1,8 @@
package nu.marginalia.service.control;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.service.module.ServiceConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.SQLException;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
/** This object sends a heartbeat to the database every few seconds,
* updating with the progress of a task within a service. Progress is tracked by providing
* enumerations corresponding to the steps in the task. It's important that they are arranged in the
* same order as the steps in the task, in order to get accurate progress tracking.
*/
public class ServiceTaskHeartbeat<T extends Enum<T>> implements AutoCloseable {
private final Logger logger = LoggerFactory.getLogger(ServiceTaskHeartbeat.class);
private final String taskName;
private final String taskBase;
private final String instanceUUID;
private final HikariDataSource dataSource;
private final Thread runnerThread;
private final int heartbeatInterval = Integer.getInteger("mcp.heartbeat.interval", 1);
private final String serviceInstanceUUID;
private final int stepCount;
private final ServiceEventLog eventLog;
private volatile boolean running = false;
private volatile int stepNum = 0;
private volatile String step = "-";
ServiceTaskHeartbeat(Class<T> stepClass,
ServiceConfiguration configuration,
String taskName,
ServiceEventLog eventLog,
HikariDataSource dataSource)
{
this.eventLog = eventLog;
this.taskName = configuration.serviceName() + "." + taskName + ":" + configuration.node();
this.taskBase = configuration.serviceName() + "." + taskName;
this.dataSource = dataSource;
this.instanceUUID = UUID.randomUUID().toString();
this.serviceInstanceUUID = configuration.instanceUuid().toString();
this.stepCount = stepClass.getEnumConstants().length;
heartbeatInit();
runnerThread = new Thread(this::run);
runnerThread.start();
}
/** Update the progress of the task. This is a fast function that doesn't block;
* the actual update is done in a separate thread.
*
* @param step The current step in the task.
*/
public void progress(T step) {
this.step = step.name();
// off by one since we calculate the progress based on the number of steps,
// and Enum.ordinal() is zero-based (so the 5th step in a 5 step task is 4, not 5; resulting in the
// final progress being 80% and not 100%)
this.stepNum = 1 + step.ordinal();
logger.info("ServiceTask {} progress: {}", taskBase, step.name());
eventLog.logEvent("TASK-STEP", taskName + " = " + step.name());
}
public void shutDown() {
if (!running)
return;
running = false;
try {
runnerThread.join();
heartbeatStop();
}
catch (InterruptedException|SQLException ex) {
logger.warn("ServiceHeartbeat shutdown failed", ex);
}
}
private void run() {
if (!running)
running = true;
else
return;
try {
while (running) {
try {
heartbeatUpdate();
}
catch (SQLException ex) {
logger.warn("ServiceHeartbeat failed to update", ex);
}
TimeUnit.SECONDS.sleep(heartbeatInterval);
}
}
catch (InterruptedException ex) {
logger.error("ServiceHeartbeat caught irrecoverable exception, killing service", ex);
System.exit(255);
}
}
private void heartbeatInit() {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
INSERT INTO TASK_HEARTBEAT (TASK_NAME, TASK_BASE, INSTANCE, SERVICE_INSTANCE, HEARTBEAT_TIME, STATUS)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP(6), 'STARTING')
ON DUPLICATE KEY UPDATE
INSTANCE = ?,
SERVICE_INSTANCE = ?,
HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
STATUS = 'STARTING'
"""
))
{
stmt.setString(1, taskName);
stmt.setString(2, taskBase);
stmt.setString(3, instanceUUID);
stmt.setString(4, serviceInstanceUUID);
stmt.setString(5, instanceUUID);
stmt.setString(6, serviceInstanceUUID);
stmt.executeUpdate();
}
}
catch (SQLException ex) {
logger.error("ServiceHeartbeat failed to initialize", ex);
throw new RuntimeException(ex);
}
eventLog.logEvent("TASK-STARTED", taskName);
}
private void heartbeatUpdate() throws SQLException {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
UPDATE TASK_HEARTBEAT
SET HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
STATUS = 'RUNNING',
PROGRESS = ?,
STAGE_NAME = ?
WHERE INSTANCE = ?
""")
)
{
stmt.setInt(1, (int) Math.round(100 * stepNum / (double) stepCount));
stmt.setString(2, step);
stmt.setString(3, instanceUUID);
stmt.executeUpdate();
}
}
}
private void heartbeatStop() throws SQLException {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
UPDATE TASK_HEARTBEAT
SET HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
STATUS='STOPPED',
PROGRESS = ?,
STAGE_NAME = ?
WHERE INSTANCE = ?
""")
)
{
stmt.setInt(1, (int) Math.round(100 * stepNum / (double) stepCount));
stmt.setString( 2, step);
stmt.setString( 3, instanceUUID);
stmt.executeUpdate();
}
}
eventLog.logEvent("TASK-TERMINATED", taskName);
}
public interface ServiceTaskHeartbeat<T extends Enum<T>> extends AutoCloseable {
void progress(T step);
@Override
public void close() {
shutDown();
}
void close();
}

View File

@ -0,0 +1,197 @@
package nu.marginalia.service.control;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.service.module.ServiceConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.SQLException;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
/** This object sends a heartbeat to the database every few seconds,
* updating with the progress of a task within a service. Progress is tracked by providing
* enumerations corresponding to the steps in the task. It's important that they are arranged in the
* same order as the steps in the task, in order to get accurate progress tracking.
*/
public class ServiceTaskHeartbeatImpl<T extends Enum<T>> implements ServiceTaskHeartbeat<T> {
private final Logger logger = LoggerFactory.getLogger(ServiceTaskHeartbeatImpl.class);
private final String taskName;
private final String taskBase;
private final String instanceUUID;
private final HikariDataSource dataSource;
private final Thread runnerThread;
private final int heartbeatInterval = Integer.getInteger("mcp.heartbeat.interval", 1);
private final String serviceInstanceUUID;
private final int stepCount;
private final ServiceEventLog eventLog;
private volatile boolean running = false;
private volatile int stepNum = 0;
private volatile String step = "-";
ServiceTaskHeartbeatImpl(Class<T> stepClass,
ServiceConfiguration configuration,
String taskName,
ServiceEventLog eventLog,
HikariDataSource dataSource)
{
this.eventLog = eventLog;
this.taskName = configuration.serviceName() + "." + taskName + ":" + configuration.node();
this.taskBase = configuration.serviceName() + "." + taskName;
this.dataSource = dataSource;
this.instanceUUID = UUID.randomUUID().toString();
this.serviceInstanceUUID = configuration.instanceUuid().toString();
this.stepCount = stepClass.getEnumConstants().length;
heartbeatInit();
runnerThread = new Thread(this::run);
runnerThread.start();
}
/** Update the progress of the task. This is a fast function that doesn't block;
* the actual update is done in a separate thread.
*
* @param step The current step in the task.
*/
@Override
public void progress(T step) {
this.step = step.name();
// off by one since we calculate the progress based on the number of steps,
// and Enum.ordinal() is zero-based (so the 5th step in a 5 step task is 4, not 5; resulting in the
// final progress being 80% and not 100%)
this.stepNum = 1 + step.ordinal();
logger.info("ServiceTask {} progress: {}", taskBase, step.name());
eventLog.logEvent("TASK-STEP", taskName + " = " + step.name());
}
public void shutDown() {
if (!running)
return;
running = false;
try {
runnerThread.join();
heartbeatStop();
}
catch (InterruptedException|SQLException ex) {
logger.warn("ServiceHeartbeat shutdown failed", ex);
}
}
private void run() {
if (!running)
running = true;
else
return;
try {
while (running) {
try {
heartbeatUpdate();
}
catch (SQLException ex) {
logger.warn("ServiceHeartbeat failed to update", ex);
}
TimeUnit.SECONDS.sleep(heartbeatInterval);
}
}
catch (InterruptedException ex) {
logger.error("ServiceHeartbeat caught irrecoverable exception, killing service", ex);
System.exit(255);
}
}
private void heartbeatInit() {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
INSERT INTO TASK_HEARTBEAT (TASK_NAME, TASK_BASE, INSTANCE, SERVICE_INSTANCE, HEARTBEAT_TIME, STATUS)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP(6), 'STARTING')
ON DUPLICATE KEY UPDATE
INSTANCE = ?,
SERVICE_INSTANCE = ?,
HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
STATUS = 'STARTING'
"""
))
{
stmt.setString(1, taskName);
stmt.setString(2, taskBase);
stmt.setString(3, instanceUUID);
stmt.setString(4, serviceInstanceUUID);
stmt.setString(5, instanceUUID);
stmt.setString(6, serviceInstanceUUID);
stmt.executeUpdate();
}
}
catch (SQLException ex) {
logger.error("ServiceHeartbeat failed to initialize", ex);
throw new RuntimeException(ex);
}
eventLog.logEvent("TASK-STARTED", taskName);
}
private void heartbeatUpdate() throws SQLException {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
UPDATE TASK_HEARTBEAT
SET HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
STATUS = 'RUNNING',
PROGRESS = ?,
STAGE_NAME = ?
WHERE INSTANCE = ?
""")
)
{
stmt.setInt(1, (int) Math.round(100 * stepNum / (double) stepCount));
stmt.setString(2, step);
stmt.setString(3, instanceUUID);
stmt.executeUpdate();
}
}
}
private void heartbeatStop() throws SQLException {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
UPDATE TASK_HEARTBEAT
SET HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
STATUS='STOPPED',
PROGRESS = ?,
STAGE_NAME = ?
WHERE INSTANCE = ?
""")
)
{
stmt.setInt(1, (int) Math.round(100 * stepNum / (double) stepCount));
stmt.setString( 2, step);
stmt.setString( 3, instanceUUID);
stmt.executeUpdate();
}
}
eventLog.logEvent("TASK-TERMINATED", taskName);
}
@Override
public void close() {
shutDown();
}
}
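To make the off-by-one note in progress() concrete, these are the percentages written by heartbeatUpdate() for a hypothetical four-step enum:

enum Steps { LOAD, PARSE, WRITE, FINISHED }  // stepCount == 4
// progress(Steps.LOAD)     -> stepNum == 1 -> Math.round(100 * 1 / 4.0) == 25
// progress(Steps.FINISHED) -> stepNum == 4 -> Math.round(100 * 4 / 4.0) == 100
// without the +1 in progress(), the final step would report 75 rather than 100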

View File

@ -4,7 +4,7 @@ import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.mq.MessageQueueFactory;
import nu.marginalia.service.control.ServiceEventLog;
import nu.marginalia.service.control.ServiceHeartbeat;
import nu.marginalia.service.control.ServiceHeartbeatImpl;
import nu.marginalia.service.module.ServiceConfiguration;
/** This class exists to reduce Service boilerplate */
@ -13,14 +13,14 @@ public class BaseServiceParams {
public final ServiceConfiguration configuration;
public final Initialization initialization;
public final MetricsServer metricsServer;
public final ServiceHeartbeat heartbeat;
public final ServiceHeartbeatImpl heartbeat;
public final ServiceEventLog eventLog;
public final MessageQueueFactory messageQueueInboxFactory;
@Inject
public BaseServiceParams(ServiceConfiguration configuration,
Initialization initialization,
MetricsServer metricsServer,
ServiceHeartbeat heartbeat,
ServiceHeartbeatImpl heartbeat,
ServiceEventLog eventLog,
MessageQueueFactory messageQueueInboxFactory) {
this.configuration = configuration;

View File

@ -7,7 +7,7 @@ import nu.marginalia.language.model.WordRep;
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
import nu.marginalia.model.EdgeUrl;
import javax.inject.Inject;
import com.google.inject.Inject;
import java.util.*;
import java.util.stream.Stream;

View File

@ -2,6 +2,7 @@ package nu.marginalia.ranking;
import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap;
import it.unimi.dsi.fastutil.ints.Int2ShortOpenHashMap;
import nu.marginalia.model.id.UrlIdCodec;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -37,6 +38,11 @@ public class DomainRankings {
return rankings.getOrDefault(domainId, (short) MAX_RANK_VALUE);
}
public float getSortRanking(long docId) {
int domainId = UrlIdCodec.getDomainId(docId);
return rankings.getOrDefault(domainId, (short) MAX_RANK_VALUE) / (float) MAX_RANK_VALUE;
}
public int size() {
return rankings.size();
}

View File

@ -16,9 +16,8 @@ dependencies {
implementation project(':code:features-index:domain-ranking')
implementation project(':code:features-index:index-query')
implementation project(':code:features-index:index-journal')
implementation project(':code:features-index:lexicon')
implementation project(':code:common:model')
implementation project(':code:common:service')
implementation project(':code:common:process')
implementation project(':third-party:uppend')

View File

@ -6,10 +6,10 @@ import nu.marginalia.index.journal.reader.IndexJournalReader;
import nu.marginalia.array.LongArray;
import nu.marginalia.index.journal.reader.IndexJournalReaderSingleCompressedFile;
import nu.marginalia.model.idx.DocumentMetadata;
import nu.marginalia.process.control.ProcessHeartbeat;
import nu.marginalia.ranking.DomainRankings;
import nu.marginalia.service.control.ServiceHeartbeat;
import org.roaringbitmap.IntConsumer;
import org.roaringbitmap.RoaringBitmap;
import org.roaringbitmap.longlong.LongConsumer;
import org.roaringbitmap.longlong.Roaring64Bitmap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -20,24 +20,24 @@ import java.nio.file.Path;
public class ForwardIndexConverter {
private final ServiceHeartbeat heartbeat;
private final File inputFile;
private final ProcessHeartbeat heartbeat;
private final Logger logger = LoggerFactory.getLogger(getClass());
private final IndexJournalReader journalReader;
private final Path outputFileDocsId;
private final Path outputFileDocsData;
private final DomainRankings domainRankings;
public ForwardIndexConverter(ServiceHeartbeat heartbeat,
File inputFile,
public ForwardIndexConverter(ProcessHeartbeat heartbeat,
IndexJournalReader journalReader,
Path outputFileDocsId,
Path outputFileDocsData,
DomainRankings domainRankings
) {
this.heartbeat = heartbeat;
this.inputFile = inputFile;
this.journalReader = journalReader;
this.outputFileDocsId = outputFileDocsId;
this.outputFileDocsData = outputFileDocsData;
this.domainRankings = domainRankings;
@ -54,17 +54,9 @@ public class ForwardIndexConverter {
public void convert() throws IOException {
deleteOldFiles();
IndexJournalReaderSingleCompressedFile journalReader = new IndexJournalReaderSingleCompressedFile(inputFile.toPath());
if (journalReader.fileHeader().fileSize() <= IndexJournalReader.FILE_HEADER_SIZE_BYTES) {
logger.warn("Bailing: Journal is empty!");
return;
}
logger.info("Converting {} {}", inputFile, journalReader.fileHeader);
logger.info("Domain Rankings size = {}", domainRankings.size());
try (var progress = heartbeat.createServiceTaskHeartbeat(TaskSteps.class, "forwardIndexConverter")) {
try (var progress = heartbeat.createProcessTaskHeartbeat(TaskSteps.class, "forwardIndexConverter")) {
progress.progress(TaskSteps.GET_DOC_IDS);
LongArray docsFileId = getDocIds(outputFileDocsId, journalReader);
@ -83,12 +75,11 @@ public class ForwardIndexConverter {
LongArray docFileData = LongArray.mmapForWriting(outputFileDocsData, ForwardIndexParameters.ENTRY_SIZE * docsFileId.size());
journalReader.forEach(entry -> {
long entryOffset = (long) ForwardIndexParameters.ENTRY_SIZE * docIdToIdx.get(entry.urlId());
long entryOffset = (long) ForwardIndexParameters.ENTRY_SIZE * docIdToIdx.get(entry.docId());
int ranking = domainRankings.getRanking(entry.domainId());
long meta = DocumentMetadata.encodeRank(entry.docMeta(), ranking);
docFileData.set(entryOffset + ForwardIndexParameters.DOMAIN_OFFSET, entry.domainId());
docFileData.set(entryOffset + ForwardIndexParameters.METADATA_OFFSET, meta);
docFileData.set(entryOffset + ForwardIndexParameters.FEATURES_OFFSET, entry.header.documentFeatures());
});
@ -109,17 +100,18 @@ public class ForwardIndexConverter {
}
private LongArray getDocIds(Path outputFileDocs, IndexJournalReader journalReader) throws IOException {
RoaringBitmap rbm = new RoaringBitmap();
journalReader.forEachUrlId(rbm::add);
Roaring64Bitmap rbm = new Roaring64Bitmap();
journalReader.forEachDocId(rbm::add);
LongArray ret = LongArray.mmapForWriting(outputFileDocs, rbm.getCardinality());
rbm.forEach(new IntConsumer() {
LongArray ret = LongArray.mmapForWriting(outputFileDocs, rbm.getIntCardinality());
rbm.forEach(new LongConsumer() {
int offset;
@Override
public void accept(int value) {
public void accept(long value) {
ret.set(offset++, value);
}
});
return ret;
}

View File

@ -0,0 +1,28 @@
package nu.marginalia.index.forward;
import java.nio.file.Path;
public class ForwardIndexFileNames {
public static Path resolve(Path basePath, FileIdentifier identifier, FileVersion version) {
return switch (identifier) {
case DOC_ID -> switch (version) {
case NEXT -> basePath.resolve("fwd-doc-id.dat.next");
case CURRENT -> basePath.resolve("fwd-doc-id.dat");
};
case DOC_DATA -> switch (version) {
case NEXT -> basePath.resolve("fwd-doc-data.dat.next");
case CURRENT -> basePath.resolve("fwd-doc-data.dat");
};
};
}
public enum FileVersion {
CURRENT,
NEXT
};
public enum FileIdentifier {
DOC_DATA,
DOC_ID
}
}
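One plausible way the CURRENT/NEXT naming above can support a low-downtime index switch is an atomic rename of the freshly built files; the following is only a sketch under that assumption, not the switching code from this changeset:

import java.io.IOException;
import java.nio.file.*;

class PromoteForwardIndexSketch {
    // Promote files written under the ".next" names to the live names.
    // Assumes the file system supports an atomic rename onto the target path.
    static void promote(Path basePath) throws IOException {
        for (var id : ForwardIndexFileNames.FileIdentifier.values()) {
            Path next = ForwardIndexFileNames.resolve(basePath, id, ForwardIndexFileNames.FileVersion.NEXT);
            Path current = ForwardIndexFileNames.resolve(basePath, id, ForwardIndexFileNames.FileVersion.CURRENT);
            Files.move(next, current, StandardCopyOption.ATOMIC_MOVE);
        }
    }
}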

View File

@ -1,9 +1,8 @@
package nu.marginalia.index.forward;
class ForwardIndexParameters {
public static final int ENTRY_SIZE = 3;
public static final int DOMAIN_OFFSET = 0;
public static final int METADATA_OFFSET = 1;
public static final int FEATURES_OFFSET = 2;
public static final int ENTRY_SIZE = 2;
public static final int METADATA_OFFSET = 0;
public static final int FEATURES_OFFSET = 1;
}

View File

@ -3,6 +3,7 @@ package nu.marginalia.index.forward;
import com.upserve.uppend.blobs.NativeIO;
import gnu.trove.map.hash.TLongIntHashMap;
import nu.marginalia.array.LongArray;
import nu.marginalia.model.id.UrlIdCodec;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -71,6 +72,8 @@ public class ForwardIndexReader {
}
public long getDocMeta(long docId) {
assert UrlIdCodec.getRank(docId) == 0 : "Forward Index Reader fed dirty reverse index id";
long offset = idxForDoc(docId);
if (offset < 0) return 0;
@ -78,20 +81,17 @@ public class ForwardIndexReader {
}
public int getHtmlFeatures(long docId) {
assert UrlIdCodec.getRank(docId) == 0 : "Forward Index Reader fed dirty reverse index id";
long offset = idxForDoc(docId);
if (offset < 0) return 0;
return (int) data.get(ENTRY_SIZE * offset + FEATURES_OFFSET);
}
public int getDomainId(long docId) {
long offset = idxForDoc(docId);
if (offset < 0) return 0;
return Math.max(0, (int) data.get(ENTRY_SIZE * offset + DOMAIN_OFFSET));
}
private int idxForDoc(long docId) {
assert UrlIdCodec.getRank(docId) == 0 : "Forward Index Reader fed dirty reverse index id";
return idToOffset.get(docId);
}
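As the assertions above indicate, callers are expected to strip the ranking bits from a combined id before querying the forward index; a small sketch mirroring what ParamMatchingQueryFilter does further down, with forwardIndexReader and combinedId assumed to be in scope:

long docId = UrlIdCodec.removeRank(combinedId);    // combinedId as produced by the reverse index
long meta = forwardIndexReader.getDocMeta(docId);
int features = forwardIndexReader.getHtmlFeatures(docId);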

View File

@ -1,5 +1,6 @@
package nu.marginalia.index.forward;
import nu.marginalia.model.id.UrlIdCodec;
import nu.marginalia.model.idx.DocumentMetadata;
import nu.marginalia.index.query.limit.SpecificationLimitType;
import nu.marginalia.index.query.IndexQueryParams;
@ -15,10 +16,11 @@ public class ParamMatchingQueryFilter implements QueryFilterStepIf {
}
@Override
public boolean test(long docId) {
int urlId = (int) (docId & 0xFFFF_FFFFL);
int domainId = forwardIndexReader.getDomainId(urlId);
long meta = forwardIndexReader.getDocMeta(urlId);
public boolean test(long combinedId) {
long docId = UrlIdCodec.removeRank(combinedId);
int domainId = UrlIdCodec.getDomainId(docId);
long meta = forwardIndexReader.getDocMeta(docId);
if (!validateDomain(domainId, meta)) {
return false;

View File

@ -2,14 +2,14 @@ package nu.marginalia.index.forward;
import lombok.SneakyThrows;
import nu.marginalia.index.journal.model.IndexJournalEntry;
import nu.marginalia.index.journal.writer.IndexJournalWriterImpl;
import nu.marginalia.index.journal.reader.IndexJournalReaderSingleCompressedFile;
import nu.marginalia.index.journal.writer.IndexJournalWriter;
import nu.marginalia.lexicon.journal.KeywordLexiconJournalMode;
import nu.marginalia.index.journal.writer.IndexJournalWriterSingleFileImpl;
import nu.marginalia.model.id.UrlIdCodec;
import nu.marginalia.process.control.FakeProcessHeartbeat;
import nu.marginalia.process.control.ProcessHeartbeatImpl;
import nu.marginalia.process.control.ProcessTaskHeartbeatImpl;
import nu.marginalia.ranking.DomainRankings;
import nu.marginalia.lexicon.KeywordLexicon;
import nu.marginalia.lexicon.journal.KeywordLexiconJournal;
import nu.marginalia.service.control.ServiceHeartbeat;
import nu.marginalia.service.control.ServiceTaskHeartbeat;
import nu.marginalia.test.TestUtil;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
@ -28,7 +28,6 @@ import static org.mockito.Mockito.when;
class ForwardIndexConverterTest {
KeywordLexicon keywordLexicon;
IndexJournalWriter writer;
Path indexFile;
@ -49,12 +48,9 @@ class ForwardIndexConverterTest {
dictionaryFile = Files.createTempFile("tmp", ".dict");
dictionaryFile.toFile().deleteOnExit();
keywordLexicon = new KeywordLexicon(new KeywordLexiconJournal(dictionaryFile.toFile(), KeywordLexiconJournalMode.READ_WRITE));
keywordLexicon.getOrInsert("0");
indexFile = Files.createTempFile("tmp", ".idx");
indexFile.toFile().deleteOnExit();
writer = new IndexJournalWriterImpl(keywordLexicon, indexFile);
writer = new IndexJournalWriterSingleFileImpl(indexFile);
wordsFile1 = Files.createTempFile("words1", ".idx");
urlsFile1 = Files.createTempFile("urls1", ".idx");
@ -62,11 +58,9 @@ class ForwardIndexConverterTest {
dataDir = Files.createTempDirectory(getClass().getSimpleName());
for (int i = 1; i < workSetSize; i++) {
createEntry(writer, keywordLexicon, i);
createEntry(writer, i);
}
keywordLexicon.commitToDisk();
writer.close();
@ -84,15 +78,16 @@ class ForwardIndexConverterTest {
}
long createId(long url, long domain) {
return (domain << 32) | url;
return UrlIdCodec.encodeId((int) domain, (int) url);
}
public void createEntry(IndexJournalWriter writer, KeywordLexicon keywordLexicon, int id) {
public void createEntry(IndexJournalWriter writer, int id) {
int[] factors = getFactorsI(id);
var entryBuilder = IndexJournalEntry.builder(createId(id, id/20), id%5);
for (int i = 0; i+1 < factors.length; i+=2) {
entryBuilder.add(keywordLexicon.getOrInsert(Integer.toString(factors[i])), -factors[i+1]);
entryBuilder.add(factors[i], -factors[i+1]);
}
writer.put(entryBuilder.build());
@ -101,18 +96,14 @@ class ForwardIndexConverterTest {
@Test
void testForwardIndex() throws IOException {
// RIP fairies
var serviceHeartbeat = Mockito.mock(ServiceHeartbeat.class);
when(serviceHeartbeat.createServiceTaskHeartbeat(Mockito.any(), Mockito.any()))
.thenReturn(Mockito.mock(ServiceTaskHeartbeat.class));
new ForwardIndexConverter(serviceHeartbeat, indexFile.toFile(), docsFileId, docsFileData, new DomainRankings()).convert();
new ForwardIndexConverter(new FakeProcessHeartbeat(), new IndexJournalReaderSingleCompressedFile(indexFile), docsFileId, docsFileData, new DomainRankings()).convert();
var forwardReader = new ForwardIndexReader(docsFileId, docsFileData);
for (int i = 36; i < workSetSize; i++) {
assertEquals(0x00FF000000000000L | (i % 5), forwardReader.getDocMeta(i));
assertEquals(i/20, forwardReader.getDomainId(i));
long docId = createId(i, i/20);
assertEquals(0x00FF000000000000L | (i % 5), forwardReader.getDocMeta(docId));
assertEquals(i/20, UrlIdCodec.getDomainId(docId));
}
}

View File

@ -13,7 +13,6 @@ java {
dependencies {
implementation project(':code:libraries:array')
implementation project(':code:common:model')
implementation project(':code:features-index:lexicon')
implementation libs.lombok
annotationProcessor libs.lombok
@ -22,6 +21,7 @@ dependencies {
implementation libs.prometheus
implementation libs.notnull
implementation libs.rxjava
implementation libs.guava
implementation libs.trove
implementation libs.zstd
implementation libs.commons.lang3

View File

@ -1,8 +1,6 @@
package nu.marginalia.index.journal.model;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.model.id.EdgeId;
import nu.marginalia.model.id.UrlIdCodec;
public record IndexJournalEntry(IndexJournalEntryHeader header, IndexJournalEntryData data) {
@ -15,18 +13,7 @@ public record IndexJournalEntry(IndexJournalEntryHeader header, IndexJournalEntr
long documentMeta) {
return builder(new EdgeId<>(domainId),
new EdgeId<>(urlId),
documentMeta);
return builder(UrlIdCodec.encodeId(domainId, urlId), documentMeta);
}
public static IndexJournalEntryBuilder builder(EdgeId<EdgeDomain> domainId,
EdgeId<EdgeUrl> urlId,
long documentMeta) {
return new IndexJournalEntryBuilder(0,
IndexJournalEntryHeader.combineIds(domainId, urlId),
documentMeta);
}
}

View File

@ -25,7 +25,7 @@ public class IndexJournalEntryData implements Iterable<IndexJournalEntryData.Rec
public long get(int idx) {
if (idx >= size)
throw new ArrayIndexOutOfBoundsException();
throw new ArrayIndexOutOfBoundsException(idx + " vs " + size);
return underlyingArray[idx];
}
@ -58,9 +58,9 @@ public class IndexJournalEntryData implements Iterable<IndexJournalEntryData.Rec
public Record next() {
pos+=ENTRY_SIZE;
return new Record((int) underlyingArray[pos], underlyingArray[pos+1]);
return new Record(underlyingArray[pos], underlyingArray[pos+1]);
}
}
public record Record(int wordId, long metadata) {}
public record Record(long wordId, long metadata) {}
}

View File

@ -1,29 +1,17 @@
package nu.marginalia.index.journal.model;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.model.id.EdgeId;
public record IndexJournalEntryHeader(int entrySize,
int documentFeatures,
long combinedId,
long documentMeta) {
public IndexJournalEntryHeader(EdgeId<EdgeDomain> domainId,
public IndexJournalEntryHeader(long combinedId,
int documentFeatures,
EdgeId<EdgeUrl> urlId,
long documentMeta) {
this(-1,
documentFeatures,
combineIds(domainId, urlId),
combinedId,
documentMeta);
}
static long combineIds(EdgeId<EdgeDomain> domainId, EdgeId<EdgeUrl> urlId) {
long did = domainId.id();
long uid = urlId.id();
return (did << 32L) | uid;
}
}

View File

@ -2,11 +2,13 @@ package nu.marginalia.index.journal.reader;
import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
import nu.marginalia.model.id.UrlIdCodec;
import java.io.DataInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.LongBuffer;
import java.util.Arrays;
public class IndexJournalReadEntry {
public final IndexJournalEntryHeader header;
@ -51,11 +53,7 @@ public class IndexJournalReadEntry {
}
public int domainId() {
return (int) (docId() >>> 32L);
}
public int urlId() {
return (int) (docId() & 0xFFFF_FFFFL);
return UrlIdCodec.getDomainId(docId());
}
public IndexJournalEntryData readEntry() {

View File

@ -1,31 +1,48 @@
package nu.marginalia.index.journal.reader;
import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.index.journal.model.IndexJournalFileHeader;
import nu.marginalia.index.journal.model.IndexJournalStatistics;
import nu.marginalia.model.idx.WordFlags;
import org.jetbrains.annotations.NotNull;
import java.io.IOException;
import java.nio.file.Path;
import java.util.Iterator;
import java.util.function.IntConsumer;
import java.util.function.LongConsumer;
import java.util.function.Predicate;
public interface IndexJournalReader extends Iterable<IndexJournalReadEntry> {
int FILE_HEADER_SIZE_LONGS = 2;
int FILE_HEADER_SIZE_BYTES = 8 * FILE_HEADER_SIZE_LONGS;
IndexJournalFileHeader fileHeader();
static IndexJournalReader singleFile(Path fileName) throws IOException {
return new IndexJournalReaderSingleCompressedFile(fileName);
}
IndexJournalStatistics getStatistics();
static IndexJournalReader paging(Path baseDir) throws IOException {
return new IndexJournalReaderPagingImpl(baseDir);
}
void forEachWordId(IntConsumer consumer);
static IndexJournalReader singleFileWithPriorityFilters(Path path) throws IOException {
void forEachUrlIdWordId(BiIntConsumer consumer);
long highPriorityFlags =
WordFlags.Title.asBit()
| WordFlags.Subjects.asBit()
| WordFlags.TfIdfHigh.asBit()
| WordFlags.NamesWords.asBit()
| WordFlags.UrlDomain.asBit()
| WordFlags.UrlPath.asBit()
| WordFlags.Site.asBit()
| WordFlags.SiteAdjacent.asBit();
void forEachDocIdWordId(LongIntConsumer consumer);
return new IndexJournalReaderSingleCompressedFile(path, null,
r -> (r.metadata() & highPriorityFlags) != 0);
}
void forEachWordId(LongConsumer consumer);
void forEachDocIdRecord(LongObjectConsumer<IndexJournalEntryData.Record> consumer);
void forEachUrlId(IntConsumer consumer);
void forEachDocId(LongConsumer consumer);
@NotNull
@Override
@ -33,13 +50,7 @@ public interface IndexJournalReader extends Iterable<IndexJournalReadEntry> {
void close() throws IOException;
interface BiIntConsumer {
void accept(int left, int right);
}
interface LongIntConsumer {
void accept(long left, int right);
}
interface LongObjectConsumer<T> {
void accept(long left, T right);

View File

@ -0,0 +1,61 @@
package nu.marginalia.index.journal.reader;
import com.google.common.collect.Iterators;
import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.index.journal.model.IndexJournalStatistics;
import nu.marginallia.index.journal.IndexJournalFileNames;
import org.jetbrains.annotations.NotNull;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.function.LongConsumer;
public class IndexJournalReaderPagingImpl implements IndexJournalReader {
private final List<IndexJournalReader> readers;
public IndexJournalReaderPagingImpl(Path baseDir) throws IOException {
var inputFiles = IndexJournalFileNames.findJournalFiles(baseDir);
this.readers = new ArrayList<>(inputFiles.size());
for (var inputFile : inputFiles) {
readers.add(new IndexJournalReaderSingleCompressedFile(inputFile));
}
}
@Override
public void forEachWordId(LongConsumer consumer) {
for (var reader : readers) {
reader.forEachWordId(consumer);
}
}
@Override
public void forEachDocIdRecord(LongObjectConsumer<IndexJournalEntryData.Record> consumer) {
for (var reader : readers) {
reader.forEachDocIdRecord(consumer);
}
}
@Override
public void forEachDocId(LongConsumer consumer) {
for (var reader : readers) {
reader.forEachDocId(consumer);
}
}
@Override
public @NotNull Iterator<IndexJournalReadEntry> iterator() {
return Iterators.concat(readers.stream().map(IndexJournalReader::iterator).iterator());
}
@Override
public void close() throws IOException {
for (var reader : readers) {
reader.close();
}
}
}
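A sketch of how the paging reader can be driven, with a hypothetical baseDir; it visits the journal pages in file-name order and hands every document id to the consumer:

static void dumpDomainIds(Path baseDir) throws IOException {
    IndexJournalReader reader = IndexJournalReader.paging(baseDir);
    // print the domain component of each document id found in any page
    reader.forEachDocId(docId -> System.out.println(UrlIdCodec.getDomainId(docId)));
    reader.close();
}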

View File

@ -12,21 +12,30 @@ import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.Iterator;
import java.util.function.IntConsumer;
import java.util.function.LongConsumer;
import java.util.function.Predicate;
public class IndexJournalReaderSingleCompressedFile implements IndexJournalReader {
private static Path journalFile;
private Path journalFile;
public final IndexJournalFileHeader fileHeader;
@Override
public String toString() {
return "IndexJournalReaderSingleCompressedFile{" + journalFile + " }";
}
private DataInputStream dataInputStream = null;
final Predicate<IndexJournalReadEntry> entryPredicate;
final Predicate<IndexJournalEntryData.Record> recordPredicate;
public IndexJournalReaderSingleCompressedFile(Path file) throws IOException {
this.journalFile = file;
fileHeader = readHeader(file);
this.recordPredicate = null;
@ -34,7 +43,8 @@ public class IndexJournalReaderSingleCompressedFile implements IndexJournalReade
}
public IndexJournalReaderSingleCompressedFile(Path file, Predicate<IndexJournalReadEntry> entryPredicate, Predicate<IndexJournalEntryData.Record> recordPredicate) throws IOException {
journalFile = file;
this.journalFile = file;
fileHeader = readHeader(file);
this.recordPredicate = recordPredicate;
@ -42,8 +52,6 @@ public class IndexJournalReaderSingleCompressedFile implements IndexJournalReade
}
private static IndexJournalFileHeader readHeader(Path file) throws IOException {
journalFile = file;
try (var raf = new RandomAccessFile(file.toFile(), "r")) {
long unused = raf.readLong();
long wordCount = raf.readLong();
@ -61,10 +69,6 @@ public class IndexJournalReaderSingleCompressedFile implements IndexJournalReade
return new DataInputStream(new ZstdInputStream(new BufferedInputStream(fileInputStream)));
}
public IndexJournalFileHeader fileHeader() {
return fileHeader;
}
public boolean filter(IndexJournalReadEntry entry) {
return entryPredicate == null || entryPredicate.test(entry);
}
@ -80,31 +84,7 @@ public class IndexJournalReaderSingleCompressedFile implements IndexJournalReade
@Override
public IndexJournalStatistics getStatistics() {
int highestWord = 0;
// Docs cardinality is a candidate for a HyperLogLog
Roaring64Bitmap docsBitmap = new Roaring64Bitmap();
for (var entry : this) {
var entryData = entry.readEntry();
if (filter(entry)) {
docsBitmap.addLong(entry.docId() & 0x0000_0000_FFFF_FFFFL);
for (var item : entryData) {
if (filter(entry, item)) {
highestWord = Integer.max(item.wordId(), highestWord);
}
}
}
}
return new IndexJournalStatistics(highestWord, docsBitmap.getIntCardinality());
}
@Override
public void forEachWordId(IntConsumer consumer) {
public void forEachWordId(LongConsumer consumer) {
for (var entry : this) {
var data = entry.readEntry();
for (var post : data) {
@ -115,32 +95,6 @@ public class IndexJournalReaderSingleCompressedFile implements IndexJournalReade
}
}
@Override
public void forEachUrlIdWordId(BiIntConsumer consumer) {
for (var entry : this) {
var data = entry.readEntry();
for (var post : data) {
if (filter(entry, post)) {
consumer.accept(entry.urlId(), post.wordId());
}
}
}
}
@Override
public void forEachDocIdWordId(LongIntConsumer consumer) {
for (var entry : this) {
var data = entry.readEntry();
for (var post : data) {
if (filter(entry, post)) {
consumer.accept(entry.docId(), post.wordId());
}
}
}
}
@Override
public void forEachDocIdRecord(LongObjectConsumer<IndexJournalEntryData.Record> consumer) {
for (var entry : this) {
@ -154,10 +108,10 @@ public class IndexJournalReaderSingleCompressedFile implements IndexJournalReade
}
}
@Override
public void forEachUrlId(IntConsumer consumer) {
public void forEachDocId(LongConsumer consumer) {
for (var entry : this) {
if (filter(entry)) {
consumer.accept(entry.urlId());
consumer.accept(entry.docId());
}
}
}

View File

@ -0,0 +1,48 @@
package nu.marginalia.index.journal.writer;
import lombok.SneakyThrows;
import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
import nu.marginallia.index.journal.IndexJournalFileNames;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.file.Path;
public class IndexJournalWriterPagingImpl implements IndexJournalWriter {
private final Path outputDir;
private int fileNumber = 0;
private final Logger logger = LoggerFactory.getLogger(getClass());
private IndexJournalWriter currentWriter = null;
private int inputsForFile = 0;
public IndexJournalWriterPagingImpl(Path outputDir) throws IOException {
this.outputDir = outputDir;
switchToNextWriter();
logger.info("Creating Journal Writer {}", outputDir);
}
private void switchToNextWriter() throws IOException {
if (currentWriter != null)
currentWriter.close();
currentWriter = new IndexJournalWriterSingleFileImpl(IndexJournalFileNames.allocateName(outputDir, fileNumber++));
}
@Override
@SneakyThrows
public void put(IndexJournalEntryHeader header, IndexJournalEntryData entry) {
if (++inputsForFile > 100_000) {
inputsForFile = 0;
switchToNextWriter();
}
currentWriter.put(header, entry);
}
public void close() throws IOException {
currentWriter.close();
}
}

View File

@ -1,12 +1,11 @@
package nu.marginalia.index.journal.writer;
import com.github.luben.zstd.ZstdDirectBufferCompressingStream;
import com.github.luben.zstd.ZstdOutputStream;
import lombok.SneakyThrows;
import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
import nu.marginalia.index.journal.reader.IndexJournalReader;
import nu.marginalia.lexicon.KeywordLexicon;
import nu.marginallia.index.journal.IndexJournalFileNames;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -16,27 +15,34 @@ import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.nio.file.attribute.PosixFilePermissions;
public class IndexJournalWriterImpl implements IndexJournalWriter{
private final KeywordLexicon lexicon;
public class IndexJournalWriterSingleFileImpl implements IndexJournalWriter{
private static final int ZSTD_BUFFER_SIZE = 8192;
private static final int DATA_BUFFER_SIZE = 8192;
private final ByteBuffer dataBuffer = ByteBuffer.allocateDirect(DATA_BUFFER_SIZE);
private final ZstdDirectBufferCompressingStream compressingStream;
private int numEntries = 0;
private final FileChannel fileChannel;
public IndexJournalWriterImpl(KeywordLexicon lexicon, Path outputFile) throws IOException {
this.lexicon = lexicon;
private int numEntries = 0;
private final Logger logger = LoggerFactory.getLogger(getClass());
public IndexJournalWriterSingleFileImpl(Path outputFile) throws IOException {
logger.info("Creating Journal Writer {}", outputFile);
Files.deleteIfExists(outputFile);
Files.createFile(outputFile, PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--")));
fileChannel = FileChannel.open(outputFile, StandardOpenOption.CREATE,
StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING);
writeHeaderPlaceholder(fileChannel);
compressingStream = new ZstdDirectBufferCompressingStream(ByteBuffer.allocateDirect(ZSTD_BUFFER_SIZE), 3) {
protected ByteBuffer flushBuffer(ByteBuffer toFlush) throws IOException {
toFlush.flip();
@ -64,7 +70,7 @@ public class IndexJournalWriterImpl implements IndexJournalWriter{
@Override
@SneakyThrows
public synchronized void put(IndexJournalEntryHeader header, IndexJournalEntryData entry) {
public void put(IndexJournalEntryHeader header, IndexJournalEntryData entry) {
if (dataBuffer.capacity() - dataBuffer.position() < 3*8) {
dataBuffer.flip();
compressingStream.compress(dataBuffer);
@ -84,6 +90,7 @@ public class IndexJournalWriterImpl implements IndexJournalWriter{
dataBuffer.clear();
}
else while (remaining-- > 0 && i < entry.size()) {
dataBuffer.putLong(entry.underlyingArray[i++]);
}
}
@ -103,7 +110,7 @@ public class IndexJournalWriterImpl implements IndexJournalWriter{
ByteBuffer header = ByteBuffer.allocate(16);
header.putLong(numEntries);
header.putLong(lexicon.size());
header.putLong(0);
header.flip();
while (header.position() < header.limit()) {

View File

@ -0,0 +1,30 @@
package nu.marginallia.index.journal;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
public class IndexJournalFileNames {
public static Path allocateName(Path base, int idx) {
return base.resolve(String.format("page-index-%04d.dat", idx));
}
public static List<Path> findJournalFiles(Path baseDirectory) throws IOException {
List<Path> ret = new ArrayList<>();
try (var listStream = Files.list(baseDirectory)) {
listStream
.filter(IndexJournalFileNames::isJournalFile)
.sorted()
.forEach(ret::add);
}
return ret;
}
public static boolean isJournalFile(Path file) {
return file.toFile().getName().matches("page-index-\\d{4}\\.dat");
}
}
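An illustration of the naming scheme above, with hypothetical paths:

Path base = Path.of("/tmp/journal");                              // hypothetical directory
Path first = IndexJournalFileNames.allocateName(base, 0);         // /tmp/journal/page-index-0000.dat
List<Path> pages = IndexJournalFileNames.findJournalFiles(base);  // all page files, sorted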

View File

@ -4,13 +4,12 @@ import nu.marginalia.index.journal.model.IndexJournalEntry;
import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.index.journal.reader.IndexJournalReader;
import nu.marginalia.index.journal.reader.IndexJournalReaderSingleCompressedFile;
import nu.marginalia.index.journal.writer.IndexJournalWriterImpl;
import nu.marginalia.lexicon.KeywordLexicon;
import nu.marginalia.index.journal.writer.IndexJournalWriterSingleFileImpl;
import nu.marginalia.model.id.UrlIdCodec;
import org.apache.commons.lang3.tuple.Pair;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;
import java.io.IOException;
import java.nio.file.Files;
@ -22,15 +21,16 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
public class IndexJournalTest {
Path tempFile;
KeywordLexicon lexicon;
IndexJournalReader reader;
long firstDocId = UrlIdCodec.encodeId(44, 10);
long secondDocId = UrlIdCodec.encodeId(43, 15);
@BeforeEach
public void setUp() throws IOException {
tempFile = Files.createTempFile(getClass().getSimpleName(), ".dat");
lexicon = Mockito.mock(KeywordLexicon.class);
var journalWriter = new IndexJournalWriterImpl(lexicon, tempFile);
var journalWriter = new IndexJournalWriterSingleFileImpl( tempFile);
journalWriter.put(IndexJournalEntry.builder(44, 10, 55)
.add(1, 2)
.add(2, 3)
@ -65,11 +65,11 @@ public class IndexJournalTest {
}
@Test
public void forEachUrlId() {
List<Integer> expected = List.of(10, 15);
List<Integer> actual = new ArrayList<>();
public void forEachDocId() {
List<Long> expected = List.of(firstDocId, secondDocId);
List<Long> actual = new ArrayList<>();
reader.forEachUrlId(actual::add);
reader.forEachDocId(actual::add);
assertEquals(expected, actual);
}
@ -78,50 +78,19 @@ public class IndexJournalTest {
List<Integer> expected = List.of(1, 2, 3, 5, 5 ,6);
List<Integer> actual = new ArrayList<>();
reader.forEachWordId(actual::add);
assertEquals(expected, actual);
}
@Test
public void forEachUrlIdWordId() {
List<Pair<Integer, Integer>> expected = List.of(
Pair.of(10, 1),
Pair.of(10, 2),
Pair.of(10, 3),
Pair.of(10, 5),
Pair.of(15, 5),
Pair.of(15, 6));
List<Pair<Integer, Integer>> actual = new ArrayList<>();
reader.forEachUrlIdWordId((url, word) -> actual.add(Pair.of(url, word)));
assertEquals(expected, actual);
}
@Test
public void forEachDocIdWordId() {
List<Pair<Long, Integer>> expected = List.of(
Pair.of(10L | (44L << 32), 1),
Pair.of(10L | (44L << 32), 2),
Pair.of(10L | (44L << 32), 3),
Pair.of(10L | (44L << 32), 5),
Pair.of(15L | (43L << 32), 5),
Pair.of(15L | (43L << 32), 6));
List<Pair<Long, Integer>> actual = new ArrayList<>();
reader.forEachDocIdWordId((url, word) -> actual.add(Pair.of(url, word)));
reader.forEachWordId(i -> actual.add((int) i));
assertEquals(expected, actual);
}
@Test
public void forEachDocIdRecord() {
List<Pair<Long, IndexJournalEntryData.Record>> expected = List.of(
Pair.of(10L | (44L << 32), new IndexJournalEntryData.Record(1, 2)),
Pair.of(10L | (44L << 32), new IndexJournalEntryData.Record(2, 3)),
Pair.of(10L | (44L << 32), new IndexJournalEntryData.Record(3, 4)),
Pair.of(10L | (44L << 32), new IndexJournalEntryData.Record(5, 6)),
Pair.of(15L | (43L << 32), new IndexJournalEntryData.Record(5, 5)),
Pair.of(15L | (43L << 32), new IndexJournalEntryData.Record(6, 6))
Pair.of(firstDocId, new IndexJournalEntryData.Record(1, 2)),
Pair.of(firstDocId, new IndexJournalEntryData.Record(2, 3)),
Pair.of(firstDocId, new IndexJournalEntryData.Record(3, 4)),
Pair.of(firstDocId, new IndexJournalEntryData.Record(5, 6)),
Pair.of(secondDocId, new IndexJournalEntryData.Record(5, 5)),
Pair.of(secondDocId, new IndexJournalEntryData.Record(6, 6))
);
List<Pair<Long, IndexJournalEntryData.Record>> actual = new ArrayList<>();

View File

@ -9,16 +9,16 @@ import nu.marginalia.index.query.filter.QueryFilterStepIf;
public interface IndexQueryBuilder {
/** Filters documents that also contain termId, within the full index.
*/
IndexQueryBuilder alsoFull(int termId);
IndexQueryBuilder alsoFull(long termId);
/**
* Filters documents that also contain the termId, within the priority index.
*/
IndexQueryBuilder alsoPrio(int termIds);
IndexQueryBuilder alsoPrio(long termIds);
/** Excludes documents that contain termId, within the full index
*/
IndexQueryBuilder notFull(int termId);
IndexQueryBuilder notFull(long termId);
IndexQueryBuilder addInclusionFilter(QueryFilterStepIf filterStep);

View File

@ -21,7 +21,7 @@ public class QueryFilterLetThrough implements QueryFilterStepIf {
}
public String describe() {
return "[NoPass]";
return "[PassThrough]";
}
}

View File

@ -3,10 +3,10 @@ package nu.marginalia.index.searchset;
public interface SearchSet {
/**
* Returns true if the given urlId is contained in the set
* Returns true if the given domainId is contained in the set
* or if the documentMetadata vibes with the set
*
*/
boolean contains(int urlId, long documentMetadata);
boolean contains(int domainId, long documentMetadata);
}

View File

@ -18,15 +18,15 @@ dependencies {
implementation project(':code:features-index:domain-ranking')
implementation project(':code:features-index:index-query')
implementation project(':code:features-index:index-journal')
implementation project(':code:features-index:lexicon')
implementation project(':code:common:model')
implementation project(':code:common:service')
implementation project(':code:common:process')
implementation libs.lombok
annotationProcessor libs.lombok
implementation libs.bundles.slf4j
implementation libs.prometheus
implementation libs.fastutil
testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit

Three image files added (21 KiB, 21 KiB, and 29 KiB); their diffs are suppressed because one or more lines are too long.

View File

@ -12,9 +12,35 @@ The full index also provides access to term-level metadata, while the priority i
[1] See WordFlags in [common/model](../../common/model/) and
KeywordMetadata in [features-convert/keyword-extraction](../../features-convert/keyword-extraction).
## Construction
The reverse index is constructed by first building a series of preindexes.
Preindexes consist of a Segment and a Documents object. The segment records which
word identifiers are present and how many documents each occurs in, and the
documents object records which documents each word can be found in.
![Memory layout illustrations](./preindex.svg)
A preindex covering the whole journal would typically not fit in RAM, so the index
journal is paged, and each page is turned into a preindex small enough to fit in
memory. The preindexes are then merged. Merging sorted arrays is a very fast
operation that does not require additional RAM.
![Illustration of successively merged preindex files](./merging.svg)
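The merge itself can be pictured as an ordinary two-pointer merge of sorted runs; the sketch below is illustrative only, while the actual implementation operates on memory-mapped LongArrays via TwoArrayOperations:

static long[] mergeSorted(long[] left, long[] right) {
    long[] out = new long[left.length + right.length];
    int i = 0, j = 0, k = 0;
    while (i < left.length && j < right.length)
        out[k++] = left[i] <= right[j] ? left[i++] : right[j++];
    while (i < left.length) out[k++] = left[i++];
    while (j < right.length) out[k++] = right[j++];
    return out;
}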
Once merged into one large preindex, indexes are added to the preindex data
to form a finalized reverse index.
![Illustration of the data layout of the finalized index](index.svg)
## Central Classes
* [ReverseIndexFullConverter](src/main/java/nu/marginalia/index/full/ReverseIndexFullConverter.java) constructs the full index.
* [ReverseIndexFullReader](src/main/java/nu/marginalia/index/full/ReverseIndexFullReader.java) interrogates the full index.
* [ReverseIndexPriorityConverter](src/main/java/nu/marginalia/index/priority/ReverseIndexPriorityConverter.java) constructs the priority index.
* [ReverseIndexPriorityReader](src/main/java/nu/marginalia/index/priority/ReverseIndexPriorityReader.java) interrogates the priority index.
* [ReversePreindex](src/main/java/nu/marginalia/index/construction/ReversePreindex.java) intermediate reverse index state.
* [ReverseIndexConstructor](src/main/java/nu/marginalia/index/construction/ReverseIndexConstructor.java) constructs the index.
* [ReverseIndexReader](src/main/java/nu/marginalia/index/ReverseIndexReader.java) interrogates the index.
## See Also
* [index-journal](../index-journal)
* [index-forward](../index-forward)
* [libraries/btree](../../libraries/btree)
* [libraries/array](../../libraries/array)

View File

@ -1,4 +1,4 @@
package nu.marginalia.index.full;
package nu.marginalia.index;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.btree.BTreeReader;
@ -6,18 +6,18 @@ import nu.marginalia.index.query.EntrySource;
import static java.lang.Math.min;
public class ReverseIndexFullEntrySource implements EntrySource {
public class ReverseIndexEntrySource implements EntrySource {
private final BTreeReader reader;
int pos;
int endOffset;
final int entrySize;
private final int wordId;
private final long wordId;
public ReverseIndexFullEntrySource(BTreeReader reader,
int entrySize,
int wordId) {
public ReverseIndexEntrySource(BTreeReader reader,
int entrySize,
long wordId) {
this.reader = reader;
this.entrySize = entrySize;
this.wordId = wordId;

View File

@ -0,0 +1,28 @@
package nu.marginalia.index;
import java.nio.file.Path;
public class ReverseIndexFullFileNames {
public static Path resolve(Path basePath, FileIdentifier identifier, FileVersion version) {
return switch (identifier) {
case WORDS -> switch (version) {
case NEXT -> basePath.resolve("rev-words.dat.next");
case CURRENT -> basePath.resolve("rev-words.dat");
};
case DOCS -> switch (version) {
case NEXT -> basePath.resolve("rev-docs.dat.next");
case CURRENT -> basePath.resolve("rev-docs.dat");
};
};
}
public enum FileVersion {
CURRENT,
NEXT
};
public enum FileIdentifier {
WORDS,
DOCS
}
}

View File

@ -0,0 +1,10 @@
package nu.marginalia.index;
import nu.marginalia.btree.model.BTreeBlockSize;
import nu.marginalia.btree.model.BTreeContext;
public class ReverseIndexParameters
{
public static final BTreeContext docsBTreeContext = new BTreeContext(5, 2, BTreeBlockSize.BS_2048);
public static final BTreeContext wordsBTreeContext = new BTreeContext(5, 2, BTreeBlockSize.BS_2048);
}

View File

@ -0,0 +1,28 @@
package nu.marginalia.index;
import java.nio.file.Path;
public class ReverseIndexPrioFileNames {
public static Path resolve(Path basePath, FileIdentifier identifier, FileVersion version) {
return switch (identifier) {
case WORDS -> switch (version) {
case NEXT -> basePath.resolve("rev-prio-words.dat.next");
case CURRENT -> basePath.resolve("rev-prio-words.dat");
};
case DOCS -> switch (version) {
case NEXT -> basePath.resolve("rev-prio-docs.dat.next");
case CURRENT -> basePath.resolve("rev-prio-docs.dat");
};
};
}
public enum FileVersion {
CURRENT,
NEXT
};
public enum FileIdentifier {
WORDS,
DOCS
}
}

View File

@ -1,11 +1,11 @@
package nu.marginalia.index.full;
package nu.marginalia.index;
import nu.marginalia.index.query.ReverseIndexRejectFilter;
import nu.marginalia.index.query.ReverseIndexRetainFilter;
import nu.marginalia.array.LongArray;
import nu.marginalia.btree.BTreeReader;
import nu.marginalia.index.query.EmptyEntrySource;
import nu.marginalia.index.query.EntrySource;
import nu.marginalia.index.query.ReverseIndexRejectFilter;
import nu.marginalia.index.query.ReverseIndexRetainFilter;
import nu.marginalia.index.query.filter.QueryFilterLetThrough;
import nu.marginalia.index.query.filter.QueryFilterNoPass;
import nu.marginalia.index.query.filter.QueryFilterStepIf;
@ -15,18 +15,22 @@ import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
public class ReverseIndexFullReader {
public class ReverseIndexReader {
private final LongArray words;
private final LongArray documents;
private final long wordsDataOffset;
private final Logger logger = LoggerFactory.getLogger(getClass());
private final BTreeReader wordsBTreeReader;
public ReverseIndexFullReader(Path words, Path documents) throws IOException {
public ReverseIndexReader(Path words, Path documents) throws IOException {
if (!Files.exists(words) || !Files.exists(documents)) {
this.words = null;
this.documents = null;
this.wordsBTreeReader = null;
this.wordsDataOffset = -1;
return;
}
@ -34,62 +38,52 @@ public class ReverseIndexFullReader {
this.words = LongArray.mmapRead(words);
this.documents = LongArray.mmapRead(documents);
wordsBTreeReader = new BTreeReader(this.words, ReverseIndexParameters.wordsBTreeContext, 0);
wordsDataOffset = wordsBTreeReader.getHeader().dataOffsetLongs();
}
public boolean isWordInDoc(int wordId, long documentId) {
if (wordId < 0) {
return false;
}
long offset = words.get(wordId);
private long wordOffset(long wordId) {
long idx = wordsBTreeReader.findEntry(wordId);
if (offset < 0) {
return false;
}
if (idx < 0)
return -1L;
return createReaderNew(offset).findEntry(documentId) >= 0;
return words.get(wordsDataOffset + idx + 1);
}
public EntrySource documents(int wordId) {
public EntrySource documents(long wordId) {
if (null == words) {
logger.warn("Reverse index is not ready, dropping query");
return new EmptyEntrySource();
}
if (wordId < 0 || wordId >= words.size()) return new EmptyEntrySource();
long offset = words.get(wordId);
long offset = wordOffset(wordId);
if (offset < 0) return new EmptyEntrySource();
return new ReverseIndexFullEntrySource(createReaderNew(offset), ReverseIndexFullParameters.ENTRY_SIZE, wordId);
return new ReverseIndexEntrySource(createReaderNew(offset), 2, wordId);
}
public QueryFilterStepIf also(int wordId) {
if (wordId < 0) return new QueryFilterNoPass();
long offset = words.get(wordId);
public QueryFilterStepIf also(long wordId) {
long offset = wordOffset(wordId);
if (offset < 0) return new QueryFilterNoPass();
return new ReverseIndexRetainFilter(createReaderNew(offset), "full", wordId);
}
public QueryFilterStepIf not(int wordId) {
if (wordId < 0) return new QueryFilterLetThrough();
long offset = words.get(wordId);
public QueryFilterStepIf not(long wordId) {
long offset = wordOffset(wordId);
if (offset < 0) return new QueryFilterLetThrough();
return new ReverseIndexRejectFilter(createReaderNew(offset));
}
public int numDocuments(int wordId) {
if (wordId < 0)
return 0;
long offset = words.get(wordId);
public int numDocuments(long wordId) {
long offset = wordOffset(wordId);
if (offset < 0)
return 0;
@ -98,23 +92,33 @@ public class ReverseIndexFullReader {
}
private BTreeReader createReaderNew(long offset) {
return new BTreeReader(documents, ReverseIndexFullParameters.bTreeContext, offset);
return new BTreeReader(documents, ReverseIndexParameters.docsBTreeContext, offset);
}
public long[] getTermMeta(int wordId, long[] docIds) {
if (wordId < 0) {
return new long[docIds.length];
}
public long[] getTermMeta(long wordId, long[] docIds) {
long offset = wordOffset(wordId);
long offset = words.get(wordId);
if (offset < 0) {
return new long[docIds.length];
}
Arrays.sort(docIds);
assert isSorted(docIds) : "The input array docIds is assumed to be sorted";
var reader = createReaderNew(offset);
return reader.queryData(docIds, 1);
}
private boolean isSorted(long[] ids) {
if (ids.length == 0)
return true;
long prev = ids[0];
for (int i = 1; i < ids.length; i++) {
if (ids[i] <= prev)
return false;
prev = ids[i];
}
return true;
}
}

View File

@ -0,0 +1,9 @@
package nu.marginalia.index.construction;
public interface DocIdRewriter {
long rewriteDocId(long docId);
static DocIdRewriter identity() {
return l -> l;
}
}

View File

@ -0,0 +1,10 @@
package nu.marginalia.index.construction;
import nu.marginalia.index.journal.reader.IndexJournalReader;
import java.io.IOException;
import java.nio.file.Path;
public interface JournalReaderSource {
IndexJournalReader construct(Path sourceFile) throws IOException;
}

View File

@ -4,7 +4,6 @@ import nu.marginalia.array.LongArray;
import nu.marginalia.array.functional.LongIOTransformer;
import nu.marginalia.btree.BTreeWriter;
import nu.marginalia.btree.model.BTreeContext;
import nu.marginalia.index.priority.ReverseIndexPriorityParameters;
import java.io.IOException;
import java.nio.channels.FileChannel;

View File

@ -0,0 +1,115 @@
package nu.marginalia.index.construction;
import nu.marginalia.process.control.ProcessAdHocTaskHeartbeat;
import nu.marginalia.process.control.ProcessHeartbeat;
import nu.marginallia.index.journal.IndexJournalFileNames;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
public class ReverseIndexConstructor {
private static final Logger logger = LoggerFactory.getLogger(ReverseIndexConstructor.class);
public enum CreateReverseIndexSteps {
CREATE_PREINDEXES,
MERGE_PREINDEXES,
FINALIZE,
FINISHED
}
public static void createReverseIndex(
ProcessHeartbeat processHeartbeat,
JournalReaderSource readerSource,
Path sourceBaseDir,
DocIdRewriter docIdRewriter,
Path tmpDir,
Path outputFileDocs,
Path outputFileWords) throws IOException
{
var inputs = IndexJournalFileNames.findJournalFiles(sourceBaseDir);
if (inputs.isEmpty()) {
logger.error("No journal files in base dir {}", sourceBaseDir);
return;
}
try (var heartbeat = processHeartbeat.createProcessTaskHeartbeat(CreateReverseIndexSteps.class, "createReverseIndex")) {
List<ReversePreindex> preindexes = new ArrayList<>();
heartbeat.progress(CreateReverseIndexSteps.CREATE_PREINDEXES);
try (var preindexHeartbeat = processHeartbeat.createAdHocTaskHeartbeat("constructPreindexes")) {
for (int i = 0; i < inputs.size(); i++) {
var input = inputs.get(i);
preindexHeartbeat.progress(input.toFile().getName(), i, inputs.size());
preindexes.add(ReversePreindex.constructPreindex(readerSource.construct(input), docIdRewriter, tmpDir));
}
preindexHeartbeat.progress("FINISHED", inputs.size(), inputs.size());
}
heartbeat.progress(CreateReverseIndexSteps.MERGE_PREINDEXES);
ReversePreindex finalPreindex;
try (var mergeHeartbeat = processHeartbeat.createAdHocTaskHeartbeat("mergePreindexes")) {
finalPreindex = mergePreindexes(tmpDir, mergeHeartbeat, preindexes);
}
heartbeat.progress(CreateReverseIndexSteps.FINALIZE);
finalPreindex.finalizeIndex(outputFileDocs, outputFileWords);
heartbeat.progress(CreateReverseIndexSteps.FINISHED);
finalPreindex.delete();
}
}
private static ReversePreindex mergePreindexes(Path workDir, ProcessAdHocTaskHeartbeat mergeHeartbeat, List<ReversePreindex> preindexes) throws IOException {
assert !preindexes.isEmpty();
if (preindexes.size() == 1) {
logger.info("Single preindex, no merge necessary");
return preindexes.get(0);
}
List<ReversePreindex> toMerge = new ArrayList<>(preindexes);
List<ReversePreindex> merged = new ArrayList<>();
int pass = 0;
while (toMerge.size() != 1) {
String stage = String.format("PASS[%d]: %d -> %d", ++pass,
toMerge.size(),
toMerge.size()/2 + (toMerge.size() % 2)
);
for (int i = 0; i + 1 < toMerge.size(); i+=2) {
mergeHeartbeat.progress(stage, i/2, toMerge.size()/2);
var left = toMerge.get(i);
var right = toMerge.get(i+1);
merged.add(ReversePreindex.merge(workDir, left, right));
left.delete();
right.delete();
}
if ((toMerge.size() % 2) != 0) {
merged.add(toMerge.get(toMerge.size()-1));
}
toMerge.clear();
toMerge.addAll(merged);
merged.clear();
}
mergeHeartbeat.progress("FINISHED", 1, 1);
return toMerge.get(0);
}
}
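
For illustration only (not part of this commit): mergePreindexes above reduces n preindexes to one by merging neighbouring pairs in passes, carrying any odd element over to the next pass, so roughly ceil(log2 n) passes are needed. A self-contained sketch of that control flow, using strings in place of ReversePreindex:

import java.util.ArrayList;
import java.util.List;

class PairwiseMergeSketch {
    static String mergeAll(List<String> inputs) {
        List<String> toMerge = new ArrayList<>(inputs);
        List<String> merged = new ArrayList<>();
        while (toMerge.size() != 1) {
            for (int i = 0; i + 1 < toMerge.size(); i += 2) {
                // stands in for ReversePreindex.merge(workDir, left, right)
                merged.add("(" + toMerge.get(i) + "+" + toMerge.get(i + 1) + ")");
            }
            if ((toMerge.size() % 2) != 0) {
                merged.add(toMerge.get(toMerge.size() - 1)); // odd element carried to the next pass
            }
            toMerge.clear();
            toMerge.addAll(merged);
            merged.clear();
        }
        return toMerge.get(0);
    }

    public static void main(String[] args) {
        // five inputs -> 3 -> 2 -> 1, i.e. three passes
        System.out.println(mergeAll(List.of("A", "B", "C", "D", "E")));
        // prints (((A+B)+(C+D))+E)
    }
}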

View File

@ -0,0 +1,280 @@
package nu.marginalia.index.construction;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.algo.SortingContext;
import nu.marginalia.btree.BTreeWriter;
import nu.marginalia.index.ReverseIndexParameters;
import nu.marginalia.index.journal.reader.IndexJournalReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import static nu.marginalia.array.algo.TwoArrayOperations.*;
/** Contains the data that would go into a reverse index,
* that is, a mapping from words to documents, minus the actual
* index structure that makes the data quick to access while
* searching.
* <p>
* Two preindexes can be merged into a third preindex containing
* the union of their data. This operation requires no additional
* RAM.
*/
public class ReversePreindex {
final ReversePreindexWordSegments segments;
final ReversePreindexDocuments documents;
private static final Logger logger = LoggerFactory.getLogger(ReversePreindex.class);
public ReversePreindex(ReversePreindexWordSegments segments, ReversePreindexDocuments documents) {
this.segments = segments;
this.documents = documents;
}
/** Constructs a new preindex with the data associated with reader. The backing files
* will have randomly assigned names.
*/
public static ReversePreindex constructPreindex(IndexJournalReader reader,
DocIdRewriter docIdRewriter,
Path destDir) throws IOException
{
Path segmentWordsFile = Files.createTempFile(destDir, "segment_words", ".dat");
Path segmentCountsFile = Files.createTempFile(destDir, "segment_counts", ".dat");
Path docsFile = Files.createTempFile(destDir, "docs", ".dat");
logger.info("Segmenting");
var segments = ReversePreindexWordSegments.construct(reader, segmentWordsFile, segmentCountsFile);
logger.info("Mapping docs");
var docs = ReversePreindexDocuments.construct(docsFile, reader, docIdRewriter, segments);
logger.info("Done");
return new ReversePreindex(segments, docs);
}
/** Transform the preindex into a reverse index */
public void finalizeIndex(Path outputFileDocs, Path outputFileWords) throws IOException {
var offsets = segments.counts;
Files.deleteIfExists(outputFileDocs);
Files.deleteIfExists(outputFileWords);
// Estimate the size of the docs index data
offsets.transformEach(0, offsets.size(), new CountToOffsetTransformer(2));
IndexSizeEstimator sizeEstimator = new IndexSizeEstimator(ReverseIndexParameters.docsBTreeContext, 2);
offsets.fold(0, 0, offsets.size(), sizeEstimator);
// Write the docs file
LongArray finalDocs = LongArray.mmapForWriting(outputFileDocs, sizeEstimator.size);
try (var intermediateDocChannel = documents.createDocumentsFileChannel()) {
offsets.transformEachIO(0, offsets.size(), new ReverseIndexBTreeTransformer(finalDocs, 2, ReverseIndexParameters.docsBTreeContext, intermediateDocChannel));
intermediateDocChannel.force(false);
}
LongArray wordIds = segments.wordIds;
assert offsets.size() == wordIds.size() : "Offsets and word-ids of different size";
// Estimate the size of the words index data
long wordsSize = ReverseIndexParameters.wordsBTreeContext.calculateSize((int) offsets.size());
// Construct the tree
LongArray wordsArray = LongArray.mmapForWriting(outputFileWords, wordsSize);
new BTreeWriter(wordsArray, ReverseIndexParameters.wordsBTreeContext)
.write(0, (int) offsets.size(), mapRegion -> {
for (long i = 0; i < offsets.size(); i++) {
mapRegion.set(2*i, wordIds.get(i));
mapRegion.set(2*i + 1, offsets.get(i));
}
});
wordsArray.force();
}
/** Delete all files associated with this pre-index */
public void delete() throws IOException {
segments.delete();
documents.delete();
}
public static ReversePreindex merge(Path destDir,
ReversePreindex left,
ReversePreindex right) throws IOException {
ReversePreindexWordSegments mergingSegment =
createMergedSegmentWordFile(destDir, left.segments, right.segments);
var mergingIter = mergingSegment.constructionIterator(2);
var leftIter = left.segments.iterator(2);
var rightIter = right.segments.iterator(2);
Path docsFile = Files.createTempFile(destDir, "docs", ".dat");
LongArray mergedDocuments = LongArray.mmapForWriting(docsFile, 8 * (left.documents.size() + right.documents.size()));
leftIter.next();
rightIter.next();
try (FileChannel leftChannel = left.documents.createDocumentsFileChannel();
FileChannel rightChannel = right.documents.createDocumentsFileChannel())
{
while (mergingIter.canPutMore()
&& leftIter.isPositionBeforeEnd()
&& rightIter.isPositionBeforeEnd())
{
final long currentWord = mergingIter.wordId;
if (leftIter.wordId == currentWord && rightIter.wordId == currentWord)
{
// both inputs have documents for the current word
mergeSegments(leftIter, rightIter,
left.documents, right.documents,
mergedDocuments, mergingIter);
}
else if (leftIter.wordId == currentWord) {
if (!copySegment(leftIter, mergedDocuments, leftChannel, mergingIter))
break;
}
else if (rightIter.wordId == currentWord) {
if (!copySegment(rightIter, mergedDocuments, rightChannel, mergingIter))
break;
}
else assert false : "This should never happen"; // the helvetica scenario
}
if (leftIter.isPositionBeforeEnd()) {
while (copySegment(leftIter, mergedDocuments, leftChannel, mergingIter));
}
if (rightIter.isPositionBeforeEnd()) {
while (copySegment(rightIter, mergedDocuments, rightChannel, mergingIter));
}
}
assert !leftIter.isPositionBeforeEnd() : "Left has more to go";
assert !rightIter.isPositionBeforeEnd() : "Right has more to go";
assert !mergingIter.canPutMore() : "Source iters ran dry before merging iter";
// We may have overestimated the size of the merged docs file if there were
// duplicates in the data, so we shrink it to the actual size we wrote.
mergedDocuments = shrinkMergedDocuments(mergedDocuments,
docsFile, 2 * mergingSegment.totalSize());
mergingSegment.force();
return new ReversePreindex(
mergingSegment,
new ReversePreindexDocuments(mergedDocuments, docsFile)
);
}
/** Create a segment word file with each word from both inputs, with zero counts for all the data.
* This is an intermediate product in merging.
*/
static ReversePreindexWordSegments createMergedSegmentWordFile(Path destDir,
ReversePreindexWordSegments left,
ReversePreindexWordSegments right) throws IOException {
Path segmentWordsFile = Files.createTempFile(destDir, "segment_words", ".dat");
Path segmentCountsFile = Files.createTempFile(destDir, "segment_counts", ".dat");
long segmentsSize = countDistinctElements(left.wordIds, right.wordIds,
0, left.wordIds.size(),
0, right.wordIds.size());
LongArray wordIdsFile = LongArray.mmapForWriting(segmentWordsFile, segmentsSize);
mergeArrays(wordIdsFile, left.wordIds, right.wordIds,
0, wordIdsFile.size(),
0, left.wordIds.size(),
0, right.wordIds.size());
LongArray counts = LongArray.mmapForWriting(segmentCountsFile, segmentsSize);
return new ReversePreindexWordSegments(wordIdsFile, counts, segmentWordsFile, segmentCountsFile);
}
/** It's possible we overestimated the necessary size of the documents file;
* this permits us to shrink it down to the smallest necessary size.
*/
private static LongArray shrinkMergedDocuments(LongArray mergedDocuments, Path docsFile, long sizeLongs) throws IOException {
mergedDocuments.force();
long beforeSize = mergedDocuments.size();
try (var bc = Files.newByteChannel(docsFile, StandardOpenOption.WRITE)) {
bc.truncate(sizeLongs * 8);
}
mergedDocuments = LongArray.mmapForWriting(docsFile, sizeLongs);
long afterSize = mergedDocuments.size();
if (beforeSize != afterSize) {
logger.info("Shrunk {} from {}b to {}b", docsFile, beforeSize, afterSize);
}
return mergedDocuments;
}
/** Merge contents of the segments indicated by leftIter and rightIter into the destination
* segment, and advance the construction iterator with the appropriate size.
*/
private static void mergeSegments(ReversePreindexWordSegments.SegmentIterator leftIter,
ReversePreindexWordSegments.SegmentIterator rightIter,
ReversePreindexDocuments left,
ReversePreindexDocuments right,
LongArray dest,
ReversePreindexWordSegments.SegmentConstructionIterator destIter)
{
long distinct = countDistinctElementsN(2,
left.documents,
right.documents,
leftIter.startOffset, leftIter.endOffset,
rightIter.startOffset, rightIter.endOffset);
mergeArrays2(dest,
left.documents,
right.documents,
destIter.startOffset,
destIter.startOffset + 2*distinct,
leftIter.startOffset, leftIter.endOffset,
rightIter.startOffset, rightIter.endOffset);
destIter.putNext(distinct);
leftIter.next();
rightIter.next();
}
/** Copy the data from the source segment at the position and length indicated by sourceIter,
* into the destination segment, and advance the construction iterator.
*/
private static boolean copySegment(ReversePreindexWordSegments.SegmentIterator sourceIter,
LongArray dest,
FileChannel sourceChannel,
ReversePreindexWordSegments.SegmentConstructionIterator mergingIter) throws IOException {
long size = sourceIter.endOffset - sourceIter.startOffset;
long start = mergingIter.startOffset;
long end = start + size;
dest.transferFrom(sourceChannel,
sourceIter.startOffset,
mergingIter.startOffset,
end);
boolean putNext = mergingIter.putNext(size / 2);
boolean iterNext = sourceIter.next();
assert putNext || !iterNext : "Source iterator ran out before dest iterator?!";
return iterNext;
}
}
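
For illustration only (not part of this commit): createMergedSegmentWordFile above is essentially a merge of two sorted word-id lists in which duplicates are collapsed (countDistinctElements sizes the output, mergeArrays fills it). A plain-array sketch of that step, assuming ordinary long[] inputs instead of file-backed LongArrays:

import java.util.Arrays;

class SortedMergeSketch {
    /** Merge two sorted arrays of word ids into one sorted array without duplicates. */
    static long[] mergeDistinct(long[] left, long[] right) {
        long[] out = new long[left.length + right.length];
        int i = 0, j = 0, n = 0;
        while (i < left.length && j < right.length) {
            if (left[i] < right[j]) out[n++] = left[i++];
            else if (left[i] > right[j]) out[n++] = right[j++];
            else { out[n++] = left[i]; i++; j++; } // same word in both inputs: emit once
        }
        while (i < left.length) out[n++] = left[i++];
        while (j < right.length) out[n++] = right[j++];
        return Arrays.copyOf(out, n);              // shrink to the actual distinct count
    }

    public static void main(String[] args) {
        long[] merged = mergeDistinct(new long[] {10, 33, 40}, new long[] {10, 15, 33});
        System.out.println(Arrays.toString(merged)); // [10, 15, 33, 40]
    }
}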

View File

@ -0,0 +1,123 @@
package nu.marginalia.index.construction;
import lombok.SneakyThrows;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.algo.SortingContext;
import nu.marginalia.index.journal.reader.IndexJournalReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
/** A LongArray with document data, segmented according to
* the associated ReversePreindexWordSegments data
*/
public class ReversePreindexDocuments {
private final Path file;
public final LongArray documents;
private static final int RECORD_SIZE_LONGS = 2;
private static final Logger logger= LoggerFactory.getLogger(ReversePreindexDocuments.class);
public ReversePreindexDocuments(LongArray documents, Path file) {
this.documents = documents;
this.file = file;
}
public static ReversePreindexDocuments construct(
Path docsFile,
IndexJournalReader reader,
DocIdRewriter docIdRewriter,
ReversePreindexWordSegments segments) throws IOException {
logger.info("Transferring data");
createUnsortedDocsFile(docsFile, reader, segments, docIdRewriter);
LongArray docsFileMap = LongArray.mmapForWriting(docsFile, 8 * Files.size(docsFile));
logger.info("Sorting data");
sortDocsFile(docsFileMap, segments);
return new ReversePreindexDocuments(docsFileMap, docsFile);
}
public FileChannel createDocumentsFileChannel() throws IOException {
return (FileChannel) Files.newByteChannel(file, StandardOpenOption.READ);
}
public LongArray slice(long start, long end) {
return documents.range(start, end);
}
public long size() {
return documents.size();
}
private static void createUnsortedDocsFile(Path docsFile,
IndexJournalReader reader,
ReversePreindexWordSegments segments,
DocIdRewriter docIdRewriter) throws IOException {
long fileSize = 8 * segments.totalSize();
LongArray outArray = LongArray.mmapForWriting(docsFile, fileSize);
var offsetMap = segments.asMap(RECORD_SIZE_LONGS);
offsetMap.defaultReturnValue(0);
for (var entry : reader) {
long rankEncodedId = docIdRewriter.rewriteDocId(entry.docId());
var data = entry.readEntry();
for (int i = 0; i + 1 < data.size(); i+=2) {
long wordId = data.get(i);
long meta = data.get(i+1);
long offset = offsetMap.addTo(wordId, RECORD_SIZE_LONGS);
outArray.set(offset + 0, rankEncodedId);
outArray.set(offset + 1, meta);
}
}
outArray.force();
}
@SneakyThrows
private static void sortDocsFile(LongArray docsFileMap, ReversePreindexWordSegments segments) throws IOException {
var iter = segments.iterator(RECORD_SIZE_LONGS);
ExecutorService sortingWorkers = Executors.newWorkStealingPool(Runtime.getRuntime().availableProcessors());
while (iter.next()) {
if (iter.size() < 1024) {
docsFileMap.quickSortN(RECORD_SIZE_LONGS,
iter.startOffset,
iter.endOffset);
}
else {
sortingWorkers.execute(() ->
docsFileMap.quickSortN(RECORD_SIZE_LONGS,
iter.startOffset,
iter.endOffset));
}
}
sortingWorkers.shutdown();
logger.info("Awaiting shutdown");
while (!sortingWorkers.awaitTermination(1, TimeUnit.HOURS));
sortingWorkers.close();
}
public void delete() throws IOException {
Files.delete(this.file);
}
}
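
For illustration only (not part of this commit): sortDocsFile above sorts each word's segment of interleaved (docId, meta) pairs by docId, two longs per record. A plain-array sketch of what quickSortN with record size 2 accomplishes for one segment; the real code sorts the mmapped file in place and parallelizes large segments:

import java.util.Arrays;
import java.util.Comparator;

class SegmentSortSketch {
    /** Sort interleaved (docId, meta) pairs in data[start..end) by docId. */
    static void sortPairsByDocId(long[] data, int start, int end) {
        int pairs = (end - start) / 2;
        long[][] tmp = new long[pairs][2];
        for (int i = 0; i < pairs; i++) {
            tmp[i][0] = data[start + 2 * i];      // docId
            tmp[i][1] = data[start + 2 * i + 1];  // term metadata
        }
        Arrays.sort(tmp, Comparator.comparingLong(p -> p[0]));
        for (int i = 0; i < pairs; i++) {
            data[start + 2 * i] = tmp[i][0];
            data[start + 2 * i + 1] = tmp[i][1];
        }
    }

    public static void main(String[] args) {
        long[] segment = { 300, 7, 100, 5, 200, 6 };
        sortPairsByDocId(segment, 0, segment.length);
        System.out.println(Arrays.toString(segment)); // [100, 5, 200, 6, 300, 7]
    }
}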

View File

@ -0,0 +1,197 @@
package nu.marginalia.index.construction;
import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap;
import it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap;
import it.unimi.dsi.fastutil.longs.LongIterator;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.algo.SortingContext;
import nu.marginalia.index.journal.reader.IndexJournalReader;
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
/** A pair of file-backed arrays of sorted wordIds
* and the count of documents associated with each wordId.
*/
public class ReversePreindexWordSegments {
public final LongArray wordIds;
public final LongArray counts;
private final Path wordsFile;
private final Path countsFile;
public ReversePreindexWordSegments(LongArray wordIds,
LongArray counts,
Path wordsFile,
Path countsFile)
{
assert wordIds.size() == counts.size();
this.wordIds = wordIds;
this.counts = counts;
this.wordsFile = wordsFile;
this.countsFile = countsFile;
}
/** Returns a long-long hash map where each key is a wordId,
* and each value is the start offset of the data.
*/
public Long2LongOpenHashMap asMap(int recordSize) {
Long2LongOpenHashMap ret = new Long2LongOpenHashMap((int) wordIds.size(), 0.75f);
var iter = iterator(recordSize);
while (iter.next()) {
ret.put(iter.wordId, iter.startOffset);
}
return ret;
}
public static ReversePreindexWordSegments construct(IndexJournalReader reader,
Path wordIdsFile,
Path countsFile)
throws IOException
{
Long2IntOpenHashMap countsMap = new Long2IntOpenHashMap(100_000, 0.75f);
countsMap.defaultReturnValue(0);
reader.forEachWordId(wordId -> countsMap.addTo(wordId, 1));
LongArray words = LongArray.mmapForWriting(wordIdsFile, countsMap.size());
LongArray counts = LongArray.mmapForWriting(countsFile, countsMap.size());
// Create the words file by iterating over the map and inserting them into
// the words file in whatever bizarro hash table order they appear in
int i = 0;
LongIterator iter = countsMap.keySet().iterator();
while (iter.hasNext()) {
words.set(i, iter.nextLong());
i++;
}
// Sort the words file
words.quickSort(0, counts.size());
// Populate the counts
for (i = 0; i < countsMap.size(); i++) {
counts.set(i, countsMap.get(words.get(i)));
}
return new ReversePreindexWordSegments(words, counts, wordIdsFile, countsFile);
}
public SegmentIterator iterator(int recordSize) {
return new SegmentIterator(recordSize);
}
public SegmentConstructionIterator constructionIterator(int recordSize) {
return new SegmentConstructionIterator(recordSize);
}
public long totalSize() {
return counts.fold(0, 0, counts.size(), Long::sum);
}
public void delete() throws IOException {
Files.delete(countsFile);
Files.delete(wordsFile);
}
public void force() {
counts.force();
wordIds.force();
}
public class SegmentIterator {
private final int recordSize;
private final long fileSize;
long wordId;
long startOffset = 0;
long endOffset = 0;
private SegmentIterator(int recordSize) {
this.recordSize = recordSize;
this.fileSize = wordIds.size();
}
private int i = -1;
public int idx() {
return i;
}
public boolean next() {
if (++i >= fileSize) {
wordId = Long.MIN_VALUE;
return false;
}
wordId = wordIds.get(i);
startOffset = endOffset;
endOffset = startOffset + recordSize * counts.get(i);
return true;
}
public boolean hasMorePositions() {
return i + 1 < wordIds.size();
}
public boolean isPositionBeforeEnd() {
return i < wordIds.size();
}
public long size() {
return endOffset - startOffset;
}
}
class SegmentConstructionIterator {
private final int recordSize;
private final long fileSize;
long wordId;
long startOffset = 0;
long endOffset = 0;
private SegmentConstructionIterator(int recordSize) {
this.recordSize = recordSize;
this.fileSize = wordIds.size();
if (fileSize == 0) {
throw new IllegalArgumentException("Cannot construct zero-length word segment file");
}
this.wordId = wordIds.get(0);
}
private int i = 0;
public int idx() {
return i;
}
public boolean putNext(long size) {
if (i >= fileSize)
return false;
endOffset = startOffset + recordSize * size;
counts.set(i, size);
startOffset = endOffset;
endOffset = -1;
i++;
if (i == fileSize) {
// We've reached the end of the iteration and there is no
// "next" wordId to fetch
wordId = Long.MIN_VALUE;
return false;
}
else {
wordId = wordIds.get(i);
return true;
}
}
public boolean canPutMore() {
return i < wordIds.size();
}
}
}
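
For illustration only (not part of this commit): SegmentIterator above converts per-word document counts into [start, end) offsets into the documents array by accumulating recordSize * count. A minimal sketch of that arithmetic with made-up word ids and counts:

class SegmentOffsetSketch {
    public static void main(String[] args) {
        long[] wordIds = { 10, 33, 40 };
        long[] counts  = {  2,  1,  3 };   // documents per word
        int recordSize = 2;                // (docId, meta) pairs

        long start = 0;
        for (int i = 0; i < wordIds.length; i++) {
            long end = start + recordSize * counts[i];
            System.out.printf("word %d -> docs[%d..%d)%n", wordIds[i], start, end);
            start = end;
        }
        // word 10 -> docs[0..4), word 33 -> docs[4..6), word 40 -> docs[6..12)
    }
}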

View File

@ -1,218 +0,0 @@
package nu.marginalia.index.full;
import lombok.SneakyThrows;
import nu.marginalia.index.construction.CountToOffsetTransformer;
import nu.marginalia.index.construction.ReverseIndexBTreeTransformer;
import nu.marginalia.index.construction.IndexSizeEstimator;
import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.index.journal.model.IndexJournalStatistics;
import nu.marginalia.index.journal.reader.IndexJournalReader;
import nu.marginalia.ranking.DomainRankings;
import nu.marginalia.rwf.RandomWriteFunnel;
import nu.marginalia.array.IntArray;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.algo.SortingContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import nu.marginalia.service.control.ServiceHeartbeat;
import static nu.marginalia.index.full.ReverseIndexFullParameters.bTreeContext;
public class ReverseIndexFullConverter {
private static final int RWF_BIN_SIZE = 10_000_000;
private final ServiceHeartbeat heartbeat;
private final Path tmpFileDir;
private final Logger logger = LoggerFactory.getLogger(getClass());
private final IndexJournalReader journalReader;
private final DomainRankings domainRankings;
private final Path outputFileWords;
private final Path outputFileDocs;
private final SortingContext sortingContext;
public ReverseIndexFullConverter(ServiceHeartbeat heartbeat,
Path tmpFileDir,
IndexJournalReader journalReader,
DomainRankings domainRankings,
Path outputFileWords,
Path outputFileDocs) {
this.heartbeat = heartbeat;
this.tmpFileDir = tmpFileDir;
this.journalReader = journalReader;
this.domainRankings = domainRankings;
this.outputFileWords = outputFileWords;
this.outputFileDocs = outputFileDocs;
this.sortingContext = new SortingContext(tmpFileDir, 64_000);
}
public enum TaskSteps {
ACCUMULATE_STATISTICS,
INCREMENT_OFFSETS,
COUNT_OFFSETS,
CREATE_INTERMEDIATE_DOCS,
SORT_INTERMEDIATE_DOCS,
SIZING,
FINALIZING_DOCS,
FORCE,
FINISHED,
}
public void convert() throws IOException {
deleteOldFiles();
if (journalReader.fileHeader().fileSize() <= IndexJournalReader.FILE_HEADER_SIZE_BYTES) {
logger.warn("Bailing: Journal is empty!");
return;
}
final Path intermediateUrlsFile = Files.createTempFile(tmpFileDir, "urls-sorted", ".dat");
try (var progress = heartbeat.createServiceTaskHeartbeat(TaskSteps.class, "reverseIndexFullConverter")) {
progress.progress(TaskSteps.ACCUMULATE_STATISTICS);
final IndexJournalStatistics statistics = journalReader.getStatistics();
final long wordsFileSize = statistics.highestWord() + 1;
progress.progress(TaskSteps.INCREMENT_OFFSETS);
logger.debug("Words file size: {}", wordsFileSize);
// Create a count of how many documents contain each word
final LongArray wordsOffsets = LongArray.allocate(wordsFileSize);
journalReader.forEachWordId(wordsOffsets::increment);
progress.progress(TaskSteps.COUNT_OFFSETS);
wordsOffsets.transformEach(0, wordsFileSize, new CountToOffsetTransformer(ReverseIndexFullParameters.ENTRY_SIZE));
progress.progress(TaskSteps.CREATE_INTERMEDIATE_DOCS);
// Construct an intermediate representation of the reverse documents index
try (FileChannel intermediateDocChannel =
(FileChannel) Files.newByteChannel(intermediateUrlsFile,
StandardOpenOption.CREATE, StandardOpenOption.READ, StandardOpenOption.WRITE))
{
// Construct intermediate index
try (RandomWriteFunnel intermediateDocumentWriteFunnel = new RandomWriteFunnel(tmpFileDir, RWF_BIN_SIZE);
IntermediateIndexConstructor intermediateIndexConstructor = new IntermediateIndexConstructor(tmpFileDir, wordsOffsets, intermediateDocumentWriteFunnel)
)
{
journalReader.forEachDocIdRecord(intermediateIndexConstructor);
intermediateDocumentWriteFunnel.write(intermediateDocChannel);
}
intermediateDocChannel.force(false);
progress.progress(TaskSteps.SORT_INTERMEDIATE_DOCS);
// Sort each segment of the intermediate file
{
LongArray intermediateDocs = LongArray.mmapForModifying(intermediateUrlsFile);
wordsOffsets.foldIO(0, 0, wordsFileSize, (s, e) -> {
intermediateDocs.sortLargeSpanN(sortingContext, ReverseIndexFullParameters.ENTRY_SIZE, s, e);
return e;
});
intermediateDocs.force();
}
progress.progress(TaskSteps.SIZING);
IndexSizeEstimator sizeEstimator = new IndexSizeEstimator(
ReverseIndexFullParameters.bTreeContext,
ReverseIndexFullParameters.ENTRY_SIZE);
wordsOffsets.fold(0, 0, wordsOffsets.size(), sizeEstimator);
progress.progress(TaskSteps.FINALIZING_DOCS);
LongArray finalDocs = LongArray.mmapForWriting(outputFileDocs, sizeEstimator.size);
// Construct the proper reverse index
wordsOffsets.transformEachIO(0, wordsOffsets.size(), new ReverseIndexBTreeTransformer(finalDocs, ReverseIndexFullParameters.ENTRY_SIZE, bTreeContext, intermediateDocChannel));
wordsOffsets.write(outputFileWords);
progress.progress(TaskSteps.FORCE);
// Attempt to clean up before forcing (important disk space preservation)
Files.deleteIfExists(intermediateUrlsFile);
wordsOffsets.force();
finalDocs.force();
progress.progress(TaskSteps.FINISHED);
}
} catch (IOException ex) {
logger.error("Failed to convert", ex);
throw ex;
} finally {
Files.deleteIfExists(intermediateUrlsFile);
}
}
private void deleteOldFiles() throws IOException {
Files.deleteIfExists(outputFileWords);
Files.deleteIfExists(outputFileDocs);
}
private class IntermediateIndexConstructor implements IndexJournalReader.LongObjectConsumer<IndexJournalEntryData.Record>, AutoCloseable {
private final LongArray wordRangeEnds;
private final IntArray wordRangeOffset;
private final RandomWriteFunnel documentsFile;
private final Path tempFile;
public IntermediateIndexConstructor(Path tempDir, LongArray wordRangeEnds, RandomWriteFunnel documentsFile) throws IOException {
tempFile = Files.createTempFile(tempDir, "iic", "dat");
this.wordRangeEnds = wordRangeEnds;
this.wordRangeOffset = IntArray.mmapForWriting(tempFile, wordRangeEnds.size());
this.documentsFile = documentsFile;
}
@SneakyThrows
@Override
public void accept(long docId, IndexJournalEntryData.Record record) {
/* Encode the ID as
*
* 32 bits 32 bits
* [ ranking | url-id ]
*
* in order to get low-ranking documents to be considered first
* when sorting the items.
*/
int domainId = (int) (docId >>> 32);
long rankingId = (long) domainRankings.getRanking(domainId) << 32;
int urlId = (int) (docId & 0xFFFF_FFFFL);
long rankEncodedId = rankingId | urlId;
final int wordId = record.wordId();
long offset = startOfRange(wordId);
documentsFile.put(offset + wordRangeOffset.getAndIncrement(wordId), rankEncodedId);
documentsFile.put(offset + wordRangeOffset.getAndIncrement(wordId), record.metadata());
}
private long startOfRange(int wordId) {
if (wordId == 0) return 0;
return wordRangeEnds.get(wordId - 1);
}
public void close() throws IOException {
Files.delete(tempFile);
}
}
}
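
For illustration only (not part of this commit): the id encoding described in the comment inside accept() packs a 32-bit ranking and a 32-bit url id into one long so that an ascending sort considers ranking first. A standalone sketch of the packing and unpacking, with made-up values:

class RankEncodingSketch {
    static long encode(int ranking, int urlId) {
        return ((long) ranking << 32) | (urlId & 0xFFFF_FFFFL);
    }

    public static void main(String[] args) {
        long a = encode(5, 123);   // lower ranking value
        long b = encode(7, 42);    // higher ranking value
        System.out.println(a < b);                    // true: ranking dominates the sort order
        System.out.println((int) (a >>> 32));         // 5   (ranking)
        System.out.println((int) (a & 0xFFFF_FFFFL)); // 123 (url id)
    }
}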

View File

@ -1,16 +0,0 @@
package nu.marginalia.index.full;
import nu.marginalia.btree.model.BTreeBlockSize;
import nu.marginalia.btree.model.BTreeContext;
public class ReverseIndexFullParameters {
static final int ENTRY_SIZE = 2;
// This is the byte size per index page on disk; the data pages are twice as large due to ENTRY_SIZE = 2.
//
// Given a hardware limit of 4k reads, 2k block size should be optimal.
static final BTreeBlockSize blockSize = BTreeBlockSize.BS_2048;
static final BTreeContext bTreeContext = new BTreeContext(5, ENTRY_SIZE, blockSize);
}

View File

@ -1,215 +0,0 @@
package nu.marginalia.index.priority;
import lombok.SneakyThrows;
import nu.marginalia.array.IntArray;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.algo.SortingContext;
import nu.marginalia.index.construction.CountToOffsetTransformer;
import nu.marginalia.index.construction.ReverseIndexBTreeTransformer;
import nu.marginalia.index.construction.IndexSizeEstimator;
import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.index.journal.model.IndexJournalStatistics;
import nu.marginalia.index.journal.reader.IndexJournalReader;
import nu.marginalia.ranking.DomainRankings;
import nu.marginalia.rwf.RandomWriteFunnel;
import nu.marginalia.service.control.ServiceHeartbeat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import static nu.marginalia.index.priority.ReverseIndexPriorityParameters.bTreeContext;
public class ReverseIndexPriorityConverter {
private static final int RWF_BIN_SIZE = 10_000_000;
private final ServiceHeartbeat heartbeat;
private final Path tmpFileDir;
private final Logger logger = LoggerFactory.getLogger(getClass());
private final IndexJournalReader journalReader;
private final DomainRankings domainRankings;
private final Path outputFileWords;
private final Path outputFileDocs;
private final SortingContext sortingContext;
public ReverseIndexPriorityConverter(ServiceHeartbeat heartbeat,
Path tmpFileDir,
IndexJournalReader journalReader,
DomainRankings domainRankings,
Path outputFileWords,
Path outputFileDocs) {
this.heartbeat = heartbeat;
this.tmpFileDir = tmpFileDir;
this.journalReader = journalReader;
this.domainRankings = domainRankings;
this.outputFileWords = outputFileWords;
this.outputFileDocs = outputFileDocs;
this.sortingContext = new SortingContext(tmpFileDir, 64_000);
}
public enum TaskSteps {
ACCUMULATE_STATISTICS,
INCREMENT_OFFSETS,
COUNT_OFFSETS,
CREATE_INTERMEDIATE_DOCS,
SORT_INTERMEDIATE_DOCS,
SIZING,
FINALIZING_DOCS,
FORCE,
FINISHED,
}
public void convert() throws IOException {
deleteOldFiles();
if (journalReader.fileHeader().fileSize() <= IndexJournalReader.FILE_HEADER_SIZE_BYTES) {
logger.warn("Bailing: Journal is empty!");
return;
}
final Path intermediateUrlsFile = Files.createTempFile(tmpFileDir, "urls-sorted", ".dat");
try (var progress = heartbeat.createServiceTaskHeartbeat(TaskSteps.class, "reverseIndexPriorityConverter")) {
progress.progress(TaskSteps.ACCUMULATE_STATISTICS);
final IndexJournalStatistics statistics = journalReader.getStatistics();
final long wordsFileSize = statistics.highestWord() + 1;
progress.progress(TaskSteps.INCREMENT_OFFSETS);
logger.debug("Words file size: {}", wordsFileSize);
// Create a count of how many documents contain each word
final LongArray wordsOffsets = LongArray.allocate(wordsFileSize);
journalReader.forEachWordId(wordsOffsets::increment);
progress.progress(TaskSteps.COUNT_OFFSETS);
wordsOffsets.transformEach(0, wordsFileSize, new CountToOffsetTransformer(ReverseIndexPriorityParameters.ENTRY_SIZE));
progress.progress(TaskSteps.CREATE_INTERMEDIATE_DOCS);
// Construct an intermediate representation of the reverse documents index
try (FileChannel intermediateDocChannel =
(FileChannel) Files.newByteChannel(intermediateUrlsFile,
StandardOpenOption.CREATE, StandardOpenOption.READ, StandardOpenOption.WRITE))
{
// Construct intermediate index
try (RandomWriteFunnel intermediateDocumentWriteFunnel = new RandomWriteFunnel(tmpFileDir, RWF_BIN_SIZE);
IntermediateIndexConstructor intermediateIndexConstructor = new IntermediateIndexConstructor(tmpFileDir, wordsOffsets, intermediateDocumentWriteFunnel)
)
{
journalReader.forEachDocIdRecord(intermediateIndexConstructor);
intermediateDocumentWriteFunnel.write(intermediateDocChannel);
}
intermediateDocChannel.force(false);
progress.progress(TaskSteps.SORT_INTERMEDIATE_DOCS);
// Sort each segment of the intermediate file
{
LongArray intermediateDocs = LongArray.mmapForModifying(intermediateUrlsFile);
wordsOffsets.foldIO(0, 0, wordsFileSize, (s, e) -> {
intermediateDocs.sortLargeSpan(sortingContext, s, e);
return e;
});
intermediateDocs.force();
}
progress.progress(TaskSteps.SIZING);
IndexSizeEstimator sizeEstimator = new IndexSizeEstimator(
bTreeContext,
ReverseIndexPriorityParameters.ENTRY_SIZE);
wordsOffsets.fold(0, 0, wordsOffsets.size(), sizeEstimator);
progress.progress(TaskSteps.FINALIZING_DOCS);
LongArray finalDocs = LongArray.mmapForWriting(outputFileDocs, sizeEstimator.size);
// Construct the proper reverse index
wordsOffsets.transformEachIO(0, wordsOffsets.size(), new ReverseIndexBTreeTransformer(finalDocs, ReverseIndexPriorityParameters.ENTRY_SIZE, bTreeContext, intermediateDocChannel));
wordsOffsets.write(outputFileWords);
progress.progress(TaskSteps.FORCE);
// Attempt to clean up before forcing (important disk space preservation)
Files.deleteIfExists(intermediateUrlsFile);
wordsOffsets.force();
finalDocs.force();
progress.progress(TaskSteps.FINISHED);
}
} catch (IOException ex) {
logger.error("Failed to convert", ex);
throw ex;
} finally {
Files.deleteIfExists(intermediateUrlsFile);
}
}
private void deleteOldFiles() throws IOException {
Files.deleteIfExists(outputFileWords);
Files.deleteIfExists(outputFileDocs);
}
private class IntermediateIndexConstructor implements IndexJournalReader.LongObjectConsumer<IndexJournalEntryData.Record>, AutoCloseable {
private final LongArray wordRangeEnds;
private final IntArray wordRangeOffset;
private final RandomWriteFunnel documentsFile;
private final Path tempFile;
public IntermediateIndexConstructor(Path tempDir, LongArray wordRangeEnds, RandomWriteFunnel documentsFile) throws IOException {
tempFile = Files.createTempFile(tempDir, "iic", "dat");
this.wordRangeEnds = wordRangeEnds;
this.wordRangeOffset = IntArray.mmapForWriting(tempFile, wordRangeEnds.size());
this.documentsFile = documentsFile;
}
@SneakyThrows
@Override
public void accept(long docId, IndexJournalEntryData.Record record) {
/* Encode the ID as
*
* 32 bits 32 bits
* [ ranking | url-id ]
*
* in order to get low-ranking documents to be considered first
* when sorting the items.
*/
int domainId = (int) (docId >>> 32);
long rankingId = (long) domainRankings.getRanking(domainId) << 32;
int urlId = (int) (docId & 0xFFFF_FFFFL);
long rankEncodedId = rankingId | urlId;
final int wordId = record.wordId();
long offset = startOfRange(wordId);
documentsFile.put(offset + wordRangeOffset.getAndIncrement(wordId), rankEncodedId);
}
private long startOfRange(int wordId) {
if (wordId == 0) return 0;
return wordRangeEnds.get(wordId - 1);
}
public void close() throws IOException {
Files.delete(tempFile);
}
}
}

View File

@ -1,48 +0,0 @@
package nu.marginalia.index.priority;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.btree.BTreeReader;
import nu.marginalia.index.query.EntrySource;
import static java.lang.Math.min;
public class ReverseIndexPriorityEntrySource implements EntrySource {
private final BTreeReader reader;
int pos;
int endOffset;
private final int wordId;
public ReverseIndexPriorityEntrySource(BTreeReader reader, int wordId) {
this.reader = reader;
this.wordId = wordId;
pos = 0;
endOffset = pos + reader.numEntries();
}
@Override
public void skip(int n) {
pos += n;
}
@Override
public void read(LongQueryBuffer buffer) {
buffer.end = min(buffer.end, endOffset - pos);
reader.readData(buffer.data, buffer.end, pos);
pos += buffer.end;
buffer.uniq();
}
@Override
public boolean hasMore() {
return pos < endOffset;
}
@Override
public String indexName() {
return "Priority:" + wordId;
}
}

View File

@ -1,31 +0,0 @@
package nu.marginalia.index.priority;
import nu.marginalia.btree.model.BTreeBlockSize;
import nu.marginalia.btree.model.BTreeContext;
import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.model.idx.WordFlags;
public class ReverseIndexPriorityParameters {
static final int ENTRY_SIZE = 1;
static final BTreeBlockSize blockSize = BTreeBlockSize.BS_4096;
static final BTreeContext bTreeContext = new BTreeContext(5, ENTRY_SIZE, blockSize);
private static final long highPriorityFlags =
WordFlags.Title.asBit()
| WordFlags.Subjects.asBit()
| WordFlags.TfIdfHigh.asBit()
| WordFlags.NamesWords.asBit()
| WordFlags.UrlDomain.asBit()
| WordFlags.UrlPath.asBit()
| WordFlags.Site.asBit()
| WordFlags.SiteAdjacent.asBit();
public static boolean filterPriorityRecord(IndexJournalEntryData.Record record) {
long meta = record.metadata();
return (meta & highPriorityFlags) != 0;
}
}

View File

@ -1,77 +0,0 @@
package nu.marginalia.index.priority;
import nu.marginalia.index.query.EntrySource;
import nu.marginalia.array.LongArray;
import nu.marginalia.btree.BTreeReader;
import nu.marginalia.index.query.EmptyEntrySource;
import nu.marginalia.index.query.ReverseIndexRetainFilter;
import nu.marginalia.index.query.filter.QueryFilterNoPass;
import nu.marginalia.index.query.filter.QueryFilterStepIf;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
public class ReverseIndexPriorityReader {
private final LongArray words;
private final LongArray documents;
private final Logger logger = LoggerFactory.getLogger(getClass());
public ReverseIndexPriorityReader(Path words, Path documents) throws IOException {
if (!Files.exists(words) || !Files.exists(documents)) {
this.words = null;
this.documents = null;
return;
}
logger.info("Switching prio reverse index");
this.words = LongArray.mmapRead(words);
this.documents = LongArray.mmapRead(documents);
}
public EntrySource priorityDocuments(int wordId) {
if (words == null) {
// index not loaded
return new EmptyEntrySource();
}
if (wordId < 0 || wordId >= words.size()) return new EmptyEntrySource();
long offset = words.get(wordId);
if (offset < 0) return new EmptyEntrySource();
return new ReverseIndexPriorityEntrySource(createReaderNew(offset), wordId);
}
private BTreeReader createReaderNew(long offset) {
return new BTreeReader(documents, ReverseIndexPriorityParameters.bTreeContext, offset);
}
public QueryFilterStepIf also(int wordId) {
if (wordId < 0) return new QueryFilterNoPass();
long offset = words.get(wordId);
if (offset < 0) return new QueryFilterNoPass();
return new ReverseIndexRetainFilter(createReaderNew(offset), "priority", wordId);
}
public int numDocuments(int wordId) {
if (wordId < 0)
return 0;
long offset = words.get(wordId);
if (offset < 0)
return 0;
return createReaderNew(offset).numEntries();
}
}

View File

@ -4,7 +4,7 @@ import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.btree.BTreeReader;
import nu.marginalia.index.query.filter.QueryFilterStepIf;
public record ReverseIndexRetainFilter(BTreeReader range, String name, int wordId) implements QueryFilterStepIf {
public record ReverseIndexRetainFilter(BTreeReader range, String name, long wordId) implements QueryFilterStepIf {
@Override
public void apply(LongQueryBuffer buffer) {

View File

@ -0,0 +1,109 @@
package nu.marginalia.index;
import nu.marginalia.array.algo.SortingContext;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.ReversePreindex;
import nu.marginalia.index.construction.TestJournalFactory;
import nu.marginalia.index.construction.TestJournalFactory.EntryDataWithWordMeta;
import nu.marginalia.index.journal.reader.IndexJournalReader;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import static nu.marginalia.index.construction.TestJournalFactory.wm;
import static org.junit.jupiter.api.Assertions.*;
class ReverseIndexReaderTest {
TestJournalFactory journalFactory;
Path tempDir;
SortingContext sortingContext;
@BeforeEach
public void setUp() throws IOException {
journalFactory = new TestJournalFactory();
tempDir = Files.createTempDirectory("sort");
sortingContext = new SortingContext(Path.of("invalid"), 1<<20);
}
@AfterEach
public void tearDown() throws IOException {
journalFactory.clear();
List<Path> contents = new ArrayList<>();
Files.list(tempDir).forEach(contents::add);
for (var tempFile : contents) {
Files.delete(tempFile);
}
Files.delete(tempDir);
}
@Test
public void testSimple() throws IOException {
var indexReader = createIndex(
new EntryDataWithWordMeta(100, 101, wm(50, 51))
);
assertEquals(1, indexReader.numDocuments(50));
long[] meta = indexReader.getTermMeta(50, new long[] { 100 });
assertArrayEquals(new long[] { 51 }, meta);
assertArrayEquals(new long[] { 100 }, readEntries(indexReader, 50));
}
@Test
public void test2x2() throws IOException {
var indexReader = createIndex(
new EntryDataWithWordMeta(100, 101, wm(50, 51), wm(51, 52)),
new EntryDataWithWordMeta(101, 101, wm(51, 53), wm(52, 54))
);
assertEquals(1, indexReader.numDocuments(50));
assertEquals(2, indexReader.numDocuments(51));
assertEquals(1, indexReader.numDocuments(52));
assertArrayEquals(new long[] { 51 }, indexReader.getTermMeta(50, new long[] { 100 }));
assertArrayEquals(new long[] { 100 }, readEntries(indexReader, 50));
assertArrayEquals(new long[] { 52, 53 }, indexReader.getTermMeta(51, new long[] { 100, 101 }));
assertArrayEquals(new long[] { 100, 101 }, readEntries(indexReader, 51));
assertArrayEquals(new long[] { 54 }, indexReader.getTermMeta(52, new long[] { 101 }));
assertArrayEquals(new long[] { 101 }, readEntries(indexReader, 52));
}
private long[] readEntries(ReverseIndexReader reader, long wordId) {
var es = reader.documents(wordId);
assertTrue(es.hasMore());
LongQueryBuffer buffer = new LongQueryBuffer(4);
es.read(buffer);
assertFalse(es.hasMore());
return buffer.copyData();
}
private ReverseIndexReader createIndex(EntryDataWithWordMeta... scenario) throws IOException {
var reader = journalFactory.createReader(scenario);
var preindex = ReversePreindex.constructPreindex(reader, DocIdRewriter.identity(), tempDir);
Path docsFile = tempDir.resolve("docs.dat");
Path wordsFile = tempDir.resolve("words.dat");
preindex.finalizeIndex(docsFile, wordsFile);
preindex.delete();
return new ReverseIndexReader(wordsFile, docsFile);
}
}

View File

@ -0,0 +1,171 @@
package nu.marginalia.index.construction;
import nu.marginalia.array.algo.SortingContext;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import static nu.marginalia.index.construction.TestJournalFactory.EntryData;
import static org.junit.jupiter.api.Assertions.assertEquals;
class ReversePreindexDocsTest {
Path countsFile;
Path wordsIdFile;
Path docsFile;
Path tempDir;
TestJournalFactory journalFactory;
@BeforeEach
public void setUp() throws IOException {
journalFactory = new TestJournalFactory();
countsFile = Files.createTempFile("counts", ".dat");
wordsIdFile = Files.createTempFile("words", ".dat");
docsFile = Files.createTempFile("docs", ".dat");
tempDir = Files.createTempDirectory("sort");
}
@AfterEach
public void tearDown() throws IOException {
journalFactory.clear();
Files.deleteIfExists(countsFile);
Files.deleteIfExists(wordsIdFile);
List<Path> contents = new ArrayList<>();
Files.list(tempDir).forEach(contents::add);
for (var tempFile : contents) {
Files.delete(tempFile);
}
Files.delete(tempDir);
}
@Test
public void testDocs() throws IOException {
var reader = journalFactory.createReader(
new EntryData(-0xF00BA3L, 0, 10, 40, -100, 33)
);
var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), segments);
List<TestSegmentData> expected = List.of(
new TestSegmentData(-100, 0, 2, new long[] { -0xF00BA3L, 0 }),
new TestSegmentData(10, 2, 4, new long[] { -0xF00BA3L, 0 }),
new TestSegmentData(33, 4, 6, new long[] { -0xF00BA3L, 0 }),
new TestSegmentData(40, 6, 8, new long[] { -0xF00BA3L, 0 })
);
List<TestSegmentData> actual = new ArrayList<>();
var iter = segments.iterator(2);
while (iter.next()) {
long[] data = new long[(int) (iter.endOffset - iter.startOffset)];
docs.slice(iter.startOffset, iter.endOffset).get(0, data);
actual.add(new TestSegmentData(iter.wordId, iter.startOffset, iter.endOffset,
data));
}
assertEquals(expected, actual);
}
@Test
public void testDocsRepeatedWord() throws IOException {
var reader = journalFactory.createReader(
new EntryData(-0xF00BA3L, 0, 4, 4)
);
var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), segments);
List<TestSegmentData> expected = List.of(
new TestSegmentData(4, 0, 4, new long[] { -0xF00BA3L, 0, -0xF00BA3L, 0 })
);
List<TestSegmentData> actual = new ArrayList<>();
var iter = segments.iterator(2);
while (iter.next()) {
long[] data = new long[(int) (iter.endOffset - iter.startOffset)];
docs.slice(iter.startOffset, iter.endOffset).get(0, data);
actual.add(new TestSegmentData(iter.wordId, iter.startOffset, iter.endOffset,
data));
}
assertEquals(expected, actual);
}
@Test
public void testDocs2() throws IOException {
var reader = journalFactory.createReader(
new EntryData(-0xF00BA3L, 0, 10, 40, -100, 33),
new EntryData(0xF00BA4L, 0, 15, 30, -100, 33)
);
var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), segments);
List<TestSegmentData> expected = List.of(
new TestSegmentData(-100, 0, 4, new long[] { -0xF00BA3L, 0, 0xF00BA4L, 0 }),
new TestSegmentData(10, 4, 6, new long[] { -0xF00BA3L, 0}),
new TestSegmentData(15, 6, 8, new long[] { 0xF00BA4L, 0}),
new TestSegmentData(30, 8, 10, new long[] { 0xF00BA4L, 0}),
new TestSegmentData(33, 10, 14, new long[] { -0xF00BA3L, 0, 0xF00BA4L, 0}),
new TestSegmentData(40, 14, 16, new long[] { -0xF00BA3L, 0})
);
List<TestSegmentData> actual = new ArrayList<>();
var iter = segments.iterator(2);
while (iter.next()) {
long[] data = new long[(int) (iter.endOffset - iter.startOffset)];
docs.slice(iter.startOffset, iter.endOffset).get(0, data);
actual.add(new TestSegmentData(iter.wordId, iter.startOffset, iter.endOffset,
data));
}
assertEquals(expected, actual);
}
record TestSegmentData(long wordId, long start, long end, long[] data) {
public TestSegmentData(long wordId, long start, long end) {
this(wordId, start, end, null);
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
TestSegmentData that = (TestSegmentData) o;
if (wordId != that.wordId) return false;
if (start != that.start) return false;
if (end != that.end) return false;
return Arrays.equals(data, that.data);
}
@Override
public int hashCode() {
int result = (int) (wordId ^ (wordId >>> 32));
result = 31 * result + (int) (start ^ (start >>> 32));
result = 31 * result + (int) (end ^ (end >>> 32));
result = 31 * result + Arrays.hashCode(data);
return result;
}
@Override
public String toString() {
return "TestSegmentData{" +
"wordId=" + wordId +
", start=" + start +
", end=" + end +
", data=" + Arrays.toString(data) +
'}';
}
}
}

Some files were not shown because too many files have changed in this diff.