diff --git a/code/libraries/btree/test/nu/marginalia/btree/BTreeWriterTest.java b/code/libraries/btree/test/nu/marginalia/btree/BTreeWriterTest.java
index dcb802b7..2cad98fc 100644
--- a/code/libraries/btree/test/nu/marginalia/btree/BTreeWriterTest.java
+++ b/code/libraries/btree/test/nu/marginalia/btree/BTreeWriterTest.java
@@ -85,7 +85,7 @@ class BTreeWriterTest {
     public void testWriteEntrySize2() throws IOException {
         BTreeContext ctx = new BTreeContext(4, 2, BTreeBlockSize.BS_64);

-        var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat");
+        var tempFile = Files.createTempFile("tst", "dat");

         int[] data = generateItems32(64);

diff --git a/code/processes/converting-process/test/nu/marginalia/converting/CrawlingThenConvertingIntegrationTest.java b/code/processes/converting-process/test/nu/marginalia/converting/CrawlingThenConvertingIntegrationTest.java
index e32bdbf6..9f4fe2f9 100644
--- a/code/processes/converting-process/test/nu/marginalia/converting/CrawlingThenConvertingIntegrationTest.java
+++ b/code/processes/converting-process/test/nu/marginalia/converting/CrawlingThenConvertingIntegrationTest.java
@@ -7,6 +7,7 @@ import nu.marginalia.WmsaHome;
 import nu.marginalia.converting.model.ProcessedDomain;
 import nu.marginalia.converting.processor.DomainProcessor;
 import nu.marginalia.crawl.CrawlerMain;
+import nu.marginalia.crawl.DomainStateDb;
 import nu.marginalia.crawl.fetcher.HttpFetcher;
 import nu.marginalia.crawl.fetcher.HttpFetcherImpl;
 import nu.marginalia.crawl.fetcher.warc.WarcRecorder;
@@ -46,6 +47,7 @@ public class CrawlingThenConvertingIntegrationTest {

     private Path fileName;
     private Path fileName2;
+    private Path dbTempFile;

     @BeforeAll
     public static void setUpAll() {
@@ -63,16 +65,18 @@ public class CrawlingThenConvertingIntegrationTest {
         httpFetcher = new HttpFetcherImpl(WmsaHome.getUserAgent().uaString());
         this.fileName = Files.createTempFile("crawling-then-converting", ".warc.gz");
         this.fileName2 = Files.createTempFile("crawling-then-converting", ".warc.gz");
+        this.dbTempFile = Files.createTempFile("domains", "db");
     }

     @AfterEach
     public void tearDown() throws IOException {
         Files.deleteIfExists(fileName);
         Files.deleteIfExists(fileName2);
+        Files.deleteIfExists(dbTempFile);
     }

     @Test
-    public void testInvalidDomain() throws IOException {
+    public void testInvalidDomain() throws Exception {
         // Attempt to fetch an invalid domain
         var specs = new CrawlerMain.CrawlSpecRecord("invalid.invalid.invalid", 10);
@@ -88,7 +92,7 @@ public class CrawlingThenConvertingIntegrationTest {
     }

     @Test
-    public void testRedirectingDomain() throws IOException {
+    public void testRedirectingDomain() throws Exception {
         // Attempt to fetch an invalid domain
         var specs = new CrawlerMain.CrawlSpecRecord("memex.marginalia.nu", 10);
@@ -107,7 +111,7 @@ public class CrawlingThenConvertingIntegrationTest {
     }

     @Test
-    public void testBlockedDomain() throws IOException {
+    public void testBlockedDomain() throws Exception {
         // Attempt to fetch an invalid domain
         var specs = new CrawlerMain.CrawlSpecRecord("search.marginalia.nu", 10);
@@ -124,7 +128,7 @@ public class CrawlingThenConvertingIntegrationTest {
     }

     @Test
-    public void crawlSunnyDay() throws IOException {
+    public void crawlSunnyDay() throws Exception {
         var specs = new CrawlerMain.CrawlSpecRecord("www.marginalia.nu", 10);

         CrawledDomain domain = crawl(specs);
@@ -157,7 +161,7 @@ public class CrawlingThenConvertingIntegrationTest {


     @Test
-    public void crawlContentTypes() throws IOException {
+    public void crawlContentTypes() throws Exception {
         var specs = new CrawlerMain.CrawlSpecRecord("www.marginalia.nu", 10,
                 List.of(
                         "https://www.marginalia.nu/sanic.png",
@@ -195,7 +199,7 @@ public class CrawlingThenConvertingIntegrationTest {

     @Test
-    public void crawlRobotsTxt() throws IOException {
+    public void crawlRobotsTxt() throws Exception {
         var specs = new CrawlerMain.CrawlSpecRecord("search.marginalia.nu", 5,
                 List.of("https://search.marginalia.nu/search?q=hello+world")
         );
@@ -235,15 +239,17 @@ public class CrawlingThenConvertingIntegrationTest {
             return null; // unreachable
         }
     }
-    private CrawledDomain crawl(CrawlerMain.CrawlSpecRecord specs) throws IOException {
+    private CrawledDomain crawl(CrawlerMain.CrawlSpecRecord specs) throws Exception {
         return crawl(specs, domain -> true);
     }

-    private CrawledDomain crawl(CrawlerMain.CrawlSpecRecord specs, Predicate<EdgeDomain> domainBlacklist) throws IOException {
+    private CrawledDomain crawl(CrawlerMain.CrawlSpecRecord specs, Predicate<EdgeDomain> domainBlacklist) throws Exception {
         List<SerializableCrawlData> data = new ArrayList<>();

-        try (var recorder = new WarcRecorder(fileName)) {
-            new CrawlerRetreiver(httpFetcher, new DomainProber(domainBlacklist), specs, recorder).crawlDomain();
+        try (var recorder = new WarcRecorder(fileName);
+             var db = new DomainStateDb(dbTempFile))
+        {
+            new CrawlerRetreiver(httpFetcher, new DomainProber(domainBlacklist), specs, db, recorder).crawlDomain();
         }

         CrawledDocumentParquetRecordFileWriter.convertWarc(specs.domain(),
diff --git a/code/processes/crawling-process/build.gradle b/code/processes/crawling-process/build.gradle
index e955f86c..fa7579f3 100644
--- a/code/processes/crawling-process/build.gradle
+++ b/code/processes/crawling-process/build.gradle
@@ -46,6 +46,8 @@ dependencies {
     implementation libs.notnull
     implementation libs.guava

+    implementation libs.sqlite
+
     implementation dependencies.create(libs.guice.get()) {
         exclude group: 'com.google.guava'
     }

diff --git a/code/processes/crawling-process/java/nu/marginalia/crawl/CrawlerMain.java b/code/processes/crawling-process/java/nu/marginalia/crawl/CrawlerMain.java
index e7fbe4f9..01204b24 100644
--- a/code/processes/crawling-process/java/nu/marginalia/crawl/CrawlerMain.java
+++ b/code/processes/crawling-process/java/nu/marginalia/crawl/CrawlerMain.java
@@ -241,6 +241,7 @@ public class CrawlerMain extends ProcessMainClass {

         // Set up the work log and the warc archiver so we can keep track of what we've done
         try (WorkLog workLog = new WorkLog(outputDir.resolve("crawler.log"));
+             DomainStateDb domainStateDb = new DomainStateDb(outputDir.resolve("domainstate.db"));
              WarcArchiverIf warcArchiver = warcArchiverFactory.get(outputDir);
              AnchorTagsSource anchorTagsSource = anchorTagsSourceFactory.create(domainsToCrawl)
         ) {
@@ -258,6 +259,7 @@ public class CrawlerMain extends ProcessMainClass {
                         anchorTagsSource,
                         outputDir,
                         warcArchiver,
+                        domainStateDb,
                         workLog);

                 if (pendingCrawlTasks.putIfAbsent(crawlSpec.domain(), task) == null) {
@@ -299,11 +301,12 @@ public class CrawlerMain extends ProcessMainClass {
         heartbeat.start();

         try (WorkLog workLog = new WorkLog(outputDir.resolve("crawler-" + targetDomainName.replace('/', '-') + ".log"));
+             DomainStateDb domainStateDb = new DomainStateDb(outputDir.resolve("domainstate.db"));
              WarcArchiverIf warcArchiver = warcArchiverFactory.get(outputDir);
              AnchorTagsSource anchorTagsSource = anchorTagsSourceFactory.create(List.of(new EdgeDomain(targetDomainName)))
         ) {
             var spec = new CrawlSpecRecord(targetDomainName, 1000, List.of());
-            var task = new CrawlTask(spec, anchorTagsSource, outputDir, warcArchiver, workLog);
+            var task = new CrawlTask(spec, anchorTagsSource, outputDir, warcArchiver, domainStateDb, workLog);
             task.run();
         }
         catch (Exception ex) {
@@ -324,18 +327,21 @@ public class CrawlerMain extends ProcessMainClass {
         private final AnchorTagsSource anchorTagsSource;
         private final Path outputDir;
         private final WarcArchiverIf warcArchiver;
+        private final DomainStateDb domainStateDb;
         private final WorkLog workLog;

         CrawlTask(CrawlSpecRecord specification,
                   AnchorTagsSource anchorTagsSource,
                   Path outputDir,
                   WarcArchiverIf warcArchiver,
+                  DomainStateDb domainStateDb,
                   WorkLog workLog)
         {
             this.specification = specification;
             this.anchorTagsSource = anchorTagsSource;
             this.outputDir = outputDir;
             this.warcArchiver = warcArchiver;
+            this.domainStateDb = domainStateDb;
             this.workLog = workLog;

             this.domain = specification.domain();
@@ -359,7 +365,7 @@ public class CrawlerMain extends ProcessMainClass {
             }

             try (var warcRecorder = new WarcRecorder(newWarcFile); // write to a temp file for now
-                 var retriever = new CrawlerRetreiver(fetcher, domainProber, specification, warcRecorder);
+                 var retriever = new CrawlerRetreiver(fetcher, domainProber, specification, domainStateDb, warcRecorder);
                  CrawlDataReference reference = getReference();
             ) {
diff --git a/code/processes/crawling-process/java/nu/marginalia/crawl/DomainStateDb.java b/code/processes/crawling-process/java/nu/marginalia/crawl/DomainStateDb.java
new file mode 100644
index 00000000..0824c3fe
--- /dev/null
+++ b/code/processes/crawling-process/java/nu/marginalia/crawl/DomainStateDb.java
@@ -0,0 +1,127 @@
+package nu.marginalia.crawl;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.annotation.Nullable;
+import java.nio.file.Path;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.SQLException;
+import java.time.Instant;
+import java.util.Optional;
+
+/** Supplemental sqlite database for storing the summary of a crawl.
+ *  One database exists per crawl data set.
+ * */
+public class DomainStateDb implements AutoCloseable {
+
+    private static final Logger logger = LoggerFactory.getLogger(DomainStateDb.class);
+
+    private final Connection connection;
+
+    public record SummaryRecord(
+            String domainName,
+            Instant lastUpdated,
+            String state,
+            @Nullable String stateDesc,
+            @Nullable String feedUrl
+    )
+    {
+        public static SummaryRecord forSuccess(String domainName) {
+            return new SummaryRecord(domainName, Instant.now(), "OK", null, null);
+        }
+
+        public static SummaryRecord forSuccess(String domainName, String feedUrl) {
+            return new SummaryRecord(domainName, Instant.now(), "OK", null, feedUrl);
+        }
+
+        public static SummaryRecord forError(String domainName, String state, String stateDesc) {
+            return new SummaryRecord(domainName, Instant.now(), state, stateDesc, null);
+        }
+
+        public boolean equals(Object other) {
+            if (other == this) {
+                return true;
+            }
+            if (!(other instanceof SummaryRecord(String name, Instant updated, String state1, String desc, String url))) {
+                return false;
+            }
+            return domainName.equals(name) &&
+                    lastUpdated.toEpochMilli() == updated.toEpochMilli() &&
+                    state.equals(state1) &&
+                    (stateDesc == null ? desc == null : stateDesc.equals(desc)) &&
+                    (feedUrl == null ? url == null : feedUrl.equals(url));
+        }
+
+        public int hashCode() {
+            return domainName.hashCode() + Long.hashCode(lastUpdated.toEpochMilli());
+        }
+
+    }
+
+    public DomainStateDb(Path filename) throws SQLException {
+        String sqliteDbString = "jdbc:sqlite:" + filename.toString();
+        connection = DriverManager.getConnection(sqliteDbString);
+
+        try (var stmt = connection.createStatement()) {
+            stmt.executeUpdate("""
+                    CREATE TABLE IF NOT EXISTS summary (
+                        domain TEXT PRIMARY KEY,
+                        lastUpdatedEpochMs LONG NOT NULL,
+                        state TEXT NOT NULL,
+                        stateDesc TEXT,
+                        feedUrl TEXT
+                    )
+                    """);
+
+            stmt.execute("PRAGMA journal_mode=WAL");
+        }
+    }
+
+    @Override
+    public void close() throws SQLException {
+        connection.close();
+    }
+
+
+    public void save(SummaryRecord record) {
+        try (var stmt = connection.prepareStatement("""
+                INSERT OR REPLACE INTO summary (domain, lastUpdatedEpochMs, state, stateDesc, feedUrl)
+                VALUES (?, ?, ?, ?, ?)
+                """)) {
+            stmt.setString(1, record.domainName());
+            stmt.setLong(2, record.lastUpdated().toEpochMilli());
+            stmt.setString(3, record.state());
+            stmt.setString(4, record.stateDesc());
+            stmt.setString(5, record.feedUrl());
+            stmt.executeUpdate();
+        } catch (SQLException e) {
+            logger.error("Failed to insert summary record", e);
+        }
+    }
+
+    public Optional<SummaryRecord> get(String domainName) {
+        try (var stmt = connection.prepareStatement("""
+                SELECT domain, lastUpdatedEpochMs, state, stateDesc, feedUrl
+                FROM summary
+                WHERE domain = ?
+                """)) {
+            stmt.setString(1, domainName);
+            var rs = stmt.executeQuery();
+            if (rs.next()) {
+                return Optional.of(new SummaryRecord(
+                        rs.getString("domain"),
+                        Instant.ofEpochMilli(rs.getLong("lastUpdatedEpochMs")),
+                        rs.getString("state"),
+                        rs.getString("stateDesc"),
+                        rs.getString("feedUrl")
+                ));
+            }
+        } catch (SQLException e) {
+            logger.error("Failed to get summary record", e);
+        }
+
+        return Optional.empty();
+    }
+}
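For reviewers, a minimal usage sketch of the new DomainStateDb API as it appears in this patch; the path, domain names, and feed URL below are illustrative, not taken from the patch:

    import nu.marginalia.crawl.DomainStateDb;

    import java.nio.file.Files;

    class DomainStateDbDemo {
        public static void main(String[] args) throws Exception {
            var dbPath = Files.createTempFile("domainstate", ".db"); // illustrative location

            try (var db = new DomainStateDb(dbPath)) {
                // A crawl that succeeded and discovered a feed during root-document sniffing
                db.save(DomainStateDb.SummaryRecord.forSuccess("www.example.com", "https://www.example.com/feed.xml"));

                // A crawl that failed its initial probe
                db.save(DomainStateDb.SummaryRecord.forError("bad.example.com", "Error", "Connection refused"));

                // A later crawl can read the stored record back, e.g. to revalidate the old feed URL
                db.get("www.example.com")
                        .map(DomainStateDb.SummaryRecord::feedUrl)
                        .ifPresent(System.out::println);
            }
        }
    }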
diff --git a/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java
index ace2059b..adef8ea1 100644
--- a/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java
+++ b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java
@@ -4,6 +4,7 @@ import crawlercommons.robots.SimpleRobotRules;
 import nu.marginalia.atags.model.DomainLinks;
 import nu.marginalia.contenttype.ContentType;
 import nu.marginalia.crawl.CrawlerMain;
+import nu.marginalia.crawl.DomainStateDb;
 import nu.marginalia.crawl.fetcher.ContentTags;
 import nu.marginalia.crawl.fetcher.HttpFetcher;
 import nu.marginalia.crawl.fetcher.HttpFetcherImpl;
@@ -16,7 +17,9 @@ import nu.marginalia.ip_blocklist.UrlBlocklist;
 import nu.marginalia.link_parser.LinkParser;
 import nu.marginalia.model.EdgeDomain;
 import nu.marginalia.model.EdgeUrl;
+import nu.marginalia.model.body.DocumentBodyExtractor;
 import nu.marginalia.model.body.HttpFetchResult;
+import nu.marginalia.model.crawldata.CrawlerDomainStatus;
 import org.jsoup.Jsoup;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -46,6 +49,7 @@ public class CrawlerRetreiver implements AutoCloseable {

     private final DomainProber domainProber;
     private final DomainCrawlFrontier crawlFrontier;
+    private final DomainStateDb domainStateDb;
     private final WarcRecorder warcRecorder;
     private final CrawlerRevisitor crawlerRevisitor;

@@ -55,8 +59,10 @@ public class CrawlerRetreiver implements AutoCloseable {
     public CrawlerRetreiver(HttpFetcher fetcher,
                             DomainProber domainProber,
                             CrawlerMain.CrawlSpecRecord specs,
+                            DomainStateDb domainStateDb,
                             WarcRecorder warcRecorder)
     {
+        this.domainStateDb = domainStateDb;
         this.warcRecorder = warcRecorder;
         this.fetcher = fetcher;
         this.domainProber = domainProber;
@@ -90,8 +96,21 @@ public class CrawlerRetreiver implements AutoCloseable {
         try {
             // Do an initial domain probe to determine the root URL
             EdgeUrl rootUrl;
-            if (probeRootUrl() instanceof HttpFetcher.DomainProbeResult.Ok ok) rootUrl = ok.probedUrl();
-            else return 1;
+
+            var probeResult = probeRootUrl();
+            switch (probeResult) {
+                case HttpFetcher.DomainProbeResult.Ok(EdgeUrl probedUrl) -> {
+                    rootUrl = probedUrl; // Good track
+                }
+                case HttpFetcher.DomainProbeResult.Redirect(EdgeDomain domain1) -> {
+                    domainStateDb.save(DomainStateDb.SummaryRecord.forError(domain, "Redirect", domain1.toString()));
+                    return 1;
+                }
+                case HttpFetcher.DomainProbeResult.Error(CrawlerDomainStatus status, String desc) -> {
+                    domainStateDb.save(DomainStateDb.SummaryRecord.forError(domain, status.toString(), desc));
+                    return 1;
+                }
+            }

             // Sleep after the initial probe, we don't have access to the robots.txt yet
             // so we don't know the crawl delay
@@ -114,7 +133,8 @@ public class CrawlerRetreiver implements AutoCloseable {

             delayTimer.waitFetchDelay(0); // initial delay after robots.txt

-            sniffRootDocument(rootUrl, delayTimer);
+            DomainStateDb.SummaryRecord summaryRecord = sniffRootDocument(rootUrl, delayTimer);
+            domainStateDb.save(summaryRecord);

             // Play back the old crawl data (if present) and fetch the documents comparing etags and last-modified
             if (crawlerRevisitor.recrawl(oldCrawlData, robotsRules, delayTimer) > 0) {
@@ -196,7 +216,9 @@ public class CrawlerRetreiver implements AutoCloseable {
         return domainProbeResult;
     }

-    private void sniffRootDocument(EdgeUrl rootUrl, CrawlDelayTimer timer) {
+    private DomainStateDb.SummaryRecord sniffRootDocument(EdgeUrl rootUrl, CrawlDelayTimer timer) {
+        Optional<String> feedLink = Optional.empty();
+
         try {
             var url = rootUrl.withPathAndParam("/", null);

@@ -204,11 +226,11 @@ public class CrawlerRetreiver implements AutoCloseable {
             timer.waitFetchDelay(0);

             if (!(result instanceof HttpFetchResult.ResultOk ok))
-                return;
+                return DomainStateDb.SummaryRecord.forSuccess(domain);

             var optDoc = ok.parseDocument();
             if (optDoc.isEmpty())
-                return;
+                return DomainStateDb.SummaryRecord.forSuccess(domain);

             // Sniff the software based on the sample document
             var doc = optDoc.get();
@@ -216,7 +238,6 @@ public class CrawlerRetreiver implements AutoCloseable {
             crawlFrontier.enqueueLinksFromDocument(url, doc);

             EdgeUrl faviconUrl = url.withPathAndParam("/favicon.ico", null);
-            Optional<EdgeUrl> sitemapUrl = Optional.empty();

             for (var link : doc.getElementsByTag("link")) {
                 String rel = link.attr("rel");
@@ -232,23 +253,33 @@ public class CrawlerRetreiver implements AutoCloseable {

                 // Grab the RSS/Atom as a sitemap if it exists
                 if (rel.equalsIgnoreCase("alternate")
-                        && (type.equalsIgnoreCase("application/atom+xml") || type.equalsIgnoreCase("application/atomsvc+xml"))) {
+                        && (type.equalsIgnoreCase("application/atom+xml")
+                        || type.equalsIgnoreCase("application/atomsvc+xml")
+                        || type.equalsIgnoreCase("application/rss+xml")
+                )) {
                     String href = link.attr("href");

-                    sitemapUrl = linkParser.parseLink(url, href)
-                            .filter(crawlFrontier::isSameDomain);
+                    feedLink = linkParser.parseLink(url, href)
+                            .filter(crawlFrontier::isSameDomain)
+                            .map(EdgeUrl::toString);
                 }
             }

-            // Download the sitemap if available exists
-            if (sitemapUrl.isPresent()) {
-                sitemapFetcher.downloadSitemaps(List.of(sitemapUrl.get()));
+
+            if (feedLink.isEmpty()) {
+                feedLink = guessFeedUrl(timer);
+            }
+
+            // Download the sitemap if available
+            if (feedLink.isPresent()) {
+                sitemapFetcher.downloadSitemaps(List.of(feedLink.get()));
                 timer.waitFetchDelay(0);
             }

             // Grab the favicon if it exists
             fetchWithRetry(faviconUrl, timer, HttpFetcher.ProbeType.DISABLED, ContentTags.empty());
             timer.waitFetchDelay(0);
+
         }
         catch (Exception ex) {
             logger.error("Error configuring link filter", ex);
@@ -256,6 +287,74 @@ public class CrawlerRetreiver implements AutoCloseable {
         finally {
             crawlFrontier.addVisited(rootUrl);
         }
+
+        if (feedLink.isPresent()) {
+            return DomainStateDb.SummaryRecord.forSuccess(domain, feedLink.get());
+        }
+        else {
+            return DomainStateDb.SummaryRecord.forSuccess(domain);
+        }
+    }
+
+    private final List<String> likelyFeedEndpoints = List.of(
+            "/rss.xml",
+            "/atom.xml",
+            "/feed.xml",
+            "/index.xml",
+            "/feed",
+            "/rss",
+            "/atom",
+            "/feeds",
+            "/blog/feed",
+            "/blog/rss"
+    );
+
+    private Optional<String> guessFeedUrl(CrawlDelayTimer timer) throws InterruptedException {
+        var oldDomainStateRecord = domainStateDb.get(domain);
+
+        // If we are already aware of an old feed URL, then we can just revalidate it
+        if (oldDomainStateRecord.isPresent()) {
+            var oldRecord = oldDomainStateRecord.get();
+            if (oldRecord.feedUrl() != null && validateFeedUrl(oldRecord.feedUrl(), timer)) {
+                return Optional.of(oldRecord.feedUrl());
+            }
+        }
+
+        for (String endpoint : likelyFeedEndpoints) {
+            String url = "https://" + domain + endpoint;
+            if (validateFeedUrl(url, timer)) {
+                return Optional.of(url);
+            }
+        }
+
+        return Optional.empty();
+    }
+
+    private boolean validateFeedUrl(String url, CrawlDelayTimer timer) throws InterruptedException {
+        var parsedOpt = EdgeUrl.parse(url);
+        if (parsedOpt.isEmpty())
+            return false;
+
+        HttpFetchResult result = fetchWithRetry(parsedOpt.get(), timer, HttpFetcher.ProbeType.DISABLED, ContentTags.empty());
+        timer.waitFetchDelay(0);
+
+        if (!(result instanceof HttpFetchResult.ResultOk ok)) {
+            return false;
+        }
+
+        // Extract the beginning of the response body and look for a feed signature
+        Optional<String> bodyOpt = DocumentBodyExtractor.asString(ok).getBody();
+        if (bodyOpt.isEmpty())
+            return false;
+
+        String body = bodyOpt.get();
+        body = body.substring(0, Math.min(128, body.length())).toLowerCase();
+
+        if (body.contains("<rss"))
+            return true;
+        if (body.contains("<atom"))
+            return true;
+
+        return false;
+    }
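For clarity, a self-contained sketch of what the endpoint probing and feed validation above amount to; the domain name and sample body are illustrative, and the real code issues rate-limited fetches via fetchWithRetry rather than printing candidates:

    import java.util.List;

    class FeedGuessSketch {
        static final List<String> LIKELY_FEED_ENDPOINTS = List.of(
                "/rss.xml", "/atom.xml", "/feed.xml", "/index.xml",
                "/feed", "/rss", "/atom", "/feeds", "/blog/feed", "/blog/rss");

        // Mirrors the signature check in validateFeedUrl: only the first 128
        // characters of the response body are inspected for a feed marker.
        static boolean looksLikeFeed(String body) {
            String head = body.substring(0, Math.min(128, body.length())).toLowerCase();
            return head.contains("<rss") || head.contains("<atom");
        }

        public static void main(String[] args) {
            String domain = "blog.example.com"; // illustrative
            for (String endpoint : LIKELY_FEED_ENDPOINTS) {
                System.out.println("candidate: https://" + domain + endpoint);
            }
            System.out.println(looksLikeFeed("<?xml version=\"1.0\"?><rss version=\"2.0\">")); // true
        }
    }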
diff --git a/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/SitemapFetcher.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/SitemapFetcher.java
--- a/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/SitemapFetcher.java
+++ b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/SitemapFetcher.java
-        List<String> sitemaps = robotsRules.getSitemaps();
+        List<String> urls = robotsRules.getSitemaps();

-        List<EdgeUrl> urls = new ArrayList<>(sitemaps.size());
-        if (!sitemaps.isEmpty()) {
-            for (var url : sitemaps) {
-                EdgeUrl.parse(url).ifPresent(urls::add);
-            }
-        }
-        else {
-            urls.add(rootUrl.withPathAndParam("/sitemap.xml", null));
+        if (urls.isEmpty()) {
+            urls = List.of(rootUrl.withPathAndParam("/sitemap.xml", null).toString());
         }

         downloadSitemaps(urls);
     }

-    public void downloadSitemaps(List<EdgeUrl> urls) {
+    public void downloadSitemaps(List<String> urls) {
         Set<String> checkedSitemaps = new HashSet<>();

-        for (var url : urls) {
+        for (var rawUrl : urls) {
+            Optional<EdgeUrl> parsedUrl = EdgeUrl.parse(rawUrl);
+            if (parsedUrl.isEmpty()) {
+                continue;
+            }
+
+            EdgeUrl url = parsedUrl.get();
+
             // Let's not download sitemaps from other domains for now
             if (!crawlFrontier.isSameDomain(url)) {
                 continue;

diff --git a/code/processes/crawling-process/model/java/nu/marginalia/model/body/ContentTypeLogic.java b/code/processes/crawling-process/model/java/nu/marginalia/model/body/ContentTypeLogic.java
index c38bcb3b..8d33fe00 100644
--- a/code/processes/crawling-process/model/java/nu/marginalia/model/body/ContentTypeLogic.java
+++ b/code/processes/crawling-process/model/java/nu/marginalia/model/body/ContentTypeLogic.java
@@ -18,6 +18,7 @@ public class ContentTypeLogic {
             "application/xhtml",
             "application/xml",
             "application/atom+xml",
+            "application/atomsvc+xml",
             "application/rss+xml",
             "application/x-rss+xml",
             "application/rdf+xml",
diff --git a/code/processes/crawling-process/model/java/nu/marginalia/model/body/DocumentBodyResult.java b/code/processes/crawling-process/model/java/nu/marginalia/model/body/DocumentBodyResult.java
index a29e7093..1248ecba 100644
--- a/code/processes/crawling-process/model/java/nu/marginalia/model/body/DocumentBodyResult.java
+++ b/code/processes/crawling-process/model/java/nu/marginalia/model/body/DocumentBodyResult.java
@@ -23,6 +23,10 @@ public sealed interface DocumentBodyResult<T> {
             return mapper.apply(contentType, body);
         }

+        public Optional<T> getBody() {
+            return Optional.of(body);
+        }
+
         @Override
         public void ifPresent(ExConsumer<T, Exception> consumer) throws Exception {
             consumer.accept(contentType, body);
@@ -41,6 +45,11 @@ public sealed interface DocumentBodyResult<T> {
             return (DocumentBodyResult<T2>) this;
         }

+        @Override
+        public Optional<T> getBody() {
+            return Optional.empty();
+        }
+
         @Override
         public void ifPresent(ExConsumer<T, Exception> consumer) throws Exception {
         }
@@ -49,6 +58,7 @@ public sealed interface DocumentBodyResult<T> {
     <T2> Optional<T2> mapOpt(BiFunction<ContentType, T, T2> mapper);
     <T2> Optional<T2> flatMapOpt(BiFunction<ContentType, T, Optional<T2>> mapper);
     <T2> DocumentBodyResult<T2> flatMap(BiFunction<ContentType, T, DocumentBodyResult<T2>> mapper);
+    Optional<T> getBody();

     void ifPresent(ExConsumer<T, Exception> consumer) throws Exception;
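The new getBody() accessor lets call sites unwrap a successful extraction without pattern matching, as CrawlerRetreiver.validateFeedUrl does above. A fragment of that call pattern; here `ok` is assumed to be an HttpFetchResult.ResultOk from an earlier fetch:

    // Ok(contentType, body) yields Optional.of(body); Error yields Optional.empty()
    Optional<String> bodyOpt = DocumentBodyExtractor.asString(ok).getBody();
    if (bodyOpt.isEmpty())
        return false; // treat extraction failure the same as a non-feed response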
diff --git a/code/processes/crawling-process/test/nu/marginalia/crawl/DomainStateDbTest.java b/code/processes/crawling-process/test/nu/marginalia/crawl/DomainStateDbTest.java
new file mode 100644
index 00000000..156f6f6d
--- /dev/null
+++ b/code/processes/crawling-process/test/nu/marginalia/crawl/DomainStateDbTest.java
@@ -0,0 +1,66 @@
+package nu.marginalia.crawl;
+
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.sql.SQLException;
+import java.time.Instant;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+class DomainStateDbTest {
+
+    Path tempFile;
+
+    @BeforeEach
+    void setUp() throws IOException {
+        tempFile = Files.createTempFile(getClass().getSimpleName(), ".db");
+    }
+
+    @AfterEach
+    void tearDown() throws IOException {
+        Files.deleteIfExists(tempFile);
+    }
+
+    @Test
+    public void testSunnyDay() throws SQLException {
+        try (var db = new DomainStateDb(tempFile)) {
+            var allFields = new DomainStateDb.SummaryRecord(
+                    "all.marginalia.nu",
+                    Instant.now(),
+                    "OK",
+                    "Bad address",
+                    "https://www.marginalia.nu/atom.xml"
+            );
+
+            var minFields = new DomainStateDb.SummaryRecord(
+                    "min.marginalia.nu",
+                    Instant.now(),
+                    "OK",
+                    null,
+                    null
+            );
+
+            db.save(allFields);
+            db.save(minFields);
+
+            assertEquals(allFields, db.get("all.marginalia.nu").orElseThrow());
+            assertEquals(minFields, db.get("min.marginalia.nu").orElseThrow());
+
+            var updatedAllFields = new DomainStateDb.SummaryRecord(
+                    "all.marginalia.nu",
+                    Instant.now(),
+                    "BAD",
+                    null,
+                    null
+            );
+
+            db.save(updatedAllFields);
+            assertEquals(updatedAllFields, db.get("all.marginalia.nu").orElseThrow());
+        }
+    }
+
+}
\ No newline at end of file

diff --git a/code/processes/crawling-process/test/nu/marginalia/crawling/retreival/CrawlerMockFetcherTest.java b/code/processes/crawling-process/test/nu/marginalia/crawling/retreival/CrawlerMockFetcherTest.java
index ea9bcf60..aacc0e52 100644
--- a/code/processes/crawling-process/test/nu/marginalia/crawling/retreival/CrawlerMockFetcherTest.java
+++ b/code/processes/crawling-process/test/nu/marginalia/crawling/retreival/CrawlerMockFetcherTest.java
@@ -2,6 +2,7 @@ package nu.marginalia.crawling.retreival;

 import crawlercommons.robots.SimpleRobotRules;
 import nu.marginalia.crawl.CrawlerMain;
+import nu.marginalia.crawl.DomainStateDb;
 import nu.marginalia.crawl.fetcher.ContentTags;
 import nu.marginalia.crawl.fetcher.HttpFetcher;
 import nu.marginalia.crawl.fetcher.HttpFetcherImpl;
@@ -18,6 +19,7 @@ import nu.marginalia.model.crawldata.SerializableCrawlData;
 import nu.marginalia.test.CommonTestData;
 import okhttp3.Headers;
 import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 import org.mockito.Mockito;
 import org.slf4j.Logger;
@@ -25,6 +27,9 @@ import org.slf4j.LoggerFactory;

 import java.io.IOException;
 import java.net.URISyntaxException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.sql.SQLException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -36,9 +41,14 @@ public class CrawlerMockFetcherTest {

     Map<EdgeUrl, CrawledDocument> mockData = new HashMap<>();
     HttpFetcher fetcherMock = new MockFetcher();
-
+    private Path dbTempFile;

+    @BeforeEach
+    public void setUp() throws IOException {
+        dbTempFile = Files.createTempFile("domains","db");
+    }

     @AfterEach
-    public void tearDown() {
+    public void tearDown() throws IOException {
+        Files.deleteIfExists(dbTempFile);
         mockData.clear();
     }
@@ -66,15 +76,17 @@ public class CrawlerMockFetcherTest {

     }

-    void crawl(CrawlerMain.CrawlSpecRecord spec) throws IOException {
-        try (var recorder = new WarcRecorder()) {
-            new CrawlerRetreiver(fetcherMock, new DomainProber(d -> true), spec, recorder)
+    void crawl(CrawlerMain.CrawlSpecRecord spec) throws IOException, SQLException {
+        try (var recorder = new WarcRecorder();
+             var db = new DomainStateDb(dbTempFile)
+        ) {
+            new CrawlerRetreiver(fetcherMock, new DomainProber(d -> true), spec, db, recorder)
                     .crawlDomain();
         }
     }

     @Test
-    public void testLemmy() throws URISyntaxException, IOException {
+    public void testLemmy() throws Exception {
         List<SerializableCrawlData> out = new ArrayList<>();

         registerUrlClasspathData(new EdgeUrl("https://startrek.website/"), "mock-crawl-data/lemmy/index.html");
@@ -85,7 +97,7 @@ public class CrawlerMockFetcherTest {
     }

     @Test
-    public void testMediawiki() throws URISyntaxException, IOException {
+    public void testMediawiki() throws Exception {
         List<SerializableCrawlData> out = new ArrayList<>();

         registerUrlClasspathData(new EdgeUrl("https://en.wikipedia.org/"), "mock-crawl-data/mediawiki/index.html");
@@ -94,7 +106,7 @@ public class CrawlerMockFetcherTest {
     }

     @Test
-    public void testDiscourse() throws URISyntaxException, IOException {
+    public void testDiscourse() throws Exception {
         List<SerializableCrawlData> out = new ArrayList<>();

         registerUrlClasspathData(new EdgeUrl("https://community.tt-rss.org/"), "mock-crawl-data/discourse/index.html");
diff --git a/code/processes/crawling-process/test/nu/marginalia/crawling/retreival/CrawlerRetreiverTest.java b/code/processes/crawling-process/test/nu/marginalia/crawling/retreival/CrawlerRetreiverTest.java
index edd0de78..01cf8339 100644
--- a/code/processes/crawling-process/test/nu/marginalia/crawling/retreival/CrawlerRetreiverTest.java
+++ b/code/processes/crawling-process/test/nu/marginalia/crawling/retreival/CrawlerRetreiverTest.java
@@ -4,6 +4,7 @@ import nu.marginalia.UserAgent;
 import nu.marginalia.WmsaHome;
 import nu.marginalia.atags.model.DomainLinks;
 import nu.marginalia.crawl.CrawlerMain;
+import nu.marginalia.crawl.DomainStateDb;
 import nu.marginalia.crawl.fetcher.HttpFetcher;
 import nu.marginalia.crawl.fetcher.HttpFetcherImpl;
 import nu.marginalia.crawl.fetcher.warc.WarcRecorder;
@@ -25,6 +26,7 @@ import java.io.RandomAccessFile;
 import java.net.URISyntaxException;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.sql.SQLException;
 import java.util.*;
 import java.util.stream.Collectors;

@@ -39,11 +41,13 @@ class CrawlerRetreiverTest {
     Path tempFileWarc2;
     Path tempFileParquet2;
     Path tempFileWarc3;
+    Path tempFileDb;

     @BeforeEach
     public void setUp() throws IOException {
         httpFetcher = new HttpFetcherImpl("search.marginalia.nu; testing a bit :D");
         tempFileParquet1 = Files.createTempFile("crawling-process", ".parquet");
         tempFileParquet2 = Files.createTempFile("crawling-process", ".parquet");
+        tempFileDb = Files.createTempFile("crawling-process", ".db");
     }

@@ -505,22 +509,26 @@ class CrawlerRetreiverTest {
     }

     private void doCrawlWithReferenceStream(CrawlerMain.CrawlSpecRecord specs, SerializableCrawlDataStream stream) {
-        try (var recorder = new WarcRecorder(tempFileWarc2)) {
-            new CrawlerRetreiver(httpFetcher, new DomainProber(d -> true), specs, recorder).crawlDomain(new DomainLinks(),
+        try (var recorder = new WarcRecorder(tempFileWarc2);
+             var db = new DomainStateDb(tempFileDb)
+        ) {
+            new CrawlerRetreiver(httpFetcher, new DomainProber(d -> true), specs, db, recorder).crawlDomain(new DomainLinks(),
                     new CrawlDataReference(stream));
         }
-        catch (IOException ex) {
+        catch (IOException | SQLException ex) {
             Assertions.fail(ex);
         }
     }

     @NotNull
     private DomainCrawlFrontier doCrawl(Path tempFileWarc1, CrawlerMain.CrawlSpecRecord specs) {
-        try (var recorder = new WarcRecorder(tempFileWarc1)) {
-            var crawler = new CrawlerRetreiver(httpFetcher, new DomainProber(d -> true), specs, recorder);
+        try (var recorder = new WarcRecorder(tempFileWarc1);
+             var db = new DomainStateDb(tempFileDb)
+        ) {
+            var crawler = new CrawlerRetreiver(httpFetcher, new DomainProber(d -> true), specs, db, recorder);
             crawler.crawlDomain();
             return crawler.getCrawlFrontier();
-        } catch (IOException ex) {
+        } catch (IOException | SQLException ex) {
             Assertions.fail(ex);
             return null; // unreachable
         }