diff --git a/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java
index a64360f7..67e661eb 100644
--- a/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java
+++ b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java
@@ -10,7 +10,8 @@ import nu.marginalia.crawl.retreival.revisit.CrawlerRevisitor;
 import nu.marginalia.crawl.retreival.revisit.DocumentWithReference;
 import nu.marginalia.crawl.retreival.sitemap.SitemapFetcher;
 import nu.marginalia.crawling.body.HttpFetchResult;
-import nu.marginalia.crawling.model.*;
+import nu.marginalia.crawling.model.CrawledDomain;
+import nu.marginalia.crawling.model.CrawlerDomainStatus;
 import nu.marginalia.ip_blocklist.UrlBlocklist;
 import nu.marginalia.link_parser.LinkParser;
 import nu.marginalia.model.EdgeDomain;
@@ -87,17 +88,8 @@ public class CrawlerRetreiver implements AutoCloseable {
     }
 
     public int fetch(DomainLinks domainLinks, CrawlDataReference oldCrawlData) {
-        final DomainProber.ProbeResult probeResult = domainProber.probeDomain(
-                fetcher,
-                domain,
-                new EdgeUrl("http", new EdgeDomain(domain), null, "/", null));
-
         try {
-            // Sleep a bit to avoid hammering the server with requests, we just probed it
-            TimeUnit.SECONDS.sleep(1);
-
-            // Fetch the domain
-            return crawlDomain(oldCrawlData, probeResult, domainLinks);
+            return crawlDomain(oldCrawlData, domainLinks);
         }
         catch (Exception ex) {
             logger.error("Error crawling domain {}", domain, ex);
@@ -111,25 +103,33 @@ public class CrawlerRetreiver implements AutoCloseable {
         resync.run(warcFile);
     }
 
-    private int crawlDomain(CrawlDataReference oldCrawlData, DomainProber.ProbeResult probeResult, DomainLinks domainLinks) throws IOException, InterruptedException {
-        String ip = findIp(domain);
-        EdgeUrl rootUrl;
+    private DomainProber.ProbeResult probeRootUrl(String ip) throws IOException {
+        // Construct an URL to the root of the domain, we don't know the schema yet so we'll
+        // start with http and then try https if that fails
+        var httpUrl = new EdgeUrl("http", new EdgeDomain(domain), null, "/", null);
+        final DomainProber.ProbeResult probeResult = domainProber.probeDomain(fetcher, domain, httpUrl);
 
         warcRecorder.writeWarcinfoHeader(ip, new EdgeDomain(domain), probeResult);
 
-        if (!(probeResult instanceof DomainProber.ProbeResultOk ok)) {
-            return 1;
-        }
-        else {
-            rootUrl = ok.probedUrl();
-        }
+        return probeResult;
+    }
+
+    private int crawlDomain(CrawlDataReference oldCrawlData, DomainLinks domainLinks) throws IOException, InterruptedException {
+        String ip = findIp(domain);
+        EdgeUrl rootUrl;
+
+        if (probeRootUrl(ip) instanceof DomainProber.ProbeResultOk ok) rootUrl = ok.probedUrl();
+        else return 1;
+
+        // Sleep after the initial probe, we don't have access to the robots.txt yet
+        // so we don't know the crawl delay
+        TimeUnit.SECONDS.sleep(1);
 
         final SimpleRobotRules robotsRules = fetcher.fetchRobotRules(rootUrl.domain, warcRecorder);
         final CrawlDelayTimer delayTimer = new CrawlDelayTimer(robotsRules.getCrawlDelay());
 
         delayTimer.waitFetchDelay(0); // initial delay after robots.txt
 
         sniffRootDocument(rootUrl, delayTimer);
-        delayTimer.waitFetchDelay(0); // delay after sniffing
 
         // Play back the old crawl data (if present) and fetch the documents comparing etags and last-modified
         int recrawled = crawlerRevisitor.recrawl(oldCrawlData, robotsRules, delayTimer);
@@ -187,7 +187,7 @@ public class CrawlerRetreiver implements AutoCloseable {
             try {
-                if (fetchWriteAndSleep(top, delayTimer, DocumentWithReference.empty()).isOk()) {
+                if (fetchContentWithReference(top, delayTimer, DocumentWithReference.empty()).isOk()) {
                     fetchedCount++;
                 }
             }
@@ -208,21 +208,8 @@ public class CrawlerRetreiver implements AutoCloseable {
 
         var url = rootUrl.withPathAndParam("/", null);
 
-        HttpFetchResult result = null;
-
-        for (int i = 0; i <= HTTP_429_RETRY_LIMIT; i++) {
-            try {
-                result = fetcher.fetchContent(url, warcRecorder, ContentTags.empty());
-                break;
-            }
-            catch (RateLimitException ex) {
-                timer.waitRetryDelay(ex);
-            }
-            catch (Exception ex) {
-                logger.warn("Failed to fetch {}", url, ex);
-                result = new HttpFetchResult.ResultException(ex);
-            }
-        }
+        HttpFetchResult result = fetchWithRetry(url, timer, HttpFetcher.ProbeType.DISABLED, ContentTags.empty());
+        timer.waitFetchDelay(0);
 
         if (!(result instanceof HttpFetchResult.ResultOk ok))
             return;
@@ -235,24 +222,39 @@ public class CrawlerRetreiver implements AutoCloseable {
             var doc = optDoc.get();
             crawlFrontier.setLinkFilter(linkFilterSelector.selectFilter(doc));
 
+            EdgeUrl faviconUrl = url.withPathAndParam("/favicon.ico", null);
+            EdgeUrl sitemapUrl = url.withPathAndParam("/sitemap.xml", null);
+
             for (var link : doc.getElementsByTag("link")) {
                 String rel = link.attr("rel");
                 String type = link.attr("type");
 
-                if (!rel.equalsIgnoreCase("alternate"))
-                    continue;
+                if (rel.equals("icon") || rel.equals("shortcut icon")) {
+                    String href = link.attr("href");
 
-                if (!(type.equalsIgnoreCase("application/atom+xml")
-                    || type.equalsIgnoreCase("application/rss+xml")))
-                    continue;
+                    faviconUrl = linkParser.parseLink(url, href)
+                            .filter(crawlFrontier::isSameDomain)
+                            .orElse(faviconUrl);
+                }
 
-                String href = link.attr("href");
+                // Grab the RSS/Atom as a sitemap if it exists
+                if (rel.equalsIgnoreCase("alternate")
+                        && (type.equalsIgnoreCase("application/atom+xml") || type.equalsIgnoreCase("application/atomsvc+xml"))) {
+                    String href = link.attr("href");
 
-                linkParser.parseLink(url, href)
-                        .filter(crawlFrontier::isSameDomain)
-                        .map(List::of)
-                        .ifPresent(sitemapFetcher::downloadSitemaps);
+                    sitemapUrl = linkParser.parseLink(url, href)
+                            .filter(crawlFrontier::isSameDomain)
+                            .orElse(sitemapUrl);
+                }
             }
+
+            // Download the sitemap if it exists
+            sitemapFetcher.downloadSitemaps(List.of(sitemapUrl));
+            timer.waitFetchDelay(0);
+
+            // Grab the favicon if it exists
+            fetchWithRetry(faviconUrl, timer, HttpFetcher.ProbeType.DISABLED, ContentTags.empty());
+            timer.waitFetchDelay(0);
         }
         catch (Exception ex) {
             logger.error("Error configuring link filter", ex);
@@ -262,31 +264,16 @@ public class CrawlerRetreiver implements AutoCloseable {
         }
     }
 
-    public HttpFetchResult fetchWriteAndSleep(EdgeUrl top,
-                                              CrawlDelayTimer timer,
-                                              DocumentWithReference reference) throws InterruptedException
+    public HttpFetchResult fetchContentWithReference(EdgeUrl top,
+                                                     CrawlDelayTimer timer,
+                                                     DocumentWithReference reference) throws InterruptedException
     {
         logger.debug("Fetching {}", top);
 
-        HttpFetchResult fetchedDoc = new HttpFetchResult.ResultNone();
-
         long startTime = System.currentTimeMillis();
         var contentTags = reference.getContentTags();
 
-        // Fetch the document, retrying if we get a rate limit exception
-        for (int i = 0; i <= HTTP_429_RETRY_LIMIT; i++) {
-            try {
-                fetchedDoc = fetcher.fetchContent(top, warcRecorder, contentTags);
-                break;
-            }
-            catch (RateLimitException ex) {
-                timer.waitRetryDelay(ex);
-            }
-            catch (Exception ex) {
-                logger.warn("Failed to fetch {}", top, ex);
-                fetchedDoc = new HttpFetchResult.ResultException(ex);
-            }
-        }
+        HttpFetchResult fetchedDoc = fetchWithRetry(top, timer, HttpFetcher.ProbeType.FULL, contentTags);
 
         // Parse the document and enqueue links
         try {
@@ -328,6 +315,27 @@ public class CrawlerRetreiver implements AutoCloseable {
         return fetchedDoc;
     }
 
+    /** Fetch a document and retry on 429s */
+    private HttpFetchResult fetchWithRetry(EdgeUrl url,
+                                           CrawlDelayTimer timer,
+                                           HttpFetcher.ProbeType probeType,
+                                           ContentTags contentTags) throws InterruptedException {
+        for (int i = 0; i <= HTTP_429_RETRY_LIMIT; i++) {
+            try {
+                return fetcher.fetchContent(url, warcRecorder, contentTags, probeType);
+            }
+            catch (RateLimitException ex) {
+                timer.waitRetryDelay(ex);
+            }
+            catch (Exception ex) {
+                logger.warn("Failed to fetch {}", url, ex);
+                return new HttpFetchResult.ResultException(ex);
+            }
+        }
+
+        return new HttpFetchResult.ResultNone();
+    }
+
     private boolean isAllowedProtocol(String proto) {
         return proto.equalsIgnoreCase("http")
             || proto.equalsIgnoreCase("https");
diff --git a/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcher.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcher.java
index 70576510..fd3dd0dd 100644
--- a/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcher.java
+++ b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcher.java
@@ -3,8 +3,8 @@ package nu.marginalia.crawl.retreival.fetcher;
 import com.google.inject.ImplementedBy;
 import crawlercommons.robots.SimpleRobotRules;
 import nu.marginalia.crawl.retreival.RateLimitException;
-import nu.marginalia.crawling.body.HttpFetchResult;
 import nu.marginalia.crawl.retreival.fetcher.warc.WarcRecorder;
+import nu.marginalia.crawling.body.HttpFetchResult;
 import nu.marginalia.model.EdgeDomain;
 import nu.marginalia.model.EdgeUrl;
 
@@ -19,9 +19,18 @@ public interface HttpFetcher {
 
     FetchResult probeDomain(EdgeUrl url);
 
-    HttpFetchResult fetchContent(EdgeUrl url, WarcRecorder recorder, ContentTags tags) throws RateLimitException;
+    HttpFetchResult fetchContent(EdgeUrl url,
+                                 WarcRecorder recorder,
+                                 ContentTags tags,
+                                 ProbeType probeType) throws RateLimitException;
 
     SimpleRobotRules fetchRobotRules(EdgeDomain domain, WarcRecorder recorder);
 
     SitemapRetriever createSitemapRetriever();
+
+    enum ProbeType {
+        DISABLED,
+        FULL,
+        IF_MODIFIED_SINCE
+    }
 }
diff --git a/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcherImpl.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcherImpl.java
index 1df0301b..6ec3cd73 100644
--- a/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcherImpl.java
+++ b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcherImpl.java
@@ -11,10 +11,10 @@ import nu.marginalia.crawl.retreival.fetcher.ContentTypeProber.ContentTypeProbeR
 import nu.marginalia.crawl.retreival.fetcher.socket.FastTerminatingSocketFactory;
 import nu.marginalia.crawl.retreival.fetcher.socket.IpInterceptingNetworkInterceptor;
 import nu.marginalia.crawl.retreival.fetcher.socket.NoSecuritySSL;
-import nu.marginalia.crawling.body.DocumentBodyExtractor;
-import nu.marginalia.crawling.body.HttpFetchResult;
 import nu.marginalia.crawl.retreival.fetcher.warc.WarcRecorder;
 import nu.marginalia.crawling.body.ContentTypeLogic;
+import nu.marginalia.crawling.body.DocumentBodyExtractor;
+import nu.marginalia.crawling.body.HttpFetchResult;
 import nu.marginalia.model.EdgeDomain;
 import nu.marginalia.model.EdgeUrl;
 import okhttp3.ConnectionPool;
@@ -145,12 +145,13 @@ public class HttpFetcherImpl implements HttpFetcher {
     @SneakyThrows
     public HttpFetchResult fetchContent(EdgeUrl url,
                                         WarcRecorder warcRecorder,
-                                        ContentTags contentTags)
+                                        ContentTags contentTags,
+                                        ProbeType probeType)
     {
 
         // We don't want to waste time and resources on URLs that are not HTML, so if the file ending
         // looks like it might be something else, we perform a HEAD first to check the content type
-        if (contentTags.isEmpty() && contentTypeLogic.isUrlLikeBinary(url))
+        if (probeType == ProbeType.FULL && contentTags.isEmpty() && contentTypeLogic.isUrlLikeBinary(url))
         {
             ContentTypeProbeResult probeResult = contentTypeProber.probeContentType(url);
             if (probeResult instanceof ContentTypeProbeResult.Ok ok) {
@@ -174,7 +175,9 @@ public class HttpFetcherImpl implements HttpFetcher {
         else {
             // Possibly do a soft probe to see if the URL has been modified since the last time we crawled it
             // if we have reason to suspect ETags are not supported by the server.
-            if (softIfModifiedSinceProber.probeModificationTime(url, contentTags)) {
+            if (probeType == ProbeType.IF_MODIFIED_SINCE
+                && softIfModifiedSinceProber.probeModificationTime(url, contentTags))
+            {
                 return new HttpFetchResult.Result304Raw();
             }
         }
diff --git a/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/revisit/CrawlerRevisitor.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/revisit/CrawlerRevisitor.java
index af4a743f..6b32317d 100644
--- a/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/revisit/CrawlerRevisitor.java
+++ b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/revisit/CrawlerRevisitor.java
@@ -137,7 +137,7 @@ public class CrawlerRevisitor {
 
             DocumentWithReference reference = new DocumentWithReference(doc, oldCrawlData);
 
-            var result = crawlerRetreiver.fetchWriteAndSleep(url, delayTimer, reference);
+            var result = crawlerRetreiver.fetchContentWithReference(url, delayTimer, reference);
 
             if (reference.isSame(result)) {
                 retained++;
diff --git a/code/processes/crawling-process/test/nu/marginalia/crawling/HttpFetcherTest.java b/code/processes/crawling-process/test/nu/marginalia/crawling/HttpFetcherTest.java
index 0873924f..af196da7 100644
--- a/code/processes/crawling-process/test/nu/marginalia/crawling/HttpFetcherTest.java
+++ b/code/processes/crawling-process/test/nu/marginalia/crawling/HttpFetcherTest.java
@@ -3,11 +3,12 @@ package nu.marginalia.crawling;
 import lombok.SneakyThrows;
 import nu.marginalia.crawl.retreival.RateLimitException;
 import nu.marginalia.crawl.retreival.fetcher.ContentTags;
+import nu.marginalia.crawl.retreival.fetcher.HttpFetcher;
 import nu.marginalia.crawl.retreival.fetcher.HttpFetcherImpl;
-import nu.marginalia.crawling.body.DocumentBodyExtractor;
-import nu.marginalia.crawling.body.DocumentBodyResult;
 import nu.marginalia.crawl.retreival.fetcher.warc.WarcRecorder;
 import nu.marginalia.crawling.body.ContentTypeLogic;
+import nu.marginalia.crawling.body.DocumentBodyExtractor;
+import nu.marginalia.crawling.body.DocumentBodyResult;
 import nu.marginalia.model.EdgeUrl;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
@@ -35,7 +36,7 @@ class HttpFetcherTest {
     void fetchUTF8() throws URISyntaxException, RateLimitException, IOException {
         var fetcher = new HttpFetcherImpl("nu.marginalia.edge-crawler");
         try (var recorder = new WarcRecorder()) {
-            var result = fetcher.fetchContent(new EdgeUrl("https://www.marginalia.nu"), recorder, ContentTags.empty());
EdgeUrl("https://www.marginalia.nu"), recorder, ContentTags.empty()); + var result = fetcher.fetchContent(new EdgeUrl("https://www.marginalia.nu"), recorder, ContentTags.empty(), HttpFetcher.ProbeType.FULL); if (DocumentBodyExtractor.asString(result) instanceof DocumentBodyResult.Ok bodyOk) { System.out.println(bodyOk.contentType()); } @@ -47,7 +48,7 @@ class HttpFetcherTest { var fetcher = new HttpFetcherImpl("nu.marginalia.edge-crawler"); try (var recorder = new WarcRecorder()) { - var result = fetcher.fetchContent(new EdgeUrl("https://www.marginalia.nu/robots.txt"), recorder, ContentTags.empty()); + var result = fetcher.fetchContent(new EdgeUrl("https://www.marginalia.nu/robots.txt"), recorder, ContentTags.empty(), HttpFetcher.ProbeType.FULL); if (DocumentBodyExtractor.asString(result) instanceof DocumentBodyResult.Ok bodyOk) { System.out.println(bodyOk.contentType()); } diff --git a/code/processes/crawling-process/test/nu/marginalia/crawling/retreival/CrawlerMockFetcherTest.java b/code/processes/crawling-process/test/nu/marginalia/crawling/retreival/CrawlerMockFetcherTest.java index 749b821c..01534385 100644 --- a/code/processes/crawling-process/test/nu/marginalia/crawling/retreival/CrawlerMockFetcherTest.java +++ b/code/processes/crawling-process/test/nu/marginalia/crawling/retreival/CrawlerMockFetcherTest.java @@ -5,8 +5,8 @@ import lombok.SneakyThrows; import nu.marginalia.crawl.retreival.CrawlerRetreiver; import nu.marginalia.crawl.retreival.DomainProber; import nu.marginalia.crawl.retreival.fetcher.*; -import nu.marginalia.crawling.body.HttpFetchResult; import nu.marginalia.crawl.retreival.fetcher.warc.WarcRecorder; +import nu.marginalia.crawling.body.HttpFetchResult; import nu.marginalia.crawling.model.CrawledDocument; import nu.marginalia.crawling.model.CrawlerDocumentStatus; import nu.marginalia.crawling.model.SerializableCrawlData; @@ -23,7 +23,10 @@ import org.slf4j.LoggerFactory; import java.io.IOException; import java.net.URISyntaxException; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; public class CrawlerMockFetcherTest { @@ -119,7 +122,7 @@ public class CrawlerMockFetcherTest { @SneakyThrows @Override - public HttpFetchResult fetchContent(EdgeUrl url, WarcRecorder recorder, ContentTags tags) { + public HttpFetchResult fetchContent(EdgeUrl url, WarcRecorder recorder, ContentTags tags, ProbeType probeType) { logger.info("Fetching {}", url); if (mockData.containsKey(url)) { byte[] bodyBytes = mockData.get(url).documentBody.getBytes(); diff --git a/code/processes/crawling-process/test/nu/marginalia/crawling/retreival/CrawlerRetreiverTest.java b/code/processes/crawling-process/test/nu/marginalia/crawling/retreival/CrawlerRetreiverTest.java index aa1f00e7..a6df0791 100644 --- a/code/processes/crawling-process/test/nu/marginalia/crawling/retreival/CrawlerRetreiverTest.java +++ b/code/processes/crawling-process/test/nu/marginalia/crawling/retreival/CrawlerRetreiverTest.java @@ -261,6 +261,7 @@ class CrawlerRetreiverTest { .collect(Collectors.toSet()); assertEquals(Set.of("https://www.marginalia.nu/", + "https://www.marginalia.nu/favicon.ico", "https://www.marginalia.nu/log/06-optimization.gmi/"), fetchedUrls);