diff --git a/code/processes/crawling-process/java/nu/marginalia/crawl/fetcher/HttpFetcherImpl.java b/code/processes/crawling-process/java/nu/marginalia/crawl/fetcher/HttpFetcherImpl.java index 62560d83..5ac9cf21 100644 --- a/code/processes/crawling-process/java/nu/marginalia/crawl/fetcher/HttpFetcherImpl.java +++ b/code/processes/crawling-process/java/nu/marginalia/crawl/fetcher/HttpFetcherImpl.java @@ -139,7 +139,7 @@ public class HttpFetcherImpl implements HttpFetcher { public ContentTypeProbeResult probeContentType(EdgeUrl url, WarcRecorder warcRecorder, ContentTags tags) throws RateLimitException { - if (tags.isEmpty()) { + if (tags.isEmpty() && contentTypeLogic.isUrlLikeBinary(url)) { var headBuilder = new Request.Builder().head() .addHeader("User-agent", userAgentString) .addHeader("Accept-Encoding", "gzip")