diff --git a/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/ContentTags.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/ContentTags.java
index e1df86c8..13da0975 100644
--- a/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/ContentTags.java
+++ b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/ContentTags.java
@@ -18,7 +18,37 @@ public record ContentTags(String etag, String lastMod) {
 
     /** Paints the tags onto the request builder. */
     public void paint(Request.Builder getBuilder) {
-        if (etag != null) getBuilder.addHeader("If-None-Match", etag);
-        if (lastMod != null) getBuilder.addHeader("If-Modified-Since", lastMod);
+        if (etag != null) {
+            getBuilder.addHeader("If-None-Match", ifNoneMatch());
+        }
+
+        if (lastMod != null) {
+            getBuilder.addHeader("If-Modified-Since", ifModifiedSince());
+        }
+    }
+
+    /** Returns the etag with a leading "W/" removed, or the etag as-is (possibly null) when it has no such prefix. */
+    private String ifNoneMatch() {
+        // Remove the W/ prefix if it exists
+
+        //'W/' (case-sensitive) indicates that a weak validator is used. Weak etags are
+        // easy to generate, but are far less useful for comparisons. Strong validators
+        // are ideal for comparisons but can be very difficult to generate efficiently.
+        // Weak ETag values of two representations of the same resources might be semantically
+        // equivalent, but not byte-for-byte identical. This means weak etags prevent caching
+        // when byte range requests are used, but strong etags mean range requests can
+        // still be cached.
+        // - https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag
+
+        if (null != etag && etag.startsWith("W/")) {
+            return etag.substring(2);
+        } else {
+            return etag;
+        }
+    }
+
+    /** Returns lastMod unchanged; paint sends it as the If-Modified-Since header value. */
+    private String ifModifiedSince() {
+        return lastMod;
     }
 }