From c9f029c214c3e1f84cd5fbaed25a6523318d0279 Mon Sep 17 00:00:00 2001
From: Viktor Lofgren
Date: Mon, 22 Apr 2024 14:31:05 +0200
Subject: [PATCH] (crawler) Strip W/-prefix from the etag when supplied as If-None-Match

---
 .../crawl/retreival/fetcher/ContentTags.java  | 35 +++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/ContentTags.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/ContentTags.java
index e1df86c8..13da0975 100644
--- a/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/ContentTags.java
+++ b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/ContentTags.java
@@ -18,7 +18,38 @@ public record ContentTags(String etag, String lastMod) {
 
     /** Paints the tags onto the request builder. */
     public void paint(Request.Builder getBuilder) {
-        if (etag != null) getBuilder.addHeader("If-None-Match", etag);
-        if (lastMod != null) getBuilder.addHeader("If-Modified-Since", lastMod);
+
+        if (etag != null) {
+            getBuilder.addHeader("If-None-Match", ifNoneMatch());
+        }
+
+        if (lastMod != null) {
+            getBuilder.addHeader("If-Modified-Since", ifModifiedSince());
+        }
+    }
+
+    /** Returns the etag with a leading "W/" removed if present; returns null when etag is null. */
+    private String ifNoneMatch() {
+        // Remove the W/ prefix if it exists
+
+        //'W/' (case-sensitive) indicates that a weak validator is used. Weak etags are
+        // easy to generate, but are far less useful for comparisons. Strong validators
+        // are ideal for comparisons but can be very difficult to generate efficiently.
+        // Weak ETag values of two representations of the same resources might be semantically
+        // equivalent, but not byte-for-byte identical. This means weak etags prevent caching
+        // when byte range requests are used, but strong etags mean range requests can
+        // still be cached.
+        // - https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag
+
+        if (null != etag && etag.startsWith("W/")) {
+            return etag.substring(2);
+        } else {
+            return etag;
+        }
+    }
+
+    /** Returns the value sent as If-Modified-Since, which is lastMod unchanged. */
+    private String ifModifiedSince() {
+        return lastMod;
     }
 }