From 1755b646b8c2fe59ab8319dcc775d3cab7c6ee13 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Mon, 25 Dec 2023 00:48:42 +0100 Subject: [PATCH] (warc) Fix NPE in WarcRecorder --- .../crawl/retreival/fetcher/warc/WarcRecorder.java | 9 ++++++++- .../crawl/retreival/fetcher/WarcRecorderTest.java | 12 ++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcRecorder.java b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcRecorder.java index e31585ef..1bd640ca 100644 --- a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcRecorder.java +++ b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcRecorder.java @@ -203,7 +203,14 @@ public class WarcRecorder implements AutoCloseable { WarcDigestBuilder responseDigestBuilder = new WarcDigestBuilder(); WarcDigestBuilder payloadDigestBuilder = new WarcDigestBuilder(); - byte[] bytes = documentBody.getBytes(); + byte[] bytes; + + if (documentBody == null) { + bytes = new byte[0]; + } + else { + bytes = documentBody.getBytes(); + } String fakeHeaders = STR.""" Content-Type: \{contentType} diff --git a/code/processes/crawling-process/src/test/java/nu/marginalia/crawl/retreival/fetcher/WarcRecorderTest.java b/code/processes/crawling-process/src/test/java/nu/marginalia/crawl/retreival/fetcher/WarcRecorderTest.java index cdc10bd2..4c533b13 100644 --- a/code/processes/crawling-process/src/test/java/nu/marginalia/crawl/retreival/fetcher/WarcRecorderTest.java +++ b/code/processes/crawling-process/src/test/java/nu/marginalia/crawl/retreival/fetcher/WarcRecorderTest.java @@ -91,6 +91,18 @@ class WarcRecorderTest { } } + @Test + public void flagAsSkippedNullBody() throws IOException, URISyntaxException { + + try (var recorder = new WarcRecorder(fileNameWarc)) { + recorder.flagAsSkipped(new EdgeUrl("https://www.marginalia.nu/"), + "text/html", + 200, + null); + } + + } + @Test public void testSaveImport() throws URISyntaxException, IOException { try (var recorder = new WarcRecorder(fileNameWarc)) {