From 8862100f7ea71b3a818f8938ec62b3523def9c8f Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Tue, 21 Jan 2025 21:44:21 +0100 Subject: [PATCH] (crawler) Improve logging and error handling --- .../java/nu/marginalia/crawl/fetcher/HttpFetcherImpl.java | 2 -- .../java/nu/marginalia/crawl/fetcher/warc/WarcRecorder.java | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/code/processes/crawling-process/java/nu/marginalia/crawl/fetcher/HttpFetcherImpl.java b/code/processes/crawling-process/java/nu/marginalia/crawl/fetcher/HttpFetcherImpl.java index 3c330fb4..4b0d5158 100644 --- a/code/processes/crawling-process/java/nu/marginalia/crawl/fetcher/HttpFetcherImpl.java +++ b/code/processes/crawling-process/java/nu/marginalia/crawl/fetcher/HttpFetcherImpl.java @@ -23,12 +23,10 @@ import org.slf4j.LoggerFactory; import java.io.IOException; import java.io.InputStream; import java.net.URISyntaxException; -import java.net.URLDecoder; import java.net.http.HttpClient; import java.net.http.HttpRequest; import java.net.http.HttpResponse; import java.net.http.HttpTimeoutException; -import java.nio.charset.StandardCharsets; import java.time.Duration; import java.util.*; import java.util.concurrent.Executors; diff --git a/code/processes/crawling-process/java/nu/marginalia/crawl/fetcher/warc/WarcRecorder.java b/code/processes/crawling-process/java/nu/marginalia/crawl/fetcher/warc/WarcRecorder.java index 464ee91b..9842f392 100644 --- a/code/processes/crawling-process/java/nu/marginalia/crawl/fetcher/warc/WarcRecorder.java +++ b/code/processes/crawling-process/java/nu/marginalia/crawl/fetcher/warc/WarcRecorder.java @@ -96,7 +96,7 @@ public class WarcRecorder implements AutoCloseable { try { response = client.send(request, java.net.http.HttpResponse.BodyHandlers.ofInputStream()); } - catch (IOException ex) { + catch (Exception ex) { logger.warn("Failed to fetch URL {}: {}", requestUri, ex.getMessage()); return new HttpFetchResult.ResultException(ex); }