From 162fc25ebc0a3776d827133ed00a3bff989d317b Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Mon, 23 Sep 2024 18:03:09 +0200 Subject: [PATCH] (minor) Fix accidental commit errors --- .../atags/source/AnchorTagsSourceFactory.java | 10 +++++++++- .../CrawlingThenConvertingIntegrationTest.java | 2 ++ .../nu/marginalia/crawl/fetcher/HttpFetcherImpl.java | 2 +- .../CrawledDocumentParquetRecordFileWriterTest.java | 1 + .../screenshot/ScreenshotCaptureToolMain.java | 2 +- 5 files changed, 14 insertions(+), 3 deletions(-) diff --git a/code/processes/converting-process/ft-anchor-keywords/java/nu/marginalia/atags/source/AnchorTagsSourceFactory.java b/code/processes/converting-process/ft-anchor-keywords/java/nu/marginalia/atags/source/AnchorTagsSourceFactory.java index 93f774d4..aaed5ace 100644 --- a/code/processes/converting-process/ft-anchor-keywords/java/nu/marginalia/atags/source/AnchorTagsSourceFactory.java +++ b/code/processes/converting-process/ft-anchor-keywords/java/nu/marginalia/atags/source/AnchorTagsSourceFactory.java @@ -20,6 +20,7 @@ public class AnchorTagsSourceFactory { private final int nodeId; private final HikariDataSource dataSource; private static final Logger logger = LoggerFactory.getLogger(AnchorTagsSourceFactory.class); + @Inject public AnchorTagsSourceFactory(HikariDataSource dataSource, ProcessConfiguration config) @@ -30,7 +31,14 @@ public class AnchorTagsSourceFactory { } public AnchorTagsSource create() throws SQLException { - return create(getRelevantDomainsByNodeAffinity()); + try { + return create(getRelevantDomainsByNodeAffinity()); + } + catch (Exception e) { + // likely a test environment + logger.warn("Failed to create anchor tags source", e); + return domain -> new DomainLinks(); + } } public AnchorTagsSource create(List relevantDomains) throws SQLException { diff --git a/code/processes/converting-process/test/nu/marginalia/converting/CrawlingThenConvertingIntegrationTest.java b/code/processes/converting-process/test/nu/marginalia/converting/CrawlingThenConvertingIntegrationTest.java index b433a276..6ab40a90 100644 --- a/code/processes/converting-process/test/nu/marginalia/converting/CrawlingThenConvertingIntegrationTest.java +++ b/code/processes/converting-process/test/nu/marginalia/converting/CrawlingThenConvertingIntegrationTest.java @@ -105,6 +105,8 @@ public class CrawlingThenConvertingIntegrationTest { CrawledDomain crawlData = crawl(specs); + System.out.println(crawlData); + assertEquals("REDIRECT", crawlData.crawlerStatus); assertEquals("www.marginalia.nu", crawlData.redirectDomain); assertTrue(crawlData.doc.isEmpty()); diff --git a/code/processes/crawling-process/java/nu/marginalia/crawl/fetcher/HttpFetcherImpl.java b/code/processes/crawling-process/java/nu/marginalia/crawl/fetcher/HttpFetcherImpl.java index 567d9d2d..561cdad6 100644 --- a/code/processes/crawling-process/java/nu/marginalia/crawl/fetcher/HttpFetcherImpl.java +++ b/code/processes/crawling-process/java/nu/marginalia/crawl/fetcher/HttpFetcherImpl.java @@ -127,7 +127,7 @@ public class HttpFetcherImpl implements HttpFetcher { EdgeUrl requestUrl = new EdgeUrl(rsp.request().url().toString()); if (!Objects.equals(requestUrl.domain, url.domain)) { - return new ProbeResultRedirect(url.domain); + return new ProbeResultRedirect(requestUrl.domain); } return new ProbeResultOk(requestUrl); } diff --git a/code/processes/crawling-process/model/test/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecordFileWriterTest.java b/code/processes/crawling-process/model/test/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecordFileWriterTest.java index 0da0f6d8..cef2a0f2 100644 --- a/code/processes/crawling-process/model/test/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecordFileWriterTest.java +++ b/code/processes/crawling-process/model/test/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecordFileWriterTest.java @@ -42,6 +42,7 @@ class CrawledDocumentParquetRecordFileWriterTest { Instant.now(), "text/html", "hello world".getBytes(), + null, null, null); // Write the record to a file diff --git a/code/tools/screenshot-capture-tool/java/nu/marginalia/screenshot/ScreenshotCaptureToolMain.java b/code/tools/screenshot-capture-tool/java/nu/marginalia/screenshot/ScreenshotCaptureToolMain.java index 169df4dc..8432b80e 100644 --- a/code/tools/screenshot-capture-tool/java/nu/marginalia/screenshot/ScreenshotCaptureToolMain.java +++ b/code/tools/screenshot-capture-tool/java/nu/marginalia/screenshot/ScreenshotCaptureToolMain.java @@ -96,7 +96,7 @@ public class ScreenshotCaptureToolMain { private static byte[] fetchDomain(HttpClient client, EdgeDomain domain) { try { Map requestData = Map.of( - "url", domain.toRootUrlHttp().toString(), + "url", domain.toRootUrl().toString(), "options", Map.of("fullPage", false, "type", "png"),