From d167ad20172f513c582e19ff3053d0f4b554c26e Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Tue, 27 Jun 2023 13:59:47 +0200 Subject: [PATCH] Remove sitemap related log spam --- .../java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java | 2 -- .../marginalia/crawl/retreival/fetcher/SitemapRetriever.java | 4 ++++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java index 9a32e3e9..78514a27 100644 --- a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java +++ b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java @@ -225,8 +225,6 @@ public class CrawlerRetreiver { checkedSitemaps.add(url.path); crawlFrontier.addAllToQueue(sitemap); - - sitemap.forEach(u -> System.out.println("u" + u)); } logger.info("Queue is now {}", crawlFrontier.queueSize()); diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/SitemapRetriever.java b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/SitemapRetriever.java index 6eab5ac1..1b717186 100644 --- a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/SitemapRetriever.java +++ b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/SitemapRetriever.java @@ -6,6 +6,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.inject.Singleton; +import java.io.FileNotFoundException; import java.util.*; @Singleton @@ -20,6 +21,9 @@ public class SitemapRetriever { try { return sitemapToUrls(parser.parseSiteMap(sitemapUrl.asURL())); } + catch (FileNotFoundException ex) { + return Collections.emptyList(); + } catch (UnknownFormatException ex) { logger.debug("Unknown sitemap format: {}", sitemapUrl); return Collections.emptyList();