Replace CrawledDomainReader.readRuntimeExcept with readOptionally

readRuntimeExcept logged a warning and rethrew any read failure wrapped in
a RuntimeException. readOptionally logs the same warning but returns
Optional.empty() instead, so one unreadable domain file no longer aborts
the stream that is iterating over the crawl log.

The three CrawlPlan pipelines that called readRuntimeExcept now call
readOptionally and drop empty results (filter(Optional::isPresent) followed
by map(Optional::get)) before handing domains on. The IOException handler in
the second pipeline now logs at error level instead of warn; it still
rethrows as RuntimeException.

NOTE(review): the return type is spelled Optional<CrawledDomain>, taken from
the return type of the removed method; a raw Optional would make
map(Optional::get) produce Object downstream.
NOTE(review): indentation and the placement of blank context lines were
reconstructed from the hunk line counts, not copied from the tree. Apply
with whitespace tolerance (git apply --ignore-whitespace) or regenerate the
patch against the real files.

diff --git a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/io/CrawledDomainReader.java b/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/io/CrawledDomainReader.java
index 744236c0..47dec05f 100644
--- a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/io/CrawledDomainReader.java
+++ b/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/io/CrawledDomainReader.java
@@ -15,6 +15,7 @@ import java.io.InputStreamReader;
 import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Optional;
 import java.util.concurrent.ForkJoinPool;
 import java.util.concurrent.TimeUnit;
 
@@ -58,14 +59,13 @@ public class CrawledDomainReader {
         }
     }
 
-    public CrawledDomain readRuntimeExcept(Path path) {
+    public Optional<CrawledDomain> readOptionally(Path path) {
         try {
-            return read(path);
+            return Optional.of(read(path));
         }
         catch (Exception ex) {
             logger.warn("Failed to read domain", ex);
-
-            throw new RuntimeException(ex);
+            return Optional.empty();
         }
     }
 
diff --git a/code/process-models/crawling-model/src/main/java/plan/CrawlPlan.java b/code/process-models/crawling-model/src/main/java/plan/CrawlPlan.java
index c46ed854..ff299d68 100644
--- a/code/process-models/crawling-model/src/main/java/plan/CrawlPlan.java
+++ b/code/process-models/crawling-model/src/main/java/plan/CrawlPlan.java
@@ -21,6 +21,7 @@ import java.util.Iterator;
 import java.util.function.Consumer;
 import java.util.function.Predicate;
 import java.util.stream.Stream;
+import java.util.Optional;
 
 @AllArgsConstructor @NoArgsConstructor @ToString
 public class CrawlPlan {
@@ -95,7 +96,9 @@ public class CrawlPlan {
             entryStream
                     .map(WorkLogEntry::path)
                     .map(this::getCrawledFilePath)
-                    .map(reader::readRuntimeExcept)
+                    .map(reader::readOptionally)
+                    .filter(Optional::isPresent)
+                    .map(Optional::get)
                     .forEach(consumer);
         }
         catch (IOException ex) {
@@ -119,11 +122,13 @@ public class CrawlPlan {
                         }
                         return true;
                     })
-                    .map(reader::readRuntimeExcept)
+                    .map(reader::readOptionally)
+                    .filter(Optional::isPresent)
+                    .map(Optional::get)
                     .forEach(consumer);
         }
         catch (IOException ex) {
-            logger.warn("Failed to read domains", ex);
+            logger.error("Failed to read domains", ex);
 
             throw new RuntimeException(ex);
         }
@@ -141,7 +146,9 @@ public class CrawlPlan {
             stream = WorkLog.streamLog(crawl.getLogFile())
                     .map(WorkLogEntry::path)
                     .map(CrawlPlan.this::getCrawledFilePath)
-                    .map(reader::readRuntimeExcept);
+                    .map(reader::readOptionally)
+                    .filter(Optional::isPresent)
+                    .map(Optional::get);
         }
 
         @Override