Minor: Don't blow up the reader on a corrupted file

This commit is contained in:
Viktor Lofgren 2023-07-06 20:18:09 +02:00
parent 96eecc6ea5
commit da8bcc6e24
2 changed files with 15 additions and 8 deletions

View File

@@ -15,6 +15,7 @@ import java.io.InputStreamReader;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.TimeUnit;
@@ -58,14 +59,13 @@ public class CrawledDomainReader {
}
}
public CrawledDomain readRuntimeExcept(Path path) {
public Optional<CrawledDomain> readOptionally(Path path) {
try {
return read(path);
return Optional.of(read(path));
}
catch (Exception ex) {
logger.warn("Failed to read domain", ex);
throw new RuntimeException(ex);
return Optional.empty();
}
}

View File

@@ -21,6 +21,7 @@ import java.util.Iterator;
import java.util.function.Consumer;
import java.util.function.Predicate;
import java.util.stream.Stream;
import java.util.Optional;
@AllArgsConstructor @NoArgsConstructor @ToString
public class CrawlPlan {
@@ -95,7 +96,9 @@ public class CrawlPlan {
entryStream
.map(WorkLogEntry::path)
.map(this::getCrawledFilePath)
.map(reader::readRuntimeExcept)
.map(reader::readOptionally)
.filter(Optional::isPresent)
.map(Optional::get)
.forEach(consumer);
}
catch (IOException ex) {
@@ -119,11 +122,13 @@ public class CrawlPlan {
}
return true;
})
.map(reader::readRuntimeExcept)
.map(reader::readOptionally)
.filter(Optional::isPresent)
.map(Optional::get)
.forEach(consumer);
}
catch (IOException ex) {
logger.warn("Failed to read domains", ex);
logger.error("Failed to read domains", ex);
throw new RuntimeException(ex);
}
@@ -141,7 +146,9 @@ public class CrawlPlan {
stream = WorkLog.streamLog(crawl.getLogFile())
.map(WorkLogEntry::path)
.map(CrawlPlan.this::getCrawledFilePath)
.map(reader::readRuntimeExcept);
.map(reader::readOptionally)
.filter(Optional::isPresent)
.map(Optional::get);
}
@Override