mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
Minor: Don't blow up the reader on a corrupted file
This commit is contained in:
parent
96eecc6ea5
commit
da8bcc6e24
@@ -15,6 +15,7 @@ import java.io.InputStreamReader;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.ForkJoinPool;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
@@ -58,14 +59,13 @@ public class CrawledDomainReader {
|
||||
}
|
||||
}
|
||||
|
||||
public CrawledDomain readRuntimeExcept(Path path) {
|
||||
public Optional<CrawledDomain> readOptionally(Path path) {
|
||||
try {
|
||||
return read(path);
|
||||
return Optional.of(read(path));
|
||||
}
|
||||
catch (Exception ex) {
|
||||
logger.warn("Failed to read domain", ex);
|
||||
|
||||
throw new RuntimeException(ex);
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -21,6 +21,7 @@ import java.util.Iterator;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.Optional;
|
||||
|
||||
@AllArgsConstructor @NoArgsConstructor @ToString
|
||||
public class CrawlPlan {
|
||||
@@ -95,7 +96,9 @@ public class CrawlPlan {
|
||||
entryStream
|
||||
.map(WorkLogEntry::path)
|
||||
.map(this::getCrawledFilePath)
|
||||
.map(reader::readRuntimeExcept)
|
||||
.map(reader::readOptionally)
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get)
|
||||
.forEach(consumer);
|
||||
}
|
||||
catch (IOException ex) {
|
||||
@@ -119,11 +122,13 @@ public class CrawlPlan {
|
||||
}
|
||||
return true;
|
||||
})
|
||||
.map(reader::readRuntimeExcept)
|
||||
.map(reader::readOptionally)
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get)
|
||||
.forEach(consumer);
|
||||
}
|
||||
catch (IOException ex) {
|
||||
logger.warn("Failed to read domains", ex);
|
||||
logger.error("Failed to read domains", ex);
|
||||
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
@@ -141,7 +146,9 @@ public class CrawlPlan {
|
||||
stream = WorkLog.streamLog(crawl.getLogFile())
|
||||
.map(WorkLogEntry::path)
|
||||
.map(CrawlPlan.this::getCrawledFilePath)
|
||||
.map(reader::readRuntimeExcept);
|
||||
.map(reader::readOptionally)
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
Loading…
Reference in New Issue
Block a user