mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
Minor: Don't blow up the reader on a corrupted file
This commit is contained in:
parent
96eecc6ea5
commit
da8bcc6e24
@ -15,6 +15,7 @@ import java.io.InputStreamReader;
|
|||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
import java.util.concurrent.ForkJoinPool;
|
import java.util.concurrent.ForkJoinPool;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
@ -58,14 +59,13 @@ public class CrawledDomainReader {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public CrawledDomain readRuntimeExcept(Path path) {
|
public Optional<CrawledDomain> readOptionally(Path path) {
|
||||||
try {
|
try {
|
||||||
return read(path);
|
return Optional.of(read(path));
|
||||||
}
|
}
|
||||||
catch (Exception ex) {
|
catch (Exception ex) {
|
||||||
logger.warn("Failed to read domain", ex);
|
logger.warn("Failed to read domain", ex);
|
||||||
|
return Optional.empty();
|
||||||
throw new RuntimeException(ex);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -21,6 +21,7 @@ import java.util.Iterator;
|
|||||||
import java.util.function.Consumer;
|
import java.util.function.Consumer;
|
||||||
import java.util.function.Predicate;
|
import java.util.function.Predicate;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
@AllArgsConstructor @NoArgsConstructor @ToString
|
@AllArgsConstructor @NoArgsConstructor @ToString
|
||||||
public class CrawlPlan {
|
public class CrawlPlan {
|
||||||
@ -95,7 +96,9 @@ public class CrawlPlan {
|
|||||||
entryStream
|
entryStream
|
||||||
.map(WorkLogEntry::path)
|
.map(WorkLogEntry::path)
|
||||||
.map(this::getCrawledFilePath)
|
.map(this::getCrawledFilePath)
|
||||||
.map(reader::readRuntimeExcept)
|
.map(reader::readOptionally)
|
||||||
|
.filter(Optional::isPresent)
|
||||||
|
.map(Optional::get)
|
||||||
.forEach(consumer);
|
.forEach(consumer);
|
||||||
}
|
}
|
||||||
catch (IOException ex) {
|
catch (IOException ex) {
|
||||||
@ -119,11 +122,13 @@ public class CrawlPlan {
|
|||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
})
|
})
|
||||||
.map(reader::readRuntimeExcept)
|
.map(reader::readOptionally)
|
||||||
|
.filter(Optional::isPresent)
|
||||||
|
.map(Optional::get)
|
||||||
.forEach(consumer);
|
.forEach(consumer);
|
||||||
}
|
}
|
||||||
catch (IOException ex) {
|
catch (IOException ex) {
|
||||||
logger.warn("Failed to read domains", ex);
|
logger.error("Failed to read domains", ex);
|
||||||
|
|
||||||
throw new RuntimeException(ex);
|
throw new RuntimeException(ex);
|
||||||
}
|
}
|
||||||
@ -141,7 +146,9 @@ public class CrawlPlan {
|
|||||||
stream = WorkLog.streamLog(crawl.getLogFile())
|
stream = WorkLog.streamLog(crawl.getLogFile())
|
||||||
.map(WorkLogEntry::path)
|
.map(WorkLogEntry::path)
|
||||||
.map(CrawlPlan.this::getCrawledFilePath)
|
.map(CrawlPlan.this::getCrawledFilePath)
|
||||||
.map(reader::readRuntimeExcept);
|
.map(reader::readOptionally)
|
||||||
|
.filter(Optional::isPresent)
|
||||||
|
.map(Optional::get);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
Loading…
Reference in New Issue
Block a user