mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(crawler) Fix rare ConcurrentModificationException due to HashSet
This commit is contained in:
parent
58556af6c7
commit
e5c9791b14
@ -54,7 +54,7 @@ public class CrawlerMain {
|
|||||||
private final Gson gson;
|
private final Gson gson;
|
||||||
private final DumbThreadPool pool;
|
private final DumbThreadPool pool;
|
||||||
|
|
||||||
private final Set<String> processingIds = new HashSet<>();
|
private final Map<String, String> processingIds = new ConcurrentHashMap<>();
|
||||||
private final CrawledDomainReader reader = new CrawledDomainReader();
|
private final CrawledDomainReader reader = new CrawledDomainReader();
|
||||||
|
|
||||||
final AbortMonitor abortMonitor = AbortMonitor.getInstance();
|
final AbortMonitor abortMonitor = AbortMonitor.getInstance();
|
||||||
@ -148,7 +148,7 @@ public class CrawlerMain {
|
|||||||
// This shouldn't realistically happen, but if it does, we need to ignore it, otherwise
|
// This shouldn't realistically happen, but if it does, we need to ignore it, otherwise
|
||||||
// we'd end up crawling the same site twice and might end up writing to the same output
|
// we'd end up crawling the same site twice and might end up writing to the same output
|
||||||
// file from multiple threads with complete bit salad as a result.
|
// file from multiple threads with complete bit salad as a result.
|
||||||
if (!processingIds.add(crawlingSpecification.id)) {
|
if (processingIds.put(crawlingSpecification.id, "") != null) {
|
||||||
logger.error("Ignoring duplicate id: {}", crawlingSpecification.id);
|
logger.error("Ignoring duplicate id: {}", crawlingSpecification.id);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user