(crawler) Fix rare ConcurrentModificationException due to HashSet
commit e5c9791b14
parent 58556af6c7
@@ -54,7 +54,7 @@ public class CrawlerMain {
     private final Gson gson;
     private final DumbThreadPool pool;

-    private final Set<String> processingIds = new HashSet<>();
+    private final Map<String, String> processingIds = new ConcurrentHashMap<>();
     private final CrawledDomainReader reader = new CrawledDomainReader();

     final AbortMonitor abortMonitor = AbortMonitor.getInstance();
@@ -148,7 +148,7 @@ public class CrawlerMain {
             // This shouldn't realistically happen, but if it does, we need to ignore it, otherwise
             // we'd end up crawling the same site twice and might end up writing to the same output
             // file from multiple threads with complete bit salad as a result.
-            if (!processingIds.add(crawlingSpecification.id)) {
+            if (processingIds.put(crawlingSpecification.id, "") != null) {
                 logger.error("Ignoring duplicate id: {}", crawlingSpecification.id);
                 continue;
             }
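
Side note on the fix: the Map-with-dummy-values approach works because ConcurrentHashMap.put() atomically returns the previous value, so a non-null result signals that another thread already claimed the id. An alternative that would have kept the original Set semantics is ConcurrentHashMap.newKeySet() (available since Java 8), whose add() atomically returns false on duplicates. A minimal sketch of that alternative follows; the demo class and ids are hypothetical, not part of the commit:

import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

class DuplicateIdDemo {
    // Thread-safe Set view backed by ConcurrentHashMap; add() atomically
    // returns false when the id is already present, so the original
    // !processingIds.add(...) check could have stayed as-is.
    private static final Set<String> processingIds = ConcurrentHashMap.newKeySet();

    public static void main(String[] args) {
        for (String id : new String[] {"example.com", "example.org", "example.com"}) {
            if (!processingIds.add(id)) {
                System.out.println("Ignoring duplicate id: " + id);
                continue;
            }
            System.out.println("Processing id: " + id);
        }
    }
}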