From 4565bfe35907f1b210dd11cb92d15261738b94e6 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Thu, 26 Sep 2024 18:30:29 +0200 Subject: [PATCH] (crawler) Make the crawler report crawling progress correctly when stopped and resumed. --- .../java/nu/marginalia/crawl/CrawlerMain.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/code/processes/crawling-process/java/nu/marginalia/crawl/CrawlerMain.java b/code/processes/crawling-process/java/nu/marginalia/crawl/CrawlerMain.java index 9577f2c1..81ebc3b7 100644 --- a/code/processes/crawling-process/java/nu/marginalia/crawl/CrawlerMain.java +++ b/code/processes/crawling-process/java/nu/marginalia/crawl/CrawlerMain.java @@ -180,6 +180,11 @@ public class CrawlerMain extends ProcessMainClass { WarcArchiverIf warcArchiver = warcArchiverFactory.get(outputDir); AnchorTagsSource anchorTagsSource = anchorTagsSourceFactory.create(specProvider.getDomains()) ) { + // Set the number of tasks done to the number of tasks that are already finished, + // (this happens when the process is restarted after a crash or a shutdown) + tasksDone.set(workLog.countFinishedJobs()); + + // Process the crawl tasks try (var specStream = specProvider.stream()) { specStream .takeWhile((e) -> abortMonitor.isAlive())