From a497e4c920135ea424ee21b0c1614fe514cd6a3f Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Thu, 26 Oct 2023 12:49:28 +0200 Subject: [PATCH] (crawler) Terminate crawler after a few hours of no progress --- .../util/SimpleBlockingThreadPool.java | 6 +++++- .../java/nu/marginalia/crawl/CrawlerMain.java | 16 ++++++++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/code/libraries/blocking-thread-pool/src/main/java/nu/marginalia/util/SimpleBlockingThreadPool.java b/code/libraries/blocking-thread-pool/src/main/java/nu/marginalia/util/SimpleBlockingThreadPool.java index 294bf88e..876ecef9 100644 --- a/code/libraries/blocking-thread-pool/src/main/java/nu/marginalia/util/SimpleBlockingThreadPool.java +++ b/code/libraries/blocking-thread-pool/src/main/java/nu/marginalia/util/SimpleBlockingThreadPool.java @@ -3,6 +3,7 @@ package nu.marginalia.util; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.time.Duration; import java.util.ArrayList; import java.util.List; import java.util.concurrent.ArrayBlockingQueue; @@ -47,12 +48,15 @@ public class SimpleBlockingThreadPool { this.shutDown = true; } - public void shutDownNow() { + public void shutDownNow() throws InterruptedException { this.shutDown = true; tasks.clear(); for (Thread worker : workers) { worker.interrupt(); } + for (Thread worker : workers) { + worker.join(Duration.ofMinutes(5)); + } } private void worker() { diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/CrawlerMain.java b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/CrawlerMain.java index 5b9cb175..99829868 100644 --- a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/CrawlerMain.java +++ b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/CrawlerMain.java @@ -151,12 +151,20 @@ public class CrawlerMain { logger.info("Shutting down the pool, waiting for tasks to complete..."); pool.shutDown(); - do { - System.out.println("Waiting for pool to terminate... " + pool.getActiveCount() + " remaining"); - } while (!pool.awaitTermination(60, TimeUnit.SECONDS)); + int activePoolCount = pool.getActiveCount(); + + while (!pool.awaitTermination(5, TimeUnit.HOURS)) { + int newActivePoolCount = pool.getActiveCount(); + if (activePoolCount == newActivePoolCount) { + logger.warn("Aborting the last {} jobs of the crawl, taking too long", newActivePoolCount); + pool.shutDownNow(); + } else { + activePoolCount = newActivePoolCount; + } + } } catch (Exception ex) { - + logger.warn("Exception in crawler", ex); } finally { heartbeat.shutDown();