From 01a16ff3889d0333080aa9668787c208aa82de4f Mon Sep 17 00:00:00 2001
From: Viktor Lofgren
Date: Sat, 5 Oct 2024 17:55:59 +0200
Subject: [PATCH] (crawler) Properly enqueue links from the root document in the crawler

---
 .../crawling-process/java/nu/marginalia/crawl/CrawlerMain.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/code/processes/crawling-process/java/nu/marginalia/crawl/CrawlerMain.java b/code/processes/crawling-process/java/nu/marginalia/crawl/CrawlerMain.java
index 4307952d..83965a88 100644
--- a/code/processes/crawling-process/java/nu/marginalia/crawl/CrawlerMain.java
+++ b/code/processes/crawling-process/java/nu/marginalia/crawl/CrawlerMain.java
@@ -222,7 +222,7 @@ public class CrawlerMain extends ProcessMainClass {
              WarcArchiverIf warcArchiver = warcArchiverFactory.get(outputDir);
              AnchorTagsSource anchorTagsSource = anchorTagsSourceFactory.create(List.of(new EdgeDomain(targetDomainName)))
         ) {
-            var spec = new CrawlSpecProvider.CrawlSpecRecord(targetDomainName, 1000, null);
+            var spec = new CrawlSpecProvider.CrawlSpecRecord(targetDomainName, 1000, List.of());
             var task = new CrawlTask(spec, anchorTagsSource, outputDir, warcArchiver, workLog);
             task.run();
         }