(crawler) Properly enqueue links from the root document in the crawler

This commit is contained in:
Viktor Lofgren 2024-10-05 17:55:59 +02:00
parent eb60ddb729
commit 01a16ff388

View File

@@ -222,7 +222,7 @@ public class CrawlerMain extends ProcessMainClass {
WarcArchiverIf warcArchiver = warcArchiverFactory.get(outputDir); WarcArchiverIf warcArchiver = warcArchiverFactory.get(outputDir);
AnchorTagsSource anchorTagsSource = anchorTagsSourceFactory.create(List.of(new EdgeDomain(targetDomainName))) AnchorTagsSource anchorTagsSource = anchorTagsSourceFactory.create(List.of(new EdgeDomain(targetDomainName)))
) { ) {
var spec = new CrawlSpecProvider.CrawlSpecRecord(targetDomainName, 1000, null); var spec = new CrawlSpecProvider.CrawlSpecRecord(targetDomainName, 1000, List.of());
var task = new CrawlTask(spec, anchorTagsSource, outputDir, warcArchiver, workLog); var task = new CrawlTask(spec, anchorTagsSource, outputDir, warcArchiver, workLog);
task.run(); task.run();
} }