mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
(crawler) Properly enqueue links from the root document in the crawler
This commit is contained in:
parent
eb60ddb729
commit
01a16ff388
@@ -222,7 +222,7 @@ public class CrawlerMain extends ProcessMainClass {
|
||||
WarcArchiverIf warcArchiver = warcArchiverFactory.get(outputDir);
|
||||
AnchorTagsSource anchorTagsSource = anchorTagsSourceFactory.create(List.of(new EdgeDomain(targetDomainName)))
|
||||
) {
|
||||
-var spec = new CrawlSpecProvider.CrawlSpecRecord(targetDomainName, 1000, null);
|
||||
+var spec = new CrawlSpecProvider.CrawlSpecRecord(targetDomainName, 1000, List.of());
|
||||
var task = new CrawlTask(spec, anchorTagsSource, outputDir, warcArchiver, workLog);
|
||||
task.run();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user