mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
(crawler) Properly enqueue links from the root document in the crawler
This commit is contained in:
parent
eb60ddb729
commit
01a16ff388
@ -222,7 +222,7 @@ public class CrawlerMain extends ProcessMainClass {
|
|||||||
WarcArchiverIf warcArchiver = warcArchiverFactory.get(outputDir);
|
WarcArchiverIf warcArchiver = warcArchiverFactory.get(outputDir);
|
||||||
AnchorTagsSource anchorTagsSource = anchorTagsSourceFactory.create(List.of(new EdgeDomain(targetDomainName)))
|
AnchorTagsSource anchorTagsSource = anchorTagsSourceFactory.create(List.of(new EdgeDomain(targetDomainName)))
|
||||||
) {
|
) {
|
||||||
var spec = new CrawlSpecProvider.CrawlSpecRecord(targetDomainName, 1000, null);
|
var spec = new CrawlSpecProvider.CrawlSpecRecord(targetDomainName, 1000, List.of());
|
||||||
var task = new CrawlTask(spec, anchorTagsSource, outputDir, warcArchiver, workLog);
|
var task = new CrawlTask(spec, anchorTagsSource, outputDir, warcArchiver, workLog);
|
||||||
task.run();
|
task.run();
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user