mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
Remove annoying log spam in crawler retriever
This commit is contained in:
parent
8274e8a953
commit
11c26e700e
@@ -227,12 +227,12 @@ public class CrawlerRetreiver {
|
||||
crawlFrontier.addAllToQueue(sitemap);
|
||||
}
|
||||
|
||||
logger.info("Queue is now {}", crawlFrontier.queueSize());
|
||||
logger.debug("Queue is now {}", crawlFrontier.queueSize());
|
||||
}
|
||||
|
||||
private void sniffRootDocument() {
|
||||
try {
|
||||
logger.info("Configuring link filter");
|
||||
logger.debug("Configuring link filter");
|
||||
|
||||
var url = crawlFrontier.peek();
|
||||
|
||||
@@ -241,6 +241,9 @@ public class CrawlerRetreiver {
|
||||
return;
|
||||
var sample = maybeSample.get();
|
||||
|
||||
if (sample.documentBody == null)
|
||||
return;
|
||||
|
||||
// Sniff the software based on the sample document
|
||||
var doc = Jsoup.parse(sample.documentBody.decode());
|
||||
crawlFrontier.setLinkFilter(linkFilterSelector.selectFilter(doc));
|
||||
|
Loading…
Reference in New Issue
Block a user