Remove annoying log spam in crawler retriever

This commit is contained in:
Viktor Lofgren 2023-06-30 17:08:24 +02:00
parent 8274e8a953
commit 11c26e700e

View File

@@ -227,12 +227,12 @@ public class CrawlerRetreiver {
crawlFrontier.addAllToQueue(sitemap);
}
-logger.info("Queue is now {}", crawlFrontier.queueSize());
+logger.debug("Queue is now {}", crawlFrontier.queueSize());
}
private void sniffRootDocument() {
try {
-logger.info("Configuring link filter");
+logger.debug("Configuring link filter");
var url = crawlFrontier.peek();
@@ -241,6 +241,9 @@ public class CrawlerRetreiver {
return;
var sample = maybeSample.get();
if (sample.documentBody == null)
return;
// Sniff the software based on the sample document
var doc = Jsoup.parse(sample.documentBody.decode());
crawlFrontier.setLinkFilter(linkFilterSelector.selectFilter(doc));