mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
Remove annoying log spam in crawler retriever
This commit is contained in:
parent
8274e8a953
commit
11c26e700e
@ -227,12 +227,12 @@ public class CrawlerRetreiver {
|
|||||||
crawlFrontier.addAllToQueue(sitemap);
|
crawlFrontier.addAllToQueue(sitemap);
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info("Queue is now {}", crawlFrontier.queueSize());
|
logger.debug("Queue is now {}", crawlFrontier.queueSize());
|
||||||
}
|
}
|
||||||
|
|
||||||
private void sniffRootDocument() {
|
private void sniffRootDocument() {
|
||||||
try {
|
try {
|
||||||
logger.info("Configuring link filter");
|
logger.debug("Configuring link filter");
|
||||||
|
|
||||||
var url = crawlFrontier.peek();
|
var url = crawlFrontier.peek();
|
||||||
|
|
||||||
@ -241,6 +241,9 @@ public class CrawlerRetreiver {
|
|||||||
return;
|
return;
|
||||||
var sample = maybeSample.get();
|
var sample = maybeSample.get();
|
||||||
|
|
||||||
|
if (sample.documentBody == null)
|
||||||
|
return;
|
||||||
|
|
||||||
// Sniff the software based on the sample document
|
// Sniff the software based on the sample document
|
||||||
var doc = Jsoup.parse(sample.documentBody.decode());
|
var doc = Jsoup.parse(sample.documentBody.decode());
|
||||||
crawlFrontier.setLinkFilter(linkFilterSelector.selectFilter(doc));
|
crawlFrontier.setLinkFilter(linkFilterSelector.selectFilter(doc));
|
||||||
|
Loading…
Reference in New Issue
Block a user