mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
(live-crawl) Flag URLs that don't pass robots.txt as bad so we don't keep fetching robots.txt every day for an empty link list
This commit is contained in:
parent
923ebbac81
commit
88caca60f9
@ -92,6 +92,7 @@ public class SimpleLinkScraper implements AutoCloseable {
|
||||
|
||||
EdgeUrl parsedUrl = optParsedUrl.get();
|
||||
if (!rules.isAllowed(url)) {
|
||||
maybeFlagAsBad(parsedUrl);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user