mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(converter) Add upper 128KB limit to how much HTML we'll parse
This commit is contained in:
parent
33c2188c87
commit
fdec565b34
@ -107,6 +107,10 @@ public class HtmlDocumentProcessorPlugin extends AbstractDocumentProcessorPlugin
|
|||||||
throw new DisqualifiedException(DisqualificationReason.LANGUAGE);
|
throw new DisqualifiedException(DisqualificationReason.LANGUAGE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (documentBody.length() > 128_000) { // 128kb
|
||||||
|
documentBody = documentBody.substring(0, 128_000);
|
||||||
|
}
|
||||||
|
|
||||||
Document doc = Jsoup.parse(documentBody);
|
Document doc = Jsoup.parse(documentBody);
|
||||||
|
|
||||||
if (!metaRobotsTag.allowIndexingByMetaTag(doc)) {
|
if (!metaRobotsTag.allowIndexingByMetaTag(doc)) {
|
||||||
|
Loading…
Reference in New Issue
Block a user