mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
Big brain web developers were using onload and onerror handlers to load JS without script tags...
This commit is contained in:
parent
7d86586594
commit
d2fdaafc7a
@ -36,8 +36,15 @@ public class DocumentValuator {
|
|||||||
var scriptVisitor = new ScriptVisitor();
|
var scriptVisitor = new ScriptVisitor();
|
||||||
|
|
||||||
parsed.getElementsByTag("script").traverse(scriptVisitor);
|
parsed.getElementsByTag("script").traverse(scriptVisitor);
|
||||||
|
int value = scriptVisitor.score();
|
||||||
|
|
||||||
return scriptVisitor.score();
|
for (var links : parsed.head().getElementsByTag("link")) {
|
||||||
|
if (links.hasAttr("onerror") || links.hasAttr("onload")) {
|
||||||
|
value += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class ScriptVisitor implements NodeVisitor {
|
private static class ScriptVisitor implements NodeVisitor {
|
||||||
@ -56,7 +63,6 @@ public class DocumentValuator {
|
|||||||
}
|
}
|
||||||
else if (node instanceof TextNode tn) {
|
else if (node instanceof TextNode tn) {
|
||||||
visitScriptText(tn);
|
visitScriptText(tn);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -73,8 +79,7 @@ public class DocumentValuator {
|
|||||||
String srcAttr = el.attr("src");
|
String srcAttr = el.attr("src");
|
||||||
if (srcAttr.contains("wp-content") || srcAttr.contains("wp-includes") || srcAttr.contains("jquery")) {
|
if (srcAttr.contains("wp-content") || srcAttr.contains("wp-includes") || srcAttr.contains("jquery")) {
|
||||||
penalty += 0.49;
|
penalty += 0.49;
|
||||||
}
|
} else if (!Strings.isBlank(srcAttr)) {
|
||||||
else if (!Strings.isBlank(srcAttr)) {
|
|
||||||
penalty += 1;
|
penalty += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -76,6 +76,19 @@ public class FeatureExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 500 IQ web developers use <link> error or load handlers
|
||||||
|
// to sneakily load JS without explicit script tags
|
||||||
|
for (var link : doc.head().getElementsByTag("link")) {
|
||||||
|
if (link.hasAttr("onerror")) {
|
||||||
|
features.add(HtmlFeature.JS);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (link.hasAttr("onload")) {
|
||||||
|
features.add(HtmlFeature.JS);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (features.contains(HtmlFeature.JS) && adblockSimulator.hasAds(doc.clone())) {
|
if (features.contains(HtmlFeature.JS) && adblockSimulator.hasAds(doc.clone())) {
|
||||||
features.add(HtmlFeature.ADVERTISEMENT);
|
features.add(HtmlFeature.ADVERTISEMENT);
|
||||||
}
|
}
|
||||||
@ -117,8 +130,13 @@ public class FeatureExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private boolean hasTrackingScript(Element scriptTag) {
|
private boolean hasTrackingScript(Element scriptTag) {
|
||||||
|
return hasTrackingScript(scriptTag.attr("src"));
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean hasTrackingScript(String scriptText) {
|
||||||
|
|
||||||
for (var tracker : trackers) {
|
for (var tracker : trackers) {
|
||||||
if (scriptTag.attr("src").contains(tracker)) {
|
if (scriptText.contains(tracker)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user