Big brain web developers were using onload and onerror handlers to load JS without script tags...

This commit is contained in:
Viktor Lofgren 2023-06-30 17:10:25 +02:00
parent 7d86586594
commit d2fdaafc7a
2 changed files with 28 additions and 5 deletions

View File

@ -36,8 +36,15 @@ public class DocumentValuator {
var scriptVisitor = new ScriptVisitor(); var scriptVisitor = new ScriptVisitor();
parsed.getElementsByTag("script").traverse(scriptVisitor); parsed.getElementsByTag("script").traverse(scriptVisitor);
int value = scriptVisitor.score();
return scriptVisitor.score(); for (var links : parsed.head().getElementsByTag("link")) {
if (links.hasAttr("onerror") || links.hasAttr("onload")) {
value += 1;
}
}
return value;
} }
private static class ScriptVisitor implements NodeVisitor { private static class ScriptVisitor implements NodeVisitor {
@ -56,7 +63,6 @@ public class DocumentValuator {
} }
else if (node instanceof TextNode tn) { else if (node instanceof TextNode tn) {
visitScriptText(tn); visitScriptText(tn);
} }
} }
@ -73,8 +79,7 @@ public class DocumentValuator {
String srcAttr = el.attr("src"); String srcAttr = el.attr("src");
if (srcAttr.contains("wp-content") || srcAttr.contains("wp-includes") || srcAttr.contains("jquery")) { if (srcAttr.contains("wp-content") || srcAttr.contains("wp-includes") || srcAttr.contains("jquery")) {
penalty += 0.49; penalty += 0.49;
} } else if (!Strings.isBlank(srcAttr)) {
else if (!Strings.isBlank(srcAttr)) {
penalty += 1; penalty += 1;
} }
} }

View File

@ -76,6 +76,19 @@ public class FeatureExtractor {
} }
} }
// 500 IQ web developers use <link> onerror or onload handlers
// to sneakily load JS without explicit script tags
for (var link : doc.head().getElementsByTag("link")) {
if (link.hasAttr("onerror")) {
features.add(HtmlFeature.JS);
break;
}
if (link.hasAttr("onload")) {
features.add(HtmlFeature.JS);
break;
}
}
if (features.contains(HtmlFeature.JS) && adblockSimulator.hasAds(doc.clone())) { if (features.contains(HtmlFeature.JS) && adblockSimulator.hasAds(doc.clone())) {
features.add(HtmlFeature.ADVERTISEMENT); features.add(HtmlFeature.ADVERTISEMENT);
} }
@ -117,8 +130,13 @@ public class FeatureExtractor {
} }
private boolean hasTrackingScript(Element scriptTag) { private boolean hasTrackingScript(Element scriptTag) {
return hasTrackingScript(scriptTag.attr("src"));
}
private boolean hasTrackingScript(String scriptText) {
for (var tracker : trackers) { for (var tracker : trackers) {
if (scriptTag.attr("src").contains(tracker)) { if (scriptText.contains(tracker)) {
return true; return true;
} }
} }