From fcfe07fb7ddb0cc42e069cdc4db4b2d13cd98363 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Fri, 18 Aug 2023 11:26:56 +0200 Subject: [PATCH] (valuator) Clean up code --- .../processor/logic/DocumentValuator.java | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/DocumentValuator.java b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/DocumentValuator.java index 5db3684d..baacb766 100644 --- a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/DocumentValuator.java +++ b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/DocumentValuator.java @@ -99,7 +99,7 @@ public class DocumentValuator { return quality + adjustment; } - private static class ScriptVisitor implements NodeVisitor { + public static class ScriptVisitor implements NodeVisitor { boolean hasBadScript = false; int scriptLength = 0; double penalty = 0.; @@ -113,26 +113,23 @@ public class DocumentValuator { if (node instanceof Element el) { visitTag(el); } - else if (node instanceof TextNode tn) { - visitScriptText(tn); - } - } - - private void visitScriptText(TextNode tn) { - String wholeText = tn.getWholeText(); - scriptLength += wholeText.length(); - - if (!hasBadScript) { - hasBadScript = wholeText.contains(".createElement("); - } } public void visitTag(Element el) { String srcAttr = el.attr("src"); + if (srcAttr.contains("wp-content") || srcAttr.contains("wp-includes") || srcAttr.contains("jquery")) { penalty += 0.49; } else if (!Strings.isBlank(srcAttr)) { penalty += 1; + } else { + var wt = el.wholeText(); + scriptLength += wt.length(); + penalty += 0.25; + + if (!hasBadScript) { + hasBadScript = wt.contains(".createElement("); + } } } }