From d8073f0dded230313ee0bd6e9e6a5c818093c6bd Mon Sep 17 00:00:00 2001
From: Viktor Lofgren
Date: Tue, 15 Aug 2023 19:10:43 +0200
Subject: [PATCH] (feature-extractor) Add mail.ru counter to non-adtech trackers

---
 .../processor/logic/FeatureExtractor.java | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/FeatureExtractor.java b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/FeatureExtractor.java
index 040f96dd..6c4ddcf9 100644
--- a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/FeatureExtractor.java
+++ b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/FeatureExtractor.java
@@ -23,8 +23,12 @@ public class FeatureExtractor {
     private static final List innocentTrackers = List.of(
             "twitter.com",
             "bing.com",
-            "msn.com");
-    private static final List adtechTrackers = List.of("adform.net",
+            "msn.com",
+            "mail.ru/counter"
+    );
+    private static final List adtechTrackers = List.of(
+            "publir.com",
+            "adform.net",
             "connect.facebook",
             "facebook.com/tr",
             "absbygoogle.com",
@@ -222,6 +226,12 @@ public class FeatureExtractor {

             String scriptText = scriptTag.html();

+            if (scriptText.contains("_ga=") || scriptText.contains("ga('create'")) {
+                features.add(HtmlFeature.TRACKING);
+            }
+            if (scriptText.contains("_tmr")) {
+                features.add(HtmlFeature.TRACKING);
+            }
             if (scriptText.contains("'pd.js'")) {
                 features.add(HtmlFeature.PARDOT);
             }