From 65b0ff26fc6f802ff38d3e5fa298aad3b1dcf1d6 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Mon, 30 Jan 2023 09:42:46 +0100 Subject: [PATCH] Better SiteWords extraction --- .../marginalia/wmsa/edge/converting/processor/SiteWords.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/SiteWords.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/SiteWords.java index b5a5191f..87e8c931 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/SiteWords.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/SiteWords.java @@ -33,8 +33,7 @@ public class SiteWords { Set commonSiteWords = new HashSet<>(10); commonSiteWords.addAll(commonKeywordExtractor.getCommonSiteWords(processedDomain, - EdgePageWordFlags.Subjects, - EdgePageWordFlags.TfIdfHigh)); + EdgePageWordFlags.Subjects)); commonSiteWords.addAll(commonKeywordExtractor.getCommonSiteWords(processedDomain, EdgePageWordFlags.Title));