From 32a6735d03cb0fc34d2aea10c0a034d43eebe7cc Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Mon, 19 Jun 2023 17:49:58 +0200 Subject: [PATCH] Undo change in requirements for counting as a high tf-idf word --- .../java/nu/marginalia/keyword/extractors/WordsTfIdfCounts.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/WordsTfIdfCounts.java b/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/WordsTfIdfCounts.java index e017061e..64f50dde 100644 --- a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/WordsTfIdfCounts.java +++ b/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/WordsTfIdfCounts.java @@ -40,7 +40,7 @@ public class WordsTfIdfCounts implements WordReps, Comparator { int value = getTermValue(key, cnt, maxVal); tfIdf.put(key, value); - if (cnt > 2 && value > 100) { + if (cnt > 1 && value > 100) { highTfIdfInstances.add(key); } });