diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/language/processing/DocumentKeywordExtractor.java b/marginalia_nu/src/main/java/nu/marginalia/util/language/processing/DocumentKeywordExtractor.java index cf97302e..557e8d58 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/util/language/processing/DocumentKeywordExtractor.java +++ b/marginalia_nu/src/main/java/nu/marginalia/util/language/processing/DocumentKeywordExtractor.java @@ -154,7 +154,7 @@ public class DocumentKeywordExtractor { if (!word.isStopWord()) { String w = AsciiFlattener.flattenUnicode(word.wordLowerCase()); if (WordPatterns.singleWordQualitiesPredicate.test(w)) { - wordsBuilder.add(w, metadata.forWord(flagsTemplate, word.stemmed())); + wordsBuilder.add(w, metadata.getMetadataForWord(flagsTemplate, word.stemmed())); } } } @@ -163,7 +163,7 @@ public class DocumentKeywordExtractor { var rep = new WordRep(sent, names); String w = AsciiFlattener.flattenUnicode(rep.word); - wordsBuilder.add(w, metadata.forWord(flagsTemplate, rep.stemmed)); + wordsBuilder.add(w, metadata.getMetadataForWord(flagsTemplate, rep.stemmed)); } } @@ -217,7 +217,7 @@ public class DocumentKeywordExtractor { continue; } - wordsBuilder.add(flatWord, metadata.forWord(metadata.wordFlagsTemplate(), word.stemmed) | additionalMeta); + wordsBuilder.add(flatWord, metadata.getMetadataForWord(metadata.wordFlagsTemplate(), word.stemmed) | additionalMeta); } } diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/language/processing/model/KeywordMetadata.java b/marginalia_nu/src/main/java/nu/marginalia/util/language/processing/model/KeywordMetadata.java index 0a9731b0..58e53551 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/util/language/processing/model/KeywordMetadata.java +++ b/marginalia_nu/src/main/java/nu/marginalia/util/language/processing/model/KeywordMetadata.java @@ -17,8 +17,6 @@ public record KeywordMetadata(HashSet titleKeywords, ) { - private static final KeywordCounter.WordFrequencyData empty = new KeywordCounter.WordFrequencyData(0, 0); - public KeywordMetadata(EnumSet flags) { this(new HashSet<>(50), new HashSet<>(10), new HashSet<>(50), new HashMap<>(15_000), @@ -30,7 +28,8 @@ public record KeywordMetadata(HashSet titleKeywords, this(EnumSet.noneOf(EdgePageWordFlags.class)); } - public long forWord(EnumSet flagsTemplate, String stemmed) { + private static final KeywordCounter.WordFrequencyData empty = new KeywordCounter.WordFrequencyData(0, 0); + public long getMetadataForWord(EnumSet flagsTemplate, String stemmed) { KeywordCounter.WordFrequencyData tfidf = wordsTfIdf.getOrDefault(stemmed, empty); EnumSet flags = flagsTemplate.clone();