diff --git a/code/features-convert/data-extractors/java/nu/marginalia/extractor/TermFrequencyExporter.java b/code/features-convert/data-extractors/java/nu/marginalia/extractor/TermFrequencyExporter.java
index df1e56a9..bdb7362a 100644
--- a/code/features-convert/data-extractors/java/nu/marginalia/extractor/TermFrequencyExporter.java
+++ b/code/features-convert/data-extractors/java/nu/marginalia/extractor/TermFrequencyExporter.java
@@ -127,6 +127,12 @@ public class TermFrequencyExporter implements ExporterIf {
                         for (var word : sent) {
                             words.add(longHash(word.stemmed().getBytes(StandardCharsets.UTF_8)));
                         }
+
+                        // Also record each stemmed n-gram of the sentence. Hash the UTF-8 bytes explicitly,
+                        // like the word loop above, so the hash does not depend on the JVM default charset.
+                        for (var ngram : sent.ngramStemmed) {
+                            words.add(longHash(ngram.getBytes(StandardCharsets.UTF_8)));
+                        }
                     }
 
                     synchronized (counts) {