(term-freq-exporter) Extract ngrams in term-frequency-exporter

This commit is contained in:
Viktor Lofgren 2024-04-10 16:58:05 +02:00
parent 4fb86ac692
commit 491d6bec46

View File

@@ -127,6 +127,10 @@ public class TermFrequencyExporter implements ExporterIf {
for (var word : sent) { for (var word : sent) {
words.add(longHash(word.stemmed().getBytes(StandardCharsets.UTF_8))); words.add(longHash(word.stemmed().getBytes(StandardCharsets.UTF_8)));
} }
// Hash each stemmed n-gram from an explicit UTF-8 encoding, matching how the
// single-word terms are hashed, so the value does not depend on the JVM's
// default charset (String.getBytes() with no argument uses that default).
for (var ngram : sent.ngramStemmed) {
    words.add(longHash(ngram.getBytes(StandardCharsets.UTF_8)));
}
} }
synchronized (counts) { synchronized (counts) {