mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
(term-freq-exporter) Extract ngrams in term-frequency-exporter
This commit is contained in:
parent
4fb86ac692
commit
491d6bec46
@ -127,6 +127,10 @@ public class TermFrequencyExporter implements ExporterIf {
|
||||
for (var word : sent) {
|
||||
words.add(longHash(word.stemmed().getBytes(StandardCharsets.UTF_8)));
|
||||
}
|
||||
|
||||
for (var ngram : sent.ngramStemmed) {
|
||||
words.add(longHash(ngram.getBytes()));
|
||||
}
|
||||
}
|
||||
|
||||
synchronized (counts) {
|
||||
|
Loading…
Reference in New Issue
Block a user