mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
(term-freq-exporter) Extract ngrams in term-frequency-exporter
This commit is contained in:
parent
fcdc843c15
commit
4b47fadbab
@ -127,6 +127,10 @@ public class TermFrequencyExporter implements ExporterIf {
|
|||||||
for (var word : sent) {
|
for (var word : sent) {
|
||||||
words.add(longHash(word.stemmed().getBytes(StandardCharsets.UTF_8)));
|
words.add(longHash(word.stemmed().getBytes(StandardCharsets.UTF_8)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (var ngram : sent.ngramStemmed) {
|
||||||
|
words.add(longHash(ngram.getBytes()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized (counts) {
|
synchronized (counts) {
|
||||||
|
Loading…
Reference in New Issue
Block a user