diff --git a/code/common/config/src/main/java/nu/marginalia/LanguageModels.java b/code/common/config/src/main/java/nu/marginalia/LanguageModels.java
index 0220e7a2..04ab0aa0 100644
--- a/code/common/config/src/main/java/nu/marginalia/LanguageModels.java
+++ b/code/common/config/src/main/java/nu/marginalia/LanguageModels.java
@@ -10,13 +10,21 @@ public class LanguageModels {
     public final Path posRules;
     public final Path posDict;
     public final Path openNLPTokenData;
+    public final Path fasttextLanguageModel;
 
-    public LanguageModels(Path ngramBloomFilter, Path termFrequencies, Path openNLPSentenceDetectionData, Path posRules, Path posDict, Path openNLPTokenData) {
+    public LanguageModels(Path ngramBloomFilter,
+                          Path termFrequencies,
+                          Path openNLPSentenceDetectionData,
+                          Path posRules,
+                          Path posDict,
+                          Path openNLPTokenData,
+                          Path fasttextLanguageModel) {
         this.ngramBloomFilter = ngramBloomFilter;
         this.termFrequencies = termFrequencies;
         this.openNLPSentenceDetectionData = openNLPSentenceDetectionData;
         this.posRules = posRules;
         this.posDict = posDict;
         this.openNLPTokenData = openNLPTokenData;
+        this.fasttextLanguageModel = fasttextLanguageModel;
     }
 }
diff --git a/code/common/config/src/main/java/nu/marginalia/WmsaHome.java b/code/common/config/src/main/java/nu/marginalia/WmsaHome.java
index d63be333..7a1158d4 100644
--- a/code/common/config/src/main/java/nu/marginalia/WmsaHome.java
+++ b/code/common/config/src/main/java/nu/marginalia/WmsaHome.java
@@ -87,7 +87,8 @@ public class WmsaHome {
                 home.resolve("model/opennlp-sentence.bin"),
                 home.resolve("model/English.RDR"),
                 home.resolve("model/English.DICT"),
-                home.resolve("model/opennlp-tok.bin"));
+                home.resolve("model/opennlp-tok.bin"),
+                home.resolve("model/lid.176.ftz"));
     }
 
     private static final boolean debugMode = Boolean.getBoolean("wmsa-debug");
diff --git a/code/features-convert/keyword-extraction/src/test/java/nu/marginalia/test/util/TestLanguageModels.java b/code/features-convert/keyword-extraction/src/test/java/nu/marginalia/test/util/TestLanguageModels.java
index 958604ca..0675559a 100644
--- a/code/features-convert/keyword-extraction/src/test/java/nu/marginalia/test/util/TestLanguageModels.java
+++ b/code/features-convert/keyword-extraction/src/test/java/nu/marginalia/test/util/TestLanguageModels.java
@@ -31,7 +31,8 @@ public class TestLanguageModels {
                 languageModelsHome.resolve("opennlp-sentence.bin"),
                 languageModelsHome.resolve("English.RDR"),
                 languageModelsHome.resolve("English.DICT"),
-                languageModelsHome.resolve("opennlp-tokens.bin")
+                languageModelsHome.resolve("opennlp-tokens.bin"),
+                languageModelsHome.resolve("lid.176.ftz")
         );
     }
 }
diff --git a/code/features-search/query-parser/src/test/java/nu/marginalia/util/TestLanguageModels.java b/code/features-search/query-parser/src/test/java/nu/marginalia/util/TestLanguageModels.java
index 81df1ed9..5efd2025 100644
--- a/code/features-search/query-parser/src/test/java/nu/marginalia/util/TestLanguageModels.java
+++ b/code/features-search/query-parser/src/test/java/nu/marginalia/util/TestLanguageModels.java
@@ -31,7 +31,8 @@ public class TestLanguageModels {
                 languageModelsHome.resolve("opennlp-sentence.bin"),
                 languageModelsHome.resolve("English.RDR"),
                 languageModelsHome.resolve("English.DICT"),
-                languageModelsHome.resolve("opennlp-tokens.bin")
+                languageModelsHome.resolve("opennlp-tokens.bin"),
+                languageModelsHome.resolve("lid.176.ftz")
         );
     }
 }
diff --git a/code/libraries/language-processing/src/main/java/nu/marginalia/language/model/DocumentLanguageData.java b/code/libraries/language-processing/src/main/java/nu/marginalia/language/model/DocumentLanguageData.java
index a889ab2a..2ad53f7a 100644
--- a/code/libraries/language-processing/src/main/java/nu/marginalia/language/model/DocumentLanguageData.java
+++ b/code/libraries/language-processing/src/main/java/nu/marginalia/language/model/DocumentLanguageData.java
@@ -16,13 +16,15 @@ public class DocumentLanguageData {
     public final DocumentSentence[] sentences;
     public final DocumentSentence[] titleSentences;
     public final TObjectIntHashMap wordCount;
+    public final String text;
 
     /** for test convenience */
     public static DocumentLanguageData empty() {
         return new DocumentLanguageData(
                 new DocumentSentence[0],
                 new DocumentSentence[0],
-                new TObjectIntHashMap<>()
+                new TObjectIntHashMap<>(),
+                ""
         );
     }
 
diff --git a/code/libraries/language-processing/src/main/java/nu/marginalia/language/sentence/SentenceExtractor.java b/code/libraries/language-processing/src/main/java/nu/marginalia/language/sentence/SentenceExtractor.java
index c322b5dc..7b2af91c 100644
--- a/code/libraries/language-processing/src/main/java/nu/marginalia/language/sentence/SentenceExtractor.java
+++ b/code/libraries/language-processing/src/main/java/nu/marginalia/language/sentence/SentenceExtractor.java
@@ -71,7 +71,7 @@ public class SentenceExtractor {
 
         TObjectIntHashMap counts = calculateWordCounts(textSentences);
         var titleSentences = extractSentencesFromString(title.toLowerCase());
-        return new DocumentLanguageData(textSentences, titleSentences, counts);
+        return new DocumentLanguageData(textSentences, titleSentences, counts, text);
     }
 
     public DocumentLanguageData extractSentences(String text, String title) {
@@ -79,7 +79,7 @@ public class SentenceExtractor {
 
         TObjectIntHashMap counts = calculateWordCounts(textSentences);
         var titleSentences = extractSentencesFromString(title.toLowerCase());
-        return new DocumentLanguageData(textSentences, titleSentences, counts);
+        return new DocumentLanguageData(textSentences, titleSentences, counts, text);
     }
 
     private String getTitle(Document doc, DocumentSentence[] textSentences) {
diff --git a/code/processes/converting-process/build.gradle b/code/processes/converting-process/build.gradle
index 3b22535c..23e2165c 100644
--- a/code/processes/converting-process/build.gradle
+++ b/code/processes/converting-process/build.gradle
@@ -72,6 +72,7 @@ dependencies {
     implementation libs.zstd
 
     implementation libs.bundles.mariadb
+    implementation libs.bundles.nlp
 
     implementation libs.trove
     implementation libs.fastutil
diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/language/FasttextLanguagePredictionModel.java b/code/processes/converting-process/src/main/java/nu/marginalia/converting/language/FasttextLanguagePredictionModel.java
new file mode 100644
index 00000000..ab3c934f
--- /dev/null
+++ b/code/processes/converting-process/src/main/java/nu/marginalia/converting/language/FasttextLanguagePredictionModel.java
@@ -0,0 +1,29 @@
+package nu.marginalia.converting.language;
+
+import com.github.jfasttext.JFastText;
+import nu.marginalia.LanguageModels;
+import nu.marginalia.language.model.DocumentLanguageData;
+
+/** Language prediction backed by the fastText model at {@code LanguageModels.fasttextLanguageModel}. */
+public class FasttextLanguagePredictionModel implements LanguagePredictionModel {
+    private final JFastText jft = new JFastText();
+
+    public FasttextLanguagePredictionModel(LanguageModels lm) throws Exception {
+        jft.loadModel(lm.fasttextLanguageModel.toString());
+    }
+
+    /** Returns 1.0 when the label predicted for the document text is {@code __label__en}, otherwise 0.0 */
+    @Override
+    public double predictEnglish(DocumentLanguageData dld) {
+        // NOTE(review): dld.text is passed through unmodified; confirm JFastText.predict copes with multi-line input
+        if ("__label__en".equals(jft.predict(dld.text))) {
+            return 1.0;
+        }
+        return 0.;
+    }
+
+    @Override
+    public boolean hasPoorAccuracy() {
+        return false;
+    }
+}
diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/language/LanguageFilter.java b/code/processes/converting-process/src/main/java/nu/marginalia/converting/language/LanguageFilter.java
index dd375fad..61e757b8 100644
--- a/code/processes/converting-process/src/main/java/nu/marginalia/converting/language/LanguageFilter.java
+++ b/code/processes/converting-process/src/main/java/nu/marginalia/converting/language/LanguageFilter.java
@@ -1,5 +1,6 @@
 package nu.marginalia.converting.language;
 
+import nu.marginalia.LanguageModels;
 import nu.marginalia.language.encoding.UnicodeRanges;
 import nu.marginalia.language.model.DocumentLanguageData;
 import org.jsoup.nodes.Document;
@@ -8,10 +9,6 @@ import org.slf4j.LoggerFactory;
 
 import javax.inject.Inject;
 import javax.inject.Singleton;
-import java.io.BufferedReader;
-import java.io.InputStreamReader;
-import java.util.HashSet;
-import java.util.Objects;
 import java.util.Optional;
 import java.util.Set;
 
@@ -20,48 +17,28 @@ public class LanguageFilter {
 
 
     private static final Set interestingLanguages = Set.of("en", "en-us", "en-gb", "eng", "english");
-    private static final Set englishWords = new HashSet<>();
     private static final Logger logger = LoggerFactory.getLogger(LanguageFilter.class);
 
-    static {
-        try (var resource = Objects.requireNonNull(ClassLoader.getSystemResourceAsStream("dictionary/en-1000"),
-                "Could not load word frequency table");
-             var br = new BufferedReader(new InputStreamReader(resource))
-        ) {
-            for (;;) {
-                String s = br.readLine();
-                if (s == null) {
-                    break;
-                }
-                englishWords.add(s.toLowerCase());
-            }
-        }
-        catch (Exception ex) {
-            throw new RuntimeException(ex);
-        }
-    }
 
+    private final LanguagePredictionModel languagePredictionModel;
+
+    /** Returns the probability that the document is in English */
     public double dictionaryAgreement(DocumentLanguageData dld) {
-        Set seenWords = new HashSet<>();
-        int englishCount = 0;
-
-        for (var sent : dld.sentences) {
-            for (var word : sent.wordsLowerCase) {
-                if (seenWords.add(word) && englishWords.contains(word)) {
-                    englishCount++;
-                }
-            }
-        }
-
-        double englishAgreement = englishCount / (double) Math.min(seenWords.size(), englishWords.size());
-
-        logger.debug("Agreement: {}", englishAgreement);
-
-        return englishAgreement;
+        return languagePredictionModel.predictEnglish(dld);
     }
 
     @Inject
-    public LanguageFilter() {
+    public LanguageFilter(LanguageModels lm) {
+        try {
+            if (Boolean.getBoolean("disable-fasttext")) {
+                languagePredictionModel = new UngaBungaLanguagePredictionModel();
+            }
+            else {
+                languagePredictionModel = new FasttextLanguagePredictionModel(lm);
+            }
+        } catch (Exception e) {
+            throw new RuntimeException(e);
+        }
     }
 
     public Optional isPageInterestingByHtmlTag(Document parsed) {
@@ -72,20 +49,17 @@ public class LanguageFilter {
                 .map(interestingLanguages::contains);
     }
 
-    public Optional isPageInterestingByMetaLanguage(Document parsed) {
-        return parsed.getElementsByTag("meta").stream().filter(elem -> "content-language".equalsIgnoreCase(elem.attr("http-equiv")))
-                .map(elem -> elem.attr("content"))
-                .filter(s -> !s.isBlank())
-                .map(String::toLowerCase)
-                .map(interestingLanguages::contains)
-                .findAny();
-    }
 
     public boolean isBlockedUnicodeRange(String data) {
+        if (!languagePredictionModel.hasPoorAccuracy()) {
+            return false;
+        }
+
         for (var range: UnicodeRanges.values()) {
             if (range.test(data))
                 return true;
         }
         return false;
     }
+
 }
diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/language/LanguagePredictionModel.java b/code/processes/converting-process/src/main/java/nu/marginalia/converting/language/LanguagePredictionModel.java
new file mode 100644
index 00000000..e2b907c4
--- /dev/null
+++ b/code/processes/converting-process/src/main/java/nu/marginalia/converting/language/LanguagePredictionModel.java
@@ -0,0 +1,12 @@
+package nu.marginalia.converting.language;
+
+import nu.marginalia.language.model.DocumentLanguageData;
+
+public interface LanguagePredictionModel {
+    /** Returns the probability that the document is in English */
+    double predictEnglish(DocumentLanguageData dld);
+
+    /** When true, LanguageFilter additionally screens text against the blocked unicode ranges */
+    boolean hasPoorAccuracy();
+
+}
diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/language/UngaBungaLanguagePredictionModel.java b/code/processes/converting-process/src/main/java/nu/marginalia/converting/language/UngaBungaLanguagePredictionModel.java
new file mode 100644
index 00000000..21c4f0a4
--- /dev/null
+++ b/code/processes/converting-process/src/main/java/nu/marginalia/converting/language/UngaBungaLanguagePredictionModel.java
@@ -0,0 +1,62 @@
+package nu.marginalia.converting.language;
+
+import nu.marginalia.language.model.DocumentLanguageData;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.HashSet;
+import java.util.Objects;
+import java.util.Set;
+
+/** Dictionary-overlap language prediction based on the bundled dictionary/en-1000 word list. */
+public class UngaBungaLanguagePredictionModel implements LanguagePredictionModel {
+    // Per-instance: a static set filled from the constructor would be mutated again by every new instance
+    private final Set<String> englishWords = new HashSet<>();
+
+    public UngaBungaLanguagePredictionModel() throws Exception {
+        try (var resource = Objects.requireNonNull(ClassLoader.getSystemResourceAsStream("dictionary/en-1000"),
+                "Could not load word frequency table");
+             var br = new BufferedReader(new InputStreamReader(resource, StandardCharsets.UTF_8))
+        ) {
+            for (;;) {
+                String s = br.readLine();
+                if (s == null) {
+                    break;
+                }
+                englishWords.add(s.toLowerCase());
+            }
+        }
+        catch (Exception ex) {
+            throw new RuntimeException(ex);
+        }
+    }
+
+    /** Returns the count of distinct document words found in the dictionary, divided by the smaller of the distinct word count and the dictionary size; 0 when either is empty */
+    @Override
+    public double predictEnglish(DocumentLanguageData dld) {
+        Set<String> seenWords = new HashSet<>();
+        int englishCount = 0;
+
+        for (var sent : dld.sentences) {
+            for (var word : sent.wordsLowerCase) {
+                if (seenWords.add(word) && englishWords.contains(word)) {
+                    englishCount++;
+                }
+            }
+        }
+
+        int denominator = Math.min(seenWords.size(), englishWords.size());
+        if (denominator == 0) {
+            // 0 / 0.0 would yield NaN for documents without words
+            return 0.;
+        }
+
+        return englishCount / (double) denominator;
+    }
+
+    @Override
+    public boolean hasPoorAccuracy() {
+        return true;
+    }
+}
diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/AbstractDocumentProcessorPlugin.java b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/AbstractDocumentProcessorPlugin.java
index 14fd12ad..2d2f58ca 100644
--- a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/AbstractDocumentProcessorPlugin.java
+++ b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/AbstractDocumentProcessorPlugin.java
@@ -17,7 +17,10 @@ import java.net.URISyntaxException;
 import java.util.*;
 
 public abstract class AbstractDocumentProcessorPlugin {
-    protected LanguageFilter languageFilter = new LanguageFilter();
+    protected LanguageFilter languageFilter;
+    public AbstractDocumentProcessorPlugin(LanguageFilter languageFilter) {
+        this.languageFilter = languageFilter;
+    }
 
     public abstract DetailsWithWords createDetails(CrawledDocument crawledDocument) throws DisqualifiedException, URISyntaxException;
     public abstract boolean isApplicable(CrawledDocument doc);
diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/HtmlDocumentProcessorPlugin.java b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/HtmlDocumentProcessorPlugin.java
index d0827670..481496e1 100644
--- a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/HtmlDocumentProcessorPlugin.java
+++ b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/HtmlDocumentProcessorPlugin.java
@@ -2,6 +2,7 @@ package nu.marginalia.converting.processor.plugin;
 
 import com.google.inject.Inject;
 import com.google.inject.name.Named;
+import nu.marginalia.converting.language.LanguageFilter;
 import nu.marginalia.converting.model.GeneratorType;
 import nu.marginalia.converting.processor.MetaRobotsTag;
 import nu.marginalia.converting.processor.logic.dom.MeasureLengthVisitor;
@@ -64,6 +65,7 @@ public class HtmlDocumentProcessorPlugin extends AbstractDocumentProcessorPlugin
     @Inject
     public HtmlDocumentProcessorPlugin(
             @Named("min-document-quality") Double minDocumentQuality,
+            LanguageFilter languageFilter,
             SentenceExtractor sentenceExtractor,
             FeatureExtractor featureExtractor,
             TitleExtractor titleExtractor,
@@ -74,6 +76,8 @@ public class HtmlDocumentProcessorPlugin extends AbstractDocumentProcessorPlugin
             DocumentGeneratorExtractor documentGeneratorExtractor,
             HtmlProcessorSpecializations specializations) {
 
+        super(languageFilter);
+
         this.documentLengthLogic = documentLengthLogic;
         this.minDocumentQuality = minDocumentQuality;
         this.sentenceExtractor = sentenceExtractor;
diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/PlainTextDocumentProcessorPlugin.java b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/PlainTextDocumentProcessorPlugin.java
index 1dac05f1..bc288430 100644
--- a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/PlainTextDocumentProcessorPlugin.java
+++ b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/PlainTextDocumentProcessorPlugin.java
@@ -2,6 +2,7 @@ package nu.marginalia.converting.processor.plugin;
 
 import com.google.inject.Inject;
 import com.google.inject.name.Named;
+import nu.marginalia.converting.language.LanguageFilter;
 import nu.marginalia.converting.processor.logic.DocumentLengthLogic;
 import nu.marginalia.crawling.model.CrawledDocument;
 import nu.marginalia.crawling.model.CrawledDomain;
@@ -38,11 +39,13 @@ public class PlainTextDocumentProcessorPlugin extends AbstractDocumentProcessorP
 
     @Inject
     public PlainTextDocumentProcessorPlugin(@Named("max-title-length") Integer maxTitleLength,
+                                            LanguageFilter languageFilter,
                                             SentenceExtractor sentenceExtractor,
                                             DocumentKeywordExtractor keywordExtractor,
                                             DocumentLengthLogic documentLengthLogic
     )
     {
+        super(languageFilter);
         this.documentLengthLogic = documentLengthLogic;
         this.maxTitleLength = maxTitleLength;
         this.sentenceExtractor = sentenceExtractor;
diff --git a/code/processes/converting-process/src/test/java/nu/marginalia/converting/language/LanguageFilterTest.java b/code/processes/converting-process/src/test/java/nu/marginalia/converting/language/LanguageFilterTest.java
index f37c0cb5..dd3a8936 100644
--- a/code/processes/converting-process/src/test/java/nu/marginalia/converting/language/LanguageFilterTest.java
+++ b/code/processes/converting-process/src/test/java/nu/marginalia/converting/language/LanguageFilterTest.java
@@ -1,5 +1,6 @@
 package nu.marginalia.converting.language;
 
+import nu.marginalia.converting.util.TestLanguageModels;
 import org.jsoup.Jsoup;
 import org.junit.jupiter.api.Test;
 
@@ -10,16 +11,10 @@ class LanguageFilterTest {
 
     @Test
     void isPageInteresting() {
-        var languageFilter = new LanguageFilter();
+        var languageFilter = new LanguageFilter(TestLanguageModels.getLanguageModels());
         assertTrue(languageFilter.isPageInterestingByHtmlTag(Jsoup.parse("")).orElse(true));
         assertTrue(languageFilter.isPageInterestingByHtmlTag(Jsoup.parse("")).orElse(false));
         assertFalse(languageFilter.isPageInterestingByHtmlTag(Jsoup.parse("")).orElse(false));
     }
 
-    @Test
-    public void isStringChinese() {
-        var languageFilter = new LanguageFilter();
-        assertTrue(languageFilter.isBlockedUnicodeRange("溶岩ドームの手前に広がる斜面(木が生えているところ)は普賢岳の山体です.今回の噴火にともない,このあたりの山体がマグマに押されて変形し,北(写真では左)にむかって100mほどせりだしました\n"));
-    }
-
 }
\ No newline at end of file
diff --git a/code/processes/converting-process/src/test/java/nu/marginalia/converting/util/TestLanguageModels.java b/code/processes/converting-process/src/test/java/nu/marginalia/converting/util/TestLanguageModels.java
new file mode 100644
index 00000000..4ad1e430
--- /dev/null
+++ b/code/processes/converting-process/src/test/java/nu/marginalia/converting/util/TestLanguageModels.java
@@ -0,0 +1,38 @@
+package nu.marginalia.converting.util;
+
+import nu.marginalia.LanguageModels;
+import nu.marginalia.WmsaHome;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Optional;
+
+public class TestLanguageModels {
+    private static final Path LANGUAGE_MODELS_DEFAULT = WmsaHome.getHomePath().resolve("model");
+
+    public static Path getLanguageModelsPath() {
+        final Path languageModelsHome = Optional.ofNullable(System.getenv("LANGUAGE_MODELS_HOME"))
+                .map(Path::of)
+                .orElse(LANGUAGE_MODELS_DEFAULT);
+
+        if (!Files.isDirectory(languageModelsHome)) {
+            throw new IllegalStateException("Could not find $LANGUAGE_MODELS_HOME, see doc/language-models.md");
+        }
+        return languageModelsHome;
+    }
+
+    public static LanguageModels getLanguageModels() {
+
+        var languageModelsHome = getLanguageModelsPath();
+
+        return new LanguageModels(
+                languageModelsHome.resolve("ngrams.bin"),
+                languageModelsHome.resolve("tfreq-new-algo3.bin"),
+                languageModelsHome.resolve("opennlp-sentence.bin"),
+                languageModelsHome.resolve("English.RDR"),
+                languageModelsHome.resolve("English.DICT"),
+                languageModelsHome.resolve("opennlp-tokens.bin"),
+                languageModelsHome.resolve("lid.176.ftz")
+        );
+    }
+}
diff --git a/code/services-core/search-service/src/test/java/nu/marginalia/util/TestLanguageModels.java b/code/services-core/search-service/src/test/java/nu/marginalia/util/TestLanguageModels.java
index 81df1ed9..5efd2025 100644
--- a/code/services-core/search-service/src/test/java/nu/marginalia/util/TestLanguageModels.java
+++ b/code/services-core/search-service/src/test/java/nu/marginalia/util/TestLanguageModels.java
@@ -31,7 +31,8 @@ public class TestLanguageModels {
                 languageModelsHome.resolve("opennlp-sentence.bin"),
                 languageModelsHome.resolve("English.RDR"),
                 languageModelsHome.resolve("English.DICT"),
-                languageModelsHome.resolve("opennlp-tokens.bin")
+                languageModelsHome.resolve("opennlp-tokens.bin"),
+                languageModelsHome.resolve("lid.176.ftz")
         );
     }
 }
diff --git a/code/tools/term-frequency-extractor/src/main/java/nu/marginalia/tools/TermFrequencyExtractor.java b/code/tools/term-frequency-extractor/src/main/java/nu/marginalia/tools/TermFrequencyExtractor.java
index c5a52dd3..4a41cb55 100644
--- a/code/tools/term-frequency-extractor/src/main/java/nu/marginalia/tools/TermFrequencyExtractor.java
+++ b/code/tools/term-frequency-extractor/src/main/java/nu/marginalia/tools/TermFrequencyExtractor.java
@@ -36,7 +36,7 @@ public class TermFrequencyExtractor {
 
         var plan = new CrawlPlanLoader().load(Path.of(args[0]));
         ThreadLocal se = ThreadLocal.withInitial(() -> new SentenceExtractor(WmsaHome.getLanguageModels()));
-        LanguageFilter lf = new LanguageFilter();
+        LanguageFilter lf = new LanguageFilter(WmsaHome.getLanguageModels());
 
         TLongIntHashMap counts = new TLongIntHashMap(100_000_000, 0.7f, -1, -1);
 
diff --git a/run/setup.sh b/run/setup.sh
index 24feba85..4e5f258a 100755
--- a/run/setup.sh
+++ b/run/setup.sh
@@ -26,6 +26,7 @@ download_model model/opennlp-sentence.bin https://mirrors.estointernet.in/apache
 download_model model/opennlp-tokens.bin https://mirrors.estointernet.in/apache/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin
 download_model model/ngrams.bin https://downloads.marginalia.nu/model/ngrams.bin
 download_model model/tfreq-new-algo3.bin https://downloads.marginalia.nu/model/tfreq-new-algo3.bin
+download_model model/lid.176.ftz https://s3-us-west-1.amazonaws.com/fasttext-vectors/supervised_models/lid.176.ftz
 
 download_model data/IP2LOCATION-LITE-DB1.CSV.ZIP https://download.ip2location.com/lite/IP2LOCATION-LITE-DB1.CSV.ZIP
 unzip -qn -d data data/IP2LOCATION-LITE-DB1.CSV.ZIP
diff --git a/settings.gradle b/settings.gradle
index 361c3ec6..67b3c6fa 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -153,7 +153,7 @@ dependencyResolutionManagement {
 
             library('stanford.corenlp','edu.stanford.nlp','stanford-corenlp').version('4.4.0')
             library('opennlp','org.apache.opennlp','opennlp-tools').version('1.9.4')
-
+            library('fasttext','com.github.vinhkhuc','jfasttext').version('0.5')
             library('roaringbitmap','org.roaringbitmap','RoaringBitmap').version('0.9.32')
             library('opencsv','com.opencsv','opencsv').version('5.6')
             library('bucket4j','com.github.vladimir-bukhtoyarov','bucket4j-core').version('7.5.0')
@@ -184,7 +184,7 @@ dependencyResolutionManagement {
             bundle('slf4j.test', ['slf4j.jdk14'])
             bundle('prometheus', ['prometheus', 'prometheus-servlet', 'prometheus-server', 'prometheus-hotspot'])
             bundle('mariadb', ['mariadb-client', 'hikaricp'])
-            bundle('nlp', ['stanford.corenlp', 'opennlp'])
+            bundle('nlp', ['stanford.corenlp', 'opennlp', 'fasttext'])
             bundle('selenium', ['selenium.chrome', 'selenium.java'])
             bundle('handlebars', ['handlebars', 'handlebars.markdown'])
 