From 74ae97f8f4caf0d834f9a857ab6bf0ad2bc597e4 Mon Sep 17 00:00:00 2001 From: vlofgren Date: Thu, 19 May 2022 18:05:10 +0200 Subject: [PATCH] Added test util for the tests to remove hard coding of LanguageModels. --- doc/language-models.md | 15 +++++++++ .../marginalia/util/TestLanguageModels.java | 31 +++++++++++++++++++ .../assistant/suggest/SuggestionsTest.java | 10 ++---- .../crawler/domain/DomainCrawlerTest.java | 10 ++---- .../crawler/domain/DomainCrawlerTest2.java | 10 ++---- .../processing/SentenceExtractorTest.java | 10 ++---- .../domain/processor/HtmlProcessorTest.java | 10 ++---- .../edge/index/service/EdgeSearchTest.java | 10 ++---- .../index/service/EdgeSearchTestLocal.java | 11 ++----- .../integration/arxiv/ArxivParserTest.java | 10 ++---- .../stackoverflow/StackOverflowPostsTest.java | 10 ++---- .../integration/wikipedia/WikipediaTest.java | 10 ++---- .../search/query/BodyQueryParserTest.java | 10 ++---- .../search/query/EnglishDictionaryTest.java | 10 ++---- .../edge/search/query/QueryParserTest.java | 10 ++---- .../edge/search/query/QueryVariantsTest.java | 10 ++---- 16 files changed, 74 insertions(+), 113 deletions(-) create mode 100644 doc/language-models.md create mode 100644 marginalia_nu/src/test/java/nu/marginalia/util/TestLanguageModels.java diff --git a/doc/language-models.md b/doc/language-models.md new file mode 100644 index 00000000..c5803bc5 --- /dev/null +++ b/doc/language-models.md @@ -0,0 +1,15 @@ +# Language Models + +## For Tests + +Many tests require language models to work. +Download them from [https://downloads.marginalia.nu/](https://downloads.marginalia.nu/) +and put them in a directory of your choice. Then set the environment +variable ```LANGUAGE_MODELS_HOME``` to point to this directory. + +Alternatively, patch ```nu.marginalia.util.TestLanguageModels``` to +default to where you've put them. 
+
+## For Production
+
+TBW
\ No newline at end of file
diff --git a/marginalia_nu/src/test/java/nu/marginalia/util/TestLanguageModels.java b/marginalia_nu/src/test/java/nu/marginalia/util/TestLanguageModels.java
new file mode 100644
index 00000000..2d83c3c9
--- /dev/null
+++ b/marginalia_nu/src/test/java/nu/marginalia/util/TestLanguageModels.java
@@ -0,0 +1,49 @@
+package nu.marginalia.util;
+
+import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Optional;
+
+/**
+ * Test helper that builds a {@link LanguageModels} from model files in a single directory.
+ *
+ * <p>The directory is read from the {@code LANGUAGE_MODELS_HOME} environment variable,
+ * with {@link #LANGUAGE_MODELS_DEFAULT} as the fallback; see doc/language-models.md.
+ */
+public class TestLanguageModels {
+    /** Fallback directory used when LANGUAGE_MODELS_HOME is unset or blank. */
+    private static final Path LANGUAGE_MODELS_DEFAULT = Path.of("/home/vlofgren/Work/ngrams/");
+
+    /**
+     * Resolves the language model directory and returns paths to the model files in it.
+     *
+     * @return a {@link LanguageModels} whose paths all point into the resolved directory
+     * @throws IllegalStateException if the resolved directory does not exist
+     */
+    public static LanguageModels getLanguageModels() {
+
+        // A blank value would become Path.of(""), which denotes the working directory and
+        // passes the directory check below; treat it the same as an unset variable.
+        final Path languageModelsHome = Optional.ofNullable(System.getenv("LANGUAGE_MODELS_HOME"))
+                .filter(home -> !home.isBlank())
+                .map(Path::of)
+                .orElse(LANGUAGE_MODELS_DEFAULT);
+
+        if (!Files.isDirectory(languageModelsHome)) {
+            // Name the directory that was actually checked, since it may be the default
+            throw new IllegalStateException("Could not find $LANGUAGE_MODELS_HOME (looked in "
+                    + languageModelsHome + "), see doc/language-models.md");
+        }
+
+        return new LanguageModels(
+                languageModelsHome.resolve("ngrams-generous-emstr.bin"),
+                languageModelsHome.resolve("tfreq-generous-emstr.bin"),
+                languageModelsHome.resolve("opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"),
+                languageModelsHome.resolve("English.RDR"),
+                languageModelsHome.resolve("English.DICT"),
+                languageModelsHome.resolve("opennlp-tok.bin")
+        );
+    }
+}
diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/assistant/suggest/SuggestionsTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/assistant/suggest/SuggestionsTest.java
index 94f68001..02fe26ff 100644
--- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/assistant/suggest/SuggestionsTest.java
+++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/assistant/suggest/SuggestionsTest.java
@@ -1,5 +1,6 @@
 package nu.marginalia.wmsa.edge.assistant.suggest;
 
+import nu.marginalia.util.TestLanguageModels;
 import
nu.marginalia.wmsa.edge.assistant.dict.NGramDict; import nu.marginalia.wmsa.edge.assistant.dict.SpellChecker; import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; @@ -14,14 +15,7 @@ class SuggestionsTest { @BeforeAll public static void setUp() { - LanguageModels lm = new LanguageModels( - Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), - Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo3.bin"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), - Path.of("/home/vlofgren/Work/ngrams/English.RDR"), - Path.of("/home/vlofgren/Work/ngrams/English.DICT"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin") - ); + LanguageModels lm = TestLanguageModels.getLanguageModels(); suggestions = new Suggestions(Path.of("/home/vlofgren/Work/sql-titles-clean"), new SpellChecker(), new NGramDict(lm)); } diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlerTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlerTest.java index 931d2cf7..d6b497ed 100644 --- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlerTest.java +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlerTest.java @@ -4,6 +4,7 @@ import com.zaxxer.hikari.HikariDataSource; import io.reactivex.rxjava3.exceptions.UndeliverableException; import io.reactivex.rxjava3.plugins.RxJavaPlugins; import lombok.SneakyThrows; +import nu.marginalia.util.TestLanguageModels; import nu.marginalia.util.TestUtil; import nu.marginalia.wmsa.configuration.server.Context; import nu.marginalia.wmsa.configuration.server.Initialization; @@ -150,14 +151,7 @@ class DomainCrawlerTest { languageFilter = new LanguageFilter(); - var lm = new LanguageModels( - Path.of("/var/lib/wmsa/model/ngrams-generous-emstr.bin"), - Path.of("/var/lib/wmsa/model/tfreq-generous-emstr.bin"), - 
Path.of("/var/lib/wmsa/model/opennlp-sentence.bin"), - Path.of("/var/lib/wmsa/model/English.RDR"), - Path.of("/var/lib/wmsa/model/English.DICT"), - Path.of("/var/lib/wmsa/model/opennlp-tok.bin") - ); + var lm = TestLanguageModels.getLanguageModels(); var ke = new DocumentKeywordExtractor(new NGramDict(lm)); var se = new SentenceExtractor(lm); diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlerTest2.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlerTest2.java index da026154..7d9fee0b 100644 --- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlerTest2.java +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlerTest2.java @@ -2,6 +2,7 @@ package nu.marginalia.wmsa.edge.crawler.domain; import com.opencsv.exceptions.CsvValidationException; import lombok.SneakyThrows; +import nu.marginalia.util.TestLanguageModels; import nu.marginalia.wmsa.edge.archive.client.ArchiveClient; import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; import nu.marginalia.wmsa.edge.crawler.domain.language.LanguageFilter; @@ -42,14 +43,7 @@ class DomainCrawlerTest2 { var ingress = new EdgeIndexTask(new EdgeDomain("memex.marginalia.nu"), 0, 10, 1.); ingress.urls.add(new EdgeUrl("https://memex.marginalia.nu/")); - LanguageModels lm = new LanguageModels( - Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), - Path.of("/home/vlofgren/Work/ngrams/tfreq-generous-emstr.bin"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), - Path.of("/home/vlofgren/Work/ngrams/English.RDR"), - Path.of("/home/vlofgren/Work/ngrams/English.DICT"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-tok.bin") - ); + LanguageModels lm = TestLanguageModels.getLanguageModels(); var dict = new NGramDict(lm); HtmlProcessor processor = new HtmlProcessor(new DocumentKeywordExtractor(dict),new SentenceExtractor(lm)); diff --git 
a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/SentenceExtractorTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/SentenceExtractorTest.java index 435e203d..ad71e526 100644 --- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/SentenceExtractorTest.java +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/SentenceExtractorTest.java @@ -3,6 +3,7 @@ package nu.marginalia.wmsa.edge.crawler.domain.language.processing; import com.zaxxer.hikari.HikariConfig; import com.zaxxer.hikari.HikariDataSource; import lombok.SneakyThrows; +import nu.marginalia.util.TestLanguageModels; import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.WordRep; @@ -28,14 +29,7 @@ import java.util.regex.Pattern; class SentenceExtractorTest { SentenceExtractor newSe; SentenceExtractor legacySe; - LanguageModels lm = new LanguageModels( - Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), - Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo4.bin"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), - Path.of("/home/vlofgren/Work/ngrams/English.RDR"), - Path.of("/home/vlofgren/Work/ngrams/English.DICT"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin") - ); + LanguageModels lm = TestLanguageModels.getLanguageModels(); @BeforeEach public void setUp() { diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/processor/HtmlProcessorTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/processor/HtmlProcessorTest.java index b90585c0..3a9d4d7c 100644 --- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/processor/HtmlProcessorTest.java +++ 
b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/processor/HtmlProcessorTest.java @@ -1,5 +1,6 @@ package nu.marginalia.wmsa.edge.crawler.domain.processor; +import nu.marginalia.util.TestLanguageModels; import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; import nu.marginalia.wmsa.edge.crawler.domain.language.processing.DocumentKeywordExtractor; @@ -24,14 +25,7 @@ import java.util.List; class HtmlProcessorTest { Logger logger = LoggerFactory.getLogger(getClass()); - LanguageModels lm = new LanguageModels( - Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), - Path.of("/home/vlofgren/Work/ngrams/tfreq-generous-emstr.bin"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-sentence.bin"), - Path.of("/var/lib/wmsa/model/English.RDR"), - Path.of("/var/lib/wmsa/model/English.DICT"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-tok.bin") - ); + LanguageModels lm = TestLanguageModels.getLanguageModels(); HtmlProcessor processor = new HtmlProcessor(new DocumentKeywordExtractor(new NGramDict(lm)),new SentenceExtractor(lm)); @Test diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/EdgeSearchTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/EdgeSearchTest.java index 216bc1f3..2c98173b 100644 --- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/EdgeSearchTest.java +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/EdgeSearchTest.java @@ -3,6 +3,7 @@ package nu.marginalia.wmsa.edge.index.service; import com.opencsv.exceptions.CsvValidationException; import com.zaxxer.hikari.HikariDataSource; import lombok.SneakyThrows; +import nu.marginalia.util.TestLanguageModels; import nu.marginalia.util.TestUtil; import nu.marginalia.wmsa.configuration.server.Context; import nu.marginalia.wmsa.configuration.server.Initialization; @@ -105,14 +106,7 @@ public class EdgeSearchTest { static 
Initialization init = new Initialization(); private QueryParser parser; private static NGramDict dict; - private static LanguageModels lm = new LanguageModels( - Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), - Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo3.bin"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), - Path.of("/home/vlofgren/Work/ngrams/English.RDR"), - Path.of("/home/vlofgren/Work/ngrams/English.DICT"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin") - ); + private static LanguageModels lm = TestLanguageModels.getLanguageModels(); @SneakyThrows diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/EdgeSearchTestLocal.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/EdgeSearchTestLocal.java index c3b605a9..3a5c349f 100644 --- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/EdgeSearchTestLocal.java +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/EdgeSearchTestLocal.java @@ -2,6 +2,7 @@ package nu.marginalia.wmsa.edge.index.service; import com.zaxxer.hikari.HikariDataSource; import lombok.SneakyThrows; +import nu.marginalia.util.TestLanguageModels; import nu.marginalia.util.TestUtil; import nu.marginalia.wmsa.configuration.ServiceDescriptor; import nu.marginalia.wmsa.configuration.module.DatabaseModule; @@ -69,15 +70,7 @@ public class EdgeSearchTestLocal { static Initialization init = new Initialization(); private QueryParser parser; private static NGramDict dict; - private static LanguageModels lm = new LanguageModels( - Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), - Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo3.bin"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), - Path.of("/home/vlofgren/Work/ngrams/English.RDR"), - Path.of("/home/vlofgren/Work/ngrams/English.DICT"), - 
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin") - ); - + private static LanguageModels lm = TestLanguageModels.getLanguageModels(); @SneakyThrows @BeforeAll diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/integration/arxiv/ArxivParserTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/integration/arxiv/ArxivParserTest.java index d522261b..b775cc4f 100644 --- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/integration/arxiv/ArxivParserTest.java +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/integration/arxiv/ArxivParserTest.java @@ -1,5 +1,6 @@ package nu.marginalia.wmsa.edge.integration.arxiv; +import nu.marginalia.util.TestLanguageModels; import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; import nu.marginalia.wmsa.edge.crawler.domain.language.processing.DocumentKeywordExtractor; @@ -17,14 +18,7 @@ import static org.junit.jupiter.api.Assertions.*; @Disabled // this isn't used and the test is hella slow class ArxivParserTest { - LanguageModels lm = new LanguageModels( - Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), - Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo.bin"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), - Path.of("/home/vlofgren/Work/ngrams/English.RDR"), - Path.of("/home/vlofgren/Work/ngrams/English.DICT"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin") - ); + LanguageModels lm = TestLanguageModels.getLanguageModels(); @Test void parse() throws IOException { diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/integration/stackoverflow/StackOverflowPostsTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/integration/stackoverflow/StackOverflowPostsTest.java index 05f66976..2c94cd69 100644 --- 
a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/integration/stackoverflow/StackOverflowPostsTest.java +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/integration/stackoverflow/StackOverflowPostsTest.java @@ -1,5 +1,6 @@ package nu.marginalia.wmsa.edge.integration.stackoverflow; +import nu.marginalia.util.TestLanguageModels; import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; import nu.marginalia.wmsa.edge.crawler.domain.language.processing.DocumentKeywordExtractor; @@ -16,14 +17,7 @@ import java.io.IOException; import java.nio.file.Path; public class StackOverflowPostsTest { - LanguageModels lm = new LanguageModels( - Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), - Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo4.bin"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), - Path.of("/home/vlofgren/Work/ngrams/English.RDR"), - Path.of("/home/vlofgren/Work/ngrams/English.DICT"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin") - ); + LanguageModels lm = TestLanguageModels.getLanguageModels(); @Test public void test() throws IOException, ParserConfigurationException, SAXException, InterruptedException { diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/integration/wikipedia/WikipediaTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/integration/wikipedia/WikipediaTest.java index 41c6b362..290b29cb 100644 --- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/integration/wikipedia/WikipediaTest.java +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/integration/wikipedia/WikipediaTest.java @@ -1,6 +1,7 @@ package nu.marginalia.wmsa.edge.integration.wikipedia; import lombok.SneakyThrows; +import nu.marginalia.util.TestLanguageModels; import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; import 
nu.marginalia.wmsa.edge.crawler.domain.language.DocumentDebugger; import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; @@ -17,14 +18,7 @@ import java.io.IOException; import java.nio.file.Path; public class WikipediaTest { - LanguageModels lm = new LanguageModels( - Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), - Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo4.bin"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), - Path.of("/home/vlofgren/Work/ngrams/English.RDR"), - Path.of("/home/vlofgren/Work/ngrams/English.DICT"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin") - ); + LanguageModels lm = TestLanguageModels.getLanguageModels(); @Test @SneakyThrows public void test() { diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/BodyQueryParserTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/BodyQueryParserTest.java index 8f159817..1b70a9b9 100644 --- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/BodyQueryParserTest.java +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/BodyQueryParserTest.java @@ -1,5 +1,6 @@ package nu.marginalia.wmsa.edge.search.query; +import nu.marginalia.util.TestLanguageModels; import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; import org.junit.BeforeClass; @@ -15,14 +16,7 @@ class BodyQueryParserTest { private QueryParser parser; private static NGramDict dict; private static EnglishDictionary englishDictionary; - private static LanguageModels lm = new LanguageModels( - Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), - Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo4.bin"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), - Path.of("/home/vlofgren/Work/ngrams/English.RDR"), - 
Path.of("/home/vlofgren/Work/ngrams/English.DICT"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin") - ); + private static LanguageModels lm = TestLanguageModels.getLanguageModels(); @BeforeClass public static void init() { diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/EnglishDictionaryTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/EnglishDictionaryTest.java index 4c0514ea..0b463798 100644 --- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/EnglishDictionaryTest.java +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/EnglishDictionaryTest.java @@ -1,5 +1,6 @@ package nu.marginalia.wmsa.edge.search.query; +import nu.marginalia.util.TestLanguageModels; import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; import org.junit.jupiter.api.Test; @@ -12,14 +13,7 @@ class EnglishDictionaryTest { @Test void getWordVariants() { - LanguageModels lm = new LanguageModels( - Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), - Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo4.bin"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), - Path.of("/home/vlofgren/Work/ngrams/English.RDR"), - Path.of("/home/vlofgren/Work/ngrams/English.DICT"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin") - ); + LanguageModels lm = TestLanguageModels.getLanguageModels(); var dict = new NGramDict(lm); new EnglishDictionary(dict).getWordVariants("dos").forEach(System.out::println); diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/QueryParserTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/QueryParserTest.java index 0e9aec69..cc3123ca 100644 --- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/QueryParserTest.java +++ 
b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/QueryParserTest.java @@ -1,5 +1,6 @@ package nu.marginalia.wmsa.edge.search.query; +import nu.marginalia.util.TestLanguageModels; import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; import org.junit.BeforeClass; @@ -13,14 +14,7 @@ class QueryParserTest { private QueryParser parser; private static NGramDict dict; private static EnglishDictionary englishDictionary; - private static LanguageModels lm = new LanguageModels( - Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), - Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo4.bin"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), - Path.of("/home/vlofgren/Work/ngrams/English.RDR"), - Path.of("/home/vlofgren/Work/ngrams/English.DICT"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin") - ); + private static LanguageModels lm = TestLanguageModels.getLanguageModels(); @BeforeEach public void setUp() { diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/QueryVariantsTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/QueryVariantsTest.java index 91ec77af..3122c8a0 100644 --- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/QueryVariantsTest.java +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/QueryVariantsTest.java @@ -1,5 +1,6 @@ package nu.marginalia.wmsa.edge.search.query; +import nu.marginalia.util.TestLanguageModels; import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; import nu.marginalia.wmsa.edge.crawler.domain.language.processing.SentenceExtractor; @@ -15,14 +16,7 @@ class QueryVariantsTest { SentenceExtractor se; @BeforeEach public void setUp() { - LanguageModels lm = new LanguageModels( - 
Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), - Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo4.bin"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), - Path.of("/home/vlofgren/Work/ngrams/English.RDR"), - Path.of("/home/vlofgren/Work/ngrams/English.DICT"), - Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin") - ); + LanguageModels lm = TestLanguageModels.getLanguageModels(); se = new SentenceExtractor(lm);