mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 04:58:59 +00:00
Added test util for the tests to remove hard coding of LanguageModels.
This commit is contained in:
parent
c24b978c51
commit
74ae97f8f4
15
doc/language-models.md
Normal file
15
doc/language-models.md
Normal file
@ -0,0 +1,15 @@
|
||||
# Language Models
|
||||
|
||||
## For Tests
|
||||
|
||||
Many tests require language models to work;
|
||||
download them from [https://downloads.marginalia.nu/](https://downloads.marginalia.nu/),
|
||||
and put them in a directory of your choice. Then set the environment
|
||||
variable ```LANGUAGE_MODELS_HOME``` to point to this directory.
|
||||
|
||||
Alternatively, patch ```nu.marginalia.util.TestLanguageModels``` to
|
||||
default to where you've put them.
|
||||
|
||||
## For Production
|
||||
|
||||
TBW (to be written)
|
@ -0,0 +1,31 @@
|
||||
package nu.marginalia.util;
|
||||
|
||||
import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Optional;
|
||||
|
||||
public class TestLanguageModels {
|
||||
private static final Path LANGUAGE_MODELS_DEFAULT = Path.of("/home/vlofgren/Work/ngrams/");
|
||||
|
||||
public static LanguageModels getLanguageModels() {
|
||||
|
||||
final Path languageModelsHome = Optional.ofNullable(System.getenv("LANGUAGE_MODELS_HOME"))
|
||||
.map(Path::of)
|
||||
.orElse(LANGUAGE_MODELS_DEFAULT);
|
||||
|
||||
if (!Files.isDirectory(languageModelsHome)) {
|
||||
throw new IllegalStateException("Could not find $LANGUAGE_MODELS_HOME, see doc/language-models.md");
|
||||
}
|
||||
|
||||
return new LanguageModels(
|
||||
languageModelsHome.resolve("ngrams-generous-emstr.bin"),
|
||||
languageModelsHome.resolve("tfreq-generous-emstr.bin"),
|
||||
languageModelsHome.resolve("opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"),
|
||||
languageModelsHome.resolve("English.RDR"),
|
||||
languageModelsHome.resolve("English.DICT"),
|
||||
languageModelsHome.resolve("opennlp-tok.bin")
|
||||
);
|
||||
}
|
||||
}
|
@ -1,5 +1,6 @@
|
||||
package nu.marginalia.wmsa.edge.assistant.suggest;
|
||||
|
||||
import nu.marginalia.util.TestLanguageModels;
|
||||
import nu.marginalia.wmsa.edge.assistant.dict.NGramDict;
|
||||
import nu.marginalia.wmsa.edge.assistant.dict.SpellChecker;
|
||||
import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels;
|
||||
@ -14,14 +15,7 @@ class SuggestionsTest {
|
||||
|
||||
@BeforeAll
|
||||
public static void setUp() {
|
||||
LanguageModels lm = new LanguageModels(
|
||||
Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo3.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.RDR"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.DICT"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin")
|
||||
);
|
||||
LanguageModels lm = TestLanguageModels.getLanguageModels();
|
||||
suggestions = new Suggestions(Path.of("/home/vlofgren/Work/sql-titles-clean"),
|
||||
new SpellChecker(), new NGramDict(lm));
|
||||
}
|
||||
|
@ -4,6 +4,7 @@ import com.zaxxer.hikari.HikariDataSource;
|
||||
import io.reactivex.rxjava3.exceptions.UndeliverableException;
|
||||
import io.reactivex.rxjava3.plugins.RxJavaPlugins;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.util.TestLanguageModels;
|
||||
import nu.marginalia.util.TestUtil;
|
||||
import nu.marginalia.wmsa.configuration.server.Context;
|
||||
import nu.marginalia.wmsa.configuration.server.Initialization;
|
||||
@ -150,14 +151,7 @@ class DomainCrawlerTest {
|
||||
|
||||
languageFilter = new LanguageFilter();
|
||||
|
||||
var lm = new LanguageModels(
|
||||
Path.of("/var/lib/wmsa/model/ngrams-generous-emstr.bin"),
|
||||
Path.of("/var/lib/wmsa/model/tfreq-generous-emstr.bin"),
|
||||
Path.of("/var/lib/wmsa/model/opennlp-sentence.bin"),
|
||||
Path.of("/var/lib/wmsa/model/English.RDR"),
|
||||
Path.of("/var/lib/wmsa/model/English.DICT"),
|
||||
Path.of("/var/lib/wmsa/model/opennlp-tok.bin")
|
||||
);
|
||||
var lm = TestLanguageModels.getLanguageModels();
|
||||
|
||||
var ke = new DocumentKeywordExtractor(new NGramDict(lm));
|
||||
var se = new SentenceExtractor(lm);
|
||||
|
@ -2,6 +2,7 @@ package nu.marginalia.wmsa.edge.crawler.domain;
|
||||
|
||||
import com.opencsv.exceptions.CsvValidationException;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.util.TestLanguageModels;
|
||||
import nu.marginalia.wmsa.edge.archive.client.ArchiveClient;
|
||||
import nu.marginalia.wmsa.edge.assistant.dict.NGramDict;
|
||||
import nu.marginalia.wmsa.edge.crawler.domain.language.LanguageFilter;
|
||||
@ -42,14 +43,7 @@ class DomainCrawlerTest2 {
|
||||
var ingress = new EdgeIndexTask(new EdgeDomain("memex.marginalia.nu"), 0, 10, 1.);
|
||||
ingress.urls.add(new EdgeUrl("https://memex.marginalia.nu/"));
|
||||
|
||||
LanguageModels lm = new LanguageModels(
|
||||
Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/tfreq-generous-emstr.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.RDR"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.DICT"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-tok.bin")
|
||||
);
|
||||
LanguageModels lm = TestLanguageModels.getLanguageModels();
|
||||
var dict = new NGramDict(lm);
|
||||
HtmlProcessor processor = new HtmlProcessor(new DocumentKeywordExtractor(dict),new SentenceExtractor(lm));
|
||||
|
||||
|
@ -3,6 +3,7 @@ package nu.marginalia.wmsa.edge.crawler.domain.language.processing;
|
||||
import com.zaxxer.hikari.HikariConfig;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.util.TestLanguageModels;
|
||||
import nu.marginalia.wmsa.edge.assistant.dict.NGramDict;
|
||||
import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels;
|
||||
import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.WordRep;
|
||||
@ -28,14 +29,7 @@ import java.util.regex.Pattern;
|
||||
class SentenceExtractorTest {
|
||||
SentenceExtractor newSe;
|
||||
SentenceExtractor legacySe;
|
||||
LanguageModels lm = new LanguageModels(
|
||||
Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo4.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.RDR"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.DICT"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin")
|
||||
);
|
||||
LanguageModels lm = TestLanguageModels.getLanguageModels();
|
||||
@BeforeEach
|
||||
public void setUp() {
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
package nu.marginalia.wmsa.edge.crawler.domain.processor;
|
||||
|
||||
import nu.marginalia.util.TestLanguageModels;
|
||||
import nu.marginalia.wmsa.edge.assistant.dict.NGramDict;
|
||||
import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels;
|
||||
import nu.marginalia.wmsa.edge.crawler.domain.language.processing.DocumentKeywordExtractor;
|
||||
@ -24,14 +25,7 @@ import java.util.List;
|
||||
class HtmlProcessorTest {
|
||||
Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
LanguageModels lm = new LanguageModels(
|
||||
Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/tfreq-generous-emstr.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-sentence.bin"),
|
||||
Path.of("/var/lib/wmsa/model/English.RDR"),
|
||||
Path.of("/var/lib/wmsa/model/English.DICT"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-tok.bin")
|
||||
);
|
||||
LanguageModels lm = TestLanguageModels.getLanguageModels();
|
||||
HtmlProcessor processor = new HtmlProcessor(new DocumentKeywordExtractor(new NGramDict(lm)),new SentenceExtractor(lm));
|
||||
|
||||
@Test
|
||||
|
@ -3,6 +3,7 @@ package nu.marginalia.wmsa.edge.index.service;
|
||||
import com.opencsv.exceptions.CsvValidationException;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.util.TestLanguageModels;
|
||||
import nu.marginalia.util.TestUtil;
|
||||
import nu.marginalia.wmsa.configuration.server.Context;
|
||||
import nu.marginalia.wmsa.configuration.server.Initialization;
|
||||
@ -105,14 +106,7 @@ public class EdgeSearchTest {
|
||||
static Initialization init = new Initialization();
|
||||
private QueryParser parser;
|
||||
private static NGramDict dict;
|
||||
private static LanguageModels lm = new LanguageModels(
|
||||
Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo3.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.RDR"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.DICT"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin")
|
||||
);
|
||||
private static LanguageModels lm = TestLanguageModels.getLanguageModels();
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
|
@ -2,6 +2,7 @@ package nu.marginalia.wmsa.edge.index.service;
|
||||
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.util.TestLanguageModels;
|
||||
import nu.marginalia.util.TestUtil;
|
||||
import nu.marginalia.wmsa.configuration.ServiceDescriptor;
|
||||
import nu.marginalia.wmsa.configuration.module.DatabaseModule;
|
||||
@ -69,15 +70,7 @@ public class EdgeSearchTestLocal {
|
||||
static Initialization init = new Initialization();
|
||||
private QueryParser parser;
|
||||
private static NGramDict dict;
|
||||
private static LanguageModels lm = new LanguageModels(
|
||||
Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo3.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.RDR"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.DICT"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin")
|
||||
);
|
||||
|
||||
private static LanguageModels lm = TestLanguageModels.getLanguageModels();
|
||||
|
||||
@SneakyThrows
|
||||
@BeforeAll
|
||||
|
@ -1,5 +1,6 @@
|
||||
package nu.marginalia.wmsa.edge.integration.arxiv;
|
||||
|
||||
import nu.marginalia.util.TestLanguageModels;
|
||||
import nu.marginalia.wmsa.edge.assistant.dict.NGramDict;
|
||||
import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels;
|
||||
import nu.marginalia.wmsa.edge.crawler.domain.language.processing.DocumentKeywordExtractor;
|
||||
@ -17,14 +18,7 @@ import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
@Disabled // this isn't used and the test is hella slow
|
||||
class ArxivParserTest {
|
||||
LanguageModels lm = new LanguageModels(
|
||||
Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.RDR"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.DICT"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin")
|
||||
);
|
||||
LanguageModels lm = TestLanguageModels.getLanguageModels();
|
||||
|
||||
@Test
|
||||
void parse() throws IOException {
|
||||
|
@ -1,5 +1,6 @@
|
||||
package nu.marginalia.wmsa.edge.integration.stackoverflow;
|
||||
|
||||
import nu.marginalia.util.TestLanguageModels;
|
||||
import nu.marginalia.wmsa.edge.assistant.dict.NGramDict;
|
||||
import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels;
|
||||
import nu.marginalia.wmsa.edge.crawler.domain.language.processing.DocumentKeywordExtractor;
|
||||
@ -16,14 +17,7 @@ import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class StackOverflowPostsTest {
|
||||
LanguageModels lm = new LanguageModels(
|
||||
Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo4.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.RDR"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.DICT"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin")
|
||||
);
|
||||
LanguageModels lm = TestLanguageModels.getLanguageModels();
|
||||
|
||||
@Test
|
||||
public void test() throws IOException, ParserConfigurationException, SAXException, InterruptedException {
|
||||
|
@ -1,6 +1,7 @@
|
||||
package nu.marginalia.wmsa.edge.integration.wikipedia;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.util.TestLanguageModels;
|
||||
import nu.marginalia.wmsa.edge.assistant.dict.NGramDict;
|
||||
import nu.marginalia.wmsa.edge.crawler.domain.language.DocumentDebugger;
|
||||
import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels;
|
||||
@ -17,14 +18,7 @@ import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class WikipediaTest {
|
||||
LanguageModels lm = new LanguageModels(
|
||||
Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo4.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.RDR"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.DICT"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin")
|
||||
);
|
||||
LanguageModels lm = TestLanguageModels.getLanguageModels();
|
||||
|
||||
@Test @SneakyThrows
|
||||
public void test() {
|
||||
|
@ -1,5 +1,6 @@
|
||||
package nu.marginalia.wmsa.edge.search.query;
|
||||
|
||||
import nu.marginalia.util.TestLanguageModels;
|
||||
import nu.marginalia.wmsa.edge.assistant.dict.NGramDict;
|
||||
import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels;
|
||||
import org.junit.BeforeClass;
|
||||
@ -15,14 +16,7 @@ class BodyQueryParserTest {
|
||||
private QueryParser parser;
|
||||
private static NGramDict dict;
|
||||
private static EnglishDictionary englishDictionary;
|
||||
private static LanguageModels lm = new LanguageModels(
|
||||
Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo4.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.RDR"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.DICT"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin")
|
||||
);
|
||||
private static LanguageModels lm = TestLanguageModels.getLanguageModels();
|
||||
|
||||
@BeforeClass
|
||||
public static void init() {
|
||||
|
@ -1,5 +1,6 @@
|
||||
package nu.marginalia.wmsa.edge.search.query;
|
||||
|
||||
import nu.marginalia.util.TestLanguageModels;
|
||||
import nu.marginalia.wmsa.edge.assistant.dict.NGramDict;
|
||||
import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels;
|
||||
import org.junit.jupiter.api.Test;
|
||||
@ -12,14 +13,7 @@ class EnglishDictionaryTest {
|
||||
|
||||
@Test
|
||||
void getWordVariants() {
|
||||
LanguageModels lm = new LanguageModels(
|
||||
Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo4.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.RDR"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.DICT"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin")
|
||||
);
|
||||
LanguageModels lm = TestLanguageModels.getLanguageModels();
|
||||
|
||||
var dict = new NGramDict(lm);
|
||||
new EnglishDictionary(dict).getWordVariants("dos").forEach(System.out::println);
|
||||
|
@ -1,5 +1,6 @@
|
||||
package nu.marginalia.wmsa.edge.search.query;
|
||||
|
||||
import nu.marginalia.util.TestLanguageModels;
|
||||
import nu.marginalia.wmsa.edge.assistant.dict.NGramDict;
|
||||
import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels;
|
||||
import org.junit.BeforeClass;
|
||||
@ -13,14 +14,7 @@ class QueryParserTest {
|
||||
private QueryParser parser;
|
||||
private static NGramDict dict;
|
||||
private static EnglishDictionary englishDictionary;
|
||||
private static LanguageModels lm = new LanguageModels(
|
||||
Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo4.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.RDR"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.DICT"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin")
|
||||
);
|
||||
private static LanguageModels lm = TestLanguageModels.getLanguageModels();
|
||||
|
||||
@BeforeEach
|
||||
public void setUp() {
|
||||
|
@ -1,5 +1,6 @@
|
||||
package nu.marginalia.wmsa.edge.search.query;
|
||||
|
||||
import nu.marginalia.util.TestLanguageModels;
|
||||
import nu.marginalia.wmsa.edge.assistant.dict.NGramDict;
|
||||
import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels;
|
||||
import nu.marginalia.wmsa.edge.crawler.domain.language.processing.SentenceExtractor;
|
||||
@ -15,14 +16,7 @@ class QueryVariantsTest {
|
||||
SentenceExtractor se;
|
||||
@BeforeEach
|
||||
public void setUp() {
|
||||
LanguageModels lm = new LanguageModels(
|
||||
Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo4.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.RDR"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/English.DICT"),
|
||||
Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin")
|
||||
);
|
||||
LanguageModels lm = TestLanguageModels.getLanguageModels();
|
||||
|
||||
se = new SentenceExtractor(lm);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user