(test) Integration test from crawl->query

This commit is contained in:
Viktor Lofgren 2024-06-25 22:17:26 +02:00
parent 9d00243d7f
commit dae22ccbe0
14 changed files with 616 additions and 18 deletions

View File

@ -1,13 +1,18 @@
package nu.marginalia.process.control;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/** Dummy implementation of ProcessHeartbeat that does nothing */
public class FakeProcessHeartbeat implements ProcessHeartbeat {
private static final Logger logger = LoggerFactory.getLogger(FakeProcessHeartbeat.class);
@Override
public <T extends Enum<T>> ProcessTaskHeartbeat<T> createProcessTaskHeartbeat(Class<T> steps, String processName) {
return new ProcessTaskHeartbeat<>() {
@Override
public void progress(T step) {}
public void progress(T step) {
logger.info("Progress: {}", step);
}
@Override
public void shutDown() {}
@ -21,7 +26,9 @@ public class FakeProcessHeartbeat implements ProcessHeartbeat {
public ProcessAdHocTaskHeartbeat createAdHocTaskHeartbeat(String processName) {
return new ProcessAdHocTaskHeartbeat() {
@Override
public void progress(String step, int progress, int total) {}
public void progress(String step, int progress, int total) {
logger.info("Progress: {}, {}/{}", step, progress, total);
}
@Override
public void close() {}

View File

@ -124,7 +124,9 @@ public class DocumentKeywordsBuilder {
@Override
public String toString() {
StringBuilder sb = new StringBuilder("[ ");
wordToMeta.forEach((word, meta) -> sb.append(word).append("->").append(new WordMetadata(meta)).append(' '));
wordToMeta.forEach((word, meta) -> {
sb.append(word).append("->").append(new WordMetadata(meta).flagSet()).append(',').append(wordToPos.getOrDefault(word, new IntArrayList())).append(' ');
});
return sb.append(']').toString();
}

View File

@ -212,12 +212,21 @@ public class QueryFactoryTest {
var subquery = parseAndGetSpecs("The");
System.out.println("Time: " + (System.currentTimeMillis() - start));
System.out.println(subquery);
} @Test
}
/** Parses a fixed multi-word query, then prints how long parsing took and the resulting specs. */
@Test
public void testExpansion6() {
    final long startedAtMillis = System.currentTimeMillis();
    var specs = parseAndGetSpecs("burning the nerves in the neck");
    final long elapsedMillis = System.currentTimeMillis() - startedAtMillis;

    System.out.println("Time: " + elapsedMillis);
    System.out.println(specs);
}
/** Parses the query "amazing work being done", then prints how long parsing took and the resulting specs. */
@Test
public void testExpansion7() {
    final long startedAtMillis = System.currentTimeMillis();
    var specs = parseAndGetSpecs("amazing work being done");
    final long elapsedMillis = System.currentTimeMillis() - startedAtMillis;

    System.out.println("Time: " + elapsedMillis);
    System.out.println(specs);
}
}

View File

@ -31,10 +31,10 @@ public class IndexJournalReaderSingleFile implements IndexJournalReader {
private static IndexJournalFileHeader readHeader(Path file) throws IOException {
try (var raf = new RandomAccessFile(file.toFile(), "r")) {
long recordCount = raf.readLong();
long unused = raf.readLong();
long wordCount = raf.readLong();
return new IndexJournalFileHeader(unused, wordCount);
return new IndexJournalFileHeader(recordCount, unused);
}
}

View File

@ -205,7 +205,8 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
return searchSetsService.getSearchSetByName(request.getSearchSetIdentifier());
}
private SearchResultSet executeSearch(SearchParameters params) throws SQLException, InterruptedException {
// accessible for tests
public SearchResultSet executeSearch(SearchParameters params) throws SQLException, InterruptedException {
if (!statefulIndex.isLoaded()) {
// Short-circuit if the index is not loaded, as we trivially know that there can be no results

View File

@ -1,6 +1,5 @@
package nu.marginalia.converting.writer;
import gnu.trove.list.TLongList;
import gnu.trove.list.array.TLongArrayList;
import lombok.SneakyThrows;
import nu.marginalia.converting.model.ProcessedDocument;
@ -103,8 +102,6 @@ public class ConverterBatchWriter implements AutoCloseable, ConverterBatchWriter
String domainName = domain.toString();
ByteBuffer workArea = ByteBuffer.allocate(1024);
while (documentIterator.hasNext()) {
var document = documentIterator.next();
if (document.details == null) {

View File

@ -45,7 +45,6 @@ public class LoaderMain extends ProcessMainClass {
private final MessageQueueFactory messageQueueFactory;
private final FileStorageService fileStorageService;
private final DocumentDbWriter documentDbWriter;
private final LoaderIndexJournalWriter journalWriter;
private final DomainLoaderService domainService;
private final DomainLinksLoaderService linksService;
private final KeywordLoaderService keywordLoaderService;
@ -79,7 +78,6 @@ public class LoaderMain extends ProcessMainClass {
MessageQueueFactory messageQueueFactory,
FileStorageService fileStorageService,
DocumentDbWriter documentDbWriter,
LoaderIndexJournalWriter journalWriter,
DomainLoaderService domainService,
DomainLinksLoaderService linksService,
KeywordLoaderService keywordLoaderService,
@ -92,7 +90,6 @@ public class LoaderMain extends ProcessMainClass {
this.messageQueueFactory = messageQueueFactory;
this.fileStorageService = fileStorageService;
this.documentDbWriter = documentDbWriter;
this.journalWriter = journalWriter;
this.domainService = domainService;
this.linksService = linksService;
this.keywordLoaderService = keywordLoaderService;
@ -132,7 +129,7 @@ public class LoaderMain extends ProcessMainClass {
logger.error("Error", ex);
}
finally {
journalWriter.close();
keywordLoaderService.close();
documentDbWriter.close();
heartbeat.shutDown();
}

View File

@ -11,7 +11,6 @@ import nu.marginalia.model.id.UrlIdCodec;
import nu.marginalia.model.processed.DocumentRecordKeywordsProjection;
import nu.marginalia.process.control.ProcessHeartbeat;
import nu.marginalia.sequence.GammaCodedSequence;
import org.roaringbitmap.RoaringBitmap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -55,7 +54,7 @@ public class KeywordLoaderService {
logger.info("Loading keywords from {}", file);
stream.filter(DocumentRecordKeywordsProjection::hasKeywords)
.forEach(proj -> insertKeywords(domainIdRegistry, proj));
.forEach(proj -> insertKeywords(domainIdRegistry, proj));
}
}
@ -78,4 +77,12 @@ public class KeywordLoaderService {
projection.length,
words);
}
/**
 * Closes the underlying writer.
 * <p>
 * This is best-effort: any exception raised while closing is logged
 * and not propagated to the caller.
 */
public void close() {
    try {
        writer.close();
    }
    catch (Exception closeFailure) {
        logger.error("Failed to close writer", closeFailure);
    }
}
}

View File

@ -10,6 +10,7 @@ import nu.marginalia.loading.LoaderInputData;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.processed.DomainRecord;
import nu.marginalia.model.processed.DomainWithIp;
import nu.marginalia.process.control.ProcessHeartbeat;
import nu.marginalia.process.control.ProcessHeartbeatImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -108,7 +109,7 @@ public class DomainLoaderService {
return domainNamesAll;
}
public boolean loadDomainMetadata(DomainIdRegistry domainIdRegistry, ProcessHeartbeatImpl heartbeat, LoaderInputData inputData) {
public boolean loadDomainMetadata(DomainIdRegistry domainIdRegistry, ProcessHeartbeat heartbeat, LoaderInputData inputData) {
var files = inputData.listDomainFiles();

View File

@ -0,0 +1,47 @@
// Build script for the crawl->query integration test module.
plugins {
id 'java'
id 'jvm-test-suite'
}
// Use the Java toolchain version configured on the root project (ext.jvmVersion).
java {
toolchain {
languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
}
}
// Shared script from the root project; presumably defines the source set layout -- confirm in srcsets.gradle.
apply from: "$rootProject.projectDir/srcsets.gradle"
dependencies {
// Processes and process data models
implementation project(':code:processes:crawling-process')
implementation project(':code:processes:converting-process')
implementation project(':code:processes:loading-process')
implementation project(':code:process-models:crawling-model')
implementation project(':code:process-models:processed-data')
implementation project(':code:processes:index-constructor-process')
// Index modules and the search query API
implementation project(':code:index')
implementation project(':code:functions:search-query:api')
implementation project(':code:index:index-reverse')
implementation project(':code:index:index-forward')
implementation project(':code:index:query')
implementation project(':code:index:index-journal')
// Supporting function and library modules
implementation project(':code:functions:link-graph:partition')
implementation project(':code:libraries:array')
// Common modules
implementation project(':code:common:db')
implementation project(':code:common:config')
implementation project(':code:common:linkdb')
implementation project(':code:common:process')
implementation project(':code:common:service')
implementation project(':code:common:model')
// Third-party libraries from the version catalog
// NOTE(review): mockito is declared as 'implementation' rather than 'testImplementation' -- confirm this is intended.
implementation libs.bundles.slf4j
implementation libs.bundles.grpc
implementation libs.mockito
implementation libs.notnull
implementation libs.guice
implementation libs.fastutil
implementation libs.trove
testImplementation libs.bundles.junit
}

View File

@ -0,0 +1,316 @@
package nu.marginalia;
import com.google.inject.Guice;
import com.google.inject.Inject;
import nu.marginalia.api.searchquery.model.query.SearchCoherenceConstraint;
import nu.marginalia.api.searchquery.model.query.SearchQuery;
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.converting.processor.DomainProcessor;
import nu.marginalia.converting.writer.ConverterBatchWriter;
import nu.marginalia.crawl.retreival.DomainProber;
import nu.marginalia.crawl.retreival.fetcher.ContentTags;
import nu.marginalia.crawl.retreival.fetcher.warc.WarcRecorder;
import nu.marginalia.crawling.io.CrawledDomainReader;
import nu.marginalia.crawling.parquet.CrawledDocumentParquetRecordFileWriter;
import nu.marginalia.index.IndexGrpcService;
import nu.marginalia.index.ReverseIndexFullFileNames;
import nu.marginalia.index.ReverseIndexPrioFileNames;
import nu.marginalia.index.construction.ReverseIndexConstructor;
import nu.marginalia.index.domainrankings.DomainRankings;
import nu.marginalia.index.forward.ForwardIndexConverter;
import nu.marginalia.index.forward.ForwardIndexFileNames;
import nu.marginalia.index.index.StatefulIndex;
import nu.marginalia.index.journal.reader.IndexJournalReader;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.model.SearchParameters;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.index.searchset.SearchSetAny;
import nu.marginalia.linkdb.docs.DocumentDbReader;
import nu.marginalia.linkdb.docs.DocumentDbWriter;
import nu.marginalia.loading.LoaderIndexJournalWriter;
import nu.marginalia.loading.LoaderInputData;
import nu.marginalia.loading.documents.DocumentLoaderService;
import nu.marginalia.loading.documents.KeywordLoaderService;
import nu.marginalia.loading.domains.DomainIdRegistry;
import nu.marginalia.loading.links.DomainLinksLoaderService;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.model.id.UrlIdCodec;
import nu.marginalia.model.idx.WordFlags;
import nu.marginalia.model.idx.WordMetadata;
import nu.marginalia.process.control.FakeProcessHeartbeat;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageBaseType;
import org.junit.jupiter.api.*;
import org.mockito.Mockito;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.List;
import java.util.function.LongPredicate;
import static nu.marginalia.index.journal.reader.IndexJournalReader.FILE_HEADER_SIZE_BYTES;
import static nu.marginalia.linkdb.LinkdbFileNames.DOCDB_FILE_NAME;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.when;
/**
 * End-to-end test of the crawl-to-query pipeline.
 * <p>
 * A single synthetic HTML page is written to a WARC file, converted to parquet,
 * processed, written out as a converter batch, loaded, indexed (forward, full
 * reverse and priority reverse indexes) and finally queried through
 * {@link IndexGrpcService#executeSearch}.
 * <p>
 * Collaborators are injected from an {@link IntegrationTestModule} that is
 * created anew for every test.
 */
public class IntegrationTest {
// Module instance for the current test; kept so its temp directory can be cleaned up afterwards
IntegrationTestModule testModule;
@Inject
DomainProcessor domainProcessor;
@Inject
DomainLinksLoaderService linksService;
@Inject
KeywordLoaderService keywordLoaderService;
@Inject
DocumentLoaderService documentLoaderService;
@Inject
FileStorageService fileStorageService;
@Inject
DomainRankings domainRankings;
@Inject
DocumentDbWriter documentDbWriter;
// NOTE(review): injected but not referenced anywhere else in this class
@Inject
LoaderIndexJournalWriter journalWriter;
// Temporary inputs/outputs of the individual pipeline stages, created in setupTest()
Path warcData = null;
Path crawlDataParquet = null;
Path processedDataDir = null;
@Inject
StatefulIndex statefulIndex;
@Inject
IndexGrpcService indexGrpcService;
@Inject
DocumentDbReader documentDbReader;
/** Creates a fresh injector, injects this test's fields, and allocates the temporary files and directory used by the pipeline. */
@BeforeEach
public void setupTest() throws IOException {
testModule = new IntegrationTestModule();
Guice.createInjector(testModule).injectMembers(this);
warcData = Files.createTempFile("warc", ".warc.gz");
crawlDataParquet = Files.createTempFile("crawl", ".parquet");
processedDataDir = Files.createTempDirectory("processed");
}
/** Removes the temporary files created in {@link #setupTest()} and the module's working directory. */
@AfterEach
public void tearDownTest() throws IOException {
Files.deleteIfExists(warcData);
Files.deleteIfExists(crawlDataParquet);
TestUtil.clearTempDir(processedDataDir);
testModule.cleanUp();
}
/**
 * Runs the whole pipeline for one document and prints the search result.
 * <p>
 * The only assertions are that the index journal file exists and is larger
 * than its header; the search result itself is printed, not asserted on.
 */
@Test
public void run() throws Exception {
/** CREATE WARC */
try (WarcRecorder warcRecorder = new WarcRecorder(warcData)) {
warcRecorder.writeWarcinfoHeader("127.0.0.1", new EdgeDomain("www.example.com"),
new DomainProber.ProbeResultOk(new EdgeUrl("https://www.example.com/")));
warcRecorder.writeReferenceCopy(new EdgeUrl("https://www.example.com/"),
"text/html", 200,
"""
<html>
<h1>Hello World</h1>
<body>
<p>The best description of my problem solving process is the Feynman algorithm, which is sometimes presented as a joke where the hidden subtext is be smart, but I disagree. The algorithm is a surprisingly lucid description of how thinking works in the context of hard problems where the answer cant simply be looked up or trivially broken down, iterated upon in a bottom-up fashion, or approached with similar methods.
The trick is that there is no trick. This is how thinking works. It appears that when you feed your brain related information, without further active involvement, it starts to digest the information youve fed it.
</p>
</body>
</html>
""",
ContentTags.empty()
);
}
/** CONVERT WARC */
CrawledDocumentParquetRecordFileWriter.convertWarc(
"www.example.com",
new UserAgent("search.marginalia.nu",
"search.marginalia.nu"),
warcData,
crawlDataParquet);
/** PROCESS CRAWL DATA */
var processedDomain = domainProcessor.fullProcessing(CrawledDomainReader.createDataStream(crawlDataParquet));
System.out.println(processedDomain);
/** WRITE PROCESSED DATA */
try (ConverterBatchWriter cbw = new ConverterBatchWriter(processedDataDir, 0)) {
cbw.writeProcessedDomain(processedDomain);
}
// Write a single batch-switch marker in the process log so that the loader will read the data
Files.writeString(processedDataDir.resolve("processor.log"), "F\n", StandardOpenOption.CREATE_NEW);
/** LOAD PROCESSED DATA */
LoaderInputData inputData = new LoaderInputData(List.of(processedDataDir));
// The registry is mocked: every domain resolves to id 1
DomainIdRegistry domainIdRegistry = Mockito.mock(DomainIdRegistry.class);
when(domainIdRegistry.getDomainId(any())).thenReturn(1);
linksService.loadLinks(domainIdRegistry, new FakeProcessHeartbeat(), inputData);
keywordLoaderService.loadKeywords(domainIdRegistry, new FakeProcessHeartbeat(), inputData);
documentLoaderService.loadDocuments(domainIdRegistry, new FakeProcessHeartbeat(), inputData);
// These must be closed to finalize the associated files
documentDbWriter.close();
keywordLoaderService.close();
// Sanity-check that loading produced a journal file containing more than just its header
Path journalFile = fileStorageService
.getStorageBase(FileStorageBaseType.CURRENT)
.asPath()
.resolve("iw/page-index-0000.dat");
assertTrue(Files.exists(journalFile), "Journal file not found: " + journalFile);
assertTrue(Files.size(journalFile) > FILE_HEADER_SIZE_BYTES, "Journal file does not contain data");
/** CONSTRUCT INDEX */
// Order matters here: createFullReverseIndex() creates the tmp directory that createPrioReverseIndex() also uses
createForwardIndex();
createFullReverseIndex();
createPrioReverseIndex();
/** SWITCH INDEX */
statefulIndex.switchIndex();
// Move the docdb file to the live location
Files.move(
IndexLocations.getLinkdbWritePath(fileStorageService).resolve(DOCDB_FILE_NAME),
IndexLocations.getLinkdbLivePath(fileStorageService).resolve(DOCDB_FILE_NAME)
);
// Reconnect the document reader to the new docdb file
documentDbReader.reconnect();
/** QUERY */
// Search for the phrase "problem solving process", which appears in the page written above
var rs = indexGrpcService.executeSearch(new SearchParameters(new SearchSpecification(
new SearchQuery("problem solving process",
List.of("problem", "solving", "process"),
List.of(),
List.of(),
List.of(),
List.of(new SearchCoherenceConstraint(true, List.of("problem", "solving", "process")))
),
null,
"NONE",
"feynman",
SpecificationLimit.none(),
SpecificationLimit.none(),
SpecificationLimit.none(),
SpecificationLimit.none(),
new QueryLimits(10, 10, 100, 100),
QueryStrategy.AUTO,
ResultRankingParameters.sensibleDefaults()
), new SearchSetAny()));
System.out.println(rs);
}
/**
 * Builds the full reverse index from the journal in the index construction area,
 * writing the NEXT version of the docs, words and positions files.
 * Creates the "tmp" directory under the construction area if it does not exist.
 */
private void createFullReverseIndex() throws IOException {
Path outputFileDocs = ReverseIndexFullFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexFullFileNames.FileIdentifier.DOCS, ReverseIndexFullFileNames.FileVersion.NEXT);
Path outputFileWords = ReverseIndexFullFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexFullFileNames.FileIdentifier.WORDS, ReverseIndexFullFileNames.FileVersion.NEXT);
Path outputFilePositions = ReverseIndexFullFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexFullFileNames.FileIdentifier.POSITIONS, ReverseIndexFullFileNames.FileVersion.NEXT);
Path workDir = IndexLocations.getIndexConstructionArea(fileStorageService);
Path tmpDir = workDir.resolve("tmp");
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
var constructor = new ReverseIndexConstructor(
outputFileDocs,
outputFileWords,
outputFilePositions,
IndexJournalReader::singleFile,
this::addRankToIdEncoding,
tmpDir);
constructor.createReverseIndex(new FakeProcessHeartbeat(), "createReverseIndexFull", workDir);
}
/**
 * Builds the priority reverse index, using a journal reader filtered by
 * {@link #getPriorityIndexWordMetaFilter()}.
 * <p>
 * NOTE(review): unlike {@link #createFullReverseIndex()}, this method does not create
 * the "tmp" directory itself; in {@link #run()} it is invoked after createFullReverseIndex().
 */
private void createPrioReverseIndex() throws IOException {
Path outputFileDocs = ReverseIndexPrioFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexPrioFileNames.FileIdentifier.DOCS, ReverseIndexPrioFileNames.FileVersion.NEXT);
Path outputFileWords = ReverseIndexPrioFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexPrioFileNames.FileIdentifier.WORDS, ReverseIndexPrioFileNames.FileVersion.NEXT);
Path outputFilePositions = ReverseIndexPrioFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexPrioFileNames.FileIdentifier.POSITIONS, ReverseIndexPrioFileNames.FileVersion.NEXT);
Path workDir = IndexLocations.getIndexConstructionArea(fileStorageService);
Path tmpDir = workDir.resolve("tmp");
// The priority index only includes words that have bits indicating they are
// important to the document. This filter will act on the encoded word metadata (see WordMetadata)
LongPredicate wordMetaFilter = getPriorityIndexWordMetaFilter();
var constructor = new ReverseIndexConstructor(
outputFileDocs,
outputFileWords,
outputFilePositions,
(path) -> IndexJournalReader.singleFile(path).filtering(wordMetaFilter),
this::addRankToIdEncoding,
tmpDir);
constructor.createReverseIndex(new FakeProcessHeartbeat(), "createReverseIndexPrio", workDir);
}
/**
 * Builds the predicate used to select entries for the priority index.
 *
 * @return a predicate over encoded word metadata that delegates to
 *         {@code WordMetadata.hasAnyFlags} with the combined bit mask of the flags listed below
 */
private static LongPredicate getPriorityIndexWordMetaFilter() {
long highPriorityFlags =
WordFlags.Title.asBit()
| WordFlags.Subjects.asBit()
| WordFlags.TfIdfHigh.asBit()
| WordFlags.NamesWords.asBit()
| WordFlags.UrlDomain.asBit()
| WordFlags.UrlPath.asBit()
| WordFlags.Site.asBit()
| WordFlags.ExternalLink.asBit()
| WordFlags.SiteAdjacent.asBit();
return r -> WordMetadata.hasAnyFlags(r, highPriorityFlags);
}
/**
 * Converts the journal in the index construction area into the NEXT version of the
 * forward index doc-id and doc-data files.
 */
private void createForwardIndex() throws IOException {
Path workDir = IndexLocations.getIndexConstructionArea(fileStorageService);
Path outputFileDocsId = ForwardIndexFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ForwardIndexFileNames.FileIdentifier.DOC_ID, ForwardIndexFileNames.FileVersion.NEXT);
Path outputFileDocsData = ForwardIndexFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ForwardIndexFileNames.FileIdentifier.DOC_DATA, ForwardIndexFileNames.FileVersion.NEXT);
ForwardIndexConverter converter = new ForwardIndexConverter(new FakeProcessHeartbeat(),
IndexJournalReader.paging(workDir),
outputFileDocsId,
outputFileDocsData,
domainRankings
);
converter.convert();
}
/** Document id remapping handed to the reverse index constructors: applies a fixed rank of 255 via {@code UrlIdCodec.addRank}. */
private long addRankToIdEncoding(long docId) {
return UrlIdCodec.addRank(
255,
docId);
}
}

View File

@ -0,0 +1,161 @@
package nu.marginalia;
import com.google.inject.AbstractModule;
import com.google.inject.Inject;
import com.google.inject.Provides;
import com.google.inject.Singleton;
import com.google.inject.name.Names;
import gnu.trove.list.array.TIntArrayList;
import nu.marginalia.db.DomainTypes;
import nu.marginalia.index.domainrankings.DomainRankings;
import nu.marginalia.index.journal.writer.IndexJournalWriter;
import nu.marginalia.index.journal.writer.IndexJournalWriterPagingImpl;
import nu.marginalia.index.searchset.SearchSetAny;
import nu.marginalia.index.searchset.SearchSetsService;
import nu.marginalia.linkdb.docs.DocumentDbReader;
import nu.marginalia.linkdb.docs.DocumentDbWriter;
import nu.marginalia.linkgraph.io.DomainLinksWriter;
import nu.marginalia.process.control.FakeProcessHeartbeat;
import nu.marginalia.process.control.ProcessHeartbeat;
import nu.marginalia.service.ServiceId;
import nu.marginalia.service.control.FakeServiceHeartbeat;
import nu.marginalia.service.control.ServiceEventLog;
import nu.marginalia.service.control.ServiceHeartbeat;
import nu.marginalia.service.module.ServiceConfiguration;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageBase;
import nu.marginalia.storage.model.FileStorageBaseType;
import org.mockito.Mockito;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Random;
import java.util.UUID;
import static nu.marginalia.linkdb.LinkdbFileNames.DOCDB_FILE_NAME;
import static nu.marginalia.linkdb.LinkdbFileNames.DOMAIN_LINKS_FILE_NAME;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.when;
/**
 * Guice module that wires up the dependencies of the crawl-to-query integration test.
 * <p>
 * All storage is redirected into a temporary working directory created by the
 * constructor; services that would need external infrastructure are replaced by
 * Mockito mocks or fake implementations. Call {@link #cleanUp()} when done to
 * remove the working directory.
 */
public class IntegrationTestModule extends AbstractModule {
    Path workDir;
    Path slowDir;
    Path fastDir;
    Path indexDir;

    Random random = new Random();

    /**
     * Creates the temporary working directory along with its "slow" and "fast"
     * sub-directories. The "index" path is only resolved, not created.
     *
     * @throws IOException if any of the directories cannot be created
     */
    public IntegrationTestModule() throws IOException {
        workDir = Files.createTempDirectory("IntegrationTest");

        slowDir = workDir.resolve("slow");
        fastDir = workDir.resolve("fast");
        indexDir = workDir.resolve("index");

        Files.createDirectory(slowDir);
        Files.createDirectory(fastDir);
    }

    /** Deletes the temporary working directory. */
    public void cleanUp() {
        TestUtil.clearTempDir(workDir);
    }

    @Override
    protected void configure() {
        try {
            // File storage: WORK lives under "slow", CURRENT and STORAGE both under "fast".
            // The stubbing must be in place before IndexLocations is consulted below.
            FileStorageService storage = Mockito.mock(FileStorageService.class);
            when(storage.getStorageBase(FileStorageBaseType.WORK))
                    .thenReturn(storageBaseAt(slowDir));
            when(storage.getStorageBase(FileStorageBaseType.CURRENT))
                    .thenReturn(storageBaseAt(fastDir));
            when(storage.getStorageBase(FileStorageBaseType.STORAGE))
                    .thenReturn(storageBaseAt(fastDir));

            Path liveDocDb = IndexLocations.getLinkdbLivePath(storage).resolve(DOCDB_FILE_NAME);
            bind(DocumentDbReader.class).toInstance(new DocumentDbReader(liveDocDb));

            bind(FileStorageService.class).toInstance(storage);
            bind(ServiceHeartbeat.class).toInstance(new FakeServiceHeartbeat());
            bind(ProcessHeartbeat.class).toInstance(new FakeProcessHeartbeat());

            // Search sets: only "NONE" is stubbed
            SearchSetsService searchSets = Mockito.mock(SearchSetsService.class);
            when(searchSets.getSearchSetByName("NONE")).thenReturn(new SearchSetAny());
            when(searchSets.getDomainRankings()).thenReturn(new DomainRankings());
            bind(SearchSetsService.class).toInstance(searchSets);

            // Domain types: every lookup yields an empty collection
            DomainTypes emptyDomainTypes = Mockito.mock(DomainTypes.class);
            when(emptyDomainTypes.getAllDomainsByType(any())).thenReturn(new ArrayList<>());
            when(emptyDomainTypes.getKnownDomainsByType(any())).thenReturn(new TIntArrayList());
            when(emptyDomainTypes.downloadList(any())).thenReturn(new ArrayList<>());
            bind(DomainTypes.class).toInstance(emptyDomainTypes);

            bind(ServiceEventLog.class).toInstance(Mockito.mock(ServiceEventLog.class));

            Path constructionArea = IndexLocations.getIndexConstructionArea(storage);
            bind(IndexJournalWriter.class).toInstance(new IndexJournalWriterPagingImpl(constructionArea));

            bind(ServiceConfiguration.class).toInstance(
                    new ServiceConfiguration(ServiceId.Index, 0, "127.0.0.1", "127.0.0.1", randomPort(), UUID.randomUUID()));
            bind(ProcessConfiguration.class).toInstance(
                    new ProcessConfiguration("TEST", 0, UUID.randomUUID()));

            // Named settings
            bind(Double.class).annotatedWith(Names.named("min-document-quality")).toInstance(-15.);
            bind(Integer.class).annotatedWith(Names.named("min-document-length")).toInstance(32);
            bind(Integer.class).annotatedWith(Names.named("max-title-length")).toInstance(128);
            bind(Integer.class).annotatedWith(Names.named("max-summary-length")).toInstance(255);
            bind(Path.class).annotatedWith(Names.named("local-index-path")).toInstance(indexDir);

            bind(LanguageModels.class).toInstance(WmsaHome.getLanguageModels());
        }
        catch (IOException | SQLException setupFailure) {
            throw new RuntimeException(setupFailure);
        }
    }

    /** Provides a document db writer backed by a file in the linkdb write path; any existing file there is deleted first. */
    @Inject
    @Provides
    @Singleton
    private DocumentDbWriter createLinkdbWriter(FileStorageService service) throws SQLException, IOException {
        Path docDbFile = IndexLocations.getLinkdbWritePath(service).resolve(DOCDB_FILE_NAME);
        return new DocumentDbWriter(withoutExistingFile(docDbFile));
    }

    /** Provides a domain links writer backed by a file in the linkdb write path; any existing file there is deleted first. */
    @Inject @Provides @Singleton
    private DomainLinksWriter createDomainLinkdbWriter(FileStorageService service) throws SQLException, IOException {
        Path linksDbFile = IndexLocations.getLinkdbWritePath(service).resolve(DOMAIN_LINKS_FILE_NAME);
        return new DomainLinksWriter(withoutExistingFile(linksDbFile));
    }

    /** Builds a storage base that carries only a path; the remaining fields are left null/zero. */
    private static FileStorageBase storageBaseAt(Path dir) {
        return new FileStorageBase(null, null, 0, null, dir.toString());
    }

    /** Deletes {@code file} if it exists and returns the same path. */
    private static Path withoutExistingFile(Path file) throws IOException {
        if (Files.exists(file)) {
            Files.delete(file);
        }
        return file;
    }

    /** Picks a random port number in the range [10000, 30000). */
    private int randomPort() {
        return random.nextInt(10000, 30000);
    }
}

View File

@ -0,0 +1,52 @@
package nu.marginalia;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
/** File-system helpers for tests. */
public class TestUtil {

    /**
     * Recursively deletes {@code path}: a regular file is removed directly, a
     * directory is emptied depth-first and then removed itself.
     * <p>
     * Does nothing if the path does not exist. Deletion is best-effort: a failed
     * delete is reported on stderr instead of throwing, so it can be used safely
     * from test tear-down code.
     *
     * @param path the file or directory to remove
     */
    public static void clearTempDir(Path path) {
        if (!Files.exists(path))
            return;

        if (Files.isDirectory(path)) {
            // listFiles() returns null on I/O error or if the directory vanished concurrently
            File[] children = path.toFile().listFiles();
            if (children != null) {
                for (File child : children) {
                    // Recurse on the child itself, so that nested directories are
                    // removed along with their contents and the parent ends up empty
                    clearTempDir(child.toPath());
                }
            }
        }
        else {
            System.out.println("Deleting " + path + " (" + fileSize(path) + ")");
        }

        if (!path.toFile().delete()) {
            System.err.println("Failed to delete " + path);
        }
    }

    /**
     * Formats the size of the file at {@code path} with a binary unit suffix
     * (b, Kb, Mb or Gb); scaled values are shown with two decimals.
     *
     * @throws RuntimeException wrapping the {@link IOException} if the size cannot be read
     */
    private static String fileSize(Path path) {
        try {
            long sizeBytes = Files.size(path);

            if (sizeBytes > 1024 * 1024 * 1024) return round(sizeBytes / 1073741824.) + "Gb";
            if (sizeBytes > 1024 * 1024) return round(sizeBytes / 1048576.) + "Mb";
            if (sizeBytes > 1024) return round(sizeBytes / 1024.) + "Kb";
            return sizeBytes + "b";
        }
        catch (IOException ex) {
            throw new RuntimeException(ex);
        }
    }

    /** Formats {@code d} with two decimals. */
    private static String round(double d) {
        return String.format("%.2f", d);
    }
}

View File

@ -95,6 +95,7 @@ include 'code:process-models:processed-data'
include 'code:tools:experiment-runner'
include 'code:tools:screenshot-capture-tool'
include 'code:tools:load-test'
include 'code:tools:integration-test'
include 'third-party:porterstemmer'
include 'third-party:symspell'