Mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git, synced 2025-02-23 13:09:00 +00:00
(WIP) Make it possible to sideload encyclopedia data.

This is mostly a pilot for sideloading other large websites. Also change the converter to produce a more compact output (Java object serialization instead of JSON).
This commit is contained in: parent 9288d311d4, commit f11103d31d
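Before the diff: the heart of the format change is swapping line-oriented JSON records for Java object serialization inside a zstd stream, with a reset() after every write so the ObjectOutputStream does not retain a back-reference to each object it has ever written. The following is a minimal sketch of that round trip, not code from the commit; DemoInstruction is a hypothetical stand-in for the converter's Instruction type, which must be Serializable for this scheme to work.

import com.github.luben.zstd.ZstdInputStream;
import com.github.luben.zstd.ZstdOutputStream;

import java.io.*;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;

class InstructionSerializationSketch {
    // Hypothetical stand-in for the real Instruction hierarchy
    record DemoInstruction(String tag, String payload) implements Serializable {}

    static void write(Path file, List<DemoInstruction> instructions) throws IOException {
        try (var out = new ObjectOutputStream(new ZstdOutputStream(new FileOutputStream(file.toFile())))) {
            for (var instruction : instructions) {
                out.writeObject(instruction);
                // Clear the stream's handle table so memory stays flat when
                // writing very large numbers of objects
                out.reset();
            }
        }
    }

    static List<DemoInstruction> read(Path file) throws IOException, ClassNotFoundException {
        var ret = new ArrayList<DemoInstruction>();
        try (var in = new ObjectInputStream(new ZstdInputStream(new FileInputStream(file.toFile())))) {
            while (true) {
                try {
                    ret.add((DemoInstruction) in.readObject());
                }
                catch (EOFException end) {
                    break; // end of stream marks the end of the instruction list
                }
            }
        }
        return ret;
    }
}

The same write-then-reset and read-until-EOFException pattern shows up below in InstructionWriterFactory and ConvertedDomainReader.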
ConvertAction.java (new file):

@@ -0,0 +1,6 @@
+package nu.marginalia.mqapi.converting;
+
+public enum ConvertAction {
+    ConvertCrawlData,
+    SideloadEncyclopedia
+}
ConvertRequest.java:

@@ -5,6 +5,8 @@ import nu.marginalia.db.storage.model.FileStorageId;
 
 @AllArgsConstructor
 public class ConvertRequest {
+    public final ConvertAction action;
+    public final String inputSource;
     public final FileStorageId crawlStorage;
     public final FileStorageId processedDataStorage;
 }
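ConvertRequest now carries an action discriminator plus a free-form inputSource; for SideloadEncyclopedia the converter (further down) reads inputSource as a filesystem path and only uses processedDataStorage. As a hedged sketch, enqueuing a sideload request would look roughly like the ConvertCrawlData construction that ReconvertAndLoadActor gains at the end of this diff; nothing in this commit sends one yet, and the path here is illustrative:

// Hypothetical; mirrors ReconvertAndLoadActor's crawl-data request below
var request = new ConvertRequest(ConvertAction.SideloadEncyclopedia,
        "/data/encyclopedia/articles.db", // inputSource: path to the sideload database (assumed)
        null,                             // crawlStorage: not used by the sideload branch
        processedArea.id());              // processedDataStorage: where instructions land
long id = mqConverterOutbox.sendAsync(ConvertRequest.class.getSimpleName(), gson.toJson(request));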
Gradle build script (dependencies block):

@@ -79,6 +79,7 @@ dependencies {
     implementation libs.crawlercommons
 
     implementation libs.commons.lang3
+    implementation libs.sqlite
 
     testImplementation libs.bundles.slf4j.test
     testImplementation libs.bundles.junit
ConverterMain.java:

@@ -5,24 +5,26 @@ import com.google.inject.Guice;
 import com.google.inject.Inject;
 import com.google.inject.Injector;
 import nu.marginalia.converting.model.ProcessedDomain;
+import nu.marginalia.converting.sideload.SideloadSource;
+import nu.marginalia.converting.sideload.SideloadSourceFactory;
 import nu.marginalia.db.storage.FileStorageService;
 import nu.marginalia.mq.MessageQueueFactory;
 import nu.marginalia.mq.MqMessage;
 import nu.marginalia.mq.inbox.MqInboxResponse;
 import nu.marginalia.mq.inbox.MqSingleShotInbox;
+import nu.marginalia.mqapi.converting.ConvertAction;
 import nu.marginalia.process.control.ProcessHeartbeat;
 import nu.marginalia.process.log.WorkLog;
 import nu.marginalia.service.module.DatabaseModule;
 import plan.CrawlPlan;
 import nu.marginalia.converting.compiler.InstructionsCompiler;
-import nu.marginalia.converting.instruction.Instruction;
 import nu.marginalia.converting.processor.DomainProcessor;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
+import java.nio.file.Path;
 import java.sql.SQLException;
-import java.util.List;
 import java.util.Optional;
 import java.util.UUID;
 import java.util.concurrent.LinkedBlockingQueue;

@@ -34,7 +36,6 @@ import java.util.concurrent.atomic.AtomicInteger;
 import static nu.marginalia.mqapi.ProcessInboxNames.CONVERTER_INBOX;
 
 public class ConverterMain {
-
     private static final Logger logger = LoggerFactory.getLogger(ConverterMain.class);
     private final DomainProcessor processor;
     private final InstructionsCompiler compiler;

@@ -42,10 +43,9 @@ public class ConverterMain {
     private final ProcessHeartbeat heartbeat;
     private final MessageQueueFactory messageQueueFactory;
     private final FileStorageService fileStorageService;
+    private final SideloadSourceFactory sideloadSourceFactory;
 
     public static void main(String... args) throws Exception {
-
-
         Injector injector = Guice.createInjector(
                 new ConverterModule(),
                 new DatabaseModule()

@@ -55,15 +55,9 @@ public class ConverterMain {
 
         logger.info("Starting pipe");
 
-        var request = converter.fetchInstructions();
-        try {
-            converter.convert(request);
-            request.ok();
-        }
-        catch (Exception ex) {
-            logger.error("Conversion failed", ex);
-            request.err();
-        }
+        converter
+                .fetchInstructions()
+                .execute(converter);
 
         logger.info("Finished");
 

@@ -77,21 +71,42 @@ public class ConverterMain {
             Gson gson,
             ProcessHeartbeat heartbeat,
             MessageQueueFactory messageQueueFactory,
-            FileStorageService fileStorageService
-    ) {
+            FileStorageService fileStorageService,
+            SideloadSourceFactory sideloadSourceFactory
+    )
+    {
         this.processor = processor;
         this.compiler = compiler;
         this.gson = gson;
        this.heartbeat = heartbeat;
         this.messageQueueFactory = messageQueueFactory;
         this.fileStorageService = fileStorageService;
+        this.sideloadSourceFactory = sideloadSourceFactory;
 
         heartbeat.start();
     }
 
-    public void convert(ConvertRequest request) throws Exception {
+    public void convert(SideloadSource sideloadSource, Path writeDir) throws Exception {
+        int maxPoolSize = 16;
 
-        var plan = request.getPlan();
+        try (WorkLog workLog = new WorkLog(writeDir.resolve("processor.log"));
+             ConversionLog conversionLog = new ConversionLog(writeDir)) {
+            var instructionWriter = new InstructionWriterFactory(conversionLog, writeDir, gson);
+
+            final String where;
+            final int size;
+
+            try (var writer = instructionWriter.createInstructionsForDomainWriter(sideloadSource.getId())) {
+                compiler.compileStreaming(sideloadSource, writer::accept);
+                where = writer.getFileName();
+                size = writer.getSize();
+            }
+
+            workLog.setJobToFinished(sideloadSource.getId(), where, size);
+        }
+    }
+
+    public void convert(CrawlPlan plan) throws Exception {
 
         final int maxPoolSize = 16;
 

@@ -146,29 +161,19 @@ public class ConverterMain {
             do {
                 System.out.println("Waiting for pool to terminate... " + pool.getActiveCount() + " remaining");
             } while (!pool.awaitTermination(60, TimeUnit.SECONDS));
-
-            request.ok();
-        }
-        catch (Exception e) {
-            request.err();
-            throw e;
         }
     }
 
-    private static class ConvertRequest {
-        private final CrawlPlan plan;
+    private abstract static class ConvertRequest {
         private final MqMessage message;
         private final MqSingleShotInbox inbox;
 
-        ConvertRequest(CrawlPlan plan, MqMessage message, MqSingleShotInbox inbox) {
-            this.plan = plan;
+        private ConvertRequest(MqMessage message, MqSingleShotInbox inbox) {
             this.message = message;
             this.inbox = inbox;
         }
 
-        public CrawlPlan getPlan() {
-            return plan;
-        }
+        public abstract void execute(ConverterMain converterMain) throws Exception;
 
         public void ok() {
             inbox.sendResponse(message, MqInboxResponse.ok());

@@ -176,9 +181,55 @@ public class ConverterMain {
         public void err() {
             inbox.sendResponse(message, MqInboxResponse.err());
         }
 
     }
 
+    private static class SideloadAction extends ConvertRequest {
+
+        private final SideloadSource sideloadSource;
+        private final Path workDir;
+
+        SideloadAction(SideloadSource sideloadSource,
+                       Path workDir,
+                       MqMessage message, MqSingleShotInbox inbox) {
+            super(message, inbox);
+            this.sideloadSource = sideloadSource;
+            this.workDir = workDir;
+        }
+
+        @Override
+        public void execute(ConverterMain converterMain) throws Exception {
+            try {
+                converterMain.convert(sideloadSource, workDir);
+                ok();
+            }
+            catch (Exception ex) {
+                logger.error("Error sideloading", ex);
+                err();
+            }
+        }
+    }
+
+    private static class ConvertCrawlDataAction extends ConvertRequest {
+        private final CrawlPlan plan;
+
+        private ConvertCrawlDataAction(CrawlPlan plan, MqMessage message, MqSingleShotInbox inbox) {
+            super(message, inbox);
+            this.plan = plan;
+        }
+
+        @Override
+        public void execute(ConverterMain converterMain) throws Exception {
+            try {
+                converterMain.convert(plan);
+                ok();
+            }
+            catch (Exception ex) {
+                err();
+            }
+        }
+    }
+
     private ConvertRequest fetchInstructions() throws Exception {
 
         var inbox = messageQueueFactory.createSingleShotInbox(CONVERTER_INBOX, UUID.randomUUID());

@@ -188,14 +239,30 @@ public class ConverterMain {
 
         var request = gson.fromJson(msg.payload(), nu.marginalia.mqapi.converting.ConvertRequest.class);
 
-        var crawlData = fileStorageService.getStorage(request.crawlStorage);
-        var processData = fileStorageService.getStorage(request.processedDataStorage);
+        if (request.action == ConvertAction.ConvertCrawlData) {
+            var crawlData = fileStorageService.getStorage(request.crawlStorage);
+            var processData = fileStorageService.getStorage(request.processedDataStorage);
 
-        var plan = new CrawlPlan(null,
-                new CrawlPlan.WorkDir(crawlData.path(), "crawler.log"),
-                new CrawlPlan.WorkDir(processData.path(), "processor.log"));
+            var plan = new CrawlPlan(null,
+                    new CrawlPlan.WorkDir(crawlData.path(), "crawler.log"),
+                    new CrawlPlan.WorkDir(processData.path(), "processor.log"));
 
-        return new ConvertRequest(plan, msg, inbox);
+            return new ConvertCrawlDataAction(plan, msg, inbox);
+        }
+
+        if (request.action == ConvertAction.SideloadEncyclopedia) {
+            var processData = fileStorageService.getStorage(request.processedDataStorage);
+            var filePath = Path.of(request.inputSource);
+
+            return new SideloadAction(sideloadSourceFactory.sideloadEncyclopediaMarginaliaNu(filePath),
+                    processData.asPath(),
+                    msg, inbox);
+        }
+
+        else {
+            throw new RuntimeException("Unknown action: " + request.action);
+        }
     }
 
     private Optional<MqMessage> getMessage(MqSingleShotInbox inbox, String expectedFunction) throws SQLException, InterruptedException {
InstructionWriterFactory.java:

@@ -42,7 +42,7 @@ public class InstructionWriterFactory {
     }
 
     public class InstructionWriter implements AutoCloseable {
-        private final OutputStreamWriter outputStream;
+        private final ObjectOutputStream outputStream;
         private final String where;
         private final SummarizingInterpreter summary = new SummarizingInterpreter();
 

@@ -52,7 +52,7 @@ public class InstructionWriterFactory {
         InstructionWriter(Path filename) throws IOException {
             where = filename.getFileName().toString();
             Files.deleteIfExists(filename);
-            outputStream = new OutputStreamWriter(new ZstdOutputStream(new BufferedOutputStream(new FileOutputStream(filename.toFile()))));
+            outputStream = new ObjectOutputStream(new ZstdOutputStream(new FileOutputStream(filename.toFile())));
         }
 
         public void accept(Instruction instruction) {

@@ -64,10 +64,12 @@ public class InstructionWriterFactory {
             size++;
 
             try {
-                outputStream.append(instruction.tag().name());
-                outputStream.append(' ');
-                gson.toJson(instruction, outputStream);
-                outputStream.append('\n');
+                outputStream.writeObject(instruction);
+
+                // Reset the stream to avoid keeping references to the objects
+                // (as this will cause the memory usage to grow indefinitely when
+                // writing huge amounts of data)
+                outputStream.reset();
             }
             catch (IOException ex) {
                 logger.warn("IO exception writing instruction", ex);
DocumentsCompiler.java:

@@ -23,7 +23,7 @@ public class DocumentsCompiler {
 
     }
 
-    private void compileDocumentDetails(Consumer<Instruction> instructionConsumer, ProcessedDocument doc) {
+    public void compileDocumentDetails(Consumer<Instruction> instructionConsumer, ProcessedDocument doc) {
         var details = doc.details;
 
         if (details != null) {

@@ -31,7 +31,7 @@ public class DocumentsCompiler {
         }
     }
 
-    private void compileWords(Consumer<Instruction> instructionConsumer, ProcessedDocument doc) {
+    public void compileWords(Consumer<Instruction> instructionConsumer, ProcessedDocument doc) {
         var words = doc.words;
 
         if (words != null) {
DomainMetadataCompiler.java:

@@ -40,4 +40,8 @@ public class DomainMetadataCompiler {
         instructionConsumer.accept(new LoadDomainMetadata(domain, knownUrls.size(), goodUrls, visitedUrls));
     }
 
+    public void compileFake(Consumer<Instruction> instructionConsumer, EdgeDomain domain, int countAll, int countGood) {
+        instructionConsumer.accept(new LoadDomainMetadata(domain, countAll, countGood, countAll));
+    }
+
 }
InstructionsCompiler.java:

@@ -3,11 +3,15 @@ package nu.marginalia.converting.compiler;
 import com.google.inject.Inject;
 import nu.marginalia.converting.instruction.Instruction;
 import nu.marginalia.converting.instruction.instructions.LoadProcessedDomain;
+import nu.marginalia.converting.model.ProcessedDocument;
 import nu.marginalia.converting.model.ProcessedDomain;
+import nu.marginalia.converting.sideload.SideloadSource;
+import nu.marginalia.model.EdgeUrl;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
-import java.util.ArrayList;
 import java.util.Collections;
-import java.util.List;
+import java.util.Iterator;
 import java.util.function.Consumer;
 
 import static java.util.Objects.requireNonNullElse;

@@ -20,6 +24,8 @@ public class InstructionsCompiler {
     private final LinksCompiler linksCompiler;
     private final RedirectCompiler redirectCompiler;
 
+    private final Logger logger = LoggerFactory.getLogger(InstructionsCompiler.class);
+
     @Inject
     public InstructionsCompiler(UrlsCompiler urlsCompiler,
                                 DocumentsCompiler documentsCompiler,

@@ -53,4 +59,35 @@ public class InstructionsCompiler {
 
         domainMetadataCompiler.compile(instructionConsumer, domain.domain, requireNonNullElse(domain.documents, Collections.emptyList()));
     }
+
+    public void compileStreaming(SideloadSource sideloadSource,
+                                 Consumer<Instruction> instructionConsumer) {
+        ProcessedDomain domain = sideloadSource.getDomain();
+        Iterator<EdgeUrl> urlsIterator = sideloadSource.getUrlsIterator();
+        Iterator<ProcessedDocument> documentsIterator = sideloadSource.getDocumentsStream();
+
+        // Guaranteed to always be first
+        instructionConsumer.accept(new LoadProcessedDomain(domain.domain, domain.state, domain.ip));
+
+        int countAll = 0;
+        int countGood = 0;
+
+        logger.info("Writing domains");
+        urlsCompiler.compileJustDomain(instructionConsumer, domain.domain);
+        logger.info("Writing urls");
+        urlsCompiler.compileJustUrls(instructionConsumer, urlsIterator);
+
+        logger.info("Writing docs");
+
+        while (documentsIterator.hasNext()) {
+            var doc = documentsIterator.next();
+            countAll++;
+            if (doc.isOk()) countGood++;
+
+            documentsCompiler.compileDocumentDetails(instructionConsumer, doc);
+            documentsCompiler.compileWords(instructionConsumer, doc);
+        }
+
+        domainMetadataCompiler.compileFake(instructionConsumer, domain.domain, countAll, countGood);
+    }
 }
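A hedged reading of compileStreaming's ordering, since the loader depends on it: LoadProcessedDomain is emitted first so the loader can resolve the domain before anything references it, then the domain and the full URL set are loaded, and only then the per-document instructions, which look up those URL ids. Because a sideload source is consumed in one pass over iterators, the document count is not known up front; compileFake closes the stream with domain metadata computed from the running counters instead of from a materialized document list.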
UrlsCompiler.java:

@@ -9,10 +9,7 @@ import nu.marginalia.model.EdgeUrl;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
+import java.util.*;
 import java.util.function.Consumer;
 
 public class UrlsCompiler {

@@ -58,4 +55,20 @@ public class UrlsCompiler {
         instructionConsumer.accept(new LoadUrl(seenUrls.toArray(EdgeUrl[]::new)));
     }
 
+    public void compileJustUrls(Consumer<Instruction> instructionConsumer, Iterator<EdgeUrl> urlsIterator) {
+        var urls = new ArrayList<EdgeUrl>(1000);
+
+        while (urlsIterator.hasNext()) {
+            if (urls.size() >= 1000) {
+                instructionConsumer.accept(new LoadUrl(urls.toArray(EdgeUrl[]::new)));
+                urls.clear();
+            }
+
+            urls.add(urlsIterator.next());
+        }
+    }
+
+    public void compileJustDomain(Consumer<Instruction> instructionConsumer, EdgeDomain domain) {
+        instructionConsumer.accept(new LoadDomain(domain));
+    }
 }
DomainProcessor.java:

@@ -28,7 +28,8 @@ public class DomainProcessor {
     @Inject
     public DomainProcessor(DocumentProcessor documentProcessor,
                            SiteWords siteWords,
-                           LshDocumentDeduplicator documentDeduplicator) {
+                           LshDocumentDeduplicator documentDeduplicator)
+    {
         this.documentProcessor = documentProcessor;
         this.siteWords = siteWords;
         this.documentDeduplicator = documentDeduplicator;
EncyclopediaMarginaliaNuSideloader.java (new file):

@@ -0,0 +1,247 @@
+package nu.marginalia.converting.sideload;
+
+import com.github.luben.zstd.ZstdInputStream;
+import com.google.gson.Gson;
+import lombok.SneakyThrows;
+import nu.marginalia.converting.model.DisqualifiedException;
+import nu.marginalia.converting.model.ProcessedDocument;
+import nu.marginalia.converting.model.ProcessedDomain;
+import nu.marginalia.converting.processor.plugin.HtmlDocumentProcessorPlugin;
+import nu.marginalia.crawling.model.CrawledDocument;
+import nu.marginalia.model.EdgeDomain;
+import nu.marginalia.model.EdgeUrl;
+import nu.marginalia.model.crawl.DomainIndexingState;
+import nu.marginalia.model.crawl.UrlIndexingState;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.URISyntaxException;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Path;
+import java.sql.*;
+import java.time.LocalDateTime;
+import java.util.Iterator;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.Semaphore;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+/** This is an experimental sideloader for encyclopedia.marginalia.nu's database
+ * (which serves as a way of loading wikipedia's zim files without binding to GPL2'd code)
+ *
+ * See https://github.com/MarginaliaSearch/encyclopedia.marginalia.nu for extracting the data
+ */
+public class EncyclopediaMarginaliaNuSideloader implements SideloadSource, AutoCloseable {
+
+    private final Connection connection;
+    private final Gson gson;
+    private final HtmlDocumentProcessorPlugin htmlProcessorPlugin;
+
+    public EncyclopediaMarginaliaNuSideloader(Path pathToDbFile,
+                                              Gson gson,
+                                              HtmlDocumentProcessorPlugin htmlProcessorPlugin) throws SQLException {
+        this.gson = gson;
+        this.htmlProcessorPlugin = htmlProcessorPlugin;
+        String sqliteDbString = "jdbc:sqlite:" + pathToDbFile.toString();
+
+        connection = DriverManager.getConnection(sqliteDbString);
+    }
+
+    @Override
+    public ProcessedDomain getDomain() {
+        var ret = new ProcessedDomain();
+
+        ret.domain = new EdgeDomain("encyclopedia.marginalia.nu");
+        ret.id = "encyclopedia.marginalia.nu";
+        ret.ip = "127.0.0.1";
+        ret.state = DomainIndexingState.ACTIVE;
+
+        return ret;
+    }
+
+    @Override
+    @SneakyThrows
+    public Iterator<EdgeUrl> getUrlsIterator() {
+        EdgeUrl base = new EdgeUrl("https://encyclopedia.marginalia.nu/");
+
+        return new SqlQueryIterator<>(connection.prepareStatement("""
+                SELECT url, html FROM articles
+                """))
+        {
+            @Override
+            public EdgeUrl convert(ResultSet rs) throws Exception {
+                var path = URLEncoder.encode(rs.getString("url"), StandardCharsets.UTF_8);
+
+                return base.withPathAndParam("/article/" + path, null);
+            }
+        };
+    }
+
+    @SneakyThrows
+    @Override
+    public Iterator<ProcessedDocument> getDocumentsStream() {
+        LinkedBlockingQueue<ProcessedDocument> docs = new LinkedBlockingQueue<>(32);
+        AtomicBoolean isFinished = new AtomicBoolean(false);
+
+        ExecutorService executorService = Executors.newFixedThreadPool(16);
+        Semaphore sem = new Semaphore(16);
+
+        executorService.submit(() -> {
+            try {
+                var stmt = connection.prepareStatement("""
+                        SELECT url,title,html FROM articles
+                        """);
+                stmt.setFetchSize(100);
+
+                var rs = stmt.executeQuery();
+                while (rs.next()) {
+                    var articleParts = fromCompressedJson(rs.getBytes("html"), ArticleParts.class);
+                    String title = rs.getString("title");
+                    String url = rs.getString("url");
+
+                    sem.acquire();
+
+                    executorService.submit(() -> {
+                        try {
+                            docs.add(convertDocument(articleParts.parts, title, url));
+                        } catch (URISyntaxException | DisqualifiedException e) {
+                            e.printStackTrace();
+                        } finally {
+                            sem.release();
+                        }
+                    });
+                }
+
+                stmt.close();
+            }
+            catch (Exception e) {
+                e.printStackTrace();
+            }
+            finally {
+                isFinished.set(true);
+            }
+        });
+
+        return new Iterator<>() {
+            @Override
+            public boolean hasNext() {
+                return !isFinished.get() || !docs.isEmpty() || sem.availablePermits() < 16;
+            }
+
+            @SneakyThrows
+            @Override
+            public ProcessedDocument next() {
+                return docs.take();
+            }
+        };
+    }
+
+    private ProcessedDocument convertDocument(List<String> parts, String title, String url) throws URISyntaxException, DisqualifiedException {
+        String fullUrl = "https://encyclopedia.marginalia.nu/article/" + url;
+
+        StringBuilder fullHtml = new StringBuilder();
+        fullHtml.append("<!DOCTYPE html><html><head><title>").append(title).append("</title></head><body>");
+        for (String part : parts) {
+            fullHtml.append("<p>");
+            fullHtml.append(part);
+            fullHtml.append("</p>");
+        }
+        fullHtml.append("</body></html>");
+
+        var crawledDoc = new CrawledDocument(
+                "encyclopedia.marginalia.nu",
+                fullUrl,
+                "text/html",
+                LocalDateTime.now().toString(),
+                200,
+                "OK",
+                "NP",
+                "",
+                fullHtml.toString(),
+                Integer.toHexString(fullHtml.hashCode()),
+                fullUrl,
+                "",
+                "SIDELOAD"
+        );
+
+        var ret = new ProcessedDocument();
+        try {
+            var details = htmlProcessorPlugin.createDetails(crawledDoc);
+
+            ret.words = details.words();
+            ret.details = details.details();
+            ret.url = new EdgeUrl(fullUrl);
+            ret.state = UrlIndexingState.OK;
+            ret.stateReason = "SIDELOAD";
+        }
+        catch (Exception e) {
+            ret.url = new EdgeUrl(fullUrl);
+            ret.state = UrlIndexingState.DISQUALIFIED;
+            ret.stateReason = "SIDELOAD";
+        }
+
+        return ret;
+    }
+
+    private <T> T fromCompressedJson(byte[] stream, Class<T> type) throws IOException {
+        return gson.fromJson(new InputStreamReader(new ZstdInputStream(new ByteArrayInputStream(stream))), type);
+    }
+
+    private record ArticleParts(List<String> parts) {}
+
+    @Override
+    public String getId() {
+        return "encyclopedia.marginalia.nu";
+    }
+
+    @Override
+    public void close() throws Exception {
+        connection.close();
+    }
+
+    private abstract static class SqlQueryIterator<T> implements Iterator<T> {
+        PreparedStatement stmt;
+        ResultSet rs;
+        T next = null;
+
+        public SqlQueryIterator(PreparedStatement stmt) throws SQLException {
+            this.stmt = stmt;
+            stmt.setFetchSize(1000);
+            rs = stmt.executeQuery();
+        }
+
+        @SneakyThrows
+        @Override
+        public boolean hasNext() {
+            if (next != null) {
+                return true;
+            }
+            if (!rs.next()) {
+                stmt.close();
+                return false;
+            }
+
+            next = convert(rs);
+
+            return true;
+        }
+
+        public abstract T convert(ResultSet rs) throws Exception;
+
+        @Override
+        public T next() {
+            if (!hasNext())
+                throw new IllegalStateException("No next element");
+            var ret = next;
+            next = null;
+            return ret;
+        }
+    }
+}
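A note on getDocumentsStream() above, since its shape is easy to misread: a single reader thread walks the sqlite result set and fans conversion out to a 16-thread pool, while the returned iterator simply drains the bounded queue. Backpressure comes from two places: the queue holds at most 32 finished documents, and the semaphore caps in-flight conversions at 16. hasNext() keeps the stream alive while the reader is still running, documents are still queued, or conversions are still in flight (availablePermits() < 16). One hedged caveat: LinkedBlockingQueue.add() throws rather than blocks when the queue is full, so docs.put() may be what is intended here once the WIP settles.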
SideloadSource.java (new file):

@@ -0,0 +1,15 @@
+package nu.marginalia.converting.sideload;
+
+import nu.marginalia.converting.model.ProcessedDocument;
+import nu.marginalia.converting.model.ProcessedDomain;
+import nu.marginalia.model.EdgeUrl;
+
+import java.util.Iterator;
+
+public interface SideloadSource {
+    ProcessedDomain getDomain();
+    Iterator<EdgeUrl> getUrlsIterator();
+    Iterator<ProcessedDocument> getDocumentsStream();
+
+    String getId();
+}
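The interface is deliberately small, which is what makes it a plausible pilot for sideloading other large sites. A hypothetical minimal implementation, just to illustrate the contract (not part of this commit; real sources stream from disk like the encyclopedia sideloader above):

import nu.marginalia.converting.model.ProcessedDocument;
import nu.marginalia.converting.model.ProcessedDomain;
import nu.marginalia.model.EdgeUrl;

import java.util.Iterator;
import java.util.List;

// Hypothetical example implementation of SideloadSource
class InMemorySideloadSource implements SideloadSource {
    private final ProcessedDomain domain;
    private final List<EdgeUrl> urls;
    private final List<ProcessedDocument> documents;

    InMemorySideloadSource(ProcessedDomain domain, List<EdgeUrl> urls, List<ProcessedDocument> documents) {
        this.domain = domain;
        this.urls = urls;
        this.documents = documents;
    }

    @Override
    public ProcessedDomain getDomain() { return domain; }

    // InstructionsCompiler.compileStreaming() consumes each iterator in a
    // single pass, so one-shot iterators over a database cursor also work
    @Override
    public Iterator<EdgeUrl> getUrlsIterator() { return urls.iterator(); }

    @Override
    public Iterator<ProcessedDocument> getDocumentsStream() { return documents.iterator(); }

    @Override
    public String getId() { return domain.domain.toString(); }
}

Note that getId() does double duty in ConverterMain.convert(): it names the instruction file and keys the work-log entry.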
SideloadSourceFactory.java (new file):

@@ -0,0 +1,23 @@
+package nu.marginalia.converting.sideload;
+
+import com.google.gson.Gson;
+import com.google.inject.Inject;
+import nu.marginalia.converting.processor.plugin.HtmlDocumentProcessorPlugin;
+
+import java.nio.file.Path;
+import java.sql.SQLException;
+
+public class SideloadSourceFactory {
+    private final Gson gson;
+    private final HtmlDocumentProcessorPlugin htmlProcessorPlugin;
+
+    @Inject
+    public SideloadSourceFactory(Gson gson, HtmlDocumentProcessorPlugin htmlProcessorPlugin) {
+        this.gson = gson;
+        this.htmlProcessorPlugin = htmlProcessorPlugin;
+    }
+
+    public SideloadSource sideloadEncyclopediaMarginaliaNu(Path pathToDbFile) throws SQLException {
+        return new EncyclopediaMarginaliaNuSideloader(pathToDbFile, gson, htmlProcessorPlugin);
+    }
+}
ConvertedDomainReader.java:

@@ -2,10 +2,8 @@ package nu.marginalia.loading;
 
 import com.github.luben.zstd.ZstdInputStream;
 import com.google.gson.Gson;
-import com.google.gson.JsonParseException;
+import lombok.SneakyThrows;
 import nu.marginalia.converting.instruction.Instruction;
-import nu.marginalia.converting.instruction.InstructionTag;
-import org.apache.commons.lang3.StringUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 

@@ -13,6 +11,7 @@ import javax.inject.Inject;
 import java.io.*;
 import java.nio.file.Path;
 import java.util.ArrayList;
+import java.util.Iterator;
 import java.util.List;
 
 public class ConvertedDomainReader {

@@ -27,30 +26,48 @@ public class ConvertedDomainReader {
     public List<Instruction> read(Path path, int cntHint) throws IOException {
         List<Instruction> ret = new ArrayList<>(cntHint);
 
-        try (var br = new BufferedReader(new InputStreamReader(new ZstdInputStream(new BufferedInputStream(new FileInputStream(path.toFile())))))) {
-            String line;
-            for (;;) {
-                line = br.readLine();
-
-                if (line == null) {
-                    break;
-                }
-                if (line.isBlank()) {
-                    continue;
-                }
-                var parts = line.split(" ", 2);
-                var type = InstructionTag.valueOf(parts[0]).clazz;
-
-                try {
-                    ret.add(gson.fromJson(parts[1], type));
-                }
-                catch (NullPointerException|JsonParseException ex) {
-                    logger.warn("Failed to deserialize {} {}", type.getSimpleName(), StringUtils.abbreviate(parts[1], 255));
-                    logger.warn("Json error", ex);
-                }
+        try (var or = new ObjectInputStream(new ZstdInputStream(new FileInputStream(path.toFile())))) {
+            var object = or.readObject();
+            if (object instanceof Instruction is) {
+                ret.add(is);
             }
+        } catch (ClassNotFoundException e) {
+            throw new RuntimeException(e);
         }
 
         return ret;
     }
+
+    public Iterator<Instruction> createIterator(Path path) throws IOException {
+        var or = new ObjectInputStream(new ZstdInputStream(new BufferedInputStream(new FileInputStream(path.toFile()))));
+
+        return new Iterator<>() {
+            Instruction next;
+            @SneakyThrows
+            @Override
+            public boolean hasNext() {
+                if (next != null)
+                    return true;
+
+                try {
+                    next = (Instruction) or.readObject();
+                    return true;
+                }
+                catch (java.io.EOFException ex) {
+                    or.close();
+                    return false;
+                }
+            }
+
+            @Override
+            public Instruction next() {
+                if (next != null || hasNext()) {
+                    var ret = next;
+                    next = null;
+                    return ret;
+                }
+                throw new IllegalStateException();
+            }
+        };
+    }
 }
LoaderMain.java:

@@ -23,7 +23,7 @@ import org.slf4j.LoggerFactory;
 
 import java.nio.file.Path;
 import java.sql.SQLException;
-import java.util.List;
+import java.util.Iterator;
 import java.util.Optional;
 import java.util.UUID;
 import java.util.concurrent.LinkedBlockingQueue;

@@ -55,8 +55,13 @@ public class LoaderMain {
         );
 
         var instance = injector.getInstance(LoaderMain.class);
-        var instructions = instance.fetchInstructions();
-        instance.run(instructions);
+        try {
+            var instructions = instance.fetchInstructions();
+            instance.run(instructions);
+        }
+        catch (Exception ex) {
+            logger.error("Error running loader", ex);
+        }
     }
 
     @Inject

@@ -101,7 +106,19 @@ public class LoaderMain {
             for (var entry : WorkLog.iterable(logFile)) {
                 heartbeat.setProgress(loaded++ / (double) loadTotal);
 
-                load(plan, entry.path(), entry.cnt());
+                var loader = loaderFactory.create(entry.cnt());
+                Path destDir = plan.getProcessedFilePath(entry.path());
+
+                var instructionsIter = instructionsReader.createIterator(destDir);
+                while (instructionsIter.hasNext()) {
+                    var next = instructionsIter.next();
+                    try {
+                        next.apply(loader);
+                    }
+                    catch (Exception ex) {
+                        logger.error("Failed to load instruction {}", next);
+                    }
+                }
             }
 
             running = false;

@@ -110,6 +127,7 @@ public class LoaderMain {
 
             // This needs to be done in order to have a readable index journal
             indexLoadKeywords.close();
+            logger.info("Loading finished");
         }
         catch (Exception ex) {
             logger.error("Failed to load", ex);

@@ -119,6 +137,7 @@ public class LoaderMain {
         finally {
             heartbeat.shutDown();
         }
+
         System.exit(0);
     }
 

@@ -128,7 +147,7 @@ public class LoaderMain {
         Path destDir = plan.getProcessedFilePath(path);
         try {
             var loader = loaderFactory.create(cnt);
-            var instructions = instructionsReader.read(destDir, cnt);
+            var instructions = instructionsReader.createIterator(destDir);
             processQueue.put(new LoadJob(path, loader, instructions));
         } catch (Exception e) {
             logger.error("Failed to load " + destDir, e);

@@ -137,15 +156,16 @@ public class LoaderMain {
 
     static final TaskStats taskStats = new TaskStats(100);
 
-    private record LoadJob(String path, Loader loader, List<Instruction> instructionList) {
+    private record LoadJob(String path, Loader loader, Iterator<Instruction> instructionIterator) {
         public void run() {
             long startTime = System.currentTimeMillis();
-            for (var i : instructionList) {
+            while (instructionIterator.hasNext()) {
+                var next = instructionIterator.next();
                 try {
-                    i.apply(loader);
+                    next.apply(loader);
                 }
                 catch (Exception ex) {
-                    logger.error("Failed to load instruction {}", i);
+                    logger.error("Failed to load instruction {}", next);
                 }
             }
 
Loader.java:

@@ -86,40 +86,24 @@ public class Loader implements Interpreter {
 
     @Override
     public void loadProcessedDocument(LoadProcessedDocument document) {
-        deferralCheck(document.url());
-
         processedDocumentList.add(document);
+        if (processedDocumentList.size() > 100) {
+            sqlLoadProcessedDocument.load(data, processedDocumentList);
+            processedDocumentList.clear();
+        }
     }
 
     @Override
     public void loadProcessedDocumentWithError(LoadProcessedDocumentWithError document) {
-        deferralCheck(document.url());
-
         processedDocumentWithErrorList.add(document);
-    }
-
-    private void deferralCheck(EdgeUrl url) {
-        if (data.getDomainId(url.domain) <= 0)
-            deferredDomains.add(url.domain);
-
-        if (data.getUrlId(url) <= 0)
-            deferredUrls.add(url);
+        if (processedDocumentWithErrorList.size() > 100) {
+            sqlLoadProcessedDocument.loadWithError(data, processedDocumentWithErrorList);
+            processedDocumentWithErrorList.clear();
+        }
     }
 
     @Override
     public void loadKeywords(EdgeUrl url, DocumentMetadata metadata, DocumentKeywords words) {
-        // This is a bit of a bandaid safeguard against a bug in
-        // in the converter, shouldn't be necessary in the future
-        if (!deferredDomains.isEmpty()) {
-            loadDomain(deferredDomains.toArray(EdgeDomain[]::new));
-            deferredDomains.clear();
-        }
-
-        if (!deferredUrls.isEmpty()) {
-            loadUrl(deferredUrls.toArray(EdgeUrl[]::new));
-            deferredUrls.clear();
-        }
-
         try {
             indexLoadKeywords.load(data, url, metadata, words);
         } catch (InterruptedException e) {

@@ -140,8 +124,12 @@ public class Loader implements Interpreter {
     public void finish() {
         // Some work needs to be processed out of order for the database relations to work out
 
-        sqlLoadProcessedDocument.load(data, processedDocumentList);
-        sqlLoadProcessedDocument.loadWithError(data, processedDocumentWithErrorList);
+        if (processedDocumentList.size() > 0) {
+            sqlLoadProcessedDocument.load(data, processedDocumentList);
+        }
+        if (processedDocumentWithErrorList.size() > 0) {
+            sqlLoadProcessedDocument.loadWithError(data, processedDocumentWithErrorList);
+        }
     }
 
 }
SqlLoadProcessedDocument.java:

@@ -73,7 +73,7 @@ public class SqlLoadProcessedDocument {
                 int urlId = data.getUrlId(doc.url());
                 if (urlId <= 0) {
                     logger.warn("Failed to resolve ID for URL {}", doc.url());
-                    return;
+                    continue;
                 }
 
                 stmt.setInt(1, urlId);
SqlLoadProcessedDomain.java:

@@ -14,12 +14,14 @@ import java.sql.SQLException;
 public class SqlLoadProcessedDomain {
     private final HikariDataSource dataSource;
     private final SqlLoadDomains loadDomains;
+    private final SqlLoadUrls loadUrls;
     private static final Logger logger = LoggerFactory.getLogger(SqlLoadProcessedDomain.class);
 
     @Inject
-    public SqlLoadProcessedDomain(HikariDataSource dataSource, SqlLoadDomains loadDomains) {
+    public SqlLoadProcessedDomain(HikariDataSource dataSource, SqlLoadDomains loadDomains, SqlLoadUrls loadUrls) {
         this.dataSource = dataSource;
         this.loadDomains = loadDomains;
+        this.loadUrls = loadUrls;
 
 
         try (var conn = dataSource.getConnection()) {

@@ -34,7 +36,7 @@ public class SqlLoadProcessedDomain {
             BEGIN
                 DELETE FROM DOMAIN_METADATA WHERE ID=DID;
                 DELETE FROM EC_DOMAIN_LINK WHERE SOURCE_DOMAIN_ID=DID;
-                DELETE FROM EC_URL WHERE DOMAIN_ID = DID;
+                DELETE FROM EC_PAGE_DATA WHERE ID IN (SELECT ID FROM EC_URL WHERE DOMAIN_ID = DID);
                 UPDATE EC_DOMAIN SET INDEX_DATE=NOW(), STATE=ST, DOMAIN_ALIAS=NULL, INDEXED=GREATEST(INDEXED,IDX), IP=IP WHERE ID=DID;
                 DELETE FROM EC_DOMAIN_LINK WHERE SOURCE_DOMAIN_ID=DID;
             END

@@ -47,6 +49,7 @@ public class SqlLoadProcessedDomain {
     }
 
     public void load(LoaderData data, EdgeDomain domain, DomainIndexingState state, String ip) {
+
         data.setTargetDomain(domain);
 
         loadDomains.load(data, domain);

@@ -63,6 +66,8 @@ public class SqlLoadProcessedDomain {
             if (rc < 1) {
                 logger.warn("load({},{}) -- bad rowcount {}", domain, state, rc);
             }
+
+            loadUrls.loadUrlsForDomain(data, domain, 0);
         }
         catch (SQLException ex) {
             logger.warn("SQL error initializing domain", ex);
|
@ -30,23 +30,35 @@ public class SqlLoadUrls {
|
|||||||
public void load(LoaderData data, EdgeUrl[] urls) {
|
public void load(LoaderData data, EdgeUrl[] urls) {
|
||||||
Set<EdgeDomain> affectedDomains = new HashSet<>();
|
Set<EdgeDomain> affectedDomains = new HashSet<>();
|
||||||
|
|
||||||
|
if (urls.length == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
int maxOldId = 0;
|
||||||
try (var conn = dataSource.getConnection();
|
try (var conn = dataSource.getConnection();
|
||||||
var insertCall = conn.prepareStatement("INSERT IGNORE INTO EC_URL (PROTO,DOMAIN_ID,PORT,PATH,PARAM,PATH_HASH) VALUES (?,?,?,?,?,?)");
|
var insertCall = conn.prepareStatement("INSERT IGNORE INTO EC_URL (PROTO,DOMAIN_ID,PORT,PATH,PARAM,PATH_HASH) VALUES (?,?,?,?,?,?)");
|
||||||
var queryCall = conn.prepareStatement("SELECT ID, PROTO, PATH, PARAM FROM EC_URL WHERE DOMAIN_ID=?")
|
var queryMaxId = conn.prepareStatement("SELECT MAX(ID) FROM EC_URL"))
|
||||||
)
|
|
||||||
{
|
{
|
||||||
conn.setAutoCommit(false);
|
conn.setAutoCommit(false);
|
||||||
|
var rs = queryMaxId.executeQuery();
|
||||||
|
if (rs.next()) {
|
||||||
|
maxOldId = rs.getInt(1);
|
||||||
|
}
|
||||||
|
|
||||||
int cnt = 0; int batchOffset = 0;
|
int cnt = 0; int batchOffset = 0;
|
||||||
|
|
||||||
for (var url : urls) {
|
for (var url : urls) {
|
||||||
|
if (data.getUrlId(url) != 0)
|
||||||
|
continue;
|
||||||
if (url.path.length() >= 255) {
|
if (url.path.length() >= 255) {
|
||||||
logger.debug("Skipping bad URL {}", url);
|
logger.info("Skipping bad URL {}", url);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
var domainId = data.getDomainId(url.domain);
|
||||||
|
|
||||||
affectedDomains.add(url.domain);
|
affectedDomains.add(url.domain);
|
||||||
|
|
||||||
insertCall.setString(1, url.proto);
|
insertCall.setString(1, url.proto);
|
||||||
insertCall.setInt(2, data.getDomainId(url.domain));
|
insertCall.setInt(2, domainId);
|
||||||
if (url.port != null) {
|
if (url.port != null) {
|
||||||
insertCall.setInt(3, url.port);
|
insertCall.setInt(3, url.port);
|
||||||
}
|
}
|
||||||
@ -58,10 +70,8 @@ public class SqlLoadUrls {
|
|||||||
insertCall.setLong(6, hashPath(url.path, url.param));
|
insertCall.setLong(6, hashPath(url.path, url.param));
|
||||||
insertCall.addBatch();
|
insertCall.addBatch();
|
||||||
|
|
||||||
if (cnt++ == 1000) {
|
if (++cnt == 1000) {
|
||||||
var ret = insertCall.executeBatch();
|
var ret = insertCall.executeBatch();
|
||||||
conn.commit();
|
|
||||||
|
|
||||||
for (int rv = 0; rv < cnt; rv++) {
|
for (int rv = 0; rv < cnt; rv++) {
|
||||||
if (ret[rv] < 0 && ret[rv] != SUCCESS_NO_INFO) {
|
if (ret[rv] < 0 && ret[rv] != SUCCESS_NO_INFO) {
|
||||||
logger.warn("load({}) -- bad row count {}", urls[batchOffset + rv], ret[rv]);
|
logger.warn("load({}) -- bad row count {}", urls[batchOffset + rv], ret[rv]);
|
||||||
@ -72,10 +82,9 @@ public class SqlLoadUrls {
|
|||||||
cnt = 0;
|
cnt = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cnt > 0) {
|
if (cnt > 0) {
|
||||||
var ret = insertCall.executeBatch();
|
var ret = insertCall.executeBatch();
|
||||||
conn.commit();
|
|
||||||
|
|
||||||
for (int rv = 0; rv < cnt; rv++) {
|
for (int rv = 0; rv < cnt; rv++) {
|
||||||
if (ret[rv] < 0 && ret[rv] != SUCCESS_NO_INFO) {
|
if (ret[rv] < 0 && ret[rv] != SUCCESS_NO_INFO) {
|
||||||
logger.warn("load({}) -- bad row count {}", urls[batchOffset + rv], ret[rv]);
|
logger.warn("load({}) -- bad row count {}", urls[batchOffset + rv], ret[rv]);
|
||||||
@ -83,24 +92,12 @@ public class SqlLoadUrls {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
conn.commit();
|
||||||
conn.setAutoCommit(true);
|
conn.setAutoCommit(true);
|
||||||
|
|
||||||
|
|
||||||
for (var domain : affectedDomains) {
|
for (var domain : affectedDomains) {
|
||||||
queryCall.setInt(1, data.getDomainId(domain));
|
loadUrlsForDomain(data, domain, maxOldId);
|
||||||
var rsp = queryCall.executeQuery();
|
|
||||||
rsp.setFetchSize(1000);
|
|
||||||
|
|
||||||
while (rsp.next()) {
|
|
||||||
int urlId = rsp.getInt(1);
|
|
||||||
String proto = rsp.getString(2);
|
|
||||||
String path = rsp.getString(3);
|
|
||||||
String param = rsp.getString(4);
|
|
||||||
|
|
||||||
data.addUrl(new EdgeUrl(proto, domain, null, path, param), urlId);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
catch (SQLException ex) {
|
catch (SQLException ex) {
|
||||||
logger.warn("SQL error inserting URLs", ex);
|
logger.warn("SQL error inserting URLs", ex);
|
||||||
@ -121,4 +118,27 @@ public class SqlLoadUrls {
|
|||||||
return pathHash + murmur3_128.hashString(queryParam, StandardCharsets.UTF_8).padToLong();
|
return pathHash + murmur3_128.hashString(queryParam, StandardCharsets.UTF_8).padToLong();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Loads urlIDs for the domain into `data` from the database, starting at URL ID minId. */
|
||||||
|
public void loadUrlsForDomain(LoaderData data, EdgeDomain domain, int minId) throws SQLException {
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
var queryCall = conn.prepareStatement("SELECT ID, PROTO, PATH, PARAM FROM EC_URL WHERE DOMAIN_ID=? AND ID > ?")) {
|
||||||
|
|
||||||
|
queryCall.setInt(1, data.getDomainId(domain));
|
||||||
|
queryCall.setInt(2, minId);
|
||||||
|
|
||||||
|
var rsp = queryCall.executeQuery();
|
||||||
|
rsp.setFetchSize(1000);
|
||||||
|
|
||||||
|
while (rsp.next()) {
|
||||||
|
int urlId = rsp.getInt(1);
|
||||||
|
String proto = rsp.getString(2);
|
||||||
|
String path = rsp.getString(3);
|
||||||
|
String param = rsp.getString(4);
|
||||||
|
|
||||||
|
data.addUrl(new EdgeUrl(proto, domain, null, path, param), urlId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
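The reshuffle above is worth a sentence: previously every load re-queried all of EC_URL for each affected domain to learn the generated ids. The new code snapshots MAX(ID) before inserting, commits once at the end, and then calls loadUrlsForDomain(data, domain, maxOldId) so only rows with ID greater than maxOldId (the URLs just inserted) are read back, with already-known URLs skipped on the way in via the data.getUrlId(url) != 0 check. SqlLoadProcessedDomain reuses the same helper with minId = 0 to preload all existing URL ids for a domain.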
SqlLoadProcessedDomainTest.java:

@@ -5,6 +5,7 @@ import nu.marginalia.loading.loader.LoaderData;
 import nu.marginalia.loading.loader.SqlLoadDomains;
 import nu.marginalia.loading.loader.SqlLoadProcessedDomain;
 import nu.marginalia.converting.instruction.instructions.DomainLink;
+import nu.marginalia.loading.loader.SqlLoadUrls;
 import nu.marginalia.model.EdgeDomain;
 import nu.marginalia.model.crawl.DomainIndexingState;
 import org.junit.jupiter.api.AfterEach;

@@ -50,18 +51,18 @@ class SqlLoadProcessedDomainTest {
 
     @Test
     public void loadProcessedDomain() {
-        var loader = new SqlLoadProcessedDomain(dataSource, new SqlLoadDomains(dataSource));
+        var loader = new SqlLoadProcessedDomain(dataSource, new SqlLoadDomains(dataSource), new SqlLoadUrls(dataSource));
         loader.load(loaderData, new EdgeDomain("www.marginalia.nu"), DomainIndexingState.BLOCKED, "127.0.0.1");
     }
     @Test
     public void loadProcessedDomainTwice() {
-        var loader = new SqlLoadProcessedDomain(dataSource, new SqlLoadDomains(dataSource));
+        var loader = new SqlLoadProcessedDomain(dataSource, new SqlLoadDomains(dataSource), new SqlLoadUrls(dataSource));
         loader.load(loaderData, new EdgeDomain("www.marginalia.nu"), DomainIndexingState.BLOCKED, "127.0.0.1");
     }
 
     @Test
     public void loadProcessedDomaiWithExtremelyLongIP() {
-        var loader = new SqlLoadProcessedDomain(dataSource, new SqlLoadDomains(dataSource));
+        var loader = new SqlLoadProcessedDomain(dataSource, new SqlLoadDomains(dataSource), new SqlLoadUrls(dataSource));
 
         String ip = Stream.generate(() -> "127.").limit(1024).collect(Collectors.joining());
 

@@ -70,7 +71,7 @@ class SqlLoadProcessedDomainTest {
 
     @Test
     public void loadDomainAlias() {
-        var loader = new SqlLoadProcessedDomain(dataSource, new SqlLoadDomains(dataSource));
+        var loader = new SqlLoadProcessedDomain(dataSource, new SqlLoadDomains(dataSource), new SqlLoadUrls(dataSource));
         loader.loadAlias(loaderData, new DomainLink(new EdgeDomain("memex.marginalia.nu"), new EdgeDomain("www.marginalia.nu")));
     }
 }
ReconvertAndLoadActor.java:

@@ -10,6 +10,7 @@ import nu.marginalia.control.svc.ProcessOutboxFactory;
 import nu.marginalia.control.svc.ProcessService;
 import nu.marginalia.index.client.IndexClient;
 import nu.marginalia.index.client.IndexMqEndpoints;
+import nu.marginalia.mqapi.converting.ConvertAction;
 import nu.marginalia.mqapi.converting.ConvertRequest;
 import nu.marginalia.mqapi.loading.LoadRequest;
 import nu.marginalia.db.storage.FileStorageService;

@@ -121,7 +122,10 @@ public class ReconvertAndLoadActor extends AbstractStateGraph {
         storageService.relateFileStorages(toProcess.id(), processedArea.id());
 
         // Pre-send convert request
-        var request = new ConvertRequest(message.crawlStorageId, processedArea.id());
+        var request = new ConvertRequest(ConvertAction.ConvertCrawlData,
+                null,
+                message.crawlStorageId,
+                processedArea.id());
         long id = mqConverterOutbox.sendAsync(ConvertRequest.class.getSimpleName(), gson.toJson(request));
 
         return message
Gradle version catalog (dependencyResolutionManagement block):

@@ -175,6 +175,8 @@ dependencyResolutionManagement {
         library('handlebars','com.github.jknack','handlebars').version('4.3.1')
         library('handlebars.markdown','com.github.jknack','handlebars-markdown').version('4.2.1')
 
+        library('sqlite','org.xerial','sqlite-jdbc').version('3.41.2.1')
+
         bundle('slf4j', ['slf4j.api', 'log4j.api', 'log4j.core', 'log4j.slf4j'])
         bundle('slf4j.test', ['slf4j.jdk14'])
         bundle('prometheus', ['prometheus', 'prometheus-servlet', 'prometheus-server', 'prometheus-hotspot'])