2023-03-04 12:19:01 +00:00
|
|
|
package nu.marginalia.loading;
|
2022-05-19 15:45:26 +00:00
|
|
|
|
2023-07-14 15:08:10 +00:00
|
|
|
import com.google.gson.Gson;
|
2022-05-19 15:45:26 +00:00
|
|
|
import com.google.inject.Guice;
|
|
|
|
import com.google.inject.Inject;
|
|
|
|
import com.google.inject.Injector;
|
2024-01-08 18:56:33 +00:00
|
|
|
import nu.marginalia.linkdb.docs.DocumentDbWriter;
|
2023-09-13 14:13:41 +00:00
|
|
|
import nu.marginalia.loading.documents.DocumentLoaderService;
|
|
|
|
import nu.marginalia.loading.documents.KeywordLoaderService;
|
|
|
|
import nu.marginalia.loading.domains.DomainIdRegistry;
|
|
|
|
import nu.marginalia.loading.domains.DomainLoaderService;
|
|
|
|
import nu.marginalia.loading.links.DomainLinksLoaderService;
|
2023-07-14 15:08:10 +00:00
|
|
|
import nu.marginalia.mq.MessageQueueFactory;
|
2024-11-21 15:00:09 +00:00
|
|
|
import nu.marginalia.mqapi.loading.LoadRequest;
|
|
|
|
import nu.marginalia.process.ProcessConfiguration;
|
|
|
|
import nu.marginalia.process.ProcessConfigurationModule;
|
|
|
|
import nu.marginalia.process.ProcessMainClass;
|
2023-08-29 11:07:55 +00:00
|
|
|
import nu.marginalia.process.control.ProcessHeartbeatImpl;
|
2023-03-04 12:19:01 +00:00
|
|
|
import nu.marginalia.service.module.DatabaseModule;
|
2024-07-23 13:14:25 +00:00
|
|
|
import nu.marginalia.storage.FileStorageService;
|
2022-05-19 15:45:26 +00:00
|
|
|
import org.slf4j.Logger;
|
|
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
|
|
|
|
import java.nio.file.Path;
|
2023-09-22 11:14:58 +00:00
|
|
|
import java.util.ArrayList;
|
2023-09-14 08:11:57 +00:00
|
|
|
import java.util.List;
|
|
|
|
import java.util.concurrent.ForkJoinPool;
|
|
|
|
import java.util.concurrent.Future;
|
2023-07-14 15:08:10 +00:00
|
|
|
|
2023-07-17 11:57:32 +00:00
|
|
|
import static nu.marginalia.mqapi.ProcessInboxNames.LOADER_INBOX;
|
2022-05-19 15:45:26 +00:00
|
|
|
|
2024-01-13 16:12:18 +00:00
|
|
|
public class LoaderMain extends ProcessMainClass {
|
2022-07-18 15:22:22 +00:00
|
|
|
private static final Logger logger = LoggerFactory.getLogger(LoaderMain.class);
|
|
|
|
|
2023-08-29 11:07:55 +00:00
|
|
|
private final ProcessHeartbeatImpl heartbeat;
|
2023-07-14 15:08:10 +00:00
|
|
|
private final FileStorageService fileStorageService;
|
2024-01-08 14:53:13 +00:00
|
|
|
private final DocumentDbWriter documentDbWriter;
|
2023-09-13 14:13:41 +00:00
|
|
|
private final DomainLoaderService domainService;
|
|
|
|
private final DomainLinksLoaderService linksService;
|
|
|
|
private final KeywordLoaderService keywordLoaderService;
|
|
|
|
private final DocumentLoaderService documentLoaderService;
|
2022-05-19 15:45:26 +00:00
|
|
|
|
2023-11-22 17:31:27 +00:00
|
|
|
public static void main(String... args) {
|
|
|
|
try {
|
|
|
|
new org.mariadb.jdbc.Driver();
|
2023-03-04 12:19:01 +00:00
|
|
|
|
2023-11-22 17:31:27 +00:00
|
|
|
Injector injector = Guice.createInjector(
|
|
|
|
new ProcessConfigurationModule("loader"),
|
|
|
|
new LoaderModule(),
|
2024-01-11 11:40:03 +00:00
|
|
|
new DatabaseModule(false)
|
2023-11-22 17:31:27 +00:00
|
|
|
);
|
|
|
|
|
|
|
|
var instance = injector.getInstance(LoaderMain.class);
|
2022-05-19 15:45:26 +00:00
|
|
|
|
2024-11-21 15:00:09 +00:00
|
|
|
var instructions = instance.fetchInstructions(LoadRequest.class);
|
2023-07-28 20:00:07 +00:00
|
|
|
logger.info("Instructions received");
|
2023-07-28 16:14:43 +00:00
|
|
|
instance.run(instructions);
|
|
|
|
}
|
2024-11-11 20:14:38 +00:00
|
|
|
catch (Throwable ex) {
|
2023-07-28 16:14:43 +00:00
|
|
|
logger.error("Error running loader", ex);
|
|
|
|
}
|
2022-05-19 15:45:26 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
@Inject
|
2023-09-13 14:13:41 +00:00
|
|
|
public LoaderMain(ProcessHeartbeatImpl heartbeat,
|
2023-07-14 15:08:10 +00:00
|
|
|
MessageQueueFactory messageQueueFactory,
|
|
|
|
FileStorageService fileStorageService,
|
2024-01-08 14:53:13 +00:00
|
|
|
DocumentDbWriter documentDbWriter,
|
2023-09-13 14:13:41 +00:00
|
|
|
DomainLoaderService domainService,
|
|
|
|
DomainLinksLoaderService linksService,
|
|
|
|
KeywordLoaderService keywordLoaderService,
|
|
|
|
DocumentLoaderService documentLoaderService,
|
2023-10-14 10:07:40 +00:00
|
|
|
ProcessConfiguration processConfiguration,
|
2023-07-14 15:08:10 +00:00
|
|
|
Gson gson
|
2023-07-11 12:46:21 +00:00
|
|
|
) {
|
2024-11-21 15:00:09 +00:00
|
|
|
|
|
|
|
super(messageQueueFactory, processConfiguration, gson, LOADER_INBOX);
|
|
|
|
|
2023-07-11 12:46:21 +00:00
|
|
|
this.heartbeat = heartbeat;
|
2023-07-14 15:08:10 +00:00
|
|
|
this.fileStorageService = fileStorageService;
|
2024-01-08 14:53:13 +00:00
|
|
|
this.documentDbWriter = documentDbWriter;
|
2023-09-13 14:13:41 +00:00
|
|
|
this.domainService = domainService;
|
|
|
|
this.linksService = linksService;
|
|
|
|
this.keywordLoaderService = keywordLoaderService;
|
|
|
|
this.documentLoaderService = documentLoaderService;
|
2023-07-11 12:46:21 +00:00
|
|
|
|
|
|
|
heartbeat.start();
|
2022-05-19 15:45:26 +00:00
|
|
|
}
|
|
|
|
|
2024-11-21 15:00:09 +00:00
|
|
|
void run(Instructions<LoadRequest> instructions) throws Throwable {
|
|
|
|
|
|
|
|
List<Path> inputSources = new ArrayList<>();
|
|
|
|
for (var storageId : instructions.value().inputProcessDataStorageIds) {
|
|
|
|
inputSources.add(fileStorageService.getStorage(storageId).asPath());
|
|
|
|
}
|
|
|
|
var inputData = new LoaderInputData(inputSources);
|
2023-07-31 12:22:24 +00:00
|
|
|
|
2024-07-23 13:14:25 +00:00
|
|
|
DomainIdRegistry domainIdRegistry = domainService.getOrCreateDomainIds(heartbeat, inputData);
|
2023-07-31 12:22:24 +00:00
|
|
|
|
2023-09-13 14:13:41 +00:00
|
|
|
try {
|
2023-09-14 08:11:57 +00:00
|
|
|
var results = ForkJoinPool.commonPool()
|
|
|
|
.invokeAll(
|
|
|
|
List.of(
|
2023-09-22 11:14:58 +00:00
|
|
|
() -> linksService.loadLinks(domainIdRegistry, heartbeat, inputData),
|
|
|
|
() -> keywordLoaderService.loadKeywords(domainIdRegistry, heartbeat, inputData),
|
2023-10-20 11:03:27 +00:00
|
|
|
() -> documentLoaderService.loadDocuments(domainIdRegistry, heartbeat, inputData),
|
|
|
|
() -> domainService.loadDomainMetadata(domainIdRegistry, heartbeat, inputData)
|
2023-09-14 08:11:57 +00:00
|
|
|
)
|
|
|
|
);
|
|
|
|
|
|
|
|
for (var result : results) {
|
|
|
|
if (result.state() == Future.State.FAILED) {
|
|
|
|
throw result.exceptionNow();
|
|
|
|
}
|
|
|
|
}
|
2022-05-25 16:02:19 +00:00
|
|
|
|
2023-07-14 15:08:10 +00:00
|
|
|
instructions.ok();
|
|
|
|
}
|
|
|
|
catch (Exception ex) {
|
|
|
|
instructions.err();
|
2023-09-13 14:13:41 +00:00
|
|
|
logger.error("Error", ex);
|
2023-07-11 12:46:21 +00:00
|
|
|
}
|
|
|
|
finally {
|
2024-06-25 20:17:26 +00:00
|
|
|
keywordLoaderService.close();
|
2024-01-08 14:53:13 +00:00
|
|
|
documentDbWriter.close();
|
2023-07-11 12:46:21 +00:00
|
|
|
heartbeat.shutDown();
|
|
|
|
}
|
2023-07-28 16:14:43 +00:00
|
|
|
|
2022-05-25 16:02:19 +00:00
|
|
|
System.exit(0);
|
2022-05-19 15:45:26 +00:00
|
|
|
}
|
|
|
|
|
2023-07-17 10:27:27 +00:00
|
|
|
|
2022-05-19 15:45:26 +00:00
|
|
|
}
|