mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(loader) Add heartbeat to update domain-ids step
This commit is contained in:
parent
2bb9f18411
commit
2ad564404e
@ -8,8 +8,6 @@ import lombok.Getter;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.ProcessConfiguration;
|
||||
import nu.marginalia.ProcessConfigurationModule;
|
||||
import nu.marginalia.service.ProcessMainClass;
|
||||
import nu.marginalia.storage.FileStorageService;
|
||||
import nu.marginalia.linkdb.docs.DocumentDbWriter;
|
||||
import nu.marginalia.loading.documents.DocumentLoaderService;
|
||||
import nu.marginalia.loading.documents.KeywordLoaderService;
|
||||
@ -22,7 +20,9 @@ import nu.marginalia.mq.MqMessageState;
|
||||
import nu.marginalia.mq.inbox.MqInboxResponse;
|
||||
import nu.marginalia.mq.inbox.MqSingleShotInbox;
|
||||
import nu.marginalia.process.control.ProcessHeartbeatImpl;
|
||||
import nu.marginalia.service.ProcessMainClass;
|
||||
import nu.marginalia.service.module.DatabaseModule;
|
||||
import nu.marginalia.storage.FileStorageService;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@ -103,7 +103,7 @@ public class LoaderMain extends ProcessMainClass {
|
||||
void run(LoadRequest instructions) {
|
||||
LoaderInputData inputData = instructions.getInputData();
|
||||
|
||||
DomainIdRegistry domainIdRegistry = domainService.getOrCreateDomainIds(inputData);
|
||||
DomainIdRegistry domainIdRegistry = domainService.getOrCreateDomainIds(heartbeat, inputData);
|
||||
|
||||
try {
|
||||
var results = ForkJoinPool.commonPool()
|
||||
|
@ -19,7 +19,9 @@ import java.io.IOException;
|
||||
import java.sql.Connection;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.SQLException;
|
||||
import java.util.*;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
@Singleton
|
||||
public class DomainLoaderService {
|
||||
@ -36,21 +38,29 @@ public class DomainLoaderService {
|
||||
this.nodeId = processConfiguration.node();
|
||||
}
|
||||
|
||||
/** Progress steps for the task heartbeat that getOrCreateDomainIds creates
 * under the name "DOMAIN_IDS"; each constant is passed to
 * taskHeartbeat.progress(...) as that method advances.
 */
enum Steps {
|
||||
PREP_DATA, // reported first, before the DomainInserter pass over the input data
|
||||
INSERT_NEW, // reported before the DomainAffinityAndIpUpdater pass over the input data
|
||||
FETCH_ALL, // reported before the per-domain "SELECT ID FROM EC_DOMAIN" lookups
|
||||
DONE // reported last, just before the try-with-resources block closes
|
||||
}
|
||||
/** Read the domain names from each parquet file,
|
||||
* compare them with the SQL domain database, fetch those
|
||||
* that exist, insert those that don't.
|
||||
*/
|
||||
public DomainIdRegistry getOrCreateDomainIds(LoaderInputData inputData)
|
||||
public DomainIdRegistry getOrCreateDomainIds(ProcessHeartbeatImpl heartbeat, LoaderInputData inputData)
|
||||
throws IOException, SQLException
|
||||
{
|
||||
Set<String> domainNamesAll = new HashSet<>(100_000);
|
||||
DomainIdRegistry ret = new DomainIdRegistry();
|
||||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var taskHeartbeat = heartbeat.createProcessTaskHeartbeat(Steps.class, "DOMAIN_IDS");
|
||||
var selectStmt = conn.prepareStatement("""
|
||||
SELECT ID FROM EC_DOMAIN WHERE DOMAIN_NAME=?
|
||||
""")
|
||||
) {
|
||||
taskHeartbeat.progress(Steps.PREP_DATA);
|
||||
|
||||
try (var inserter = new DomainInserter(conn, nodeId)) {
|
||||
for (var domainWithIp : readBasicDomainInformation(inputData)) {
|
||||
@ -65,12 +75,16 @@ public class DomainLoaderService {
|
||||
}
|
||||
}
|
||||
|
||||
taskHeartbeat.progress(Steps.INSERT_NEW);
|
||||
|
||||
try (var updater = new DomainAffinityAndIpUpdater(conn, nodeId)) {
|
||||
for (var domainWithIp : readBasicDomainInformation(inputData)) {
|
||||
updater.accept(new EdgeDomain(domainWithIp.domain), domainWithIp.ip);
|
||||
}
|
||||
}
|
||||
|
||||
taskHeartbeat.progress(Steps.FETCH_ALL);
|
||||
|
||||
selectStmt.setFetchSize(1000);
|
||||
for (var domain : domainNamesAll) {
|
||||
selectStmt.setString(1, domain);
|
||||
@ -82,6 +96,8 @@ public class DomainLoaderService {
|
||||
logger.error("Unknown domain {}", domain);
|
||||
}
|
||||
}
|
||||
|
||||
taskHeartbeat.progress(Steps.DONE);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
Loading…
Reference in New Issue
Block a user