(loader) Don't truncate the entire links table on load

This behavior is an old vestige from the days of only having a single loader process.  We'd truncate the links table because doing inserts/updates was too slow.  This was also important because we had 32-bit IDs, and there are a lot of links between domains to go around...

Instead we delete the rows associated with the current node with a stored procedure PURGE_LINKS_TABLE.

We also update the PRIMARY KEY to a BIGINT.  We'll need to load the data in excess of a billion times to hit an ID rollover, so it'll be fine.
This commit is contained in:
Viktor Lofgren 2023-11-16 10:30:12 +01:00
parent fd77e62a13
commit f58a9f46be
2 changed files with 20 additions and 6 deletions

View File

@ -0,0 +1,10 @@
-- Redefine the ID column of EC_DOMAIN_LINK as BIGINT, keeping it NOT NULL and
-- AUTO_INCREMENT, so that repeated delete-and-reload cycles do not exhaust the ID range.
ALTER TABLE WMSA_prod.EC_DOMAIN_LINK
MODIFY COLUMN ID BIGINT NOT NULL AUTO_INCREMENT;
-- PURGE_LINKS_TABLE(nodeId): removes the link rows that belong to one node.
-- A row of EC_DOMAIN_LINK is deleted when its SOURCE_DOMAIN_ID equals the ID of an
-- EC_DOMAIN row matched by NODE_AFFINITY = nodeId.
-- NOTE(review): NODE_AFFINITY is unqualified; presumably a column of EC_DOMAIN -- confirm against the schema.
CREATE OR REPLACE PROCEDURE PURGE_LINKS_TABLE (IN nodeId INT)
BEGIN
-- Multi-table DELETE: only the table named before FROM (EC_DOMAIN_LINK) loses rows;
-- EC_DOMAIN is joined purely to filter by node.
DELETE EC_DOMAIN_LINK
FROM EC_DOMAIN_LINK INNER JOIN WMSA_prod.EC_DOMAIN
ON EC_DOMAIN_LINK.SOURCE_DOMAIN_ID = EC_DOMAIN.ID
WHERE NODE_AFFINITY = nodeId;
END;

View File

@ -3,6 +3,7 @@ package nu.marginalia.loading.links;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.ProcessConfiguration;
import nu.marginalia.io.processed.DomainLinkRecordParquetFileReader;
import nu.marginalia.io.processed.ProcessedDataFileNames;
import nu.marginalia.loading.LoaderInputData;
@ -23,10 +24,12 @@ public class DomainLinksLoaderService {
private final HikariDataSource dataSource;
private static final Logger logger = LoggerFactory.getLogger(DomainLinksLoaderService.class);
private final int nodeId;
/**
 * Injected constructor: keeps the data source and records the node id obtained from
 * {@code processConfiguration.node()}.
 * NOTE(review): this diff hunk shows both the removed one-argument signature and the
 * two-argument signature that replaces it, so it is not compilable as displayed.
 *
 * @param dataSource connection pool stored in the {@code dataSource} field
 * @param processConfiguration source of the node id stored in {@code nodeId}
 */
@Inject
public DomainLinksLoaderService(HikariDataSource dataSource) {
public DomainLinksLoaderService(HikariDataSource dataSource,
ProcessConfiguration processConfiguration) {
this.dataSource = dataSource;
this.nodeId = processConfiguration.node();
}
public boolean loadLinks(DomainIdRegistry domainIdRegistry,
@ -54,11 +57,12 @@ public class DomainLinksLoaderService {
}
/**
 * Clears link data from EC_DOMAIN_LINK using a connection taken from the data source.
 * The lines using {@code prepareCall} invoke the PURGE_LINKS_TABLE stored procedure with
 * this loader's {@code nodeId} bound as its single parameter.
 * NOTE(review): this diff hunk interleaves the removed TRUNCATE-based lines with their
 * replacements, so it is not compilable as displayed.
 *
 * @throws SQLException if obtaining the connection or executing the statement fails
 */
private void dropLinkData() throws SQLException {
logger.info("Truncating EC_DOMAIN_LINK");
logger.info("Clearing EC_DOMAIN_LINK");
try (var conn = dataSource.getConnection();
var stmt = conn.createStatement()) {
stmt.executeUpdate("TRUNCATE TABLE EC_DOMAIN_LINK");
var call = conn.prepareCall("CALL PURGE_LINKS_TABLE(?)")) {
// Bind the node id so that only this node's links are purged.
call.setInt(1, nodeId);
call.executeUpdate();
}
}
@ -84,7 +88,7 @@ public class DomainLinksLoaderService {
connection = dataSource.getConnection();
insertStatement = connection.prepareStatement("""
INSERT INTO EC_DOMAIN_LINK(SOURCE_DOMAIN_ID, DEST_DOMAIN_ID)
INSERT IGNORE INTO EC_DOMAIN_LINK(SOURCE_DOMAIN_ID, DEST_DOMAIN_ID)
VALUES (?, ?)
""");
}