mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(live-crawler) Alter DbDomainIdRegistry to make inserts if an id is missing, as this is apparently a rare scenario we need to deal with.
This commit is contained in:
parent
52eb5bc84f
commit
b941604135
@ -4,6 +4,7 @@ import com.google.gson.Gson;
|
|||||||
import com.google.inject.Guice;
|
import com.google.inject.Guice;
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.google.inject.Injector;
|
import com.google.inject.Injector;
|
||||||
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import nu.marginalia.WmsaHome;
|
import nu.marginalia.WmsaHome;
|
||||||
import nu.marginalia.api.feeds.FeedsClient;
|
import nu.marginalia.api.feeds.FeedsClient;
|
||||||
import nu.marginalia.converting.ConverterModule;
|
import nu.marginalia.converting.ConverterModule;
|
||||||
@ -56,6 +57,7 @@ public class LiveCrawlerMain extends ProcessMainClass {
|
|||||||
private final FileStorageService fileStorageService;
|
private final FileStorageService fileStorageService;
|
||||||
private final KeywordLoaderService keywordLoaderService;
|
private final KeywordLoaderService keywordLoaderService;
|
||||||
private final DocumentLoaderService documentLoaderService;
|
private final DocumentLoaderService documentLoaderService;
|
||||||
|
private final HikariDataSource dataSource;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public LiveCrawlerMain(FeedsClient feedsClient,
|
public LiveCrawlerMain(FeedsClient feedsClient,
|
||||||
@ -68,7 +70,7 @@ public class LiveCrawlerMain extends ProcessMainClass {
|
|||||||
DomainProcessor domainProcessor,
|
DomainProcessor domainProcessor,
|
||||||
FileStorageService fileStorageService,
|
FileStorageService fileStorageService,
|
||||||
KeywordLoaderService keywordLoaderService,
|
KeywordLoaderService keywordLoaderService,
|
||||||
DocumentLoaderService documentLoaderService)
|
DocumentLoaderService documentLoaderService, HikariDataSource dataSource)
|
||||||
throws Exception
|
throws Exception
|
||||||
{
|
{
|
||||||
super(messageQueueFactory, config, gson, LIVE_CRAWLER_INBOX);
|
super(messageQueueFactory, config, gson, LIVE_CRAWLER_INBOX);
|
||||||
@ -81,6 +83,7 @@ public class LiveCrawlerMain extends ProcessMainClass {
|
|||||||
this.fileStorageService = fileStorageService;
|
this.fileStorageService = fileStorageService;
|
||||||
this.keywordLoaderService = keywordLoaderService;
|
this.keywordLoaderService = keywordLoaderService;
|
||||||
this.documentLoaderService = documentLoaderService;
|
this.documentLoaderService = documentLoaderService;
|
||||||
|
this.dataSource = dataSource;
|
||||||
|
|
||||||
domainBlacklist.waitUntilLoaded();
|
domainBlacklist.waitUntilLoaded();
|
||||||
}
|
}
|
||||||
@ -201,7 +204,7 @@ public class LiveCrawlerMain extends ProcessMainClass {
|
|||||||
|
|
||||||
LoaderInputData lid = new LoaderInputData(tempPath, 1);
|
LoaderInputData lid = new LoaderInputData(tempPath, 1);
|
||||||
|
|
||||||
DomainIdRegistry domainIdRegistry = new DbDomainIdRegistry(domainQueries);
|
DomainIdRegistry domainIdRegistry = new DbDomainIdRegistry(dataSource);
|
||||||
|
|
||||||
keywordLoaderService.loadKeywords(domainIdRegistry, heartbeat, lid);
|
keywordLoaderService.loadKeywords(domainIdRegistry, heartbeat, lid);
|
||||||
documentLoaderService.loadDocuments(domainIdRegistry, heartbeat, lid);
|
documentLoaderService.loadDocuments(domainIdRegistry, heartbeat, lid);
|
||||||
|
@ -1,18 +1,55 @@
|
|||||||
package nu.marginalia.loading.domains;
|
package nu.marginalia.loading.domains;
|
||||||
|
|
||||||
import nu.marginalia.db.DbDomainQueries;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
|
|
||||||
public class DbDomainIdRegistry implements DomainIdRegistry {
|
import java.sql.Statement;
|
||||||
private final DbDomainQueries dbDomainQueries;
|
|
||||||
|
|
||||||
public DbDomainIdRegistry(DbDomainQueries dbDomainQueries) {
|
public class DbDomainIdRegistry implements DomainIdRegistry {
|
||||||
this.dbDomainQueries = dbDomainQueries;
|
private final HikariDataSource dataSource;
|
||||||
|
|
||||||
|
public DbDomainIdRegistry(HikariDataSource dataSource) {
|
||||||
|
this.dataSource = dataSource;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getDomainId(String domainName) {
|
public int getDomainId(String domainName) {
|
||||||
return dbDomainQueries.getDomainId(new EdgeDomain(domainName));
|
try (var conn = dataSource.getConnection();
|
||||||
|
var stmt = conn.prepareStatement("SELECT ID FROM EC_DOMAIN WHERE DOMAIN_NAME=?")) {
|
||||||
|
|
||||||
|
stmt.setString(1, domainName);
|
||||||
|
var rsp = stmt.executeQuery();
|
||||||
|
if (rsp.next()) {
|
||||||
|
return rsp.getInt(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
throw new RuntimeException("Failed to query domain ID", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Insert the domain if it doesn't exist (unlikely)
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
var stmt = conn.prepareStatement("INSERT IGNORE INTO EC_DOMAIN (DOMAIN_NAME, DOMAIN_TOP, NODE_AFFINITY) VALUES (?, ?, ?)",
|
||||||
|
Statement.RETURN_GENERATED_KEYS)) {
|
||||||
|
|
||||||
|
var domain = new EdgeDomain(domainName);
|
||||||
|
|
||||||
|
stmt.setString(1, domain.toString());
|
||||||
|
stmt.setString(2, domain.getTopDomain());
|
||||||
|
stmt.setInt(3, 0); // "up for grabs" node affinity
|
||||||
|
stmt.executeUpdate();
|
||||||
|
|
||||||
|
var gk = stmt.getGeneratedKeys();
|
||||||
|
if (gk.next()) {
|
||||||
|
return gk.getInt(1);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// recurse in the doubly unlikely event that the domain was inserted by another thread
|
||||||
|
return getDomainId(domainName);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -0,0 +1,44 @@
|
|||||||
|
package nu.marginalia.loading.domains;
|
||||||
|
|
||||||
|
import com.zaxxer.hikari.HikariConfig;
|
||||||
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
|
import nu.marginalia.test.TestMigrationLoader;
|
||||||
|
import org.junit.jupiter.api.Assertions;
|
||||||
|
import org.junit.jupiter.api.BeforeAll;
|
||||||
|
import org.junit.jupiter.api.Tag;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.testcontainers.containers.MariaDBContainer;
|
||||||
|
import org.testcontainers.junit.jupiter.Container;
|
||||||
|
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||||
|
|
||||||
|
@Testcontainers
|
||||||
|
@Tag("slow")
|
||||||
|
class DbDomainIdRegistryTest {
|
||||||
|
|
||||||
|
@Container
|
||||||
|
static MariaDBContainer<?> mariaDBContainer = new MariaDBContainer<>("mariadb")
|
||||||
|
.withDatabaseName("WMSA_prod")
|
||||||
|
.withUsername("wmsa")
|
||||||
|
.withPassword("wmsa")
|
||||||
|
.withNetworkAliases("mariadb");
|
||||||
|
|
||||||
|
static HikariDataSource dataSource;
|
||||||
|
|
||||||
|
@BeforeAll
|
||||||
|
public static void setup() {
|
||||||
|
HikariConfig config = new HikariConfig();
|
||||||
|
config.setJdbcUrl(mariaDBContainer.getJdbcUrl());
|
||||||
|
config.setUsername("wmsa");
|
||||||
|
config.setPassword("wmsa");
|
||||||
|
|
||||||
|
dataSource = new HikariDataSource(config);
|
||||||
|
|
||||||
|
TestMigrationLoader.flywayMigration(dataSource);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void getDomainId() {
|
||||||
|
Assertions.assertEquals(1, new DbDomainIdRegistry(dataSource).getDomainId("test.com"));
|
||||||
|
Assertions.assertEquals(1, new DbDomainIdRegistry(dataSource).getDomainId("test.com"));
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user