diff --git a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchAddToCrawlQueueService.java b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchAddToCrawlQueueService.java
index c43535dd..4e2d2a0d 100644
--- a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchAddToCrawlQueueService.java
+++ b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchAddToCrawlQueueService.java
@@ -47,18 +47,29 @@ public class SearchAddToCrawlQueueService {
         return new MapModelAndView("redirect.jte", Map.of("url", "/site/"+domainName));
     }
 
-    private void addToCrawlQueue(int id) throws SQLException {
+    /** Mark a domain for crawling by setting its node affinity to zero.
+     * <p>
+     * Only rows whose node affinity is negative are updated, so a domain
+     * that is already marked for crawling (affinity 0) or that already has
+     * a positive node affinity is left unchanged.
+     *
+     * @param domainId value matched against EC_DOMAIN.ID
+     * @throws SQLException if obtaining the connection or running the update fails
+     */
+    void addToCrawlQueue(int domainId) throws SQLException {
         try (var conn = dataSource.getConnection();
              var stmt = conn.prepareStatement("""
-                     INSERT IGNORE INTO CRAWL_QUEUE(DOMAIN_NAME, SOURCE)
-                     SELECT DOMAIN_NAME, "user" FROM EC_DOMAIN WHERE ID=?
+                     UPDATE EC_DOMAIN
+                     SET NODE_AFFINITY = 0
+                     WHERE ID=?
+                     AND NODE_AFFINITY < 0
                      """)) {
-            stmt.setInt(1, id);
+            stmt.setInt(1, domainId);
             stmt.executeUpdate();
         }
     }
 
-    private String getDomainName(int id) {
+    String getDomainName(int id) {
         var domain = domainQueries.getDomain(id);
         if (domain.isEmpty())
             throw new IllegalArgumentException();
diff --git a/code/services-application/search-service/test/nu/marginalia/search/svc/SearchAddToCrawlQueueServiceTest.java b/code/services-application/search-service/test/nu/marginalia/search/svc/SearchAddToCrawlQueueServiceTest.java
new file mode 100644
index 00000000..f9453435
--- /dev/null
+++ b/code/services-application/search-service/test/nu/marginalia/search/svc/SearchAddToCrawlQueueServiceTest.java
@@ -0,0 +1,99 @@
+package nu.marginalia.search.svc;
+
+import com.zaxxer.hikari.HikariConfig;
+import com.zaxxer.hikari.HikariDataSource;
+import nu.marginalia.db.DbDomainQueries;
+import nu.marginalia.model.EdgeDomain;
+import nu.marginalia.test.TestMigrationLoader;
+import org.junit.jupiter.api.*;
+import org.testcontainers.containers.MariaDBContainer;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+import java.sql.SQLException;
+
+/** Exercises SearchAddToCrawlQueueService.addToCrawlQueue against a MariaDB test container. */
+@Tag("slow")
+@Testcontainers
+class SearchAddToCrawlQueueServiceTest {
+    @Container
+    static MariaDBContainer<?> mariaDBContainer = new MariaDBContainer<>("mariadb")
+            .withDatabaseName("WMSA_prod")
+            .withUsername("wmsa")
+            .withPassword("wmsa")
+            .withNetworkAliases("mariadb");
+
+    static HikariDataSource dataSource;
+
+    private DbDomainQueries domainQueries;
+    private SearchAddToCrawlQueueService addToCrawlQueueService;
+
+    /** Resets EC_DOMAIN to three rows with node affinity -1, 0 and 1, then builds the service under test. */
+    @BeforeEach
+    public void setUp() throws SQLException {
+        try (var conn = dataSource.getConnection();
+             var stmt = conn.createStatement()) {
+            // DELETE/INSERT return no ResultSet, so they go through executeUpdate rather than executeQuery
+            stmt.executeUpdate("DELETE FROM EC_DOMAIN"); // Wipe any old state from other test runs
+
+            stmt.executeUpdate("INSERT INTO EC_DOMAIN (DOMAIN_NAME, DOMAIN_TOP, NODE_AFFINITY) VALUES ('known.example.com', 'example.com', -1)");
+            stmt.executeUpdate("INSERT INTO EC_DOMAIN (DOMAIN_NAME, DOMAIN_TOP, NODE_AFFINITY) VALUES ('added.example.com', 'example.com', 0)");
+            stmt.executeUpdate("INSERT INTO EC_DOMAIN (DOMAIN_NAME, DOMAIN_TOP, NODE_AFFINITY) VALUES ('indexed.example.com', 'example.com', 1)");
+        }
+
+        domainQueries = new DbDomainQueries(dataSource);
+        addToCrawlQueueService = new SearchAddToCrawlQueueService(domainQueries, dataSource);
+    }
+
+    /** Opens a connection pool against the container and calls TestMigrationLoader.flywayMigration once for the class. */
+    @BeforeAll
+    public static void setUpAll() {
+        HikariConfig config = new HikariConfig();
+        config.setJdbcUrl(mariaDBContainer.getJdbcUrl());
+        config.setUsername("wmsa");
+        config.setPassword("wmsa");
+
+        dataSource = new HikariDataSource(config);
+        TestMigrationLoader.flywayMigration(dataSource);
+    }
+
+    /** Closes the connection pool opened in setUpAll, if it was created. */
+    @AfterAll
+    public static void tearDownAll() {
+        if (dataSource != null) {
+            dataSource.close();
+        }
+    }
+
+    /** Reads NODE_AFFINITY for the given domain name; returns -1 when no row matches. */
+    private int getNodeAffinity(String domainName) throws SQLException {
+        try (var conn = dataSource.getConnection();
+             var stmt = conn.prepareStatement("SELECT NODE_AFFINITY FROM EC_DOMAIN WHERE DOMAIN_NAME=?"))
+        {
+            stmt.setString(1, domainName);
+            var rsp = stmt.executeQuery();
+            if (rsp.next()) {
+                return rsp.getInt(1);
+            }
+        }
+
+        return -1;
+    }
+
+    /** Only the row with negative affinity moves to 0; the rows at 0 and 1 keep their values. */
+    @Test
+    void addToCrawlQueue() throws SQLException {
+        int knownId = domainQueries.getDomainId(new EdgeDomain("known.example.com"));
+        int addedId = domainQueries.getDomainId(new EdgeDomain("added.example.com"));
+        int indexedId = domainQueries.getDomainId(new EdgeDomain("indexed.example.com"));
+
+        addToCrawlQueueService.addToCrawlQueue(knownId);
+        addToCrawlQueueService.addToCrawlQueue(addedId);
+        addToCrawlQueueService.addToCrawlQueue(indexedId);
+
+        Assertions.assertEquals(0, getNodeAffinity("known.example.com"));
+        Assertions.assertEquals(0, getNodeAffinity("added.example.com"));
+        Assertions.assertEquals(1, getNodeAffinity("indexed.example.com"));
+    }
+
+}
\ No newline at end of file