From 2d1723336651b9f04e60f34c3c0a52e2ecab9432 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Fri, 10 Jan 2025 13:53:56 +0100 Subject: [PATCH] (search) Reduce the number of db queries a bit by caching data that doesn't change too often --- .../db/DbDomainStatsExportMultitool.java | 118 ------------------ .../search/svc/SearchSiteInfoService.java | 66 ++++------ .../resources/jte/siteinfo/view/overview.jte | 1 - 3 files changed, 27 insertions(+), 158 deletions(-) delete mode 100644 code/common/db/java/nu/marginalia/db/DbDomainStatsExportMultitool.java diff --git a/code/common/db/java/nu/marginalia/db/DbDomainStatsExportMultitool.java b/code/common/db/java/nu/marginalia/db/DbDomainStatsExportMultitool.java deleted file mode 100644 index 6fa07c7e..00000000 --- a/code/common/db/java/nu/marginalia/db/DbDomainStatsExportMultitool.java +++ /dev/null @@ -1,118 +0,0 @@ -package nu.marginalia.db; - -import com.zaxxer.hikari.HikariDataSource; - -import java.sql.Connection; -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.List; -import java.util.OptionalInt; - -/** Class used in exporting data. This is intended to be used for a brief time - * and then discarded, not kept around as a service. - */ -public class DbDomainStatsExportMultitool implements AutoCloseable { - private final Connection connection; - private final int nodeId; - private final PreparedStatement knownUrlsQuery; - private final PreparedStatement visitedUrlsQuery; - private final PreparedStatement goodUrlsQuery; - private final PreparedStatement domainNameToId; - - private final PreparedStatement allDomainsQuery; - private final PreparedStatement crawlQueueDomains; - private final PreparedStatement indexedDomainsQuery; - - public DbDomainStatsExportMultitool(HikariDataSource dataSource, int nodeId) throws SQLException { - this.connection = dataSource.getConnection(); - this.nodeId = nodeId; - - knownUrlsQuery = connection.prepareStatement(""" - SELECT KNOWN_URLS - FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA - ON EC_DOMAIN.ID=DOMAIN_METADATA.ID - WHERE DOMAIN_NAME=? - """); - visitedUrlsQuery = connection.prepareStatement(""" - SELECT VISITED_URLS - FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA - ON EC_DOMAIN.ID=DOMAIN_METADATA.ID - WHERE DOMAIN_NAME=? - """); - goodUrlsQuery = connection.prepareStatement(""" - SELECT GOOD_URLS - FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA - ON EC_DOMAIN.ID=DOMAIN_METADATA.ID - WHERE DOMAIN_NAME=? - """); - domainNameToId = connection.prepareStatement(""" - SELECT ID - FROM EC_DOMAIN - WHERE DOMAIN_NAME=? - """); - allDomainsQuery = connection.prepareStatement(""" - SELECT DOMAIN_NAME - FROM EC_DOMAIN - """); - crawlQueueDomains = connection.prepareStatement(""" - SELECT DOMAIN_NAME - FROM CRAWL_QUEUE - """); - indexedDomainsQuery = connection.prepareStatement(""" - SELECT DOMAIN_NAME - FROM EC_DOMAIN - WHERE INDEXED > 0 - """); - } - - public OptionalInt getVisitedUrls(String domainName) throws SQLException { - return executeNameToIntQuery(domainName, visitedUrlsQuery); - } - - public OptionalInt getDomainId(String domainName) throws SQLException { - return executeNameToIntQuery(domainName, domainNameToId); - } - - public List getCrawlQueueDomains() throws SQLException { - return executeListQuery(crawlQueueDomains, 100); - } - public List getAllIndexedDomains() throws SQLException { - return executeListQuery(indexedDomainsQuery, 100_000); - } - - private OptionalInt executeNameToIntQuery(String domainName, PreparedStatement statement) - throws SQLException { - statement.setString(1, domainName); - var rs = statement.executeQuery(); - - if (rs.next()) { - return OptionalInt.of(rs.getInt(1)); - } - - return OptionalInt.empty(); - } - - private List executeListQuery(PreparedStatement statement, int sizeHint) throws SQLException { - List ret = new ArrayList<>(sizeHint); - - var rs = statement.executeQuery(); - - while (rs.next()) { - ret.add(rs.getString(1)); - } - - return ret; - } - - @Override - public void close() throws SQLException { - knownUrlsQuery.close(); - goodUrlsQuery.close(); - visitedUrlsQuery.close(); - allDomainsQuery.close(); - crawlQueueDomains.close(); - domainNameToId.close(); - connection.close(); - } -} diff --git a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchSiteInfoService.java b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchSiteInfoService.java index d06d5530..daba5a8e 100644 --- a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchSiteInfoService.java +++ b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchSiteInfoService.java @@ -26,8 +26,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.sql.SQLException; -import java.time.Duration; -import java.time.Instant; import java.util.*; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Future; @@ -69,9 +67,11 @@ public class SearchSiteInfoService { this.screenshotService = screenshotService; this.dataSource = dataSource; this.searchSiteSubscriptions = searchSiteSubscriptions; + + Thread.ofPlatform().name("Recently Added Domains Model Updater").start(this::modelUpdater); } - private volatile SiteOverviewModel model = new SiteOverviewModel(List.of(), Instant.EPOCH); + private volatile SiteOverviewModel model = new SiteOverviewModel(List.of()); @GET @Path("/site") @@ -81,55 +81,43 @@ public class SearchSiteInfoService { return new MapModelAndView("redirect.jte", Map.of("url", "/site/"+domain)); } - if (model.age().compareTo(Duration.ofMinutes(15)) > 0) { - updateModel(); - } - return new MapModelAndView("siteinfo/start.jte", Map.of("navbar", NavbarModel.SITEINFO, "model", model)); } - /** Update the model if it is older than 15 minutes. - * This query is expensive and should not be run too often, - * and the data doesn't change that often either. - *

- * This method is synchronized to avoid multiple threads updating the model at the same time. - */ - private synchronized void updateModel() { - var currentModel = model; - if (currentModel.age().compareTo(Duration.ofMinutes(15)) < 0) { - return; - } + private void modelUpdater() { + while (!Thread.interrupted()) { + List domains = new ArrayList<>(); - List domains = new ArrayList<>(); + // This query can be quite expensive, so we can't run it on demand + // for every request. Instead, we run it every 15 minutes and cache + // the result. - try (var conn = dataSource.getConnection(); - var stmt = conn.prepareStatement("SELECT DOMAIN_NAME, DISCOVER_DATE FROM EC_DOMAIN WHERE NODE_AFFINITY = 0 ORDER BY ID DESC LIMIT 10")) { + try (var conn = dataSource.getConnection(); + var stmt = conn.prepareStatement("SELECT DOMAIN_NAME, DISCOVER_DATE FROM EC_DOMAIN WHERE NODE_AFFINITY = 0 ORDER BY ID DESC LIMIT 10")) { - var rs = stmt.executeQuery(); - while (rs.next()) { - domains.add(new SiteOverviewModel.DiscoveredDomain(rs.getString("DOMAIN_NAME"), rs.getString("DISCOVER_DATE"))); + var rs = stmt.executeQuery(); + while (rs.next()) { + domains.add(new SiteOverviewModel.DiscoveredDomain(rs.getString("DOMAIN_NAME"), rs.getString("DISCOVER_DATE"))); + } + } catch (SQLException ex) { + throw new RuntimeException(); + } + + model = new SiteOverviewModel(domains); + + try { + TimeUnit.MINUTES.sleep(15); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + break; } } - catch (SQLException ex) { - throw new RuntimeException(); - } - - model = new SiteOverviewModel(domains); } - public record SiteOverviewModel(List domains, Instant captureTime) { - - public SiteOverviewModel(List domains) { - this(domains, Instant.now()); - } - + public record SiteOverviewModel(List domains) { public record DiscoveredDomain(String name, String timestamp) {} - - public Duration age() { - return Duration.between(captureTime, Instant.now()); - } } @GET diff --git a/code/services-application/search-service/resources/jte/siteinfo/view/overview.jte b/code/services-application/search-service/resources/jte/siteinfo/view/overview.jte index c3b86f4f..367b77f4 100644 --- a/code/services-application/search-service/resources/jte/siteinfo/view/overview.jte +++ b/code/services-application/search-service/resources/jte/siteinfo/view/overview.jte @@ -1,5 +1,4 @@ @import nu.marginalia.db.DbDomainQueries -@import nu.marginalia.model.EdgeDomain @import nu.marginalia.search.svc.SearchSiteInfoService @import nu.marginalia.search.svc.SearchSiteInfoService.* @import nu.marginalia.search.model.UrlDetails