mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
(search) Reduce the number of db queries a bit by caching data that doesn't change too often
This commit is contained in:
parent
b245cc9f38
commit
2d17233366
@ -1,118 +0,0 @@
|
|||||||
package nu.marginalia.db;
|
|
||||||
|
|
||||||
import com.zaxxer.hikari.HikariDataSource;
|
|
||||||
|
|
||||||
import java.sql.Connection;
|
|
||||||
import java.sql.PreparedStatement;
|
|
||||||
import java.sql.SQLException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.OptionalInt;
|
|
||||||
|
|
||||||
/** Class used in exporting data. This is intended to be used for a brief time
|
|
||||||
* and then discarded, not kept around as a service.
|
|
||||||
*/
|
|
||||||
public class DbDomainStatsExportMultitool implements AutoCloseable {
|
|
||||||
private final Connection connection;
|
|
||||||
private final int nodeId;
|
|
||||||
private final PreparedStatement knownUrlsQuery;
|
|
||||||
private final PreparedStatement visitedUrlsQuery;
|
|
||||||
private final PreparedStatement goodUrlsQuery;
|
|
||||||
private final PreparedStatement domainNameToId;
|
|
||||||
|
|
||||||
private final PreparedStatement allDomainsQuery;
|
|
||||||
private final PreparedStatement crawlQueueDomains;
|
|
||||||
private final PreparedStatement indexedDomainsQuery;
|
|
||||||
|
|
||||||
public DbDomainStatsExportMultitool(HikariDataSource dataSource, int nodeId) throws SQLException {
|
|
||||||
this.connection = dataSource.getConnection();
|
|
||||||
this.nodeId = nodeId;
|
|
||||||
|
|
||||||
knownUrlsQuery = connection.prepareStatement("""
|
|
||||||
SELECT KNOWN_URLS
|
|
||||||
FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA
|
|
||||||
ON EC_DOMAIN.ID=DOMAIN_METADATA.ID
|
|
||||||
WHERE DOMAIN_NAME=?
|
|
||||||
""");
|
|
||||||
visitedUrlsQuery = connection.prepareStatement("""
|
|
||||||
SELECT VISITED_URLS
|
|
||||||
FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA
|
|
||||||
ON EC_DOMAIN.ID=DOMAIN_METADATA.ID
|
|
||||||
WHERE DOMAIN_NAME=?
|
|
||||||
""");
|
|
||||||
goodUrlsQuery = connection.prepareStatement("""
|
|
||||||
SELECT GOOD_URLS
|
|
||||||
FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA
|
|
||||||
ON EC_DOMAIN.ID=DOMAIN_METADATA.ID
|
|
||||||
WHERE DOMAIN_NAME=?
|
|
||||||
""");
|
|
||||||
domainNameToId = connection.prepareStatement("""
|
|
||||||
SELECT ID
|
|
||||||
FROM EC_DOMAIN
|
|
||||||
WHERE DOMAIN_NAME=?
|
|
||||||
""");
|
|
||||||
allDomainsQuery = connection.prepareStatement("""
|
|
||||||
SELECT DOMAIN_NAME
|
|
||||||
FROM EC_DOMAIN
|
|
||||||
""");
|
|
||||||
crawlQueueDomains = connection.prepareStatement("""
|
|
||||||
SELECT DOMAIN_NAME
|
|
||||||
FROM CRAWL_QUEUE
|
|
||||||
""");
|
|
||||||
indexedDomainsQuery = connection.prepareStatement("""
|
|
||||||
SELECT DOMAIN_NAME
|
|
||||||
FROM EC_DOMAIN
|
|
||||||
WHERE INDEXED > 0
|
|
||||||
""");
|
|
||||||
}
|
|
||||||
|
|
||||||
public OptionalInt getVisitedUrls(String domainName) throws SQLException {
|
|
||||||
return executeNameToIntQuery(domainName, visitedUrlsQuery);
|
|
||||||
}
|
|
||||||
|
|
||||||
public OptionalInt getDomainId(String domainName) throws SQLException {
|
|
||||||
return executeNameToIntQuery(domainName, domainNameToId);
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<String> getCrawlQueueDomains() throws SQLException {
|
|
||||||
return executeListQuery(crawlQueueDomains, 100);
|
|
||||||
}
|
|
||||||
public List<String> getAllIndexedDomains() throws SQLException {
|
|
||||||
return executeListQuery(indexedDomainsQuery, 100_000);
|
|
||||||
}
|
|
||||||
|
|
||||||
private OptionalInt executeNameToIntQuery(String domainName, PreparedStatement statement)
|
|
||||||
throws SQLException {
|
|
||||||
statement.setString(1, domainName);
|
|
||||||
var rs = statement.executeQuery();
|
|
||||||
|
|
||||||
if (rs.next()) {
|
|
||||||
return OptionalInt.of(rs.getInt(1));
|
|
||||||
}
|
|
||||||
|
|
||||||
return OptionalInt.empty();
|
|
||||||
}
|
|
||||||
|
|
||||||
private List<String> executeListQuery(PreparedStatement statement, int sizeHint) throws SQLException {
|
|
||||||
List<String> ret = new ArrayList<>(sizeHint);
|
|
||||||
|
|
||||||
var rs = statement.executeQuery();
|
|
||||||
|
|
||||||
while (rs.next()) {
|
|
||||||
ret.add(rs.getString(1));
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() throws SQLException {
|
|
||||||
knownUrlsQuery.close();
|
|
||||||
goodUrlsQuery.close();
|
|
||||||
visitedUrlsQuery.close();
|
|
||||||
allDomainsQuery.close();
|
|
||||||
crawlQueueDomains.close();
|
|
||||||
domainNameToId.close();
|
|
||||||
connection.close();
|
|
||||||
}
|
|
||||||
}
|
|
@ -26,8 +26,6 @@ import org.slf4j.Logger;
|
|||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
import java.time.Duration;
|
|
||||||
import java.time.Instant;
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.concurrent.CompletableFuture;
|
import java.util.concurrent.CompletableFuture;
|
||||||
import java.util.concurrent.Future;
|
import java.util.concurrent.Future;
|
||||||
@ -69,9 +67,11 @@ public class SearchSiteInfoService {
|
|||||||
this.screenshotService = screenshotService;
|
this.screenshotService = screenshotService;
|
||||||
this.dataSource = dataSource;
|
this.dataSource = dataSource;
|
||||||
this.searchSiteSubscriptions = searchSiteSubscriptions;
|
this.searchSiteSubscriptions = searchSiteSubscriptions;
|
||||||
|
|
||||||
|
Thread.ofPlatform().name("Recently Added Domains Model Updater").start(this::modelUpdater);
|
||||||
}
|
}
|
||||||
|
|
||||||
private volatile SiteOverviewModel model = new SiteOverviewModel(List.of(), Instant.EPOCH);
|
private volatile SiteOverviewModel model = new SiteOverviewModel(List.of());
|
||||||
|
|
||||||
@GET
|
@GET
|
||||||
@Path("/site")
|
@Path("/site")
|
||||||
@ -81,55 +81,43 @@ public class SearchSiteInfoService {
|
|||||||
return new MapModelAndView("redirect.jte", Map.of("url", "/site/"+domain));
|
return new MapModelAndView("redirect.jte", Map.of("url", "/site/"+domain));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (model.age().compareTo(Duration.ofMinutes(15)) > 0) {
|
|
||||||
updateModel();
|
|
||||||
}
|
|
||||||
|
|
||||||
return new MapModelAndView("siteinfo/start.jte",
|
return new MapModelAndView("siteinfo/start.jte",
|
||||||
Map.of("navbar", NavbarModel.SITEINFO,
|
Map.of("navbar", NavbarModel.SITEINFO,
|
||||||
"model", model));
|
"model", model));
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Update the model if it is older than 15 minutes.
|
private void modelUpdater() {
|
||||||
* This query is expensive and should not be run too often,
|
while (!Thread.interrupted()) {
|
||||||
* and the data doesn't change that often either.
|
List<SiteOverviewModel.DiscoveredDomain> domains = new ArrayList<>();
|
||||||
* <p></p>
|
|
||||||
* This method is synchronized to avoid multiple threads updating the model at the same time.
|
|
||||||
*/
|
|
||||||
private synchronized void updateModel() {
|
|
||||||
var currentModel = model;
|
|
||||||
if (currentModel.age().compareTo(Duration.ofMinutes(15)) < 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
List<SiteOverviewModel.DiscoveredDomain> domains = new ArrayList<>();
|
// This query can be quite expensive, so we can't run it on demand
|
||||||
|
// for every request. Instead, we run it every 15 minutes and cache
|
||||||
|
// the result.
|
||||||
|
|
||||||
try (var conn = dataSource.getConnection();
|
try (var conn = dataSource.getConnection();
|
||||||
var stmt = conn.prepareStatement("SELECT DOMAIN_NAME, DISCOVER_DATE FROM EC_DOMAIN WHERE NODE_AFFINITY = 0 ORDER BY ID DESC LIMIT 10")) {
|
var stmt = conn.prepareStatement("SELECT DOMAIN_NAME, DISCOVER_DATE FROM EC_DOMAIN WHERE NODE_AFFINITY = 0 ORDER BY ID DESC LIMIT 10")) {
|
||||||
|
|
||||||
var rs = stmt.executeQuery();
|
var rs = stmt.executeQuery();
|
||||||
while (rs.next()) {
|
while (rs.next()) {
|
||||||
domains.add(new SiteOverviewModel.DiscoveredDomain(rs.getString("DOMAIN_NAME"), rs.getString("DISCOVER_DATE")));
|
domains.add(new SiteOverviewModel.DiscoveredDomain(rs.getString("DOMAIN_NAME"), rs.getString("DISCOVER_DATE")));
|
||||||
|
}
|
||||||
|
} catch (SQLException ex) {
|
||||||
|
throw new RuntimeException();
|
||||||
|
}
|
||||||
|
|
||||||
|
model = new SiteOverviewModel(domains);
|
||||||
|
|
||||||
|
try {
|
||||||
|
TimeUnit.MINUTES.sleep(15);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (SQLException ex) {
|
|
||||||
throw new RuntimeException();
|
|
||||||
}
|
|
||||||
|
|
||||||
model = new SiteOverviewModel(domains);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public record SiteOverviewModel(List<DiscoveredDomain> domains, Instant captureTime) {
|
public record SiteOverviewModel(List<DiscoveredDomain> domains) {
|
||||||
|
|
||||||
public SiteOverviewModel(List<DiscoveredDomain> domains) {
|
|
||||||
this(domains, Instant.now());
|
|
||||||
}
|
|
||||||
|
|
||||||
public record DiscoveredDomain(String name, String timestamp) {}
|
public record DiscoveredDomain(String name, String timestamp) {}
|
||||||
|
|
||||||
public Duration age() {
|
|
||||||
return Duration.between(captureTime, Instant.now());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@GET
|
@GET
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
@import nu.marginalia.db.DbDomainQueries
|
@import nu.marginalia.db.DbDomainQueries
|
||||||
@import nu.marginalia.model.EdgeDomain
|
|
||||||
@import nu.marginalia.search.svc.SearchSiteInfoService
|
@import nu.marginalia.search.svc.SearchSiteInfoService
|
||||||
@import nu.marginalia.search.svc.SearchSiteInfoService.*
|
@import nu.marginalia.search.svc.SearchSiteInfoService.*
|
||||||
@import nu.marginalia.search.model.UrlDetails
|
@import nu.marginalia.search.model.UrlDetails
|
||||||
|
Loading…
Reference in New Issue
Block a user