mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
(db) Retire the EC_DOMAIN_LINK table
Retire the EC_DOMAIN_LINK table, as its data has been migrated into a file instead.
This commit is contained in:
parent
ef261cbbd7
commit
b15f47d80e
@ -0,0 +1 @@
|
|||||||
|
DROP TABLE EC_DOMAIN_LINK;
|
@ -1,9 +1,7 @@
|
|||||||
package nu.marginalia.linkdb.dlinks;
|
package nu.marginalia.linkdb.dlinks;
|
||||||
|
|
||||||
import com.google.inject.name.Named;
|
import com.google.inject.name.Named;
|
||||||
import com.zaxxer.hikari.HikariDataSource;
|
|
||||||
import gnu.trove.list.array.TIntArrayList;
|
import gnu.trove.list.array.TIntArrayList;
|
||||||
import nu.marginalia.service.module.ServiceConfiguration;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -12,18 +10,17 @@ import java.nio.file.Files;
|
|||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.nio.file.StandardCopyOption;
|
import java.nio.file.StandardCopyOption;
|
||||||
|
|
||||||
/** DomainLinkDb that delegates to either a FileDomainLinkDb or a SqlDomainLinkDb,
|
/** DomainLinkDb that delegates to a FileDomainLinkDb, but handles the case where the database
|
||||||
* depending on whether the file exists. This is part of the migration path to
|
* is not yet loaded. This speeds up the startup of the index service, as the database is
|
||||||
* always using FileDomainLinkDb.
|
* loaded in a separate thread.
|
||||||
*/
|
*/
|
||||||
public class SelectingDomainLinkDb implements DomainLinkDb {
|
public class DelayingDomainLinkDb implements DomainLinkDb {
|
||||||
private final static Logger logger = LoggerFactory.getLogger(SelectingDomainLinkDb.class);
|
private final static Logger logger = LoggerFactory.getLogger(DelayingDomainLinkDb.class);
|
||||||
|
|
||||||
private volatile DomainLinkDb currentDb;
|
private volatile DomainLinkDb currentDb;
|
||||||
private final Path filename;
|
private final Path filename;
|
||||||
public SelectingDomainLinkDb(@Named("domain-linkdb-file") Path filename,
|
|
||||||
ServiceConfiguration serviceConfiguration,
|
public DelayingDomainLinkDb(@Named("domain-linkdb-file") Path filename) {
|
||||||
HikariDataSource dataSource) {
|
|
||||||
this.filename = filename;
|
this.filename = filename;
|
||||||
|
|
||||||
// Load the database in a separate thread, so that the constructor can return
|
// Load the database in a separate thread, so that the constructor can return
|
||||||
@ -32,12 +29,7 @@ public class SelectingDomainLinkDb implements DomainLinkDb {
|
|||||||
|
|
||||||
Thread.ofPlatform().start(() -> {
|
Thread.ofPlatform().start(() -> {
|
||||||
try {
|
try {
|
||||||
if (Files.exists(filename)) {
|
currentDb = new FileDomainLinkDb(filename);
|
||||||
currentDb = new FileDomainLinkDb(filename);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
currentDb = new SqlDomainLinkDb(filename, dataSource, serviceConfiguration);
|
|
||||||
}
|
|
||||||
logger.info("Loaded linkdb");
|
logger.info("Loaded linkdb");
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
logger.error("Failed to load linkdb", e);
|
logger.error("Failed to load linkdb", e);
|
@ -23,7 +23,9 @@ public class FileDomainLinkDb implements DomainLinkDb {
|
|||||||
public FileDomainLinkDb(@Named("domain-linkdb-file") Path filename) throws IOException {
|
public FileDomainLinkDb(@Named("domain-linkdb-file") Path filename) throws IOException {
|
||||||
this.filename = filename;
|
this.filename = filename;
|
||||||
|
|
||||||
loadInput(filename);
|
if (Files.exists(filename)) {
|
||||||
|
loadInput(filename);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -1,150 +0,0 @@
|
|||||||
package nu.marginalia.linkdb.dlinks;
|
|
||||||
|
|
||||||
import com.google.inject.name.Named;
|
|
||||||
import com.zaxxer.hikari.HikariDataSource;
|
|
||||||
import gnu.trove.list.array.TIntArrayList;
|
|
||||||
import gnu.trove.list.array.TLongArrayList;
|
|
||||||
import nu.marginalia.service.module.ServiceConfiguration;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.UnsupportedEncodingException;
|
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
import java.nio.file.StandardCopyOption;
|
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
/** DomainLinkDb implementation that goes through the motions of
|
|
||||||
* being a File-backed DomainLinkDb, but actually uses the legacy SQL database
|
|
||||||
* for loading the data.
|
|
||||||
* <p>
|
|
||||||
* This is part of the migration path to using FileDomainLinkDb.
|
|
||||||
*/
|
|
||||||
public class SqlDomainLinkDb implements DomainLinkDb {
|
|
||||||
private volatile long[] sourceToDest = new long[0];
|
|
||||||
private volatile long[] destToSource = new long[0];
|
|
||||||
private static final Logger logger = LoggerFactory.getLogger(SqlDomainLinkDb.class);
|
|
||||||
|
|
||||||
private final Path filename;
|
|
||||||
private final HikariDataSource dataSource;
|
|
||||||
private final int node;
|
|
||||||
|
|
||||||
public SqlDomainLinkDb(@Named("domain-linkdb-file") Path filename,
|
|
||||||
HikariDataSource dataSource,
|
|
||||||
ServiceConfiguration configuration)
|
|
||||||
{
|
|
||||||
this.filename = filename;
|
|
||||||
this.dataSource = dataSource;
|
|
||||||
|
|
||||||
node = configuration.node();
|
|
||||||
loadDb();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void switchInput(Path newFilename) throws IOException {
|
|
||||||
throw new UnsupportedEncodingException();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void loadDb() {
|
|
||||||
try (var conn = dataSource.getConnection();
|
|
||||||
var stmt = conn.prepareStatement(
|
|
||||||
STR."""
|
|
||||||
SELECT
|
|
||||||
SOURCE_DOMAIN_ID,
|
|
||||||
DEST_DOMAIN_ID
|
|
||||||
FROM EC_DOMAIN_LINK
|
|
||||||
INNER JOIN EC_DOMAIN
|
|
||||||
ON EC_DOMAIN.ID = EC_DOMAIN_LINK.SOURCE_DOMAIN_ID
|
|
||||||
WHERE NODE_AFFINITY=\{node}
|
|
||||||
""");
|
|
||||||
var rs = stmt.executeQuery())
|
|
||||||
{
|
|
||||||
TLongArrayList sourceToDest = new TLongArrayList(10_000_000);
|
|
||||||
TLongArrayList destToSource = new TLongArrayList(10_000_000);
|
|
||||||
|
|
||||||
while (rs.next()) {
|
|
||||||
long source = Integer.toUnsignedLong(rs.getInt(1));
|
|
||||||
long dest = Integer.toUnsignedLong(rs.getInt(2));
|
|
||||||
|
|
||||||
sourceToDest.add((source << 32) | dest);
|
|
||||||
destToSource.add((dest << 32) | source);
|
|
||||||
}
|
|
||||||
|
|
||||||
sourceToDest.sort();
|
|
||||||
destToSource.sort();
|
|
||||||
|
|
||||||
this.sourceToDest = sourceToDest.toArray();
|
|
||||||
this.destToSource = destToSource.toArray();
|
|
||||||
}
|
|
||||||
catch (Exception ex) {
|
|
||||||
logger.error("Failed to load linkdb", ex);
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.info("LinkDB loaded, size = {}", sourceToDest.length);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public TIntArrayList findDestinations(int source) {
|
|
||||||
return findRelated(sourceToDest, source);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public TIntArrayList findSources(int dest) {
|
|
||||||
return findRelated(destToSource, dest);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int countDestinations(int source) {
|
|
||||||
return countRelated(sourceToDest, source);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int countSources(int dest) {
|
|
||||||
return countRelated(destToSource, dest);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void forEach(SourceDestConsumer consumer) {
|
|
||||||
for (long val : sourceToDest) {
|
|
||||||
consumer.accept((int) (val >>> 32), (int) (val & 0xFFFF_FFFFL));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private TIntArrayList findRelated(long[] range, int key) {
|
|
||||||
long keyLong = Integer.toUnsignedLong(key) << 32;
|
|
||||||
long nextKeyLong = Integer.toUnsignedLong(key + 1) << 32;
|
|
||||||
|
|
||||||
int start = Arrays.binarySearch(range, keyLong);
|
|
||||||
|
|
||||||
if (start < 0) {
|
|
||||||
// Key is not found, get the insertion point
|
|
||||||
start = -start - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
TIntArrayList result = new TIntArrayList();
|
|
||||||
|
|
||||||
for (int i = start; i < range.length && range[i] < nextKeyLong; i++) {
|
|
||||||
result.add((int) (range[i] & 0xFFFF_FFFFL));
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
private int countRelated(long[] range, int key) {
|
|
||||||
long keyLong = Integer.toUnsignedLong(key) << 32;
|
|
||||||
long nextKeyLong = Integer.toUnsignedLong(key + 1) << 32;
|
|
||||||
|
|
||||||
int start = Arrays.binarySearch(range, keyLong);
|
|
||||||
|
|
||||||
if (start < 0) {
|
|
||||||
// Key is not found, get the insertion point
|
|
||||||
start = -start - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int num = 0;
|
|
||||||
for (int i = start; i < range.length && range[i] < nextKeyLong; i++, num++);
|
|
||||||
return num;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -38,10 +38,22 @@ public class RankingDomainFetcher {
|
|||||||
public void getDomains(Consumer<RankingDomainData> consumer) {
|
public void getDomains(Consumer<RankingDomainData> consumer) {
|
||||||
String query;
|
String query;
|
||||||
if (getNames) {
|
if (getNames) {
|
||||||
query = "SELECT EC_DOMAIN.ID,DOMAIN_NAME,DOMAIN_ALIAS,STATE,KNOWN_URLS FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID WHERE NODE_AFFINITY>0 GROUP BY EC_DOMAIN.ID";
|
query = """
|
||||||
|
SELECT EC_DOMAIN.ID,DOMAIN_NAME,DOMAIN_ALIAS,STATE,KNOWN_URLS
|
||||||
|
FROM EC_DOMAIN
|
||||||
|
INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID
|
||||||
|
WHERE NODE_AFFINITY>0
|
||||||
|
GROUP BY EC_DOMAIN.ID
|
||||||
|
""";
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
query = "SELECT EC_DOMAIN.ID,\"\",DOMAIN_ALIAS,STATE,KNOWN_URLS FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID WHERE NODE_AFFINITY>0 GROUP BY EC_DOMAIN.ID";
|
query = """
|
||||||
|
SELECT EC_DOMAIN.ID,\"\",DOMAIN_ALIAS,STATE,KNOWN_URLS
|
||||||
|
FROM EC_DOMAIN
|
||||||
|
INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID
|
||||||
|
WHERE NODE_AFFINITY>0
|
||||||
|
GROUP BY EC_DOMAIN.ID
|
||||||
|
""";
|
||||||
}
|
}
|
||||||
|
|
||||||
getDomains(query, consumer);
|
getDomains(query, consumer);
|
||||||
@ -51,10 +63,24 @@ public class RankingDomainFetcher {
|
|||||||
public void getPeripheralDomains(Consumer<RankingDomainData> consumer) {
|
public void getPeripheralDomains(Consumer<RankingDomainData> consumer) {
|
||||||
String query;
|
String query;
|
||||||
if (getNames) {
|
if (getNames) {
|
||||||
query = "SELECT EC_DOMAIN.ID,DOMAIN_NAME,DOMAIN_ALIAS,STATE,KNOWN_URLS FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID LEFT JOIN EC_DOMAIN_LINK ON SOURCE_DOMAIN_ID=EC_DOMAIN.ID WHERE ((INDEXED>1 AND IS_ALIVE) OR (INDEXED=1 AND VISITED_URLS=KNOWN_URLS AND GOOD_URLS>0)) AND EC_DOMAIN_LINK.ID IS NULL GROUP BY EC_DOMAIN.ID";
|
query = """
|
||||||
|
SELECT EC_DOMAIN.ID,DOMAIN_NAME,DOMAIN_ALIAS,STATE,KNOWN_URLS
|
||||||
|
FROM EC_DOMAIN
|
||||||
|
INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID
|
||||||
|
WHERE ((INDEXED>1 AND IS_ALIVE)
|
||||||
|
OR (INDEXED=1 AND VISITED_URLS=KNOWN_URLS AND GOOD_URLS>0))
|
||||||
|
GROUP BY EC_DOMAIN.ID
|
||||||
|
""";
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
query = "SELECT EC_DOMAIN.ID,\"\",DOMAIN_ALIAS,STATE,KNOWN_URLS FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID LEFT JOIN EC_DOMAIN_LINK ON SOURCE_DOMAIN_ID=EC_DOMAIN.ID WHERE ((INDEXED>1 AND IS_ALIVE) OR (INDEXED=1 AND VISITED_URLS=KNOWN_URLS AND GOOD_URLS>0)) AND EC_DOMAIN_LINK.ID IS NULL GROUP BY EC_DOMAIN.ID";
|
query = """
|
||||||
|
SELECT EC_DOMAIN.ID,\"\",DOMAIN_ALIAS,STATE,KNOWN_URLS
|
||||||
|
FROM EC_DOMAIN
|
||||||
|
INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID
|
||||||
|
WHERE ((INDEXED>1 AND IS_ALIVE)
|
||||||
|
OR (INDEXED=1 AND VISITED_URLS=KNOWN_URLS AND GOOD_URLS>0))
|
||||||
|
GROUP BY EC_DOMAIN.ID
|
||||||
|
""";
|
||||||
}
|
}
|
||||||
|
|
||||||
getDomains(query, consumer);
|
getDomains(query, consumer);
|
||||||
|
@ -4,10 +4,8 @@ import com.google.inject.AbstractModule;
|
|||||||
import com.google.inject.Provides;
|
import com.google.inject.Provides;
|
||||||
import com.google.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
import com.google.inject.name.Named;
|
import com.google.inject.name.Named;
|
||||||
import com.zaxxer.hikari.HikariDataSource;
|
|
||||||
import nu.marginalia.linkdb.dlinks.DomainLinkDb;
|
import nu.marginalia.linkdb.dlinks.DomainLinkDb;
|
||||||
import nu.marginalia.linkdb.dlinks.SelectingDomainLinkDb;
|
import nu.marginalia.linkdb.dlinks.DelayingDomainLinkDb;
|
||||||
import nu.marginalia.service.module.ServiceConfiguration;
|
|
||||||
import nu.marginalia.storage.FileStorageService;
|
import nu.marginalia.storage.FileStorageService;
|
||||||
import nu.marginalia.IndexLocations;
|
import nu.marginalia.IndexLocations;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
@ -29,14 +27,12 @@ public class IndexModule extends AbstractModule {
|
|||||||
@Provides
|
@Provides
|
||||||
@Singleton
|
@Singleton
|
||||||
public DomainLinkDb domainLinkDb (
|
public DomainLinkDb domainLinkDb (
|
||||||
FileStorageService storageService,
|
FileStorageService storageService
|
||||||
HikariDataSource dataSource,
|
|
||||||
ServiceConfiguration serviceConfiguration
|
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
Path path = IndexLocations.getLinkdbLivePath(storageService).resolve(DOMAIN_LINKS_FILE_NAME);
|
Path path = IndexLocations.getLinkdbLivePath(storageService).resolve(DOMAIN_LINKS_FILE_NAME);
|
||||||
|
|
||||||
return new SelectingDomainLinkDb(path, serviceConfiguration, dataSource);
|
return new DelayingDomainLinkDb(path);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Provides
|
@Provides
|
||||||
|
Loading…
Reference in New Issue
Block a user