mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
(linkdb) Add delegating implementation of DomainLinkDb
This makes it possible to switch between the SQL-backed and file-backed implementations on the fly while migrating from one to the other.
This commit is contained in:
parent
e49ba887e9
commit
fbad625126
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.linkdb;
|
||||
package nu.marginalia.linkdb.dlinks;
|
||||
|
||||
import gnu.trove.list.array.TIntArrayList;
|
||||
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.linkdb;
|
||||
package nu.marginalia.linkdb.dlinks;
|
||||
|
||||
import java.io.DataInputStream;
|
||||
import java.io.IOException;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.linkdb;
|
||||
package nu.marginalia.linkdb.dlinks;
|
||||
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.IOException;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.linkdb;
|
||||
package nu.marginalia.linkdb.dlinks;
|
||||
|
||||
import com.google.inject.name.Named;
|
||||
import gnu.trove.list.array.TIntArrayList;
|
||||
@ -22,9 +22,8 @@ public class FileDomainLinkDb implements DomainLinkDb {
|
||||
|
||||
public FileDomainLinkDb(@Named("domain-linkdb-file") Path filename) throws IOException {
|
||||
this.filename = filename;
|
||||
if (Files.exists(filename)) {
|
||||
switchInput(filename);
|
||||
}
|
||||
|
||||
loadInput(filename);
|
||||
}
|
||||
|
||||
@Override
|
@ -0,0 +1,104 @@
|
||||
package nu.marginalia.linkdb.dlinks;
|
||||
|
||||
import com.google.inject.name.Named;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import gnu.trove.list.array.TIntArrayList;
|
||||
import nu.marginalia.service.module.ServiceConfiguration;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
|
||||
/** DomainLinkDb that delegates to either a FileDomainLinkDb or a SqlDomainLinkDb,
|
||||
* depending on whether the file exists. This is part of the migration path to
|
||||
* always using FileDomainLinkDb.
|
||||
*/
|
||||
public class SelectingDomainLinkDb implements DomainLinkDb {
|
||||
private final static Logger logger = LoggerFactory.getLogger(SelectingDomainLinkDb.class);
|
||||
|
||||
private volatile DomainLinkDb currentDb;
|
||||
private final Path filename;
|
||||
public SelectingDomainLinkDb(@Named("domain-linkdb-file") Path filename,
|
||||
ServiceConfiguration serviceConfiguration,
|
||||
HikariDataSource dataSource) {
|
||||
this.filename = filename;
|
||||
|
||||
// Load the database in a separate thread, so that the constructor can return
|
||||
// immediately. This would otherwise add a lot of time to the startup of the
|
||||
// index service.
|
||||
|
||||
Thread.ofPlatform().start(() -> {
|
||||
try {
|
||||
if (Files.exists(filename)) {
|
||||
currentDb = new FileDomainLinkDb(filename);
|
||||
}
|
||||
else {
|
||||
currentDb = new SqlDomainLinkDb(filename, dataSource, serviceConfiguration);
|
||||
}
|
||||
logger.info("Loaded linkdb");
|
||||
} catch (Exception e) {
|
||||
logger.error("Failed to load linkdb", e);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
public void switchInput(Path newFilename) throws Exception {
|
||||
Files.move(newFilename, filename, StandardCopyOption.REPLACE_EXISTING);
|
||||
|
||||
Thread.ofPlatform().start(() -> {
|
||||
try {
|
||||
currentDb = new FileDomainLinkDb(filename);
|
||||
} catch (IOException e) {
|
||||
logger.error("Failed to load linkdb", e);
|
||||
}
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public TIntArrayList findDestinations(int source) {
|
||||
// A race condition is not possible here, as the nullity of currentDb only changes from
|
||||
// null to non-null
|
||||
|
||||
if (currentDb == null)
|
||||
return new TIntArrayList();
|
||||
|
||||
return currentDb.findDestinations(source);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int countDestinations(int source) {
|
||||
if (currentDb == null)
|
||||
return 0;
|
||||
|
||||
return currentDb.countDestinations(source);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TIntArrayList findSources(int dest) {
|
||||
if (currentDb == null)
|
||||
return new TIntArrayList();
|
||||
|
||||
return currentDb.findSources(dest);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int countSources(int source) {
|
||||
if (currentDb == null)
|
||||
return 0;
|
||||
|
||||
return currentDb.countSources(source);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEach(SourceDestConsumer consumer) {
|
||||
if (currentDb == null)
|
||||
throw new IllegalStateException("No linkdb loaded");
|
||||
|
||||
currentDb.forEach(consumer);
|
||||
}
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.linkdb;
|
||||
package nu.marginalia.linkdb.dlinks;
|
||||
|
||||
import com.google.inject.name.Named;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
@ -9,6 +9,7 @@ import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
@ -37,21 +38,12 @@ public class SqlDomainLinkDb implements DomainLinkDb {
|
||||
this.dataSource = dataSource;
|
||||
|
||||
node = configuration.node();
|
||||
|
||||
Thread.ofPlatform().start(() -> {
|
||||
try {
|
||||
loadDb();
|
||||
} catch (Exception e) {
|
||||
logger.error("Failed to load linkdb", e);
|
||||
}
|
||||
});
|
||||
loadDb();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void switchInput(Path newFilename) throws IOException {
|
||||
Files.move(newFilename, filename, StandardCopyOption.REPLACE_EXISTING);
|
||||
|
||||
loadDb();
|
||||
throw new UnsupportedEncodingException();
|
||||
}
|
||||
|
||||
public void loadDb() {
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.linkdb;
|
||||
package nu.marginalia.linkdb.docs;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.linkdb;
|
||||
package nu.marginalia.linkdb.docs;
|
||||
|
||||
import nu.marginalia.linkdb.model.DocdbUrlDetail;
|
||||
|
@ -1,6 +1,8 @@
|
||||
package nu.marginalia.linkdb;
|
||||
|
||||
import gnu.trove.list.array.TLongArrayList;
|
||||
import nu.marginalia.linkdb.docs.DocumentDbReader;
|
||||
import nu.marginalia.linkdb.docs.DocumentDbWriter;
|
||||
import nu.marginalia.linkdb.model.DocdbUrlDetail;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
@ -1,5 +1,7 @@
|
||||
package nu.marginalia.linkdb;
|
||||
|
||||
import nu.marginalia.linkdb.dlinks.DomainLinkDbLoader;
|
||||
import nu.marginalia.linkdb.dlinks.DomainLinkDbWriter;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
|
@ -46,7 +46,6 @@ public class ConverterMain {
|
||||
private final MessageQueueFactory messageQueueFactory;
|
||||
private final FileStorageService fileStorageService;
|
||||
private final SideloadSourceFactory sideloadSourceFactory;
|
||||
|
||||
private final int node;
|
||||
|
||||
public static void main(String... args) throws Exception {
|
||||
|
@ -9,7 +9,7 @@ import lombok.SneakyThrows;
|
||||
import nu.marginalia.ProcessConfiguration;
|
||||
import nu.marginalia.ProcessConfigurationModule;
|
||||
import nu.marginalia.storage.FileStorageService;
|
||||
import nu.marginalia.linkdb.DocumentDbWriter;
|
||||
import nu.marginalia.linkdb.docs.DocumentDbWriter;
|
||||
import nu.marginalia.loading.documents.DocumentLoaderService;
|
||||
import nu.marginalia.loading.documents.KeywordLoaderService;
|
||||
import nu.marginalia.loading.domains.DomainIdRegistry;
|
||||
|
@ -9,9 +9,9 @@ import com.google.inject.name.Names;
|
||||
import nu.marginalia.LanguageModels;
|
||||
import nu.marginalia.WmsaHome;
|
||||
import nu.marginalia.IndexLocations;
|
||||
import nu.marginalia.linkdb.DomainLinkDbWriter;
|
||||
import nu.marginalia.linkdb.dlinks.DomainLinkDbWriter;
|
||||
import nu.marginalia.storage.FileStorageService;
|
||||
import nu.marginalia.linkdb.DocumentDbWriter;
|
||||
import nu.marginalia.linkdb.docs.DocumentDbWriter;
|
||||
import nu.marginalia.model.gson.GsonFactory;
|
||||
import nu.marginalia.service.SearchServiceDescriptors;
|
||||
import nu.marginalia.service.descriptor.ServiceDescriptors;
|
||||
|
@ -4,7 +4,7 @@ import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.io.processed.DocumentRecordParquetFileReader;
|
||||
import nu.marginalia.linkdb.DocumentDbWriter;
|
||||
import nu.marginalia.linkdb.docs.DocumentDbWriter;
|
||||
import nu.marginalia.linkdb.model.DocdbUrlDetail;
|
||||
import nu.marginalia.loading.LoaderInputData;
|
||||
import nu.marginalia.loading.domains.DomainIdRegistry;
|
||||
|
@ -4,7 +4,7 @@ import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.io.processed.DomainLinkRecordParquetFileReader;
|
||||
import nu.marginalia.linkdb.DomainLinkDbWriter;
|
||||
import nu.marginalia.linkdb.dlinks.DomainLinkDbWriter;
|
||||
import nu.marginalia.loading.LoaderInputData;
|
||||
import nu.marginalia.loading.domains.DomainIdRegistry;
|
||||
import nu.marginalia.model.processed.DomainLinkRecord;
|
||||
|
@ -5,9 +5,10 @@ import com.google.inject.Provides;
|
||||
import com.google.inject.Singleton;
|
||||
import com.google.inject.name.Named;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import nu.marginalia.linkdb.DomainLinkDb;
|
||||
import nu.marginalia.linkdb.FileDomainLinkDb;
|
||||
import nu.marginalia.linkdb.SqlDomainLinkDb;
|
||||
import nu.marginalia.linkdb.dlinks.DomainLinkDb;
|
||||
import nu.marginalia.linkdb.dlinks.FileDomainLinkDb;
|
||||
import nu.marginalia.linkdb.dlinks.SelectingDomainLinkDb;
|
||||
import nu.marginalia.linkdb.dlinks.SqlDomainLinkDb;
|
||||
import nu.marginalia.service.module.ServiceConfiguration;
|
||||
import nu.marginalia.storage.FileStorageService;
|
||||
import nu.marginalia.IndexLocations;
|
||||
@ -41,18 +42,11 @@ public class IndexModule extends AbstractModule {
|
||||
FileStorageService storageService,
|
||||
HikariDataSource dataSource,
|
||||
ServiceConfiguration serviceConfiguration
|
||||
) throws IOException
|
||||
)
|
||||
{
|
||||
Path path = IndexLocations.getLinkdbLivePath(storageService).resolve(DOMAIN_LINKS_FILE_NAME);
|
||||
|
||||
if (Files.exists(path)) {
|
||||
logger.info("Using file domain link db {}", path);
|
||||
return new FileDomainLinkDb(path);
|
||||
}
|
||||
else {
|
||||
logger.warn("Using legacy sql domain link db");
|
||||
return new SqlDomainLinkDb(path, dataSource, serviceConfiguration);
|
||||
}
|
||||
return new SelectingDomainLinkDb(path, serviceConfiguration, dataSource);
|
||||
}
|
||||
|
||||
@Provides
|
||||
|
@ -7,13 +7,13 @@ import io.reactivex.rxjava3.schedulers.Schedulers;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.IndexLocations;
|
||||
import nu.marginalia.index.svc.IndexDomainLinksService;
|
||||
import nu.marginalia.linkdb.DomainLinkDb;
|
||||
import nu.marginalia.linkdb.dlinks.DomainLinkDb;
|
||||
import nu.marginalia.storage.FileStorageService;
|
||||
import nu.marginalia.index.client.IndexMqEndpoints;
|
||||
import nu.marginalia.index.index.SearchIndex;
|
||||
import nu.marginalia.index.svc.IndexOpsService;
|
||||
import nu.marginalia.index.svc.IndexQueryService;
|
||||
import nu.marginalia.linkdb.DocumentDbReader;
|
||||
import nu.marginalia.linkdb.docs.DocumentDbReader;
|
||||
import nu.marginalia.model.gson.GsonFactory;
|
||||
import nu.marginalia.service.control.ServiceEventLog;
|
||||
import nu.marginalia.service.server.*;
|
||||
|
@ -7,7 +7,7 @@ import gnu.trove.list.array.TLongArrayList;
|
||||
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
|
||||
import nu.marginalia.index.client.model.results.ResultRankingContext;
|
||||
import nu.marginalia.index.client.model.results.SearchResultItem;
|
||||
import nu.marginalia.linkdb.DocumentDbReader;
|
||||
import nu.marginalia.linkdb.docs.DocumentDbReader;
|
||||
import nu.marginalia.linkdb.model.DocdbUrlDetail;
|
||||
import nu.marginalia.ranking.ResultValuator;
|
||||
import org.slf4j.Logger;
|
||||
|
@ -3,9 +3,7 @@ package nu.marginalia.index.svc;
|
||||
import com.google.inject.Inject;
|
||||
import io.grpc.stub.StreamObserver;
|
||||
import nu.marginalia.index.api.*;
|
||||
import nu.marginalia.linkdb.DomainLinkDb;
|
||||
|
||||
import static io.grpc.stub.ServerCalls.asyncUnimplementedUnaryCall;
|
||||
import nu.marginalia.linkdb.dlinks.DomainLinkDb;
|
||||
|
||||
/** GRPC service for interrogating domain links
|
||||
*/
|
||||
|
@ -24,8 +24,8 @@ import nu.marginalia.index.journal.writer.IndexJournalWriter;
|
||||
import nu.marginalia.index.query.limit.QueryLimits;
|
||||
import nu.marginalia.index.query.limit.QueryStrategy;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
import nu.marginalia.linkdb.DocumentDbReader;
|
||||
import nu.marginalia.linkdb.DocumentDbWriter;
|
||||
import nu.marginalia.linkdb.docs.DocumentDbReader;
|
||||
import nu.marginalia.linkdb.docs.DocumentDbWriter;
|
||||
import nu.marginalia.linkdb.model.DocdbUrlDetail;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.model.id.UrlIdCodec;
|
||||
|
@ -23,8 +23,8 @@ import nu.marginalia.index.journal.writer.IndexJournalWriter;
|
||||
import nu.marginalia.index.query.limit.QueryLimits;
|
||||
import nu.marginalia.index.query.limit.QueryStrategy;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
import nu.marginalia.linkdb.DocumentDbReader;
|
||||
import nu.marginalia.linkdb.DocumentDbWriter;
|
||||
import nu.marginalia.linkdb.docs.DocumentDbReader;
|
||||
import nu.marginalia.linkdb.docs.DocumentDbWriter;
|
||||
import nu.marginalia.linkdb.model.DocdbUrlDetail;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.model.crawl.PubDate;
|
||||
|
@ -7,7 +7,7 @@ import nu.marginalia.storage.model.FileStorageBase;
|
||||
import nu.marginalia.storage.model.FileStorageBaseType;
|
||||
import nu.marginalia.index.journal.writer.IndexJournalWriter;
|
||||
import nu.marginalia.index.journal.writer.IndexJournalWriterPagingImpl;
|
||||
import nu.marginalia.linkdb.DocumentDbReader;
|
||||
import nu.marginalia.linkdb.docs.DocumentDbReader;
|
||||
import nu.marginalia.process.control.FakeProcessHeartbeat;
|
||||
import nu.marginalia.process.control.ProcessHeartbeat;
|
||||
import nu.marginalia.ranking.DomainRankings;
|
||||
|
Loading…
Reference in New Issue
Block a user