(linkdb) Add delegating implementation of DomainLinkDb

This facilitates switching between SQL and File-backed implementations on the fly while migrating from one to the other.
This commit is contained in:
Viktor Lofgren 2024-01-08 19:56:33 +01:00
parent e49ba887e9
commit fbad625126
22 changed files with 140 additions and 50 deletions

View File

@ -1,4 +1,4 @@
package nu.marginalia.linkdb;
package nu.marginalia.linkdb.dlinks;
import gnu.trove.list.array.TIntArrayList;

View File

@ -1,4 +1,4 @@
package nu.marginalia.linkdb;
package nu.marginalia.linkdb.dlinks;
import java.io.DataInputStream;
import java.io.IOException;

View File

@ -1,4 +1,4 @@
package nu.marginalia.linkdb;
package nu.marginalia.linkdb.dlinks;
import java.io.DataOutputStream;
import java.io.IOException;

View File

@ -1,4 +1,4 @@
package nu.marginalia.linkdb;
package nu.marginalia.linkdb.dlinks;
import com.google.inject.name.Named;
import gnu.trove.list.array.TIntArrayList;
@ -22,9 +22,8 @@ public class FileDomainLinkDb implements DomainLinkDb {
public FileDomainLinkDb(@Named("domain-linkdb-file") Path filename) throws IOException {
this.filename = filename;
if (Files.exists(filename)) {
switchInput(filename);
}
loadInput(filename);
}
@Override

View File

@ -0,0 +1,104 @@
package nu.marginalia.linkdb.dlinks;
import com.google.inject.name.Named;
import com.zaxxer.hikari.HikariDataSource;
import gnu.trove.list.array.TIntArrayList;
import nu.marginalia.service.module.ServiceConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
/** DomainLinkDb that delegates to either a FileDomainLinkDb or a SqlDomainLinkDb,
 * depending on whether the file exists.  This is part of the migration path to
 * always using FileDomainLinkDb.
 * <p>
 * The delegate is loaded on a background thread, so an instance may be queried
 * before any database is available.  While no delegate is loaded, the lookup
 * methods return empty results (an empty list or a count of zero), and
 * {@link #forEach} throws {@link IllegalStateException}.
 */
public class SelectingDomainLinkDb implements DomainLinkDb {
    private static final Logger logger = LoggerFactory.getLogger(SelectingDomainLinkDb.class);

    /** The active delegate; null until a load has completed.  Volatile because it is
     * assigned from loader threads and read from whichever thread performs a query.
     */
    private volatile DomainLinkDb currentDb;

    /** Location of the file-backed link database. */
    private final Path filename;

    /** Creates the selecting database and starts loading the delegate in the background.
     *
     * @param filename             path of the file-backed link database; if it exists when the
     *                             loader thread runs, a FileDomainLinkDb is used, otherwise a
     *                             SqlDomainLinkDb
     * @param serviceConfiguration passed through to SqlDomainLinkDb
     * @param dataSource           passed through to SqlDomainLinkDb
     */
    public SelectingDomainLinkDb(@Named("domain-linkdb-file") Path filename,
                                 ServiceConfiguration serviceConfiguration,
                                 HikariDataSource dataSource) {
        this.filename = filename;

        // Load the database in a separate thread, so that the constructor can return
        // immediately.  This would otherwise add a lot of time to the startup of the
        // index service.
        //
        // NOTE(review): if switchInput() is called while this initial load is still running,
        // whichever loader thread finishes last decides the value of currentDb -- confirm
        // that callers cannot trigger a switch that early.
        Thread.ofPlatform().start(() -> {
            try {
                if (Files.exists(filename)) {
                    currentDb = new FileDomainLinkDb(filename);
                }
                else {
                    currentDb = new SqlDomainLinkDb(filename, dataSource, serviceConfiguration);
                }
                logger.info("Loaded linkdb");
            } catch (Exception e) {
                logger.error("Failed to load linkdb", e);
            }
        });
    }

    /** Moves {@code newFilename} over the configured database file, then loads a
     * FileDomainLinkDb from it on a background thread.  The previous delegate keeps
     * serving queries until the new one has finished loading; if loading fails, the
     * error is logged and the previous delegate stays in place.
     *
     * @param newFilename the file to move into place as the new link database
     * @throws Exception if the file move fails
     */
    @Override
    public void switchInput(Path newFilename) throws Exception {
        Files.move(newFilename, filename, StandardCopyOption.REPLACE_EXISTING);

        Thread.ofPlatform().start(() -> {
            try {
                currentDb = new FileDomainLinkDb(filename);
            } catch (Exception e) {
                // Catch everything, as in the constructor's loader: an unchecked failure
                // would otherwise terminate this thread without reaching the log.
                logger.error("Failed to load linkdb", e);
            }
        });
    }

    /** Delegates to the loaded database, or returns an empty list if none is loaded yet. */
    @Override
    public TIntArrayList findDestinations(int source) {
        // Read the volatile field once, so the null check and the call use the same delegate
        final DomainLinkDb db = currentDb;

        if (db == null)
            return new TIntArrayList();

        return db.findDestinations(source);
    }

    /** Delegates to the loaded database, or returns 0 if none is loaded yet. */
    @Override
    public int countDestinations(int source) {
        final DomainLinkDb db = currentDb;

        if (db == null)
            return 0;

        return db.countDestinations(source);
    }

    /** Delegates to the loaded database, or returns an empty list if none is loaded yet. */
    @Override
    public TIntArrayList findSources(int dest) {
        final DomainLinkDb db = currentDb;

        if (db == null)
            return new TIntArrayList();

        return db.findSources(dest);
    }

    /** Delegates to the loaded database, or returns 0 if none is loaded yet. */
    @Override
    public int countSources(int source) {
        final DomainLinkDb db = currentDb;

        if (db == null)
            return 0;

        return db.countSources(source);
    }

    /** Passes the consumer to the loaded database.
     *
     * @throws IllegalStateException if no database has been loaded yet
     */
    @Override
    public void forEach(SourceDestConsumer consumer) {
        final DomainLinkDb db = currentDb;

        if (db == null)
            throw new IllegalStateException("No linkdb loaded");

        db.forEach(consumer);
    }
}

View File

@ -1,4 +1,4 @@
package nu.marginalia.linkdb;
package nu.marginalia.linkdb.dlinks;
import com.google.inject.name.Named;
import com.zaxxer.hikari.HikariDataSource;
@ -9,6 +9,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
@ -37,21 +38,12 @@ public class SqlDomainLinkDb implements DomainLinkDb {
this.dataSource = dataSource;
node = configuration.node();
Thread.ofPlatform().start(() -> {
try {
loadDb();
} catch (Exception e) {
logger.error("Failed to load linkdb", e);
}
});
loadDb();
}
@Override
public void switchInput(Path newFilename) throws IOException {
Files.move(newFilename, filename, StandardCopyOption.REPLACE_EXISTING);
loadDb();
throw new UnsupportedEncodingException();
}
public void loadDb() {

View File

@ -1,4 +1,4 @@
package nu.marginalia.linkdb;
package nu.marginalia.linkdb.docs;
import com.google.inject.Inject;
import com.google.inject.Singleton;

View File

@ -1,4 +1,4 @@
package nu.marginalia.linkdb;
package nu.marginalia.linkdb.docs;
import nu.marginalia.linkdb.model.DocdbUrlDetail;

View File

@ -1,6 +1,8 @@
package nu.marginalia.linkdb;
import gnu.trove.list.array.TLongArrayList;
import nu.marginalia.linkdb.docs.DocumentDbReader;
import nu.marginalia.linkdb.docs.DocumentDbWriter;
import nu.marginalia.linkdb.model.DocdbUrlDetail;
import nu.marginalia.model.EdgeDomain;
import org.junit.jupiter.api.Test;

View File

@ -1,5 +1,7 @@
package nu.marginalia.linkdb;
import nu.marginalia.linkdb.dlinks.DomainLinkDbLoader;
import nu.marginalia.linkdb.dlinks.DomainLinkDbWriter;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;

View File

@ -46,7 +46,6 @@ public class ConverterMain {
private final MessageQueueFactory messageQueueFactory;
private final FileStorageService fileStorageService;
private final SideloadSourceFactory sideloadSourceFactory;
private final int node;
public static void main(String... args) throws Exception {

View File

@ -9,7 +9,7 @@ import lombok.SneakyThrows;
import nu.marginalia.ProcessConfiguration;
import nu.marginalia.ProcessConfigurationModule;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.linkdb.DocumentDbWriter;
import nu.marginalia.linkdb.docs.DocumentDbWriter;
import nu.marginalia.loading.documents.DocumentLoaderService;
import nu.marginalia.loading.documents.KeywordLoaderService;
import nu.marginalia.loading.domains.DomainIdRegistry;

View File

@ -9,9 +9,9 @@ import com.google.inject.name.Names;
import nu.marginalia.LanguageModels;
import nu.marginalia.WmsaHome;
import nu.marginalia.IndexLocations;
import nu.marginalia.linkdb.DomainLinkDbWriter;
import nu.marginalia.linkdb.dlinks.DomainLinkDbWriter;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.linkdb.DocumentDbWriter;
import nu.marginalia.linkdb.docs.DocumentDbWriter;
import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.service.SearchServiceDescriptors;
import nu.marginalia.service.descriptor.ServiceDescriptors;

View File

@ -4,7 +4,7 @@ import com.google.inject.Inject;
import com.google.inject.Singleton;
import lombok.SneakyThrows;
import nu.marginalia.io.processed.DocumentRecordParquetFileReader;
import nu.marginalia.linkdb.DocumentDbWriter;
import nu.marginalia.linkdb.docs.DocumentDbWriter;
import nu.marginalia.linkdb.model.DocdbUrlDetail;
import nu.marginalia.loading.LoaderInputData;
import nu.marginalia.loading.domains.DomainIdRegistry;

View File

@ -4,7 +4,7 @@ import com.google.inject.Inject;
import com.google.inject.Singleton;
import lombok.SneakyThrows;
import nu.marginalia.io.processed.DomainLinkRecordParquetFileReader;
import nu.marginalia.linkdb.DomainLinkDbWriter;
import nu.marginalia.linkdb.dlinks.DomainLinkDbWriter;
import nu.marginalia.loading.LoaderInputData;
import nu.marginalia.loading.domains.DomainIdRegistry;
import nu.marginalia.model.processed.DomainLinkRecord;

View File

@ -5,9 +5,10 @@ import com.google.inject.Provides;
import com.google.inject.Singleton;
import com.google.inject.name.Named;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.linkdb.DomainLinkDb;
import nu.marginalia.linkdb.FileDomainLinkDb;
import nu.marginalia.linkdb.SqlDomainLinkDb;
import nu.marginalia.linkdb.dlinks.DomainLinkDb;
import nu.marginalia.linkdb.dlinks.FileDomainLinkDb;
import nu.marginalia.linkdb.dlinks.SelectingDomainLinkDb;
import nu.marginalia.linkdb.dlinks.SqlDomainLinkDb;
import nu.marginalia.service.module.ServiceConfiguration;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.IndexLocations;
@ -41,18 +42,11 @@ public class IndexModule extends AbstractModule {
FileStorageService storageService,
HikariDataSource dataSource,
ServiceConfiguration serviceConfiguration
) throws IOException
)
{
Path path = IndexLocations.getLinkdbLivePath(storageService).resolve(DOMAIN_LINKS_FILE_NAME);
if (Files.exists(path)) {
logger.info("Using file domain link db {}", path);
return new FileDomainLinkDb(path);
}
else {
logger.warn("Using legacy sql domain link db");
return new SqlDomainLinkDb(path, dataSource, serviceConfiguration);
}
return new SelectingDomainLinkDb(path, serviceConfiguration, dataSource);
}
@Provides

View File

@ -7,13 +7,13 @@ import io.reactivex.rxjava3.schedulers.Schedulers;
import lombok.SneakyThrows;
import nu.marginalia.IndexLocations;
import nu.marginalia.index.svc.IndexDomainLinksService;
import nu.marginalia.linkdb.DomainLinkDb;
import nu.marginalia.linkdb.dlinks.DomainLinkDb;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.index.client.IndexMqEndpoints;
import nu.marginalia.index.index.SearchIndex;
import nu.marginalia.index.svc.IndexOpsService;
import nu.marginalia.index.svc.IndexQueryService;
import nu.marginalia.linkdb.DocumentDbReader;
import nu.marginalia.linkdb.docs.DocumentDbReader;
import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.service.control.ServiceEventLog;
import nu.marginalia.service.server.*;

View File

@ -7,7 +7,7 @@ import gnu.trove.list.array.TLongArrayList;
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
import nu.marginalia.index.client.model.results.ResultRankingContext;
import nu.marginalia.index.client.model.results.SearchResultItem;
import nu.marginalia.linkdb.DocumentDbReader;
import nu.marginalia.linkdb.docs.DocumentDbReader;
import nu.marginalia.linkdb.model.DocdbUrlDetail;
import nu.marginalia.ranking.ResultValuator;
import org.slf4j.Logger;

View File

@ -3,9 +3,7 @@ package nu.marginalia.index.svc;
import com.google.inject.Inject;
import io.grpc.stub.StreamObserver;
import nu.marginalia.index.api.*;
import nu.marginalia.linkdb.DomainLinkDb;
import static io.grpc.stub.ServerCalls.asyncUnimplementedUnaryCall;
import nu.marginalia.linkdb.dlinks.DomainLinkDb;
/** GRPC service for interrogating domain links
*/

View File

@ -24,8 +24,8 @@ import nu.marginalia.index.journal.writer.IndexJournalWriter;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.linkdb.DocumentDbReader;
import nu.marginalia.linkdb.DocumentDbWriter;
import nu.marginalia.linkdb.docs.DocumentDbReader;
import nu.marginalia.linkdb.docs.DocumentDbWriter;
import nu.marginalia.linkdb.model.DocdbUrlDetail;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.model.id.UrlIdCodec;

View File

@ -23,8 +23,8 @@ import nu.marginalia.index.journal.writer.IndexJournalWriter;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.linkdb.DocumentDbReader;
import nu.marginalia.linkdb.DocumentDbWriter;
import nu.marginalia.linkdb.docs.DocumentDbReader;
import nu.marginalia.linkdb.docs.DocumentDbWriter;
import nu.marginalia.linkdb.model.DocdbUrlDetail;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.model.crawl.PubDate;

View File

@ -7,7 +7,7 @@ import nu.marginalia.storage.model.FileStorageBase;
import nu.marginalia.storage.model.FileStorageBaseType;
import nu.marginalia.index.journal.writer.IndexJournalWriter;
import nu.marginalia.index.journal.writer.IndexJournalWriterPagingImpl;
import nu.marginalia.linkdb.DocumentDbReader;
import nu.marginalia.linkdb.docs.DocumentDbReader;
import nu.marginalia.process.control.FakeProcessHeartbeat;
import nu.marginalia.process.control.ProcessHeartbeat;
import nu.marginalia.ranking.DomainRankings;