Clean up documentation and rename domain-links to link-graph

This commit is contained in:
Viktor Lofgren 2024-02-28 11:40:11 +01:00
parent 3a65fe8917
commit 9f1649636e
54 changed files with 269 additions and 224 deletions

View File

@ -1,15 +1,3 @@
## Domain Link Database
The domain link database contains information about links
between domains. It is a static in-memory database loaded
from a binary file.
* [DomainLinkDb](java/nu/marginalia/linkdb/DomainLinkDb.java)
* * [FileDomainLinkDb](java/nu/marginalia/linkdb/FileDomainLinkDb.java)
* * [SqlDomainLinkDb](java/nu/marginalia/linkdb/SqlDomainLinkDb.java)
* [DomainLinkDbWriter](java/nu/marginalia/linkdb/DomainLinkDbWriter.java)
* [DomainLinkDbLoader](java/nu/marginalia/linkdb/DomainLinkDbLoader.java)
## Document Database
The document database contains information about links,
@ -21,10 +9,10 @@ is not in the MariaDB database is that this would make updates to
this information take effect in production immediately, even before
the information was searchable.
* [DocumentLinkDbWriter](java/nu/marginalia/linkdb/DocumentDbWriter.java)
* [DocumentLinkDbLoader](java/nu/marginalia/linkdb/DocumentDbReader.java)
* [DocumentDbWriter](java/nu/marginalia/linkdb/docs/DocumentDbWriter.java)
* [DocumentDbReader](java/nu/marginalia/linkdb/docs/DocumentDbReader.java)
## See Also
These databases are constructed by the [loading-process](../../processes/loading-process), and consumed by the [index-service](../../services-core/index-service).
The database is constructed by the [loading-process](../../processes/loading-process), and consumed by the [index-service](../../services-core/index-service).

View File

@ -7,6 +7,5 @@ as shared models.
* [config](config/) contains some `@Inject`ables.
* [renderer](renderer/) contains utility code for rendering website templates.
* [service](service/) is the shared base classes for main methods and web services.
* [service-client](service-client/) is the shared base class for RPC.
* [service-discovery](service-discovery) contains tools that lets the services find each other.
* [service-discovery](service-discovery) contains tools that let the services find each other and communicate.
* [process](process/) contains boilerplate for batch processes.

View File

@ -34,7 +34,7 @@ dependencies {
implementation project(':code:libraries:message-queue')
implementation project(':code:functions:domain-links:api')
implementation project(':code:functions:link-graph:api')
implementation project(':code:execution:api')
implementation project(':code:process-models:crawl-spec')

View File

@ -6,7 +6,7 @@ import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.actor.prototype.RecordActorPrototype;
import nu.marginalia.actor.state.ActorStep;
import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient;
import nu.marginalia.api.linkgraph.AggregateLinkGraphClient;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageType;
@ -32,7 +32,7 @@ public class ExportDataActor extends RecordActorPrototype {
private final FileStorageService storageService;
private final HikariDataSource dataSource;
private final Logger logger = LoggerFactory.getLogger(getClass());
private final AggregateDomainLinksClient domainLinksClient;
private final AggregateLinkGraphClient linkGraphClient;
public record Export() implements ActorStep {}
public record ExportBlacklist(FileStorageId fid) implements ActorStep {}
@ -114,7 +114,7 @@ public class ExportDataActor extends RecordActorPrototype {
var tmpFile = Files.createTempFile(storage.asPath(), "export", ".csv.gz",
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--")));
var allLinks = domainLinksClient.getAllDomainLinks();
var allLinks = linkGraphClient.getAllDomainLinks();
try (var bw = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(Files.newOutputStream(tmpFile, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)))))
{
@ -155,12 +155,12 @@ public class ExportDataActor extends RecordActorPrototype {
public ExportDataActor(Gson gson,
FileStorageService storageService,
HikariDataSource dataSource,
AggregateDomainLinksClient domainLinksClient)
AggregateLinkGraphClient linkGraphClient)
{
super(gson);
this.storageService = storageService;
this.dataSource = dataSource;
this.domainLinksClient = domainLinksClient;
this.linkGraphClient = linkGraphClient;
}
}

12
code/execution/readme.md Normal file
View File

@ -0,0 +1,12 @@
The execution subsystem is responsible for the execution of long-running tasks on each
index node. It lives in the [executor-service](../services-core/executor-service) module.
It accomplishes this using the [message queue and actor library](../libraries/message-queue/),
which permits program state to survive crashes and reboots.
The subsystem exposes four [APIs](api/src/main/protobuf/executor-api.proto):
* Execution API - for starting and stopping tasks, also contains miscellaneous commands
* Crawl API - for managing the crawl workflow
* Sideload API - for sideloading data
* Export API - for exporting data

View File

@ -16,4 +16,3 @@ holistically, not by question or answer, it is necessary to re-arrange
the data (which is very large). SQLite does a decent job of enabling
this task.
See [tools/stackexchange-converter](../../tools/stackexchange-converter).

View File

@ -15,7 +15,7 @@ apply from: "$rootProject.projectDir/srcsets.gradle"
dependencies {
implementation project(':code:functions:domain-info:api')
implementation project(':code:functions:domain-links:api')
implementation project(':code:functions:link-graph:api')
implementation project(':code:common:config')
implementation project(':code:common:service')

View File

@ -2,7 +2,7 @@ package nu.marginalia.functions.domains;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.api.domains.RpcDomainInfoResponse;
import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient;
import nu.marginalia.api.linkgraph.AggregateLinkGraphClient;
import nu.marginalia.geoip.GeoIpDictionary;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.db.DbDomainQueries;
@ -21,7 +21,7 @@ public class DomainInformationService {
private final GeoIpDictionary geoIpDictionary;
private DbDomainQueries dbDomainQueries;
private final AggregateDomainLinksClient domainLinksClient;
private final AggregateLinkGraphClient linkGraphClient;
private HikariDataSource dataSource;
private final Logger logger = LoggerFactory.getLogger(getClass());
@ -29,11 +29,11 @@ public class DomainInformationService {
public DomainInformationService(
DbDomainQueries dbDomainQueries,
GeoIpDictionary geoIpDictionary,
AggregateDomainLinksClient domainLinksClient,
AggregateLinkGraphClient linkGraphClient,
HikariDataSource dataSource) {
this.dbDomainQueries = dbDomainQueries;
this.geoIpDictionary = geoIpDictionary;
this.domainLinksClient = domainLinksClient;
this.linkGraphClient = linkGraphClient;
this.dataSource = dataSource;
}
@ -84,8 +84,8 @@ public class DomainInformationService {
inCrawlQueue = rs.next();
builder.setInCrawlQueue(inCrawlQueue);
builder.setIncomingLinks(domainLinksClient.countLinksToDomain(domainId));
builder.setOutboundLinks(domainLinksClient.countLinksFromDomain(domainId));
builder.setIncomingLinks(linkGraphClient.countLinksToDomain(domainId));
builder.setOutboundLinks(linkGraphClient.countLinksFromDomain(domainId));
rs = stmt.executeQuery(STR."""
SELECT KNOWN_URLS, GOOD_URLS, VISITED_URLS FROM DOMAIN_METADATA WHERE ID=\{domainId}

View File

@ -11,7 +11,7 @@ import gnu.trove.set.hash.TIntHashSet;
import it.unimi.dsi.fastutil.ints.Int2DoubleArrayMap;
import nu.marginalia.api.domains.*;
import nu.marginalia.api.domains.model.SimilarDomain;
import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient;
import nu.marginalia.api.linkgraph.AggregateLinkGraphClient;
import nu.marginalia.model.EdgeDomain;
import org.roaringbitmap.RoaringBitmap;
import org.slf4j.Logger;
@ -20,7 +20,6 @@ import org.slf4j.LoggerFactory;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import java.util.concurrent.Executors;
import java.util.stream.IntStream;
@ -29,7 +28,7 @@ public class SimilarDomainsService {
private static final Logger logger = LoggerFactory.getLogger(SimilarDomainsService.class);
private final HikariDataSource dataSource;
private final AggregateDomainLinksClient domainLinksClient;
private final AggregateLinkGraphClient linkGraphClient;
private volatile TIntIntHashMap domainIdToIdx = new TIntIntHashMap(100_000);
private volatile int[] domainIdxToId;
@ -45,9 +44,9 @@ public class SimilarDomainsService {
volatile boolean isReady = false;
@Inject
public SimilarDomainsService(HikariDataSource dataSource, AggregateDomainLinksClient domainLinksClient) {
public SimilarDomainsService(HikariDataSource dataSource, AggregateLinkGraphClient linkGraphClient) {
this.dataSource = dataSource;
this.domainLinksClient = domainLinksClient;
this.linkGraphClient = linkGraphClient;
Executors.newSingleThreadExecutor().submit(this::init);
}
@ -262,7 +261,7 @@ public class SimilarDomainsService {
private TIntSet getLinkingIdsDToS(int domainIdx) {
var items = new TIntHashSet();
for (int id : domainLinksClient.getLinksFromDomain(domainIdxToId[domainIdx])) {
for (int id : linkGraphClient.getLinksFromDomain(domainIdxToId[domainIdx])) {
items.add(domainIdToIdx.get(id));
}
@ -272,7 +271,7 @@ public class SimilarDomainsService {
private TIntSet getLinkingIdsSToD(int domainIdx) {
var items = new TIntHashSet();
for (int id : domainLinksClient.getLinksToDomain(domainIdxToId[domainIdx])) {
for (int id : linkGraphClient.getLinksToDomain(domainIdxToId[domainIdx])) {
items.add(domainIdToIdx.get(id));
}

View File

@ -1,30 +0,0 @@
package nu.marginalia.api.indexdomainlinks;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.api.domainlink.DomainLinksApiGrpc;
import nu.marginalia.service.client.GrpcChannelPoolFactory;
import nu.marginalia.service.client.GrpcMultiNodeChannelPool;
import nu.marginalia.service.discovery.property.ServiceKey;
import nu.marginalia.service.discovery.property.ServicePartition;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@Singleton
public class PartitionDomainLinksClient {
private static final Logger logger = LoggerFactory.getLogger(PartitionDomainLinksClient.class);
private final GrpcMultiNodeChannelPool<DomainLinksApiGrpc.DomainLinksApiBlockingStub> channelPool;
@Inject
public PartitionDomainLinksClient(GrpcChannelPoolFactory factory) {
this.channelPool = factory.createMulti(
ServiceKey.forGrpcApi(DomainLinksApiGrpc.class, ServicePartition.multi()),
DomainLinksApiGrpc::newBlockingStub);
}
public GrpcMultiNodeChannelPool<DomainLinksApiGrpc.DomainLinksApiBlockingStub> getChannelPool() {
return channelPool;
}
}

View File

@ -14,7 +14,7 @@ java {
apply from: "$rootProject.projectDir/srcsets.gradle"
dependencies {
implementation project(':code:functions:domain-links:api')
implementation project(':code:functions:link-graph:api')
implementation project(':code:common:config')
implementation project(':code:common:service')

View File

@ -1,20 +1,25 @@
package nu.marginalia.functions.domainlinks;
package nu.marginalia.linkgraph;
import com.google.inject.Inject;
import io.grpc.stub.StreamObserver;
import nu.marginalia.api.domainlink.*;
import nu.marginalia.api.indexdomainlinks.PartitionDomainLinksClient;
import nu.marginalia.api.linkgraph.*;
import nu.marginalia.api.linkgraph.PartitionLinkGraphClient;
import nu.marginalia.api.linkgraph.LinkGraphApiGrpc;
import nu.marginalia.api.linkgraph.LinkGraphApiGrpc.LinkGraphApiBlockingStub;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
public class AggregateDomainLinksService extends DomainLinksApiGrpc.DomainLinksApiImplBase {
private static final Logger logger = LoggerFactory.getLogger(AggregateDomainLinksService.class);
private final PartitionDomainLinksClient client;
/** This class is responsible for aggregating the link graph data from the partitioned link graph
* services.
*/
public class AggregateLinkGraphService extends LinkGraphApiGrpc.LinkGraphApiImplBase {
private static final Logger logger = LoggerFactory.getLogger(AggregateLinkGraphService.class);
private final PartitionLinkGraphClient client;
@Inject
public AggregateDomainLinksService(PartitionDomainLinksClient client) {
public AggregateLinkGraphService(PartitionLinkGraphClient client) {
this.client = client;
}
@ -22,7 +27,7 @@ public class AggregateDomainLinksService extends DomainLinksApiGrpc.DomainLinksA
public void getAllLinks(Empty request,
StreamObserver<RpcDomainIdPairs> responseObserver) {
client.getChannelPool().call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::getAllLinks)
client.getChannelPool().call(LinkGraphApiBlockingStub::getAllLinks)
.run(Empty.getDefaultInstance())
.forEach(iter -> iter.forEachRemaining(responseObserver::onNext));
@ -34,7 +39,7 @@ public class AggregateDomainLinksService extends DomainLinksApiGrpc.DomainLinksA
StreamObserver<RpcDomainIdList> responseObserver) {
var rspBuilder = RpcDomainIdList.newBuilder();
client.getChannelPool().call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::getLinksFromDomain)
client.getChannelPool().call(LinkGraphApiBlockingStub::getLinksFromDomain)
.run(request)
.stream()
.map(RpcDomainIdList::getDomainIdList)
@ -51,7 +56,7 @@ public class AggregateDomainLinksService extends DomainLinksApiGrpc.DomainLinksA
var rspBuilder = RpcDomainIdList.newBuilder();
client.getChannelPool().call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::getLinksToDomain)
client.getChannelPool().call(LinkGraphApiBlockingStub::getLinksToDomain)
.run(request)
.stream()
.map(RpcDomainIdList::getDomainIdList)
@ -65,7 +70,7 @@ public class AggregateDomainLinksService extends DomainLinksApiGrpc.DomainLinksA
@Override
public void countLinksFromDomain(RpcDomainId request,
StreamObserver<RpcDomainIdCount> responseObserver) {
int sum = client.getChannelPool().call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::countLinksFromDomain)
int sum = client.getChannelPool().call(LinkGraphApiBlockingStub::countLinksFromDomain)
.run(request)
.stream()
.mapToInt(RpcDomainIdCount::getIdCount)
@ -81,7 +86,7 @@ public class AggregateDomainLinksService extends DomainLinksApiGrpc.DomainLinksA
public void countLinksToDomain(RpcDomainId request,
StreamObserver<RpcDomainIdCount> responseObserver) {
int sum = client.getChannelPool().call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::countLinksToDomain)
int sum = client.getChannelPool().call(LinkGraphApiBlockingStub::countLinksToDomain)
.run(request)
.stream()
.mapToInt(RpcDomainIdCount::getIdCount)

View File

@ -0,0 +1,3 @@
This module is responsible for aggregating the link graph from the partitioned services, and exposing a unified
view of the link graph. It does not keep any data or state, but instead delegates to the partitioned
services.

View File

@ -11,7 +11,7 @@ java {
}
}
jar.archiveBaseName = 'index-domain-links-api'
jar.archiveBaseName = 'link-graph-api'
apply from: "$rootProject.projectDir/protobuf.gradle"
apply from: "$rootProject.projectDir/srcsets.gradle"

View File

@ -1,10 +1,8 @@
package nu.marginalia.api.indexdomainlinks;
package nu.marginalia.api.linkgraph;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.api.domainlink.DomainLinksApiGrpc;
import nu.marginalia.api.domainlink.Empty;
import nu.marginalia.api.domainlink.RpcDomainId;
import nu.marginalia.api.linkgraph.LinkGraphApiGrpc;
import nu.marginalia.service.client.GrpcChannelPoolFactory;
import nu.marginalia.service.client.GrpcSingleNodeChannelPool;
import nu.marginalia.service.discovery.property.ServiceKey;
@ -17,24 +15,26 @@ import org.slf4j.LoggerFactory;
import java.time.Duration;
import java.util.List;
@Singleton
public class AggregateDomainLinksClient {
private static final Logger logger = LoggerFactory.getLogger(AggregateDomainLinksClient.class);
import static nu.marginalia.api.linkgraph.LinkGraphApiGrpc.*;
private final GrpcSingleNodeChannelPool<DomainLinksApiGrpc.DomainLinksApiBlockingStub> channelPool;
@Singleton
public class AggregateLinkGraphClient {
private static final Logger logger = LoggerFactory.getLogger(AggregateLinkGraphClient.class);
private final GrpcSingleNodeChannelPool<LinkGraphApiBlockingStub> channelPool;
@Inject
public AggregateDomainLinksClient(GrpcChannelPoolFactory factory) {
public AggregateLinkGraphClient(GrpcChannelPoolFactory factory) {
this.channelPool = factory.createSingle(
ServiceKey.forGrpcApi(DomainLinksApiGrpc.class, ServicePartition.any()),
DomainLinksApiGrpc::newBlockingStub);
ServiceKey.forGrpcApi(LinkGraphApiGrpc.class, ServicePartition.any()),
LinkGraphApiGrpc::newBlockingStub);
}
public AllLinks getAllDomainLinks() {
AllLinks links = new AllLinks();
channelPool.call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::getAllLinks)
channelPool.call(LinkGraphApiBlockingStub::getAllLinks)
.run(Empty.getDefaultInstance())
.forEachRemaining(pairs -> {
for (int i = 0; i < pairs.getDestIdsCount(); i++) {
@ -47,7 +47,7 @@ public class AggregateDomainLinksClient {
public List<Integer> getLinksToDomain(int domainId) {
try {
return channelPool.call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::getLinksToDomain)
return channelPool.call(LinkGraphApiBlockingStub::getLinksToDomain)
.run(RpcDomainId.newBuilder().setDomainId(domainId).build())
.getDomainIdList()
.stream()
@ -62,7 +62,7 @@ public class AggregateDomainLinksClient {
public List<Integer> getLinksFromDomain(int domainId) {
try {
return channelPool.call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::getLinksFromDomain)
return channelPool.call(LinkGraphApiBlockingStub::getLinksFromDomain)
.run(RpcDomainId.newBuilder().setDomainId(domainId).build())
.getDomainIdList()
.stream()
@ -78,7 +78,7 @@ public class AggregateDomainLinksClient {
public int countLinksToDomain(int domainId) {
try {
return channelPool.call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::countLinksToDomain)
return channelPool.call(LinkGraphApiBlockingStub::countLinksToDomain)
.run(RpcDomainId.newBuilder().setDomainId(domainId).build())
.getIdCount();
@ -91,7 +91,7 @@ public class AggregateDomainLinksClient {
public int countLinksFromDomain(int domainId) {
try {
return channelPool.call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::countLinksFromDomain)
return channelPool.call(LinkGraphApiBlockingStub::countLinksFromDomain)
.run(RpcDomainId.newBuilder().setDomainId(domainId).build())
.getIdCount();
}

View File

@ -0,0 +1,29 @@
package nu.marginalia.api.linkgraph;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.service.client.GrpcChannelPoolFactory;
import nu.marginalia.service.client.GrpcMultiNodeChannelPool;
import nu.marginalia.service.discovery.property.ServiceKey;
import nu.marginalia.service.discovery.property.ServicePartition;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/** Client-side holder of a multi-node gRPC channel pool for the link graph API.
 * <p>
 * The pool is created for the service key of {@code LinkGraphApiGrpc} with
 * {@code ServicePartition.multi()}, and produces blocking stubs. This class adds
 * no request logic of its own; callers obtain the pool via {@link #getChannelPool()}
 * and issue calls through it.
 */
@Singleton
public class PartitionLinkGraphClient {
// NOTE(review): this logger is not used anywhere in the class as shown.
private static final Logger logger = LoggerFactory.getLogger(PartitionLinkGraphClient.class);
/** Channel pool of blocking link graph stubs, created once in the constructor. */
private final GrpcMultiNodeChannelPool<LinkGraphApiGrpc.LinkGraphApiBlockingStub> channelPool;
/** Creates the multi-node channel pool from the injected factory.
 *
 * @param factory factory used to create the channel pool
 */
@Inject
public PartitionLinkGraphClient(GrpcChannelPoolFactory factory) {
this.channelPool = factory.createMulti(
ServiceKey.forGrpcApi(LinkGraphApiGrpc.class, ServicePartition.multi()),
LinkGraphApiGrpc::newBlockingStub);
}
/** Returns the channel pool created in the constructor.
 *
 * @return the multi-node channel pool of blocking link graph stubs
 */
public GrpcMultiNodeChannelPool<LinkGraphApiGrpc.LinkGraphApiBlockingStub> getChannelPool() {
return channelPool;
}
}

View File

@ -1,10 +1,10 @@
syntax="proto3";
package nu.marginalia.api.domainlinks;
package nu.marginalia.api.linkgraph;
option java_package="nu.marginalia.api.domainlink";
option java_package="nu.marginalia.api.linkgraph";
option java_multiple_files=true;
service DomainLinksApi {
service LinkGraphApi {
rpc getAllLinks(Empty) returns (stream RpcDomainIdPairs) {}
rpc getLinksFromDomain(RpcDomainId) returns (RpcDomainIdList) {}
rpc getLinksToDomain(RpcDomainId) returns (RpcDomainIdList) {}

View File

@ -14,7 +14,7 @@ java {
apply from: "$rootProject.projectDir/srcsets.gradle"
dependencies {
implementation project(':code:functions:domain-links:api')
implementation project(':code:functions:link-graph:api')
implementation project(':code:common:config')
implementation project(':code:common:service')

View File

@ -1,13 +1,13 @@
package nu.marginalia.linkdb.dlinks;
package nu.marginalia.linkgraph;
import gnu.trove.list.array.TIntArrayList;
import java.nio.file.Path;
/** A database of source-destination pairs of domain IDs. The database is loaded into memory from
* a source. The database is then kept in memory, reloading it upon switchInput().
/** A repository of source-destination pairs of domain IDs. The data is loaded into memory from
* a source. The data is then kept in memory, and reloaded upon switchInput().
*/
public interface DomainLinkDb {
public interface DomainLinks {
/** Replace the current db file with the provided file. The provided file will be deleted.
* The in-memory database MAY be updated to reflect the change.
* */

View File

@ -1,25 +1,28 @@
package nu.marginalia.functions.domainlinks;
package nu.marginalia.linkgraph;
import com.google.inject.Inject;
import io.grpc.stub.StreamObserver;
import nu.marginalia.api.domainlink.*;
import nu.marginalia.linkdb.dlinks.DomainLinkDb;
import nu.marginalia.api.linkgraph.*;
import nu.marginalia.api.linkgraph.Empty;
import nu.marginalia.api.linkgraph.LinkGraphApiGrpc;
/** GRPC service for interrogating domain links
/** GRPC service for interrogating domain links for a single partition. For accessing the data
* in the application, the AggregateLinkGraphService should be used instead via the
* AggregateLinkGraphClient.
*/
public class PartitionDomainLinksService extends DomainLinksApiGrpc.DomainLinksApiImplBase {
private final DomainLinkDb domainLinkDb;
public class PartitionLinkGraphService extends LinkGraphApiGrpc.LinkGraphApiImplBase {
private final DomainLinks domainLinks;
@Inject
public PartitionDomainLinksService(DomainLinkDb domainLinkDb) {
this.domainLinkDb = domainLinkDb;
public PartitionLinkGraphService(DomainLinks domainLinks) {
this.domainLinks = domainLinks;
}
public void getAllLinks(Empty request,
io.grpc.stub.StreamObserver<RpcDomainIdPairs> responseObserver) {
try (var idsConverter = new AllIdsResponseConverter(responseObserver)) {
domainLinkDb.forEach(idsConverter::accept);
domainLinks.forEach(idsConverter::accept);
}
responseObserver.onCompleted();
@ -58,7 +61,7 @@ public class PartitionDomainLinksService extends DomainLinksApiGrpc.DomainLinksA
public void getLinksFromDomain(RpcDomainId request,
StreamObserver<RpcDomainIdList> responseObserver) {
var links = domainLinkDb.findDestinations(request.getDomainId());
var links = domainLinks.findDestinations(request.getDomainId());
var rspBuilder = RpcDomainIdList.newBuilder();
for (int i = 0; i < links.size(); i++) {
@ -73,7 +76,7 @@ public class PartitionDomainLinksService extends DomainLinksApiGrpc.DomainLinksA
public void getLinksToDomain(RpcDomainId request,
StreamObserver<RpcDomainIdList> responseObserver) {
var links = domainLinkDb.findSources(request.getDomainId());
var links = domainLinks.findSources(request.getDomainId());
var rspBuilder = RpcDomainIdList.newBuilder();
for (int i = 0; i < links.size(); i++) {
@ -87,7 +90,7 @@ public class PartitionDomainLinksService extends DomainLinksApiGrpc.DomainLinksA
public void countLinksFromDomain(RpcDomainId request,
StreamObserver<RpcDomainIdCount> responseObserver) {
responseObserver.onNext(RpcDomainIdCount.newBuilder()
.setIdCount(domainLinkDb.countDestinations(request.getDomainId()))
.setIdCount(domainLinks.countDestinations(request.getDomainId()))
.build());
responseObserver.onCompleted();
}
@ -95,7 +98,7 @@ public class PartitionDomainLinksService extends DomainLinksApiGrpc.DomainLinksA
public void countLinksToDomain(RpcDomainId request,
StreamObserver<RpcDomainIdCount> responseObserver) {
responseObserver.onNext(RpcDomainIdCount.newBuilder()
.setIdCount(domainLinkDb.countSources(request.getDomainId()))
.setIdCount(domainLinks.countSources(request.getDomainId()))
.build());
responseObserver.onCompleted();
}

View File

@ -1,7 +1,8 @@
package nu.marginalia.linkdb.dlinks;
package nu.marginalia.linkgraph.impl;
import com.google.inject.name.Named;
import gnu.trove.list.array.TIntArrayList;
import nu.marginalia.linkgraph.DomainLinks;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -14,13 +15,13 @@ import java.nio.file.StandardCopyOption;
* is not yet loaded. This speeds up the startup of the index service, as the database is
* loaded in a separate thread.
*/
public class DelayingDomainLinkDb implements DomainLinkDb {
private final static Logger logger = LoggerFactory.getLogger(DelayingDomainLinkDb.class);
public class DelayingDomainLinks implements DomainLinks {
private final static Logger logger = LoggerFactory.getLogger(DelayingDomainLinks.class);
private volatile DomainLinkDb currentDb;
private volatile DomainLinks currentDb;
private final Path filename;
public DelayingDomainLinkDb(@Named("domain-linkdb-file") Path filename) {
public DelayingDomainLinks(@Named("domain-linkdb-file") Path filename) {
this.filename = filename;
// Load the database in a separate thread, so that the constructor can return
@ -29,7 +30,7 @@ public class DelayingDomainLinkDb implements DomainLinkDb {
Thread.ofPlatform().start(() -> {
try {
currentDb = new FileDomainLinkDb(filename);
currentDb = new FileDomainLinks(filename);
logger.info("Loaded linkdb");
} catch (Exception e) {
logger.error("Failed to load linkdb", e);
@ -43,7 +44,7 @@ public class DelayingDomainLinkDb implements DomainLinkDb {
Thread.ofPlatform().start(() -> {
try {
currentDb = new FileDomainLinkDb(filename);
currentDb = new FileDomainLinks(filename);
} catch (IOException e) {
logger.error("Failed to load linkdb", e);
}

View File

@ -1,7 +1,9 @@
package nu.marginalia.linkdb.dlinks;
package nu.marginalia.linkgraph.impl;
import com.google.inject.name.Named;
import gnu.trove.list.array.TIntArrayList;
import nu.marginalia.linkgraph.DomainLinks;
import nu.marginalia.linkgraph.io.DomainLinksLoader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -14,13 +16,13 @@ import java.util.Arrays;
/** Canonical DomainLinks implementation. The data is loaded into memory from
* a file. The data is then kept in memory, and reloaded upon switchInput().
*/
public class FileDomainLinkDb implements DomainLinkDb {
private static final Logger logger = LoggerFactory.getLogger(FileDomainLinkDb.class);
public class FileDomainLinks implements DomainLinks {
private static final Logger logger = LoggerFactory.getLogger(FileDomainLinks.class);
private final Path filename;
private volatile long[] sourceToDest = new long[0];
private volatile long[] destToSource = new long[0];
public FileDomainLinkDb(@Named("domain-linkdb-file") Path filename) throws IOException {
public FileDomainLinks(@Named("domain-linkdb-file") Path filename) throws IOException {
this.filename = filename;
if (Files.exists(filename)) {
@ -35,7 +37,7 @@ public class FileDomainLinkDb implements DomainLinkDb {
}
public void loadInput(Path filename) throws IOException {
try (var loader = new DomainLinkDbLoader(filename)) {
try (var loader = new DomainLinksLoader(filename)) {
int size = loader.size();
var newSourceToDest = new long[size];

View File

@ -1,17 +1,17 @@
package nu.marginalia.linkdb.dlinks;
package nu.marginalia.linkgraph.io;
import java.io.DataInputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
public class DomainLinkDbLoader implements AutoCloseable {
public class DomainLinksLoader implements AutoCloseable {
private final DataInputStream stream;
private final Path filename;
private long nextVal;
public DomainLinkDbLoader(Path filename) throws IOException {
public DomainLinksLoader(Path filename) throws IOException {
this.stream = new DataInputStream(Files.newInputStream(filename));
this.filename = filename;
}

View File

@ -1,4 +1,4 @@
package nu.marginalia.linkdb.dlinks;
package nu.marginalia.linkgraph.io;
import java.io.DataOutputStream;
import java.io.IOException;
@ -6,10 +6,10 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
public class DomainLinkDbWriter implements AutoCloseable {
public class DomainLinksWriter implements AutoCloseable {
private final DataOutputStream stream;
public DomainLinkDbWriter(Path fileName) throws IOException {
public DomainLinksWriter(Path fileName) throws IOException {
this.stream = new DataOutputStream(Files.newOutputStream(fileName,
StandardOpenOption.CREATE,
StandardOpenOption.WRITE,

View File

@ -0,0 +1,11 @@
The link graph partition module is responsible for knowledge about the link graph
for a single index node. It's based on in-memory data structures, and is updated
atomically from file.
## Central Classes
* [PartitionLinkGraphService](java/nu/marginalia/linkgraph/PartitionLinkGraphService.java)
* [DomainLinks](java/nu/marginalia/linkgraph/DomainLinks.java)
* * [FileDomainLinks](java/nu/marginalia/linkgraph/impl/FileDomainLinks.java)
* [DomainLinksWriter](java/nu/marginalia/linkgraph/io/DomainLinksWriter.java)
* [DomainLinksLoader](java/nu/marginalia/linkgraph/io/DomainLinksLoader.java)

View File

@ -1,7 +1,7 @@
package nu.marginalia.linkdb;
package nu.marginalia.linkgraph;
import nu.marginalia.linkdb.dlinks.DomainLinkDbLoader;
import nu.marginalia.linkdb.dlinks.DomainLinkDbWriter;
import nu.marginalia.linkgraph.io.DomainLinksLoader;
import nu.marginalia.linkgraph.io.DomainLinksWriter;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
@ -24,7 +24,7 @@ public class DomainLinkDbTest {
@Test
public void testWriteRead() {
try (var writer = new DomainLinkDbWriter(fileName)) {
try (var writer = new DomainLinksWriter(fileName)) {
writer.write(1, 2);
writer.write(2, 3);
writer.write(3, 4);
@ -33,7 +33,7 @@ public class DomainLinkDbTest {
throw new RuntimeException(ex);
}
try (var reader = new DomainLinkDbLoader(fileName)) {
try (var reader = new DomainLinksLoader(fileName)) {
Assertions.assertTrue(reader.next());
Assertions.assertEquals(1, reader.getSource());
Assertions.assertEquals(2, reader.getDest());

View File

@ -0,0 +1,9 @@
The link graph subsystem is responsible for knowledge about the link graph.
A SQL database is not very well suited for this, principally because it's too slow to update.
Instead, the link graph is stored in memory, and atomically updated from file. The storage
aspect is handled by the [common/linkdb](../../common/linkdb/) component.
The link graph subsystem has two components, one which injects into the partitioned services,
e.g. index or execution, and one which aggregates the results from the partitioned services,
and exposes a unified view of the link graph.

View File

@ -5,7 +5,10 @@ import java.nio.ByteBuffer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.BitSet;
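// It's unclear why this exists; we should probably use a BitSet instead?
// Chesterton's fence? Note that java.util.BitSet is indexed by int (at most 2^31-1 bits),
// while MAX_CAPACITY below is (1L<<34)-8 items, so BitSet may not be a drop-in replacement.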
public class DenseBitMap {
public static final long MAX_CAPACITY_2GB_16BN_ITEMS=(1L<<34)-8;

View File

@ -0,0 +1,4 @@
The search query subsystem is responsible for parsing a query,
translating it to a request, and then dispatching it to the
appropriate index nodes and translating the responses back again.

View File

@ -17,7 +17,7 @@ dependencies {
implementation project(':third-party:commons-codec')
implementation project(':code:index:api')
implementation project(':code:functions:domain-links:api')
implementation project(':code:functions:link-graph:api')
implementation project(':code:libraries:array')
implementation project(':code:libraries:btree')

View File

@ -3,7 +3,7 @@ package nu.marginalia.ranking.domains.data;
import com.google.inject.Inject;
import com.zaxxer.hikari.HikariDataSource;
import lombok.SneakyThrows;
import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient;
import nu.marginalia.api.linkgraph.AggregateLinkGraphClient;
import org.jgrapht.Graph;
import org.jgrapht.graph.DefaultDirectedGraph;
import org.jgrapht.graph.DefaultEdge;
@ -13,12 +13,12 @@ import org.jgrapht.graph.DefaultEdge;
* which is the same as the regular graph except
* the direction of the links have been inverted */
public class InvertedLinkGraphSource extends AbstractGraphSource {
private final AggregateDomainLinksClient queryClient;
private final AggregateLinkGraphClient graphClient;
@Inject
public InvertedLinkGraphSource(HikariDataSource dataSource, AggregateDomainLinksClient queryClient) {
public InvertedLinkGraphSource(HikariDataSource dataSource, AggregateLinkGraphClient graphClient) {
super(dataSource);
this.queryClient = queryClient;
this.graphClient = graphClient;
}
@SneakyThrows
@Override
@ -27,7 +27,7 @@ public class InvertedLinkGraphSource extends AbstractGraphSource {
addVertices(graph);
var allLinks = queryClient.getAllDomainLinks();
var allLinks = graphClient.getAllDomainLinks();
var iter = allLinks.iterator();
while (iter.advance()) {
if (!graph.containsVertex(iter.dest())) {

View File

@ -3,19 +3,19 @@ package nu.marginalia.ranking.domains.data;
import com.google.inject.Inject;
import com.zaxxer.hikari.HikariDataSource;
import lombok.SneakyThrows;
import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient;
import nu.marginalia.api.linkgraph.AggregateLinkGraphClient;
import org.jgrapht.Graph;
import org.jgrapht.graph.DefaultDirectedGraph;
import org.jgrapht.graph.DefaultEdge;
/** A source for the regular link graph. */
public class LinkGraphSource extends AbstractGraphSource {
private final AggregateDomainLinksClient domainLinksClient;
private final AggregateLinkGraphClient graphClient;
@Inject
public LinkGraphSource(HikariDataSource dataSource, AggregateDomainLinksClient domainLinksClient) {
public LinkGraphSource(HikariDataSource dataSource, AggregateLinkGraphClient graphClient) {
super(dataSource);
this.domainLinksClient = domainLinksClient;
this.graphClient = graphClient;
}
@SneakyThrows
@ -25,7 +25,7 @@ public class LinkGraphSource extends AbstractGraphSource {
addVertices(graph);
var allLinks = domainLinksClient.getAllDomainLinks();
var allLinks = graphClient.getAllDomainLinks();
var iter = allLinks.iterator();
while (iter.advance()) {
if (!graph.containsVertex(iter.dest())) {

View File

@ -1,6 +1,6 @@
# Index
This module contains the components that make up the search index.
This index subsystem contains the components that make up the search index.
It exposes an API for querying the index, and contains the logic
for ranking search results. It does not parse the query, that is
@ -10,9 +10,9 @@ the responsibility of the [search-query](../functions/search-query) module.
There are two indexes with accompanying tools for constructing them.
* [index-reverse](reverse-index/) is code for `word->document` indexes. There are two such indexes, one containing only document-word pairs that are flagged as important, e.g. the word appears in the title or has a high TF-IDF. This allows good results to be discovered quickly without having to sift through ten thousand bad ones first.
* [index-reverse](index-reverse/) is code for `word->document` indexes. There are two such indexes, one containing only document-word pairs that are flagged as important, e.g. the word appears in the title or has a high TF-IDF. This allows good results to be discovered quickly without having to sift through ten thousand bad ones first.
* [index-forward](forward-index/) is the `document->word` index containing metadata about each word, such as its position. It is used after identifying candidate search results via the reverse index to fetch metadata and rank the results.
* [index-forward](index-forward/) is the `document->word` index containing metadata about each word, such as its position. It is used after identifying candidate search results via the reverse index to fetch metadata and rank the results.
Additionally, the [index-journal](index-journal/) contains code for constructing a journal of the index, which is used to keep the index up to date.

View File

@ -3,7 +3,7 @@ package nu.marginalia.ranking.domains;
import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient;
import nu.marginalia.api.linkgraph.AggregateLinkGraphClient;
import nu.marginalia.ranking.domains.data.InvertedLinkGraphSource;
import nu.marginalia.ranking.domains.data.LinkGraphSource;
import nu.marginalia.ranking.domains.data.SimilarityGraphSource;
@ -36,8 +36,8 @@ public class RankingAlgorithmsContainerTest {
static HikariDataSource dataSource;
AggregateDomainLinksClient domainLinksClient;
AggregateDomainLinksClient.AllLinks allLinks;
AggregateLinkGraphClient domainLinksClient;
AggregateLinkGraphClient.AllLinks allLinks;
@BeforeAll
public static void setup() {
@ -66,8 +66,8 @@ public class RankingAlgorithmsContainerTest {
@BeforeEach
public void setupQueryClient() {
domainLinksClient = Mockito.mock(AggregateDomainLinksClient.class);
allLinks = new AggregateDomainLinksClient.AllLinks();
domainLinksClient = Mockito.mock(AggregateLinkGraphClient.class);
allLinks = new AggregateLinkGraphClient.AllLinks();
when(domainLinksClient.getAllDomainLinks()).thenReturn(allLinks);
try (var conn = dataSource.getConnection();

View File

@ -11,6 +11,4 @@ its words, how they stem, POS tags, and so on.
## See Also
[features-convert/keyword-extraction](../../features-convert/keyword-extraction) uses this code to identify which keywords
are important.
[features-qs/query-parser](../../features-qs/query-parser) also does some language processing.
are important.

View File

@ -6,7 +6,3 @@ the TF-IDF score of a keyword.
## Central Classes
* [TermFrequencyDict](java/nu/marginalia/term_frequency_dict/TermFrequencyDict.java)
## See Also
* [tools/term-frequency-extractor](../../tools/term-frequency-extractor) constructs this file

View File

@ -10,8 +10,8 @@ There are three types of indexes:
This is a very light-weight module that delegates the actual work to the modules:
* [features-index/index-reverse](../../features-index/index-reverse)
* [features-index/index-forward](../../features-index/index-forward)
* [index/index-reverse](../../index/index-reverse)
* [index/index-forward](../../index/index-forward)
Their respective readme files contain more information about the indexes themselves
and how they are constructed.

View File

@ -40,6 +40,8 @@ dependencies {
implementation project(':code:process-models:work-log')
implementation project(':code:features-convert:keyword-extraction')
implementation project(':code:functions:link-graph:partition')
implementation libs.bundles.slf4j
implementation libs.guice

View File

@ -9,7 +9,7 @@ import com.google.inject.name.Names;
import nu.marginalia.LanguageModels;
import nu.marginalia.WmsaHome;
import nu.marginalia.IndexLocations;
import nu.marginalia.linkdb.dlinks.DomainLinkDbWriter;
import nu.marginalia.linkgraph.io.DomainLinksWriter;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.linkdb.docs.DocumentDbWriter;
import nu.marginalia.model.gson.GsonFactory;
@ -45,7 +45,7 @@ public class LoaderModule extends AbstractModule {
}
@Inject @Provides @Singleton
private DomainLinkDbWriter createDomainLinkdbWriter(FileStorageService service) throws SQLException, IOException {
private DomainLinksWriter createDomainLinkdbWriter(FileStorageService service) throws SQLException, IOException {
Path dbPath = IndexLocations.getLinkdbWritePath(service).resolve(DOMAIN_LINKS_FILE_NAME);
@ -53,7 +53,7 @@ public class LoaderModule extends AbstractModule {
Files.delete(dbPath);
}
return new DomainLinkDbWriter(dbPath);
return new DomainLinksWriter(dbPath);
}
private Gson createGson() {

View File

@ -4,7 +4,7 @@ import com.google.inject.Inject;
import com.google.inject.Singleton;
import lombok.SneakyThrows;
import nu.marginalia.io.processed.DomainLinkRecordParquetFileReader;
import nu.marginalia.linkdb.dlinks.DomainLinkDbWriter;
import nu.marginalia.linkgraph.io.DomainLinksWriter;
import nu.marginalia.loading.LoaderInputData;
import nu.marginalia.loading.domains.DomainIdRegistry;
import nu.marginalia.model.processed.DomainLinkRecord;
@ -20,10 +20,10 @@ public class DomainLinksLoaderService {
private static final Logger logger = LoggerFactory.getLogger(DomainLinksLoaderService.class);
private final DomainLinkDbWriter domainLinkDbWriter;
private final DomainLinksWriter domainLinkDbWriter;
@Inject
public DomainLinksLoaderService(DomainLinkDbWriter domainLinkDbWriter) {
public DomainLinksLoaderService(DomainLinksWriter domainLinkDbWriter) {
this.domainLinkDbWriter = domainLinkDbWriter;
}

View File

@ -17,7 +17,7 @@ described in [processed-data](../process-models/processed-data/).
The [loading-process](loading-process/) reads the processed data.
It has creates an [index journal](../features-index/index-journal),
It creates an [index journal](../index/index-journal),
a [link database](../common/linkdb),
and loads domains and domain-links
into the [MariaDB database](../common/db).

View File

@ -25,7 +25,7 @@ dependencies {
implementation project(':code:common:process')
implementation project(':code:common:service-discovery')
implementation project(':code:common:service')
implementation project(':code:functions:domain-links:api')
implementation project(':code:functions:link-graph:api')
implementation libs.bundles.slf4j

View File

@ -4,7 +4,7 @@ import gnu.trove.list.TIntList;
import gnu.trove.list.array.TIntArrayList;
import gnu.trove.map.hash.TIntObjectHashMap;
import gnu.trove.set.hash.TIntHashSet;
import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient;
import nu.marginalia.api.linkgraph.AggregateLinkGraphClient;
import org.roaringbitmap.RoaringBitmap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -35,7 +35,7 @@ public class AdjacenciesData {
return ret;
}
public AdjacenciesData(AggregateDomainLinksClient linksClient,
public AdjacenciesData(AggregateLinkGraphClient linksClient,
DomainAliases aliases) {
logger.info("Loading adjacency data");

View File

@ -4,7 +4,7 @@ import com.google.inject.Guice;
import com.zaxxer.hikari.HikariDataSource;
import lombok.SneakyThrows;
import nu.marginalia.ProcessConfiguration;
import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient;
import nu.marginalia.api.linkgraph.AggregateLinkGraphClient;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.process.control.ProcessHeartbeat;
@ -32,7 +32,7 @@ public class WebsiteAdjacenciesCalculator extends ProcessMainClass {
private static final Logger logger = LoggerFactory.getLogger(WebsiteAdjacenciesCalculator.class);
float[] weights;
public WebsiteAdjacenciesCalculator(AggregateDomainLinksClient domainLinksClient, HikariDataSource dataSource) throws SQLException {
public WebsiteAdjacenciesCalculator(AggregateLinkGraphClient domainLinksClient, HikariDataSource dataSource) throws SQLException {
this.dataSource = dataSource;
domainAliases = new DomainAliases(dataSource);
@ -154,7 +154,7 @@ public class WebsiteAdjacenciesCalculator extends ProcessMainClass {
var dataSource = injector.getInstance(HikariDataSource.class);
var lc = injector.getInstance(AggregateDomainLinksClient.class);
var lc = injector.getInstance(AggregateLinkGraphClient.class);
if (!lc.waitReady(Duration.ofSeconds(30))) {
throw new IllegalStateException("Failed to connect to domain-links");

View File

@ -23,18 +23,38 @@ eligible index services. The control service is responsible for distributing co
service, and for monitoring the health of the system. It also offers a web interface for operating the system.
### Services
* [core services](services-core/) Most of these services are stateful, memory hungry, and doing heavy lifting.
* * [control](services-core/control-service)
* * [query](services-core/query-service)
* * * Exposes the [functions/link-graph](functions/link-graph) subsystem
* * * Exposes the [functions/search-query](functions/search-query) subsystem
* * [index](services-core/index-service)
* * * Exposes the [index](index) subsystem
* * * Exposes the [functions/link-graph](functions/link-graph) subsystem
* * [executor](services-core/executor-service)
* * * Exposes the [execution](execution) subsystem
* * [assistant](services-core/assistant-service)
* * * Exposes the [functions/math](functions/math) subsystem
* * * Exposes the [functions/domain-info](functions/domain-info) subsystem
* [application services](services-application/) Mostly stateless gateways providing access to the core services.
* * [api](services-application/api-service) - public API
* * [api](services-application/api-service) - public API gateway
* * [search](services-application/search-service) - marginalia search application
* * [dating](services-application/dating-service) - [https://explore.marginalia.nu/](https://explore.marginalia.nu/)
* * [explorer](services-application/explorer-service) - [https://explore2.marginalia.nu/](https://explore2.marginalia.nu/)
* an [internal API](api/)
* * [dating](services-application/dating-service) - [https://explore.marginalia.nu/](https://explore.marginalia.nu/)
* * [explorer](services-application/explorer-service) - [https://explore2.marginalia.nu/](https://explore2.marginalia.nu/)
The system uses a service registry to find the services. The service registry is based on ZooKeeper,
and is a separate service. The registry doesn't keep track of processes, but APIs. This means that
the system is flexible to reconfiguration. The same code can in principle be run as a micro-service
mesh or as a monolith.
This is an unusual architecture, but it has the benefit that you don't need to think too much about
the layout of the system. You can just request an API and talk to it. Because of this, several of the
services have almost no code of their own. They merely import a library and expose it as a service.
These skeleton services are marked with (S).
Services that expose HTTP endpoints tend to have more code. They are marked with (G).
### Processes
@ -55,7 +75,6 @@ but isolated.
* [features-search](features-search)
* [features-crawl](features-crawl)
* [features-convert](features-convert)
* [features-index](features-index)
### Libraries and primitives

View File

@ -4,8 +4,7 @@ The control service provides an operator's user interface. By default, this int
exposed on port 8081. It does not offer any sort of access control or authentication.
The control service will itself execute tasks that affect the entire system, but delegate
node-specific tasks to the corresponding [executor-service](../executor-service) via the
[executor-api](../../api/executor-api).
node-specific tasks to the corresponding [execution subsystem](../../execution).
Conceptually the application is broken into three parts:

View File

@ -59,7 +59,7 @@ dependencies {
implementation project(':code:libraries:message-queue')
implementation project(':code:functions:domain-links:api')
implementation project(':code:functions:link-graph:api')
implementation project(':code:process-models:crawl-spec')
implementation project(':code:process-models:crawling-model')

View File

@ -1,16 +1,10 @@
The executor service is a partitioned service responsible for executing and keeping
track of long running maintenance and operational tasks, such as crawling or data
processing.
track of long-running maintenance and operational tasks, such as crawling or data
processing.
It accomplishes this using the [message queue and actor library](../../libraries/message-queue/),
which permits program state to survive crashes and reboots. The executor service is closely
linked to the [control-service](../control-service), which provides a user interface for
much of the executor's functionality.
The executor service is closely linked to the [control-service](../control-service),
which provides a user interface for much of the executor's functionality.
## Central Classes
The service is itself relatively bare of code, but imports and exposes the [execution subsystem](../../execution),
which is responsible for the actual execution of tasks.
* [ExecutorActorControlService](java/nu/marginalia/actor/ExecutorActorControlService.java)
## See Also
* [api/executor-api](../../api/executor-api)

View File

@ -46,8 +46,8 @@ dependencies {
implementation project(':code:common:linkdb')
implementation project(':code:index')
implementation project(':code:functions:domain-links:partition')
implementation project(':code:functions:domain-links:api')
implementation project(':code:functions:link-graph:partition')
implementation project(':code:functions:link-graph:api')
implementation project(':code:functions:search-query:api')
implementation project(':code:index:api')

View File

@ -4,8 +4,8 @@ import com.google.inject.AbstractModule;
import com.google.inject.Provides;
import com.google.inject.Singleton;
import com.google.inject.name.Named;
import nu.marginalia.linkdb.dlinks.DomainLinkDb;
import nu.marginalia.linkdb.dlinks.DelayingDomainLinkDb;
import nu.marginalia.linkgraph.DomainLinks;
import nu.marginalia.linkgraph.impl.DelayingDomainLinks;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.IndexLocations;
import org.slf4j.Logger;
@ -26,13 +26,13 @@ public class IndexModule extends AbstractModule {
@Provides
@Singleton
public DomainLinkDb domainLinkDb (
public DomainLinks domainLinkDb (
FileStorageService storageService
)
{
Path path = IndexLocations.getLinkdbLivePath(storageService).resolve(DOMAIN_LINKS_FILE_NAME);
return new DelayingDomainLinkDb(path);
return new DelayingDomainLinks(path);
}
@Provides

View File

@ -3,9 +3,9 @@ package nu.marginalia.index;
import com.google.inject.Inject;
import lombok.SneakyThrows;
import nu.marginalia.IndexLocations;
import nu.marginalia.functions.domainlinks.PartitionDomainLinksService;
import nu.marginalia.linkgraph.PartitionLinkGraphService;
import nu.marginalia.index.index.StatefulIndex;
import nu.marginalia.linkdb.dlinks.DomainLinkDb;
import nu.marginalia.linkgraph.DomainLinks;
import nu.marginalia.service.discovery.property.ServicePartition;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.index.api.IndexMqEndpoints;
@ -34,7 +34,7 @@ public class IndexService extends Service {
private final FileStorageService fileStorageService;
private final DocumentDbReader documentDbReader;
private final DomainLinkDb domainLinkDb;
private final DomainLinks domainLinks;
private final ServiceEventLog eventLog;
@ -46,21 +46,21 @@ public class IndexService extends Service {
StatefulIndex statefulIndex,
FileStorageService fileStorageService,
DocumentDbReader documentDbReader,
DomainLinkDb domainLinkDb,
PartitionDomainLinksService partitionDomainLinksService,
DomainLinks domainLinks,
PartitionLinkGraphService partitionLinkGraphService,
ServiceEventLog eventLog)
{
super(params,
ServicePartition.partition(params.configuration.node()),
List.of(indexQueryService,
partitionDomainLinksService)
partitionLinkGraphService)
);
this.opsService = opsService;
this.statefulIndex = statefulIndex;
this.fileStorageService = fileStorageService;
this.documentDbReader = documentDbReader;
this.domainLinkDb = domainLinkDb;
this.domainLinks = domainLinks;
this.eventLog = eventLog;
this.init = params.initialization;
@ -106,7 +106,7 @@ public class IndexService extends Service {
if (Files.exists(newPathDomains)) {
eventLog.logEvent("INDEX-SWITCH-DOMAIN-LINKDB", "");
domainLinkDb.switchInput(newPathDomains);
domainLinks.switchInput(newPathDomains);
}
}

View File

@ -50,8 +50,8 @@ dependencies {
implementation project(':code:functions:search-query')
implementation project(':code:functions:search-query:api')
implementation project(':code:functions:domain-links:api')
implementation project(':code:functions:domain-links:aggregate')
implementation project(':code:functions:link-graph:api')
implementation project(':code:functions:link-graph:aggregate')
implementation libs.bundles.slf4j

View File

@ -2,7 +2,7 @@ package nu.marginalia.query;
import com.google.inject.Inject;
import lombok.SneakyThrows;
import nu.marginalia.functions.domainlinks.AggregateDomainLinksService;
import nu.marginalia.linkgraph.AggregateLinkGraphService;
import nu.marginalia.functions.searchquery.QueryGRPCService;
import nu.marginalia.service.discovery.property.ServicePartition;
import nu.marginalia.service.server.BaseServiceParams;
@ -17,7 +17,7 @@ public class QueryService extends Service {
@SneakyThrows
@Inject
public QueryService(BaseServiceParams params,
AggregateDomainLinksService domainLinksService,
AggregateLinkGraphService domainLinksService,
QueryGRPCService queryGRPCService,
QueryBasicInterface queryBasicInterface)
{

View File

@ -17,9 +17,9 @@ include 'code:functions:math:api'
include 'code:functions:domain-info'
include 'code:functions:domain-info:api'
include 'code:functions:domain-links:partition'
include 'code:functions:domain-links:aggregate'
include 'code:functions:domain-links:api'
include 'code:functions:link-graph:partition'
include 'code:functions:link-graph:aggregate'
include 'code:functions:link-graph:api'
include 'code:functions:search-query'
include 'code:functions:search-query:api'