mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 04:58:59 +00:00
Clean up documentation and rename domain-links
to link-graph
This commit is contained in:
parent
3a65fe8917
commit
9f1649636e
@ -1,15 +1,3 @@
|
|||||||
## Domain Link Database
|
|
||||||
|
|
||||||
The domain link database contains information about links
|
|
||||||
between domains. It is a static in-memory database loaded
|
|
||||||
from a binary file.
|
|
||||||
|
|
||||||
* [DomainLinkDb](java/nu/marginalia/linkdb/DomainLinkDb.java)
|
|
||||||
* * [FileDomainLinkDb](java/nu/marginalia/linkdb/FileDomainLinkDb.java)
|
|
||||||
* * [SqlDomainLinkDb](java/nu/marginalia/linkdb/SqlDomainLinkDb.java)
|
|
||||||
* [DomainLinkDbWriter](java/nu/marginalia/linkdb/DomainLinkDbWriter.java)
|
|
||||||
* [DomainLinkDbLoader](java/nu/marginalia/linkdb/DomainLinkDbLoader.java)
|
|
||||||
|
|
||||||
## Document Database
|
## Document Database
|
||||||
|
|
||||||
The document database contains information about links,
|
The document database contains information about links,
|
||||||
@ -21,10 +9,10 @@ is not in the MariaDB database is that this would make updates to
|
|||||||
this information take effect in production immediately, even before
|
this information take effect in production immediately, even before
|
||||||
the information was searchable.
|
the information was searchable.
|
||||||
|
|
||||||
* [DocumentLinkDbWriter](java/nu/marginalia/linkdb/DocumentDbWriter.java)
|
* [DocumentDbWriter](java/nu/marginalia/linkdb/docs/DocumentDbWriter.java)
|
||||||
* [DocumentLinkDbLoader](java/nu/marginalia/linkdb/DocumentDbReader.java)
|
* [DocumentDbReader](java/nu/marginalia/linkdb/docs/DocumentDbReader.java)
|
||||||
|
|
||||||
|
|
||||||
## See Also
|
## See Also
|
||||||
|
|
||||||
These databases are constructed by the [loading-process](../../processes/loading-process), and consumed by the [index-service](../../services-core/index-service).
|
The database is constructed by the [loading-process](../../processes/loading-process), and consumed by the [index-service](../../services-core/index-service).
|
@ -7,6 +7,5 @@ as shared models.
|
|||||||
* [config](config/) contains some `@Inject`ables.
|
* [config](config/) contains some `@Inject`ables.
|
||||||
* [renderer](renderer/) contains utility code for rendering website templates.
|
* [renderer](renderer/) contains utility code for rendering website templates.
|
||||||
* [service](service/) contains the shared base classes for main methods and web services.
|
* [service](service/) contains the shared base classes for main methods and web services.
|
||||||
* [service-client](service-client/) is the shared base class for RPC.
|
* [service-discovery](service-discovery) contains tools that let the services find each other and communicate.
|
||||||
* [service-discovery](service-discovery) contains tools that lets the services find each other.
|
|
||||||
* [process](process/) contains boilerplate for batch processes.
|
* [process](process/) contains boilerplate for batch processes.
|
||||||
|
@ -34,7 +34,7 @@ dependencies {
|
|||||||
|
|
||||||
implementation project(':code:libraries:message-queue')
|
implementation project(':code:libraries:message-queue')
|
||||||
|
|
||||||
implementation project(':code:functions:domain-links:api')
|
implementation project(':code:functions:link-graph:api')
|
||||||
implementation project(':code:execution:api')
|
implementation project(':code:execution:api')
|
||||||
|
|
||||||
implementation project(':code:process-models:crawl-spec')
|
implementation project(':code:process-models:crawl-spec')
|
||||||
|
@ -6,7 +6,7 @@ import com.google.inject.Singleton;
|
|||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import nu.marginalia.actor.prototype.RecordActorPrototype;
|
import nu.marginalia.actor.prototype.RecordActorPrototype;
|
||||||
import nu.marginalia.actor.state.ActorStep;
|
import nu.marginalia.actor.state.ActorStep;
|
||||||
import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient;
|
import nu.marginalia.api.linkgraph.AggregateLinkGraphClient;
|
||||||
import nu.marginalia.storage.FileStorageService;
|
import nu.marginalia.storage.FileStorageService;
|
||||||
import nu.marginalia.storage.model.FileStorageId;
|
import nu.marginalia.storage.model.FileStorageId;
|
||||||
import nu.marginalia.storage.model.FileStorageType;
|
import nu.marginalia.storage.model.FileStorageType;
|
||||||
@ -32,7 +32,7 @@ public class ExportDataActor extends RecordActorPrototype {
|
|||||||
private final FileStorageService storageService;
|
private final FileStorageService storageService;
|
||||||
private final HikariDataSource dataSource;
|
private final HikariDataSource dataSource;
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
private final AggregateDomainLinksClient domainLinksClient;
|
private final AggregateLinkGraphClient linkGraphClient;
|
||||||
|
|
||||||
public record Export() implements ActorStep {}
|
public record Export() implements ActorStep {}
|
||||||
public record ExportBlacklist(FileStorageId fid) implements ActorStep {}
|
public record ExportBlacklist(FileStorageId fid) implements ActorStep {}
|
||||||
@ -114,7 +114,7 @@ public class ExportDataActor extends RecordActorPrototype {
|
|||||||
var tmpFile = Files.createTempFile(storage.asPath(), "export", ".csv.gz",
|
var tmpFile = Files.createTempFile(storage.asPath(), "export", ".csv.gz",
|
||||||
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--")));
|
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--")));
|
||||||
|
|
||||||
var allLinks = domainLinksClient.getAllDomainLinks();
|
var allLinks = linkGraphClient.getAllDomainLinks();
|
||||||
|
|
||||||
try (var bw = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(Files.newOutputStream(tmpFile, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)))))
|
try (var bw = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(Files.newOutputStream(tmpFile, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)))))
|
||||||
{
|
{
|
||||||
@ -155,12 +155,12 @@ public class ExportDataActor extends RecordActorPrototype {
|
|||||||
public ExportDataActor(Gson gson,
|
public ExportDataActor(Gson gson,
|
||||||
FileStorageService storageService,
|
FileStorageService storageService,
|
||||||
HikariDataSource dataSource,
|
HikariDataSource dataSource,
|
||||||
AggregateDomainLinksClient domainLinksClient)
|
AggregateLinkGraphClient linkGraphClient)
|
||||||
{
|
{
|
||||||
super(gson);
|
super(gson);
|
||||||
this.storageService = storageService;
|
this.storageService = storageService;
|
||||||
this.dataSource = dataSource;
|
this.dataSource = dataSource;
|
||||||
this.domainLinksClient = domainLinksClient;
|
this.linkGraphClient = linkGraphClient;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
12
code/execution/readme.md
Normal file
12
code/execution/readme.md
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
The execution subsystem is responsible for the execution of long-running tasks on each
|
||||||
|
index node. It lives in the [executor-service](../services-core/executor-service) module.
|
||||||
|
|
||||||
|
It accomplishes this using the [message queue and actor library](../libraries/message-queue/),
|
||||||
|
which permits program state to survive crashes and reboots.
|
||||||
|
|
||||||
|
The subsystem exposes four [APIs](api/src/main/protobuf/executor-api.proto):
|
||||||
|
|
||||||
|
* Execution API - for starting and stopping tasks, also contains miscellaneous commands
|
||||||
|
* Crawl API - for managing the crawl workflow
|
||||||
|
* Sideload API - for sideloading data
|
||||||
|
* Export API - for exporting data
|
@ -16,4 +16,3 @@ holistically, not by question or answer, it is necessary to re-arrange
|
|||||||
the data (which is very large). SQLite does a decent job of enabling
|
the data (which is very large). SQLite does a decent job of enabling
|
||||||
this task.
|
this task.
|
||||||
|
|
||||||
See [tools/stackexchange-converter](../../tools/stackexchange-converter).
|
|
@ -15,7 +15,7 @@ apply from: "$rootProject.projectDir/srcsets.gradle"
|
|||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
implementation project(':code:functions:domain-info:api')
|
implementation project(':code:functions:domain-info:api')
|
||||||
implementation project(':code:functions:domain-links:api')
|
implementation project(':code:functions:link-graph:api')
|
||||||
|
|
||||||
implementation project(':code:common:config')
|
implementation project(':code:common:config')
|
||||||
implementation project(':code:common:service')
|
implementation project(':code:common:service')
|
||||||
|
@ -2,7 +2,7 @@ package nu.marginalia.functions.domains;
|
|||||||
|
|
||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import nu.marginalia.api.domains.RpcDomainInfoResponse;
|
import nu.marginalia.api.domains.RpcDomainInfoResponse;
|
||||||
import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient;
|
import nu.marginalia.api.linkgraph.AggregateLinkGraphClient;
|
||||||
import nu.marginalia.geoip.GeoIpDictionary;
|
import nu.marginalia.geoip.GeoIpDictionary;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.db.DbDomainQueries;
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
@ -21,7 +21,7 @@ public class DomainInformationService {
|
|||||||
private final GeoIpDictionary geoIpDictionary;
|
private final GeoIpDictionary geoIpDictionary;
|
||||||
|
|
||||||
private DbDomainQueries dbDomainQueries;
|
private DbDomainQueries dbDomainQueries;
|
||||||
private final AggregateDomainLinksClient domainLinksClient;
|
private final AggregateLinkGraphClient linkGraphClient;
|
||||||
private HikariDataSource dataSource;
|
private HikariDataSource dataSource;
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
|
|
||||||
@ -29,11 +29,11 @@ public class DomainInformationService {
|
|||||||
public DomainInformationService(
|
public DomainInformationService(
|
||||||
DbDomainQueries dbDomainQueries,
|
DbDomainQueries dbDomainQueries,
|
||||||
GeoIpDictionary geoIpDictionary,
|
GeoIpDictionary geoIpDictionary,
|
||||||
AggregateDomainLinksClient domainLinksClient,
|
AggregateLinkGraphClient linkGraphClient,
|
||||||
HikariDataSource dataSource) {
|
HikariDataSource dataSource) {
|
||||||
this.dbDomainQueries = dbDomainQueries;
|
this.dbDomainQueries = dbDomainQueries;
|
||||||
this.geoIpDictionary = geoIpDictionary;
|
this.geoIpDictionary = geoIpDictionary;
|
||||||
this.domainLinksClient = domainLinksClient;
|
this.linkGraphClient = linkGraphClient;
|
||||||
this.dataSource = dataSource;
|
this.dataSource = dataSource;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -84,8 +84,8 @@ public class DomainInformationService {
|
|||||||
inCrawlQueue = rs.next();
|
inCrawlQueue = rs.next();
|
||||||
builder.setInCrawlQueue(inCrawlQueue);
|
builder.setInCrawlQueue(inCrawlQueue);
|
||||||
|
|
||||||
builder.setIncomingLinks(domainLinksClient.countLinksToDomain(domainId));
|
builder.setIncomingLinks(linkGraphClient.countLinksToDomain(domainId));
|
||||||
builder.setOutboundLinks(domainLinksClient.countLinksFromDomain(domainId));
|
builder.setOutboundLinks(linkGraphClient.countLinksFromDomain(domainId));
|
||||||
|
|
||||||
rs = stmt.executeQuery(STR."""
|
rs = stmt.executeQuery(STR."""
|
||||||
SELECT KNOWN_URLS, GOOD_URLS, VISITED_URLS FROM DOMAIN_METADATA WHERE ID=\{domainId}
|
SELECT KNOWN_URLS, GOOD_URLS, VISITED_URLS FROM DOMAIN_METADATA WHERE ID=\{domainId}
|
||||||
|
@ -11,7 +11,7 @@ import gnu.trove.set.hash.TIntHashSet;
|
|||||||
import it.unimi.dsi.fastutil.ints.Int2DoubleArrayMap;
|
import it.unimi.dsi.fastutil.ints.Int2DoubleArrayMap;
|
||||||
import nu.marginalia.api.domains.*;
|
import nu.marginalia.api.domains.*;
|
||||||
import nu.marginalia.api.domains.model.SimilarDomain;
|
import nu.marginalia.api.domains.model.SimilarDomain;
|
||||||
import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient;
|
import nu.marginalia.api.linkgraph.AggregateLinkGraphClient;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import org.roaringbitmap.RoaringBitmap;
|
import org.roaringbitmap.RoaringBitmap;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
@ -20,7 +20,6 @@ import org.slf4j.LoggerFactory;
|
|||||||
import java.sql.ResultSet;
|
import java.sql.ResultSet;
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.BitSet;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.concurrent.Executors;
|
import java.util.concurrent.Executors;
|
||||||
import java.util.stream.IntStream;
|
import java.util.stream.IntStream;
|
||||||
@ -29,7 +28,7 @@ public class SimilarDomainsService {
|
|||||||
|
|
||||||
private static final Logger logger = LoggerFactory.getLogger(SimilarDomainsService.class);
|
private static final Logger logger = LoggerFactory.getLogger(SimilarDomainsService.class);
|
||||||
private final HikariDataSource dataSource;
|
private final HikariDataSource dataSource;
|
||||||
private final AggregateDomainLinksClient domainLinksClient;
|
private final AggregateLinkGraphClient linkGraphClient;
|
||||||
|
|
||||||
private volatile TIntIntHashMap domainIdToIdx = new TIntIntHashMap(100_000);
|
private volatile TIntIntHashMap domainIdToIdx = new TIntIntHashMap(100_000);
|
||||||
private volatile int[] domainIdxToId;
|
private volatile int[] domainIdxToId;
|
||||||
@ -45,9 +44,9 @@ public class SimilarDomainsService {
|
|||||||
volatile boolean isReady = false;
|
volatile boolean isReady = false;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public SimilarDomainsService(HikariDataSource dataSource, AggregateDomainLinksClient domainLinksClient) {
|
public SimilarDomainsService(HikariDataSource dataSource, AggregateLinkGraphClient linkGraphClient) {
|
||||||
this.dataSource = dataSource;
|
this.dataSource = dataSource;
|
||||||
this.domainLinksClient = domainLinksClient;
|
this.linkGraphClient = linkGraphClient;
|
||||||
|
|
||||||
Executors.newSingleThreadExecutor().submit(this::init);
|
Executors.newSingleThreadExecutor().submit(this::init);
|
||||||
}
|
}
|
||||||
@ -262,7 +261,7 @@ public class SimilarDomainsService {
|
|||||||
private TIntSet getLinkingIdsDToS(int domainIdx) {
|
private TIntSet getLinkingIdsDToS(int domainIdx) {
|
||||||
var items = new TIntHashSet();
|
var items = new TIntHashSet();
|
||||||
|
|
||||||
for (int id : domainLinksClient.getLinksFromDomain(domainIdxToId[domainIdx])) {
|
for (int id : linkGraphClient.getLinksFromDomain(domainIdxToId[domainIdx])) {
|
||||||
items.add(domainIdToIdx.get(id));
|
items.add(domainIdToIdx.get(id));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -272,7 +271,7 @@ public class SimilarDomainsService {
|
|||||||
private TIntSet getLinkingIdsSToD(int domainIdx) {
|
private TIntSet getLinkingIdsSToD(int domainIdx) {
|
||||||
var items = new TIntHashSet();
|
var items = new TIntHashSet();
|
||||||
|
|
||||||
for (int id : domainLinksClient.getLinksToDomain(domainIdxToId[domainIdx])) {
|
for (int id : linkGraphClient.getLinksToDomain(domainIdxToId[domainIdx])) {
|
||||||
items.add(domainIdToIdx.get(id));
|
items.add(domainIdToIdx.get(id));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,30 +0,0 @@
|
|||||||
package nu.marginalia.api.indexdomainlinks;
|
|
||||||
|
|
||||||
import com.google.inject.Inject;
|
|
||||||
import com.google.inject.Singleton;
|
|
||||||
import nu.marginalia.api.domainlink.DomainLinksApiGrpc;
|
|
||||||
import nu.marginalia.service.client.GrpcChannelPoolFactory;
|
|
||||||
import nu.marginalia.service.client.GrpcMultiNodeChannelPool;
|
|
||||||
import nu.marginalia.service.discovery.property.ServiceKey;
|
|
||||||
import nu.marginalia.service.discovery.property.ServicePartition;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
@Singleton
|
|
||||||
public class PartitionDomainLinksClient {
|
|
||||||
private static final Logger logger = LoggerFactory.getLogger(PartitionDomainLinksClient.class);
|
|
||||||
|
|
||||||
private final GrpcMultiNodeChannelPool<DomainLinksApiGrpc.DomainLinksApiBlockingStub> channelPool;
|
|
||||||
|
|
||||||
@Inject
|
|
||||||
public PartitionDomainLinksClient(GrpcChannelPoolFactory factory) {
|
|
||||||
this.channelPool = factory.createMulti(
|
|
||||||
ServiceKey.forGrpcApi(DomainLinksApiGrpc.class, ServicePartition.multi()),
|
|
||||||
DomainLinksApiGrpc::newBlockingStub);
|
|
||||||
}
|
|
||||||
|
|
||||||
public GrpcMultiNodeChannelPool<DomainLinksApiGrpc.DomainLinksApiBlockingStub> getChannelPool() {
|
|
||||||
return channelPool;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -14,7 +14,7 @@ java {
|
|||||||
apply from: "$rootProject.projectDir/srcsets.gradle"
|
apply from: "$rootProject.projectDir/srcsets.gradle"
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
implementation project(':code:functions:domain-links:api')
|
implementation project(':code:functions:link-graph:api')
|
||||||
|
|
||||||
implementation project(':code:common:config')
|
implementation project(':code:common:config')
|
||||||
implementation project(':code:common:service')
|
implementation project(':code:common:service')
|
@ -1,20 +1,25 @@
|
|||||||
package nu.marginalia.functions.domainlinks;
|
package nu.marginalia.linkgraph;
|
||||||
|
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import io.grpc.stub.StreamObserver;
|
import io.grpc.stub.StreamObserver;
|
||||||
import nu.marginalia.api.domainlink.*;
|
import nu.marginalia.api.linkgraph.*;
|
||||||
import nu.marginalia.api.indexdomainlinks.PartitionDomainLinksClient;
|
import nu.marginalia.api.linkgraph.PartitionLinkGraphClient;
|
||||||
|
import nu.marginalia.api.linkgraph.LinkGraphApiGrpc;
|
||||||
|
import nu.marginalia.api.linkgraph.LinkGraphApiGrpc.LinkGraphApiBlockingStub;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
public class AggregateDomainLinksService extends DomainLinksApiGrpc.DomainLinksApiImplBase {
|
/** This class is responsible for aggregating the link graph data from the partitioned link graph
|
||||||
private static final Logger logger = LoggerFactory.getLogger(AggregateDomainLinksService.class);
|
* services.
|
||||||
private final PartitionDomainLinksClient client;
|
*/
|
||||||
|
public class AggregateLinkGraphService extends LinkGraphApiGrpc.LinkGraphApiImplBase {
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(AggregateLinkGraphService.class);
|
||||||
|
private final PartitionLinkGraphClient client;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public AggregateDomainLinksService(PartitionDomainLinksClient client) {
|
public AggregateLinkGraphService(PartitionLinkGraphClient client) {
|
||||||
this.client = client;
|
this.client = client;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -22,7 +27,7 @@ public class AggregateDomainLinksService extends DomainLinksApiGrpc.DomainLinksA
|
|||||||
public void getAllLinks(Empty request,
|
public void getAllLinks(Empty request,
|
||||||
StreamObserver<RpcDomainIdPairs> responseObserver) {
|
StreamObserver<RpcDomainIdPairs> responseObserver) {
|
||||||
|
|
||||||
client.getChannelPool().call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::getAllLinks)
|
client.getChannelPool().call(LinkGraphApiBlockingStub::getAllLinks)
|
||||||
.run(Empty.getDefaultInstance())
|
.run(Empty.getDefaultInstance())
|
||||||
.forEach(iter -> iter.forEachRemaining(responseObserver::onNext));
|
.forEach(iter -> iter.forEachRemaining(responseObserver::onNext));
|
||||||
|
|
||||||
@ -34,7 +39,7 @@ public class AggregateDomainLinksService extends DomainLinksApiGrpc.DomainLinksA
|
|||||||
StreamObserver<RpcDomainIdList> responseObserver) {
|
StreamObserver<RpcDomainIdList> responseObserver) {
|
||||||
var rspBuilder = RpcDomainIdList.newBuilder();
|
var rspBuilder = RpcDomainIdList.newBuilder();
|
||||||
|
|
||||||
client.getChannelPool().call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::getLinksFromDomain)
|
client.getChannelPool().call(LinkGraphApiBlockingStub::getLinksFromDomain)
|
||||||
.run(request)
|
.run(request)
|
||||||
.stream()
|
.stream()
|
||||||
.map(RpcDomainIdList::getDomainIdList)
|
.map(RpcDomainIdList::getDomainIdList)
|
||||||
@ -51,7 +56,7 @@ public class AggregateDomainLinksService extends DomainLinksApiGrpc.DomainLinksA
|
|||||||
var rspBuilder = RpcDomainIdList.newBuilder();
|
var rspBuilder = RpcDomainIdList.newBuilder();
|
||||||
|
|
||||||
|
|
||||||
client.getChannelPool().call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::getLinksToDomain)
|
client.getChannelPool().call(LinkGraphApiBlockingStub::getLinksToDomain)
|
||||||
.run(request)
|
.run(request)
|
||||||
.stream()
|
.stream()
|
||||||
.map(RpcDomainIdList::getDomainIdList)
|
.map(RpcDomainIdList::getDomainIdList)
|
||||||
@ -65,7 +70,7 @@ public class AggregateDomainLinksService extends DomainLinksApiGrpc.DomainLinksA
|
|||||||
@Override
|
@Override
|
||||||
public void countLinksFromDomain(RpcDomainId request,
|
public void countLinksFromDomain(RpcDomainId request,
|
||||||
StreamObserver<RpcDomainIdCount> responseObserver) {
|
StreamObserver<RpcDomainIdCount> responseObserver) {
|
||||||
int sum = client.getChannelPool().call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::countLinksFromDomain)
|
int sum = client.getChannelPool().call(LinkGraphApiBlockingStub::countLinksFromDomain)
|
||||||
.run(request)
|
.run(request)
|
||||||
.stream()
|
.stream()
|
||||||
.mapToInt(RpcDomainIdCount::getIdCount)
|
.mapToInt(RpcDomainIdCount::getIdCount)
|
||||||
@ -81,7 +86,7 @@ public class AggregateDomainLinksService extends DomainLinksApiGrpc.DomainLinksA
|
|||||||
public void countLinksToDomain(RpcDomainId request,
|
public void countLinksToDomain(RpcDomainId request,
|
||||||
StreamObserver<RpcDomainIdCount> responseObserver) {
|
StreamObserver<RpcDomainIdCount> responseObserver) {
|
||||||
|
|
||||||
int sum = client.getChannelPool().call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::countLinksToDomain)
|
int sum = client.getChannelPool().call(LinkGraphApiBlockingStub::countLinksToDomain)
|
||||||
.run(request)
|
.run(request)
|
||||||
.stream()
|
.stream()
|
||||||
.mapToInt(RpcDomainIdCount::getIdCount)
|
.mapToInt(RpcDomainIdCount::getIdCount)
|
3
code/functions/link-graph/aggregate/readme.md
Normal file
3
code/functions/link-graph/aggregate/readme.md
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
This module is responsible for aggregating the link graph from the partitioned services, and exposing a unified
|
||||||
|
view of the link graph. It does not keep any data or state, but instead delegates to the partitioned
|
||||||
|
services.
|
@ -11,7 +11,7 @@ java {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
jar.archiveBaseName = 'index-domain-links-api'
|
jar.archiveBaseName = 'link-graph-api'
|
||||||
|
|
||||||
apply from: "$rootProject.projectDir/protobuf.gradle"
|
apply from: "$rootProject.projectDir/protobuf.gradle"
|
||||||
apply from: "$rootProject.projectDir/srcsets.gradle"
|
apply from: "$rootProject.projectDir/srcsets.gradle"
|
@ -1,10 +1,8 @@
|
|||||||
package nu.marginalia.api.indexdomainlinks;
|
package nu.marginalia.api.linkgraph;
|
||||||
|
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.google.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
import nu.marginalia.api.domainlink.DomainLinksApiGrpc;
|
import nu.marginalia.api.linkgraph.LinkGraphApiGrpc;
|
||||||
import nu.marginalia.api.domainlink.Empty;
|
|
||||||
import nu.marginalia.api.domainlink.RpcDomainId;
|
|
||||||
import nu.marginalia.service.client.GrpcChannelPoolFactory;
|
import nu.marginalia.service.client.GrpcChannelPoolFactory;
|
||||||
import nu.marginalia.service.client.GrpcSingleNodeChannelPool;
|
import nu.marginalia.service.client.GrpcSingleNodeChannelPool;
|
||||||
import nu.marginalia.service.discovery.property.ServiceKey;
|
import nu.marginalia.service.discovery.property.ServiceKey;
|
||||||
@ -17,24 +15,26 @@ import org.slf4j.LoggerFactory;
|
|||||||
import java.time.Duration;
|
import java.time.Duration;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
@Singleton
|
import static nu.marginalia.api.linkgraph.LinkGraphApiGrpc.*;
|
||||||
public class AggregateDomainLinksClient {
|
|
||||||
private static final Logger logger = LoggerFactory.getLogger(AggregateDomainLinksClient.class);
|
|
||||||
|
|
||||||
private final GrpcSingleNodeChannelPool<DomainLinksApiGrpc.DomainLinksApiBlockingStub> channelPool;
|
@Singleton
|
||||||
|
public class AggregateLinkGraphClient {
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(AggregateLinkGraphClient.class);
|
||||||
|
|
||||||
|
private final GrpcSingleNodeChannelPool<LinkGraphApiBlockingStub> channelPool;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public AggregateDomainLinksClient(GrpcChannelPoolFactory factory) {
|
public AggregateLinkGraphClient(GrpcChannelPoolFactory factory) {
|
||||||
this.channelPool = factory.createSingle(
|
this.channelPool = factory.createSingle(
|
||||||
ServiceKey.forGrpcApi(DomainLinksApiGrpc.class, ServicePartition.any()),
|
ServiceKey.forGrpcApi(LinkGraphApiGrpc.class, ServicePartition.any()),
|
||||||
DomainLinksApiGrpc::newBlockingStub);
|
LinkGraphApiGrpc::newBlockingStub);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public AllLinks getAllDomainLinks() {
|
public AllLinks getAllDomainLinks() {
|
||||||
AllLinks links = new AllLinks();
|
AllLinks links = new AllLinks();
|
||||||
|
|
||||||
channelPool.call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::getAllLinks)
|
channelPool.call(LinkGraphApiBlockingStub::getAllLinks)
|
||||||
.run(Empty.getDefaultInstance())
|
.run(Empty.getDefaultInstance())
|
||||||
.forEachRemaining(pairs -> {
|
.forEachRemaining(pairs -> {
|
||||||
for (int i = 0; i < pairs.getDestIdsCount(); i++) {
|
for (int i = 0; i < pairs.getDestIdsCount(); i++) {
|
||||||
@ -47,7 +47,7 @@ public class AggregateDomainLinksClient {
|
|||||||
|
|
||||||
public List<Integer> getLinksToDomain(int domainId) {
|
public List<Integer> getLinksToDomain(int domainId) {
|
||||||
try {
|
try {
|
||||||
return channelPool.call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::getLinksToDomain)
|
return channelPool.call(LinkGraphApiBlockingStub::getLinksToDomain)
|
||||||
.run(RpcDomainId.newBuilder().setDomainId(domainId).build())
|
.run(RpcDomainId.newBuilder().setDomainId(domainId).build())
|
||||||
.getDomainIdList()
|
.getDomainIdList()
|
||||||
.stream()
|
.stream()
|
||||||
@ -62,7 +62,7 @@ public class AggregateDomainLinksClient {
|
|||||||
|
|
||||||
public List<Integer> getLinksFromDomain(int domainId) {
|
public List<Integer> getLinksFromDomain(int domainId) {
|
||||||
try {
|
try {
|
||||||
return channelPool.call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::getLinksFromDomain)
|
return channelPool.call(LinkGraphApiBlockingStub::getLinksFromDomain)
|
||||||
.run(RpcDomainId.newBuilder().setDomainId(domainId).build())
|
.run(RpcDomainId.newBuilder().setDomainId(domainId).build())
|
||||||
.getDomainIdList()
|
.getDomainIdList()
|
||||||
.stream()
|
.stream()
|
||||||
@ -78,7 +78,7 @@ public class AggregateDomainLinksClient {
|
|||||||
|
|
||||||
public int countLinksToDomain(int domainId) {
|
public int countLinksToDomain(int domainId) {
|
||||||
try {
|
try {
|
||||||
return channelPool.call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::countLinksToDomain)
|
return channelPool.call(LinkGraphApiBlockingStub::countLinksToDomain)
|
||||||
.run(RpcDomainId.newBuilder().setDomainId(domainId).build())
|
.run(RpcDomainId.newBuilder().setDomainId(domainId).build())
|
||||||
.getIdCount();
|
.getIdCount();
|
||||||
|
|
||||||
@ -91,7 +91,7 @@ public class AggregateDomainLinksClient {
|
|||||||
|
|
||||||
public int countLinksFromDomain(int domainId) {
|
public int countLinksFromDomain(int domainId) {
|
||||||
try {
|
try {
|
||||||
return channelPool.call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::countLinksFromDomain)
|
return channelPool.call(LinkGraphApiBlockingStub::countLinksFromDomain)
|
||||||
.run(RpcDomainId.newBuilder().setDomainId(domainId).build())
|
.run(RpcDomainId.newBuilder().setDomainId(domainId).build())
|
||||||
.getIdCount();
|
.getIdCount();
|
||||||
}
|
}
|
@ -0,0 +1,29 @@
|
|||||||
|
package nu.marginalia.api.linkgraph;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.Singleton;
|
||||||
|
import nu.marginalia.service.client.GrpcChannelPoolFactory;
|
||||||
|
import nu.marginalia.service.client.GrpcMultiNodeChannelPool;
|
||||||
|
import nu.marginalia.service.discovery.property.ServiceKey;
|
||||||
|
import nu.marginalia.service.discovery.property.ServicePartition;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
@Singleton
|
||||||
|
public class PartitionLinkGraphClient {
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(PartitionLinkGraphClient.class);
|
||||||
|
|
||||||
|
private final GrpcMultiNodeChannelPool<LinkGraphApiGrpc.LinkGraphApiBlockingStub> channelPool;
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public PartitionLinkGraphClient(GrpcChannelPoolFactory factory) {
|
||||||
|
this.channelPool = factory.createMulti(
|
||||||
|
ServiceKey.forGrpcApi(LinkGraphApiGrpc.class, ServicePartition.multi()),
|
||||||
|
LinkGraphApiGrpc::newBlockingStub);
|
||||||
|
}
|
||||||
|
|
||||||
|
public GrpcMultiNodeChannelPool<LinkGraphApiGrpc.LinkGraphApiBlockingStub> getChannelPool() {
|
||||||
|
return channelPool;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -1,10 +1,10 @@
|
|||||||
syntax="proto3";
|
syntax="proto3";
|
||||||
package nu.marginalia.api.domainlinks;
|
package nu.marginalia.api.linkgraph;
|
||||||
|
|
||||||
option java_package="nu.marginalia.api.domainlink";
|
option java_package="nu.marginalia.api.linkgraph";
|
||||||
option java_multiple_files=true;
|
option java_multiple_files=true;
|
||||||
|
|
||||||
service DomainLinksApi {
|
service LinkGraphApi {
|
||||||
rpc getAllLinks(Empty) returns (stream RpcDomainIdPairs) {}
|
rpc getAllLinks(Empty) returns (stream RpcDomainIdPairs) {}
|
||||||
rpc getLinksFromDomain(RpcDomainId) returns (RpcDomainIdList) {}
|
rpc getLinksFromDomain(RpcDomainId) returns (RpcDomainIdList) {}
|
||||||
rpc getLinksToDomain(RpcDomainId) returns (RpcDomainIdList) {}
|
rpc getLinksToDomain(RpcDomainId) returns (RpcDomainIdList) {}
|
@ -14,7 +14,7 @@ java {
|
|||||||
apply from: "$rootProject.projectDir/srcsets.gradle"
|
apply from: "$rootProject.projectDir/srcsets.gradle"
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
implementation project(':code:functions:domain-links:api')
|
implementation project(':code:functions:link-graph:api')
|
||||||
|
|
||||||
implementation project(':code:common:config')
|
implementation project(':code:common:config')
|
||||||
implementation project(':code:common:service')
|
implementation project(':code:common:service')
|
@ -1,13 +1,13 @@
|
|||||||
package nu.marginalia.linkdb.dlinks;
|
package nu.marginalia.linkgraph;
|
||||||
|
|
||||||
import gnu.trove.list.array.TIntArrayList;
|
import gnu.trove.list.array.TIntArrayList;
|
||||||
|
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
|
||||||
/** A database of source-destination pairs of domain IDs. The database is loaded into memory from
|
/** A repository of source-destination pairs of domain IDs. The database is loaded into memory from
|
||||||
* a source. The database is then kept in memory, reloading it upon switchInput().
|
* a source. The data is then kept in memory, reloading it upon switchInput().
|
||||||
*/
|
*/
|
||||||
public interface DomainLinkDb {
|
public interface DomainLinks {
|
||||||
/** Replace the current db file with the provided file. The provided file will be deleted.
|
/** Replace the current db file with the provided file. The provided file will be deleted.
|
||||||
* The in-memory database MAY be updated to reflect the change.
|
* The in-memory database MAY be updated to reflect the change.
|
||||||
* */
|
* */
|
@ -1,25 +1,28 @@
|
|||||||
package nu.marginalia.functions.domainlinks;
|
package nu.marginalia.linkgraph;
|
||||||
|
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import io.grpc.stub.StreamObserver;
|
import io.grpc.stub.StreamObserver;
|
||||||
import nu.marginalia.api.domainlink.*;
|
import nu.marginalia.api.linkgraph.*;
|
||||||
import nu.marginalia.linkdb.dlinks.DomainLinkDb;
|
import nu.marginalia.api.linkgraph.Empty;
|
||||||
|
import nu.marginalia.api.linkgraph.LinkGraphApiGrpc;
|
||||||
|
|
||||||
/** GRPC service for interrogating domain links
|
/** GRPC service for interrogating domain links for a single partition. For accessing the data
|
||||||
|
* in the application, the AggregateLinkGraphService should be used instead via the
|
||||||
|
* AggregateLinkGraphClient.
|
||||||
*/
|
*/
|
||||||
public class PartitionDomainLinksService extends DomainLinksApiGrpc.DomainLinksApiImplBase {
|
public class PartitionLinkGraphService extends LinkGraphApiGrpc.LinkGraphApiImplBase {
|
||||||
private final DomainLinkDb domainLinkDb;
|
private final DomainLinks domainLinks;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public PartitionDomainLinksService(DomainLinkDb domainLinkDb) {
|
public PartitionLinkGraphService(DomainLinks domainLinks) {
|
||||||
this.domainLinkDb = domainLinkDb;
|
this.domainLinks = domainLinks;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void getAllLinks(Empty request,
|
public void getAllLinks(Empty request,
|
||||||
io.grpc.stub.StreamObserver<RpcDomainIdPairs> responseObserver) {
|
io.grpc.stub.StreamObserver<RpcDomainIdPairs> responseObserver) {
|
||||||
|
|
||||||
try (var idsConverter = new AllIdsResponseConverter(responseObserver)) {
|
try (var idsConverter = new AllIdsResponseConverter(responseObserver)) {
|
||||||
domainLinkDb.forEach(idsConverter::accept);
|
domainLinks.forEach(idsConverter::accept);
|
||||||
}
|
}
|
||||||
|
|
||||||
responseObserver.onCompleted();
|
responseObserver.onCompleted();
|
||||||
@ -58,7 +61,7 @@ public class PartitionDomainLinksService extends DomainLinksApiGrpc.DomainLinksA
|
|||||||
public void getLinksFromDomain(RpcDomainId request,
|
public void getLinksFromDomain(RpcDomainId request,
|
||||||
StreamObserver<RpcDomainIdList> responseObserver) {
|
StreamObserver<RpcDomainIdList> responseObserver) {
|
||||||
|
|
||||||
var links = domainLinkDb.findDestinations(request.getDomainId());
|
var links = domainLinks.findDestinations(request.getDomainId());
|
||||||
|
|
||||||
var rspBuilder = RpcDomainIdList.newBuilder();
|
var rspBuilder = RpcDomainIdList.newBuilder();
|
||||||
for (int i = 0; i < links.size(); i++) {
|
for (int i = 0; i < links.size(); i++) {
|
||||||
@ -73,7 +76,7 @@ public class PartitionDomainLinksService extends DomainLinksApiGrpc.DomainLinksA
|
|||||||
public void getLinksToDomain(RpcDomainId request,
|
public void getLinksToDomain(RpcDomainId request,
|
||||||
StreamObserver<RpcDomainIdList> responseObserver) {
|
StreamObserver<RpcDomainIdList> responseObserver) {
|
||||||
|
|
||||||
var links = domainLinkDb.findSources(request.getDomainId());
|
var links = domainLinks.findSources(request.getDomainId());
|
||||||
|
|
||||||
var rspBuilder = RpcDomainIdList.newBuilder();
|
var rspBuilder = RpcDomainIdList.newBuilder();
|
||||||
for (int i = 0; i < links.size(); i++) {
|
for (int i = 0; i < links.size(); i++) {
|
||||||
@ -87,7 +90,7 @@ public class PartitionDomainLinksService extends DomainLinksApiGrpc.DomainLinksA
|
|||||||
public void countLinksFromDomain(RpcDomainId request,
|
public void countLinksFromDomain(RpcDomainId request,
|
||||||
StreamObserver<RpcDomainIdCount> responseObserver) {
|
StreamObserver<RpcDomainIdCount> responseObserver) {
|
||||||
responseObserver.onNext(RpcDomainIdCount.newBuilder()
|
responseObserver.onNext(RpcDomainIdCount.newBuilder()
|
||||||
.setIdCount(domainLinkDb.countDestinations(request.getDomainId()))
|
.setIdCount(domainLinks.countDestinations(request.getDomainId()))
|
||||||
.build());
|
.build());
|
||||||
responseObserver.onCompleted();
|
responseObserver.onCompleted();
|
||||||
}
|
}
|
||||||
@ -95,7 +98,7 @@ public class PartitionDomainLinksService extends DomainLinksApiGrpc.DomainLinksA
|
|||||||
public void countLinksToDomain(RpcDomainId request,
|
public void countLinksToDomain(RpcDomainId request,
|
||||||
StreamObserver<RpcDomainIdCount> responseObserver) {
|
StreamObserver<RpcDomainIdCount> responseObserver) {
|
||||||
responseObserver.onNext(RpcDomainIdCount.newBuilder()
|
responseObserver.onNext(RpcDomainIdCount.newBuilder()
|
||||||
.setIdCount(domainLinkDb.countSources(request.getDomainId()))
|
.setIdCount(domainLinks.countSources(request.getDomainId()))
|
||||||
.build());
|
.build());
|
||||||
responseObserver.onCompleted();
|
responseObserver.onCompleted();
|
||||||
}
|
}
|
@ -1,7 +1,8 @@
|
|||||||
package nu.marginalia.linkdb.dlinks;
|
package nu.marginalia.linkgraph.impl;
|
||||||
|
|
||||||
import com.google.inject.name.Named;
|
import com.google.inject.name.Named;
|
||||||
import gnu.trove.list.array.TIntArrayList;
|
import gnu.trove.list.array.TIntArrayList;
|
||||||
|
import nu.marginalia.linkgraph.DomainLinks;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -14,13 +15,13 @@ import java.nio.file.StandardCopyOption;
|
|||||||
* is not yet loaded. This speeds up the startup of the index service, as the database is
|
* is not yet loaded. This speeds up the startup of the index service, as the database is
|
||||||
* loaded in a separate thread.
|
* loaded in a separate thread.
|
||||||
*/
|
*/
|
||||||
public class DelayingDomainLinkDb implements DomainLinkDb {
|
public class DelayingDomainLinks implements DomainLinks {
|
||||||
private final static Logger logger = LoggerFactory.getLogger(DelayingDomainLinkDb.class);
|
private final static Logger logger = LoggerFactory.getLogger(DelayingDomainLinks.class);
|
||||||
|
|
||||||
private volatile DomainLinkDb currentDb;
|
private volatile DomainLinks currentDb;
|
||||||
private final Path filename;
|
private final Path filename;
|
||||||
|
|
||||||
public DelayingDomainLinkDb(@Named("domain-linkdb-file") Path filename) {
|
public DelayingDomainLinks(@Named("domain-linkdb-file") Path filename) {
|
||||||
this.filename = filename;
|
this.filename = filename;
|
||||||
|
|
||||||
// Load the database in a separate thread, so that the constructor can return
|
// Load the database in a separate thread, so that the constructor can return
|
||||||
@ -29,7 +30,7 @@ public class DelayingDomainLinkDb implements DomainLinkDb {
|
|||||||
|
|
||||||
Thread.ofPlatform().start(() -> {
|
Thread.ofPlatform().start(() -> {
|
||||||
try {
|
try {
|
||||||
currentDb = new FileDomainLinkDb(filename);
|
currentDb = new FileDomainLinks(filename);
|
||||||
logger.info("Loaded linkdb");
|
logger.info("Loaded linkdb");
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
logger.error("Failed to load linkdb", e);
|
logger.error("Failed to load linkdb", e);
|
||||||
@ -43,7 +44,7 @@ public class DelayingDomainLinkDb implements DomainLinkDb {
|
|||||||
|
|
||||||
Thread.ofPlatform().start(() -> {
|
Thread.ofPlatform().start(() -> {
|
||||||
try {
|
try {
|
||||||
currentDb = new FileDomainLinkDb(filename);
|
currentDb = new FileDomainLinks(filename);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("Failed to load linkdb", e);
|
logger.error("Failed to load linkdb", e);
|
||||||
}
|
}
|
@ -1,7 +1,9 @@
|
|||||||
package nu.marginalia.linkdb.dlinks;
|
package nu.marginalia.linkgraph.impl;
|
||||||
|
|
||||||
import com.google.inject.name.Named;
|
import com.google.inject.name.Named;
|
||||||
import gnu.trove.list.array.TIntArrayList;
|
import gnu.trove.list.array.TIntArrayList;
|
||||||
|
import nu.marginalia.linkgraph.DomainLinks;
|
||||||
|
import nu.marginalia.linkgraph.io.DomainLinksLoader;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -14,13 +16,13 @@ import java.util.Arrays;
|
|||||||
/** Canonical DomainLinkDb implementation. The database is loaded into memory from
|
/** Canonical DomainLinks implementation. The database is loaded into memory from
|
||||||
* a file. The database is then kept in memory, reloading it upon switchInput().
|
* a file. The database is then kept in memory, reloading it upon switchInput().
|
||||||
*/
|
*/
|
||||||
public class FileDomainLinkDb implements DomainLinkDb {
|
public class FileDomainLinks implements DomainLinks {
|
||||||
private static final Logger logger = LoggerFactory.getLogger(FileDomainLinkDb.class);
|
private static final Logger logger = LoggerFactory.getLogger(FileDomainLinks.class);
|
||||||
private final Path filename;
|
private final Path filename;
|
||||||
private volatile long[] sourceToDest = new long[0];
|
private volatile long[] sourceToDest = new long[0];
|
||||||
private volatile long[] destToSource = new long[0];
|
private volatile long[] destToSource = new long[0];
|
||||||
|
|
||||||
public FileDomainLinkDb(@Named("domain-linkdb-file") Path filename) throws IOException {
|
public FileDomainLinks(@Named("domain-linkdb-file") Path filename) throws IOException {
|
||||||
this.filename = filename;
|
this.filename = filename;
|
||||||
|
|
||||||
if (Files.exists(filename)) {
|
if (Files.exists(filename)) {
|
||||||
@ -35,7 +37,7 @@ public class FileDomainLinkDb implements DomainLinkDb {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public void loadInput(Path filename) throws IOException {
|
public void loadInput(Path filename) throws IOException {
|
||||||
try (var loader = new DomainLinkDbLoader(filename)) {
|
try (var loader = new DomainLinksLoader(filename)) {
|
||||||
int size = loader.size();
|
int size = loader.size();
|
||||||
|
|
||||||
var newSourceToDest = new long[size];
|
var newSourceToDest = new long[size];
|
@ -1,17 +1,17 @@
|
|||||||
package nu.marginalia.linkdb.dlinks;
|
package nu.marginalia.linkgraph.io;
|
||||||
|
|
||||||
import java.io.DataInputStream;
|
import java.io.DataInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
|
||||||
public class DomainLinkDbLoader implements AutoCloseable {
|
public class DomainLinksLoader implements AutoCloseable {
|
||||||
private final DataInputStream stream;
|
private final DataInputStream stream;
|
||||||
private final Path filename;
|
private final Path filename;
|
||||||
|
|
||||||
private long nextVal;
|
private long nextVal;
|
||||||
|
|
||||||
public DomainLinkDbLoader(Path filename) throws IOException {
|
public DomainLinksLoader(Path filename) throws IOException {
|
||||||
this.stream = new DataInputStream(Files.newInputStream(filename));
|
this.stream = new DataInputStream(Files.newInputStream(filename));
|
||||||
this.filename = filename;
|
this.filename = filename;
|
||||||
}
|
}
|
@ -1,4 +1,4 @@
|
|||||||
package nu.marginalia.linkdb.dlinks;
|
package nu.marginalia.linkgraph.io;
|
||||||
|
|
||||||
import java.io.DataOutputStream;
|
import java.io.DataOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
@ -6,10 +6,10 @@ import java.nio.file.Files;
|
|||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.nio.file.StandardOpenOption;
|
import java.nio.file.StandardOpenOption;
|
||||||
|
|
||||||
public class DomainLinkDbWriter implements AutoCloseable {
|
public class DomainLinksWriter implements AutoCloseable {
|
||||||
private final DataOutputStream stream;
|
private final DataOutputStream stream;
|
||||||
|
|
||||||
public DomainLinkDbWriter(Path fileName) throws IOException {
|
public DomainLinksWriter(Path fileName) throws IOException {
|
||||||
this.stream = new DataOutputStream(Files.newOutputStream(fileName,
|
this.stream = new DataOutputStream(Files.newOutputStream(fileName,
|
||||||
StandardOpenOption.CREATE,
|
StandardOpenOption.CREATE,
|
||||||
StandardOpenOption.WRITE,
|
StandardOpenOption.WRITE,
|
11
code/functions/link-graph/partition/readme.md
Normal file
11
code/functions/link-graph/partition/readme.md
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
The link graph partition module is responsible for knowledge about the link graph
|
||||||
|
for a single index node. It's based on in-memory data structures, and is updated
|
||||||
|
atomically from file.
|
||||||
|
|
||||||
|
## Central Classes
|
||||||
|
|
||||||
|
* [PartitionLinkGraphService](java/nu/marginalia/linkgraph/PartitionLinkGraphService.java)
|
||||||
|
* [DomainLinks](java/nu/marginalia/linkgraph/DomainLinks.java)
|
||||||
|
* * [FileDomainLinks](java/nu/marginalia/linkgraph/impl/FileDomainLinks.java)
|
||||||
|
* [DomainLinksWriter](java/nu/marginalia/linkgraph/io/DomainLinksWriter.java)
|
||||||
|
* [DomainLinksLoader](java/nu/marginalia/linkgraph/io/DomainLinksLoader.java)
|
@ -1,7 +1,7 @@
|
|||||||
package nu.marginalia.linkdb;
|
package nu.marginalia.linkgraph;
|
||||||
|
|
||||||
import nu.marginalia.linkdb.dlinks.DomainLinkDbLoader;
|
import nu.marginalia.linkgraph.io.DomainLinksLoader;
|
||||||
import nu.marginalia.linkdb.dlinks.DomainLinkDbWriter;
|
import nu.marginalia.linkgraph.io.DomainLinksWriter;
|
||||||
import org.junit.jupiter.api.AfterEach;
|
import org.junit.jupiter.api.AfterEach;
|
||||||
import org.junit.jupiter.api.Assertions;
|
import org.junit.jupiter.api.Assertions;
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
@ -24,7 +24,7 @@ public class DomainLinkDbTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testWriteRead() {
|
public void testWriteRead() {
|
||||||
try (var writer = new DomainLinkDbWriter(fileName)) {
|
try (var writer = new DomainLinksWriter(fileName)) {
|
||||||
writer.write(1, 2);
|
writer.write(1, 2);
|
||||||
writer.write(2, 3);
|
writer.write(2, 3);
|
||||||
writer.write(3, 4);
|
writer.write(3, 4);
|
||||||
@ -33,7 +33,7 @@ public class DomainLinkDbTest {
|
|||||||
throw new RuntimeException(ex);
|
throw new RuntimeException(ex);
|
||||||
}
|
}
|
||||||
|
|
||||||
try (var reader = new DomainLinkDbLoader(fileName)) {
|
try (var reader = new DomainLinksLoader(fileName)) {
|
||||||
Assertions.assertTrue(reader.next());
|
Assertions.assertTrue(reader.next());
|
||||||
Assertions.assertEquals(1, reader.getSource());
|
Assertions.assertEquals(1, reader.getSource());
|
||||||
Assertions.assertEquals(2, reader.getDest());
|
Assertions.assertEquals(2, reader.getDest());
|
9
code/functions/link-graph/readme.md
Normal file
9
code/functions/link-graph/readme.md
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
The link graph subsystem is responsible for knowledge about the link graph.
|
||||||
|
|
||||||
|
A SQL database is not very well suited for this, principally because it is too slow to update;
|
||||||
|
instead the link graph is stored in memory, and atomically updated from file. The storage
|
||||||
|
aspect is handled by the [common/linkdb](../../common/linkdb/) component.
|
||||||
|
|
||||||
|
The link graph subsystem has two components, one which injects into the partitioned services,
|
||||||
|
e.g. index or execution, and one which aggregates the results from the partitioned services,
|
||||||
|
and exposes a unified view of the link graph.
|
@ -5,7 +5,10 @@ import java.nio.ByteBuffer;
|
|||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.nio.file.StandardOpenOption;
|
import java.nio.file.StandardOpenOption;
|
||||||
|
import java.util.BitSet;
|
||||||
|
|
||||||
|
// It's unclear why this exists, we should probably use a BitSet instead?
|
||||||
|
// Chesterton's fence?
|
||||||
public class DenseBitMap {
|
public class DenseBitMap {
|
||||||
public static final long MAX_CAPACITY_2GB_16BN_ITEMS=(1L<<34)-8;
|
public static final long MAX_CAPACITY_2GB_16BN_ITEMS=(1L<<34)-8;
|
||||||
|
|
||||||
|
4
code/functions/search-query/readme.md
Normal file
4
code/functions/search-query/readme.md
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
The search query subsystem is responsible for parsing a query,
|
||||||
|
translating it to a request, and then dispatching it to the
|
||||||
|
appropriate index nodes and translating the responses back again.
|
||||||
|
|
@ -17,7 +17,7 @@ dependencies {
|
|||||||
implementation project(':third-party:commons-codec')
|
implementation project(':third-party:commons-codec')
|
||||||
|
|
||||||
implementation project(':code:index:api')
|
implementation project(':code:index:api')
|
||||||
implementation project(':code:functions:domain-links:api')
|
implementation project(':code:functions:link-graph:api')
|
||||||
|
|
||||||
implementation project(':code:libraries:array')
|
implementation project(':code:libraries:array')
|
||||||
implementation project(':code:libraries:btree')
|
implementation project(':code:libraries:btree')
|
||||||
|
@ -3,7 +3,7 @@ package nu.marginalia.ranking.domains.data;
|
|||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient;
|
import nu.marginalia.api.linkgraph.AggregateLinkGraphClient;
|
||||||
import org.jgrapht.Graph;
|
import org.jgrapht.Graph;
|
||||||
import org.jgrapht.graph.DefaultDirectedGraph;
|
import org.jgrapht.graph.DefaultDirectedGraph;
|
||||||
import org.jgrapht.graph.DefaultEdge;
|
import org.jgrapht.graph.DefaultEdge;
|
||||||
@ -13,12 +13,12 @@ import org.jgrapht.graph.DefaultEdge;
|
|||||||
* which is the same as the regular graph except
|
* which is the same as the regular graph except
|
||||||
* the direction of the links have been inverted */
|
* the direction of the links have been inverted */
|
||||||
public class InvertedLinkGraphSource extends AbstractGraphSource {
|
public class InvertedLinkGraphSource extends AbstractGraphSource {
|
||||||
private final AggregateDomainLinksClient queryClient;
|
private final AggregateLinkGraphClient graphClient;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public InvertedLinkGraphSource(HikariDataSource dataSource, AggregateDomainLinksClient queryClient) {
|
public InvertedLinkGraphSource(HikariDataSource dataSource, AggregateLinkGraphClient graphClient) {
|
||||||
super(dataSource);
|
super(dataSource);
|
||||||
this.queryClient = queryClient;
|
this.graphClient = graphClient;
|
||||||
}
|
}
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
@Override
|
@Override
|
||||||
@ -27,7 +27,7 @@ public class InvertedLinkGraphSource extends AbstractGraphSource {
|
|||||||
|
|
||||||
addVertices(graph);
|
addVertices(graph);
|
||||||
|
|
||||||
var allLinks = queryClient.getAllDomainLinks();
|
var allLinks = graphClient.getAllDomainLinks();
|
||||||
var iter = allLinks.iterator();
|
var iter = allLinks.iterator();
|
||||||
while (iter.advance()) {
|
while (iter.advance()) {
|
||||||
if (!graph.containsVertex(iter.dest())) {
|
if (!graph.containsVertex(iter.dest())) {
|
||||||
|
@ -3,19 +3,19 @@ package nu.marginalia.ranking.domains.data;
|
|||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient;
|
import nu.marginalia.api.linkgraph.AggregateLinkGraphClient;
|
||||||
import org.jgrapht.Graph;
|
import org.jgrapht.Graph;
|
||||||
import org.jgrapht.graph.DefaultDirectedGraph;
|
import org.jgrapht.graph.DefaultDirectedGraph;
|
||||||
import org.jgrapht.graph.DefaultEdge;
|
import org.jgrapht.graph.DefaultEdge;
|
||||||
|
|
||||||
/** A source for the regular link graph. */
|
/** A source for the regular link graph. */
|
||||||
public class LinkGraphSource extends AbstractGraphSource {
|
public class LinkGraphSource extends AbstractGraphSource {
|
||||||
private final AggregateDomainLinksClient domainLinksClient;
|
private final AggregateLinkGraphClient graphClient;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public LinkGraphSource(HikariDataSource dataSource, AggregateDomainLinksClient domainLinksClient) {
|
public LinkGraphSource(HikariDataSource dataSource, AggregateLinkGraphClient graphClient) {
|
||||||
super(dataSource);
|
super(dataSource);
|
||||||
this.domainLinksClient = domainLinksClient;
|
this.graphClient = graphClient;
|
||||||
}
|
}
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
@ -25,7 +25,7 @@ public class LinkGraphSource extends AbstractGraphSource {
|
|||||||
|
|
||||||
addVertices(graph);
|
addVertices(graph);
|
||||||
|
|
||||||
var allLinks = domainLinksClient.getAllDomainLinks();
|
var allLinks = graphClient.getAllDomainLinks();
|
||||||
var iter = allLinks.iterator();
|
var iter = allLinks.iterator();
|
||||||
while (iter.advance()) {
|
while (iter.advance()) {
|
||||||
if (!graph.containsVertex(iter.dest())) {
|
if (!graph.containsVertex(iter.dest())) {
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
# Index
|
# Index
|
||||||
|
|
||||||
This module contains the components that make up the search index.
|
The index subsystem contains the components that make up the search index.
|
||||||
|
|
||||||
It exposes an API for querying the index, and contains the logic
|
It exposes an API for querying the index, and contains the logic
|
||||||
for ranking search results. It does not parse the query, that is
|
for ranking search results. It does not parse the query, that is
|
||||||
@ -10,9 +10,9 @@ the responsibility of the [search-query](../functions/search-query) module.
|
|||||||
|
|
||||||
There are two indexes with accompanying tools for constructing them.
|
There are two indexes with accompanying tools for constructing them.
|
||||||
|
|
||||||
* [index-reverse](reverse-index/) is code for `word->document` indexes. There are two such indexes, one containing only document-word pairs that are flagged as important, e.g. the word appears in the title or has a high TF-IDF. This allows good results to be discovered quickly without having to sift through ten thousand bad ones first.
|
* [index-reverse](index-reverse/) is code for `word->document` indexes. There are two such indexes, one containing only document-word pairs that are flagged as important, e.g. the word appears in the title or has a high TF-IDF. This allows good results to be discovered quickly without having to sift through ten thousand bad ones first.
|
||||||
|
|
||||||
* [index-forward](forward-index/) is the `document->word` index containing metadata about each word, such as its position. It is used after identifying candidate search results via the reverse index to fetch metadata and rank the results.
|
* [index-forward](index-forward/) is the `document->word` index containing metadata about each word, such as its position. It is used after identifying candidate search results via the reverse index to fetch metadata and rank the results.
|
||||||
|
|
||||||
Additionally, the [index-journal](index-journal/) contains code for constructing a journal of the index, which is used to keep the index up to date.
|
Additionally, the [index-journal](index-journal/) contains code for constructing a journal of the index, which is used to keep the index up to date.
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@ package nu.marginalia.ranking.domains;
|
|||||||
|
|
||||||
import com.zaxxer.hikari.HikariConfig;
|
import com.zaxxer.hikari.HikariConfig;
|
||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient;
|
import nu.marginalia.api.linkgraph.AggregateLinkGraphClient;
|
||||||
import nu.marginalia.ranking.domains.data.InvertedLinkGraphSource;
|
import nu.marginalia.ranking.domains.data.InvertedLinkGraphSource;
|
||||||
import nu.marginalia.ranking.domains.data.LinkGraphSource;
|
import nu.marginalia.ranking.domains.data.LinkGraphSource;
|
||||||
import nu.marginalia.ranking.domains.data.SimilarityGraphSource;
|
import nu.marginalia.ranking.domains.data.SimilarityGraphSource;
|
||||||
@ -36,8 +36,8 @@ public class RankingAlgorithmsContainerTest {
|
|||||||
|
|
||||||
static HikariDataSource dataSource;
|
static HikariDataSource dataSource;
|
||||||
|
|
||||||
AggregateDomainLinksClient domainLinksClient;
|
AggregateLinkGraphClient domainLinksClient;
|
||||||
AggregateDomainLinksClient.AllLinks allLinks;
|
AggregateLinkGraphClient.AllLinks allLinks;
|
||||||
|
|
||||||
@BeforeAll
|
@BeforeAll
|
||||||
public static void setup() {
|
public static void setup() {
|
||||||
@ -66,8 +66,8 @@ public class RankingAlgorithmsContainerTest {
|
|||||||
|
|
||||||
@BeforeEach
|
@BeforeEach
|
||||||
public void setupQueryClient() {
|
public void setupQueryClient() {
|
||||||
domainLinksClient = Mockito.mock(AggregateDomainLinksClient.class);
|
domainLinksClient = Mockito.mock(AggregateLinkGraphClient.class);
|
||||||
allLinks = new AggregateDomainLinksClient.AllLinks();
|
allLinks = new AggregateLinkGraphClient.AllLinks();
|
||||||
when(domainLinksClient.getAllDomainLinks()).thenReturn(allLinks);
|
when(domainLinksClient.getAllDomainLinks()).thenReturn(allLinks);
|
||||||
|
|
||||||
try (var conn = dataSource.getConnection();
|
try (var conn = dataSource.getConnection();
|
||||||
|
@ -11,6 +11,4 @@ its words, how they stem, POS tags, and so on.
|
|||||||
## See Also
|
## See Also
|
||||||
|
|
||||||
[features-convert/keyword-extraction](../../features-convert/keyword-extraction) uses this code to identify which keywords
|
[features-convert/keyword-extraction](../../features-convert/keyword-extraction) uses this code to identify which keywords
|
||||||
are important.
|
are important.
|
||||||
|
|
||||||
[features-qs/query-parser](../../features-qs/query-parser) also does some language processing.
|
|
@ -6,7 +6,3 @@ the TF-IDF score of a keyword.
|
|||||||
## Central Classes
|
## Central Classes
|
||||||
|
|
||||||
* [TermFrequencyDict](java/nu/marginalia/term_frequency_dict/TermFrequencyDict.java)
|
* [TermFrequencyDict](java/nu/marginalia/term_frequency_dict/TermFrequencyDict.java)
|
||||||
|
|
||||||
## See Also
|
|
||||||
|
|
||||||
* [tools/term-frequency-extractor](../../tools/term-frequency-extractor) constructs this file
|
|
@ -10,8 +10,8 @@ There are three types of indexes:
|
|||||||
|
|
||||||
This is a very light-weight module that delegates the actual work to the modules:
|
This is a very light-weight module that delegates the actual work to the modules:
|
||||||
|
|
||||||
* [features-index/index-reverse](../../features-index/index-reverse)
|
* [index/index-reverse](../../index/index-reverse)
|
||||||
* [features-index/index-forward](../../features-index/index-forward)
|
* [index/index-forward](../../index/index-forward)
|
||||||
|
|
||||||
Their respective readme files contain more information about the indexes themselves
|
Their respective readme files contain more information about the indexes themselves
|
||||||
and how they are constructed.
|
and how they are constructed.
|
||||||
|
@ -40,6 +40,8 @@ dependencies {
|
|||||||
implementation project(':code:process-models:work-log')
|
implementation project(':code:process-models:work-log')
|
||||||
implementation project(':code:features-convert:keyword-extraction')
|
implementation project(':code:features-convert:keyword-extraction')
|
||||||
|
|
||||||
|
implementation project(':code:functions:link-graph:partition')
|
||||||
|
|
||||||
implementation libs.bundles.slf4j
|
implementation libs.bundles.slf4j
|
||||||
|
|
||||||
implementation libs.guice
|
implementation libs.guice
|
||||||
|
@ -9,7 +9,7 @@ import com.google.inject.name.Names;
|
|||||||
import nu.marginalia.LanguageModels;
|
import nu.marginalia.LanguageModels;
|
||||||
import nu.marginalia.WmsaHome;
|
import nu.marginalia.WmsaHome;
|
||||||
import nu.marginalia.IndexLocations;
|
import nu.marginalia.IndexLocations;
|
||||||
import nu.marginalia.linkdb.dlinks.DomainLinkDbWriter;
|
import nu.marginalia.linkgraph.io.DomainLinksWriter;
|
||||||
import nu.marginalia.storage.FileStorageService;
|
import nu.marginalia.storage.FileStorageService;
|
||||||
import nu.marginalia.linkdb.docs.DocumentDbWriter;
|
import nu.marginalia.linkdb.docs.DocumentDbWriter;
|
||||||
import nu.marginalia.model.gson.GsonFactory;
|
import nu.marginalia.model.gson.GsonFactory;
|
||||||
@ -45,7 +45,7 @@ public class LoaderModule extends AbstractModule {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Inject @Provides @Singleton
|
@Inject @Provides @Singleton
|
||||||
private DomainLinkDbWriter createDomainLinkdbWriter(FileStorageService service) throws SQLException, IOException {
|
private DomainLinksWriter createDomainLinkdbWriter(FileStorageService service) throws SQLException, IOException {
|
||||||
|
|
||||||
Path dbPath = IndexLocations.getLinkdbWritePath(service).resolve(DOMAIN_LINKS_FILE_NAME);
|
Path dbPath = IndexLocations.getLinkdbWritePath(service).resolve(DOMAIN_LINKS_FILE_NAME);
|
||||||
|
|
||||||
@ -53,7 +53,7 @@ public class LoaderModule extends AbstractModule {
|
|||||||
Files.delete(dbPath);
|
Files.delete(dbPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
return new DomainLinkDbWriter(dbPath);
|
return new DomainLinksWriter(dbPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
private Gson createGson() {
|
private Gson createGson() {
|
||||||
|
@ -4,7 +4,7 @@ import com.google.inject.Inject;
|
|||||||
import com.google.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
import nu.marginalia.io.processed.DomainLinkRecordParquetFileReader;
|
import nu.marginalia.io.processed.DomainLinkRecordParquetFileReader;
|
||||||
import nu.marginalia.linkdb.dlinks.DomainLinkDbWriter;
|
import nu.marginalia.linkgraph.io.DomainLinksWriter;
|
||||||
import nu.marginalia.loading.LoaderInputData;
|
import nu.marginalia.loading.LoaderInputData;
|
||||||
import nu.marginalia.loading.domains.DomainIdRegistry;
|
import nu.marginalia.loading.domains.DomainIdRegistry;
|
||||||
import nu.marginalia.model.processed.DomainLinkRecord;
|
import nu.marginalia.model.processed.DomainLinkRecord;
|
||||||
@ -20,10 +20,10 @@ public class DomainLinksLoaderService {
|
|||||||
|
|
||||||
private static final Logger logger = LoggerFactory.getLogger(DomainLinksLoaderService.class);
|
private static final Logger logger = LoggerFactory.getLogger(DomainLinksLoaderService.class);
|
||||||
|
|
||||||
private final DomainLinkDbWriter domainLinkDbWriter;
|
private final DomainLinksWriter domainLinkDbWriter;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public DomainLinksLoaderService(DomainLinkDbWriter domainLinkDbWriter) {
|
public DomainLinksLoaderService(DomainLinksWriter domainLinkDbWriter) {
|
||||||
this.domainLinkDbWriter = domainLinkDbWriter;
|
this.domainLinkDbWriter = domainLinkDbWriter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -17,7 +17,7 @@ described in [processed-data](../process-models/processed-data/).
|
|||||||
|
|
||||||
The [loading-process](loading-process/) reads the processed data.
|
The [loading-process](loading-process/) reads the processed data.
|
||||||
|
|
||||||
It has creates an [index journal](../features-index/index-journal),
|
It creates an [index journal](../index/index-journal),
|
||||||
a [link database](../common/linkdb),
|
a [link database](../common/linkdb),
|
||||||
and loads domains and domain-links
|
and loads domains and domain-links
|
||||||
into the [MariaDB database](../common/db).
|
into the [MariaDB database](../common/db).
|
||||||
|
@ -25,7 +25,7 @@ dependencies {
|
|||||||
implementation project(':code:common:process')
|
implementation project(':code:common:process')
|
||||||
implementation project(':code:common:service-discovery')
|
implementation project(':code:common:service-discovery')
|
||||||
implementation project(':code:common:service')
|
implementation project(':code:common:service')
|
||||||
implementation project(':code:functions:domain-links:api')
|
implementation project(':code:functions:link-graph:api')
|
||||||
|
|
||||||
implementation libs.bundles.slf4j
|
implementation libs.bundles.slf4j
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@ import gnu.trove.list.TIntList;
|
|||||||
import gnu.trove.list.array.TIntArrayList;
|
import gnu.trove.list.array.TIntArrayList;
|
||||||
import gnu.trove.map.hash.TIntObjectHashMap;
|
import gnu.trove.map.hash.TIntObjectHashMap;
|
||||||
import gnu.trove.set.hash.TIntHashSet;
|
import gnu.trove.set.hash.TIntHashSet;
|
||||||
import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient;
|
import nu.marginalia.api.linkgraph.AggregateLinkGraphClient;
|
||||||
import org.roaringbitmap.RoaringBitmap;
|
import org.roaringbitmap.RoaringBitmap;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
@ -35,7 +35,7 @@ public class AdjacenciesData {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
public AdjacenciesData(AggregateDomainLinksClient linksClient,
|
public AdjacenciesData(AggregateLinkGraphClient linksClient,
|
||||||
DomainAliases aliases) {
|
DomainAliases aliases) {
|
||||||
logger.info("Loading adjacency data");
|
logger.info("Loading adjacency data");
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@ import com.google.inject.Guice;
|
|||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
import nu.marginalia.ProcessConfiguration;
|
import nu.marginalia.ProcessConfiguration;
|
||||||
import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient;
|
import nu.marginalia.api.linkgraph.AggregateLinkGraphClient;
|
||||||
import nu.marginalia.db.DbDomainQueries;
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.process.control.ProcessHeartbeat;
|
import nu.marginalia.process.control.ProcessHeartbeat;
|
||||||
@ -32,7 +32,7 @@ public class WebsiteAdjacenciesCalculator extends ProcessMainClass {
|
|||||||
private static final Logger logger = LoggerFactory.getLogger(WebsiteAdjacenciesCalculator.class);
|
private static final Logger logger = LoggerFactory.getLogger(WebsiteAdjacenciesCalculator.class);
|
||||||
|
|
||||||
float[] weights;
|
float[] weights;
|
||||||
public WebsiteAdjacenciesCalculator(AggregateDomainLinksClient domainLinksClient, HikariDataSource dataSource) throws SQLException {
|
public WebsiteAdjacenciesCalculator(AggregateLinkGraphClient domainLinksClient, HikariDataSource dataSource) throws SQLException {
|
||||||
this.dataSource = dataSource;
|
this.dataSource = dataSource;
|
||||||
|
|
||||||
domainAliases = new DomainAliases(dataSource);
|
domainAliases = new DomainAliases(dataSource);
|
||||||
@ -154,7 +154,7 @@ public class WebsiteAdjacenciesCalculator extends ProcessMainClass {
|
|||||||
|
|
||||||
|
|
||||||
var dataSource = injector.getInstance(HikariDataSource.class);
|
var dataSource = injector.getInstance(HikariDataSource.class);
|
||||||
var lc = injector.getInstance(AggregateDomainLinksClient.class);
|
var lc = injector.getInstance(AggregateLinkGraphClient.class);
|
||||||
|
|
||||||
if (!lc.waitReady(Duration.ofSeconds(30))) {
|
if (!lc.waitReady(Duration.ofSeconds(30))) {
|
||||||
throw new IllegalStateException("Failed to connect to domain-links");
|
throw new IllegalStateException("Failed to connect to domain-links");
|
||||||
|
@ -23,18 +23,38 @@ eligible index services. The control service is responsible for distributing co
|
|||||||
service, and for monitoring the health of the system. It also offers a web interface for operating the system.
|
service, and for monitoring the health of the system. It also offers a web interface for operating the system.
|
||||||
|
|
||||||
### Services
|
### Services
|
||||||
|
|
||||||
* [core services](services-core/) Most of these services are stateful, memory hungry, and doing heavy lifting.
|
* [core services](services-core/) Most of these services are stateful, memory hungry, and doing heavy lifting.
|
||||||
* * [control](services-core/control-service)
|
* * [control](services-core/control-service)
|
||||||
* * [query](services-core/query-service)
|
* * [query](services-core/query-service)
|
||||||
|
* * * Exposes the [functions/link-graph](functions/link-graph) subsystem
|
||||||
|
* * * Exposes the [functions/search-query](functions/search-query) subsystem
|
||||||
* * [index](services-core/index-service)
|
* * [index](services-core/index-service)
|
||||||
|
* * * Exposes the [index](index) subsystem
|
||||||
|
* * * Exposes the [functions/link-graph](functions/link-graph) subsystem
|
||||||
* * [executor](services-core/executor-service)
|
* * [executor](services-core/executor-service)
|
||||||
|
* * * Exposes the [execution](execution) subsystem
|
||||||
* * [assistant](services-core/assistant-service)
|
* * [assistant](services-core/assistant-service)
|
||||||
|
* * * Exposes the [functions/math](functions/math) subsystem
|
||||||
|
* * * Exposes the [functions/domain-info](functions/domain-info) subsystem
|
||||||
* [application services](services-application/) Mostly stateless gateways providing access to the core services.
|
* [application services](services-application/) Mostly stateless gateways providing access to the core services.
|
||||||
* * [api](services-application/api-service) - public API
|
* * [api](services-application/api-service) - public API gateway
|
||||||
* * [search](services-application/search-service) - marginalia search application
|
* * [search](services-application/search-service) - marginalia search application
|
||||||
* * [dating](services-application/dating-service) - [https://explore.marginalia.nu/](https://explore.marginalia.nu/)
|
* * [dating](services-application/dating-service) - [https://explore.marginalia.nu/](https://explore.marginalia.nu/)
|
||||||
* * [explorer](services-application/explorer-service) - [https://explore2.marginalia.nu/](https://explore2.marginalia.nu/)
|
* * [explorer](services-application/explorer-service) - [https://explore2.marginalia.nu/](https://explore2.marginalia.nu/)
|
||||||
* an [internal API](api/)
|
|
||||||
|
The system uses a service registry to find the services. The service registry is based on zookeeper,
|
||||||
|
and is a separate service. The registry doesn't keep track of processes, but APIs. This means that
|
||||||
|
the system is flexible to reconfiguration. The same code can in principle be run as a micro-service
|
||||||
|
mesh or as a monolith.
|
||||||
|
|
||||||
|
This is an unusual architecture, but it has the benefit that you don't need to think too much about
|
||||||
|
the layout of the system. You can just request an API and talk to it. Because of this, several of the
|
||||||
|
services have almost no code of their own. They merely import a library and expose it as a service.
|
||||||
|
|
||||||
|
These skeleton services are marked with (S).
|
||||||
|
|
||||||
|
Services that expose HTTP endpoints tend to have more code. They are marked with (G).
|
||||||
|
|
||||||
### Processes
|
### Processes
|
||||||
|
|
||||||
@ -55,7 +75,6 @@ but isolated.
|
|||||||
* [features-search](features-search)
|
* [features-search](features-search)
|
||||||
* [features-crawl](features-crawl)
|
* [features-crawl](features-crawl)
|
||||||
* [features-convert](features-convert)
|
* [features-convert](features-convert)
|
||||||
* [features-index](features-index)
|
|
||||||
|
|
||||||
### Libraries and primitives
|
### Libraries and primitives
|
||||||
|
|
||||||
|
@ -4,8 +4,7 @@ The control service provides an operator's user interface. By default, this int
|
|||||||
exposed on port 8081. It does not offer any sort of access control or authentication.
|
exposed on port 8081. It does not offer any sort of access control or authentication.
|
||||||
|
|
||||||
The control service will itself execute tasks that affect the entire system, but delegate
|
The control service will itself execute tasks that affect the entire system, but delegate
|
||||||
node-specific tasks to the corresponding [executor-service](../executor-service) via the
|
node-specific tasks to the corresponding [execution subsystem](../../execution).
|
||||||
[executor-api](../../api/executor-api).
|
|
||||||
|
|
||||||
Conceptually the application is broken into three parts:
|
Conceptually the application is broken into three parts:
|
||||||
|
|
||||||
|
@ -59,7 +59,7 @@ dependencies {
|
|||||||
|
|
||||||
implementation project(':code:libraries:message-queue')
|
implementation project(':code:libraries:message-queue')
|
||||||
|
|
||||||
implementation project(':code:functions:domain-links:api')
|
implementation project(':code:functions:link-graph:api')
|
||||||
|
|
||||||
implementation project(':code:process-models:crawl-spec')
|
implementation project(':code:process-models:crawl-spec')
|
||||||
implementation project(':code:process-models:crawling-model')
|
implementation project(':code:process-models:crawling-model')
|
||||||
|
@ -1,16 +1,10 @@
|
|||||||
The executor service is a partitioned service responsible for executing and keeping
|
The executor service is a partitioned service responsible for executing and keeping
|
||||||
track of long running maintenance and operational tasks, such as crawling or data
|
track of long-running maintenance and operational tasks, such as crawling or data
|
||||||
processing.
|
processing.
|
||||||
|
|
||||||
It accomplishes this using the [message queue and actor library](../../libraries/message-queue/),
|
The executor service is closely linked to the [control-service](../control-service),
|
||||||
which permits program state to survive crashes and reboots. The executor service is closely
|
which provides a user interface for much of the executor's functionality.
|
||||||
linked to the [control-service](../control-service), which provides a user interface for
|
|
||||||
much of the executor's functionality.
|
|
||||||
|
|
||||||
## Central Classes
|
The service is itself relatively bare of code, but imports and exposes the [execution subsystem](../../execution),
|
||||||
|
which is responsible for the actual execution of tasks.
|
||||||
|
|
||||||
* [ExecutorActorControlService](java/nu/marginalia/actor/ExecutorActorControlService.java)
|
|
||||||
|
|
||||||
## See Also
|
|
||||||
|
|
||||||
* [api/executor-api](../../api/executor-api)
|
|
@ -46,8 +46,8 @@ dependencies {
|
|||||||
implementation project(':code:common:linkdb')
|
implementation project(':code:common:linkdb')
|
||||||
|
|
||||||
implementation project(':code:index')
|
implementation project(':code:index')
|
||||||
implementation project(':code:functions:domain-links:partition')
|
implementation project(':code:functions:link-graph:partition')
|
||||||
implementation project(':code:functions:domain-links:api')
|
implementation project(':code:functions:link-graph:api')
|
||||||
implementation project(':code:functions:search-query:api')
|
implementation project(':code:functions:search-query:api')
|
||||||
implementation project(':code:index:api')
|
implementation project(':code:index:api')
|
||||||
|
|
||||||
|
@ -4,8 +4,8 @@ import com.google.inject.AbstractModule;
|
|||||||
import com.google.inject.Provides;
|
import com.google.inject.Provides;
|
||||||
import com.google.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
import com.google.inject.name.Named;
|
import com.google.inject.name.Named;
|
||||||
import nu.marginalia.linkdb.dlinks.DomainLinkDb;
|
import nu.marginalia.linkgraph.DomainLinks;
|
||||||
import nu.marginalia.linkdb.dlinks.DelayingDomainLinkDb;
|
import nu.marginalia.linkgraph.impl.DelayingDomainLinks;
|
||||||
import nu.marginalia.storage.FileStorageService;
|
import nu.marginalia.storage.FileStorageService;
|
||||||
import nu.marginalia.IndexLocations;
|
import nu.marginalia.IndexLocations;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
@ -26,13 +26,13 @@ public class IndexModule extends AbstractModule {
|
|||||||
|
|
||||||
@Provides
|
@Provides
|
||||||
@Singleton
|
@Singleton
|
||||||
public DomainLinkDb domainLinkDb (
|
public DomainLinks domainLinkDb (
|
||||||
FileStorageService storageService
|
FileStorageService storageService
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
Path path = IndexLocations.getLinkdbLivePath(storageService).resolve(DOMAIN_LINKS_FILE_NAME);
|
Path path = IndexLocations.getLinkdbLivePath(storageService).resolve(DOMAIN_LINKS_FILE_NAME);
|
||||||
|
|
||||||
return new DelayingDomainLinkDb(path);
|
return new DelayingDomainLinks(path);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Provides
|
@Provides
|
||||||
|
@ -3,9 +3,9 @@ package nu.marginalia.index;
|
|||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
import nu.marginalia.IndexLocations;
|
import nu.marginalia.IndexLocations;
|
||||||
import nu.marginalia.functions.domainlinks.PartitionDomainLinksService;
|
import nu.marginalia.linkgraph.PartitionLinkGraphService;
|
||||||
import nu.marginalia.index.index.StatefulIndex;
|
import nu.marginalia.index.index.StatefulIndex;
|
||||||
import nu.marginalia.linkdb.dlinks.DomainLinkDb;
|
import nu.marginalia.linkgraph.DomainLinks;
|
||||||
import nu.marginalia.service.discovery.property.ServicePartition;
|
import nu.marginalia.service.discovery.property.ServicePartition;
|
||||||
import nu.marginalia.storage.FileStorageService;
|
import nu.marginalia.storage.FileStorageService;
|
||||||
import nu.marginalia.index.api.IndexMqEndpoints;
|
import nu.marginalia.index.api.IndexMqEndpoints;
|
||||||
@ -34,7 +34,7 @@ public class IndexService extends Service {
|
|||||||
private final FileStorageService fileStorageService;
|
private final FileStorageService fileStorageService;
|
||||||
private final DocumentDbReader documentDbReader;
|
private final DocumentDbReader documentDbReader;
|
||||||
|
|
||||||
private final DomainLinkDb domainLinkDb;
|
private final DomainLinks domainLinks;
|
||||||
private final ServiceEventLog eventLog;
|
private final ServiceEventLog eventLog;
|
||||||
|
|
||||||
|
|
||||||
@ -46,21 +46,21 @@ public class IndexService extends Service {
|
|||||||
StatefulIndex statefulIndex,
|
StatefulIndex statefulIndex,
|
||||||
FileStorageService fileStorageService,
|
FileStorageService fileStorageService,
|
||||||
DocumentDbReader documentDbReader,
|
DocumentDbReader documentDbReader,
|
||||||
DomainLinkDb domainLinkDb,
|
DomainLinks domainLinks,
|
||||||
PartitionDomainLinksService partitionDomainLinksService,
|
PartitionLinkGraphService partitionLinkGraphService,
|
||||||
ServiceEventLog eventLog)
|
ServiceEventLog eventLog)
|
||||||
{
|
{
|
||||||
super(params,
|
super(params,
|
||||||
ServicePartition.partition(params.configuration.node()),
|
ServicePartition.partition(params.configuration.node()),
|
||||||
List.of(indexQueryService,
|
List.of(indexQueryService,
|
||||||
partitionDomainLinksService)
|
partitionLinkGraphService)
|
||||||
);
|
);
|
||||||
|
|
||||||
this.opsService = opsService;
|
this.opsService = opsService;
|
||||||
this.statefulIndex = statefulIndex;
|
this.statefulIndex = statefulIndex;
|
||||||
this.fileStorageService = fileStorageService;
|
this.fileStorageService = fileStorageService;
|
||||||
this.documentDbReader = documentDbReader;
|
this.documentDbReader = documentDbReader;
|
||||||
this.domainLinkDb = domainLinkDb;
|
this.domainLinks = domainLinks;
|
||||||
this.eventLog = eventLog;
|
this.eventLog = eventLog;
|
||||||
|
|
||||||
this.init = params.initialization;
|
this.init = params.initialization;
|
||||||
@ -106,7 +106,7 @@ public class IndexService extends Service {
|
|||||||
|
|
||||||
if (Files.exists(newPathDomains)) {
|
if (Files.exists(newPathDomains)) {
|
||||||
eventLog.logEvent("INDEX-SWITCH-DOMAIN-LINKDB", "");
|
eventLog.logEvent("INDEX-SWITCH-DOMAIN-LINKDB", "");
|
||||||
domainLinkDb.switchInput(newPathDomains);
|
domainLinks.switchInput(newPathDomains);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -50,8 +50,8 @@ dependencies {
|
|||||||
|
|
||||||
implementation project(':code:functions:search-query')
|
implementation project(':code:functions:search-query')
|
||||||
implementation project(':code:functions:search-query:api')
|
implementation project(':code:functions:search-query:api')
|
||||||
implementation project(':code:functions:domain-links:api')
|
implementation project(':code:functions:link-graph:api')
|
||||||
implementation project(':code:functions:domain-links:aggregate')
|
implementation project(':code:functions:link-graph:aggregate')
|
||||||
|
|
||||||
implementation libs.bundles.slf4j
|
implementation libs.bundles.slf4j
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@ package nu.marginalia.query;
|
|||||||
|
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
import nu.marginalia.functions.domainlinks.AggregateDomainLinksService;
|
import nu.marginalia.linkgraph.AggregateLinkGraphService;
|
||||||
import nu.marginalia.functions.searchquery.QueryGRPCService;
|
import nu.marginalia.functions.searchquery.QueryGRPCService;
|
||||||
import nu.marginalia.service.discovery.property.ServicePartition;
|
import nu.marginalia.service.discovery.property.ServicePartition;
|
||||||
import nu.marginalia.service.server.BaseServiceParams;
|
import nu.marginalia.service.server.BaseServiceParams;
|
||||||
@ -17,7 +17,7 @@ public class QueryService extends Service {
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
@Inject
|
@Inject
|
||||||
public QueryService(BaseServiceParams params,
|
public QueryService(BaseServiceParams params,
|
||||||
AggregateDomainLinksService domainLinksService,
|
AggregateLinkGraphService domainLinksService,
|
||||||
QueryGRPCService queryGRPCService,
|
QueryGRPCService queryGRPCService,
|
||||||
QueryBasicInterface queryBasicInterface)
|
QueryBasicInterface queryBasicInterface)
|
||||||
{
|
{
|
||||||
|
@ -17,9 +17,9 @@ include 'code:functions:math:api'
|
|||||||
include 'code:functions:domain-info'
|
include 'code:functions:domain-info'
|
||||||
include 'code:functions:domain-info:api'
|
include 'code:functions:domain-info:api'
|
||||||
|
|
||||||
include 'code:functions:domain-links:partition'
|
include 'code:functions:link-graph:partition'
|
||||||
include 'code:functions:domain-links:aggregate'
|
include 'code:functions:link-graph:aggregate'
|
||||||
include 'code:functions:domain-links:api'
|
include 'code:functions:link-graph:api'
|
||||||
|
|
||||||
include 'code:functions:search-query'
|
include 'code:functions:search-query'
|
||||||
include 'code:functions:search-query:api'
|
include 'code:functions:search-query:api'
|
||||||
|
Loading…
Reference in New Issue
Block a user