diff --git a/code/common/linkdb/readme.md b/code/common/linkdb/readme.md index b5a4e8fe..9b3a82a0 100644 --- a/code/common/linkdb/readme.md +++ b/code/common/linkdb/readme.md @@ -1,15 +1,3 @@ -## Domain Link Database - -The domain link database contains information about links -between domains. It is a static in-memory database loaded -from a binary file. - -* [DomainLinkDb](java/nu/marginalia/linkdb/DomainLinkDb.java) -* * [FileDomainLinkDb](java/nu/marginalia/linkdb/FileDomainLinkDb.java) -* * [SqlDomainLinkDb](java/nu/marginalia/linkdb/SqlDomainLinkDb.java) -* [DomainLinkDbWriter](java/nu/marginalia/linkdb/DomainLinkDbWriter.java) -* [DomainLinkDbLoader](java/nu/marginalia/linkdb/DomainLinkDbLoader.java) - ## Document Database The document database contains information about links, @@ -21,10 +9,10 @@ is not in the MariaDB database is that this would make updates to this information take effect in production immediately, even before the information was searchable. -* [DocumentLinkDbWriter](java/nu/marginalia/linkdb/DocumentDbWriter.java) -* [DocumentLinkDbLoader](java/nu/marginalia/linkdb/DocumentDbReader.java) +* [DocumentLinkDbWriter](java/nu/marginalia/linkdb/docs/DocumentDbWriter.java) +* [DocumentLinkDbLoader](java/nu/marginalia/linkdb/docs/DocumentDbReader.java) ## See Also -These databases are constructed by the [loading-process](../../processes/loading-process), and consumed by the [index-service](../../services-core/index-service). \ No newline at end of file +The database is constructed by the [loading-process](../../processes/loading-process), and consumed by the [index-service](../../services-core/index-service). \ No newline at end of file diff --git a/code/common/readme.md b/code/common/readme.md index 120d55f9..b6329457 100644 --- a/code/common/readme.md +++ b/code/common/readme.md @@ -7,6 +7,5 @@ as shared models. * [config](config/) contains some `@Inject`ables. * [renderer](renderer/) contains utility code for rendering website templates. 
* [service](service/) is the shared base classes for main methods and web services. -* [service-client](service-client/) is the shared base class for RPC. -* [service-discovery](service-discovery) contains tools that lets the services find each other. +* [service-discovery](service-discovery) contains tools that lets the services find each other and communicate. * [process](process/) contains boiler plate for batch processes. diff --git a/code/execution/build.gradle b/code/execution/build.gradle index 842bcab7..a3cec39f 100644 --- a/code/execution/build.gradle +++ b/code/execution/build.gradle @@ -34,7 +34,7 @@ dependencies { implementation project(':code:libraries:message-queue') - implementation project(':code:functions:domain-links:api') + implementation project(':code:functions:link-graph:api') implementation project(':code:execution:api') implementation project(':code:process-models:crawl-spec') diff --git a/code/execution/java/nu/marginalia/actor/task/ExportDataActor.java b/code/execution/java/nu/marginalia/actor/task/ExportDataActor.java index b3d9b0a2..0a5d1056 100644 --- a/code/execution/java/nu/marginalia/actor/task/ExportDataActor.java +++ b/code/execution/java/nu/marginalia/actor/task/ExportDataActor.java @@ -6,7 +6,7 @@ import com.google.inject.Singleton; import com.zaxxer.hikari.HikariDataSource; import nu.marginalia.actor.prototype.RecordActorPrototype; import nu.marginalia.actor.state.ActorStep; -import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient; +import nu.marginalia.api.linkgraph.AggregateLinkGraphClient; import nu.marginalia.storage.FileStorageService; import nu.marginalia.storage.model.FileStorageId; import nu.marginalia.storage.model.FileStorageType; @@ -32,7 +32,7 @@ public class ExportDataActor extends RecordActorPrototype { private final FileStorageService storageService; private final HikariDataSource dataSource; private final Logger logger = LoggerFactory.getLogger(getClass()); - private final AggregateDomainLinksClient 
domainLinksClient; + private final AggregateLinkGraphClient linkGraphClient; public record Export() implements ActorStep {} public record ExportBlacklist(FileStorageId fid) implements ActorStep {} @@ -114,7 +114,7 @@ public class ExportDataActor extends RecordActorPrototype { var tmpFile = Files.createTempFile(storage.asPath(), "export", ".csv.gz", PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--"))); - var allLinks = domainLinksClient.getAllDomainLinks(); + var allLinks = linkGraphClient.getAllDomainLinks(); try (var bw = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(Files.newOutputStream(tmpFile, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING))))) { @@ -155,12 +155,12 @@ public class ExportDataActor extends RecordActorPrototype { public ExportDataActor(Gson gson, FileStorageService storageService, HikariDataSource dataSource, - AggregateDomainLinksClient domainLinksClient) + AggregateLinkGraphClient linkGraphClient) { super(gson); this.storageService = storageService; this.dataSource = dataSource; - this.domainLinksClient = domainLinksClient; + this.linkGraphClient = linkGraphClient; } } diff --git a/code/execution/readme.md b/code/execution/readme.md new file mode 100644 index 00000000..d819c023 --- /dev/null +++ b/code/execution/readme.md @@ -0,0 +1,12 @@ +The execution subsystem is responsible for the execution of long running tasks on each +index node. It lives in the [executor-service](../services-core/executor-service) module. + +It accomplishes this using the [message queue and actor library](../libraries/message-queue/), +which permits program state to survive crashes and reboots. 
+ +The subsystem exposes four [APIs](api/src/main/protobuf/executor-api.proto): + +* Execution API - for starting and stopping tasks, also contains miscellaneous commands +* Crawl API - for managing the crawl workflow +* Sideload API - for sideloading data +* Export API - for exporting data \ No newline at end of file diff --git a/code/features-convert/stackexchange-xml/readme.md b/code/features-convert/stackexchange-xml/readme.md index 8af6d05a..1701ad7f 100644 --- a/code/features-convert/stackexchange-xml/readme.md +++ b/code/features-convert/stackexchange-xml/readme.md @@ -16,4 +16,3 @@ holistically, not by question or answer, it is necessary to re-arrange the data (which is very large). SQLite does a decent job of enabling this task. -See [tools/stackexchange-converter](../../tools/stackexchange-converter). \ No newline at end of file diff --git a/code/functions/domain-info/build.gradle b/code/functions/domain-info/build.gradle index 1aa2da16..4858e935 100644 --- a/code/functions/domain-info/build.gradle +++ b/code/functions/domain-info/build.gradle @@ -15,7 +15,7 @@ apply from: "$rootProject.projectDir/srcsets.gradle" dependencies { implementation project(':code:functions:domain-info:api') - implementation project(':code:functions:domain-links:api') + implementation project(':code:functions:link-graph:api') implementation project(':code:common:config') implementation project(':code:common:service') diff --git a/code/functions/domain-info/java/nu/marginalia/functions/domains/DomainInformationService.java b/code/functions/domain-info/java/nu/marginalia/functions/domains/DomainInformationService.java index bf8ad9df..1aeffae6 100644 --- a/code/functions/domain-info/java/nu/marginalia/functions/domains/DomainInformationService.java +++ b/code/functions/domain-info/java/nu/marginalia/functions/domains/DomainInformationService.java @@ -2,7 +2,7 @@ package nu.marginalia.functions.domains; import com.zaxxer.hikari.HikariDataSource; import 
nu.marginalia.api.domains.RpcDomainInfoResponse; -import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient; +import nu.marginalia.api.linkgraph.AggregateLinkGraphClient; import nu.marginalia.geoip.GeoIpDictionary; import nu.marginalia.model.EdgeDomain; import nu.marginalia.db.DbDomainQueries; @@ -21,7 +21,7 @@ public class DomainInformationService { private final GeoIpDictionary geoIpDictionary; private DbDomainQueries dbDomainQueries; - private final AggregateDomainLinksClient domainLinksClient; + private final AggregateLinkGraphClient linkGraphClient; private HikariDataSource dataSource; private final Logger logger = LoggerFactory.getLogger(getClass()); @@ -29,11 +29,11 @@ public class DomainInformationService { public DomainInformationService( DbDomainQueries dbDomainQueries, GeoIpDictionary geoIpDictionary, - AggregateDomainLinksClient domainLinksClient, + AggregateLinkGraphClient linkGraphClient, HikariDataSource dataSource) { this.dbDomainQueries = dbDomainQueries; this.geoIpDictionary = geoIpDictionary; - this.domainLinksClient = domainLinksClient; + this.linkGraphClient = linkGraphClient; this.dataSource = dataSource; } @@ -84,8 +84,8 @@ public class DomainInformationService { inCrawlQueue = rs.next(); builder.setInCrawlQueue(inCrawlQueue); - builder.setIncomingLinks(domainLinksClient.countLinksToDomain(domainId)); - builder.setOutboundLinks(domainLinksClient.countLinksFromDomain(domainId)); + builder.setIncomingLinks(linkGraphClient.countLinksToDomain(domainId)); + builder.setOutboundLinks(linkGraphClient.countLinksFromDomain(domainId)); rs = stmt.executeQuery(STR.""" SELECT KNOWN_URLS, GOOD_URLS, VISITED_URLS FROM DOMAIN_METADATA WHERE ID=\{domainId} diff --git a/code/functions/domain-info/java/nu/marginalia/functions/domains/SimilarDomainsService.java b/code/functions/domain-info/java/nu/marginalia/functions/domains/SimilarDomainsService.java index 9be481b7..8d6cd70e 100644 --- 
a/code/functions/domain-info/java/nu/marginalia/functions/domains/SimilarDomainsService.java +++ b/code/functions/domain-info/java/nu/marginalia/functions/domains/SimilarDomainsService.java @@ -11,7 +11,7 @@ import gnu.trove.set.hash.TIntHashSet; import it.unimi.dsi.fastutil.ints.Int2DoubleArrayMap; import nu.marginalia.api.domains.*; import nu.marginalia.api.domains.model.SimilarDomain; -import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient; +import nu.marginalia.api.linkgraph.AggregateLinkGraphClient; import nu.marginalia.model.EdgeDomain; import org.roaringbitmap.RoaringBitmap; import org.slf4j.Logger; @@ -20,7 +20,6 @@ import org.slf4j.LoggerFactory; import java.sql.ResultSet; import java.sql.SQLException; import java.util.ArrayList; -import java.util.BitSet; import java.util.List; import java.util.concurrent.Executors; import java.util.stream.IntStream; @@ -29,7 +28,7 @@ public class SimilarDomainsService { private static final Logger logger = LoggerFactory.getLogger(SimilarDomainsService.class); private final HikariDataSource dataSource; - private final AggregateDomainLinksClient domainLinksClient; + private final AggregateLinkGraphClient linkGraphClient; private volatile TIntIntHashMap domainIdToIdx = new TIntIntHashMap(100_000); private volatile int[] domainIdxToId; @@ -45,9 +44,9 @@ public class SimilarDomainsService { volatile boolean isReady = false; @Inject - public SimilarDomainsService(HikariDataSource dataSource, AggregateDomainLinksClient domainLinksClient) { + public SimilarDomainsService(HikariDataSource dataSource, AggregateLinkGraphClient linkGraphClient) { this.dataSource = dataSource; - this.domainLinksClient = domainLinksClient; + this.linkGraphClient = linkGraphClient; Executors.newSingleThreadExecutor().submit(this::init); } @@ -262,7 +261,7 @@ public class SimilarDomainsService { private TIntSet getLinkingIdsDToS(int domainIdx) { var items = new TIntHashSet(); - for (int id : 
domainLinksClient.getLinksFromDomain(domainIdxToId[domainIdx])) { + for (int id : linkGraphClient.getLinksFromDomain(domainIdxToId[domainIdx])) { items.add(domainIdToIdx.get(id)); } @@ -272,7 +271,7 @@ public class SimilarDomainsService { private TIntSet getLinkingIdsSToD(int domainIdx) { var items = new TIntHashSet(); - for (int id : domainLinksClient.getLinksToDomain(domainIdxToId[domainIdx])) { + for (int id : linkGraphClient.getLinksToDomain(domainIdxToId[domainIdx])) { items.add(domainIdToIdx.get(id)); } diff --git a/code/functions/domain-links/api/java/nu/marginalia/api/indexdomainlinks/PartitionDomainLinksClient.java b/code/functions/domain-links/api/java/nu/marginalia/api/indexdomainlinks/PartitionDomainLinksClient.java deleted file mode 100644 index 6e561b12..00000000 --- a/code/functions/domain-links/api/java/nu/marginalia/api/indexdomainlinks/PartitionDomainLinksClient.java +++ /dev/null @@ -1,30 +0,0 @@ -package nu.marginalia.api.indexdomainlinks; - -import com.google.inject.Inject; -import com.google.inject.Singleton; -import nu.marginalia.api.domainlink.DomainLinksApiGrpc; -import nu.marginalia.service.client.GrpcChannelPoolFactory; -import nu.marginalia.service.client.GrpcMultiNodeChannelPool; -import nu.marginalia.service.discovery.property.ServiceKey; -import nu.marginalia.service.discovery.property.ServicePartition; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@Singleton -public class PartitionDomainLinksClient { - private static final Logger logger = LoggerFactory.getLogger(PartitionDomainLinksClient.class); - - private final GrpcMultiNodeChannelPool channelPool; - - @Inject - public PartitionDomainLinksClient(GrpcChannelPoolFactory factory) { - this.channelPool = factory.createMulti( - ServiceKey.forGrpcApi(DomainLinksApiGrpc.class, ServicePartition.multi()), - DomainLinksApiGrpc::newBlockingStub); - } - - public GrpcMultiNodeChannelPool getChannelPool() { - return channelPool; - } - -} diff --git 
a/code/functions/domain-links/aggregate/build.gradle b/code/functions/link-graph/aggregate/build.gradle similarity index 92% rename from code/functions/domain-links/aggregate/build.gradle rename to code/functions/link-graph/aggregate/build.gradle index 11260f1f..52be585f 100644 --- a/code/functions/domain-links/aggregate/build.gradle +++ b/code/functions/link-graph/aggregate/build.gradle @@ -14,7 +14,7 @@ java { apply from: "$rootProject.projectDir/srcsets.gradle" dependencies { - implementation project(':code:functions:domain-links:api') + implementation project(':code:functions:link-graph:api') implementation project(':code:common:config') implementation project(':code:common:service') diff --git a/code/functions/domain-links/aggregate/java/nu/marginalia/functions/domainlinks/AggregateDomainLinksService.java b/code/functions/link-graph/aggregate/java/nu/marginalia/linkgraph/AggregateLinkGraphService.java similarity index 71% rename from code/functions/domain-links/aggregate/java/nu/marginalia/functions/domainlinks/AggregateDomainLinksService.java rename to code/functions/link-graph/aggregate/java/nu/marginalia/linkgraph/AggregateLinkGraphService.java index ff842075..7731b335 100644 --- a/code/functions/domain-links/aggregate/java/nu/marginalia/functions/domainlinks/AggregateDomainLinksService.java +++ b/code/functions/link-graph/aggregate/java/nu/marginalia/linkgraph/AggregateLinkGraphService.java @@ -1,20 +1,25 @@ -package nu.marginalia.functions.domainlinks; +package nu.marginalia.linkgraph; import com.google.inject.Inject; import io.grpc.stub.StreamObserver; -import nu.marginalia.api.domainlink.*; -import nu.marginalia.api.indexdomainlinks.PartitionDomainLinksClient; +import nu.marginalia.api.linkgraph.*; +import nu.marginalia.api.linkgraph.PartitionLinkGraphClient; +import nu.marginalia.api.linkgraph.LinkGraphApiGrpc; +import nu.marginalia.api.linkgraph.LinkGraphApiGrpc.LinkGraphApiBlockingStub; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import 
java.util.List; -public class AggregateDomainLinksService extends DomainLinksApiGrpc.DomainLinksApiImplBase { - private static final Logger logger = LoggerFactory.getLogger(AggregateDomainLinksService.class); - private final PartitionDomainLinksClient client; +/** This class is responsible for aggregating the link graph data from the partitioned link graph + * services. + */ +public class AggregateLinkGraphService extends LinkGraphApiGrpc.LinkGraphApiImplBase { + private static final Logger logger = LoggerFactory.getLogger(AggregateLinkGraphService.class); + private final PartitionLinkGraphClient client; @Inject - public AggregateDomainLinksService(PartitionDomainLinksClient client) { + public AggregateLinkGraphService(PartitionLinkGraphClient client) { this.client = client; } @@ -22,7 +27,7 @@ public class AggregateDomainLinksService extends DomainLinksApiGrpc.DomainLinksA public void getAllLinks(Empty request, StreamObserver responseObserver) { - client.getChannelPool().call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::getAllLinks) + client.getChannelPool().call(LinkGraphApiBlockingStub::getAllLinks) .run(Empty.getDefaultInstance()) .forEach(iter -> iter.forEachRemaining(responseObserver::onNext)); @@ -34,7 +39,7 @@ public class AggregateDomainLinksService extends DomainLinksApiGrpc.DomainLinksA StreamObserver responseObserver) { var rspBuilder = RpcDomainIdList.newBuilder(); - client.getChannelPool().call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::getLinksFromDomain) + client.getChannelPool().call(LinkGraphApiBlockingStub::getLinksFromDomain) .run(request) .stream() .map(RpcDomainIdList::getDomainIdList) @@ -51,7 +56,7 @@ public class AggregateDomainLinksService extends DomainLinksApiGrpc.DomainLinksA var rspBuilder = RpcDomainIdList.newBuilder(); - client.getChannelPool().call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::getLinksToDomain) + client.getChannelPool().call(LinkGraphApiBlockingStub::getLinksToDomain) .run(request) .stream() 
.map(RpcDomainIdList::getDomainIdList) @@ -65,7 +70,7 @@ public class AggregateDomainLinksService extends DomainLinksApiGrpc.DomainLinksA @Override public void countLinksFromDomain(RpcDomainId request, StreamObserver responseObserver) { - int sum = client.getChannelPool().call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::countLinksFromDomain) + int sum = client.getChannelPool().call(LinkGraphApiBlockingStub::countLinksFromDomain) .run(request) .stream() .mapToInt(RpcDomainIdCount::getIdCount) @@ -81,7 +86,7 @@ public class AggregateDomainLinksService extends DomainLinksApiGrpc.DomainLinksA public void countLinksToDomain(RpcDomainId request, StreamObserver responseObserver) { - int sum = client.getChannelPool().call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::countLinksToDomain) + int sum = client.getChannelPool().call(LinkGraphApiBlockingStub::countLinksToDomain) .run(request) .stream() .mapToInt(RpcDomainIdCount::getIdCount) diff --git a/code/functions/link-graph/aggregate/readme.md b/code/functions/link-graph/aggregate/readme.md new file mode 100644 index 00000000..a9429ca5 --- /dev/null +++ b/code/functions/link-graph/aggregate/readme.md @@ -0,0 +1,3 @@ +This module is responsible for aggregating the link graph from the partitioned services, and exposing a unified +view of the link graph. It does not keep any data or state, but instead delegates to the partitioned +services. 
\ No newline at end of file diff --git a/code/functions/domain-links/api/build.gradle b/code/functions/link-graph/api/build.gradle similarity index 95% rename from code/functions/domain-links/api/build.gradle rename to code/functions/link-graph/api/build.gradle index 3232a623..ffeab7f8 100644 --- a/code/functions/domain-links/api/build.gradle +++ b/code/functions/link-graph/api/build.gradle @@ -11,7 +11,7 @@ java { } } -jar.archiveBaseName = 'index-domain-links-api' +jar.archiveBaseName = 'link-graph-api' apply from: "$rootProject.projectDir/protobuf.gradle" apply from: "$rootProject.projectDir/srcsets.gradle" diff --git a/code/functions/domain-links/api/java/nu/marginalia/api/indexdomainlinks/AggregateDomainLinksClient.java b/code/functions/link-graph/api/java/nu/marginalia/api/linkgraph/AggregateLinkGraphClient.java similarity index 76% rename from code/functions/domain-links/api/java/nu/marginalia/api/indexdomainlinks/AggregateDomainLinksClient.java rename to code/functions/link-graph/api/java/nu/marginalia/api/linkgraph/AggregateLinkGraphClient.java index c8f5c5ec..4e9c9a3d 100644 --- a/code/functions/domain-links/api/java/nu/marginalia/api/indexdomainlinks/AggregateDomainLinksClient.java +++ b/code/functions/link-graph/api/java/nu/marginalia/api/linkgraph/AggregateLinkGraphClient.java @@ -1,10 +1,8 @@ -package nu.marginalia.api.indexdomainlinks; +package nu.marginalia.api.linkgraph; import com.google.inject.Inject; import com.google.inject.Singleton; -import nu.marginalia.api.domainlink.DomainLinksApiGrpc; -import nu.marginalia.api.domainlink.Empty; -import nu.marginalia.api.domainlink.RpcDomainId; +import nu.marginalia.api.linkgraph.LinkGraphApiGrpc; import nu.marginalia.service.client.GrpcChannelPoolFactory; import nu.marginalia.service.client.GrpcSingleNodeChannelPool; import nu.marginalia.service.discovery.property.ServiceKey; @@ -17,24 +15,26 @@ import org.slf4j.LoggerFactory; import java.time.Duration; import java.util.List; -@Singleton -public class 
AggregateDomainLinksClient { - private static final Logger logger = LoggerFactory.getLogger(AggregateDomainLinksClient.class); +import static nu.marginalia.api.linkgraph.LinkGraphApiGrpc.*; - private final GrpcSingleNodeChannelPool channelPool; +@Singleton +public class AggregateLinkGraphClient { + private static final Logger logger = LoggerFactory.getLogger(AggregateLinkGraphClient.class); + + private final GrpcSingleNodeChannelPool channelPool; @Inject - public AggregateDomainLinksClient(GrpcChannelPoolFactory factory) { + public AggregateLinkGraphClient(GrpcChannelPoolFactory factory) { this.channelPool = factory.createSingle( - ServiceKey.forGrpcApi(DomainLinksApiGrpc.class, ServicePartition.any()), - DomainLinksApiGrpc::newBlockingStub); + ServiceKey.forGrpcApi(LinkGraphApiGrpc.class, ServicePartition.any()), + LinkGraphApiGrpc::newBlockingStub); } public AllLinks getAllDomainLinks() { AllLinks links = new AllLinks(); - channelPool.call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::getAllLinks) + channelPool.call(LinkGraphApiBlockingStub::getAllLinks) .run(Empty.getDefaultInstance()) .forEachRemaining(pairs -> { for (int i = 0; i < pairs.getDestIdsCount(); i++) { @@ -47,7 +47,7 @@ public class AggregateDomainLinksClient { public List getLinksToDomain(int domainId) { try { - return channelPool.call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::getLinksToDomain) + return channelPool.call(LinkGraphApiBlockingStub::getLinksToDomain) .run(RpcDomainId.newBuilder().setDomainId(domainId).build()) .getDomainIdList() .stream() @@ -62,7 +62,7 @@ public class AggregateDomainLinksClient { public List getLinksFromDomain(int domainId) { try { - return channelPool.call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::getLinksFromDomain) + return channelPool.call(LinkGraphApiBlockingStub::getLinksFromDomain) .run(RpcDomainId.newBuilder().setDomainId(domainId).build()) .getDomainIdList() .stream() @@ -78,7 +78,7 @@ public class AggregateDomainLinksClient { public int 
countLinksToDomain(int domainId) { try { - return channelPool.call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::countLinksToDomain) + return channelPool.call(LinkGraphApiBlockingStub::countLinksToDomain) .run(RpcDomainId.newBuilder().setDomainId(domainId).build()) .getIdCount(); @@ -91,7 +91,7 @@ public class AggregateDomainLinksClient { public int countLinksFromDomain(int domainId) { try { - return channelPool.call(DomainLinksApiGrpc.DomainLinksApiBlockingStub::countLinksFromDomain) + return channelPool.call(LinkGraphApiBlockingStub::countLinksFromDomain) .run(RpcDomainId.newBuilder().setDomainId(domainId).build()) .getIdCount(); } diff --git a/code/functions/link-graph/api/java/nu/marginalia/api/linkgraph/PartitionLinkGraphClient.java b/code/functions/link-graph/api/java/nu/marginalia/api/linkgraph/PartitionLinkGraphClient.java new file mode 100644 index 00000000..e6fa5ce8 --- /dev/null +++ b/code/functions/link-graph/api/java/nu/marginalia/api/linkgraph/PartitionLinkGraphClient.java @@ -0,0 +1,29 @@ +package nu.marginalia.api.linkgraph; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import nu.marginalia.service.client.GrpcChannelPoolFactory; +import nu.marginalia.service.client.GrpcMultiNodeChannelPool; +import nu.marginalia.service.discovery.property.ServiceKey; +import nu.marginalia.service.discovery.property.ServicePartition; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@Singleton +public class PartitionLinkGraphClient { + private static final Logger logger = LoggerFactory.getLogger(PartitionLinkGraphClient.class); + + private final GrpcMultiNodeChannelPool channelPool; + + @Inject + public PartitionLinkGraphClient(GrpcChannelPoolFactory factory) { + this.channelPool = factory.createMulti( + ServiceKey.forGrpcApi(LinkGraphApiGrpc.class, ServicePartition.multi()), + LinkGraphApiGrpc::newBlockingStub); + } + + public GrpcMultiNodeChannelPool getChannelPool() { + return channelPool; + } + +} diff --git 
a/code/functions/domain-links/api/src/main/protobuf/domain-links.proto b/code/functions/link-graph/api/src/main/protobuf/link-graph.proto similarity index 85% rename from code/functions/domain-links/api/src/main/protobuf/domain-links.proto rename to code/functions/link-graph/api/src/main/protobuf/link-graph.proto index 2a31bbf7..753c8e1a 100644 --- a/code/functions/domain-links/api/src/main/protobuf/domain-links.proto +++ b/code/functions/link-graph/api/src/main/protobuf/link-graph.proto @@ -1,10 +1,10 @@ syntax="proto3"; -package nu.marginalia.api.domainlinks; +package nu.marginalia.api.linkgraph; -option java_package="nu.marginalia.api.domainlink"; +option java_package="nu.marginalia.api.linkgraph"; option java_multiple_files=true; -service DomainLinksApi { +service LinkGraphApi { rpc getAllLinks(Empty) returns (stream RpcDomainIdPairs) {} rpc getLinksFromDomain(RpcDomainId) returns (RpcDomainIdList) {} rpc getLinksToDomain(RpcDomainId) returns (RpcDomainIdList) {} diff --git a/code/functions/domain-links/partition/build.gradle b/code/functions/link-graph/partition/build.gradle similarity index 94% rename from code/functions/domain-links/partition/build.gradle rename to code/functions/link-graph/partition/build.gradle index 940cecfe..43a2e654 100644 --- a/code/functions/domain-links/partition/build.gradle +++ b/code/functions/link-graph/partition/build.gradle @@ -14,7 +14,7 @@ java { apply from: "$rootProject.projectDir/srcsets.gradle" dependencies { - implementation project(':code:functions:domain-links:api') + implementation project(':code:functions:link-graph:api') implementation project(':code:common:config') implementation project(':code:common:service') diff --git a/code/common/linkdb/java/nu/marginalia/linkdb/dlinks/DomainLinkDb.java b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/DomainLinks.java similarity index 78% rename from code/common/linkdb/java/nu/marginalia/linkdb/dlinks/DomainLinkDb.java rename to 
code/functions/link-graph/partition/java/nu/marginalia/linkgraph/DomainLinks.java index ed6f0a1d..ed0be39e 100644 --- a/code/common/linkdb/java/nu/marginalia/linkdb/dlinks/DomainLinkDb.java +++ b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/DomainLinks.java @@ -1,13 +1,13 @@ -package nu.marginalia.linkdb.dlinks; +package nu.marginalia.linkgraph; import gnu.trove.list.array.TIntArrayList; import java.nio.file.Path; -/** A database of source-destination pairs of domain IDs. The database is loaded into memory from - * a source. The database is then kept in memory, reloading it upon switchInput(). +/** A repository of source-destination pairs of domain IDs. The database is loaded into memory from + * a source. The data is then kept in memory, reloading it upon switchInput(). */ -public interface DomainLinkDb { +public interface DomainLinks { /** Replace the current db file with the provided file. The provided file will be deleted. * The in-memory database MAY be updated to reflect the change. 
* */ diff --git a/code/functions/domain-links/partition/java/nu/marginalia/functions/domainlinks/PartitionDomainLinksService.java b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/PartitionLinkGraphService.java similarity index 73% rename from code/functions/domain-links/partition/java/nu/marginalia/functions/domainlinks/PartitionDomainLinksService.java rename to code/functions/link-graph/partition/java/nu/marginalia/linkgraph/PartitionLinkGraphService.java index bb89fc6b..814f7ffb 100644 --- a/code/functions/domain-links/partition/java/nu/marginalia/functions/domainlinks/PartitionDomainLinksService.java +++ b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/PartitionLinkGraphService.java @@ -1,25 +1,28 @@ -package nu.marginalia.functions.domainlinks; +package nu.marginalia.linkgraph; import com.google.inject.Inject; import io.grpc.stub.StreamObserver; -import nu.marginalia.api.domainlink.*; -import nu.marginalia.linkdb.dlinks.DomainLinkDb; +import nu.marginalia.api.linkgraph.*; +import nu.marginalia.api.linkgraph.Empty; +import nu.marginalia.api.linkgraph.LinkGraphApiGrpc; -/** GRPC service for interrogating domain links +/** GRPC service for interrogating domain links for a single partition. For accessing the data + * in the application, the AggregateLinkGraphService should be used instead via the + * AggregateLinkGraphClient. 
*/ -public class PartitionDomainLinksService extends DomainLinksApiGrpc.DomainLinksApiImplBase { - private final DomainLinkDb domainLinkDb; +public class PartitionLinkGraphService extends LinkGraphApiGrpc.LinkGraphApiImplBase { + private final DomainLinks domainLinks; @Inject - public PartitionDomainLinksService(DomainLinkDb domainLinkDb) { - this.domainLinkDb = domainLinkDb; + public PartitionLinkGraphService(DomainLinks domainLinks) { + this.domainLinks = domainLinks; } public void getAllLinks(Empty request, io.grpc.stub.StreamObserver responseObserver) { try (var idsConverter = new AllIdsResponseConverter(responseObserver)) { - domainLinkDb.forEach(idsConverter::accept); + domainLinks.forEach(idsConverter::accept); } responseObserver.onCompleted(); @@ -58,7 +61,7 @@ public class PartitionDomainLinksService extends DomainLinksApiGrpc.DomainLinksA public void getLinksFromDomain(RpcDomainId request, StreamObserver responseObserver) { - var links = domainLinkDb.findDestinations(request.getDomainId()); + var links = domainLinks.findDestinations(request.getDomainId()); var rspBuilder = RpcDomainIdList.newBuilder(); for (int i = 0; i < links.size(); i++) { @@ -73,7 +76,7 @@ public class PartitionDomainLinksService extends DomainLinksApiGrpc.DomainLinksA public void getLinksToDomain(RpcDomainId request, StreamObserver responseObserver) { - var links = domainLinkDb.findSources(request.getDomainId()); + var links = domainLinks.findSources(request.getDomainId()); var rspBuilder = RpcDomainIdList.newBuilder(); for (int i = 0; i < links.size(); i++) { @@ -87,7 +90,7 @@ public class PartitionDomainLinksService extends DomainLinksApiGrpc.DomainLinksA public void countLinksFromDomain(RpcDomainId request, StreamObserver responseObserver) { responseObserver.onNext(RpcDomainIdCount.newBuilder() - .setIdCount(domainLinkDb.countDestinations(request.getDomainId())) + .setIdCount(domainLinks.countDestinations(request.getDomainId())) .build()); responseObserver.onCompleted(); } @@ 
-95,7 +98,7 @@ public class PartitionDomainLinksService extends DomainLinksApiGrpc.DomainLinksA public void countLinksToDomain(RpcDomainId request, StreamObserver responseObserver) { responseObserver.onNext(RpcDomainIdCount.newBuilder() - .setIdCount(domainLinkDb.countSources(request.getDomainId())) + .setIdCount(domainLinks.countSources(request.getDomainId())) .build()); responseObserver.onCompleted(); } diff --git a/code/common/linkdb/java/nu/marginalia/linkdb/dlinks/DelayingDomainLinkDb.java b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/impl/DelayingDomainLinks.java similarity index 85% rename from code/common/linkdb/java/nu/marginalia/linkdb/dlinks/DelayingDomainLinkDb.java rename to code/functions/link-graph/partition/java/nu/marginalia/linkgraph/impl/DelayingDomainLinks.java index 3d2c7270..cf0ad162 100644 --- a/code/common/linkdb/java/nu/marginalia/linkdb/dlinks/DelayingDomainLinkDb.java +++ b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/impl/DelayingDomainLinks.java @@ -1,7 +1,8 @@ -package nu.marginalia.linkdb.dlinks; +package nu.marginalia.linkgraph.impl; import com.google.inject.name.Named; import gnu.trove.list.array.TIntArrayList; +import nu.marginalia.linkgraph.DomainLinks; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -14,13 +15,13 @@ import java.nio.file.StandardCopyOption; * is not yet loaded. This speeds up the startup of the index service, as the database is * loaded in a separate thread. 
*/ -public class DelayingDomainLinkDb implements DomainLinkDb { - private final static Logger logger = LoggerFactory.getLogger(DelayingDomainLinkDb.class); +public class DelayingDomainLinks implements DomainLinks { + private final static Logger logger = LoggerFactory.getLogger(DelayingDomainLinks.class); - private volatile DomainLinkDb currentDb; + private volatile DomainLinks currentDb; private final Path filename; - public DelayingDomainLinkDb(@Named("domain-linkdb-file") Path filename) { + public DelayingDomainLinks(@Named("domain-linkdb-file") Path filename) { this.filename = filename; // Load the database in a separate thread, so that the constructor can return @@ -29,7 +30,7 @@ public class DelayingDomainLinkDb implements DomainLinkDb { Thread.ofPlatform().start(() -> { try { - currentDb = new FileDomainLinkDb(filename); + currentDb = new FileDomainLinks(filename); logger.info("Loaded linkdb"); } catch (Exception e) { logger.error("Failed to load linkdb", e); @@ -43,7 +44,7 @@ public class DelayingDomainLinkDb implements DomainLinkDb { Thread.ofPlatform().start(() -> { try { - currentDb = new FileDomainLinkDb(filename); + currentDb = new FileDomainLinks(filename); } catch (IOException e) { logger.error("Failed to load linkdb", e); } diff --git a/code/common/linkdb/java/nu/marginalia/linkdb/dlinks/FileDomainLinkDb.java b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/impl/FileDomainLinks.java similarity index 90% rename from code/common/linkdb/java/nu/marginalia/linkdb/dlinks/FileDomainLinkDb.java rename to code/functions/link-graph/partition/java/nu/marginalia/linkgraph/impl/FileDomainLinks.java index 0fda3467..68281229 100644 --- a/code/common/linkdb/java/nu/marginalia/linkdb/dlinks/FileDomainLinkDb.java +++ b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/impl/FileDomainLinks.java @@ -1,7 +1,9 @@ -package nu.marginalia.linkdb.dlinks; +package nu.marginalia.linkgraph.impl; import com.google.inject.name.Named; import 
gnu.trove.list.array.TIntArrayList; +import nu.marginalia.linkgraph.DomainLinks; +import nu.marginalia.linkgraph.io.DomainLinksLoader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -14,13 +16,13 @@ import java.util.Arrays; /** Canonical DomainLinkDb implementation. The database is loaded into memory from * a file. The database is then kept in memory, reloading it upon switchInput(). */ -public class FileDomainLinkDb implements DomainLinkDb { - private static final Logger logger = LoggerFactory.getLogger(FileDomainLinkDb.class); +public class FileDomainLinks implements DomainLinks { + private static final Logger logger = LoggerFactory.getLogger(FileDomainLinks.class); private final Path filename; private volatile long[] sourceToDest = new long[0]; private volatile long[] destToSource = new long[0]; - public FileDomainLinkDb(@Named("domain-linkdb-file") Path filename) throws IOException { + public FileDomainLinks(@Named("domain-linkdb-file") Path filename) throws IOException { this.filename = filename; if (Files.exists(filename)) { @@ -35,7 +37,7 @@ public class FileDomainLinkDb implements DomainLinkDb { } public void loadInput(Path filename) throws IOException { - try (var loader = new DomainLinkDbLoader(filename)) { + try (var loader = new DomainLinksLoader(filename)) { int size = loader.size(); var newSourceToDest = new long[size]; diff --git a/code/common/linkdb/java/nu/marginalia/linkdb/dlinks/DomainLinkDbLoader.java b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/io/DomainLinksLoader.java similarity index 84% rename from code/common/linkdb/java/nu/marginalia/linkdb/dlinks/DomainLinkDbLoader.java rename to code/functions/link-graph/partition/java/nu/marginalia/linkgraph/io/DomainLinksLoader.java index 83af733d..4ab10414 100644 --- a/code/common/linkdb/java/nu/marginalia/linkdb/dlinks/DomainLinkDbLoader.java +++ b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/io/DomainLinksLoader.java @@ -1,17 +1,17 @@ -package 
nu.marginalia.linkdb.dlinks; +package nu.marginalia.linkgraph.io; import java.io.DataInputStream; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -public class DomainLinkDbLoader implements AutoCloseable { +public class DomainLinksLoader implements AutoCloseable { private final DataInputStream stream; private final Path filename; private long nextVal; - public DomainLinkDbLoader(Path filename) throws IOException { + public DomainLinksLoader(Path filename) throws IOException { this.stream = new DataInputStream(Files.newInputStream(filename)); this.filename = filename; } diff --git a/code/common/linkdb/java/nu/marginalia/linkdb/dlinks/DomainLinkDbWriter.java b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/io/DomainLinksWriter.java similarity index 82% rename from code/common/linkdb/java/nu/marginalia/linkdb/dlinks/DomainLinkDbWriter.java rename to code/functions/link-graph/partition/java/nu/marginalia/linkgraph/io/DomainLinksWriter.java index 99830443..0744bcf6 100644 --- a/code/common/linkdb/java/nu/marginalia/linkdb/dlinks/DomainLinkDbWriter.java +++ b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/io/DomainLinksWriter.java @@ -1,4 +1,4 @@ -package nu.marginalia.linkdb.dlinks; +package nu.marginalia.linkgraph.io; import java.io.DataOutputStream; import java.io.IOException; @@ -6,10 +6,10 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardOpenOption; -public class DomainLinkDbWriter implements AutoCloseable { +public class DomainLinksWriter implements AutoCloseable { private final DataOutputStream stream; - public DomainLinkDbWriter(Path fileName) throws IOException { + public DomainLinksWriter(Path fileName) throws IOException { this.stream = new DataOutputStream(Files.newOutputStream(fileName, StandardOpenOption.CREATE, StandardOpenOption.WRITE, diff --git a/code/functions/link-graph/partition/readme.md b/code/functions/link-graph/partition/readme.md new 
file mode 100644 index 00000000..cb116bc0 --- /dev/null +++ b/code/functions/link-graph/partition/readme.md @@ -0,0 +1,11 @@ +The link graph partition module is responsible for knowledge about the link graph +for a single index node. It's based on in-memory data structures, and is updated +atomically from file. + +## Central Classes + +* [PartitionLinkGraphService](java/nu/marginalia/linkgraph/PartitionLinkGraphService.java) +* [DomainLinks](java/nu/marginalia/linkgraph/DomainLinks.java) +* * [FileDomainLinks](java/nu/marginalia/linkgraph/impl/FileDomainLinks.java) +* [DomainLinksWriter](java/nu/marginalia/linkgraph/io/DomainLinksWriter.java) +* [DomainLinksLoader](java/nu/marginalia/linkgraph/io/DomainLinksLoader.java) \ No newline at end of file diff --git a/code/common/linkdb/test/nu/marginalia/linkdb/DomainLinkDbTest.java b/code/functions/link-graph/partition/test/nu/marginalia/linkgraph/DomainLinkDbTest.java similarity index 84% rename from code/common/linkdb/test/nu/marginalia/linkdb/DomainLinkDbTest.java rename to code/functions/link-graph/partition/test/nu/marginalia/linkgraph/DomainLinkDbTest.java index 6db4a8cf..e8b0d3d3 100644 --- a/code/common/linkdb/test/nu/marginalia/linkdb/DomainLinkDbTest.java +++ b/code/functions/link-graph/partition/test/nu/marginalia/linkgraph/DomainLinkDbTest.java @@ -1,7 +1,7 @@ -package nu.marginalia.linkdb; +package nu.marginalia.linkgraph; -import nu.marginalia.linkdb.dlinks.DomainLinkDbLoader; -import nu.marginalia.linkdb.dlinks.DomainLinkDbWriter; +import nu.marginalia.linkgraph.io.DomainLinksLoader; +import nu.marginalia.linkgraph.io.DomainLinksWriter; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; @@ -24,7 +24,7 @@ public class DomainLinkDbTest { @Test public void testWriteRead() { - try (var writer = new DomainLinkDbWriter(fileName)) { + try (var writer = new DomainLinksWriter(fileName)) { writer.write(1, 2); writer.write(2, 3); writer.write(3, 
4); @@ -33,7 +33,7 @@ public class DomainLinkDbTest { throw new RuntimeException(ex); } - try (var reader = new DomainLinkDbLoader(fileName)) { + try (var reader = new DomainLinksLoader(fileName)) { Assertions.assertTrue(reader.next()); Assertions.assertEquals(1, reader.getSource()); Assertions.assertEquals(2, reader.getDest()); diff --git a/code/functions/link-graph/readme.md b/code/functions/link-graph/readme.md new file mode 100644 index 00000000..b906978d --- /dev/null +++ b/code/functions/link-graph/readme.md @@ -0,0 +1,9 @@ +The link graph subsystem is responsible for knowledge about the link graph. + +A SQL database is not very well suited for this, principally it's too slow to update, +instead the link graph is stored in memory, and atomically updated from file. The storage +aspect is handled by the [common/linkdb](../../common/linkdb/) component. + +The link graph subsystem has two components, one which injects into the partitioned services, +e.g. index or execution, and one which aggregates the results from the partitioned services, +and exposes a unified view of the link graph. \ No newline at end of file diff --git a/code/functions/search-query/java/nu/marginalia/util/ngrams/DenseBitMap.java b/code/functions/search-query/java/nu/marginalia/util/ngrams/DenseBitMap.java index 5234b62d..008b17b3 100644 --- a/code/functions/search-query/java/nu/marginalia/util/ngrams/DenseBitMap.java +++ b/code/functions/search-query/java/nu/marginalia/util/ngrams/DenseBitMap.java @@ -5,7 +5,10 @@ import java.nio.ByteBuffer; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardOpenOption; +import java.util.BitSet; +// It's unclear why this exists, we should probably use a BitSet instead? +// Chesterton's fence? 
public class DenseBitMap { public static final long MAX_CAPACITY_2GB_16BN_ITEMS=(1L<<34)-8; diff --git a/code/functions/search-query/readme.md b/code/functions/search-query/readme.md new file mode 100644 index 00000000..54022655 --- /dev/null +++ b/code/functions/search-query/readme.md @@ -0,0 +1,4 @@ +The search query subsystem is responsible for parsing a query, +translating it to a request, and then dispatching it to the +appropriate index nodes and translating the responses back again. + diff --git a/code/index/build.gradle b/code/index/build.gradle index 403da54a..7d52facc 100644 --- a/code/index/build.gradle +++ b/code/index/build.gradle @@ -17,7 +17,7 @@ dependencies { implementation project(':third-party:commons-codec') implementation project(':code:index:api') - implementation project(':code:functions:domain-links:api') + implementation project(':code:functions:link-graph:api') implementation project(':code:libraries:array') implementation project(':code:libraries:btree') diff --git a/code/index/java/nu/marginalia/ranking/domains/data/InvertedLinkGraphSource.java b/code/index/java/nu/marginalia/ranking/domains/data/InvertedLinkGraphSource.java index a3863f6b..8562cdce 100644 --- a/code/index/java/nu/marginalia/ranking/domains/data/InvertedLinkGraphSource.java +++ b/code/index/java/nu/marginalia/ranking/domains/data/InvertedLinkGraphSource.java @@ -3,7 +3,7 @@ package nu.marginalia.ranking.domains.data; import com.google.inject.Inject; import com.zaxxer.hikari.HikariDataSource; import lombok.SneakyThrows; -import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient; +import nu.marginalia.api.linkgraph.AggregateLinkGraphClient; import org.jgrapht.Graph; import org.jgrapht.graph.DefaultDirectedGraph; import org.jgrapht.graph.DefaultEdge; @@ -13,12 +13,12 @@ import org.jgrapht.graph.DefaultEdge; * which is the same as the regular graph except * the direction of the links have been inverted */ public class InvertedLinkGraphSource extends 
AbstractGraphSource { - private final AggregateDomainLinksClient queryClient; + private final AggregateLinkGraphClient graphClient; @Inject - public InvertedLinkGraphSource(HikariDataSource dataSource, AggregateDomainLinksClient queryClient) { + public InvertedLinkGraphSource(HikariDataSource dataSource, AggregateLinkGraphClient graphClient) { super(dataSource); - this.queryClient = queryClient; + this.graphClient = graphClient; } @SneakyThrows @Override @@ -27,7 +27,7 @@ public class InvertedLinkGraphSource extends AbstractGraphSource { addVertices(graph); - var allLinks = queryClient.getAllDomainLinks(); + var allLinks = graphClient.getAllDomainLinks(); var iter = allLinks.iterator(); while (iter.advance()) { if (!graph.containsVertex(iter.dest())) { diff --git a/code/index/java/nu/marginalia/ranking/domains/data/LinkGraphSource.java b/code/index/java/nu/marginalia/ranking/domains/data/LinkGraphSource.java index 7c4bcb55..341a77df 100644 --- a/code/index/java/nu/marginalia/ranking/domains/data/LinkGraphSource.java +++ b/code/index/java/nu/marginalia/ranking/domains/data/LinkGraphSource.java @@ -3,19 +3,19 @@ package nu.marginalia.ranking.domains.data; import com.google.inject.Inject; import com.zaxxer.hikari.HikariDataSource; import lombok.SneakyThrows; -import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient; +import nu.marginalia.api.linkgraph.AggregateLinkGraphClient; import org.jgrapht.Graph; import org.jgrapht.graph.DefaultDirectedGraph; import org.jgrapht.graph.DefaultEdge; /** A source for the regular link graph. 
*/ public class LinkGraphSource extends AbstractGraphSource { - private final AggregateDomainLinksClient domainLinksClient; + private final AggregateLinkGraphClient graphClient; @Inject - public LinkGraphSource(HikariDataSource dataSource, AggregateDomainLinksClient domainLinksClient) { + public LinkGraphSource(HikariDataSource dataSource, AggregateLinkGraphClient graphClient) { super(dataSource); - this.domainLinksClient = domainLinksClient; + this.graphClient = graphClient; } @SneakyThrows @@ -25,7 +25,7 @@ public class LinkGraphSource extends AbstractGraphSource { addVertices(graph); - var allLinks = domainLinksClient.getAllDomainLinks(); + var allLinks = graphClient.getAllDomainLinks(); var iter = allLinks.iterator(); while (iter.advance()) { if (!graph.containsVertex(iter.dest())) { diff --git a/code/index/readme.md b/code/index/readme.md index bc44c7d8..2254c2a2 100644 --- a/code/index/readme.md +++ b/code/index/readme.md @@ -1,6 +1,6 @@ # Index -This module contains the components that make up the search index. +This index subsystem contains the components that make up the search index. It exposes an API for querying the index, and contains the logic for ranking search results. It does not parse the query, that is @@ -10,9 +10,9 @@ the responsibility of the [search-query](../functions/search-query) module. There are two indexes with accompanying tools for constructing them. -* [index-reverse](reverse-index/) is code for `word->document` indexes. There are two such indexes, one containing only document-word pairs that are flagged as important, e.g. the word appears in the title or has a high TF-IDF. This allows good results to be discovered quickly without having to sift through ten thousand bad ones first. +* [index-reverse](index-reverse/) is code for `word->document` indexes. There are two such indexes, one containing only document-word pairs that are flagged as important, e.g. the word appears in the title or has a high TF-IDF. 
This allows good results to be discovered quickly without having to sift through ten thousand bad ones first. -* [index-forward](forward-index/) is the `document->word` index containing metadata about each word, such as its position. It is used after identifying candidate search results via the reverse index to fetch metadata and rank the results. +* [index-forward](index-forward/) is the `document->word` index containing metadata about each word, such as its position. It is used after identifying candidate search results via the reverse index to fetch metadata and rank the results. Additionally, the [index-journal](index-journal/) contains code for constructing a journal of the index, which is used to keep the index up to date. diff --git a/code/index/test/nu/marginalia/ranking/domains/RankingAlgorithmsContainerTest.java b/code/index/test/nu/marginalia/ranking/domains/RankingAlgorithmsContainerTest.java index c25b818a..f748465a 100644 --- a/code/index/test/nu/marginalia/ranking/domains/RankingAlgorithmsContainerTest.java +++ b/code/index/test/nu/marginalia/ranking/domains/RankingAlgorithmsContainerTest.java @@ -3,7 +3,7 @@ package nu.marginalia.ranking.domains; import com.zaxxer.hikari.HikariConfig; import com.zaxxer.hikari.HikariDataSource; -import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient; +import nu.marginalia.api.linkgraph.AggregateLinkGraphClient; import nu.marginalia.ranking.domains.data.InvertedLinkGraphSource; import nu.marginalia.ranking.domains.data.LinkGraphSource; import nu.marginalia.ranking.domains.data.SimilarityGraphSource; @@ -36,8 +36,8 @@ public class RankingAlgorithmsContainerTest { static HikariDataSource dataSource; - AggregateDomainLinksClient domainLinksClient; - AggregateDomainLinksClient.AllLinks allLinks; + AggregateLinkGraphClient domainLinksClient; + AggregateLinkGraphClient.AllLinks allLinks; @BeforeAll public static void setup() { @@ -66,8 +66,8 @@ public class RankingAlgorithmsContainerTest { @BeforeEach public 
void setupQueryClient() { - domainLinksClient = Mockito.mock(AggregateDomainLinksClient.class); - allLinks = new AggregateDomainLinksClient.AllLinks(); + domainLinksClient = Mockito.mock(AggregateLinkGraphClient.class); + allLinks = new AggregateLinkGraphClient.AllLinks(); when(domainLinksClient.getAllDomainLinks()).thenReturn(allLinks); try (var conn = dataSource.getConnection(); diff --git a/code/libraries/language-processing/readme.md b/code/libraries/language-processing/readme.md index 5b12a27d..7b8ee049 100644 --- a/code/libraries/language-processing/readme.md +++ b/code/libraries/language-processing/readme.md @@ -11,6 +11,4 @@ its words, how they stem, POS tags, and so on. ## See Also [features-convert/keyword-extraction](../../features-convert/keyword-extraction) uses this code to identify which keywords -are important. - -[features-qs/query-parser](../../features-qs/query-parser) also does some language processing. \ No newline at end of file +are important. \ No newline at end of file diff --git a/code/libraries/term-frequency-dict/readme.md b/code/libraries/term-frequency-dict/readme.md index 810c3751..1c1e9c67 100644 --- a/code/libraries/term-frequency-dict/readme.md +++ b/code/libraries/term-frequency-dict/readme.md @@ -6,7 +6,3 @@ the TF-IDF score of a keyword. 
## Central Classes * [TermFrequencyDict](java/nu/marginalia/term_frequency_dict/TermFrequencyDict.java) - -## See Also - -* [tools/term-frequency-extractor](../../tools/term-frequency-extractor) constructs this file \ No newline at end of file diff --git a/code/processes/index-constructor-process/readme.md b/code/processes/index-constructor-process/readme.md index ecf791b8..6e7a46ee 100644 --- a/code/processes/index-constructor-process/readme.md +++ b/code/processes/index-constructor-process/readme.md @@ -10,8 +10,8 @@ There are three types of indexes: This is a very light-weight module that delegates the actual work to the modules: -* [features-index/index-reverse](../../features-index/index-reverse) -* [features-index/index-forward](../../features-index/index-forward) +* [features-index/index-reverse](../../index/index-reverse) +* [features-index/index-forward](../../index/index-forward) Their respective readme files contain more information about the indexes themselves and how they are constructed. 
diff --git a/code/processes/loading-process/build.gradle b/code/processes/loading-process/build.gradle index cf3ef16b..4a2afc68 100644 --- a/code/processes/loading-process/build.gradle +++ b/code/processes/loading-process/build.gradle @@ -40,6 +40,8 @@ dependencies { implementation project(':code:process-models:work-log') implementation project(':code:features-convert:keyword-extraction') + implementation project(':code:functions:link-graph:partition') + implementation libs.bundles.slf4j implementation libs.guice diff --git a/code/processes/loading-process/java/nu/marginalia/loading/LoaderModule.java b/code/processes/loading-process/java/nu/marginalia/loading/LoaderModule.java index 4f9765a4..8b4719e6 100644 --- a/code/processes/loading-process/java/nu/marginalia/loading/LoaderModule.java +++ b/code/processes/loading-process/java/nu/marginalia/loading/LoaderModule.java @@ -9,7 +9,7 @@ import com.google.inject.name.Names; import nu.marginalia.LanguageModels; import nu.marginalia.WmsaHome; import nu.marginalia.IndexLocations; -import nu.marginalia.linkdb.dlinks.DomainLinkDbWriter; +import nu.marginalia.linkgraph.io.DomainLinksWriter; import nu.marginalia.storage.FileStorageService; import nu.marginalia.linkdb.docs.DocumentDbWriter; import nu.marginalia.model.gson.GsonFactory; @@ -45,7 +45,7 @@ public class LoaderModule extends AbstractModule { } @Inject @Provides @Singleton - private DomainLinkDbWriter createDomainLinkdbWriter(FileStorageService service) throws SQLException, IOException { + private DomainLinksWriter createDomainLinkdbWriter(FileStorageService service) throws SQLException, IOException { Path dbPath = IndexLocations.getLinkdbWritePath(service).resolve(DOMAIN_LINKS_FILE_NAME); @@ -53,7 +53,7 @@ public class LoaderModule extends AbstractModule { Files.delete(dbPath); } - return new DomainLinkDbWriter(dbPath); + return new DomainLinksWriter(dbPath); } private Gson createGson() { diff --git 
a/code/processes/loading-process/java/nu/marginalia/loading/links/DomainLinksLoaderService.java b/code/processes/loading-process/java/nu/marginalia/loading/links/DomainLinksLoaderService.java index 8cf42218..06bf4c95 100644 --- a/code/processes/loading-process/java/nu/marginalia/loading/links/DomainLinksLoaderService.java +++ b/code/processes/loading-process/java/nu/marginalia/loading/links/DomainLinksLoaderService.java @@ -4,7 +4,7 @@ import com.google.inject.Inject; import com.google.inject.Singleton; import lombok.SneakyThrows; import nu.marginalia.io.processed.DomainLinkRecordParquetFileReader; -import nu.marginalia.linkdb.dlinks.DomainLinkDbWriter; +import nu.marginalia.linkgraph.io.DomainLinksWriter; import nu.marginalia.loading.LoaderInputData; import nu.marginalia.loading.domains.DomainIdRegistry; import nu.marginalia.model.processed.DomainLinkRecord; @@ -20,10 +20,10 @@ public class DomainLinksLoaderService { private static final Logger logger = LoggerFactory.getLogger(DomainLinksLoaderService.class); - private final DomainLinkDbWriter domainLinkDbWriter; + private final DomainLinksWriter domainLinkDbWriter; @Inject - public DomainLinksLoaderService(DomainLinkDbWriter domainLinkDbWriter) { + public DomainLinksLoaderService(DomainLinksWriter domainLinkDbWriter) { this.domainLinkDbWriter = domainLinkDbWriter; } diff --git a/code/processes/readme.md b/code/processes/readme.md index acfe5a39..3bdc0970 100644 --- a/code/processes/readme.md +++ b/code/processes/readme.md @@ -17,7 +17,7 @@ described in [processed-data](../process-models/processed-data/). The [loading-process](loading-process/) reads the processed data. -It has creates an [index journal](../features-index/index-journal), +It has creates an [index journal](../index/index-journal), a [link database](../common/linkdb), and loads domains and domain-links into the [MariaDB database](../common/db). 
diff --git a/code/processes/website-adjacencies-calculator/build.gradle b/code/processes/website-adjacencies-calculator/build.gradle index 7c4b9623..6019d1dd 100644 --- a/code/processes/website-adjacencies-calculator/build.gradle +++ b/code/processes/website-adjacencies-calculator/build.gradle @@ -25,7 +25,7 @@ dependencies { implementation project(':code:common:process') implementation project(':code:common:service-discovery') implementation project(':code:common:service') - implementation project(':code:functions:domain-links:api') + implementation project(':code:functions:link-graph:api') implementation libs.bundles.slf4j diff --git a/code/processes/website-adjacencies-calculator/java/nu/marginalia/adjacencies/AdjacenciesData.java b/code/processes/website-adjacencies-calculator/java/nu/marginalia/adjacencies/AdjacenciesData.java index d0886b9f..f52a3161 100644 --- a/code/processes/website-adjacencies-calculator/java/nu/marginalia/adjacencies/AdjacenciesData.java +++ b/code/processes/website-adjacencies-calculator/java/nu/marginalia/adjacencies/AdjacenciesData.java @@ -4,7 +4,7 @@ import gnu.trove.list.TIntList; import gnu.trove.list.array.TIntArrayList; import gnu.trove.map.hash.TIntObjectHashMap; import gnu.trove.set.hash.TIntHashSet; -import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient; +import nu.marginalia.api.linkgraph.AggregateLinkGraphClient; import org.roaringbitmap.RoaringBitmap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,7 +35,7 @@ public class AdjacenciesData { return ret; } - public AdjacenciesData(AggregateDomainLinksClient linksClient, + public AdjacenciesData(AggregateLinkGraphClient linksClient, DomainAliases aliases) { logger.info("Loading adjacency data"); diff --git a/code/processes/website-adjacencies-calculator/java/nu/marginalia/adjacencies/WebsiteAdjacenciesCalculator.java b/code/processes/website-adjacencies-calculator/java/nu/marginalia/adjacencies/WebsiteAdjacenciesCalculator.java index 
1a40c354..92aae06a 100644 --- a/code/processes/website-adjacencies-calculator/java/nu/marginalia/adjacencies/WebsiteAdjacenciesCalculator.java +++ b/code/processes/website-adjacencies-calculator/java/nu/marginalia/adjacencies/WebsiteAdjacenciesCalculator.java @@ -4,7 +4,7 @@ import com.google.inject.Guice; import com.zaxxer.hikari.HikariDataSource; import lombok.SneakyThrows; import nu.marginalia.ProcessConfiguration; -import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient; +import nu.marginalia.api.linkgraph.AggregateLinkGraphClient; import nu.marginalia.db.DbDomainQueries; import nu.marginalia.model.EdgeDomain; import nu.marginalia.process.control.ProcessHeartbeat; @@ -32,7 +32,7 @@ public class WebsiteAdjacenciesCalculator extends ProcessMainClass { private static final Logger logger = LoggerFactory.getLogger(WebsiteAdjacenciesCalculator.class); float[] weights; - public WebsiteAdjacenciesCalculator(AggregateDomainLinksClient domainLinksClient, HikariDataSource dataSource) throws SQLException { + public WebsiteAdjacenciesCalculator(AggregateLinkGraphClient domainLinksClient, HikariDataSource dataSource) throws SQLException { this.dataSource = dataSource; domainAliases = new DomainAliases(dataSource); @@ -154,7 +154,7 @@ public class WebsiteAdjacenciesCalculator extends ProcessMainClass { var dataSource = injector.getInstance(HikariDataSource.class); - var lc = injector.getInstance(AggregateDomainLinksClient.class); + var lc = injector.getInstance(AggregateLinkGraphClient.class); if (!lc.waitReady(Duration.ofSeconds(30))) { throw new IllegalStateException("Failed to connect to domain-links"); diff --git a/code/readme.md b/code/readme.md index 1792ec5f..d75912b2 100644 --- a/code/readme.md +++ b/code/readme.md @@ -23,18 +23,38 @@ eligible index services. The control service is responsible for distributing co service, and for monitoring the health of the system. It also offers a web interface for operating the system. 
### Services + * [core services](services-core/) Most of these services are stateful, memory hungry, and doing heavy lifting. * * [control](services-core/control-service) * * [query](services-core/query-service) +* * * Exposes the [functions/link-graph](functions/link-graph) subsystem +* * * Exposes the [functions/search-query](functions/search-query) subsystem * * [index](services-core/index-service) +* * * Exposes the [index](index) subsystem +* * * Exposes the [functions/link-graph](functions/link-graph) subsystem * * [executor](services-core/executor-service) +* * * Exposes the [execution](execution) subsystem * * [assistant](services-core/assistant-service) +* * * Exposes the [functions/math](functions/math) subsystem +* * * Exposes the [functions/domain-info](functions/domain-info) subsystem * [application services](services-application/) Mostly stateless gateways providing access to the core services. -* * [api](services-application/api-service) - public API +* * [api](services-application/api-service) - public API gateway * * [search](services-application/search-service) - marginalia search application -* * [dating](services-application/dating-service) - [https://explore.marginalia.nu/](https://explore.marginalia.nu/) -* * [explorer](services-application/explorer-service) - [https://explore2.marginalia.nu/](https://explore2.marginalia.nu/) -* an [internal API](api/) +* * [dating](services-application/dating-service) - [https://explore.marginalia.nu/](https://explore.marginalia.nu/) +* * [explorer](services-application/explorer-service) - [https://explore2.marginalia.nu/](https://explore2.marginalia.nu/) + +The system uses a service registry to find the services. The service registry is based on zookeeper, +and is a separate service. The registry doesn't keep track of processes, but APIs. This means that +the system is flexible to reconfiguration. The same code can in principle be run as a micro-service +mesh or as a monolith. 
+ +This is an unusual architecture, but it has the benefit that you don't need to think too much about +the layout of the system. You can just request an API and talk to it. Because of this, several of the +services have almost no code of their own. They merely import a library and expose it as a service. + +These skeleton services are marked with (S). + +Services that expose HTTP endpoints tend to have more code. They are marked with (G). ### Processes @@ -55,7 +75,6 @@ but isolated. * [features-search](features-search) * [features-crawl](features-crawl) * [features-convert](features-convert) -* [features-index](features-index) ### Libraries and primitives diff --git a/code/services-core/control-service/readme.md b/code/services-core/control-service/readme.md index 5da87273..24958d95 100644 --- a/code/services-core/control-service/readme.md +++ b/code/services-core/control-service/readme.md @@ -4,8 +4,7 @@ The control service provides an operator's user interface. By default, this int exposed on port 8081. It does not offer any sort of access control or authentication. The control service will itself execute tasks that affect the entire system, but delegate -node-specific tasks to the corresponding [executor-service](../executor-service) via the -[executor-api](../../api/executor-api). +node-specific tasks to the corresponding [execution subsystem](../../execution). 
Conceptually the application is broken into three parts: diff --git a/code/services-core/executor-service/build.gradle b/code/services-core/executor-service/build.gradle index 43c0a23e..a6a88251 100644 --- a/code/services-core/executor-service/build.gradle +++ b/code/services-core/executor-service/build.gradle @@ -59,7 +59,7 @@ dependencies { implementation project(':code:libraries:message-queue') - implementation project(':code:functions:domain-links:api') + implementation project(':code:functions:link-graph:api') implementation project(':code:process-models:crawl-spec') implementation project(':code:process-models:crawling-model') diff --git a/code/services-core/executor-service/readme.md b/code/services-core/executor-service/readme.md index 1f05c3a4..280defd1 100644 --- a/code/services-core/executor-service/readme.md +++ b/code/services-core/executor-service/readme.md @@ -1,16 +1,10 @@ The executor service is a partitioned service responsible for executing and keeping -track of long running maintenance and operational tasks, such as crawling or data -processing. +track of long-running maintenance and operational tasks, such as crawling or data +processing. -It accomplishes this using the [message queue and actor library](../../libraries/message-queue/), -which permits program state to survive crashes and reboots. The executor service is closely -linked to the [control-service](../control-service), which provides a user interface for -much of the executor's functionality. +The executor service is closely linked to the [control-service](../control-service), +which provides a user interface for much of the executor's functionality. -## Central Classes +The service is itself relatively bare of code, but imports and exposes the [execution subsystem](../../execution), +which is responsible for the actual execution of tasks. 
-* [ExecutorActorControlService](java/nu/marginalia/actor/ExecutorActorControlService.java) - -## See Also - -* [api/executor-api](../../api/executor-api) \ No newline at end of file diff --git a/code/services-core/index-service/build.gradle b/code/services-core/index-service/build.gradle index 05aa3af9..f68d093a 100644 --- a/code/services-core/index-service/build.gradle +++ b/code/services-core/index-service/build.gradle @@ -46,8 +46,8 @@ dependencies { implementation project(':code:common:linkdb') implementation project(':code:index') - implementation project(':code:functions:domain-links:partition') - implementation project(':code:functions:domain-links:api') + implementation project(':code:functions:link-graph:partition') + implementation project(':code:functions:link-graph:api') implementation project(':code:functions:search-query:api') implementation project(':code:index:api') diff --git a/code/services-core/index-service/java/nu/marginalia/index/IndexModule.java b/code/services-core/index-service/java/nu/marginalia/index/IndexModule.java index 526e34bd..f998dfcf 100644 --- a/code/services-core/index-service/java/nu/marginalia/index/IndexModule.java +++ b/code/services-core/index-service/java/nu/marginalia/index/IndexModule.java @@ -4,8 +4,8 @@ import com.google.inject.AbstractModule; import com.google.inject.Provides; import com.google.inject.Singleton; import com.google.inject.name.Named; -import nu.marginalia.linkdb.dlinks.DomainLinkDb; -import nu.marginalia.linkdb.dlinks.DelayingDomainLinkDb; +import nu.marginalia.linkgraph.DomainLinks; +import nu.marginalia.linkgraph.impl.DelayingDomainLinks; import nu.marginalia.storage.FileStorageService; import nu.marginalia.IndexLocations; import org.slf4j.Logger; @@ -26,13 +26,13 @@ public class IndexModule extends AbstractModule { @Provides @Singleton - public DomainLinkDb domainLinkDb ( + public DomainLinks domainLinkDb ( FileStorageService storageService ) { Path path = 
IndexLocations.getLinkdbLivePath(storageService).resolve(DOMAIN_LINKS_FILE_NAME); - return new DelayingDomainLinkDb(path); + return new DelayingDomainLinks(path); } @Provides diff --git a/code/services-core/index-service/java/nu/marginalia/index/IndexService.java b/code/services-core/index-service/java/nu/marginalia/index/IndexService.java index ee96e9d4..cf9187d0 100644 --- a/code/services-core/index-service/java/nu/marginalia/index/IndexService.java +++ b/code/services-core/index-service/java/nu/marginalia/index/IndexService.java @@ -3,9 +3,9 @@ package nu.marginalia.index; import com.google.inject.Inject; import lombok.SneakyThrows; import nu.marginalia.IndexLocations; -import nu.marginalia.functions.domainlinks.PartitionDomainLinksService; +import nu.marginalia.linkgraph.PartitionLinkGraphService; import nu.marginalia.index.index.StatefulIndex; -import nu.marginalia.linkdb.dlinks.DomainLinkDb; +import nu.marginalia.linkgraph.DomainLinks; import nu.marginalia.service.discovery.property.ServicePartition; import nu.marginalia.storage.FileStorageService; import nu.marginalia.index.api.IndexMqEndpoints; @@ -34,7 +34,7 @@ public class IndexService extends Service { private final FileStorageService fileStorageService; private final DocumentDbReader documentDbReader; - private final DomainLinkDb domainLinkDb; + private final DomainLinks domainLinks; private final ServiceEventLog eventLog; @@ -46,21 +46,21 @@ public class IndexService extends Service { StatefulIndex statefulIndex, FileStorageService fileStorageService, DocumentDbReader documentDbReader, - DomainLinkDb domainLinkDb, - PartitionDomainLinksService partitionDomainLinksService, + DomainLinks domainLinks, + PartitionLinkGraphService partitionLinkGraphService, ServiceEventLog eventLog) { super(params, ServicePartition.partition(params.configuration.node()), List.of(indexQueryService, - partitionDomainLinksService) + partitionLinkGraphService) ); this.opsService = opsService; this.statefulIndex = statefulIndex; 
this.fileStorageService = fileStorageService; this.documentDbReader = documentDbReader; - this.domainLinkDb = domainLinkDb; + this.domainLinks = domainLinks; this.eventLog = eventLog; this.init = params.initialization; @@ -106,7 +106,7 @@ public class IndexService extends Service { if (Files.exists(newPathDomains)) { eventLog.logEvent("INDEX-SWITCH-DOMAIN-LINKDB", ""); - domainLinkDb.switchInput(newPathDomains); + domainLinks.switchInput(newPathDomains); } } diff --git a/code/services-core/query-service/build.gradle b/code/services-core/query-service/build.gradle index 96022efd..591ef884 100644 --- a/code/services-core/query-service/build.gradle +++ b/code/services-core/query-service/build.gradle @@ -50,8 +50,8 @@ dependencies { implementation project(':code:functions:search-query') implementation project(':code:functions:search-query:api') - implementation project(':code:functions:domain-links:api') - implementation project(':code:functions:domain-links:aggregate') + implementation project(':code:functions:link-graph:api') + implementation project(':code:functions:link-graph:aggregate') implementation libs.bundles.slf4j diff --git a/code/services-core/query-service/java/nu/marginalia/query/QueryService.java b/code/services-core/query-service/java/nu/marginalia/query/QueryService.java index b8a4c6c5..b7dcc04c 100644 --- a/code/services-core/query-service/java/nu/marginalia/query/QueryService.java +++ b/code/services-core/query-service/java/nu/marginalia/query/QueryService.java @@ -2,7 +2,7 @@ package nu.marginalia.query; import com.google.inject.Inject; import lombok.SneakyThrows; -import nu.marginalia.functions.domainlinks.AggregateDomainLinksService; +import nu.marginalia.linkgraph.AggregateLinkGraphService; import nu.marginalia.functions.searchquery.QueryGRPCService; import nu.marginalia.service.discovery.property.ServicePartition; import nu.marginalia.service.server.BaseServiceParams; @@ -17,7 +17,7 @@ public class QueryService extends Service { @SneakyThrows 
@Inject public QueryService(BaseServiceParams params, - AggregateDomainLinksService domainLinksService, + AggregateLinkGraphService domainLinksService, QueryGRPCService queryGRPCService, QueryBasicInterface queryBasicInterface) { diff --git a/settings.gradle b/settings.gradle index 0dbc4376..fca2091c 100644 --- a/settings.gradle +++ b/settings.gradle @@ -17,9 +17,9 @@ include 'code:functions:math:api' include 'code:functions:domain-info' include 'code:functions:domain-info:api' -include 'code:functions:domain-links:partition' -include 'code:functions:domain-links:aggregate' -include 'code:functions:domain-links:api' +include 'code:functions:link-graph:partition' +include 'code:functions:link-graph:aggregate' +include 'code:functions:link-graph:api' include 'code:functions:search-query' include 'code:functions:search-query:api'