mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
(search) Move site information out of the search service and into assistant.
This reduces the impact of restarting the search service, as the site information takes a few minutes to load, during which time it is not available. It also permits exposing this information via the API in the future, if there is interest in this. The assistant service was also modified to do a late load of the suggestions trie, as this is a major contributor to its start-up time. Finally, some changes were made to the client library: a new get() method was added that takes a TypeToken to allow deserialization of generics such as List<Foo>, and the scheduler was modified to use virtual threads.
This commit is contained in:
parent
5c46af0edb
commit
8ef34883a8
@ -1,12 +1,14 @@
|
|||||||
package nu.marginalia.assistant.client;
|
package nu.marginalia.assistant.client;
|
||||||
|
|
||||||
|
import com.google.gson.reflect.TypeToken;
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.google.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
import io.reactivex.rxjava3.core.Observable;
|
import io.reactivex.rxjava3.core.Observable;
|
||||||
import nu.marginalia.assistant.client.model.DictionaryResponse;
|
import nu.marginalia.assistant.client.model.DictionaryResponse;
|
||||||
|
import nu.marginalia.assistant.client.model.DomainInformation;
|
||||||
|
import nu.marginalia.assistant.client.model.SimilarDomain;
|
||||||
import nu.marginalia.client.AbstractDynamicClient;
|
import nu.marginalia.client.AbstractDynamicClient;
|
||||||
import nu.marginalia.client.exception.RouteNotConfiguredException;
|
import nu.marginalia.client.exception.RouteNotConfiguredException;
|
||||||
import nu.marginalia.WmsaHome;
|
|
||||||
import nu.marginalia.model.gson.GsonFactory;
|
import nu.marginalia.model.gson.GsonFactory;
|
||||||
import nu.marginalia.service.descriptor.ServiceDescriptors;
|
import nu.marginalia.service.descriptor.ServiceDescriptors;
|
||||||
import nu.marginalia.service.id.ServiceId;
|
import nu.marginalia.service.id.ServiceId;
|
||||||
@ -14,6 +16,7 @@ import nu.marginalia.client.Context;
|
|||||||
|
|
||||||
import java.net.URLEncoder;
|
import java.net.URLEncoder;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
@Singleton
|
@Singleton
|
||||||
@ -59,4 +62,31 @@ public class AssistantClient extends AbstractDynamicClient {
|
|||||||
return Observable.empty();
|
return Observable.empty();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Observable<ArrayList<SimilarDomain>> similarDomains(Context ctx, int domainId, int count) {
|
||||||
|
try {
|
||||||
|
return super.get(ctx, 0, STR."/domain/\{domainId}/similar?count=\{count}", new TypeToken<ArrayList<SimilarDomain>>() {});
|
||||||
|
}
|
||||||
|
catch (RouteNotConfiguredException ex) {
|
||||||
|
return Observable.empty();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Observable<ArrayList<SimilarDomain>> linkedDomains(Context ctx, int domainId, int count) {
|
||||||
|
try {
|
||||||
|
return super.get(ctx, 0, STR."/domain/\{domainId}/linking?count=\{count}", new TypeToken<ArrayList<SimilarDomain>>() {});
|
||||||
|
}
|
||||||
|
catch (RouteNotConfiguredException ex) {
|
||||||
|
return Observable.empty();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Observable<DomainInformation> domainInformation(Context ctx, int domainId) {
|
||||||
|
try {
|
||||||
|
return super.get(ctx, 0, STR."/domain/\{domainId}/info", DomainInformation.class);
|
||||||
|
}
|
||||||
|
catch (RouteNotConfiguredException ex) {
|
||||||
|
return Observable.empty();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
package nu.marginalia.search.model;
|
package nu.marginalia.assistant.client.model;
|
||||||
|
|
||||||
import lombok.*;
|
import lombok.*;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
@ -24,5 +24,4 @@ public class DomainInformation {
|
|||||||
boolean unknownDomain;
|
boolean unknownDomain;
|
||||||
|
|
||||||
String state;
|
String state;
|
||||||
List<EdgeDomain> linkingDomains;
|
|
||||||
}
|
}
|
@ -0,0 +1,69 @@
|
|||||||
|
package nu.marginalia.assistant.client.model;
|
||||||
|
|
||||||
|
import nu.marginalia.model.EdgeUrl;
|
||||||
|
|
||||||
|
public record SimilarDomain(EdgeUrl url,
|
||||||
|
int domainId,
|
||||||
|
double relatedness,
|
||||||
|
double rank,
|
||||||
|
boolean indexed,
|
||||||
|
boolean active,
|
||||||
|
boolean screenshot,
|
||||||
|
LinkType linkType) {
|
||||||
|
|
||||||
|
public String getRankSymbols() {
|
||||||
|
if (rank > 90) {
|
||||||
|
return "★★★★★";
|
||||||
|
}
|
||||||
|
if (rank > 70) {
|
||||||
|
return "★★★★";
|
||||||
|
}
|
||||||
|
if (rank > 50) {
|
||||||
|
return "★★★";
|
||||||
|
}
|
||||||
|
if (rank > 30) {
|
||||||
|
return "★★";
|
||||||
|
}
|
||||||
|
if (rank > 10) {
|
||||||
|
return "★";
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
public enum LinkType {
|
||||||
|
BACKWARD,
|
||||||
|
FOWARD,
|
||||||
|
BIDIRECTIONAL,
|
||||||
|
NONE;
|
||||||
|
|
||||||
|
public static LinkType find(boolean linkStod,
|
||||||
|
boolean linkDtos) {
|
||||||
|
if (linkDtos && linkStod)
|
||||||
|
return BIDIRECTIONAL;
|
||||||
|
if (linkDtos)
|
||||||
|
return FOWARD;
|
||||||
|
if (linkStod)
|
||||||
|
return BACKWARD;
|
||||||
|
|
||||||
|
return NONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return switch (this) {
|
||||||
|
case FOWARD -> "→";
|
||||||
|
case BACKWARD -> "←";
|
||||||
|
case BIDIRECTIONAL -> "⇆";
|
||||||
|
case NONE -> "-";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getDescription() {
|
||||||
|
return switch (this) {
|
||||||
|
case BACKWARD -> "Backward Link";
|
||||||
|
case FOWARD -> "Forward Link";
|
||||||
|
case BIDIRECTIONAL -> "Mutual Link";
|
||||||
|
case NONE -> "No Link";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -1,6 +1,5 @@
|
|||||||
package nu.marginalia.client;
|
package nu.marginalia.client;
|
||||||
|
|
||||||
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
|
||||||
import io.reactivex.rxjava3.core.Scheduler;
|
import io.reactivex.rxjava3.core.Scheduler;
|
||||||
import io.reactivex.rxjava3.schedulers.Schedulers;
|
import io.reactivex.rxjava3.schedulers.Schedulers;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
@ -10,26 +9,16 @@ import javax.annotation.Nonnull;
|
|||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
import java.util.concurrent.Executors;
|
import java.util.concurrent.Executors;
|
||||||
import java.util.concurrent.ThreadFactory;
|
|
||||||
|
|
||||||
public class AbortingScheduler {
|
public class AbortingScheduler {
|
||||||
private final ThreadFactory threadFactory;
|
|
||||||
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
|
|
||||||
@Nullable
|
@Nullable
|
||||||
private ExecutorService executorService;
|
private ExecutorService executorService;
|
||||||
|
|
||||||
public AbortingScheduler(String name) {
|
public AbortingScheduler() {
|
||||||
threadFactory = new ThreadFactoryBuilder()
|
|
||||||
.setNameFormat(name+"client--%d")
|
|
||||||
.setUncaughtExceptionHandler(this::handleException)
|
|
||||||
.build();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void handleException(Thread thread, Throwable throwable) {
|
|
||||||
logger.error("Uncaught exception during Client IO in thread {}", thread.getName(), throwable);
|
|
||||||
}
|
|
||||||
|
|
||||||
public synchronized Scheduler get() {
|
public synchronized Scheduler get() {
|
||||||
return Schedulers.from(getExecutorService(),
|
return Schedulers.from(getExecutorService(),
|
||||||
@ -40,14 +29,14 @@ public class AbortingScheduler {
|
|||||||
public synchronized void abort() {
|
public synchronized void abort() {
|
||||||
if (null != executorService) {
|
if (null != executorService) {
|
||||||
executorService.shutdownNow();
|
executorService.shutdownNow();
|
||||||
executorService = Executors.newFixedThreadPool(16, threadFactory);
|
executorService = Executors.newVirtualThreadPerTaskExecutor();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
private synchronized ExecutorService getExecutorService() {
|
private synchronized ExecutorService getExecutorService() {
|
||||||
if (null == executorService) {
|
if (null == executorService) {
|
||||||
executorService = Executors.newFixedThreadPool(16, threadFactory);
|
executorService = Executors.newVirtualThreadPerTaskExecutor();
|
||||||
}
|
}
|
||||||
return executorService;
|
return executorService;
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package nu.marginalia.client;
|
package nu.marginalia.client;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
|
import com.google.gson.reflect.TypeToken;
|
||||||
import com.google.protobuf.GeneratedMessageV3;
|
import com.google.protobuf.GeneratedMessageV3;
|
||||||
import io.reactivex.rxjava3.core.Observable;
|
import io.reactivex.rxjava3.core.Observable;
|
||||||
import io.reactivex.rxjava3.core.ObservableSource;
|
import io.reactivex.rxjava3.core.ObservableSource;
|
||||||
@ -20,6 +21,7 @@ import org.slf4j.LoggerFactory;
|
|||||||
import spark.utils.IOUtils;
|
import spark.utils.IOUtils;
|
||||||
|
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
|
import java.lang.reflect.Type;
|
||||||
import java.net.ConnectException;
|
import java.net.ConnectException;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.function.Supplier;
|
import java.util.function.Supplier;
|
||||||
@ -233,6 +235,22 @@ public abstract class AbstractClient implements AutoCloseable {
|
|||||||
.doFinally(() -> ThreadContext.remove("outbound-request"));
|
.doFinally(() -> ThreadContext.remove("outbound-request"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected synchronized <T> Observable<T> get(Context ctx, int node, String endpoint, TypeToken<T> type) {
|
||||||
|
ensureAlive(node);
|
||||||
|
|
||||||
|
var req = ctx.paint(new Request.Builder()).url(serviceRoutes.get(node) + endpoint).get().build();
|
||||||
|
|
||||||
|
return Observable.just(client.newCall(req))
|
||||||
|
.subscribeOn(scheduler().get())
|
||||||
|
.map(this::logInbound)
|
||||||
|
.map(Call::execute)
|
||||||
|
.map(this::logOutbound)
|
||||||
|
.map(rsp -> validateResponseStatus(rsp, req, 200))
|
||||||
|
.map(rsp -> getEntity(rsp, type))
|
||||||
|
.retryWhen(this::retryHandler)
|
||||||
|
.timeout(timeout, TimeUnit.SECONDS)
|
||||||
|
.doFinally(() -> ThreadContext.remove("outbound-request"));
|
||||||
|
}
|
||||||
protected synchronized Observable<Integer> get(Context ctx, int node, String endpoint, OutputStream outputStream) {
|
protected synchronized Observable<Integer> get(Context ctx, int node, String endpoint, OutputStream outputStream) {
|
||||||
ensureAlive(node);
|
ensureAlive(node);
|
||||||
|
|
||||||
@ -388,6 +406,15 @@ public abstract class AbstractClient implements AutoCloseable {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
|
private <T> T getEntity(Response response, TypeToken<T> clazz) {
|
||||||
|
try (response) {
|
||||||
|
return gson.fromJson(response.body().charStream(), clazz);
|
||||||
|
}
|
||||||
|
catch (Exception ex) {
|
||||||
|
throw ex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@SneakyThrows
|
||||||
private String getText(Response response) {
|
private String getText(Response response) {
|
||||||
try (response) {
|
try (response) {
|
||||||
return response.body().string();
|
return response.body().string();
|
||||||
|
@ -1,8 +1,6 @@
|
|||||||
package nu.marginalia.client;
|
package nu.marginalia.client;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
import nu.marginalia.client.route.RouteProvider;
|
|
||||||
import nu.marginalia.client.route.ServiceRoute;
|
|
||||||
import nu.marginalia.service.descriptor.ServiceDescriptor;
|
import nu.marginalia.service.descriptor.ServiceDescriptor;
|
||||||
|
|
||||||
import javax.annotation.Nonnull;
|
import javax.annotation.Nonnull;
|
||||||
@ -20,7 +18,7 @@ public class AbstractDynamicClient extends AbstractClient {
|
|||||||
);
|
);
|
||||||
|
|
||||||
this.service = service;
|
this.service = service;
|
||||||
this.scheduler = new AbortingScheduler(name());
|
this.scheduler = new AbortingScheduler();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -42,7 +42,7 @@ public class AbstractClientTest {
|
|||||||
client = new AbstractClient(new RouteProvider(new ServiceDescriptor(ServiceId.Api, "localhost")), 1, Gson::new) {
|
client = new AbstractClient(new RouteProvider(new ServiceDescriptor(ServiceId.Api, "localhost")), 1, Gson::new) {
|
||||||
@Override
|
@Override
|
||||||
public AbortingScheduler scheduler() {
|
public AbortingScheduler scheduler() {
|
||||||
return new AbortingScheduler(name());
|
return new AbortingScheduler();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -1,16 +1,16 @@
|
|||||||
package nu.marginalia.search.svc;
|
package nu.marginalia.search.svc;
|
||||||
|
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
|
import nu.marginalia.assistant.client.AssistantClient;
|
||||||
|
import nu.marginalia.assistant.client.model.SimilarDomain;
|
||||||
import nu.marginalia.client.Context;
|
import nu.marginalia.client.Context;
|
||||||
import nu.marginalia.db.DbDomainQueries;
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
import nu.marginalia.db.DomainBlacklist;
|
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.renderer.MustacheRenderer;
|
import nu.marginalia.renderer.MustacheRenderer;
|
||||||
import nu.marginalia.renderer.RendererFactory;
|
import nu.marginalia.renderer.RendererFactory;
|
||||||
import nu.marginalia.search.SearchOperator;
|
import nu.marginalia.search.SearchOperator;
|
||||||
import nu.marginalia.search.model.DomainInformation;
|
import nu.marginalia.assistant.client.model.DomainInformation;
|
||||||
import nu.marginalia.search.model.UrlDetails;
|
import nu.marginalia.search.model.UrlDetails;
|
||||||
import nu.marginalia.search.siteinfo.DomainInformationService;
|
|
||||||
import nu.marginalia.search.svc.SearchFlagSiteService.FlagSiteFormData;
|
import nu.marginalia.search.svc.SearchFlagSiteService.FlagSiteFormData;
|
||||||
import spark.Request;
|
import spark.Request;
|
||||||
import spark.Response;
|
import spark.Response;
|
||||||
@ -23,22 +23,19 @@ import java.util.Map;
|
|||||||
public class SearchSiteInfoService {
|
public class SearchSiteInfoService {
|
||||||
|
|
||||||
private final SearchOperator searchOperator;
|
private final SearchOperator searchOperator;
|
||||||
private final SimilarDomainsService similarDomains;
|
private final AssistantClient assistantClient;
|
||||||
private final DomainInformationService domainInformationService;
|
|
||||||
private final SearchFlagSiteService flagSiteService;
|
private final SearchFlagSiteService flagSiteService;
|
||||||
private final DbDomainQueries domainQueries;
|
private final DbDomainQueries domainQueries;
|
||||||
private final MustacheRenderer<Object> renderer;
|
private final MustacheRenderer<Object> renderer;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public SearchSiteInfoService(SearchOperator searchOperator,
|
public SearchSiteInfoService(SearchOperator searchOperator,
|
||||||
SimilarDomainsService similarDomains,
|
AssistantClient assistantClient,
|
||||||
DomainInformationService domainInformationService,
|
|
||||||
RendererFactory rendererFactory,
|
RendererFactory rendererFactory,
|
||||||
SearchFlagSiteService flagSiteService,
|
SearchFlagSiteService flagSiteService,
|
||||||
DbDomainQueries domainQueries) throws IOException {
|
DbDomainQueries domainQueries) throws IOException {
|
||||||
this.searchOperator = searchOperator;
|
this.searchOperator = searchOperator;
|
||||||
this.similarDomains = similarDomains;
|
this.assistantClient = assistantClient;
|
||||||
this.domainInformationService = domainInformationService;
|
|
||||||
this.flagSiteService = flagSiteService;
|
this.flagSiteService = flagSiteService;
|
||||||
this.domainQueries = domainQueries;
|
this.domainQueries = domainQueries;
|
||||||
|
|
||||||
@ -108,13 +105,6 @@ public class SearchSiteInfoService {
|
|||||||
false);
|
false);
|
||||||
}
|
}
|
||||||
|
|
||||||
private DomainInformation dummyInformation(String domainName) {
|
|
||||||
return DomainInformation.builder()
|
|
||||||
.domain(new EdgeDomain(domainName))
|
|
||||||
.suggestForCrawling(true)
|
|
||||||
.unknownDomain(true)
|
|
||||||
.build();
|
|
||||||
}
|
|
||||||
|
|
||||||
private Backlinks listLinks(Context ctx, String domainName) {
|
private Backlinks listLinks(Context ctx, String domainName) {
|
||||||
return new Backlinks(domainName,
|
return new Backlinks(domainName,
|
||||||
@ -126,13 +116,24 @@ public class SearchSiteInfoService {
|
|||||||
|
|
||||||
final int domainId = domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1);
|
final int domainId = domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1);
|
||||||
|
|
||||||
final DomainInformation domainInfo = domainInformationService.domainInfo(domainName)
|
final DomainInformation domainInfo;
|
||||||
.orElseGet(() -> dummyInformation(domainName));
|
final List<SimilarDomain> similarSet;
|
||||||
|
final List<SimilarDomain> linkingDomains;
|
||||||
|
|
||||||
final List<SimilarDomainsService.SimilarDomain> similarSet =
|
if (domainId < 0 || !assistantClient.isAccepting()) {
|
||||||
similarDomains.getSimilarDomains(domainId, 100);
|
domainInfo = createDummySiteInfo(domainName);
|
||||||
final List<SimilarDomainsService.SimilarDomain> linkingDomains =
|
similarSet = List.of();
|
||||||
similarDomains.getLinkingDomains(domainId, 100);
|
linkingDomains = List.of();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
domainInfo = assistantClient.domainInformation(ctx, domainId).blockingFirst();
|
||||||
|
similarSet = assistantClient
|
||||||
|
.similarDomains(ctx, domainId, 100)
|
||||||
|
.blockingFirst();
|
||||||
|
linkingDomains = assistantClient
|
||||||
|
.linkedDomains(ctx, domainId, 100)
|
||||||
|
.blockingFirst();
|
||||||
|
}
|
||||||
|
|
||||||
return new SiteInfoWithContext(domainName,
|
return new SiteInfoWithContext(domainName,
|
||||||
domainId,
|
domainId,
|
||||||
@ -141,6 +142,15 @@ public class SearchSiteInfoService {
|
|||||||
linkingDomains
|
linkingDomains
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private DomainInformation createDummySiteInfo(String domainName) {
|
||||||
|
return DomainInformation.builder()
|
||||||
|
.domain(new EdgeDomain(domainName))
|
||||||
|
.suggestForCrawling(true)
|
||||||
|
.unknownDomain(true)
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
private Docs listDocs(Context ctx, String domainName) {
|
private Docs listDocs(Context ctx, String domainName) {
|
||||||
return new Docs(domainName,
|
return new Docs(domainName,
|
||||||
domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1),
|
domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1),
|
||||||
@ -181,13 +191,13 @@ public class SearchSiteInfoService {
|
|||||||
String domain,
|
String domain,
|
||||||
long domainId,
|
long domainId,
|
||||||
DomainInformation domainInformation,
|
DomainInformation domainInformation,
|
||||||
List<SimilarDomainsService.SimilarDomain> similar,
|
List<SimilarDomain> similar,
|
||||||
List<SimilarDomainsService.SimilarDomain> linking) {
|
List<SimilarDomain> linking) {
|
||||||
public SiteInfoWithContext(String domain,
|
public SiteInfoWithContext(String domain,
|
||||||
long domainId,
|
long domainId,
|
||||||
DomainInformation domainInformation,
|
DomainInformation domainInformation,
|
||||||
List<SimilarDomainsService.SimilarDomain> similar,
|
List<SimilarDomain> similar,
|
||||||
List<SimilarDomainsService.SimilarDomain> linking
|
List<SimilarDomain> linking
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
this(Map.of("info", true),
|
this(Map.of("info", true),
|
||||||
|
@ -27,6 +27,7 @@ dependencies {
|
|||||||
implementation project(':code:common:config')
|
implementation project(':code:common:config')
|
||||||
implementation project(':code:common:service')
|
implementation project(':code:common:service')
|
||||||
implementation project(':code:common:model')
|
implementation project(':code:common:model')
|
||||||
|
implementation project(':code:common:db')
|
||||||
implementation project(':code:common:service-discovery')
|
implementation project(':code:common:service-discovery')
|
||||||
implementation project(':code:common:service-client')
|
implementation project(':code:common:service-client')
|
||||||
|
|
||||||
|
@ -3,6 +3,8 @@ package nu.marginalia.assistant;
|
|||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
|
import nu.marginalia.assistant.domains.DomainInformationService;
|
||||||
|
import nu.marginalia.assistant.domains.SimilarDomainsService;
|
||||||
import nu.marginalia.assistant.eval.Units;
|
import nu.marginalia.assistant.eval.Units;
|
||||||
import nu.marginalia.assistant.suggest.Suggestions;
|
import nu.marginalia.assistant.suggest.Suggestions;
|
||||||
import nu.marginalia.assistant.eval.MathParser;
|
import nu.marginalia.assistant.eval.MathParser;
|
||||||
@ -16,11 +18,16 @@ import spark.Request;
|
|||||||
import spark.Response;
|
import spark.Response;
|
||||||
import spark.Spark;
|
import spark.Spark;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
public class AssistantService extends Service {
|
public class AssistantService extends Service {
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
private final Gson gson = GsonFactory.get();
|
private final Gson gson = GsonFactory.get();
|
||||||
private final Units units;
|
private final Units units;
|
||||||
private final MathParser mathParser;
|
private final MathParser mathParser;
|
||||||
|
private final SimilarDomainsService similarDomainsService;
|
||||||
|
private final DomainInformationService domainInformationService;
|
||||||
private final Suggestions suggestions;
|
private final Suggestions suggestions;
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
@ -30,12 +37,16 @@ public class AssistantService extends Service {
|
|||||||
MathParser mathParser,
|
MathParser mathParser,
|
||||||
Units units,
|
Units units,
|
||||||
ScreenshotService screenshotService,
|
ScreenshotService screenshotService,
|
||||||
|
SimilarDomainsService similarDomainsService,
|
||||||
|
DomainInformationService domainInformationService,
|
||||||
Suggestions suggestions)
|
Suggestions suggestions)
|
||||||
{
|
{
|
||||||
super(params);
|
super(params);
|
||||||
|
|
||||||
this.mathParser = mathParser;
|
this.mathParser = mathParser;
|
||||||
this.units = units;
|
this.units = units;
|
||||||
|
this.similarDomainsService = similarDomainsService;
|
||||||
|
this.domainInformationService = domainInformationService;
|
||||||
this.suggestions = suggestions;
|
this.suggestions = suggestions;
|
||||||
|
|
||||||
Spark.staticFiles.expireTime(600);
|
Spark.staticFiles.expireTime(600);
|
||||||
@ -56,12 +67,50 @@ public class AssistantService extends Service {
|
|||||||
rsp,
|
rsp,
|
||||||
req.queryParams("value")
|
req.queryParams("value")
|
||||||
));
|
));
|
||||||
|
Spark.get("/domain/:id/similar", this::getSimilarDomains, this::convertToJson);
|
||||||
|
Spark.get("/domain/:id/linking", this::getLinkingDomains, this::convertToJson);
|
||||||
|
Spark.get("/domain/:id/info", this::getDomainInformation, this::convertToJson);
|
||||||
Spark.get("/public/suggest/", this::getSuggestions, this::convertToJson);
|
Spark.get("/public/suggest/", this::getSuggestions, this::convertToJson);
|
||||||
|
|
||||||
Spark.awaitInitialization();
|
Spark.awaitInitialization();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Object getSimilarDomains(Request request, Response response) {
|
||||||
|
int domainId = Integer.parseInt(request.params("id"));
|
||||||
|
int count = Integer.parseInt(Objects.requireNonNullElse(request.queryParams("count"), "25"));
|
||||||
|
|
||||||
|
response.type("application/json");
|
||||||
|
|
||||||
|
if (!similarDomainsService.isReady()) {
|
||||||
|
return new ArrayList<>();
|
||||||
|
}
|
||||||
|
|
||||||
|
return similarDomainsService.getSimilarDomains(domainId, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Object getLinkingDomains(Request request, Response response) {
|
||||||
|
int domainId = Integer.parseInt(request.params("id"));
|
||||||
|
int count = Integer.parseInt(Objects.requireNonNullElse(request.queryParams("count"), "25"));
|
||||||
|
|
||||||
|
response.type("application/json");
|
||||||
|
if (!similarDomainsService.isReady()) {
|
||||||
|
return new ArrayList<>();
|
||||||
|
}
|
||||||
|
return similarDomainsService.getLinkingDomains(domainId, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Object getDomainInformation(Request request, Response response) {
|
||||||
|
int domainId = Integer.parseInt(request.params("id"));
|
||||||
|
|
||||||
|
response.type("application/json");
|
||||||
|
|
||||||
|
var maybeDomainInfo = domainInformationService.domainInfo(domainId);
|
||||||
|
if (maybeDomainInfo.isEmpty()) {
|
||||||
|
Spark.halt(404);
|
||||||
|
}
|
||||||
|
return maybeDomainInfo.get();
|
||||||
|
}
|
||||||
|
|
||||||
private Object getSuggestions(Request request, Response response) {
|
private Object getSuggestions(Request request, Response response) {
|
||||||
response.type("application/json");
|
response.type("application/json");
|
||||||
var param = request.queryParams("partial");
|
var param = request.queryParams("partial");
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
package nu.marginalia.search.siteinfo;
|
package nu.marginalia.assistant.domains;
|
||||||
|
|
||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.model.crawl.DomainIndexingState;
|
import nu.marginalia.model.crawl.DomainIndexingState;
|
||||||
import nu.marginalia.db.DbDomainQueries;
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
import nu.marginalia.search.model.DomainInformation;
|
import nu.marginalia.assistant.client.model.DomainInformation;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -36,13 +36,7 @@ public class DomainInformationService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public Optional<DomainInformation> domainInfo(String site) {
|
public Optional<DomainInformation> domainInfo(int domainId) {
|
||||||
|
|
||||||
OptionalInt maybeDomainId = getDomainFromPartial(site);
|
|
||||||
if (maybeDomainId.isEmpty()) {
|
|
||||||
return Optional.empty();
|
|
||||||
}
|
|
||||||
int domainId = maybeDomainId.getAsInt();
|
|
||||||
|
|
||||||
Optional<EdgeDomain> domain = dbDomainQueries.getDomain(domainId);
|
Optional<EdgeDomain> domain = dbDomainQueries.getDomain(domainId);
|
||||||
if (domain.isEmpty()) {
|
if (domain.isEmpty()) {
|
||||||
@ -61,7 +55,6 @@ public class DomainInformationService {
|
|||||||
double rank = Math.round(10000.0*(1.0-getRank(domainId)))/100;
|
double rank = Math.round(10000.0*(1.0-getRank(domainId)))/100;
|
||||||
|
|
||||||
DomainIndexingState state = getDomainState(domainId);
|
DomainIndexingState state = getDomainState(domainId);
|
||||||
List<EdgeDomain> linkingDomains = getLinkingDomains(domainId);
|
|
||||||
|
|
||||||
var di = DomainInformation.builder()
|
var di = DomainInformation.builder()
|
||||||
.domain(domain.get())
|
.domain(domain.get())
|
||||||
@ -73,7 +66,6 @@ public class DomainInformationService {
|
|||||||
.outboundLinks(outboundLinks)
|
.outboundLinks(outboundLinks)
|
||||||
.ranking(rank)
|
.ranking(rank)
|
||||||
.state(state.desc)
|
.state(state.desc)
|
||||||
.linkingDomains(linkingDomains)
|
|
||||||
.inCrawlQueue(inCrawlQueue)
|
.inCrawlQueue(inCrawlQueue)
|
||||||
.nodeAffinity(nodeAffinity)
|
.nodeAffinity(nodeAffinity)
|
||||||
.suggestForCrawling((pagesVisited == 0 && outboundLinks == 0 && !inCrawlQueue))
|
.suggestForCrawling((pagesVisited == 0 && outboundLinks == 0 && !inCrawlQueue))
|
@ -1,4 +1,4 @@
|
|||||||
package nu.marginalia.search.svc;
|
package nu.marginalia.assistant.domains;
|
||||||
|
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
@ -6,11 +6,10 @@ import gnu.trove.list.TIntList;
|
|||||||
import gnu.trove.list.array.TIntArrayList;
|
import gnu.trove.list.array.TIntArrayList;
|
||||||
import gnu.trove.map.hash.TIntDoubleHashMap;
|
import gnu.trove.map.hash.TIntDoubleHashMap;
|
||||||
import gnu.trove.map.hash.TIntIntHashMap;
|
import gnu.trove.map.hash.TIntIntHashMap;
|
||||||
import gnu.trove.map.hash.TLongDoubleHashMap;
|
|
||||||
import gnu.trove.set.TIntSet;
|
import gnu.trove.set.TIntSet;
|
||||||
import gnu.trove.set.hash.TIntHashSet;
|
import gnu.trove.set.hash.TIntHashSet;
|
||||||
|
import nu.marginalia.assistant.client.model.SimilarDomain;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.model.EdgeUrl;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -40,6 +39,8 @@ public class SimilarDomainsService {
|
|||||||
public volatile double[] domainRanks = null;
|
public volatile double[] domainRanks = null;
|
||||||
public volatile String[] domainNames = null;
|
public volatile String[] domainNames = null;
|
||||||
|
|
||||||
|
volatile boolean isReady = false;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public SimilarDomainsService(HikariDataSource dataSource) {
|
public SimilarDomainsService(HikariDataSource dataSource) {
|
||||||
this.dataSource = dataSource;
|
this.dataSource = dataSource;
|
||||||
@ -167,6 +168,7 @@ public class SimilarDomainsService {
|
|||||||
|
|
||||||
logger.info("Loaded {} domains", domainRanks.length);
|
logger.info("Loaded {} domains", domainRanks.length);
|
||||||
logger.info("All done!");
|
logger.info("All done!");
|
||||||
|
isReady = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (SQLException throwables) {
|
catch (SQLException throwables) {
|
||||||
@ -174,7 +176,11 @@ public class SimilarDomainsService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
double getRelatedness(int a, int b) {
|
public boolean isReady() {
|
||||||
|
return isReady;
|
||||||
|
}
|
||||||
|
|
||||||
|
private double getRelatedness(int a, int b) {
|
||||||
int lowerIndex = Math.min(domainIdToIdx.get(a), domainIdToIdx.get(b));
|
int lowerIndex = Math.min(domainIdToIdx.get(a), domainIdToIdx.get(b));
|
||||||
int higherIndex = Math.max(domainIdToIdx.get(a), domainIdToIdx.get(b));
|
int higherIndex = Math.max(domainIdToIdx.get(a), domainIdToIdx.get(b));
|
||||||
|
|
||||||
@ -233,14 +239,14 @@ public class SimilarDomainsService {
|
|||||||
indexedDomains.get(idx),
|
indexedDomains.get(idx),
|
||||||
activeDomains.get(idx),
|
activeDomains.get(idx),
|
||||||
screenshotDomains.get(idx),
|
screenshotDomains.get(idx),
|
||||||
LinkType.find(
|
SimilarDomain.LinkType.find(
|
||||||
linkingIdsStoD.contains(idx),
|
linkingIdsStoD.contains(idx),
|
||||||
linkingIdsDtoS.contains(idx)
|
linkingIdsDtoS.contains(idx)
|
||||||
)
|
)
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
domains.removeIf(d -> d.url.domain.toString().length() > 32);
|
domains.removeIf(d -> d.url().domain.toString().length() > 32);
|
||||||
|
|
||||||
return domains;
|
return domains;
|
||||||
}
|
}
|
||||||
@ -319,84 +325,16 @@ public class SimilarDomainsService {
|
|||||||
indexedDomains.get(idx),
|
indexedDomains.get(idx),
|
||||||
activeDomains.get(idx),
|
activeDomains.get(idx),
|
||||||
screenshotDomains.get(idx),
|
screenshotDomains.get(idx),
|
||||||
LinkType.find(
|
SimilarDomain.LinkType.find(
|
||||||
linkingIdsStoD.contains(idx),
|
linkingIdsStoD.contains(idx),
|
||||||
linkingIdsDtoS.contains(idx)
|
linkingIdsDtoS.contains(idx)
|
||||||
)
|
)
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
domains.removeIf(d -> d.url.domain.toString().length() > 32);
|
domains.removeIf(d -> d.url().domain.toString().length() > 32);
|
||||||
|
|
||||||
return domains;
|
return domains;
|
||||||
}
|
}
|
||||||
|
|
||||||
public record SimilarDomain(EdgeUrl url,
|
|
||||||
int domainId,
|
|
||||||
double relatedness,
|
|
||||||
double rank,
|
|
||||||
boolean indexed,
|
|
||||||
boolean active,
|
|
||||||
boolean screenshot,
|
|
||||||
LinkType linkType)
|
|
||||||
{
|
|
||||||
|
|
||||||
public String getRankSymbols() {
|
|
||||||
if (rank > 90) {
|
|
||||||
return "★★★★★";
|
|
||||||
}
|
|
||||||
if (rank > 70) {
|
|
||||||
return "★★★★";
|
|
||||||
}
|
|
||||||
if (rank > 50) {
|
|
||||||
return "★★★";
|
|
||||||
}
|
|
||||||
if (rank > 30) {
|
|
||||||
return "★★";
|
|
||||||
}
|
|
||||||
if (rank > 10) {
|
|
||||||
return "★";
|
|
||||||
}
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
enum LinkType {
|
|
||||||
BACKWARD,
|
|
||||||
FOWARD,
|
|
||||||
BIDIRECTIONAL,
|
|
||||||
NONE;
|
|
||||||
|
|
||||||
public static LinkType find(boolean linkStod,
|
|
||||||
boolean linkDtos)
|
|
||||||
{
|
|
||||||
if (linkDtos && linkStod)
|
|
||||||
return BIDIRECTIONAL;
|
|
||||||
if (linkDtos)
|
|
||||||
return FOWARD;
|
|
||||||
if (linkStod)
|
|
||||||
return BACKWARD;
|
|
||||||
|
|
||||||
return NONE;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
return switch (this) {
|
|
||||||
case FOWARD -> "→";
|
|
||||||
case BACKWARD -> "←";
|
|
||||||
case BIDIRECTIONAL -> "⇆";
|
|
||||||
case NONE -> "-";
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getDescription() {
|
|
||||||
return switch (this) {
|
|
||||||
case BACKWARD -> "Backward Link";
|
|
||||||
case FOWARD -> "Forward Link";
|
|
||||||
case BIDIRECTIONAL -> "Mutual Link";
|
|
||||||
case NONE -> "No Link";
|
|
||||||
};
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
}
|
}
|
@ -20,8 +20,9 @@ import java.util.stream.Collectors;
|
|||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
public class Suggestions {
|
public class Suggestions {
|
||||||
private final PatriciaTrie<String> suggestionsTrie;
|
private PatriciaTrie<String> suggestionsTrie = null;
|
||||||
private final TermFrequencyDict termFrequencyDict;
|
private TermFrequencyDict termFrequencyDict = null;
|
||||||
|
private volatile boolean ready = false;
|
||||||
private final SpellChecker spellChecker;
|
private final SpellChecker spellChecker;
|
||||||
|
|
||||||
private static final Pattern suggestionPattern = Pattern.compile("^[a-zA-Z0-9]+( [a-zA-Z0-9]+)*$");
|
private static final Pattern suggestionPattern = Pattern.compile("^[a-zA-Z0-9]+( [a-zA-Z0-9]+)*$");
|
||||||
@ -35,10 +36,12 @@ public class Suggestions {
|
|||||||
) {
|
) {
|
||||||
this.spellChecker = spellChecker;
|
this.spellChecker = spellChecker;
|
||||||
|
|
||||||
|
Thread.ofPlatform().start(() -> {
|
||||||
suggestionsTrie = loadSuggestions(suggestionsFile);
|
suggestionsTrie = loadSuggestions(suggestionsFile);
|
||||||
termFrequencyDict = dict;
|
termFrequencyDict = dict;
|
||||||
|
ready = true;
|
||||||
logger.info("Loaded {} suggestions", suggestionsTrie.size());
|
logger.info("Loaded {} suggestions", suggestionsTrie.size());
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private static PatriciaTrie<String> loadSuggestions(Path file) {
|
private static PatriciaTrie<String> loadSuggestions(Path file) {
|
||||||
@ -71,6 +74,9 @@ public class Suggestions {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getSuggestions(int count, String searchWord) {
|
public List<String> getSuggestions(int count, String searchWord) {
|
||||||
|
if (!ready)
|
||||||
|
return Collections.emptyList();
|
||||||
|
|
||||||
if (searchWord.length() < MIN_SUGGEST_LENGTH) {
|
if (searchWord.length() < MIN_SUGGEST_LENGTH) {
|
||||||
return Collections.emptyList();
|
return Collections.emptyList();
|
||||||
}
|
}
|
||||||
@ -126,6 +132,9 @@ public class Suggestions {
|
|||||||
|
|
||||||
|
|
||||||
public Stream<String> getSuggestionsForKeyword(int count, String prefix) {
|
public Stream<String> getSuggestionsForKeyword(int count, String prefix) {
|
||||||
|
if (!ready)
|
||||||
|
return Stream.empty();
|
||||||
|
|
||||||
if (prefix.length() < MIN_SUGGEST_LENGTH) {
|
if (prefix.length() < MIN_SUGGEST_LENGTH) {
|
||||||
return Stream.empty();
|
return Stream.empty();
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user