mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
(search) Move site information out of the search service and into assistant.
This reduces the impact of restarting the search service, as the site information takes a few minutes to load, during which it is not available. It also permits exposing this information via an API in the future if there is interest. The assistant service was also modified to late-load the suggestions trie, as this is a major contributor to its start-up time. Finally, some changes were made to the client library: a new get() method was added that takes a TypeToken to allow deserialization of generics such as List<Foo>, and the scheduler was modified to use virtual threads.
This commit is contained in:
parent
5c46af0edb
commit
8ef34883a8
@ -1,12 +1,14 @@
|
||||
package nu.marginalia.assistant.client;
|
||||
|
||||
import com.google.gson.reflect.TypeToken;
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import io.reactivex.rxjava3.core.Observable;
|
||||
import nu.marginalia.assistant.client.model.DictionaryResponse;
|
||||
import nu.marginalia.assistant.client.model.DomainInformation;
|
||||
import nu.marginalia.assistant.client.model.SimilarDomain;
|
||||
import nu.marginalia.client.AbstractDynamicClient;
|
||||
import nu.marginalia.client.exception.RouteNotConfiguredException;
|
||||
import nu.marginalia.WmsaHome;
|
||||
import nu.marginalia.model.gson.GsonFactory;
|
||||
import nu.marginalia.service.descriptor.ServiceDescriptors;
|
||||
import nu.marginalia.service.id.ServiceId;
|
||||
@ -14,6 +16,7 @@ import nu.marginalia.client.Context;
|
||||
|
||||
import java.net.URLEncoder;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@Singleton
|
||||
@ -59,4 +62,31 @@ public class AssistantClient extends AbstractDynamicClient {
|
||||
return Observable.empty();
|
||||
}
|
||||
}
|
||||
|
||||
public Observable<ArrayList<SimilarDomain>> similarDomains(Context ctx, int domainId, int count) {
|
||||
try {
|
||||
return super.get(ctx, 0, STR."/domain/\{domainId}/similar?count=\{count}", new TypeToken<ArrayList<SimilarDomain>>() {});
|
||||
}
|
||||
catch (RouteNotConfiguredException ex) {
|
||||
return Observable.empty();
|
||||
}
|
||||
}
|
||||
|
||||
public Observable<ArrayList<SimilarDomain>> linkedDomains(Context ctx, int domainId, int count) {
|
||||
try {
|
||||
return super.get(ctx, 0, STR."/domain/\{domainId}/linking?count=\{count}", new TypeToken<ArrayList<SimilarDomain>>() {});
|
||||
}
|
||||
catch (RouteNotConfiguredException ex) {
|
||||
return Observable.empty();
|
||||
}
|
||||
}
|
||||
|
||||
public Observable<DomainInformation> domainInformation(Context ctx, int domainId) {
|
||||
try {
|
||||
return super.get(ctx, 0, STR."/domain/\{domainId}/info", DomainInformation.class);
|
||||
}
|
||||
catch (RouteNotConfiguredException ex) {
|
||||
return Observable.empty();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.search.model;
|
||||
package nu.marginalia.assistant.client.model;
|
||||
|
||||
import lombok.*;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
@ -24,5 +24,4 @@ public class DomainInformation {
|
||||
boolean unknownDomain;
|
||||
|
||||
String state;
|
||||
List<EdgeDomain> linkingDomains;
|
||||
}
|
@ -0,0 +1,69 @@
|
||||
package nu.marginalia.assistant.client.model;
|
||||
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
|
||||
public record SimilarDomain(EdgeUrl url,
|
||||
int domainId,
|
||||
double relatedness,
|
||||
double rank,
|
||||
boolean indexed,
|
||||
boolean active,
|
||||
boolean screenshot,
|
||||
LinkType linkType) {
|
||||
|
||||
public String getRankSymbols() {
|
||||
if (rank > 90) {
|
||||
return "★★★★★";
|
||||
}
|
||||
if (rank > 70) {
|
||||
return "★★★★";
|
||||
}
|
||||
if (rank > 50) {
|
||||
return "★★★";
|
||||
}
|
||||
if (rank > 30) {
|
||||
return "★★";
|
||||
}
|
||||
if (rank > 10) {
|
||||
return "★";
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
public enum LinkType {
|
||||
BACKWARD,
|
||||
FOWARD,
|
||||
BIDIRECTIONAL,
|
||||
NONE;
|
||||
|
||||
public static LinkType find(boolean linkStod,
|
||||
boolean linkDtos) {
|
||||
if (linkDtos && linkStod)
|
||||
return BIDIRECTIONAL;
|
||||
if (linkDtos)
|
||||
return FOWARD;
|
||||
if (linkStod)
|
||||
return BACKWARD;
|
||||
|
||||
return NONE;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return switch (this) {
|
||||
case FOWARD -> "→";
|
||||
case BACKWARD -> "←";
|
||||
case BIDIRECTIONAL -> "⇆";
|
||||
case NONE -> "-";
|
||||
};
|
||||
}
|
||||
|
||||
public String getDescription() {
|
||||
return switch (this) {
|
||||
case BACKWARD -> "Backward Link";
|
||||
case FOWARD -> "Forward Link";
|
||||
case BIDIRECTIONAL -> "Mutual Link";
|
||||
case NONE -> "No Link";
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
@ -1,6 +1,5 @@
|
||||
package nu.marginalia.client;
|
||||
|
||||
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
||||
import io.reactivex.rxjava3.core.Scheduler;
|
||||
import io.reactivex.rxjava3.schedulers.Schedulers;
|
||||
import org.slf4j.Logger;
|
||||
@ -10,26 +9,16 @@ import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.ThreadFactory;
|
||||
|
||||
public class AbortingScheduler {
|
||||
private final ThreadFactory threadFactory;
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
@Nullable
|
||||
private ExecutorService executorService;
|
||||
|
||||
public AbortingScheduler(String name) {
|
||||
threadFactory = new ThreadFactoryBuilder()
|
||||
.setNameFormat(name+"client--%d")
|
||||
.setUncaughtExceptionHandler(this::handleException)
|
||||
.build();
|
||||
public AbortingScheduler() {
|
||||
}
|
||||
|
||||
private void handleException(Thread thread, Throwable throwable) {
|
||||
logger.error("Uncaught exception during Client IO in thread {}", thread.getName(), throwable);
|
||||
}
|
||||
|
||||
public synchronized Scheduler get() {
|
||||
return Schedulers.from(getExecutorService(),
|
||||
@ -40,14 +29,14 @@ public class AbortingScheduler {
|
||||
public synchronized void abort() {
|
||||
if (null != executorService) {
|
||||
executorService.shutdownNow();
|
||||
executorService = Executors.newFixedThreadPool(16, threadFactory);
|
||||
executorService = Executors.newVirtualThreadPerTaskExecutor();
|
||||
}
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
private synchronized ExecutorService getExecutorService() {
|
||||
if (null == executorService) {
|
||||
executorService = Executors.newFixedThreadPool(16, threadFactory);
|
||||
executorService = Executors.newVirtualThreadPerTaskExecutor();
|
||||
}
|
||||
return executorService;
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
package nu.marginalia.client;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import com.google.gson.reflect.TypeToken;
|
||||
import com.google.protobuf.GeneratedMessageV3;
|
||||
import io.reactivex.rxjava3.core.Observable;
|
||||
import io.reactivex.rxjava3.core.ObservableSource;
|
||||
@ -20,6 +21,7 @@ import org.slf4j.LoggerFactory;
|
||||
import spark.utils.IOUtils;
|
||||
|
||||
import java.io.OutputStream;
|
||||
import java.lang.reflect.Type;
|
||||
import java.net.ConnectException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.function.Supplier;
|
||||
@ -233,6 +235,22 @@ public abstract class AbstractClient implements AutoCloseable {
|
||||
.doFinally(() -> ThreadContext.remove("outbound-request"));
|
||||
}
|
||||
|
||||
protected synchronized <T> Observable<T> get(Context ctx, int node, String endpoint, TypeToken<T> type) {
|
||||
ensureAlive(node);
|
||||
|
||||
var req = ctx.paint(new Request.Builder()).url(serviceRoutes.get(node) + endpoint).get().build();
|
||||
|
||||
return Observable.just(client.newCall(req))
|
||||
.subscribeOn(scheduler().get())
|
||||
.map(this::logInbound)
|
||||
.map(Call::execute)
|
||||
.map(this::logOutbound)
|
||||
.map(rsp -> validateResponseStatus(rsp, req, 200))
|
||||
.map(rsp -> getEntity(rsp, type))
|
||||
.retryWhen(this::retryHandler)
|
||||
.timeout(timeout, TimeUnit.SECONDS)
|
||||
.doFinally(() -> ThreadContext.remove("outbound-request"));
|
||||
}
|
||||
protected synchronized Observable<Integer> get(Context ctx, int node, String endpoint, OutputStream outputStream) {
|
||||
ensureAlive(node);
|
||||
|
||||
@ -388,6 +406,15 @@ public abstract class AbstractClient implements AutoCloseable {
|
||||
}
|
||||
}
|
||||
@SneakyThrows
|
||||
private <T> T getEntity(Response response, TypeToken<T> clazz) {
|
||||
try (response) {
|
||||
return gson.fromJson(response.body().charStream(), clazz);
|
||||
}
|
||||
catch (Exception ex) {
|
||||
throw ex;
|
||||
}
|
||||
}
|
||||
@SneakyThrows
|
||||
private String getText(Response response) {
|
||||
try (response) {
|
||||
return response.body().string();
|
||||
|
@ -1,8 +1,6 @@
|
||||
package nu.marginalia.client;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import nu.marginalia.client.route.RouteProvider;
|
||||
import nu.marginalia.client.route.ServiceRoute;
|
||||
import nu.marginalia.service.descriptor.ServiceDescriptor;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
@ -20,7 +18,7 @@ public class AbstractDynamicClient extends AbstractClient {
|
||||
);
|
||||
|
||||
this.service = service;
|
||||
this.scheduler = new AbortingScheduler(name());
|
||||
this.scheduler = new AbortingScheduler();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -42,7 +42,7 @@ public class AbstractClientTest {
|
||||
client = new AbstractClient(new RouteProvider(new ServiceDescriptor(ServiceId.Api, "localhost")), 1, Gson::new) {
|
||||
@Override
|
||||
public AbortingScheduler scheduler() {
|
||||
return new AbortingScheduler(name());
|
||||
return new AbortingScheduler();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -1,16 +1,16 @@
|
||||
package nu.marginalia.search.svc;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import nu.marginalia.assistant.client.AssistantClient;
|
||||
import nu.marginalia.assistant.client.model.SimilarDomain;
|
||||
import nu.marginalia.client.Context;
|
||||
import nu.marginalia.db.DbDomainQueries;
|
||||
import nu.marginalia.db.DomainBlacklist;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.renderer.MustacheRenderer;
|
||||
import nu.marginalia.renderer.RendererFactory;
|
||||
import nu.marginalia.search.SearchOperator;
|
||||
import nu.marginalia.search.model.DomainInformation;
|
||||
import nu.marginalia.assistant.client.model.DomainInformation;
|
||||
import nu.marginalia.search.model.UrlDetails;
|
||||
import nu.marginalia.search.siteinfo.DomainInformationService;
|
||||
import nu.marginalia.search.svc.SearchFlagSiteService.FlagSiteFormData;
|
||||
import spark.Request;
|
||||
import spark.Response;
|
||||
@ -23,22 +23,19 @@ import java.util.Map;
|
||||
public class SearchSiteInfoService {
|
||||
|
||||
private final SearchOperator searchOperator;
|
||||
private final SimilarDomainsService similarDomains;
|
||||
private final DomainInformationService domainInformationService;
|
||||
private final AssistantClient assistantClient;
|
||||
private final SearchFlagSiteService flagSiteService;
|
||||
private final DbDomainQueries domainQueries;
|
||||
private final MustacheRenderer<Object> renderer;
|
||||
|
||||
@Inject
|
||||
public SearchSiteInfoService(SearchOperator searchOperator,
|
||||
SimilarDomainsService similarDomains,
|
||||
DomainInformationService domainInformationService,
|
||||
AssistantClient assistantClient,
|
||||
RendererFactory rendererFactory,
|
||||
SearchFlagSiteService flagSiteService,
|
||||
DbDomainQueries domainQueries) throws IOException {
|
||||
this.searchOperator = searchOperator;
|
||||
this.similarDomains = similarDomains;
|
||||
this.domainInformationService = domainInformationService;
|
||||
this.assistantClient = assistantClient;
|
||||
this.flagSiteService = flagSiteService;
|
||||
this.domainQueries = domainQueries;
|
||||
|
||||
@ -108,13 +105,6 @@ public class SearchSiteInfoService {
|
||||
false);
|
||||
}
|
||||
|
||||
private DomainInformation dummyInformation(String domainName) {
|
||||
return DomainInformation.builder()
|
||||
.domain(new EdgeDomain(domainName))
|
||||
.suggestForCrawling(true)
|
||||
.unknownDomain(true)
|
||||
.build();
|
||||
}
|
||||
|
||||
private Backlinks listLinks(Context ctx, String domainName) {
|
||||
return new Backlinks(domainName,
|
||||
@ -126,13 +116,24 @@ public class SearchSiteInfoService {
|
||||
|
||||
final int domainId = domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1);
|
||||
|
||||
final DomainInformation domainInfo = domainInformationService.domainInfo(domainName)
|
||||
.orElseGet(() -> dummyInformation(domainName));
|
||||
final DomainInformation domainInfo;
|
||||
final List<SimilarDomain> similarSet;
|
||||
final List<SimilarDomain> linkingDomains;
|
||||
|
||||
final List<SimilarDomainsService.SimilarDomain> similarSet =
|
||||
similarDomains.getSimilarDomains(domainId, 100);
|
||||
final List<SimilarDomainsService.SimilarDomain> linkingDomains =
|
||||
similarDomains.getLinkingDomains(domainId, 100);
|
||||
if (domainId < 0 || !assistantClient.isAccepting()) {
|
||||
domainInfo = createDummySiteInfo(domainName);
|
||||
similarSet = List.of();
|
||||
linkingDomains = List.of();
|
||||
}
|
||||
else {
|
||||
domainInfo = assistantClient.domainInformation(ctx, domainId).blockingFirst();
|
||||
similarSet = assistantClient
|
||||
.similarDomains(ctx, domainId, 100)
|
||||
.blockingFirst();
|
||||
linkingDomains = assistantClient
|
||||
.linkedDomains(ctx, domainId, 100)
|
||||
.blockingFirst();
|
||||
}
|
||||
|
||||
return new SiteInfoWithContext(domainName,
|
||||
domainId,
|
||||
@ -141,6 +142,15 @@ public class SearchSiteInfoService {
|
||||
linkingDomains
|
||||
);
|
||||
}
|
||||
|
||||
private DomainInformation createDummySiteInfo(String domainName) {
|
||||
return DomainInformation.builder()
|
||||
.domain(new EdgeDomain(domainName))
|
||||
.suggestForCrawling(true)
|
||||
.unknownDomain(true)
|
||||
.build();
|
||||
}
|
||||
|
||||
private Docs listDocs(Context ctx, String domainName) {
|
||||
return new Docs(domainName,
|
||||
domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1),
|
||||
@ -181,13 +191,13 @@ public class SearchSiteInfoService {
|
||||
String domain,
|
||||
long domainId,
|
||||
DomainInformation domainInformation,
|
||||
List<SimilarDomainsService.SimilarDomain> similar,
|
||||
List<SimilarDomainsService.SimilarDomain> linking) {
|
||||
List<SimilarDomain> similar,
|
||||
List<SimilarDomain> linking) {
|
||||
public SiteInfoWithContext(String domain,
|
||||
long domainId,
|
||||
DomainInformation domainInformation,
|
||||
List<SimilarDomainsService.SimilarDomain> similar,
|
||||
List<SimilarDomainsService.SimilarDomain> linking
|
||||
List<SimilarDomain> similar,
|
||||
List<SimilarDomain> linking
|
||||
)
|
||||
{
|
||||
this(Map.of("info", true),
|
||||
|
@ -27,6 +27,7 @@ dependencies {
|
||||
implementation project(':code:common:config')
|
||||
implementation project(':code:common:service')
|
||||
implementation project(':code:common:model')
|
||||
implementation project(':code:common:db')
|
||||
implementation project(':code:common:service-discovery')
|
||||
implementation project(':code:common:service-client')
|
||||
|
||||
|
@ -3,6 +3,8 @@ package nu.marginalia.assistant;
|
||||
import com.google.gson.Gson;
|
||||
import com.google.inject.Inject;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.assistant.domains.DomainInformationService;
|
||||
import nu.marginalia.assistant.domains.SimilarDomainsService;
|
||||
import nu.marginalia.assistant.eval.Units;
|
||||
import nu.marginalia.assistant.suggest.Suggestions;
|
||||
import nu.marginalia.assistant.eval.MathParser;
|
||||
@ -16,11 +18,16 @@ import spark.Request;
|
||||
import spark.Response;
|
||||
import spark.Spark;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Objects;
|
||||
|
||||
public class AssistantService extends Service {
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
private final Gson gson = GsonFactory.get();
|
||||
private final Units units;
|
||||
private final MathParser mathParser;
|
||||
private final SimilarDomainsService similarDomainsService;
|
||||
private final DomainInformationService domainInformationService;
|
||||
private final Suggestions suggestions;
|
||||
|
||||
@SneakyThrows
|
||||
@ -30,12 +37,16 @@ public class AssistantService extends Service {
|
||||
MathParser mathParser,
|
||||
Units units,
|
||||
ScreenshotService screenshotService,
|
||||
SimilarDomainsService similarDomainsService,
|
||||
DomainInformationService domainInformationService,
|
||||
Suggestions suggestions)
|
||||
{
|
||||
super(params);
|
||||
|
||||
this.mathParser = mathParser;
|
||||
this.units = units;
|
||||
this.similarDomainsService = similarDomainsService;
|
||||
this.domainInformationService = domainInformationService;
|
||||
this.suggestions = suggestions;
|
||||
|
||||
Spark.staticFiles.expireTime(600);
|
||||
@ -56,12 +67,50 @@ public class AssistantService extends Service {
|
||||
rsp,
|
||||
req.queryParams("value")
|
||||
));
|
||||
|
||||
Spark.get("/domain/:id/similar", this::getSimilarDomains, this::convertToJson);
|
||||
Spark.get("/domain/:id/linking", this::getLinkingDomains, this::convertToJson);
|
||||
Spark.get("/domain/:id/info", this::getDomainInformation, this::convertToJson);
|
||||
Spark.get("/public/suggest/", this::getSuggestions, this::convertToJson);
|
||||
|
||||
Spark.awaitInitialization();
|
||||
}
|
||||
|
||||
private Object getSimilarDomains(Request request, Response response) {
|
||||
int domainId = Integer.parseInt(request.params("id"));
|
||||
int count = Integer.parseInt(Objects.requireNonNullElse(request.queryParams("count"), "25"));
|
||||
|
||||
response.type("application/json");
|
||||
|
||||
if (!similarDomainsService.isReady()) {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
|
||||
return similarDomainsService.getSimilarDomains(domainId, count);
|
||||
}
|
||||
|
||||
private Object getLinkingDomains(Request request, Response response) {
|
||||
int domainId = Integer.parseInt(request.params("id"));
|
||||
int count = Integer.parseInt(Objects.requireNonNullElse(request.queryParams("count"), "25"));
|
||||
|
||||
response.type("application/json");
|
||||
if (!similarDomainsService.isReady()) {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
return similarDomainsService.getLinkingDomains(domainId, count);
|
||||
}
|
||||
|
||||
private Object getDomainInformation(Request request, Response response) {
|
||||
int domainId = Integer.parseInt(request.params("id"));
|
||||
|
||||
response.type("application/json");
|
||||
|
||||
var maybeDomainInfo = domainInformationService.domainInfo(domainId);
|
||||
if (maybeDomainInfo.isEmpty()) {
|
||||
Spark.halt(404);
|
||||
}
|
||||
return maybeDomainInfo.get();
|
||||
}
|
||||
|
||||
private Object getSuggestions(Request request, Response response) {
|
||||
response.type("application/json");
|
||||
var param = request.queryParams("partial");
|
||||
|
@ -1,11 +1,11 @@
|
||||
package nu.marginalia.search.siteinfo;
|
||||
package nu.marginalia.assistant.domains;
|
||||
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.model.crawl.DomainIndexingState;
|
||||
import nu.marginalia.db.DbDomainQueries;
|
||||
import nu.marginalia.search.model.DomainInformation;
|
||||
import nu.marginalia.assistant.client.model.DomainInformation;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@ -36,13 +36,7 @@ public class DomainInformationService {
|
||||
}
|
||||
|
||||
|
||||
public Optional<DomainInformation> domainInfo(String site) {
|
||||
|
||||
OptionalInt maybeDomainId = getDomainFromPartial(site);
|
||||
if (maybeDomainId.isEmpty()) {
|
||||
return Optional.empty();
|
||||
}
|
||||
int domainId = maybeDomainId.getAsInt();
|
||||
public Optional<DomainInformation> domainInfo(int domainId) {
|
||||
|
||||
Optional<EdgeDomain> domain = dbDomainQueries.getDomain(domainId);
|
||||
if (domain.isEmpty()) {
|
||||
@ -61,7 +55,6 @@ public class DomainInformationService {
|
||||
double rank = Math.round(10000.0*(1.0-getRank(domainId)))/100;
|
||||
|
||||
DomainIndexingState state = getDomainState(domainId);
|
||||
List<EdgeDomain> linkingDomains = getLinkingDomains(domainId);
|
||||
|
||||
var di = DomainInformation.builder()
|
||||
.domain(domain.get())
|
||||
@ -73,7 +66,6 @@ public class DomainInformationService {
|
||||
.outboundLinks(outboundLinks)
|
||||
.ranking(rank)
|
||||
.state(state.desc)
|
||||
.linkingDomains(linkingDomains)
|
||||
.inCrawlQueue(inCrawlQueue)
|
||||
.nodeAffinity(nodeAffinity)
|
||||
.suggestForCrawling((pagesVisited == 0 && outboundLinks == 0 && !inCrawlQueue))
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.search.svc;
|
||||
package nu.marginalia.assistant.domains;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
@ -6,11 +6,10 @@ import gnu.trove.list.TIntList;
|
||||
import gnu.trove.list.array.TIntArrayList;
|
||||
import gnu.trove.map.hash.TIntDoubleHashMap;
|
||||
import gnu.trove.map.hash.TIntIntHashMap;
|
||||
import gnu.trove.map.hash.TLongDoubleHashMap;
|
||||
import gnu.trove.set.TIntSet;
|
||||
import gnu.trove.set.hash.TIntHashSet;
|
||||
import nu.marginalia.assistant.client.model.SimilarDomain;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@ -40,6 +39,8 @@ public class SimilarDomainsService {
|
||||
public volatile double[] domainRanks = null;
|
||||
public volatile String[] domainNames = null;
|
||||
|
||||
volatile boolean isReady = false;
|
||||
|
||||
@Inject
|
||||
public SimilarDomainsService(HikariDataSource dataSource) {
|
||||
this.dataSource = dataSource;
|
||||
@ -167,6 +168,7 @@ public class SimilarDomainsService {
|
||||
|
||||
logger.info("Loaded {} domains", domainRanks.length);
|
||||
logger.info("All done!");
|
||||
isReady = true;
|
||||
}
|
||||
}
|
||||
catch (SQLException throwables) {
|
||||
@ -174,7 +176,11 @@ public class SimilarDomainsService {
|
||||
}
|
||||
}
|
||||
|
||||
double getRelatedness(int a, int b) {
|
||||
public boolean isReady() {
|
||||
return isReady;
|
||||
}
|
||||
|
||||
private double getRelatedness(int a, int b) {
|
||||
int lowerIndex = Math.min(domainIdToIdx.get(a), domainIdToIdx.get(b));
|
||||
int higherIndex = Math.max(domainIdToIdx.get(a), domainIdToIdx.get(b));
|
||||
|
||||
@ -233,14 +239,14 @@ public class SimilarDomainsService {
|
||||
indexedDomains.get(idx),
|
||||
activeDomains.get(idx),
|
||||
screenshotDomains.get(idx),
|
||||
LinkType.find(
|
||||
SimilarDomain.LinkType.find(
|
||||
linkingIdsStoD.contains(idx),
|
||||
linkingIdsDtoS.contains(idx)
|
||||
)
|
||||
));
|
||||
}
|
||||
|
||||
domains.removeIf(d -> d.url.domain.toString().length() > 32);
|
||||
domains.removeIf(d -> d.url().domain.toString().length() > 32);
|
||||
|
||||
return domains;
|
||||
}
|
||||
@ -319,84 +325,16 @@ public class SimilarDomainsService {
|
||||
indexedDomains.get(idx),
|
||||
activeDomains.get(idx),
|
||||
screenshotDomains.get(idx),
|
||||
LinkType.find(
|
||||
SimilarDomain.LinkType.find(
|
||||
linkingIdsStoD.contains(idx),
|
||||
linkingIdsDtoS.contains(idx)
|
||||
)
|
||||
));
|
||||
}
|
||||
|
||||
domains.removeIf(d -> d.url.domain.toString().length() > 32);
|
||||
domains.removeIf(d -> d.url().domain.toString().length() > 32);
|
||||
|
||||
return domains;
|
||||
}
|
||||
|
||||
public record SimilarDomain(EdgeUrl url,
|
||||
int domainId,
|
||||
double relatedness,
|
||||
double rank,
|
||||
boolean indexed,
|
||||
boolean active,
|
||||
boolean screenshot,
|
||||
LinkType linkType)
|
||||
{
|
||||
|
||||
public String getRankSymbols() {
|
||||
if (rank > 90) {
|
||||
return "★★★★★";
|
||||
}
|
||||
if (rank > 70) {
|
||||
return "★★★★";
|
||||
}
|
||||
if (rank > 50) {
|
||||
return "★★★";
|
||||
}
|
||||
if (rank > 30) {
|
||||
return "★★";
|
||||
}
|
||||
if (rank > 10) {
|
||||
return "★";
|
||||
}
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
enum LinkType {
|
||||
BACKWARD,
|
||||
FOWARD,
|
||||
BIDIRECTIONAL,
|
||||
NONE;
|
||||
|
||||
public static LinkType find(boolean linkStod,
|
||||
boolean linkDtos)
|
||||
{
|
||||
if (linkDtos && linkStod)
|
||||
return BIDIRECTIONAL;
|
||||
if (linkDtos)
|
||||
return FOWARD;
|
||||
if (linkStod)
|
||||
return BACKWARD;
|
||||
|
||||
return NONE;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return switch (this) {
|
||||
case FOWARD -> "→";
|
||||
case BACKWARD -> "←";
|
||||
case BIDIRECTIONAL -> "⇆";
|
||||
case NONE -> "-";
|
||||
};
|
||||
}
|
||||
|
||||
public String getDescription() {
|
||||
return switch (this) {
|
||||
case BACKWARD -> "Backward Link";
|
||||
case FOWARD -> "Forward Link";
|
||||
case BIDIRECTIONAL -> "Mutual Link";
|
||||
case NONE -> "No Link";
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -20,8 +20,9 @@ import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
public class Suggestions {
|
||||
private final PatriciaTrie<String> suggestionsTrie;
|
||||
private final TermFrequencyDict termFrequencyDict;
|
||||
private PatriciaTrie<String> suggestionsTrie = null;
|
||||
private TermFrequencyDict termFrequencyDict = null;
|
||||
private volatile boolean ready = false;
|
||||
private final SpellChecker spellChecker;
|
||||
|
||||
private static final Pattern suggestionPattern = Pattern.compile("^[a-zA-Z0-9]+( [a-zA-Z0-9]+)*$");
|
||||
@ -35,10 +36,12 @@ public class Suggestions {
|
||||
) {
|
||||
this.spellChecker = spellChecker;
|
||||
|
||||
suggestionsTrie = loadSuggestions(suggestionsFile);
|
||||
termFrequencyDict = dict;
|
||||
|
||||
logger.info("Loaded {} suggestions", suggestionsTrie.size());
|
||||
Thread.ofPlatform().start(() -> {
|
||||
suggestionsTrie = loadSuggestions(suggestionsFile);
|
||||
termFrequencyDict = dict;
|
||||
ready = true;
|
||||
logger.info("Loaded {} suggestions", suggestionsTrie.size());
|
||||
});
|
||||
}
|
||||
|
||||
private static PatriciaTrie<String> loadSuggestions(Path file) {
|
||||
@ -71,6 +74,9 @@ public class Suggestions {
|
||||
}
|
||||
|
||||
public List<String> getSuggestions(int count, String searchWord) {
|
||||
if (!ready)
|
||||
return Collections.emptyList();
|
||||
|
||||
if (searchWord.length() < MIN_SUGGEST_LENGTH) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
@ -126,6 +132,9 @@ public class Suggestions {
|
||||
|
||||
|
||||
public Stream<String> getSuggestionsForKeyword(int count, String prefix) {
|
||||
if (!ready)
|
||||
return Stream.empty();
|
||||
|
||||
if (prefix.length() < MIN_SUGGEST_LENGTH) {
|
||||
return Stream.empty();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user