mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
Merge pull request #127 from MarginaliaSearch/serp-redesign
Web UI redesign
This commit is contained in:
commit
be6382e0d0
1
.gitignore
vendored
1
.gitignore
vendored
@ -7,3 +7,4 @@ build/
|
|||||||
lombok.config
|
lombok.config
|
||||||
Dockerfile
|
Dockerfile
|
||||||
run
|
run
|
||||||
|
jte-classes
|
@ -48,6 +48,7 @@ ext {
|
|||||||
dockerImageTag='latest'
|
dockerImageTag='latest'
|
||||||
dockerImageRegistry='marginalia'
|
dockerImageRegistry='marginalia'
|
||||||
jibVersion = '3.4.3'
|
jibVersion = '3.4.3'
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
idea {
|
idea {
|
||||||
|
@ -28,7 +28,7 @@ public class DbDomainQueries {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public Integer getDomainId(EdgeDomain domain) {
|
public Integer getDomainId(EdgeDomain domain) throws NoSuchElementException {
|
||||||
try (var connection = dataSource.getConnection()) {
|
try (var connection = dataSource.getConnection()) {
|
||||||
|
|
||||||
return domainIdCache.get(domain, () -> {
|
return domainIdCache.get(domain, () -> {
|
||||||
@ -42,6 +42,9 @@ public class DbDomainQueries {
|
|||||||
throw new NoSuchElementException();
|
throw new NoSuchElementException();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
catch (UncheckedExecutionException ex) {
|
||||||
|
throw new NoSuchElementException();
|
||||||
|
}
|
||||||
catch (ExecutionException ex) {
|
catch (ExecutionException ex) {
|
||||||
throw new RuntimeException(ex.getCause());
|
throw new RuntimeException(ex.getCause());
|
||||||
}
|
}
|
||||||
|
@ -42,6 +42,12 @@ dependencies {
|
|||||||
implementation libs.bundles.curator
|
implementation libs.bundles.curator
|
||||||
implementation libs.bundles.flyway
|
implementation libs.bundles.flyway
|
||||||
|
|
||||||
|
libs.bundles.jooby.get().each {
|
||||||
|
implementation dependencies.create(it) {
|
||||||
|
exclude group: 'org.slf4j'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
testImplementation libs.bundles.slf4j.test
|
testImplementation libs.bundles.slf4j.test
|
||||||
implementation libs.bundles.mariadb
|
implementation libs.bundles.mariadb
|
||||||
|
|
||||||
|
@ -0,0 +1,178 @@
|
|||||||
|
package nu.marginalia.service.server;
|
||||||
|
|
||||||
|
import io.jooby.*;
|
||||||
|
import io.prometheus.client.Counter;
|
||||||
|
import nu.marginalia.mq.inbox.MqInboxIf;
|
||||||
|
import nu.marginalia.service.client.ServiceNotAvailableException;
|
||||||
|
import nu.marginalia.service.discovery.property.ServiceEndpoint;
|
||||||
|
import nu.marginalia.service.discovery.property.ServiceKey;
|
||||||
|
import nu.marginalia.service.discovery.property.ServicePartition;
|
||||||
|
import nu.marginalia.service.module.ServiceConfiguration;
|
||||||
|
import nu.marginalia.service.server.jte.JteModule;
|
||||||
|
import nu.marginalia.service.server.mq.ServiceMqSubscription;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.slf4j.Marker;
|
||||||
|
import org.slf4j.MarkerFactory;
|
||||||
|
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class JoobyService {
|
||||||
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
|
|
||||||
|
// Marker for filtering out sensitive content from the persistent logs
|
||||||
|
private final Marker httpMarker = MarkerFactory.getMarker("HTTP");
|
||||||
|
|
||||||
|
private final Initialization initialization;
|
||||||
|
|
||||||
|
private final static Counter request_counter = Counter.build("wmsa_request_counter", "Request Counter")
|
||||||
|
.labelNames("service", "node")
|
||||||
|
.register();
|
||||||
|
private final static Counter request_counter_good = Counter.build("wmsa_request_counter_good", "Good Requests")
|
||||||
|
.labelNames("service", "node")
|
||||||
|
.register();
|
||||||
|
private final static Counter request_counter_bad = Counter.build("wmsa_request_counter_bad", "Bad Requests")
|
||||||
|
.labelNames("service", "node")
|
||||||
|
.register();
|
||||||
|
private final static Counter request_counter_err = Counter.build("wmsa_request_counter_err", "Error Requests")
|
||||||
|
.labelNames("service", "node")
|
||||||
|
.register();
|
||||||
|
private final String serviceName;
|
||||||
|
private static volatile boolean initialized = false;
|
||||||
|
|
||||||
|
protected final MqInboxIf messageQueueInbox;
|
||||||
|
private final int node;
|
||||||
|
private GrpcServer grpcServer;
|
||||||
|
|
||||||
|
private ServiceConfiguration config;
|
||||||
|
private final List<MvcExtension> joobyServices;
|
||||||
|
private final ServiceEndpoint restEndpoint;
|
||||||
|
|
||||||
|
public JoobyService(BaseServiceParams params,
|
||||||
|
ServicePartition partition,
|
||||||
|
List<DiscoverableService> grpcServices,
|
||||||
|
List<MvcExtension> joobyServices
|
||||||
|
) throws Exception {
|
||||||
|
|
||||||
|
this.joobyServices = joobyServices;
|
||||||
|
this.initialization = params.initialization;
|
||||||
|
config = params.configuration;
|
||||||
|
node = config.node();
|
||||||
|
|
||||||
|
String inboxName = config.serviceName();
|
||||||
|
logger.info("Inbox name: {}", inboxName);
|
||||||
|
|
||||||
|
var serviceRegistry = params.serviceRegistry;
|
||||||
|
|
||||||
|
restEndpoint = serviceRegistry.registerService(ServiceKey.forRest(config.serviceId(), config.node()),
|
||||||
|
config.instanceUuid(), config.externalAddress());
|
||||||
|
|
||||||
|
var mqInboxFactory = params.messageQueueInboxFactory;
|
||||||
|
messageQueueInbox = mqInboxFactory.createSynchronousInbox(inboxName, config.node(), config.instanceUuid());
|
||||||
|
messageQueueInbox.subscribe(new ServiceMqSubscription(this));
|
||||||
|
|
||||||
|
serviceName = System.getProperty("service-name");
|
||||||
|
|
||||||
|
initialization.addCallback(params.heartbeat::start);
|
||||||
|
initialization.addCallback(messageQueueInbox::start);
|
||||||
|
initialization.addCallback(() -> params.eventLog.logEvent("SVC-INIT", serviceName + ":" + config.node()));
|
||||||
|
initialization.addCallback(() -> serviceRegistry.announceInstance(config.instanceUuid()));
|
||||||
|
|
||||||
|
Thread.setDefaultUncaughtExceptionHandler((t, e) -> {
|
||||||
|
if (e instanceof ServiceNotAvailableException) {
|
||||||
|
// reduce log spam for this common case
|
||||||
|
logger.error("Service not available: {}", e.getMessage());
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
logger.error("Uncaught exception", e);
|
||||||
|
}
|
||||||
|
request_counter_err.labels(serviceName, Integer.toString(node)).inc();
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!initialization.isReady() && ! initialized ) {
|
||||||
|
initialized = true;
|
||||||
|
grpcServer = new GrpcServer(config, serviceRegistry, partition, grpcServices);
|
||||||
|
grpcServer.start();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void startJooby(Jooby jooby) {
|
||||||
|
|
||||||
|
logger.info("{} Listening to {}:{} ({})", getClass().getSimpleName(),
|
||||||
|
restEndpoint.host(),
|
||||||
|
restEndpoint.port(),
|
||||||
|
config.externalAddress());
|
||||||
|
|
||||||
|
// FIXME: This won't work outside of docker, may need to submit a PR to jooby to allow classpaths here
|
||||||
|
jooby.install(new JteModule(Path.of("/app/resources/jte"), Path.of("/app/classes/jte-precompiled")));
|
||||||
|
jooby.assets("/*", Paths.get("/app/resources/static"));
|
||||||
|
|
||||||
|
var options = new ServerOptions();
|
||||||
|
options.setHost(config.bindAddress());
|
||||||
|
options.setPort(restEndpoint.port());
|
||||||
|
|
||||||
|
// Enable gzip compression of response data, but set compression to the lowest level
|
||||||
|
// since it doesn't really save much more space to dial it up. It's typically a
|
||||||
|
// single digit percentage difference since HTML already compresses very well with level = 1.
|
||||||
|
options.setCompressionLevel(1);
|
||||||
|
|
||||||
|
|
||||||
|
jooby.setServerOptions(options);
|
||||||
|
|
||||||
|
jooby.get("/internal/ping", ctx -> "pong");
|
||||||
|
jooby.get("/internal/started", this::isInitialized);
|
||||||
|
jooby.get("/internal/ready", this::isReady);
|
||||||
|
|
||||||
|
for (var service : joobyServices) {
|
||||||
|
jooby.mvc(service);
|
||||||
|
}
|
||||||
|
|
||||||
|
jooby.before(this::auditRequestIn);
|
||||||
|
jooby.after(this::auditRequestOut);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Object isInitialized(Context ctx) {
|
||||||
|
if (initialization.isReady()) {
|
||||||
|
return "ok";
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
ctx.setResponseCode(StatusCode.FAILED_DEPENDENCY_CODE);
|
||||||
|
return "bad";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isReady() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String isReady(Context ctx) {
|
||||||
|
if (isReady()) {
|
||||||
|
return "ok";
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
ctx.setResponseCode(StatusCode.FAILED_DEPENDENCY_CODE);
|
||||||
|
return "bad";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void auditRequestIn(Context ctx) {
|
||||||
|
request_counter.labels(serviceName, Integer.toString(node)).inc();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void auditRequestOut(Context ctx, Object result, Throwable failure) {
|
||||||
|
if (ctx.getResponseCode().value() < 400) {
|
||||||
|
request_counter_good.labels(serviceName, Integer.toString(node)).inc();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
request_counter_bad.labels(serviceName, Integer.toString(node)).inc();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (failure != null) {
|
||||||
|
logger.error("Request failed " + ctx.getMethod() + " " + ctx.getRequestURL(), failure);
|
||||||
|
request_counter_err.labels(serviceName, Integer.toString(node)).inc();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -16,7 +16,7 @@ import spark.Spark;
|
|||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
public class Service {
|
public class SparkService {
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
|
|
||||||
// Marker for filtering out sensitive content from the persistent logs
|
// Marker for filtering out sensitive content from the persistent logs
|
||||||
@ -43,7 +43,7 @@ public class Service {
|
|||||||
private final int node;
|
private final int node;
|
||||||
private GrpcServer grpcServer;
|
private GrpcServer grpcServer;
|
||||||
|
|
||||||
public Service(BaseServiceParams params,
|
public SparkService(BaseServiceParams params,
|
||||||
Runnable configureStaticFiles,
|
Runnable configureStaticFiles,
|
||||||
ServicePartition partition,
|
ServicePartition partition,
|
||||||
List<DiscoverableService> grpcServices) throws Exception {
|
List<DiscoverableService> grpcServices) throws Exception {
|
||||||
@ -126,18 +126,18 @@ public class Service {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public Service(BaseServiceParams params,
|
public SparkService(BaseServiceParams params,
|
||||||
ServicePartition partition,
|
ServicePartition partition,
|
||||||
List<DiscoverableService> grpcServices) throws Exception {
|
List<DiscoverableService> grpcServices) throws Exception {
|
||||||
this(params,
|
this(params,
|
||||||
Service::defaultSparkConfig,
|
SparkService::defaultSparkConfig,
|
||||||
partition,
|
partition,
|
||||||
grpcServices);
|
grpcServices);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Service(BaseServiceParams params) throws Exception {
|
public SparkService(BaseServiceParams params) throws Exception {
|
||||||
this(params,
|
this(params,
|
||||||
Service::defaultSparkConfig,
|
SparkService::defaultSparkConfig,
|
||||||
ServicePartition.any(),
|
ServicePartition.any(),
|
||||||
List.of());
|
List.of());
|
||||||
}
|
}
|
@ -0,0 +1,61 @@
|
|||||||
|
package nu.marginalia.service.server.jte;
|
||||||
|
|
||||||
|
import edu.umd.cs.findbugs.annotations.NonNull;
|
||||||
|
import edu.umd.cs.findbugs.annotations.Nullable;
|
||||||
|
import gg.jte.ContentType;
|
||||||
|
import gg.jte.TemplateEngine;
|
||||||
|
import gg.jte.resolve.DirectoryCodeResolver;
|
||||||
|
import io.jooby.*;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
// Temporary workaround for a bug
|
||||||
|
// APL-2.0 https://github.com/jooby-project/jooby
|
||||||
|
public class JteModule implements Extension {
|
||||||
|
private Path sourceDirectory;
|
||||||
|
private Path classDirectory;
|
||||||
|
private TemplateEngine templateEngine;
|
||||||
|
|
||||||
|
public JteModule(@NonNull Path sourceDirectory, @NonNull Path classDirectory) {
|
||||||
|
this.sourceDirectory = (Path)Objects.requireNonNull(sourceDirectory, "Source directory is required.");
|
||||||
|
this.classDirectory = (Path)Objects.requireNonNull(classDirectory, "Class directory is required.");
|
||||||
|
}
|
||||||
|
|
||||||
|
public JteModule(@NonNull Path sourceDirectory) {
|
||||||
|
this.sourceDirectory = (Path)Objects.requireNonNull(sourceDirectory, "Source directory is required.");
|
||||||
|
}
|
||||||
|
|
||||||
|
public JteModule(@NonNull TemplateEngine templateEngine) {
|
||||||
|
this.templateEngine = (TemplateEngine)Objects.requireNonNull(templateEngine, "Template engine is required.");
|
||||||
|
}
|
||||||
|
|
||||||
|
public void install(@NonNull Jooby application) {
|
||||||
|
if (this.templateEngine == null) {
|
||||||
|
this.templateEngine = create(application.getEnvironment(), this.sourceDirectory, this.classDirectory);
|
||||||
|
}
|
||||||
|
|
||||||
|
ServiceRegistry services = application.getServices();
|
||||||
|
services.put(TemplateEngine.class, this.templateEngine);
|
||||||
|
application.encoder(MediaType.html, new JteTemplateEngine(this.templateEngine));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static TemplateEngine create(@NonNull Environment environment, @NonNull Path sourceDirectory, @Nullable Path classDirectory) {
|
||||||
|
boolean dev = environment.isActive("dev", new String[]{"test"});
|
||||||
|
if (dev) {
|
||||||
|
Objects.requireNonNull(sourceDirectory, "Source directory is required.");
|
||||||
|
Path requiredClassDirectory = (Path)Optional.ofNullable(classDirectory).orElseGet(() -> sourceDirectory.resolve("jte-classes"));
|
||||||
|
TemplateEngine engine = TemplateEngine.create(new DirectoryCodeResolver(sourceDirectory), requiredClassDirectory, ContentType.Html, environment.getClassLoader());
|
||||||
|
Optional<List<String>> var10000 = Optional.ofNullable(System.getProperty("jooby.run.classpath")).map((it) -> it.split(File.pathSeparator)).map(Stream::of).map(Stream::toList);
|
||||||
|
Objects.requireNonNull(engine);
|
||||||
|
var10000.ifPresent(engine::setClassPath);
|
||||||
|
return engine;
|
||||||
|
} else {
|
||||||
|
return classDirectory == null ? TemplateEngine.createPrecompiled(ContentType.Html) : TemplateEngine.createPrecompiled(classDirectory, ContentType.Html);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,48 @@
|
|||||||
|
package nu.marginalia.service.server.jte;
|
||||||
|
|
||||||
|
import edu.umd.cs.findbugs.annotations.NonNull;
|
||||||
|
import gg.jte.TemplateEngine;
|
||||||
|
import io.jooby.Context;
|
||||||
|
import io.jooby.MapModelAndView;
|
||||||
|
import io.jooby.ModelAndView;
|
||||||
|
import io.jooby.buffer.DataBuffer;
|
||||||
|
import io.jooby.internal.jte.DataBufferOutput;
|
||||||
|
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
// Temporary workaround for a bug
|
||||||
|
// APL-2.0 https://github.com/jooby-project/jooby
|
||||||
|
class JteTemplateEngine implements io.jooby.TemplateEngine {
|
||||||
|
private final TemplateEngine jte;
|
||||||
|
private final List<String> extensions;
|
||||||
|
|
||||||
|
public JteTemplateEngine(TemplateEngine jte) {
|
||||||
|
this.jte = jte;
|
||||||
|
this.extensions = List.of(".jte", ".kte");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@NonNull @Override
|
||||||
|
public List<String> extensions() {
|
||||||
|
return extensions;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataBuffer render(Context ctx, ModelAndView modelAndView) {
|
||||||
|
var buffer = ctx.getBufferFactory().allocateBuffer();
|
||||||
|
var output = new DataBufferOutput(buffer, StandardCharsets.UTF_8);
|
||||||
|
var attributes = ctx.getAttributes();
|
||||||
|
if (modelAndView instanceof MapModelAndView mapModelAndView) {
|
||||||
|
var mapModel = new HashMap<String, Object>();
|
||||||
|
mapModel.putAll(attributes);
|
||||||
|
mapModel.putAll(mapModelAndView.getModel());
|
||||||
|
jte.render(modelAndView.getView(), mapModel, output);
|
||||||
|
} else {
|
||||||
|
jte.render(modelAndView.getView(), modelAndView.getModel(), output);
|
||||||
|
}
|
||||||
|
|
||||||
|
return buffer;
|
||||||
|
}
|
||||||
|
}
|
@ -3,7 +3,6 @@ package nu.marginalia.service.server.mq;
|
|||||||
import nu.marginalia.mq.MqMessage;
|
import nu.marginalia.mq.MqMessage;
|
||||||
import nu.marginalia.mq.inbox.MqInboxResponse;
|
import nu.marginalia.mq.inbox.MqInboxResponse;
|
||||||
import nu.marginalia.mq.inbox.MqSubscription;
|
import nu.marginalia.mq.inbox.MqSubscription;
|
||||||
import nu.marginalia.service.server.Service;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -15,10 +14,10 @@ import java.util.Map;
|
|||||||
public class ServiceMqSubscription implements MqSubscription {
|
public class ServiceMqSubscription implements MqSubscription {
|
||||||
private static final Logger logger = LoggerFactory.getLogger(ServiceMqSubscription.class);
|
private static final Logger logger = LoggerFactory.getLogger(ServiceMqSubscription.class);
|
||||||
private final Map<String, Method> requests = new HashMap<>();
|
private final Map<String, Method> requests = new HashMap<>();
|
||||||
private final Service service;
|
private final Object service;
|
||||||
|
|
||||||
|
|
||||||
public ServiceMqSubscription(Service service) {
|
public ServiceMqSubscription(Object service) {
|
||||||
this.service = service;
|
this.service = service;
|
||||||
|
|
||||||
/* Wire up all methods annotated with @MqRequest and @MqNotification
|
/* Wire up all methods annotated with @MqRequest and @MqNotification
|
||||||
|
@ -6,4 +6,8 @@ public record BrowseResultSet(Collection<BrowseResult> results, String focusDoma
|
|||||||
public BrowseResultSet(Collection<BrowseResult> results) {
|
public BrowseResultSet(Collection<BrowseResult> results) {
|
||||||
this(results, "");
|
this(results, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean hasFocusDomain() {
|
||||||
|
return focusDomain != null && !focusDomain.isBlank();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -38,6 +38,7 @@ public class DomainsProtobufCodec {
|
|||||||
sd.getIndexed(),
|
sd.getIndexed(),
|
||||||
sd.getActive(),
|
sd.getActive(),
|
||||||
sd.getScreenshot(),
|
sd.getScreenshot(),
|
||||||
|
sd.getFeed(),
|
||||||
SimilarDomain.LinkType.valueOf(sd.getLinkType().name())
|
SimilarDomain.LinkType.valueOf(sd.getLinkType().name())
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -71,6 +71,23 @@ public class DomainInformation {
|
|||||||
return new String(Character.toChars(firstChar)) + new String(Character.toChars(secondChar));
|
return new String(Character.toChars(firstChar)) + new String(Character.toChars(secondChar));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getAsnFlag() {
|
||||||
|
if (asnCountry == null || asnCountry.codePointCount(0, asnCountry.length()) != 2) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
String country = asnCountry;
|
||||||
|
|
||||||
|
if ("UK".equals(country)) {
|
||||||
|
country = "GB";
|
||||||
|
}
|
||||||
|
|
||||||
|
int offset = 0x1F1E6;
|
||||||
|
int asciiOffset = 0x41;
|
||||||
|
int firstChar = Character.codePointAt(country, 0) - asciiOffset + offset;
|
||||||
|
int secondChar = Character.codePointAt(country, 1) - asciiOffset + offset;
|
||||||
|
return new String(Character.toChars(firstChar)) + new String(Character.toChars(secondChar));
|
||||||
|
}
|
||||||
|
|
||||||
public EdgeDomain getDomain() {
|
public EdgeDomain getDomain() {
|
||||||
return this.domain;
|
return this.domain;
|
||||||
}
|
}
|
||||||
|
@ -9,6 +9,7 @@ public record SimilarDomain(EdgeUrl url,
|
|||||||
boolean indexed,
|
boolean indexed,
|
||||||
boolean active,
|
boolean active,
|
||||||
boolean screenshot,
|
boolean screenshot,
|
||||||
|
boolean feed,
|
||||||
LinkType linkType) {
|
LinkType linkType) {
|
||||||
|
|
||||||
public String getRankSymbols() {
|
public String getRankSymbols() {
|
||||||
@ -52,12 +53,12 @@ public record SimilarDomain(EdgeUrl url,
|
|||||||
return NONE;
|
return NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String faIcon() {
|
||||||
return switch (this) {
|
return switch (this) {
|
||||||
case FOWARD -> "→";
|
case FOWARD -> "fa-solid fa-arrow-right";
|
||||||
case BACKWARD -> "←";
|
case BACKWARD -> "fa-solid fa-arrow-left";
|
||||||
case BIDIRECTIONAL -> "⇆";
|
case BIDIRECTIONAL -> "fa-solid fa-arrow-right-arrow-left";
|
||||||
case NONE -> "-";
|
case NONE -> "";
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7,4 +7,8 @@ public record DictionaryResponse(String word, List<DictionaryEntry> entries) {
|
|||||||
this.word = word;
|
this.word = word;
|
||||||
this.entries = entries.stream().toList(); // Make an immutable copy
|
this.entries = entries.stream().toList(); // Make an immutable copy
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean hasEntries() {
|
||||||
|
return !entries.isEmpty();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -11,7 +11,7 @@ import nu.marginalia.api.svc.RateLimiterService;
|
|||||||
import nu.marginalia.api.svc.ResponseCache;
|
import nu.marginalia.api.svc.ResponseCache;
|
||||||
import nu.marginalia.model.gson.GsonFactory;
|
import nu.marginalia.model.gson.GsonFactory;
|
||||||
import nu.marginalia.service.server.BaseServiceParams;
|
import nu.marginalia.service.server.BaseServiceParams;
|
||||||
import nu.marginalia.service.server.Service;
|
import nu.marginalia.service.server.SparkService;
|
||||||
import nu.marginalia.service.server.mq.MqRequest;
|
import nu.marginalia.service.server.mq.MqRequest;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
@ -21,7 +21,7 @@ import spark.Request;
|
|||||||
import spark.Response;
|
import spark.Response;
|
||||||
import spark.Spark;
|
import spark.Spark;
|
||||||
|
|
||||||
public class ApiService extends Service {
|
public class ApiService extends SparkService {
|
||||||
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
private final Gson gson = GsonFactory.get();
|
private final Gson gson = GsonFactory.get();
|
||||||
@ -69,7 +69,7 @@ public class ApiService extends Service {
|
|||||||
this.searchOperator = searchOperator;
|
this.searchOperator = searchOperator;
|
||||||
|
|
||||||
Spark.get("/api/", (rq, rsp) -> {
|
Spark.get("/api/", (rq, rsp) -> {
|
||||||
rsp.redirect("https://memex.marginalia.nu/projects/edge/api.gmi");
|
rsp.redirect("https://about.marginalia-search.com/article/api/");
|
||||||
return "";
|
return "";
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@ import nu.marginalia.renderer.MustacheRenderer;
|
|||||||
import nu.marginalia.renderer.RendererFactory;
|
import nu.marginalia.renderer.RendererFactory;
|
||||||
import nu.marginalia.screenshot.ScreenshotService;
|
import nu.marginalia.screenshot.ScreenshotService;
|
||||||
import nu.marginalia.service.server.BaseServiceParams;
|
import nu.marginalia.service.server.BaseServiceParams;
|
||||||
import nu.marginalia.service.server.Service;
|
import nu.marginalia.service.server.SparkService;
|
||||||
import org.jetbrains.annotations.NotNull;
|
import org.jetbrains.annotations.NotNull;
|
||||||
import spark.Request;
|
import spark.Request;
|
||||||
import spark.Response;
|
import spark.Response;
|
||||||
@ -18,7 +18,7 @@ import spark.Spark;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
public class DatingService extends Service {
|
public class DatingService extends SparkService {
|
||||||
private final DomainBlacklist blacklist;
|
private final DomainBlacklist blacklist;
|
||||||
private final DbBrowseDomainsSimilarCosine browseSimilarCosine;
|
private final DbBrowseDomainsSimilarCosine browseSimilarCosine;
|
||||||
private final DbBrowseDomainsRandom browseRandom;
|
private final DbBrowseDomainsRandom browseRandom;
|
||||||
|
@ -5,7 +5,7 @@ import com.zaxxer.hikari.HikariDataSource;
|
|||||||
import nu.marginalia.renderer.MustacheRenderer;
|
import nu.marginalia.renderer.MustacheRenderer;
|
||||||
import nu.marginalia.renderer.RendererFactory;
|
import nu.marginalia.renderer.RendererFactory;
|
||||||
import nu.marginalia.service.server.BaseServiceParams;
|
import nu.marginalia.service.server.BaseServiceParams;
|
||||||
import nu.marginalia.service.server.Service;
|
import nu.marginalia.service.server.SparkService;
|
||||||
import nu.marginalia.service.server.StaticResources;
|
import nu.marginalia.service.server.StaticResources;
|
||||||
import org.jetbrains.annotations.NotNull;
|
import org.jetbrains.annotations.NotNull;
|
||||||
import spark.Request;
|
import spark.Request;
|
||||||
@ -15,7 +15,7 @@ import spark.Spark;
|
|||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
public class ExplorerService extends Service {
|
public class ExplorerService extends SparkService {
|
||||||
|
|
||||||
private final MustacheRenderer<Object> renderer;
|
private final MustacheRenderer<Object> renderer;
|
||||||
private final HikariDataSource dataSource;
|
private final HikariDataSource dataSource;
|
||||||
|
94
code/services-application/search-service-legacy/build.gradle
Normal file
94
code/services-application/search-service-legacy/build.gradle
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
// Build script for the search-service-legacy application.
plugins {
    id 'java'
    // Sass compilation for the stylesheets
    id 'io.freefair.sass-base' version '8.4'
    id 'io.freefair.sass-java' version '8.4'
    id 'application'
    id 'jvm-test-suite'

    // Container image build
    id 'com.google.cloud.tools.jib' version '3.4.3'
}

application {
    mainClass = 'nu.marginalia.search.SearchMain'
    applicationName = 'search-service-legacy'
}

// The zip distribution is not built for this project
tasks.distZip.enabled = false


java {
    toolchain {
        // JVM version is defined once on the root project
        languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
    }
}
sass {
    // Source maps are generated and embedded in the CSS output
    sourceMapEnabled = true
    sourceMapEmbed = true
    outputStyle = EXPANDED
}

// Shared source set and docker configuration from the root project
apply from: "$rootProject.projectDir/srcsets.gradle"
apply from: "$rootProject.projectDir/docker.gradle"

dependencies {
    // Project modules
    implementation project(':code:common:db')
    implementation project(':code:common:model')
    implementation project(':code:common:service')
    implementation project(':code:common:config')
    implementation project(':code:index:query')

    implementation project(':code:libraries:easy-lsh')
    implementation project(':code:libraries:language-processing')
    implementation project(':code:libraries:braille-block-punch-cards')
    implementation project(':code:libraries:term-frequency-dict')

    implementation project(':code:functions:live-capture:api')
    implementation project(':code:functions:math:api')
    implementation project(':code:functions:domain-info:api')
    implementation project(':code:functions:search-query:api')


    implementation project(':code:index:api')
    implementation project(':code:common:renderer')

    implementation project(':code:features-search:screenshots')
    implementation project(':code:features-search:random-websites')

    // Third-party libraries from the version catalog
    implementation libs.bundles.slf4j

    implementation libs.roaringbitmap
    implementation libs.prometheus
    implementation libs.notnull
    implementation libs.guava
    // Guice without its transitive guava; guava is declared explicitly above
    implementation dependencies.create(libs.guice.get()) {
        exclude group: 'com.google.guava'
    }
    implementation libs.handlebars
    // Spark without its transitive jetty; jetty comes from the bundle below
    implementation dependencies.create(libs.spark.get()) {
        exclude group: 'org.eclipse.jetty'
    }
    implementation libs.bundles.jetty
    implementation libs.opencsv
    implementation libs.trove
    implementation libs.fastutil
    implementation libs.bundles.gson
    implementation libs.bundles.mariadb
    implementation libs.bundles.nlp

    // Test dependencies
    testImplementation libs.bundles.slf4j.test
    testImplementation libs.bundles.junit
    testImplementation libs.mockito

    // NOTE(review): the BOM already pins 1.17.4, so the explicit versions on the
    // two testcontainers artifacts below look redundant - confirm before removing.
    testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4')
    testImplementation libs.commons.codec
    testImplementation 'org.testcontainers:mariadb:1.17.4'
    testImplementation 'org.testcontainers:junit-jupiter:1.17.4'
    testImplementation project(':code:libraries:test-helpers')
}

// Separate test task that runs only the JUnit tests tagged "paperdoll",
// with the runPaperDoll system property set and preview features enabled.
tasks.register('paperDoll', Test) {
    useJUnitPlatform {
        includeTags "paperdoll"
    }
    jvmArgs = [ '-DrunPaperDoll=true', '--enable-preview' ]
}
|
@ -0,0 +1,47 @@
|
|||||||
|
package nu.marginalia.search;
|
||||||
|
|
||||||
|
import com.google.inject.Guice;
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.Injector;
|
||||||
|
import nu.marginalia.service.MainClass;
|
||||||
|
import nu.marginalia.service.discovery.ServiceRegistryIf;
|
||||||
|
import nu.marginalia.service.module.ServiceConfiguration;
|
||||||
|
import nu.marginalia.service.module.ServiceDiscoveryModule;
|
||||||
|
import nu.marginalia.service.ServiceId;
|
||||||
|
import nu.marginalia.service.module.ServiceConfigurationModule;
|
||||||
|
import nu.marginalia.service.module.DatabaseModule;
|
||||||
|
import nu.marginalia.service.server.Initialization;
|
||||||
|
import spark.Spark;
|
||||||
|
|
||||||
|
/**
 * Entry point of the legacy search service: wires up the Guice injector,
 * boots the service and flags initialization as complete.
 */
public class SearchMain extends MainClass {
    // Not read in this class; declaring it as a constructor dependency means the
    // injector has to build the SearchService before it can build SearchMain.
    private final SearchService service;

    /**
     * @param service the search service, supplied by the injector
     */
    @Inject
    public SearchMain(SearchService service) {
        this.service = service;
    }

    /**
     * Starts the service.  The statements below are order-dependent.
     *
     * @param args command line arguments, passed on to {@code init}
     */
    public static void main(String... args) {

        // init is not defined in this file - presumably inherited from MainClass
        init(ServiceId.Search, args);

        // Static file location is configured before the injector creates any services
        Spark.staticFileLocation("/static/search/");

        Injector injector = Guice.createInjector(
                new SearchModule(),
                new ServiceConfigurationModule(ServiceId.Search),
                new ServiceDiscoveryModule(),
                // NOTE(review): meaning of the boolean flag is not visible here - confirm against DatabaseModule
                new DatabaseModule(false)
        );


        // Orchestrate the boot order for the services
        var registry = injector.getInstance(ServiceRegistryIf.class);
        var configuration = injector.getInstance(ServiceConfiguration.class);
        orchestrateBoot(registry, configuration);

        // Instantiating SearchMain pulls in SearchService through its constructor;
        // only after that is initialization marked as ready.
        injector.getInstance(SearchMain.class);
        injector.getInstance(Initialization.class).setReady();

    }
}
|
@ -0,0 +1,20 @@
|
|||||||
|
package nu.marginalia.search;
|
||||||
|
|
||||||
|
import com.google.inject.AbstractModule;
|
||||||
|
import nu.marginalia.LanguageModels;
|
||||||
|
import nu.marginalia.WebsiteUrl;
|
||||||
|
import nu.marginalia.WmsaHome;
|
||||||
|
import nu.marginalia.renderer.config.HandlebarsConfigurator;
|
||||||
|
|
||||||
|
public class SearchModule extends AbstractModule {
|
||||||
|
|
||||||
|
public void configure() {
|
||||||
|
bind(HandlebarsConfigurator.class).to(SearchHandlebarsConfigurator.class);
|
||||||
|
|
||||||
|
bind(LanguageModels.class).toInstance(WmsaHome.getLanguageModels());
|
||||||
|
|
||||||
|
bind(WebsiteUrl.class).toInstance(new WebsiteUrl(
|
||||||
|
System.getProperty("search.websiteUrl", "https://search.marginalia.nu/")));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,266 @@
|
|||||||
|
package nu.marginalia.search;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.Singleton;
|
||||||
|
import nu.marginalia.WebsiteUrl;
|
||||||
|
import nu.marginalia.api.math.MathClient;
|
||||||
|
import nu.marginalia.api.searchquery.QueryClient;
|
||||||
|
import nu.marginalia.api.searchquery.model.query.QueryResponse;
|
||||||
|
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
|
||||||
|
import nu.marginalia.bbpc.BrailleBlockPunchCards;
|
||||||
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
|
import nu.marginalia.index.query.limit.QueryLimits;
|
||||||
|
import nu.marginalia.model.EdgeDomain;
|
||||||
|
import nu.marginalia.model.EdgeUrl;
|
||||||
|
import nu.marginalia.model.crawl.DomainIndexingState;
|
||||||
|
import nu.marginalia.search.command.SearchParameters;
|
||||||
|
import nu.marginalia.search.model.ClusteredUrlDetails;
|
||||||
|
import nu.marginalia.search.model.DecoratedSearchResults;
|
||||||
|
import nu.marginalia.search.model.SearchFilters;
|
||||||
|
import nu.marginalia.search.model.UrlDetails;
|
||||||
|
import nu.marginalia.search.results.UrlDeduplicator;
|
||||||
|
import nu.marginalia.search.svc.SearchQueryCountService;
|
||||||
|
import nu.marginalia.search.svc.SearchUnitConversionService;
|
||||||
|
import org.apache.logging.log4j.util.Strings;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.slf4j.Marker;
|
||||||
|
import org.slf4j.MarkerFactory;
|
||||||
|
|
||||||
|
import javax.annotation.Nullable;
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.Future;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.IntStream;
|
||||||
|
|
||||||
|
@Singleton
|
||||||
|
public class SearchOperator {
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(SearchOperator.class);
|
||||||
|
|
||||||
|
// Marker for filtering out sensitive content from the persistent logs
|
||||||
|
private final Marker queryMarker = MarkerFactory.getMarker("QUERY");
|
||||||
|
|
||||||
|
private final MathClient mathClient;
|
||||||
|
private final DbDomainQueries domainQueries;
|
||||||
|
private final QueryClient queryClient;
|
||||||
|
private final SearchQueryParamFactory paramFactory;
|
||||||
|
private final WebsiteUrl websiteUrl;
|
||||||
|
private final SearchUnitConversionService searchUnitConversionService;
|
||||||
|
private final SearchQueryCountService searchVisitorCount;
|
||||||
|
|
||||||
|
|
||||||
|
/** Injection constructor; stores the clients and services the operator delegates to. */
@Inject
public SearchOperator(MathClient mathClient,
                      DbDomainQueries domainQueries,
                      QueryClient queryClient,
                      SearchQueryParamFactory paramFactory,
                      WebsiteUrl websiteUrl,
                      SearchUnitConversionService searchUnitConversionService,
                      SearchQueryCountService searchVisitorCount) {
    // Remote clients
    this.mathClient = mathClient;
    this.queryClient = queryClient;

    // Local services and helpers
    this.domainQueries = domainQueries;
    this.paramFactory = paramFactory;
    this.websiteUrl = websiteUrl;
    this.searchUnitConversionService = searchUnitConversionService;
    this.searchVisitorCount = searchVisitorCount;
}
|
||||||
|
|
||||||
|
public List<UrlDetails> doSiteSearch(String domain,
|
||||||
|
int domainId,
|
||||||
|
int count) {
|
||||||
|
|
||||||
|
var queryParams = paramFactory.forSiteSearch(domain, domainId, count);
|
||||||
|
var queryResponse = queryClient.search(queryParams);
|
||||||
|
|
||||||
|
return getResultsFromQuery(queryResponse);
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<UrlDetails> doBacklinkSearch(String domain) {
|
||||||
|
|
||||||
|
var queryParams = paramFactory.forBacklinkSearch(domain);
|
||||||
|
var queryResponse = queryClient.search(queryParams);
|
||||||
|
|
||||||
|
return getResultsFromQuery(queryResponse);
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<UrlDetails> doLinkSearch(String source, String dest) {
|
||||||
|
var queryParams = paramFactory.forLinkSearch(source, dest);
|
||||||
|
var queryResponse = queryClient.search(queryParams);
|
||||||
|
|
||||||
|
return getResultsFromQuery(queryResponse);
|
||||||
|
}
|
||||||
|
|
||||||
|
public DecoratedSearchResults doSearch(SearchParameters userParams) throws InterruptedException {
|
||||||
|
// The full user-facing search query does additional work to try to evaluate the query
|
||||||
|
// e.g. as a unit conversion query. This is done in parallel with the regular search.
|
||||||
|
|
||||||
|
Future<String> eval = searchUnitConversionService.tryEval(userParams.query());
|
||||||
|
|
||||||
|
// Perform the regular search
|
||||||
|
|
||||||
|
var queryParams = paramFactory.forRegularSearch(userParams);
|
||||||
|
QueryResponse queryResponse = queryClient.search(queryParams);
|
||||||
|
var queryResults = getResultsFromQuery(queryResponse);
|
||||||
|
|
||||||
|
// Cluster the results based on the query response
|
||||||
|
List<ClusteredUrlDetails> clusteredResults = SearchResultClusterer
|
||||||
|
.selectStrategy(queryResponse)
|
||||||
|
.clusterResults(queryResults, 25);
|
||||||
|
|
||||||
|
// Log the query and results
|
||||||
|
|
||||||
|
logger.info(queryMarker, "Human terms: {}", Strings.join(queryResponse.searchTermsHuman(), ','));
|
||||||
|
logger.info(queryMarker, "Search Result Count: {}", queryResults.size());
|
||||||
|
|
||||||
|
// Get the evaluation result and other data to return to the user
|
||||||
|
String evalResult = getFutureOrDefault(eval, "");
|
||||||
|
|
||||||
|
String focusDomain = queryResponse.domain();
|
||||||
|
int focusDomainId = focusDomain == null
|
||||||
|
? -1
|
||||||
|
: domainQueries.tryGetDomainId(new EdgeDomain(focusDomain)).orElse(-1);
|
||||||
|
|
||||||
|
List<String> problems = getProblems(evalResult, queryResults, queryResponse);
|
||||||
|
|
||||||
|
List<DecoratedSearchResults.Page> resultPages = IntStream.rangeClosed(1, queryResponse.totalPages())
|
||||||
|
.mapToObj(number -> new DecoratedSearchResults.Page(
|
||||||
|
number,
|
||||||
|
number == userParams.page(),
|
||||||
|
userParams.withPage(number).renderUrl(websiteUrl)
|
||||||
|
))
|
||||||
|
.toList();
|
||||||
|
|
||||||
|
// Return the results to the user
|
||||||
|
return DecoratedSearchResults.builder()
|
||||||
|
.params(userParams)
|
||||||
|
.problems(problems)
|
||||||
|
.evalResult(evalResult)
|
||||||
|
.results(clusteredResults)
|
||||||
|
.filters(new SearchFilters(websiteUrl, userParams))
|
||||||
|
.focusDomain(focusDomain)
|
||||||
|
.focusDomainId(focusDomainId)
|
||||||
|
.resultPages(resultPages)
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public List<UrlDetails> getResultsFromQuery(QueryResponse queryResponse) {
|
||||||
|
final QueryLimits limits = queryResponse.specs().queryLimits;
|
||||||
|
final UrlDeduplicator deduplicator = new UrlDeduplicator(limits.resultsByDomain());
|
||||||
|
|
||||||
|
// Update the query count (this is what you see on the front page)
|
||||||
|
searchVisitorCount.registerQuery();
|
||||||
|
|
||||||
|
return queryResponse.results().stream()
|
||||||
|
.filter(deduplicator::shouldRetain)
|
||||||
|
.limit(limits.resultsTotal())
|
||||||
|
.map(SearchOperator::createDetails)
|
||||||
|
.toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static UrlDetails createDetails(DecoratedSearchResultItem item) {
|
||||||
|
return new UrlDetails(
|
||||||
|
item.documentId(),
|
||||||
|
item.domainId(),
|
||||||
|
cleanUrl(item.url),
|
||||||
|
item.title,
|
||||||
|
item.description,
|
||||||
|
item.format,
|
||||||
|
item.features,
|
||||||
|
DomainIndexingState.ACTIVE,
|
||||||
|
item.rankingScore, // termScore
|
||||||
|
item.resultsFromDomain,
|
||||||
|
BrailleBlockPunchCards.printBits(item.bestPositions, 64),
|
||||||
|
Long.bitCount(item.bestPositions),
|
||||||
|
item.rawIndexResult,
|
||||||
|
item.rawIndexResult.keywordScores
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Replace nuisance domains with replacements where available */
|
||||||
|
private static EdgeUrl cleanUrl(EdgeUrl url) {
|
||||||
|
String topdomain = url.domain.topDomain;
|
||||||
|
String subdomain = url.domain.subDomain;
|
||||||
|
String path = url.path;
|
||||||
|
|
||||||
|
if (topdomain.equals("fandom.com")) {
|
||||||
|
int wikiIndex = path.indexOf("/wiki/");
|
||||||
|
if (wikiIndex >= 0) {
|
||||||
|
return new EdgeUrl("https", new EdgeDomain("breezewiki.com"), null, "/" + subdomain + path.substring(wikiIndex), null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (topdomain.equals("medium.com")) {
|
||||||
|
if (!subdomain.isBlank()) {
|
||||||
|
return new EdgeUrl("https", new EdgeDomain("scribe.rip"), null, path, null);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
String article = path.substring(path.indexOf("/", 1));
|
||||||
|
return new EdgeUrl("https", new EdgeDomain("scribe.rip"), null, article, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return url;
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> getProblems(String evalResult, List<UrlDetails> queryResults, QueryResponse response) throws InterruptedException {
|
||||||
|
|
||||||
|
// We don't debug the query if it's a site search
|
||||||
|
if (response.domain() == null)
|
||||||
|
return List.of();
|
||||||
|
|
||||||
|
final List<String> problems = new ArrayList<>(response.problems());
|
||||||
|
|
||||||
|
if (queryResults.size() <= 5 && null == evalResult) {
|
||||||
|
problems.add("Try rephrasing the query, changing the word order or using synonyms to get different results.");
|
||||||
|
|
||||||
|
// Try to spell check the search terms
|
||||||
|
var suggestions = getFutureOrDefault(
|
||||||
|
mathClient.spellCheck(response.searchTermsHuman()),
|
||||||
|
Map.of()
|
||||||
|
);
|
||||||
|
|
||||||
|
suggestions.forEach((term, suggestion) -> {
|
||||||
|
if (suggestion.size() > 1) {
|
||||||
|
String suggestionsStr = "\"%s\" could be spelled %s".formatted(term, suggestion.stream().map(s -> "\"" + s + "\"").collect(Collectors.joining(", ")));
|
||||||
|
problems.add(suggestionsStr);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Set<String> representativeKeywords = response.getAllKeywords();
|
||||||
|
if (representativeKeywords.size() > 1 && (representativeKeywords.contains("definition") || representativeKeywords.contains("define") || representativeKeywords.contains("meaning")))
|
||||||
|
{
|
||||||
|
problems.add("Tip: Try using a query that looks like <tt>define:word</tt> if you want a dictionary definition");
|
||||||
|
}
|
||||||
|
|
||||||
|
return problems;
|
||||||
|
}
|
||||||
|
|
||||||
|
private <T> T getFutureOrDefault(@Nullable Future<T> fut, T defaultValue) {
|
||||||
|
return getFutureOrDefault(fut, Duration.ofMillis(50), defaultValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
private <T> T getFutureOrDefault(@Nullable Future<T> fut, Duration timeout, T defaultValue) {
|
||||||
|
if (fut == null || fut.isCancelled()) {
|
||||||
|
return defaultValue;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
return fut.get(timeout.toMillis(), TimeUnit.MILLISECONDS);
|
||||||
|
}
|
||||||
|
catch (Exception ex) {
|
||||||
|
logger.warn("Error fetching eval result", ex);
|
||||||
|
return defaultValue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,104 @@
|
|||||||
|
package nu.marginalia.search;
|
||||||
|
|
||||||
|
import nu.marginalia.api.searchquery.model.query.QueryParams;
|
||||||
|
import nu.marginalia.api.searchquery.model.query.SearchQuery;
|
||||||
|
import nu.marginalia.api.searchquery.model.query.SearchSetIdentifier;
|
||||||
|
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
||||||
|
import nu.marginalia.index.query.limit.QueryLimits;
|
||||||
|
import nu.marginalia.index.query.limit.QueryStrategy;
|
||||||
|
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||||
|
import nu.marginalia.search.command.SearchParameters;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class SearchQueryParamFactory {
|
||||||
|
|
||||||
|
public QueryParams forRegularSearch(SearchParameters userParams) {
|
||||||
|
SearchQuery prototype = new SearchQuery();
|
||||||
|
var profile = userParams.profile();
|
||||||
|
|
||||||
|
profile.addTacitTerms(prototype);
|
||||||
|
userParams.js().addTacitTerms(prototype);
|
||||||
|
userParams.adtech().addTacitTerms(prototype);
|
||||||
|
|
||||||
|
return new QueryParams(
|
||||||
|
userParams.query(),
|
||||||
|
null,
|
||||||
|
prototype.searchTermsInclude,
|
||||||
|
prototype.searchTermsExclude,
|
||||||
|
prototype.searchTermsPriority,
|
||||||
|
prototype.searchTermsAdvice,
|
||||||
|
profile.getQualityLimit(),
|
||||||
|
profile.getYearLimit(),
|
||||||
|
profile.getSizeLimit(),
|
||||||
|
SpecificationLimit.none(),
|
||||||
|
List.of(),
|
||||||
|
new QueryLimits(5, 100, 200, 8192),
|
||||||
|
profile.searchSetIdentifier.name(),
|
||||||
|
userParams.strategy(),
|
||||||
|
userParams.temporalBias(),
|
||||||
|
userParams.page()
|
||||||
|
);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public QueryParams forSiteSearch(String domain, int domainId, int count) {
|
||||||
|
return new QueryParams("site:"+domain,
|
||||||
|
null,
|
||||||
|
List.of(),
|
||||||
|
List.of(),
|
||||||
|
List.of(),
|
||||||
|
List.of(),
|
||||||
|
SpecificationLimit.none(),
|
||||||
|
SpecificationLimit.none(),
|
||||||
|
SpecificationLimit.none(),
|
||||||
|
SpecificationLimit.none(),
|
||||||
|
List.of(domainId),
|
||||||
|
new QueryLimits(count, count, 100, 512),
|
||||||
|
SearchSetIdentifier.NONE.name(),
|
||||||
|
QueryStrategy.AUTO,
|
||||||
|
ResultRankingParameters.TemporalBias.NONE,
|
||||||
|
1
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
public QueryParams forBacklinkSearch(String domain) {
|
||||||
|
return new QueryParams("links:"+domain,
|
||||||
|
null,
|
||||||
|
List.of(),
|
||||||
|
List.of(),
|
||||||
|
List.of(),
|
||||||
|
List.of(),
|
||||||
|
SpecificationLimit.none(),
|
||||||
|
SpecificationLimit.none(),
|
||||||
|
SpecificationLimit.none(),
|
||||||
|
SpecificationLimit.none(),
|
||||||
|
List.of(),
|
||||||
|
new QueryLimits(100, 100, 100, 512),
|
||||||
|
SearchSetIdentifier.NONE.name(),
|
||||||
|
QueryStrategy.AUTO,
|
||||||
|
ResultRankingParameters.TemporalBias.NONE,
|
||||||
|
1
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
public QueryParams forLinkSearch(String sourceDomain, String destDomain) {
|
||||||
|
return new QueryParams("site:" + sourceDomain + " links:" + destDomain,
|
||||||
|
null,
|
||||||
|
List.of(),
|
||||||
|
List.of(),
|
||||||
|
List.of(),
|
||||||
|
List.of(),
|
||||||
|
SpecificationLimit.none(),
|
||||||
|
SpecificationLimit.none(),
|
||||||
|
SpecificationLimit.none(),
|
||||||
|
SpecificationLimit.none(),
|
||||||
|
List.of(),
|
||||||
|
new QueryLimits(100, 100, 100, 512),
|
||||||
|
SearchSetIdentifier.NONE.name(),
|
||||||
|
QueryStrategy.AUTO,
|
||||||
|
ResultRankingParameters.TemporalBias.NONE,
|
||||||
|
1
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,53 @@
|
|||||||
|
package nu.marginalia.search;
|
||||||
|
|
||||||
|
import nu.marginalia.api.searchquery.model.query.QueryResponse;
|
||||||
|
import nu.marginalia.search.model.ClusteredUrlDetails;
|
||||||
|
import nu.marginalia.search.model.UrlDetails;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
/** Functions for clustering search results */
|
||||||
|
public class SearchResultClusterer {
|
||||||
|
private SearchResultClusterer() {}
|
||||||
|
|
||||||
|
public interface SearchResultClusterStrategy {
|
||||||
|
List<ClusteredUrlDetails> clusterResults(List<UrlDetails> results, int total);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static SearchResultClusterStrategy selectStrategy(QueryResponse response) {
|
||||||
|
if (response.domain() != null && !response.domain().isBlank())
|
||||||
|
return SearchResultClusterer::noOp;
|
||||||
|
|
||||||
|
return SearchResultClusterer::byDomain;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** No clustering, just return the results as is */
|
||||||
|
private static List<ClusteredUrlDetails> noOp(List<UrlDetails> results, int total) {
|
||||||
|
if (results.isEmpty())
|
||||||
|
return List.of();
|
||||||
|
|
||||||
|
return results.stream()
|
||||||
|
.map(ClusteredUrlDetails::new)
|
||||||
|
.toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Cluster the results by domain, and return the top "total" clusters
|
||||||
|
* sorted by the relevance of the best result
|
||||||
|
*/
|
||||||
|
private static List<ClusteredUrlDetails> byDomain(List<UrlDetails> results, int total) {
|
||||||
|
if (results.isEmpty())
|
||||||
|
return List.of();
|
||||||
|
|
||||||
|
return results.stream()
|
||||||
|
.collect(
|
||||||
|
Collectors.groupingBy(details -> details.domainId)
|
||||||
|
)
|
||||||
|
.values().stream()
|
||||||
|
.map(ClusteredUrlDetails::new)
|
||||||
|
.sorted()
|
||||||
|
.limit(total)
|
||||||
|
.toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,128 @@
|
|||||||
|
package nu.marginalia.search;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import io.prometheus.client.Counter;
|
||||||
|
import io.prometheus.client.Histogram;
|
||||||
|
import nu.marginalia.WebsiteUrl;
|
||||||
|
import nu.marginalia.search.svc.*;
|
||||||
|
import nu.marginalia.service.server.BaseServiceParams;
|
||||||
|
import nu.marginalia.service.server.SparkService;
|
||||||
|
import nu.marginalia.service.server.StaticResources;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import spark.Request;
|
||||||
|
import spark.Response;
|
||||||
|
import spark.Route;
|
||||||
|
import spark.Spark;
|
||||||
|
|
||||||
|
import java.net.URLEncoder;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
|
||||||
|
public class SearchService extends SparkService {
|
||||||
|
|
||||||
|
private final WebsiteUrl websiteUrl;
|
||||||
|
private final StaticResources staticResources;
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(SearchService.class);
|
||||||
|
private static final Histogram wmsa_search_service_request_time = Histogram.build()
|
||||||
|
.name("wmsa_search_service_request_time")
|
||||||
|
.linearBuckets(0.05, 0.05, 15)
|
||||||
|
.labelNames("matchedPath", "method")
|
||||||
|
.help("Search service request time (seconds)")
|
||||||
|
.register();
|
||||||
|
private static final Counter wmsa_search_service_error_count = Counter.build()
|
||||||
|
.name("wmsa_search_service_error_count")
|
||||||
|
.labelNames("matchedPath", "method")
|
||||||
|
.help("Search service error count")
|
||||||
|
.register();
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public SearchService(BaseServiceParams params,
|
||||||
|
WebsiteUrl websiteUrl,
|
||||||
|
StaticResources staticResources,
|
||||||
|
SearchFrontPageService frontPageService,
|
||||||
|
SearchErrorPageService errorPageService,
|
||||||
|
SearchAddToCrawlQueueService addToCrawlQueueService,
|
||||||
|
SearchSiteInfoService siteInfoService,
|
||||||
|
SearchCrosstalkService crosstalkService,
|
||||||
|
SearchQueryService searchQueryService)
|
||||||
|
throws Exception
|
||||||
|
{
|
||||||
|
super(params);
|
||||||
|
|
||||||
|
this.websiteUrl = websiteUrl;
|
||||||
|
this.staticResources = staticResources;
|
||||||
|
|
||||||
|
Spark.staticFiles.expireTime(600);
|
||||||
|
|
||||||
|
SearchServiceMetrics.get("/search", searchQueryService::pathSearch);
|
||||||
|
|
||||||
|
SearchServiceMetrics.get("/", frontPageService::render);
|
||||||
|
SearchServiceMetrics.get("/news.xml", frontPageService::renderNewsFeed);
|
||||||
|
SearchServiceMetrics.get("/:resource", this::serveStatic);
|
||||||
|
|
||||||
|
SearchServiceMetrics.post("/site/suggest/", addToCrawlQueueService::suggestCrawling);
|
||||||
|
|
||||||
|
SearchServiceMetrics.get("/site-search/:site/*", this::siteSearchRedir);
|
||||||
|
|
||||||
|
SearchServiceMetrics.get("/site/:site", siteInfoService::handle);
|
||||||
|
SearchServiceMetrics.post("/site/:site", siteInfoService::handlePost);
|
||||||
|
|
||||||
|
SearchServiceMetrics.get("/crosstalk/", crosstalkService::handle);
|
||||||
|
|
||||||
|
Spark.exception(Exception.class, (e,p,q) -> {
|
||||||
|
logger.error("Error during processing", e);
|
||||||
|
wmsa_search_service_error_count.labels(p.pathInfo(), p.requestMethod()).inc();
|
||||||
|
errorPageService.serveError(p, q);
|
||||||
|
});
|
||||||
|
|
||||||
|
Spark.awaitInitialization();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/** Wraps a route with a timer and a counter */
|
||||||
|
private static class SearchServiceMetrics implements Route {
|
||||||
|
private final Route delegatedRoute;
|
||||||
|
|
||||||
|
static void get(String path, Route route) {
|
||||||
|
Spark.get(path, new SearchServiceMetrics(route));
|
||||||
|
}
|
||||||
|
static void post(String path, Route route) {
|
||||||
|
Spark.post(path, new SearchServiceMetrics(route));
|
||||||
|
}
|
||||||
|
|
||||||
|
private SearchServiceMetrics(Route delegatedRoute) {
|
||||||
|
this.delegatedRoute = delegatedRoute;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object handle(Request request, Response response) throws Exception {
|
||||||
|
return wmsa_search_service_request_time
|
||||||
|
.labels(request.matchedPath(), request.requestMethod())
|
||||||
|
.time(() -> delegatedRoute.handle(request, response));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Object serveStatic(Request request, Response response) {
|
||||||
|
String resource = request.params("resource");
|
||||||
|
staticResources.serveStatic("search", resource, request, response);
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
private Object siteSearchRedir(Request request, Response response) {
|
||||||
|
final String site = request.params("site");
|
||||||
|
final String searchTerms;
|
||||||
|
|
||||||
|
if (request.splat().length == 0) searchTerms = "";
|
||||||
|
else searchTerms = request.splat()[0];
|
||||||
|
|
||||||
|
final String query = URLEncoder.encode(String.format("%s site:%s", searchTerms, site), StandardCharsets.UTF_8).trim();
|
||||||
|
final String profile = request.queryParamOrDefault("profile", "yolo");
|
||||||
|
|
||||||
|
response.redirect(websiteUrl.withPath("search?query="+query+"&profile="+profile));
|
||||||
|
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,43 @@
|
|||||||
|
package nu.marginalia.search.command;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import nu.marginalia.search.command.commands.*;
|
||||||
|
import spark.Response;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class CommandEvaluator {
|
||||||
|
|
||||||
|
private final List<SearchCommandInterface> specialCommands = new ArrayList<>();
|
||||||
|
private final SearchCommand defaultCommand;
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public CommandEvaluator(
|
||||||
|
BrowseCommand browse,
|
||||||
|
ConvertCommand convert,
|
||||||
|
DefinitionCommand define,
|
||||||
|
BangCommand bang,
|
||||||
|
SiteRedirectCommand siteRedirect,
|
||||||
|
SearchCommand search
|
||||||
|
) {
|
||||||
|
specialCommands.add(browse);
|
||||||
|
specialCommands.add(convert);
|
||||||
|
specialCommands.add(define);
|
||||||
|
specialCommands.add(bang);
|
||||||
|
specialCommands.add(siteRedirect);
|
||||||
|
|
||||||
|
defaultCommand = search;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object eval(Response response, SearchParameters parameters) {
|
||||||
|
for (var cmd : specialCommands) {
|
||||||
|
var maybe = cmd.process(response, parameters);
|
||||||
|
if (maybe.isPresent())
|
||||||
|
return maybe.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
return defaultCommand.process(response, parameters).orElse("");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,29 @@
|
|||||||
|
package nu.marginalia.search.command;
|
||||||
|
|
||||||
|
import nu.marginalia.api.searchquery.model.query.SearchQuery;
|
||||||
|
|
||||||
|
import javax.annotation.Nullable;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
public enum SearchAdtechParameter {
|
||||||
|
DEFAULT("default"),
|
||||||
|
REDUCE("reduce", "special:ads", "special:affiliate");
|
||||||
|
|
||||||
|
public final String value;
|
||||||
|
public final String[] implictExcludeSearchTerms;
|
||||||
|
|
||||||
|
SearchAdtechParameter(String value, String... implictExcludeSearchTerms) {
|
||||||
|
this.value = value;
|
||||||
|
this.implictExcludeSearchTerms = implictExcludeSearchTerms;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static SearchAdtechParameter parse(@Nullable String value) {
|
||||||
|
if (REDUCE.value.equals(value)) return REDUCE;
|
||||||
|
|
||||||
|
return DEFAULT;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addTacitTerms(SearchQuery subquery) {
|
||||||
|
subquery.searchTermsExclude.addAll(Arrays.asList(implictExcludeSearchTerms));
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,10 @@
|
|||||||
|
package nu.marginalia.search.command;
|
||||||
|
|
||||||
|
|
||||||
|
import spark.Response;
|
||||||
|
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
public interface SearchCommandInterface {
|
||||||
|
Optional<Object> process(Response response, SearchParameters parameters);
|
||||||
|
}
|
@ -0,0 +1,31 @@
|
|||||||
|
package nu.marginalia.search.command;
|
||||||
|
|
||||||
|
import nu.marginalia.api.searchquery.model.query.SearchQuery;
|
||||||
|
|
||||||
|
import javax.annotation.Nullable;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
public enum SearchJsParameter {
|
||||||
|
DEFAULT("default"),
|
||||||
|
DENY_JS("no-js", "js:true"),
|
||||||
|
REQUIRE_JS("yes-js", "js:false");
|
||||||
|
|
||||||
|
public final String value;
|
||||||
|
public final String[] implictExcludeSearchTerms;
|
||||||
|
|
||||||
|
SearchJsParameter(String value, String... implictExcludeSearchTerms) {
|
||||||
|
this.value = value;
|
||||||
|
this.implictExcludeSearchTerms = implictExcludeSearchTerms;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static SearchJsParameter parse(@Nullable String value) {
|
||||||
|
if (DENY_JS.value.equals(value)) return DENY_JS;
|
||||||
|
if (REQUIRE_JS.value.equals(value)) return REQUIRE_JS;
|
||||||
|
|
||||||
|
return DEFAULT;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addTacitTerms(SearchQuery subquery) {
|
||||||
|
subquery.searchTermsExclude.addAll(Arrays.asList(implictExcludeSearchTerms));
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,106 @@
|
|||||||
|
package nu.marginalia.search.command;
|
||||||
|
|
||||||
|
import nu.marginalia.WebsiteUrl;
|
||||||
|
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
||||||
|
import nu.marginalia.index.query.limit.QueryStrategy;
|
||||||
|
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||||
|
import nu.marginalia.search.model.SearchProfile;
|
||||||
|
import spark.Request;
|
||||||
|
|
||||||
|
import java.net.URLEncoder;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import static nu.marginalia.search.command.SearchRecentParameter.RECENT;
|
||||||
|
|
||||||
|
public record SearchParameters(String query,
|
||||||
|
SearchProfile profile,
|
||||||
|
SearchJsParameter js,
|
||||||
|
SearchRecentParameter recent,
|
||||||
|
SearchTitleParameter searchTitle,
|
||||||
|
SearchAdtechParameter adtech,
|
||||||
|
boolean newFilter,
|
||||||
|
int page
|
||||||
|
) {
|
||||||
|
|
||||||
|
public SearchParameters(String queryString, Request request) {
|
||||||
|
this(
|
||||||
|
queryString,
|
||||||
|
SearchProfile.getSearchProfile(request.queryParams("profile")),
|
||||||
|
SearchJsParameter.parse(request.queryParams("js")),
|
||||||
|
SearchRecentParameter.parse(request.queryParams("recent")),
|
||||||
|
SearchTitleParameter.parse(request.queryParams("searchTitle")),
|
||||||
|
SearchAdtechParameter.parse(request.queryParams("adtech")),
|
||||||
|
"true".equals(request.queryParams("newfilter")),
|
||||||
|
Integer.parseInt(Objects.requireNonNullElse(request.queryParams("page"), "1"))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String profileStr() {
|
||||||
|
return profile.filterId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SearchParameters withProfile(SearchProfile profile) {
|
||||||
|
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, true, page);
|
||||||
|
}
|
||||||
|
|
||||||
|
public SearchParameters withJs(SearchJsParameter js) {
|
||||||
|
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, true, page);
|
||||||
|
}
|
||||||
|
public SearchParameters withAdtech(SearchAdtechParameter adtech) {
|
||||||
|
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, true, page);
|
||||||
|
}
|
||||||
|
|
||||||
|
public SearchParameters withRecent(SearchRecentParameter recent) {
|
||||||
|
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, true, page);
|
||||||
|
}
|
||||||
|
|
||||||
|
public SearchParameters withTitle(SearchTitleParameter title) {
|
||||||
|
return new SearchParameters(query, profile, js, recent, title, adtech, true, page);
|
||||||
|
}
|
||||||
|
|
||||||
|
public SearchParameters withPage(int page) {
|
||||||
|
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, false, page);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String renderUrl(WebsiteUrl baseUrl) {
|
||||||
|
String path = String.format("/search?query=%s&profile=%s&js=%s&adtech=%s&recent=%s&searchTitle=%s&newfilter=%s&page=%d",
|
||||||
|
URLEncoder.encode(query, StandardCharsets.UTF_8),
|
||||||
|
URLEncoder.encode(profile.filterId, StandardCharsets.UTF_8),
|
||||||
|
URLEncoder.encode(js.value, StandardCharsets.UTF_8),
|
||||||
|
URLEncoder.encode(adtech.value, StandardCharsets.UTF_8),
|
||||||
|
URLEncoder.encode(recent.value, StandardCharsets.UTF_8),
|
||||||
|
URLEncoder.encode(searchTitle.value, StandardCharsets.UTF_8),
|
||||||
|
Boolean.valueOf(newFilter).toString(),
|
||||||
|
page
|
||||||
|
);
|
||||||
|
|
||||||
|
return baseUrl.withPath(path);
|
||||||
|
}
|
||||||
|
|
||||||
|
public ResultRankingParameters.TemporalBias temporalBias() {
|
||||||
|
if (recent == RECENT) {
|
||||||
|
return ResultRankingParameters.TemporalBias.RECENT;
|
||||||
|
}
|
||||||
|
else if (profile == SearchProfile.VINTAGE) {
|
||||||
|
return ResultRankingParameters.TemporalBias.OLD;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ResultRankingParameters.TemporalBias.NONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
public QueryStrategy strategy() {
|
||||||
|
if (searchTitle == SearchTitleParameter.TITLE) {
|
||||||
|
return QueryStrategy.REQUIRE_FIELD_TITLE;
|
||||||
|
}
|
||||||
|
|
||||||
|
return QueryStrategy.AUTO;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SpecificationLimit yearLimit() {
|
||||||
|
if (recent == RECENT)
|
||||||
|
return SpecificationLimit.greaterThan(2018);
|
||||||
|
|
||||||
|
return profile.getYearLimit();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,21 @@
|
|||||||
|
package nu.marginalia.search.command;
|
||||||
|
|
||||||
|
import javax.annotation.Nullable;
|
||||||
|
|
||||||
|
public enum SearchRecentParameter {
|
||||||
|
DEFAULT("default"),
|
||||||
|
RECENT("recent");
|
||||||
|
|
||||||
|
public final String value;
|
||||||
|
|
||||||
|
SearchRecentParameter(String value) {
|
||||||
|
this.value = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static SearchRecentParameter parse(@Nullable String value) {
|
||||||
|
if (RECENT.value.equals(value)) return RECENT;
|
||||||
|
|
||||||
|
return DEFAULT;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,21 @@
|
|||||||
|
package nu.marginalia.search.command;
|
||||||
|
|
||||||
|
import javax.annotation.Nullable;
|
||||||
|
|
||||||
|
public enum SearchTitleParameter {
|
||||||
|
DEFAULT("default"),
|
||||||
|
TITLE("title");
|
||||||
|
|
||||||
|
public final String value;
|
||||||
|
|
||||||
|
SearchTitleParameter(String value) {
|
||||||
|
this.value = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static SearchTitleParameter parse(@Nullable String value) {
|
||||||
|
if (TITLE.value.equals(value)) return TITLE;
|
||||||
|
|
||||||
|
return DEFAULT;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,104 @@
|
|||||||
|
package nu.marginalia.search.command.commands;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import nu.marginalia.search.command.SearchCommandInterface;
|
||||||
|
import nu.marginalia.search.command.SearchParameters;
|
||||||
|
import nu.marginalia.search.exceptions.RedirectException;
|
||||||
|
import spark.Response;
|
||||||
|
|
||||||
|
import java.net.URLEncoder;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
public class BangCommand implements SearchCommandInterface {
|
||||||
|
private final Map<String, String> bangsToPattern = new HashMap<>();
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public BangCommand()
|
||||||
|
{
|
||||||
|
bangsToPattern.put("!g", "https://www.google.com/search?q=%s");
|
||||||
|
bangsToPattern.put("!ddg", "https://duckduckgo.com/?q=%s");
|
||||||
|
bangsToPattern.put("!w", "https://search.marginalia.nu/search?query=%s+site:en.wikipedia.org&profile=wiki");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Optional<Object> process(Response response, SearchParameters parameters) {
|
||||||
|
|
||||||
|
for (var entry : bangsToPattern.entrySet()) {
|
||||||
|
String bangPattern = entry.getKey();
|
||||||
|
String redirectPattern = entry.getValue();
|
||||||
|
|
||||||
|
var match = matchBangPattern(parameters.query(), bangPattern);
|
||||||
|
|
||||||
|
if (match.isPresent()) {
|
||||||
|
var url = String.format(redirectPattern, URLEncoder.encode(match.get(), StandardCharsets.UTF_8));
|
||||||
|
throw new RedirectException(url);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** If the query contains the bang pattern bangKey, return the query with the bang pattern removed. */
|
||||||
|
Optional<String> matchBangPattern(String query, String bangKey) {
|
||||||
|
var bm = new BangMatcher(query);
|
||||||
|
|
||||||
|
while (bm.findNext(bangKey)) {
|
||||||
|
|
||||||
|
if (!bm.isRelativeSpaceOrInvalid(-1))
|
||||||
|
continue;
|
||||||
|
if (!bm.isRelativeSpaceOrInvalid(bangKey.length()))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
String prefix = bm.prefix().trim();
|
||||||
|
String suffix = bm.suffix(bangKey.length()).trim();
|
||||||
|
|
||||||
|
String ret = (prefix + " " + suffix).trim();
|
||||||
|
|
||||||
|
return Optional.of(ret)
|
||||||
|
.filter(s -> !s.isBlank());
|
||||||
|
}
|
||||||
|
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class BangMatcher {
|
||||||
|
private final String str;
|
||||||
|
private int pos;
|
||||||
|
|
||||||
|
public String prefix() {
|
||||||
|
return str.substring(0, pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String suffix(int offset) {
|
||||||
|
if (pos+offset < str.length())
|
||||||
|
return str.substring(pos + offset);
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
public BangMatcher(String str) {
|
||||||
|
this.str = str;
|
||||||
|
this.pos = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean findNext(String pattern) {
|
||||||
|
if (pos + 1 >= str.length())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return (pos = str.indexOf(pattern, pos + 1)) >= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isRelativeSpaceOrInvalid(int offset) {
|
||||||
|
if (offset + pos < 0)
|
||||||
|
return true;
|
||||||
|
if (offset + pos >= str.length())
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return Character.isSpaceChar(str.charAt(offset + pos));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,36 @@
|
|||||||
|
package nu.marginalia.search.command.commands;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import nu.marginalia.renderer.MustacheRenderer;
|
||||||
|
import nu.marginalia.renderer.RendererFactory;
|
||||||
|
import nu.marginalia.search.command.SearchCommandInterface;
|
||||||
|
import nu.marginalia.search.command.SearchParameters;
|
||||||
|
import nu.marginalia.search.svc.SearchUnitConversionService;
|
||||||
|
import spark.Response;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
public class ConvertCommand implements SearchCommandInterface {
|
||||||
|
private final SearchUnitConversionService searchUnitConversionService;
|
||||||
|
private final MustacheRenderer<Map<String, String>> conversionRenderer;
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public ConvertCommand(SearchUnitConversionService searchUnitConversionService, RendererFactory rendererFactory) throws IOException {
|
||||||
|
this.searchUnitConversionService = searchUnitConversionService;
|
||||||
|
|
||||||
|
conversionRenderer = rendererFactory.renderer("search/conversion-results");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Optional<Object> process(Response response, SearchParameters parameters) {
|
||||||
|
var conversion = searchUnitConversionService.tryConversion(parameters.query());
|
||||||
|
return conversion.map(s -> conversionRenderer.render(Map.of(
|
||||||
|
"query", parameters.query(),
|
||||||
|
"result", s,
|
||||||
|
"profile", parameters.profileStr())
|
||||||
|
));
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,70 @@
|
|||||||
|
|
||||||
|
package nu.marginalia.search.command.commands;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import nu.marginalia.api.math.MathClient;
|
||||||
|
import nu.marginalia.api.math.model.DictionaryResponse;
|
||||||
|
import nu.marginalia.renderer.MustacheRenderer;
|
||||||
|
import nu.marginalia.search.command.SearchCommandInterface;
|
||||||
|
import nu.marginalia.search.command.SearchParameters;
|
||||||
|
import nu.marginalia.renderer.RendererFactory;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import spark.Response;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.function.Predicate;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
public class DefinitionCommand implements SearchCommandInterface {
|
||||||
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
|
|
||||||
|
private final MustacheRenderer<DictionaryResponse> dictionaryRenderer;
|
||||||
|
private final MathClient mathClient;
|
||||||
|
|
||||||
|
|
||||||
|
private final Predicate<String> queryPatternPredicate = Pattern.compile("^define:[A-Za-z\\s-0-9]+$").asPredicate();
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public DefinitionCommand(RendererFactory rendererFactory, MathClient mathClient)
|
||||||
|
throws IOException
|
||||||
|
{
|
||||||
|
|
||||||
|
dictionaryRenderer = rendererFactory.renderer("search/dictionary-results");
|
||||||
|
this.mathClient = mathClient;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Optional<Object> process(Response response, SearchParameters parameters) {
|
||||||
|
if (!queryPatternPredicate.test(parameters.query())) {
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
var results = lookupDefinition(parameters.query());
|
||||||
|
|
||||||
|
return Optional.of(dictionaryRenderer.render(results,
|
||||||
|
Map.of("query", parameters.query(),
|
||||||
|
"profile", parameters.profileStr())
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private DictionaryResponse lookupDefinition(String humanQuery) {
|
||||||
|
String definePrefix = "define:";
|
||||||
|
String word = humanQuery.substring(definePrefix.length()).toLowerCase();
|
||||||
|
|
||||||
|
try {
|
||||||
|
return mathClient
|
||||||
|
.dictionaryLookup(word)
|
||||||
|
.get(250, TimeUnit.MILLISECONDS);
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
logger.error("Failed to lookup definition for word: " + word, e);
|
||||||
|
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,39 @@
|
|||||||
|
package nu.marginalia.search.command.commands;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import nu.marginalia.renderer.MustacheRenderer;
|
||||||
|
import nu.marginalia.renderer.RendererFactory;
|
||||||
|
import nu.marginalia.search.SearchOperator;
|
||||||
|
import nu.marginalia.search.command.SearchCommandInterface;
|
||||||
|
import nu.marginalia.search.command.SearchParameters;
|
||||||
|
import nu.marginalia.search.model.DecoratedSearchResults;
|
||||||
|
import spark.Response;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
public class SearchCommand implements SearchCommandInterface {
|
||||||
|
private final SearchOperator searchOperator;
|
||||||
|
private final MustacheRenderer<DecoratedSearchResults> searchResultsRenderer;
|
||||||
|
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public SearchCommand(SearchOperator searchOperator,
|
||||||
|
RendererFactory rendererFactory) throws IOException {
|
||||||
|
this.searchOperator = searchOperator;
|
||||||
|
|
||||||
|
searchResultsRenderer = rendererFactory.renderer("search/search-results");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Optional<Object> process(Response response, SearchParameters parameters) {
|
||||||
|
try {
|
||||||
|
DecoratedSearchResults results = searchOperator.doSearch(parameters);
|
||||||
|
return Optional.of(searchResultsRenderer.render(results));
|
||||||
|
}
|
||||||
|
catch (InterruptedException ex) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,50 @@
|
|||||||
|
package nu.marginalia.search.command.commands;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import nu.marginalia.search.command.SearchCommandInterface;
|
||||||
|
import nu.marginalia.search.command.SearchParameters;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import spark.Response;
|
||||||
|
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.function.Predicate;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
public class SiteRedirectCommand implements SearchCommandInterface {
|
||||||
|
|
||||||
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
|
|
||||||
|
private final Predicate<String> queryPatternPredicate = Pattern.compile("^(site|links):[.A-Za-z\\-0-9]+$").asPredicate();
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public SiteRedirectCommand() {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Optional<Object> process(Response response, SearchParameters parameters) {
|
||||||
|
if (!queryPatternPredicate.test(parameters.query())) {
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
int idx = parameters.query().indexOf(':');
|
||||||
|
String prefix = parameters.query().substring(0, idx);
|
||||||
|
String domain = parameters.query().substring(idx + 1).toLowerCase();
|
||||||
|
|
||||||
|
// Use an HTML redirect here, so we can use relative URLs
|
||||||
|
String view = switch (prefix) {
|
||||||
|
case "links" -> "links";
|
||||||
|
default -> "info";
|
||||||
|
};
|
||||||
|
|
||||||
|
return Optional.of("""
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>Redirecting...</title>
|
||||||
|
<meta http-equiv="refresh" content="0; url=/site/%s?view=%s">
|
||||||
|
""".formatted(domain, view)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,66 @@
|
|||||||
|
package nu.marginalia.search.db;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
|
|
||||||
|
import java.sql.ResultSet;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.function.Consumer;
|
||||||
|
|
||||||
|
public class DbNearDomainsQuery {
|
||||||
|
|
||||||
|
private final HikariDataSource dataSource;
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public DbNearDomainsQuery(HikariDataSource dataSource) {
|
||||||
|
this.dataSource = dataSource;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Integer> getRelatedDomains(String term, Consumer<String> onProblem) {
|
||||||
|
List<Integer> ret = new ArrayList<>();
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
|
||||||
|
var selfStmt = conn.prepareStatement("""
|
||||||
|
SELECT ID FROM EC_DOMAIN WHERE DOMAIN_NAME=?
|
||||||
|
""");
|
||||||
|
var stmt = conn.prepareStatement("""
|
||||||
|
SELECT NEIGHBOR_ID, ND.INDEXED, ND.STATE FROM EC_DOMAIN_NEIGHBORS_2
|
||||||
|
INNER JOIN EC_DOMAIN ND ON ND.ID=NEIGHBOR_ID
|
||||||
|
WHERE DOMAIN_ID=?
|
||||||
|
""")) {
|
||||||
|
ResultSet rsp;
|
||||||
|
selfStmt.setString(1, term);
|
||||||
|
rsp = selfStmt.executeQuery();
|
||||||
|
int domainId = -1;
|
||||||
|
if (rsp.next()) {
|
||||||
|
domainId = rsp.getInt(1);
|
||||||
|
ret.add(domainId);
|
||||||
|
}
|
||||||
|
|
||||||
|
stmt.setInt(1, domainId);
|
||||||
|
rsp = stmt.executeQuery();
|
||||||
|
|
||||||
|
while (rsp.next()) {
|
||||||
|
int id = rsp.getInt(1);
|
||||||
|
int indexed = rsp.getInt(2);
|
||||||
|
String state = rsp.getString(3);
|
||||||
|
|
||||||
|
if (indexed > 0 && ("ACTIVE".equalsIgnoreCase(state) || "SOCIAL_MEDIA".equalsIgnoreCase(state) || "SPECIAL".equalsIgnoreCase(state))) {
|
||||||
|
ret.add(id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
catch (Exception ex) {
|
||||||
|
throw new RuntimeException(ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret.isEmpty()) {
|
||||||
|
onProblem.accept("Could not find domains adjacent " + term);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,102 @@
|
|||||||
|
package nu.marginalia.search.model;
|
||||||
|
|
||||||
|
import nu.marginalia.model.EdgeDomain;
|
||||||
|
import nu.marginalia.model.idx.WordFlags;
|
||||||
|
import org.jetbrains.annotations.NotNull;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/** A class to hold a list of UrlDetails, grouped by domain, where the first one is the main result
|
||||||
|
* and the rest are additional results, for summary display. */
|
||||||
|
public class ClusteredUrlDetails implements Comparable<ClusteredUrlDetails> {
|
||||||
|
|
||||||
|
@NotNull
|
||||||
|
public final UrlDetails first;
|
||||||
|
|
||||||
|
@NotNull
|
||||||
|
public final List<UrlDetails> rest;
|
||||||
|
|
||||||
|
/** Create a new ClusteredUrlDetails from a collection of UrlDetails,
|
||||||
|
* with the best result as "first", and the others, in descending order
|
||||||
|
* of quality as the "rest"...
|
||||||
|
*
|
||||||
|
* @param details A collection of UrlDetails, which must not be empty.
|
||||||
|
*/
|
||||||
|
public ClusteredUrlDetails(Collection<UrlDetails> details) {
|
||||||
|
var items = new ArrayList<>(details);
|
||||||
|
|
||||||
|
items.sort(Comparator.naturalOrder());
|
||||||
|
|
||||||
|
if (items.isEmpty())
|
||||||
|
throw new IllegalArgumentException("Empty list of details");
|
||||||
|
|
||||||
|
this.first = items.removeFirst();
|
||||||
|
this.rest = items;
|
||||||
|
|
||||||
|
double bestScore = first.termScore;
|
||||||
|
double scoreLimit = Math.min(4.0, bestScore * 1.25);
|
||||||
|
|
||||||
|
this.rest.removeIf(urlDetail -> {
|
||||||
|
if (urlDetail.termScore > scoreLimit)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
for (var keywordScore : urlDetail.resultItem.keywordScores) {
|
||||||
|
if (keywordScore.isKeywordSpecial())
|
||||||
|
continue;
|
||||||
|
if (keywordScore.hasTermFlag(WordFlags.Title))
|
||||||
|
return false;
|
||||||
|
if (keywordScore.hasTermFlag(WordFlags.ExternalLink))
|
||||||
|
return false;
|
||||||
|
if (keywordScore.hasTermFlag(WordFlags.UrlDomain))
|
||||||
|
return false;
|
||||||
|
if (keywordScore.hasTermFlag(WordFlags.UrlPath))
|
||||||
|
return false;
|
||||||
|
if (keywordScore.hasTermFlag(WordFlags.Subjects))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public ClusteredUrlDetails(@NotNull UrlDetails onlyFirst) {
|
||||||
|
this.first = onlyFirst;
|
||||||
|
this.rest = Collections.emptyList();
|
||||||
|
}
|
||||||
|
|
||||||
|
// For renderer use, do not remove
|
||||||
|
public @NotNull UrlDetails getFirst() {
|
||||||
|
return first;
|
||||||
|
}
|
||||||
|
|
||||||
|
// For renderer use, do not remove
|
||||||
|
public @NotNull List<UrlDetails> getRest() {
|
||||||
|
return rest;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public EdgeDomain getDomain() {
|
||||||
|
return first.url.getDomain();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasMultiple() {
|
||||||
|
return !rest.isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the total number of results from the same domain,
|
||||||
|
* including such results that are not included here. */
|
||||||
|
public int totalCount() {
|
||||||
|
return first.resultsFromSameDomain;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int remainingCount() {
|
||||||
|
return totalCount() - 1 - rest.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compareTo(@NotNull ClusteredUrlDetails o) {
|
||||||
|
return Objects.compare(first, o.first, UrlDetails::compareTo);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,186 @@
|
|||||||
|
package nu.marginalia.search.model;
|
||||||
|
|
||||||
|
import nu.marginalia.search.command.SearchParameters;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A class to hold details about the search results,
|
||||||
|
* as used by the handlebars templating engine to render
|
||||||
|
* the search results page.
|
||||||
|
*/
|
||||||
|
public class DecoratedSearchResults {
|
||||||
|
private final SearchParameters params;
|
||||||
|
private final List<String> problems;
|
||||||
|
private final String evalResult;
|
||||||
|
|
||||||
|
public DecoratedSearchResults(SearchParameters params,
|
||||||
|
List<String> problems,
|
||||||
|
String evalResult,
|
||||||
|
List<ClusteredUrlDetails> results,
|
||||||
|
String focusDomain,
|
||||||
|
int focusDomainId,
|
||||||
|
SearchFilters filters,
|
||||||
|
List<Page> resultPages) {
|
||||||
|
this.params = params;
|
||||||
|
this.problems = problems;
|
||||||
|
this.evalResult = evalResult;
|
||||||
|
this.results = results;
|
||||||
|
this.focusDomain = focusDomain;
|
||||||
|
this.focusDomainId = focusDomainId;
|
||||||
|
this.filters = filters;
|
||||||
|
this.resultPages = resultPages;
|
||||||
|
}
|
||||||
|
|
||||||
|
public final List<ClusteredUrlDetails> results;
|
||||||
|
|
||||||
|
public static DecoratedSearchResultsBuilder builder() {
|
||||||
|
return new DecoratedSearchResultsBuilder();
|
||||||
|
}
|
||||||
|
|
||||||
|
public SearchParameters getParams() {
|
||||||
|
return params;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getProblems() {
|
||||||
|
return problems;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getEvalResult() {
|
||||||
|
return evalResult;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<ClusteredUrlDetails> getResults() {
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getFocusDomain() {
|
||||||
|
return focusDomain;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getFocusDomainId() {
|
||||||
|
return focusDomainId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SearchFilters getFilters() {
|
||||||
|
return filters;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Page> getResultPages() {
|
||||||
|
return resultPages;
|
||||||
|
}
|
||||||
|
|
||||||
|
private final String focusDomain;
|
||||||
|
private final int focusDomainId;
|
||||||
|
private final SearchFilters filters;
|
||||||
|
|
||||||
|
private final List<Page> resultPages;
|
||||||
|
|
||||||
|
public boolean isMultipage() {
|
||||||
|
return resultPages.size() > 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public record Page(int number, boolean current, String href) {
|
||||||
|
}
|
||||||
|
|
||||||
|
// These are used by the search form, they look unused in the IDE but are used by the mustache template,
|
||||||
|
// DO NOT REMOVE THEM
|
||||||
|
public int getResultCount() {
|
||||||
|
return results.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getQuery() {
|
||||||
|
return params.query();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getProfile() {
|
||||||
|
return params.profile().filterId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getJs() {
|
||||||
|
return params.js().value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getAdtech() {
|
||||||
|
return params.adtech().value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getRecent() {
|
||||||
|
return params.recent().value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSearchTitle() {
|
||||||
|
return params.searchTitle().value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int page() {
|
||||||
|
return params.page();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Boolean isNewFilter() {
|
||||||
|
return params.newFilter();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static class DecoratedSearchResultsBuilder {
|
||||||
|
private SearchParameters params;
|
||||||
|
private List<String> problems;
|
||||||
|
private String evalResult;
|
||||||
|
private List<ClusteredUrlDetails> results;
|
||||||
|
private String focusDomain;
|
||||||
|
private int focusDomainId;
|
||||||
|
private SearchFilters filters;
|
||||||
|
private List<Page> resultPages;
|
||||||
|
|
||||||
|
DecoratedSearchResultsBuilder() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public DecoratedSearchResultsBuilder params(SearchParameters params) {
|
||||||
|
this.params = params;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public DecoratedSearchResultsBuilder problems(List<String> problems) {
|
||||||
|
this.problems = problems;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public DecoratedSearchResultsBuilder evalResult(String evalResult) {
|
||||||
|
this.evalResult = evalResult;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public DecoratedSearchResultsBuilder results(List<ClusteredUrlDetails> results) {
|
||||||
|
this.results = results;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public DecoratedSearchResultsBuilder focusDomain(String focusDomain) {
|
||||||
|
this.focusDomain = focusDomain;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public DecoratedSearchResultsBuilder focusDomainId(int focusDomainId) {
|
||||||
|
this.focusDomainId = focusDomainId;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public DecoratedSearchResultsBuilder filters(SearchFilters filters) {
|
||||||
|
this.filters = filters;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public DecoratedSearchResultsBuilder resultPages(List<Page> resultPages) {
|
||||||
|
this.resultPages = resultPages;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public DecoratedSearchResults build() {
|
||||||
|
return new DecoratedSearchResults(this.params, this.problems, this.evalResult, this.results, this.focusDomain, this.focusDomainId, this.filters, this.resultPages);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return "DecoratedSearchResults.DecoratedSearchResultsBuilder(params=" + this.params + ", problems=" + this.problems + ", evalResult=" + this.evalResult + ", results=" + this.results + ", focusDomain=" + this.focusDomain + ", focusDomainId=" + this.focusDomainId + ", filters=" + this.filters + ", resultPages=" + this.resultPages + ")";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,223 @@
|
|||||||
|
package nu.marginalia.search.model;
|
||||||
|
|
||||||
|
import nu.marginalia.WebsiteUrl;
|
||||||
|
import nu.marginalia.search.command.*;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/** Models the search filters displayed next to the search results */
|
||||||
|
public class SearchFilters {
|
||||||
|
private final WebsiteUrl url;
|
||||||
|
|
||||||
|
public final String currentFilter;
|
||||||
|
|
||||||
|
// These are necessary for the renderer to access the data
|
||||||
|
public final RemoveJsOption removeJsOption;
|
||||||
|
public final ReduceAdtechOption reduceAdtechOption;
|
||||||
|
public final ShowRecentOption showRecentOption;
|
||||||
|
public final SearchTitleOption searchTitleOption;
|
||||||
|
|
||||||
|
public final List<List<Filter>> filterGroups;
|
||||||
|
|
||||||
|
// Getters are for the renderer to access the data
|
||||||
|
|
||||||
|
|
||||||
|
public String getCurrentFilter() {
|
||||||
|
return currentFilter;
|
||||||
|
}
|
||||||
|
|
||||||
|
public RemoveJsOption getRemoveJsOption() {
|
||||||
|
return removeJsOption;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ReduceAdtechOption getReduceAdtechOption() {
|
||||||
|
return reduceAdtechOption;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ShowRecentOption getShowRecentOption() {
|
||||||
|
return showRecentOption;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SearchTitleOption getSearchTitleOption() {
|
||||||
|
return searchTitleOption;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<List<Filter>> getFilterGroups() {
|
||||||
|
return filterGroups;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SearchFilters(WebsiteUrl url, SearchParameters parameters) {
|
||||||
|
this.url = url;
|
||||||
|
|
||||||
|
removeJsOption = new RemoveJsOption(parameters);
|
||||||
|
reduceAdtechOption = new ReduceAdtechOption(parameters);
|
||||||
|
showRecentOption = new ShowRecentOption(parameters);
|
||||||
|
searchTitleOption = new SearchTitleOption(parameters);
|
||||||
|
|
||||||
|
|
||||||
|
currentFilter = parameters.profile().filterId;
|
||||||
|
|
||||||
|
filterGroups = List.of(
|
||||||
|
List.of(
|
||||||
|
new Filter("No Filter", SearchProfile.NO_FILTER, parameters),
|
||||||
|
// new Filter("Popular", SearchProfile.POPULAR, parameters),
|
||||||
|
new Filter("Small Web", SearchProfile.SMALLWEB, parameters),
|
||||||
|
new Filter("Blogosphere", SearchProfile.BLOGOSPHERE, parameters),
|
||||||
|
new Filter("Academia", SearchProfile.ACADEMIA, parameters)
|
||||||
|
),
|
||||||
|
List.of(
|
||||||
|
new Filter("Vintage", SearchProfile.VINTAGE, parameters),
|
||||||
|
new Filter("Plain Text", SearchProfile.PLAIN_TEXT, parameters),
|
||||||
|
new Filter("~tilde", SearchProfile.TILDE, parameters)
|
||||||
|
),
|
||||||
|
List.of(
|
||||||
|
new Filter("Wiki", SearchProfile.WIKI, parameters),
|
||||||
|
new Filter("Forum", SearchProfile.FORUM, parameters),
|
||||||
|
new Filter("Docs", SearchProfile.DOCS, parameters),
|
||||||
|
new Filter("Recipes", SearchProfile.FOOD, parameters)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public class RemoveJsOption {
|
||||||
|
private final SearchJsParameter value;
|
||||||
|
|
||||||
|
public final String url;
|
||||||
|
public String getUrl() {
|
||||||
|
return url;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isSet() {
|
||||||
|
return value.equals(SearchJsParameter.DENY_JS);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String name() {
|
||||||
|
return "Remove Javascript";
|
||||||
|
}
|
||||||
|
|
||||||
|
public RemoveJsOption(SearchParameters parameters) {
|
||||||
|
this.value = parameters.js();
|
||||||
|
|
||||||
|
var toggledValue = switch (parameters.js()) {
|
||||||
|
case DENY_JS -> SearchJsParameter.DEFAULT;
|
||||||
|
default -> SearchJsParameter.DENY_JS;
|
||||||
|
};
|
||||||
|
|
||||||
|
this.url = parameters.withJs(toggledValue).renderUrl(SearchFilters.this.url);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public class ReduceAdtechOption {
|
||||||
|
private final SearchAdtechParameter value;
|
||||||
|
|
||||||
|
public final String url;
|
||||||
|
public String getUrl() {
|
||||||
|
return url;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isSet() {
|
||||||
|
return value.equals(SearchAdtechParameter.REDUCE);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String name() {
|
||||||
|
return "Reduce Adtech";
|
||||||
|
}
|
||||||
|
|
||||||
|
public ReduceAdtechOption(SearchParameters parameters) {
|
||||||
|
this.value = parameters.adtech();
|
||||||
|
|
||||||
|
var toggledValue = switch (parameters.adtech()) {
|
||||||
|
case REDUCE -> SearchAdtechParameter.DEFAULT;
|
||||||
|
default -> SearchAdtechParameter.REDUCE;
|
||||||
|
};
|
||||||
|
|
||||||
|
this.url = parameters.withAdtech(toggledValue).renderUrl(SearchFilters.this.url);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public class ShowRecentOption {
|
||||||
|
private final SearchRecentParameter value;
|
||||||
|
|
||||||
|
public final String url;
|
||||||
|
public String getUrl() {
|
||||||
|
return url;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isSet() {
|
||||||
|
return value.equals(SearchRecentParameter.RECENT);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String name() {
|
||||||
|
return "Recent Results";
|
||||||
|
}
|
||||||
|
|
||||||
|
public ShowRecentOption(SearchParameters parameters) {
|
||||||
|
this.value = parameters.recent();
|
||||||
|
|
||||||
|
var toggledValue = switch (parameters.recent()) {
|
||||||
|
case RECENT -> SearchRecentParameter.DEFAULT;
|
||||||
|
default -> SearchRecentParameter.RECENT;
|
||||||
|
};
|
||||||
|
|
||||||
|
this.url = parameters.withRecent(toggledValue).renderUrl(SearchFilters.this.url);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public class SearchTitleOption {
|
||||||
|
private final SearchTitleParameter value;
|
||||||
|
|
||||||
|
public final String url;
|
||||||
|
public String getUrl() {
|
||||||
|
return url;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isSet() {
|
||||||
|
return value.equals(SearchTitleParameter.TITLE);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String name() {
|
||||||
|
return "Search In Title";
|
||||||
|
}
|
||||||
|
|
||||||
|
public SearchTitleOption(SearchParameters parameters) {
|
||||||
|
this.value = parameters.searchTitle();
|
||||||
|
|
||||||
|
var toggledValue = switch (parameters.searchTitle()) {
|
||||||
|
case TITLE -> SearchTitleParameter.DEFAULT;
|
||||||
|
default -> SearchTitleParameter.TITLE;
|
||||||
|
};
|
||||||
|
|
||||||
|
this.url = parameters.withTitle(toggledValue).renderUrl(SearchFilters.this.url);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public class Filter {
|
||||||
|
public final SearchProfile profile;
|
||||||
|
|
||||||
|
public final String displayName;
|
||||||
|
public final boolean current;
|
||||||
|
public final String url;
|
||||||
|
|
||||||
|
public Filter(String displayName, SearchProfile profile, SearchParameters parameters) {
|
||||||
|
this.displayName = displayName;
|
||||||
|
this.profile = profile;
|
||||||
|
this.current = profile.equals(parameters.profile());
|
||||||
|
|
||||||
|
this.url = parameters.withProfile(profile).renderUrl(SearchFilters.this.url);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getDisplayName() {
|
||||||
|
return displayName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isCurrent() {
|
||||||
|
return current;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getUrl() {
|
||||||
|
return url;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,105 @@
|
|||||||
|
package nu.marginalia.search.model;
|
||||||
|
|
||||||
|
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||||
|
import nu.marginalia.model.crawl.HtmlFeature;
|
||||||
|
import nu.marginalia.api.searchquery.model.query.SearchQuery;
|
||||||
|
import nu.marginalia.api.searchquery.model.query.SearchSetIdentifier;
|
||||||
|
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
public enum SearchProfile {
|
||||||
|
POPULAR("default", SearchSetIdentifier.POPULAR),
|
||||||
|
SMALLWEB("modern", SearchSetIdentifier.SMALLWEB),
|
||||||
|
BLOGOSPHERE("blogosphere", SearchSetIdentifier.BLOGS),
|
||||||
|
NO_FILTER("corpo", SearchSetIdentifier.NONE),
|
||||||
|
VINTAGE("vintage", SearchSetIdentifier.NONE),
|
||||||
|
TILDE("tilde", SearchSetIdentifier.NONE),
|
||||||
|
CORPO_CLEAN("corpo-clean", SearchSetIdentifier.NONE),
|
||||||
|
ACADEMIA("academia", SearchSetIdentifier.NONE),
|
||||||
|
PLAIN_TEXT("plain-text", SearchSetIdentifier.NONE),
|
||||||
|
FOOD("food", SearchSetIdentifier.POPULAR),
|
||||||
|
FORUM("forum", SearchSetIdentifier.NONE),
|
||||||
|
WIKI("wiki", SearchSetIdentifier.NONE),
|
||||||
|
DOCS("docs", SearchSetIdentifier.NONE),
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
public final String filterId;
|
||||||
|
public final SearchSetIdentifier searchSetIdentifier;
|
||||||
|
|
||||||
|
SearchProfile(String filterId, SearchSetIdentifier searchSetIdentifier) {
|
||||||
|
this.filterId = filterId;
|
||||||
|
this.searchSetIdentifier = searchSetIdentifier;
|
||||||
|
}
|
||||||
|
|
||||||
|
private final static SearchProfile[] values = values();
|
||||||
|
public static SearchProfile getSearchProfile(String param) {
|
||||||
|
if (null == param) {
|
||||||
|
return NO_FILTER;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (var profile : values) {
|
||||||
|
if (Objects.equals(profile.filterId, param)) {
|
||||||
|
return profile;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return NO_FILTER;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addTacitTerms(SearchQuery subquery) {
|
||||||
|
if (this == ACADEMIA) {
|
||||||
|
subquery.searchTermsAdvice.add("special:academia");
|
||||||
|
}
|
||||||
|
if (this == VINTAGE) {
|
||||||
|
subquery.searchTermsPriority.add("format:html123");
|
||||||
|
subquery.searchTermsPriority.add("js:false");
|
||||||
|
}
|
||||||
|
if (this == TILDE) {
|
||||||
|
subquery.searchTermsAdvice.add("special:tilde");
|
||||||
|
}
|
||||||
|
if (this == PLAIN_TEXT) {
|
||||||
|
subquery.searchTermsAdvice.add("format:plain");
|
||||||
|
}
|
||||||
|
if (this == WIKI) {
|
||||||
|
subquery.searchTermsAdvice.add("generator:wiki");
|
||||||
|
}
|
||||||
|
if (this == FORUM) {
|
||||||
|
subquery.searchTermsAdvice.add("generator:forum");
|
||||||
|
}
|
||||||
|
if (this == DOCS) {
|
||||||
|
subquery.searchTermsAdvice.add("generator:docs");
|
||||||
|
}
|
||||||
|
if (this == FOOD) {
|
||||||
|
subquery.searchTermsAdvice.add(HtmlFeature.CATEGORY_FOOD.getKeyword());
|
||||||
|
subquery.searchTermsExclude.add("special:ads");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public SpecificationLimit getYearLimit() {
|
||||||
|
if (this == SMALLWEB) {
|
||||||
|
return SpecificationLimit.greaterThan(2015);
|
||||||
|
}
|
||||||
|
if (this == VINTAGE) {
|
||||||
|
return SpecificationLimit.lessThan(2003);
|
||||||
|
}
|
||||||
|
else return SpecificationLimit.none();
|
||||||
|
}
|
||||||
|
|
||||||
|
public SpecificationLimit getSizeLimit() {
|
||||||
|
if (this == SMALLWEB) {
|
||||||
|
return SpecificationLimit.lessThan(500);
|
||||||
|
}
|
||||||
|
else return SpecificationLimit.none();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public SpecificationLimit getQualityLimit() {
|
||||||
|
if (this == SMALLWEB) {
|
||||||
|
return SpecificationLimit.lessThan(5);
|
||||||
|
}
|
||||||
|
else return SpecificationLimit.none();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,293 @@
|
|||||||
|
package nu.marginalia.search.model;
|
||||||
|
|
||||||
|
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
|
||||||
|
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
|
||||||
|
import nu.marginalia.model.EdgeUrl;
|
||||||
|
import nu.marginalia.model.crawl.DomainIndexingState;
|
||||||
|
import nu.marginalia.model.crawl.HtmlFeature;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A class to hold details about a single search result.
|
||||||
|
*/
|
||||||
|
public class UrlDetails implements Comparable<UrlDetails> {
|
||||||
|
public long id;
|
||||||
|
public int domainId;
|
||||||
|
|
||||||
|
public EdgeUrl url;
|
||||||
|
public String title;
|
||||||
|
public String description;
|
||||||
|
|
||||||
|
public String format;
|
||||||
|
public int features;
|
||||||
|
|
||||||
|
public DomainIndexingState domainState;
|
||||||
|
|
||||||
|
public double termScore;
|
||||||
|
|
||||||
|
public int resultsFromSameDomain;
|
||||||
|
|
||||||
|
public String positions;
|
||||||
|
public int positionsCount;
|
||||||
|
public SearchResultItem resultItem;
|
||||||
|
public List<SearchResultKeywordScore> keywordScores;
|
||||||
|
|
||||||
|
public UrlDetails(long id, int domainId, EdgeUrl url, String title, String description, String format, int features, DomainIndexingState domainState, double termScore, int resultsFromSameDomain, String positions, int positionsCount, SearchResultItem resultItem, List<SearchResultKeywordScore> keywordScores) {
|
||||||
|
this.id = id;
|
||||||
|
this.domainId = domainId;
|
||||||
|
this.url = url;
|
||||||
|
this.title = title;
|
||||||
|
this.description = description;
|
||||||
|
this.format = format;
|
||||||
|
this.features = features;
|
||||||
|
this.domainState = domainState;
|
||||||
|
this.termScore = termScore;
|
||||||
|
this.resultsFromSameDomain = resultsFromSameDomain;
|
||||||
|
this.positions = positions;
|
||||||
|
this.positionsCount = positionsCount;
|
||||||
|
this.resultItem = resultItem;
|
||||||
|
this.keywordScores = keywordScores;
|
||||||
|
}
|
||||||
|
|
||||||
|
public UrlDetails() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasMoreResults() {
|
||||||
|
return resultsFromSameDomain > 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getFormat() {
|
||||||
|
if (null == format) {
|
||||||
|
return "?";
|
||||||
|
}
|
||||||
|
switch (format) {
|
||||||
|
case "HTML123":
|
||||||
|
return "HTML 1-3";
|
||||||
|
case "HTML4":
|
||||||
|
return "HTML 4";
|
||||||
|
case "XHTML":
|
||||||
|
return "XHTML";
|
||||||
|
case "HTML5":
|
||||||
|
return "HTML 5";
|
||||||
|
case "PLAIN":
|
||||||
|
return "Plain Text";
|
||||||
|
default:
|
||||||
|
return "?";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public int hashCode() {
|
||||||
|
return Long.hashCode(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compareTo(UrlDetails other) {
|
||||||
|
int result = Double.compare(getTermScore(), other.getTermScore());
|
||||||
|
if (result == 0) result = Long.compare(getId(), other.getId());
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean equals(Object other) {
|
||||||
|
if (other == null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (other == this) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (other instanceof UrlDetails) {
|
||||||
|
return ((UrlDetails) other).id == id;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTitle() {
|
||||||
|
if (title == null || title.isBlank()) {
|
||||||
|
return url.toString();
|
||||||
|
}
|
||||||
|
return title;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isPlainText() {
|
||||||
|
return "PLAIN".equals(format);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getProblemCount() {
|
||||||
|
int mask = HtmlFeature.JS.getFeatureBit()
|
||||||
|
| HtmlFeature.COOKIES.getFeatureBit()
|
||||||
|
| HtmlFeature.TRACKING.getFeatureBit()
|
||||||
|
| HtmlFeature.AFFILIATE_LINK.getFeatureBit()
|
||||||
|
| HtmlFeature.TRACKING_ADTECH.getFeatureBit()
|
||||||
|
| HtmlFeature.ADVERTISEMENT.getFeatureBit();
|
||||||
|
|
||||||
|
return Integer.bitCount(features & mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<UrlProblem> getProblems() {
|
||||||
|
List<UrlProblem> problems = new ArrayList<>();
|
||||||
|
|
||||||
|
if (isScripts()) {
|
||||||
|
problems.add(new UrlProblem("Js", "The page uses Javascript"));
|
||||||
|
}
|
||||||
|
if (isCookies()) {
|
||||||
|
problems.add(new UrlProblem("Co", "The page uses Cookies"));
|
||||||
|
}
|
||||||
|
if (isTracking()) {
|
||||||
|
problems.add(new UrlProblem("Tr", "The page uses Tracking/Analytics"));
|
||||||
|
}
|
||||||
|
if (isAffiliate()) {
|
||||||
|
problems.add(new UrlProblem("Af", "The page may use Affiliate Linking"));
|
||||||
|
}
|
||||||
|
if (isAds()) {
|
||||||
|
problems.add(new UrlProblem("Ad", "The page uses Ads/Adtech Tracking"));
|
||||||
|
}
|
||||||
|
return problems;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isScripts() {
|
||||||
|
return HtmlFeature.hasFeature(features, HtmlFeature.JS);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isTracking() {
|
||||||
|
return HtmlFeature.hasFeature(features, HtmlFeature.TRACKING);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isAffiliate() {
|
||||||
|
return HtmlFeature.hasFeature(features, HtmlFeature.AFFILIATE_LINK);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isMedia() {
|
||||||
|
return HtmlFeature.hasFeature(features, HtmlFeature.MEDIA);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isCookies() {
|
||||||
|
return HtmlFeature.hasFeature(features, HtmlFeature.COOKIES);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isAds() {
|
||||||
|
return HtmlFeature.hasFeature(features, HtmlFeature.TRACKING_ADTECH);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getMatchRank() {
|
||||||
|
if (termScore <= 1) return 1;
|
||||||
|
if (termScore <= 2) return 2;
|
||||||
|
if (termScore <= 3) return 3;
|
||||||
|
if (termScore <= 5) return 5;
|
||||||
|
|
||||||
|
return 10;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getId() {
|
||||||
|
return this.id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getDomainId() {
|
||||||
|
return this.domainId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public EdgeUrl getUrl() {
|
||||||
|
return this.url;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getDescription() {
|
||||||
|
return this.description;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getFeatures() {
|
||||||
|
return this.features;
|
||||||
|
}
|
||||||
|
|
||||||
|
public DomainIndexingState getDomainState() {
|
||||||
|
return this.domainState;
|
||||||
|
}
|
||||||
|
|
||||||
|
public double getTermScore() {
|
||||||
|
return this.termScore;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getResultsFromSameDomain() {
|
||||||
|
return this.resultsFromSameDomain;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getPositions() {
|
||||||
|
return this.positions;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getPositionsCount() {
|
||||||
|
return this.positionsCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SearchResultItem getResultItem() {
|
||||||
|
return this.resultItem;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<SearchResultKeywordScore> getKeywordScores() {
|
||||||
|
return this.keywordScores;
|
||||||
|
}
|
||||||
|
|
||||||
|
public UrlDetails withId(long id) {
|
||||||
|
return this.id == id ? this : new UrlDetails(id, this.domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores);
|
||||||
|
}
|
||||||
|
|
||||||
|
public UrlDetails withDomainId(int domainId) {
|
||||||
|
return this.domainId == domainId ? this : new UrlDetails(this.id, domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores);
|
||||||
|
}
|
||||||
|
|
||||||
|
public UrlDetails withUrl(EdgeUrl url) {
|
||||||
|
return this.url == url ? this : new UrlDetails(this.id, this.domainId, url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores);
|
||||||
|
}
|
||||||
|
|
||||||
|
public UrlDetails withTitle(String title) {
|
||||||
|
return this.title == title ? this : new UrlDetails(this.id, this.domainId, this.url, title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores);
|
||||||
|
}
|
||||||
|
|
||||||
|
public UrlDetails withDescription(String description) {
|
||||||
|
return this.description == description ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores);
|
||||||
|
}
|
||||||
|
|
||||||
|
public UrlDetails withFormat(String format) {
|
||||||
|
return this.format == format ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores);
|
||||||
|
}
|
||||||
|
|
||||||
|
public UrlDetails withFeatures(int features) {
|
||||||
|
return this.features == features ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores);
|
||||||
|
}
|
||||||
|
|
||||||
|
public UrlDetails withDomainState(DomainIndexingState domainState) {
|
||||||
|
return this.domainState == domainState ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, this.features, domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores);
|
||||||
|
}
|
||||||
|
|
||||||
|
public UrlDetails withTermScore(double termScore) {
|
||||||
|
return this.termScore == termScore ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores);
|
||||||
|
}
|
||||||
|
|
||||||
|
public UrlDetails withResultsFromSameDomain(int resultsFromSameDomain) {
|
||||||
|
return this.resultsFromSameDomain == resultsFromSameDomain ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores);
|
||||||
|
}
|
||||||
|
|
||||||
|
public UrlDetails withPositions(String positions) {
|
||||||
|
return this.positions == positions ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, positions, this.positionsCount, this.resultItem, this.keywordScores);
|
||||||
|
}
|
||||||
|
|
||||||
|
public UrlDetails withPositionsCount(int positionsCount) {
|
||||||
|
return this.positionsCount == positionsCount ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, positionsCount, this.resultItem, this.keywordScores);
|
||||||
|
}
|
||||||
|
|
||||||
|
public UrlDetails withResultItem(SearchResultItem resultItem) {
|
||||||
|
return this.resultItem == resultItem ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, resultItem, this.keywordScores);
|
||||||
|
}
|
||||||
|
|
||||||
|
public UrlDetails withKeywordScores(List<SearchResultKeywordScore> keywordScores) {
|
||||||
|
return this.keywordScores == keywordScores ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, keywordScores);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return "UrlDetails(id=" + this.getId() + ", domainId=" + this.getDomainId() + ", url=" + this.getUrl() + ", title=" + this.getTitle() + ", description=" + this.getDescription() + ", format=" + this.getFormat() + ", features=" + this.getFeatures() + ", domainState=" + this.getDomainState() + ", termScore=" + this.getTermScore() + ", resultsFromSameDomain=" + this.getResultsFromSameDomain() + ", positions=" + this.getPositions() + ", positionsCount=" + this.getPositionsCount() + ", resultItem=" + this.getResultItem() + ", keywordScores=" + this.getKeywordScores() + ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
public static record UrlProblem(String name, String description) {
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,27 @@
|
|||||||
|
package nu.marginalia.search.results;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.Singleton;
|
||||||
|
import nu.marginalia.browse.model.BrowseResult;
|
||||||
|
import nu.marginalia.screenshot.ScreenshotService;
|
||||||
|
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.function.Predicate;
|
||||||
|
|
||||||
|
@Singleton
|
||||||
|
public class BrowseResultCleaner {
|
||||||
|
private final ScreenshotService screenshotService;
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public BrowseResultCleaner(ScreenshotService screenshotService) {
|
||||||
|
this.screenshotService = screenshotService;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Predicate<BrowseResult> shouldRemoveResultPredicateBr() {
|
||||||
|
Set<String> domainHashes = new HashSet<>(100);
|
||||||
|
|
||||||
|
return (res) -> !screenshotService.hasScreenshot(res.domainId())
|
||||||
|
|| !domainHashes.add(res.domainHash());
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,69 @@
|
|||||||
|
package nu.marginalia.search.results;
|
||||||
|
|
||||||
|
import gnu.trove.list.TLongList;
|
||||||
|
import gnu.trove.list.array.TLongArrayList;
|
||||||
|
import gnu.trove.map.hash.TObjectIntHashMap;
|
||||||
|
import gnu.trove.set.hash.TIntHashSet;
|
||||||
|
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
|
||||||
|
import nu.marginalia.lsh.EasyLSH;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
public class UrlDeduplicator {
|
||||||
|
private final int LSH_SIMILARITY_THRESHOLD = 2;
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(UrlDeduplicator.class);
|
||||||
|
|
||||||
|
private final TIntHashSet seenSuperficialhashes = new TIntHashSet(200);
|
||||||
|
private final TLongList seehLSHList = new TLongArrayList(200);
|
||||||
|
private final TObjectIntHashMap<String> keyCount = new TObjectIntHashMap<>(200, 0.75f, 0);
|
||||||
|
|
||||||
|
private final int resultsPerKey;
|
||||||
|
public UrlDeduplicator(int resultsPerKey) {
|
||||||
|
this.resultsPerKey = resultsPerKey;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean shouldRemove(DecoratedSearchResultItem details) {
|
||||||
|
if (!deduplicateOnSuperficialHash(details))
|
||||||
|
return true;
|
||||||
|
if (!deduplicateOnLSH(details))
|
||||||
|
return true;
|
||||||
|
if (!limitResultsPerDomain(details))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean shouldRetain(DecoratedSearchResultItem details) {
|
||||||
|
return !shouldRemove(details);
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean deduplicateOnSuperficialHash(DecoratedSearchResultItem details) {
|
||||||
|
return seenSuperficialhashes.add(Objects.hash(details.url.path, details.title));
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean deduplicateOnLSH(DecoratedSearchResultItem details) {
|
||||||
|
long thisHash = details.dataHash;
|
||||||
|
|
||||||
|
if (0 == thisHash)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (seehLSHList.forEach(otherHash -> EasyLSH.hammingDistance(thisHash, otherHash) >= LSH_SIMILARITY_THRESHOLD))
|
||||||
|
{
|
||||||
|
seehLSHList.add(thisHash);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean limitResultsPerDomain(DecoratedSearchResultItem details) {
|
||||||
|
final var domain = details.getUrl().getDomain();
|
||||||
|
final String key = domain.getDomainKey();
|
||||||
|
|
||||||
|
return keyCount.adjustOrPutValue(key, 1, 1) <= resultsPerKey;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,69 @@
|
|||||||
|
package nu.marginalia.search.svc;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
|
import nu.marginalia.WebsiteUrl;
|
||||||
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import spark.Request;
|
||||||
|
import spark.Response;
|
||||||
|
import spark.Spark;
|
||||||
|
|
||||||
|
import java.sql.SQLException;
|
||||||
|
|
||||||
|
public class SearchAddToCrawlQueueService {
|
||||||
|
|
||||||
|
private final DbDomainQueries domainQueries;
|
||||||
|
private final WebsiteUrl websiteUrl;
|
||||||
|
private final HikariDataSource dataSource;
|
||||||
|
private final Logger logger = LoggerFactory.getLogger(SearchAddToCrawlQueueService.class);
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public SearchAddToCrawlQueueService(DbDomainQueries domainQueries,
|
||||||
|
WebsiteUrl websiteUrl,
|
||||||
|
HikariDataSource dataSource) {
|
||||||
|
this.domainQueries = domainQueries;
|
||||||
|
this.websiteUrl = websiteUrl;
|
||||||
|
this.dataSource = dataSource;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object suggestCrawling(Request request, Response response) throws SQLException {
|
||||||
|
logger.info("{}", request.queryParams());
|
||||||
|
int id = Integer.parseInt(request.queryParams("id"));
|
||||||
|
boolean nomisclick = "on".equals(request.queryParams("nomisclick"));
|
||||||
|
|
||||||
|
String domainName = getDomainName(id);
|
||||||
|
|
||||||
|
if (nomisclick) {
|
||||||
|
logger.info("Adding {} to crawl queue", domainName);
|
||||||
|
addToCrawlQueue(id);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
logger.info("Nomisclick not set, not adding {} to crawl queue", domainName);
|
||||||
|
}
|
||||||
|
|
||||||
|
response.redirect(websiteUrl.withPath("/site/" + domainName));
|
||||||
|
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addToCrawlQueue(int id) throws SQLException {
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
var stmt = conn.prepareStatement("""
|
||||||
|
INSERT IGNORE INTO CRAWL_QUEUE(DOMAIN_NAME, SOURCE)
|
||||||
|
SELECT DOMAIN_NAME, "user" FROM EC_DOMAIN WHERE ID=?
|
||||||
|
""")) {
|
||||||
|
stmt.setInt(1, id);
|
||||||
|
stmt.executeUpdate();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getDomainName(int id) {
|
||||||
|
var domain = domainQueries.getDomain(id);
|
||||||
|
if (domain.isEmpty())
|
||||||
|
Spark.halt(404);
|
||||||
|
return domain.get().toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,87 @@
|
|||||||
|
package nu.marginalia.search.svc;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import nu.marginalia.api.domains.DomainInfoClient;
|
||||||
|
import nu.marginalia.api.domains.model.SimilarDomain;
|
||||||
|
import nu.marginalia.browse.DbBrowseDomainsRandom;
|
||||||
|
import nu.marginalia.browse.model.BrowseResult;
|
||||||
|
import nu.marginalia.browse.model.BrowseResultSet;
|
||||||
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
|
import nu.marginalia.db.DomainBlacklist;
|
||||||
|
import nu.marginalia.model.EdgeDomain;
|
||||||
|
import nu.marginalia.search.results.BrowseResultCleaner;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.ExecutionException;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.TimeoutException;
|
||||||
|
|
||||||
|
import static java.util.Collections.shuffle;
|
||||||
|
|
||||||
|
public class SearchBrowseService {
|
||||||
|
private final DbBrowseDomainsRandom randomDomains;
|
||||||
|
private final DbDomainQueries domainQueries;
|
||||||
|
private final DomainBlacklist blacklist;
|
||||||
|
private final DomainInfoClient domainInfoClient;
|
||||||
|
private final BrowseResultCleaner browseResultCleaner;
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public SearchBrowseService(DbBrowseDomainsRandom randomDomains,
|
||||||
|
DbDomainQueries domainQueries,
|
||||||
|
DomainBlacklist blacklist,
|
||||||
|
DomainInfoClient domainInfoClient,
|
||||||
|
BrowseResultCleaner browseResultCleaner)
|
||||||
|
{
|
||||||
|
this.randomDomains = randomDomains;
|
||||||
|
this.domainQueries = domainQueries;
|
||||||
|
this.blacklist = blacklist;
|
||||||
|
this.domainInfoClient = domainInfoClient;
|
||||||
|
this.browseResultCleaner = browseResultCleaner;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BrowseResultSet getRandomEntries(int set) {
|
||||||
|
List<BrowseResult> results = randomDomains.getRandomDomains(25, blacklist, set);
|
||||||
|
|
||||||
|
results.removeIf(browseResultCleaner.shouldRemoveResultPredicateBr());
|
||||||
|
|
||||||
|
return new BrowseResultSet(results);
|
||||||
|
}
|
||||||
|
|
||||||
|
public BrowseResultSet getRelatedEntries(String domainName) throws ExecutionException, InterruptedException, TimeoutException {
|
||||||
|
var domain = domainQueries.getDomainId(new EdgeDomain(domainName));
|
||||||
|
|
||||||
|
var neighbors = domainInfoClient.similarDomains(domain, 50)
|
||||||
|
.get(100, TimeUnit.MILLISECONDS);
|
||||||
|
|
||||||
|
neighbors.removeIf(sd -> !sd.screenshot());
|
||||||
|
|
||||||
|
// If the results are very few, supplement with the alternative shitty algorithm
|
||||||
|
if (neighbors.size() < 25) {
|
||||||
|
Set<SimilarDomain> allNeighbors = new HashSet<>(neighbors);
|
||||||
|
allNeighbors.addAll(domainInfoClient
|
||||||
|
.linkedDomains(domain, 50)
|
||||||
|
.get(100, TimeUnit.MILLISECONDS)
|
||||||
|
);
|
||||||
|
|
||||||
|
neighbors.clear();
|
||||||
|
neighbors.addAll(allNeighbors);
|
||||||
|
neighbors.removeIf(sd -> !sd.screenshot());
|
||||||
|
}
|
||||||
|
|
||||||
|
List<BrowseResult> results = new ArrayList<>(neighbors.size());
|
||||||
|
for (SimilarDomain sd : neighbors) {
|
||||||
|
var resultDomain = domainQueries.getDomain(sd.domainId());
|
||||||
|
if (resultDomain.isEmpty())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
results.add(new BrowseResult(resultDomain.get().toRootUrlHttp(), sd.domainId(), 0, sd.screenshot()));
|
||||||
|
}
|
||||||
|
// shuffle the items for a less repetitive experience
|
||||||
|
shuffle(neighbors);
|
||||||
|
|
||||||
|
return new BrowseResultSet(results, domainName);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,69 @@
|
|||||||
|
package nu.marginalia.search.svc;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import nu.marginalia.renderer.MustacheRenderer;
|
||||||
|
import nu.marginalia.renderer.RendererFactory;
|
||||||
|
import nu.marginalia.search.SearchOperator;
|
||||||
|
import nu.marginalia.search.model.UrlDetails;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import spark.Request;
|
||||||
|
import spark.Response;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class SearchCrosstalkService {
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(SearchCrosstalkService.class);
|
||||||
|
private final SearchOperator searchOperator;
|
||||||
|
private final MustacheRenderer<CrosstalkResult> renderer;
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public SearchCrosstalkService(SearchOperator searchOperator,
|
||||||
|
RendererFactory rendererFactory) throws IOException
|
||||||
|
{
|
||||||
|
this.searchOperator = searchOperator;
|
||||||
|
this.renderer = rendererFactory.renderer("search/site-info/site-crosstalk");
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object handle(Request request, Response response) throws SQLException {
|
||||||
|
String domains = request.queryParams("domains");
|
||||||
|
String[] parts = StringUtils.split(domains, ',');
|
||||||
|
|
||||||
|
if (parts.length != 2) {
|
||||||
|
throw new IllegalArgumentException("Expected exactly two domains");
|
||||||
|
}
|
||||||
|
|
||||||
|
response.type("text/html");
|
||||||
|
|
||||||
|
for (int i = 0; i < parts.length; i++) {
|
||||||
|
parts[i] = parts[i].trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
var resAtoB = searchOperator.doLinkSearch(parts[0], parts[1]);
|
||||||
|
var resBtoA = searchOperator.doLinkSearch(parts[1], parts[0]);
|
||||||
|
|
||||||
|
var model = new CrosstalkResult(parts[0], parts[1], resAtoB, resBtoA);
|
||||||
|
|
||||||
|
return renderer.render(model);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
private record CrosstalkResult(String domainA,
|
||||||
|
String domainB,
|
||||||
|
List<UrlDetails> forward,
|
||||||
|
List<UrlDetails> backward)
|
||||||
|
{
|
||||||
|
|
||||||
|
public boolean isFocusDomain() {
|
||||||
|
return true; // Hack to get the search result templates behave well
|
||||||
|
}
|
||||||
|
public boolean hasBoth() {
|
||||||
|
return !forward.isEmpty() && !backward.isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,47 @@
|
|||||||
|
package nu.marginalia.search.svc;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import nu.marginalia.index.api.IndexMqClient;
|
||||||
|
import nu.marginalia.renderer.MustacheRenderer;
|
||||||
|
import nu.marginalia.renderer.RendererFactory;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import spark.Request;
|
||||||
|
import spark.Response;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
public class SearchErrorPageService {
|
||||||
|
private final IndexMqClient indexMqClient;
|
||||||
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
|
private final MustacheRenderer<Object> renderer;
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public SearchErrorPageService(IndexMqClient indexMqClient,
|
||||||
|
RendererFactory rendererFactory) throws IOException {
|
||||||
|
|
||||||
|
renderer = rendererFactory.renderer("search/error-page-search");
|
||||||
|
|
||||||
|
this.indexMqClient = indexMqClient;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void serveError(Request request, Response rsp) {
|
||||||
|
rsp.body(renderError(request, "Internal error",
|
||||||
|
"""
|
||||||
|
An error occurred when communicating with the search engine index.
|
||||||
|
<p>
|
||||||
|
This is hopefully a temporary state of affairs. It may be due to
|
||||||
|
an upgrade. The index typically takes a about two or three minutes
|
||||||
|
to reload from a cold restart. Thanks for your patience.
|
||||||
|
"""));
|
||||||
|
}
|
||||||
|
|
||||||
|
private String renderError(Request request, String title, String message) {
|
||||||
|
return renderer.render(Map.of("title", title, "message", message,
|
||||||
|
"profile", request.queryParamOrDefault("profile", ""),
|
||||||
|
"js", request.queryParamOrDefault("js", ""),
|
||||||
|
"query", request.queryParamOrDefault("query", "")
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,85 @@
|
|||||||
|
package nu.marginalia.search.svc;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
|
|
||||||
|
import java.sql.ResultSet;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.function.Function;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
/** Service for handling flagging sites. This code has an admin-facing correspondent in
|
||||||
|
* DomainComplaintService in control-service
|
||||||
|
*/
|
||||||
|
public class SearchFlagSiteService {
|
||||||
|
private final HikariDataSource dataSource;
|
||||||
|
|
||||||
|
private final CategoryItem unknownCategory = new CategoryItem("unknown", "Unknown");
|
||||||
|
|
||||||
|
private final List<CategoryItem> categories =
|
||||||
|
List.of(
|
||||||
|
new CategoryItem("spam", "Spam"),
|
||||||
|
new CategoryItem("freebooting", "Reposting Stolen Content"),
|
||||||
|
new CategoryItem("broken", "Broken Website"),
|
||||||
|
new CategoryItem("shock", "Shocking/Offensive"),
|
||||||
|
new CategoryItem("blacklist", "Review Blacklisting"),
|
||||||
|
new CategoryItem("no-random", "Remove from Random Exploration")
|
||||||
|
);
|
||||||
|
|
||||||
|
private final Map<String, CategoryItem> categoryItemMap =
|
||||||
|
categories.stream().collect(Collectors.toMap(CategoryItem::categoryName, Function.identity()));
|
||||||
|
@Inject
|
||||||
|
public SearchFlagSiteService(HikariDataSource dataSource) {
|
||||||
|
this.dataSource = dataSource;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<CategoryItem> getCategories() {
|
||||||
|
return categories;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<FlagSiteComplaintModel> getExistingComplaints(int id) throws SQLException {
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
var complaintsStmt = conn.prepareStatement("""
|
||||||
|
SELECT CATEGORY, FILE_DATE, REVIEWED, DECISION
|
||||||
|
FROM DOMAIN_COMPLAINT
|
||||||
|
WHERE DOMAIN_ID=?
|
||||||
|
"""))
|
||||||
|
{
|
||||||
|
List<FlagSiteComplaintModel> complaints = new ArrayList<>();
|
||||||
|
|
||||||
|
complaintsStmt.setInt(1, id);
|
||||||
|
ResultSet rs = complaintsStmt.executeQuery();
|
||||||
|
|
||||||
|
while (rs.next()) {
|
||||||
|
complaints.add(new FlagSiteComplaintModel(
|
||||||
|
categoryItemMap.getOrDefault(rs.getString(1), unknownCategory).categoryDesc,
|
||||||
|
rs.getString(2),
|
||||||
|
rs.getBoolean(3),
|
||||||
|
rs.getString(4)));
|
||||||
|
}
|
||||||
|
|
||||||
|
return complaints;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void insertComplaint(FlagSiteFormData formData) throws SQLException {
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
var stmt = conn.prepareStatement(
|
||||||
|
"""
|
||||||
|
INSERT INTO DOMAIN_COMPLAINT(DOMAIN_ID, CATEGORY, DESCRIPTION, SAMPLE) VALUES (?, ?, ?, ?)
|
||||||
|
""")) {
|
||||||
|
stmt.setInt(1, formData.domainId);
|
||||||
|
stmt.setString(2, formData.category);
|
||||||
|
stmt.setString(3, formData.description);
|
||||||
|
stmt.setString(4, formData.sampleQuery);
|
||||||
|
stmt.executeUpdate();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public record CategoryItem(String categoryName, String categoryDesc) {}
|
||||||
|
public record FlagSiteComplaintModel(String category, String submitTime, boolean isReviewed, String decision) {}
|
||||||
|
public record FlagSiteFormData(int domainId, String category, String description, String sampleQuery) {}
|
||||||
|
}
|
@ -0,0 +1,117 @@
|
|||||||
|
package nu.marginalia.search.svc;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.Singleton;
|
||||||
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
|
import nu.marginalia.renderer.MustacheRenderer;
|
||||||
|
import nu.marginalia.renderer.RendererFactory;
|
||||||
|
import nu.marginalia.search.svc.SearchQueryCountService;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import spark.Request;
|
||||||
|
import spark.Response;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.time.LocalDate;
|
||||||
|
import java.time.ZoneId;
|
||||||
|
import java.time.ZonedDateTime;
|
||||||
|
import java.time.format.DateTimeFormatter;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/** Renders the front page (index) */
|
||||||
|
@Singleton
|
||||||
|
public class SearchFrontPageService {
|
||||||
|
|
||||||
|
private final MustacheRenderer<IndexModel> template;
|
||||||
|
private final HikariDataSource dataSource;
|
||||||
|
private final SearchQueryCountService searchVisitorCount;
|
||||||
|
|
||||||
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public SearchFrontPageService(RendererFactory rendererFactory,
|
||||||
|
HikariDataSource dataSource,
|
||||||
|
SearchQueryCountService searchVisitorCount
|
||||||
|
) throws IOException {
|
||||||
|
this.template = rendererFactory.renderer("search/index/index");
|
||||||
|
this.dataSource = dataSource;
|
||||||
|
this.searchVisitorCount = searchVisitorCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String render(Request request, Response response) {
|
||||||
|
response.header("Cache-control", "public,max-age=3600");
|
||||||
|
|
||||||
|
return template.render(new IndexModel(
|
||||||
|
getNewsItems(),
|
||||||
|
searchVisitorCount.getQueriesPerMinute()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private List<NewsItem> getNewsItems() {
|
||||||
|
List<NewsItem> items = new ArrayList<>();
|
||||||
|
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
var stmt = conn.prepareStatement("""
|
||||||
|
SELECT TITLE, LINK, SOURCE, LIST_DATE FROM SEARCH_NEWS_FEED ORDER BY LIST_DATE DESC
|
||||||
|
""")) {
|
||||||
|
|
||||||
|
var rep = stmt.executeQuery();
|
||||||
|
|
||||||
|
while (rep.next()) {
|
||||||
|
items.add(new NewsItem(
|
||||||
|
rep.getString(1),
|
||||||
|
rep.getString(2),
|
||||||
|
rep.getString(3),
|
||||||
|
rep.getDate(4).toLocalDate()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (SQLException ex) {
|
||||||
|
logger.warn("Failed to fetch news items", ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
return items;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object renderNewsFeed(Request request, Response response) {
|
||||||
|
List<NewsItem> newsItems = getNewsItems();
|
||||||
|
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
|
||||||
|
sb.append("""
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<rss version="2.0">
|
||||||
|
<channel>
|
||||||
|
<title>Marginalia Search News and Mentions</title>
|
||||||
|
<link>https://search.marginalia.nu/</link>
|
||||||
|
<description>News and Mentions of Marginalia Search</description>
|
||||||
|
<language>en-us</language>
|
||||||
|
<ttl>60</ttl>
|
||||||
|
""");
|
||||||
|
|
||||||
|
sb.append("<lastBuildDate>").append(ZonedDateTime.now().format(DateTimeFormatter.RFC_1123_DATE_TIME)).append("</lastBuildDate>\n");
|
||||||
|
sb.append("<pubDate>").append(ZonedDateTime.now().format(DateTimeFormatter.RFC_1123_DATE_TIME)).append("</pubDate>\n");
|
||||||
|
sb.append("<ttl>60</ttl>\n");
|
||||||
|
for (var item : newsItems) {
|
||||||
|
sb.append("<item>\n");
|
||||||
|
sb.append("<title>").append(item.title()).append("</title>\n");
|
||||||
|
sb.append("<link>").append(item.url()).append("</link>\n");
|
||||||
|
if (item.source != null) {
|
||||||
|
sb.append("<author>").append(item.source()).append("</author>\n");
|
||||||
|
}
|
||||||
|
sb.append("<pubDate>").append(item.date().atStartOfDay().atZone(ZoneId.systemDefault()).format(DateTimeFormatter.RFC_1123_DATE_TIME)).append("</pubDate>\n");
|
||||||
|
sb.append("</item>\n");
|
||||||
|
}
|
||||||
|
sb.append("</channel>\n");
|
||||||
|
sb.append("</rss>\n");
|
||||||
|
|
||||||
|
response.type("application/rss+xml");
|
||||||
|
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
private record IndexModel(List<NewsItem> news, int searchPerMinute) { }
|
||||||
|
private record NewsItem(String title, String url, String source, LocalDate date) {}
|
||||||
|
}
|
@ -0,0 +1,48 @@
|
|||||||
|
package nu.marginalia.search.svc;
|
||||||
|
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.google.inject.Singleton;
|
||||||
|
import java.time.temporal.ChronoUnit;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
|
/** Keeps per-minute statistics of queries */
|
||||||
|
@Singleton
|
||||||
|
public class SearchQueryCountService {
|
||||||
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
|
private final AtomicInteger lastMinuteQueries = new AtomicInteger();
|
||||||
|
|
||||||
|
private final TimeUnit minute = TimeUnit.of(ChronoUnit.MINUTES);
|
||||||
|
private volatile int queriesPerMinute;
|
||||||
|
|
||||||
|
public SearchQueryCountService() {
|
||||||
|
Thread updateThread = new Thread(this::updateQueriesPerMinute,
|
||||||
|
"SearchVisitorCountService::updateQueriesPerMinute");
|
||||||
|
updateThread.setDaemon(true);
|
||||||
|
updateThread.start();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Retreive the number of queries performed the minute before this one */
|
||||||
|
public int getQueriesPerMinute() {
|
||||||
|
return queriesPerMinute;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Update query statistics for presentation */
|
||||||
|
public void registerQuery() {
|
||||||
|
lastMinuteQueries.incrementAndGet();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void updateQueriesPerMinute() {
|
||||||
|
try {
|
||||||
|
for (;;) {
|
||||||
|
queriesPerMinute = lastMinuteQueries.getAndSet(0);
|
||||||
|
minute.sleep(1);
|
||||||
|
}
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
logger.warn("Query counter thread was interrupted");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,62 @@
|
|||||||
|
package nu.marginalia.search.svc;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import nu.marginalia.WebsiteUrl;
|
||||||
|
import nu.marginalia.search.command.CommandEvaluator;
|
||||||
|
import nu.marginalia.search.command.SearchParameters;
|
||||||
|
import nu.marginalia.search.exceptions.RedirectException;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import spark.Request;
|
||||||
|
import spark.Response;
|
||||||
|
|
||||||
|
public class SearchQueryService {
|
||||||
|
|
||||||
|
private final WebsiteUrl websiteUrl;
|
||||||
|
private final SearchErrorPageService errorPageService;
|
||||||
|
private final CommandEvaluator searchCommandEvaulator;
|
||||||
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public SearchQueryService(
|
||||||
|
WebsiteUrl websiteUrl,
|
||||||
|
SearchErrorPageService errorPageService,
|
||||||
|
CommandEvaluator searchCommandEvaulator) {
|
||||||
|
this.websiteUrl = websiteUrl;
|
||||||
|
this.errorPageService = errorPageService;
|
||||||
|
this.searchCommandEvaulator = searchCommandEvaulator;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object pathSearch(Request request, Response response) {
|
||||||
|
try {
|
||||||
|
return searchCommandEvaulator.eval(response, parseParameters(request));
|
||||||
|
}
|
||||||
|
catch (RedirectException ex) {
|
||||||
|
response.redirect(ex.newUrl);
|
||||||
|
}
|
||||||
|
catch (Exception ex) {
|
||||||
|
logger.error("Error", ex);
|
||||||
|
errorPageService.serveError(request, response);
|
||||||
|
}
|
||||||
|
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
private SearchParameters parseParameters(Request request) {
|
||||||
|
try {
|
||||||
|
final String queryParam = request.queryParams("query");
|
||||||
|
|
||||||
|
if (null == queryParam || queryParam.isBlank()) {
|
||||||
|
throw new RedirectException(websiteUrl.url());
|
||||||
|
}
|
||||||
|
|
||||||
|
return new SearchParameters(queryParam.trim(), request);
|
||||||
|
}
|
||||||
|
catch (Exception ex) {
|
||||||
|
// Bots keep sending bad requests, suppress the error otherwise it will
|
||||||
|
// fill up the logs.
|
||||||
|
|
||||||
|
throw new RedirectException(websiteUrl.url());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,416 @@
|
|||||||
|
package nu.marginalia.search.svc;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import nu.marginalia.api.domains.DomainInfoClient;
|
||||||
|
import nu.marginalia.api.domains.model.DomainInformation;
|
||||||
|
import nu.marginalia.api.domains.model.SimilarDomain;
|
||||||
|
import nu.marginalia.api.feeds.FeedsClient;
|
||||||
|
import nu.marginalia.api.feeds.RpcFeed;
|
||||||
|
import nu.marginalia.api.feeds.RpcFeedItem;
|
||||||
|
import nu.marginalia.api.livecapture.LiveCaptureClient;
|
||||||
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
|
import nu.marginalia.model.EdgeDomain;
|
||||||
|
import nu.marginalia.renderer.MustacheRenderer;
|
||||||
|
import nu.marginalia.renderer.RendererFactory;
|
||||||
|
import nu.marginalia.screenshot.ScreenshotService;
|
||||||
|
import nu.marginalia.search.SearchOperator;
|
||||||
|
import nu.marginalia.search.model.UrlDetails;
|
||||||
|
import nu.marginalia.search.svc.SearchFlagSiteService.FlagSiteFormData;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import spark.Request;
|
||||||
|
import spark.Response;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.CompletableFuture;
|
||||||
|
import java.util.concurrent.Future;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.function.Supplier;
|
||||||
|
|
||||||
|
public class SearchSiteInfoService {
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(SearchSiteInfoService.class);
|
||||||
|
|
||||||
|
private final SearchOperator searchOperator;
|
||||||
|
private final DomainInfoClient domainInfoClient;
|
||||||
|
private final SearchFlagSiteService flagSiteService;
|
||||||
|
private final DbDomainQueries domainQueries;
|
||||||
|
private final MustacheRenderer<Object> renderer;
|
||||||
|
private final FeedsClient feedsClient;
|
||||||
|
private final LiveCaptureClient liveCaptureClient;
|
||||||
|
private final ScreenshotService screenshotService;
|
||||||
|
|
||||||
|
/** Wires up the collaborators and loads the site-info template.
 *
 * @throws IOException propagated from loading the template
 */
@Inject
public SearchSiteInfoService(SearchOperator searchOperator,
                             DomainInfoClient domainInfoClient,
                             RendererFactory rendererFactory,
                             SearchFlagSiteService flagSiteService,
                             DbDomainQueries domainQueries,
                             FeedsClient feedsClient,
                             LiveCaptureClient liveCaptureClient,
                             ScreenshotService screenshotService) throws IOException
{
    this.renderer = rendererFactory.renderer("search/site-info/site-info");

    this.searchOperator = searchOperator;
    this.domainQueries = domainQueries;
    this.flagSiteService = flagSiteService;
    this.screenshotService = screenshotService;

    this.domainInfoClient = domainInfoClient;
    this.feedsClient = feedsClient;
    this.liveCaptureClient = liveCaptureClient;
}
|
||||||
|
|
||||||
|
public Object handle(Request request, Response response) throws SQLException {
|
||||||
|
String domainName = request.params("site");
|
||||||
|
String view = request.queryParamOrDefault("view", "info");
|
||||||
|
|
||||||
|
if (null == domainName || domainName.isBlank()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
var model = switch (view) {
|
||||||
|
case "links" -> listLinks(domainName);
|
||||||
|
case "docs" -> listDocs(domainName);
|
||||||
|
case "info" -> listInfo(domainName);
|
||||||
|
case "report" -> reportSite(domainName);
|
||||||
|
default -> listInfo(domainName);
|
||||||
|
};
|
||||||
|
|
||||||
|
return renderer.render(model);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object handlePost(Request request, Response response) throws SQLException {
|
||||||
|
String domainName = request.params("site");
|
||||||
|
String view = request.queryParamOrDefault("view", "info");
|
||||||
|
|
||||||
|
if (null == domainName || domainName.isBlank()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!view.equals("report"))
|
||||||
|
return null;
|
||||||
|
|
||||||
|
final int domainId = domainQueries.getDomainId(new EdgeDomain(domainName));
|
||||||
|
|
||||||
|
FlagSiteFormData formData = new FlagSiteFormData(
|
||||||
|
domainId,
|
||||||
|
request.queryParams("category"),
|
||||||
|
request.queryParams("description"),
|
||||||
|
request.queryParams("sampleQuery")
|
||||||
|
);
|
||||||
|
flagSiteService.insertComplaint(formData);
|
||||||
|
|
||||||
|
var complaints = flagSiteService.getExistingComplaints(domainId);
|
||||||
|
|
||||||
|
var model = new ReportDomain(domainName, domainId, complaints, List.of(), true);
|
||||||
|
|
||||||
|
return renderer.render(model);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Object reportSite(String domainName) throws SQLException {
|
||||||
|
int domainId = domainQueries.getDomainId(new EdgeDomain(domainName));
|
||||||
|
var existingComplaints = flagSiteService.getExistingComplaints(domainId);
|
||||||
|
|
||||||
|
return new ReportDomain(domainName,
|
||||||
|
domainId,
|
||||||
|
existingComplaints,
|
||||||
|
flagSiteService.getCategories(),
|
||||||
|
false);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private Backlinks listLinks(String domainName) {
|
||||||
|
return new Backlinks(domainName,
|
||||||
|
domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1),
|
||||||
|
searchOperator.doBacklinkSearch(domainName));
|
||||||
|
}
|
||||||
|
|
||||||
|
private SiteInfoWithContext listInfo(String domainName) {
|
||||||
|
|
||||||
|
final int domainId = domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1);
|
||||||
|
|
||||||
|
final Future<DomainInformation> domainInfoFuture;
|
||||||
|
final Future<List<SimilarDomain>> similarSetFuture;
|
||||||
|
final Future<List<SimilarDomain>> linkingDomainsFuture;
|
||||||
|
final CompletableFuture<RpcFeed> feedItemsFuture;
|
||||||
|
String url = "https://" + domainName + "/";
|
||||||
|
|
||||||
|
boolean hasScreenshot = screenshotService.hasScreenshot(domainId);
|
||||||
|
|
||||||
|
|
||||||
|
if (domainId < 0) {
|
||||||
|
domainInfoFuture = CompletableFuture.failedFuture(new Exception("Unknown Domain ID"));
|
||||||
|
similarSetFuture = CompletableFuture.failedFuture(new Exception("Unknown Domain ID"));
|
||||||
|
linkingDomainsFuture = CompletableFuture.failedFuture(new Exception("Unknown Domain ID"));
|
||||||
|
feedItemsFuture = CompletableFuture.failedFuture(new Exception("Unknown Domain ID"));
|
||||||
|
}
|
||||||
|
else if (!domainInfoClient.isAccepting()) {
|
||||||
|
domainInfoFuture = CompletableFuture.failedFuture(new Exception("Assistant Service Unavailable"));
|
||||||
|
similarSetFuture = CompletableFuture.failedFuture(new Exception("Assistant Service Unavailable"));
|
||||||
|
linkingDomainsFuture = CompletableFuture.failedFuture(new Exception("Assistant Service Unavailable"));
|
||||||
|
feedItemsFuture = CompletableFuture.failedFuture(new Exception("Assistant Service Unavailable"));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
domainInfoFuture = domainInfoClient.domainInformation(domainId);
|
||||||
|
similarSetFuture = domainInfoClient.similarDomains(domainId, 25);
|
||||||
|
linkingDomainsFuture = domainInfoClient.linkedDomains(domainId, 25);
|
||||||
|
feedItemsFuture = feedsClient.getFeed(domainId);
|
||||||
|
}
|
||||||
|
|
||||||
|
List<UrlDetails> sampleResults = searchOperator.doSiteSearch(domainName, domainId,5);
|
||||||
|
if (!sampleResults.isEmpty()) {
|
||||||
|
url = sampleResults.getFirst().url.withPathAndParam("/", null).toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
var result = new SiteInfoWithContext(domainName,
|
||||||
|
domainId,
|
||||||
|
url,
|
||||||
|
hasScreenshot,
|
||||||
|
waitForFuture(domainInfoFuture, () -> createDummySiteInfo(domainName)),
|
||||||
|
waitForFuture(similarSetFuture, List::of),
|
||||||
|
waitForFuture(linkingDomainsFuture, List::of),
|
||||||
|
waitForFuture(feedItemsFuture.thenApply(FeedItems::new), () -> FeedItems.dummyValue(domainName)),
|
||||||
|
sampleResults
|
||||||
|
);
|
||||||
|
|
||||||
|
requestMissingScreenshots(result);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Request missing screenshots for the given site info */
|
||||||
|
private void requestMissingScreenshots(SiteInfoWithContext result) {
|
||||||
|
|
||||||
|
// Always request the main site screenshot, even if we already have it
|
||||||
|
// as this will make the live-capture do a staleness check and update
|
||||||
|
// as needed.
|
||||||
|
liveCaptureClient.requestScreengrab(result.domainId());
|
||||||
|
|
||||||
|
int requests = 1;
|
||||||
|
|
||||||
|
// Request screenshots for similar and linking domains only if they are absent
|
||||||
|
// also throttle the requests to at most 5 per view.
|
||||||
|
|
||||||
|
if (result.similar() != null) {
|
||||||
|
for (var similar : result.similar()) {
|
||||||
|
if (similar.screenshot()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (++requests > 5) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
liveCaptureClient.requestScreengrab(similar.domainId());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result.linking() != null) {
|
||||||
|
for (var linking : result.linking()) {
|
||||||
|
if (linking.screenshot()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (++requests > 5) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
liveCaptureClient.requestScreengrab(linking.domainId());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private <T> T waitForFuture(Future<T> future, Supplier<T> fallback) {
|
||||||
|
try {
|
||||||
|
return future.get(250, TimeUnit.MILLISECONDS);
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.info("Failed to get domain data: {}", e.getMessage());
|
||||||
|
return fallback.get();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private DomainInformation createDummySiteInfo(String domainName) {
|
||||||
|
return DomainInformation.builder()
|
||||||
|
.domain(new EdgeDomain(domainName))
|
||||||
|
.suggestForCrawling(true)
|
||||||
|
.unknownDomain(true)
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
private Docs listDocs(String domainName) {
|
||||||
|
int domainId = domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1);
|
||||||
|
return new Docs(domainName,
|
||||||
|
domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1),
|
||||||
|
searchOperator.doSiteSearch(domainName, domainId, 100));
|
||||||
|
}
|
||||||
|
|
||||||
|
public record Docs(Map<String, Boolean> view,
|
||||||
|
String domain,
|
||||||
|
long domainId,
|
||||||
|
List<UrlDetails> results) {
|
||||||
|
public Docs(String domain, long domainId, List<UrlDetails> results) {
|
||||||
|
this(Map.of("docs", true), domain, domainId, results);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String focusDomain() { return domain; }
|
||||||
|
|
||||||
|
public String query() { return "site:" + domain; }
|
||||||
|
|
||||||
|
public boolean isKnown() {
|
||||||
|
return domainId > 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public record Backlinks(Map<String, Boolean> view, String domain, long domainId, List<UrlDetails> results) {
|
||||||
|
public Backlinks(String domain, long domainId, List<UrlDetails> results) {
|
||||||
|
this(Map.of("links", true), domain, domainId, results);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String query() { return "links:" + domain; }
|
||||||
|
|
||||||
|
public boolean isKnown() {
|
||||||
|
return domainId > 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public record SiteInfoWithContext(Map<String, Boolean> view,
|
||||||
|
Map<String, Boolean> domainState,
|
||||||
|
String domain,
|
||||||
|
int domainId,
|
||||||
|
String siteUrl,
|
||||||
|
boolean hasScreenshot,
|
||||||
|
DomainInformation domainInformation,
|
||||||
|
List<SimilarDomain> similar,
|
||||||
|
List<SimilarDomain> linking,
|
||||||
|
FeedItems feed,
|
||||||
|
List<UrlDetails> samples
|
||||||
|
) {
|
||||||
|
public SiteInfoWithContext(String domain,
|
||||||
|
int domainId,
|
||||||
|
String siteUrl,
|
||||||
|
boolean hasScreenshot,
|
||||||
|
DomainInformation domainInformation,
|
||||||
|
List<SimilarDomain> similar,
|
||||||
|
List<SimilarDomain> linking,
|
||||||
|
FeedItems feedInfo,
|
||||||
|
List<UrlDetails> samples
|
||||||
|
)
|
||||||
|
{
|
||||||
|
this(Map.of("info", true),
|
||||||
|
Map.of(domainInfoState(domainInformation), true),
|
||||||
|
domain,
|
||||||
|
domainId,
|
||||||
|
siteUrl,
|
||||||
|
hasScreenshot,
|
||||||
|
domainInformation,
|
||||||
|
similar,
|
||||||
|
linking,
|
||||||
|
feedInfo,
|
||||||
|
samples);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getLayout() {
|
||||||
|
// My CSS is too weak to handle this in CSS alone, so I guess we're doing layout in Java...
|
||||||
|
if (similar != null && similar.size() < 25) {
|
||||||
|
return "lopsided";
|
||||||
|
}
|
||||||
|
else if (feed != null && !feed.items().isEmpty()) {
|
||||||
|
return "lopsided";
|
||||||
|
}
|
||||||
|
else if (samples != null && !samples.isEmpty()) {
|
||||||
|
return "lopsided";
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return "balanced";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String query() { return "site:" + domain; }
|
||||||
|
|
||||||
|
private static String domainInfoState(DomainInformation info) {
|
||||||
|
if (info.isBlacklisted()) {
|
||||||
|
return "blacklisted";
|
||||||
|
}
|
||||||
|
if (!info.isUnknownDomain() && info.isSuggestForCrawling()) {
|
||||||
|
return "suggestForCrawling";
|
||||||
|
}
|
||||||
|
if (info.isInCrawlQueue()) {
|
||||||
|
return "inCrawlQueue";
|
||||||
|
}
|
||||||
|
if (info.isUnknownDomain()) {
|
||||||
|
return "unknownDomain";
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return "indexed";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isKnown() {
|
||||||
|
return domainId > 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public record FeedItem(String title, String date, String description, String url) {
|
||||||
|
|
||||||
|
public FeedItem(RpcFeedItem rpcFeedItem) {
|
||||||
|
this(rpcFeedItem.getTitle(),
|
||||||
|
rpcFeedItem.getDate(),
|
||||||
|
rpcFeedItem.getDescription(),
|
||||||
|
rpcFeedItem.getUrl());
|
||||||
|
}
|
||||||
|
|
||||||
|
public String pubDay() { // Extract the date from an ISO style date string
|
||||||
|
if (date.length() > 10) {
|
||||||
|
return date.substring(0, 10);
|
||||||
|
}
|
||||||
|
return date;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String descriptionSafe() {
|
||||||
|
return description
|
||||||
|
.replace("<", "<")
|
||||||
|
.replace(">", ">");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public record FeedItems(String domain, String feedUrl, String updated, List<FeedItem> items) {
|
||||||
|
|
||||||
|
public static FeedItems dummyValue(String domain) {
|
||||||
|
return new FeedItems(domain, "", "", List.of());
|
||||||
|
}
|
||||||
|
|
||||||
|
public FeedItems(RpcFeed rpcFeedItems) {
|
||||||
|
this(rpcFeedItems.getDomain(),
|
||||||
|
rpcFeedItems.getFeedUrl(),
|
||||||
|
rpcFeedItems.getUpdated(),
|
||||||
|
rpcFeedItems.getItemsList().stream().map(FeedItem::new).toList());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public record ReportDomain(
|
||||||
|
Map<String, Boolean> view,
|
||||||
|
String domain,
|
||||||
|
int domainId,
|
||||||
|
List<SearchFlagSiteService.FlagSiteComplaintModel> complaints,
|
||||||
|
List<SearchFlagSiteService.CategoryItem> category,
|
||||||
|
boolean submitted)
|
||||||
|
{
|
||||||
|
public ReportDomain(String domain,
|
||||||
|
int domainId,
|
||||||
|
List<SearchFlagSiteService.FlagSiteComplaintModel> complaints,
|
||||||
|
List<SearchFlagSiteService.CategoryItem> category,
|
||||||
|
boolean submitted) {
|
||||||
|
this(Map.of("report", true), domain, domainId, complaints, category, submitted);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String query() { return "site:" + domain; }
|
||||||
|
|
||||||
|
public boolean isKnown() {
|
||||||
|
return domainId > 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,73 @@
|
|||||||
|
package nu.marginalia.search.svc;
|
||||||
|
|
||||||
|
import nu.marginalia.api.math.MathClient;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import javax.annotation.CheckForNull;
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.Singleton;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.concurrent.ExecutionException;
|
||||||
|
import java.util.concurrent.Future;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.TimeoutException;
|
||||||
|
import java.util.function.Predicate;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
@Singleton
|
||||||
|
public class SearchUnitConversionService {
|
||||||
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
|
private final Pattern conversionPattern = Pattern.compile("((\\d+|\\s+|[.()\\-^+%*/]|log[^a-z]|log2[^a-z]|sqrt[^a-z]|log10|cos[^a-z]|sin[^a-z]|tan[^a-z]|log2|pi[^a-z]|e[^a-z]|2pi[^a-z])+)\\s*([a-zA-Z][a-zA-Z^.0-9]*\\s?[a-zA-Z^.0-9]*)\\s+in\\s+([a-zA-Z^.0-9]+\\s?[a-zA-Z^.0-9]*)");
|
||||||
|
private final Predicate<String> evalPredicate = Pattern.compile("(\\d+|\\s+|[.()\\-^+%*/]|log|log2|sqrt|log10|cos|sin|tan|pi|e|2pi)+").asMatchPredicate();
|
||||||
|
|
||||||
|
private final MathClient mathClient;
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public SearchUnitConversionService(MathClient mathClient) {
|
||||||
|
this.mathClient = mathClient;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Optional<String> tryConversion(String query) {
|
||||||
|
var matcher = conversionPattern.matcher(query);
|
||||||
|
if (!matcher.matches())
|
||||||
|
return Optional.empty();
|
||||||
|
|
||||||
|
String value = matcher.group(1);
|
||||||
|
String from = matcher.group(3);
|
||||||
|
String to = matcher.group(4);
|
||||||
|
|
||||||
|
logger.info("{} -> '{}' '{}' '{}'", query, value, from, to);
|
||||||
|
|
||||||
|
try {
|
||||||
|
var resultFuture = mathClient.unitConversion(value, from, to);
|
||||||
|
return Optional.of(
|
||||||
|
resultFuture.get(250, TimeUnit.MILLISECONDS)
|
||||||
|
);
|
||||||
|
} catch (ExecutionException e) {
|
||||||
|
logger.error("Error in unit conversion", e);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
logger.error("Interrupted while waiting for unit conversion", e);
|
||||||
|
} catch (TimeoutException e) {
|
||||||
|
// Ignore
|
||||||
|
}
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public @CheckForNull Future<String> tryEval(String query) {
|
||||||
|
if (!evalPredicate.test(query)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
var expr = query.toLowerCase().trim();
|
||||||
|
|
||||||
|
if (expr.chars().allMatch(Character::isDigit)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info("eval({})", expr);
|
||||||
|
|
||||||
|
return mathClient.evalMath(expr);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,3 @@
|
|||||||
|
# Search Service
|
||||||
|
|
||||||
|
This is the old search service that serves search traffic with the old GUI.
|
Before Width: | Height: | Size: 1.2 KiB After Width: | Height: | Size: 1.2 KiB |
Before Width: | Height: | Size: 891 B After Width: | Height: | Size: 891 B |
@ -280,6 +280,7 @@ public class SearchServicePaperDoll extends AbstractModule {
|
|||||||
true,
|
true,
|
||||||
true,
|
true,
|
||||||
true,
|
true,
|
||||||
|
true,
|
||||||
SimilarDomain.LinkType.FOWARD
|
SimilarDomain.LinkType.FOWARD
|
||||||
));
|
));
|
||||||
dummyLinks.add(new SimilarDomain(
|
dummyLinks.add(new SimilarDomain(
|
||||||
@ -290,6 +291,7 @@ public class SearchServicePaperDoll extends AbstractModule {
|
|||||||
false,
|
false,
|
||||||
false,
|
false,
|
||||||
true,
|
true,
|
||||||
|
true,
|
||||||
SimilarDomain.LinkType.BACKWARD
|
SimilarDomain.LinkType.BACKWARD
|
||||||
));
|
));
|
||||||
dummyLinks.add(new SimilarDomain(
|
dummyLinks.add(new SimilarDomain(
|
||||||
@ -300,6 +302,7 @@ public class SearchServicePaperDoll extends AbstractModule {
|
|||||||
false,
|
false,
|
||||||
false,
|
false,
|
||||||
false,
|
false,
|
||||||
|
false,
|
||||||
SimilarDomain.LinkType.BIDIRECTIONAL
|
SimilarDomain.LinkType.BIDIRECTIONAL
|
||||||
));
|
));
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user