From d8956c51d06df72c163c10e52280c17acb256e41 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Mon, 9 Oct 2023 14:42:33 +0200 Subject: [PATCH] (refactor) Remove api:search-api Application services should not have an API, but purely act as clients to the core services (which should always have an API). --- .../marginalia/query/model/QueryParams.java | 15 +++ code/api/search-api/build.gradle | 33 ------ code/api/search-api/readme.md | 8 -- .../search/client/SearchClient.java | 52 -------- .../search/client/SearchMqEndpoints.java | 6 - .../api-service/build.gradle | 4 +- .../nu/marginalia/api/ApiSearchOperator.java | 111 ++++++++++++++++++ .../java/nu/marginalia/api/ApiService.java | 20 ++-- .../api}/model/ApiSearchResult.java | 2 +- .../model/ApiSearchResultQueryDetails.java | 2 +- .../api}/model/ApiSearchResults.java | 2 +- .../nu/marginalia/api/svc/ResponseCache.java | 12 +- .../marginalia/api/svc/ResponseCacheTest.java | 3 +- .../search-service/build.gradle | 1 - .../nu/marginalia/search/SearchOperator.java | 12 -- .../nu/marginalia/search/SearchService.java | 9 +- .../search/svc/SearchApiQueryService.java | 108 ----------------- .../control-service/build.gradle | 3 +- .../nu/marginalia/control/ControlService.java | 46 +------- .../actor/task/ConvertAndLoadActor.java | 5 - .../control/svc/ControlBlacklistService.java | 7 ++ .../control/svc/SearchToBanService.java | 69 +++++++++++ .../templates/control/search-to-ban.hdb | 4 +- settings.gradle | 1 - 24 files changed, 234 insertions(+), 301 deletions(-) delete mode 100644 code/api/search-api/build.gradle delete mode 100644 code/api/search-api/readme.md delete mode 100644 code/api/search-api/src/main/java/nu/marginalia/search/client/SearchClient.java delete mode 100644 code/api/search-api/src/main/java/nu/marginalia/search/client/SearchMqEndpoints.java create mode 100644 code/services-application/api-service/src/main/java/nu/marginalia/api/ApiSearchOperator.java rename 
code/{api/search-api/src/main/java/nu/marginalia/search/client => services-application/api-service/src/main/java/nu/marginalia/api}/model/ApiSearchResult.java (89%) rename code/{api/search-api/src/main/java/nu/marginalia/search/client => services-application/api-service/src/main/java/nu/marginalia/api}/model/ApiSearchResultQueryDetails.java (83%) rename code/{api/search-api/src/main/java/nu/marginalia/search/client => services-application/api-service/src/main/java/nu/marginalia/api}/model/ApiSearchResults.java (86%) delete mode 100644 code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchApiQueryService.java create mode 100644 code/services-core/control-service/src/main/java/nu/marginalia/control/svc/SearchToBanService.java diff --git a/code/api/query-api/src/main/java/nu/marginalia/query/model/QueryParams.java b/code/api/query-api/src/main/java/nu/marginalia/query/model/QueryParams.java index 43a55393..2a912324 100644 --- a/code/api/query-api/src/main/java/nu/marginalia/query/model/QueryParams.java +++ b/code/api/query-api/src/main/java/nu/marginalia/query/model/QueryParams.java @@ -23,4 +23,19 @@ public record QueryParams( SearchSetIdentifier identifier ) { + public QueryParams(String query, QueryLimits limits, SearchSetIdentifier identifier) { + this(query, null, + List.of(), + List.of(), + List.of(), + List.of(), + SpecificationLimit.none(), + SpecificationLimit.none(), + SpecificationLimit.none(), + SpecificationLimit.none(), + List.of(), + limits, + identifier + ); + } } diff --git a/code/api/search-api/build.gradle b/code/api/search-api/build.gradle deleted file mode 100644 index 8b0eed50..00000000 --- a/code/api/search-api/build.gradle +++ /dev/null @@ -1,33 +0,0 @@ -plugins { - id 'java' - - - id 'jvm-test-suite' -} - -java { - toolchain { - languageVersion.set(JavaLanguageVersion.of(21)) - } -} - -dependencies { - implementation project(':code:common:model') - implementation project(':code:common:config') - implementation 
project(':code:libraries:message-queue') - implementation project(':code:common:service-discovery') - implementation project(':code:common:service-client') - - implementation libs.bundles.slf4j - - implementation libs.prometheus - implementation libs.notnull - implementation libs.guice - implementation libs.rxjava - implementation libs.gson - - testImplementation libs.bundles.slf4j.test - testImplementation libs.bundles.junit - testImplementation libs.mockito - -} diff --git a/code/api/search-api/readme.md b/code/api/search-api/readme.md deleted file mode 100644 index fafac549..00000000 --- a/code/api/search-api/readme.md +++ /dev/null @@ -1,8 +0,0 @@ -# Search API - -Client and models for talking to the [search-service](../../services-core/search-service), -implemented with the base client from [service-client](../../common/service-client). - -## Central Classes - -* [SearchClient](src/main/java/nu/marginalia/search/client/SearchClient.java) \ No newline at end of file diff --git a/code/api/search-api/src/main/java/nu/marginalia/search/client/SearchClient.java b/code/api/search-api/src/main/java/nu/marginalia/search/client/SearchClient.java deleted file mode 100644 index 8faef5be..00000000 --- a/code/api/search-api/src/main/java/nu/marginalia/search/client/SearchClient.java +++ /dev/null @@ -1,52 +0,0 @@ -package nu.marginalia.search.client; - -import com.google.inject.Inject; -import com.google.inject.Singleton; -import io.reactivex.rxjava3.core.Observable; -import nu.marginalia.client.AbstractDynamicClient; -import nu.marginalia.model.gson.GsonFactory; -import nu.marginalia.mq.MessageQueueFactory; -import nu.marginalia.mq.outbox.MqOutbox; -import nu.marginalia.search.client.model.ApiSearchResults; -import nu.marginalia.service.descriptor.ServiceDescriptors; -import nu.marginalia.service.id.ServiceId; -import nu.marginalia.WmsaHome; -import nu.marginalia.client.Context; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import 
javax.annotation.CheckReturnValue; -import java.net.URLEncoder; -import java.nio.charset.StandardCharsets; -import java.util.UUID; - -@Singleton -public class SearchClient extends AbstractDynamicClient { - private final Logger logger = LoggerFactory.getLogger(getClass()); - - private final MqOutbox outbox; - - @Inject - public SearchClient(ServiceDescriptors descriptors, - MessageQueueFactory messageQueueFactory) { - - super(descriptors.forId(ServiceId.Search), WmsaHome.getHostsFile(), GsonFactory::get); - - String inboxName = ServiceId.Search.name + ":" + "0"; - String outboxName = System.getProperty("service-name", UUID.randomUUID().toString()); - - outbox = messageQueueFactory.createOutbox(inboxName, outboxName, UUID.randomUUID()); - - } - - - public MqOutbox outbox() { - return outbox; - } - - @CheckReturnValue - public Observable<ApiSearchResults> query(Context ctx, String queryString, int count, int profile) { - return this.get(ctx, String.format("/api/search?query=%s&count=%d&index=%d", URLEncoder.encode(queryString, StandardCharsets.UTF_8), count, profile), ApiSearchResults.class); - } - -} diff --git a/code/api/search-api/src/main/java/nu/marginalia/search/client/SearchMqEndpoints.java b/code/api/search-api/src/main/java/nu/marginalia/search/client/SearchMqEndpoints.java deleted file mode 100644 index 1c546b3e..00000000 --- a/code/api/search-api/src/main/java/nu/marginalia/search/client/SearchMqEndpoints.java +++ /dev/null @@ -1,6 +0,0 @@ -package nu.marginalia.search.client; - -public class SearchMqEndpoints { - /** Flushes the URL caches, run if significant changes have occurred in the URLs database */ - public static final String FLUSH_CACHES = "FLUSH_CACHES"; -} diff --git a/code/services-application/api-service/build.gradle b/code/services-application/api-service/build.gradle index 4b3b3b12..bd9a4ca6 100644 --- a/code/services-application/api-service/build.gradle +++ b/code/services-application/api-service/build.gradle @@ -28,7 +28,9 @@ dependencies { implementation 
project(':code:common:config') implementation project(':code:common:service-discovery') implementation project(':code:common:service-client') - implementation project(':code:api:search-api') + implementation project(':code:api:query-api') + implementation project(':code:api:index-api') + implementation project(':code:features-index:index-query') implementation libs.bundles.slf4j diff --git a/code/services-application/api-service/src/main/java/nu/marginalia/api/ApiSearchOperator.java b/code/services-application/api-service/src/main/java/nu/marginalia/api/ApiSearchOperator.java new file mode 100644 index 00000000..7dca777c --- /dev/null +++ b/code/services-application/api-service/src/main/java/nu/marginalia/api/ApiSearchOperator.java @@ -0,0 +1,111 @@ +package nu.marginalia.api; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import nu.marginalia.api.model.ApiSearchResult; +import nu.marginalia.api.model.ApiSearchResultQueryDetails; +import nu.marginalia.api.model.ApiSearchResults; +import nu.marginalia.client.Context; +import nu.marginalia.index.client.model.query.SearchSetIdentifier; +import nu.marginalia.index.client.model.results.DecoratedSearchResultItem; +import nu.marginalia.index.client.model.results.SearchResultKeywordScore; +import nu.marginalia.index.query.limit.QueryLimits; +import nu.marginalia.index.query.limit.SpecificationLimit; +import nu.marginalia.index.searchset.SearchSet; +import nu.marginalia.model.idx.WordMetadata; +import nu.marginalia.query.client.QueryClient; +import nu.marginalia.query.model.QueryParams; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +@Singleton +public class ApiSearchOperator { + private final QueryClient queryClient; + + @Inject + public ApiSearchOperator(QueryClient queryClient) { + this.queryClient = queryClient; + } + + public ApiSearchResults query(Context context, + String query, + int count, 
+ int index) + { + var rsp = queryClient.search(context, createParams(query, count, index)); + + return new ApiSearchResults("RESTRICTED", query, + rsp.results() + .stream() + .map(this::convert) + .sorted(Comparator.comparing(ApiSearchResult::getQuality).reversed()) + .limit(count) + .collect(Collectors.toList())); + } + + private QueryParams createParams(String query, int count, int index) { + SearchSetIdentifier searchSet = selectSearchSet(index); + + return new QueryParams( + query, + new QueryLimits( + 2, + Math.min(100, count), + 150, + 8192), + searchSet); + } + + private SearchSetIdentifier selectSearchSet(int index) { + return switch (index) { + case 0 -> SearchSetIdentifier.NONE; + case 1 -> SearchSetIdentifier.SMALLWEB; + case 2 -> SearchSetIdentifier.RETRO; + case 3 -> SearchSetIdentifier.NONE; + case 5 -> SearchSetIdentifier.NONE; + default -> SearchSetIdentifier.NONE; + }; + } + + + + ApiSearchResult convert(DecoratedSearchResultItem url) { + List<List<ApiSearchResultQueryDetails>> details = new ArrayList<>(); + if (url.rawIndexResult != null) { + var bySet = url.rawIndexResult.keywordScores.stream().collect(Collectors.groupingBy(SearchResultKeywordScore::subquery)); + + outer: + for (var entries : bySet.values()) { + List<ApiSearchResultQueryDetails> lst = new ArrayList<>(); + for (var entry : entries) { + var metadata = new WordMetadata(entry.encodedWordMetadata()); + if (metadata.isEmpty()) + continue outer; + + Set<String> flags = metadata.flagSet().stream().map(Object::toString).collect(Collectors.toSet()); + lst.add(new ApiSearchResultQueryDetails(entry.keyword, Long.bitCount(metadata.positions()), flags)); + } + details.add(lst); + } + } + + return new ApiSearchResult( + url.url.toString(), + url.getTitle(), + url.getDescription(), + sanitizeNaN(url.rankingScore, -100), + details + ); + } + + private double sanitizeNaN(double value, double alternative) { + if (!Double.isFinite(value)) { + return alternative; + } + return value; + } +} diff --git 
a/code/services-application/api-service/src/main/java/nu/marginalia/api/ApiService.java b/code/services-application/api-service/src/main/java/nu/marginalia/api/ApiService.java index aed2006d..ab4c7295 100644 --- a/code/services-application/api-service/src/main/java/nu/marginalia/api/ApiService.java +++ b/code/services-application/api-service/src/main/java/nu/marginalia/api/ApiService.java @@ -3,13 +3,13 @@ package nu.marginalia.api; import com.google.gson.Gson; import com.google.inject.Inject; import nu.marginalia.api.model.ApiLicense; +import nu.marginalia.api.model.ApiSearchResults; import nu.marginalia.api.svc.LicenseService; import nu.marginalia.api.svc.RateLimiterService; import nu.marginalia.api.svc.ResponseCache; import nu.marginalia.client.Context; import nu.marginalia.model.gson.GsonFactory; -import nu.marginalia.search.client.SearchClient; -import nu.marginalia.search.client.model.ApiSearchResults; +import nu.marginalia.query.client.QueryClient; import nu.marginalia.service.server.*; import nu.marginalia.service.server.mq.MqNotification; import org.slf4j.Logger; @@ -24,29 +24,32 @@ public class ApiService extends Service { private final Logger logger = LoggerFactory.getLogger(getClass()); private final Gson gson = GsonFactory.get(); - private final SearchClient searchClient; + private final QueryClient queryClient; private final ResponseCache responseCache; private final LicenseService licenseService; private final RateLimiterService rateLimiterService; + private final ApiSearchOperator searchOperator; // Marker for filtering out sensitive content from the persistent logs private final Marker queryMarker = MarkerFactory.getMarker("QUERY"); @Inject public ApiService(BaseServiceParams params, - SearchClient searchClient, + QueryClient queryClient, ResponseCache responseCache, LicenseService licenseService, - RateLimiterService rateLimiterService + RateLimiterService rateLimiterService, + ApiSearchOperator searchOperator ) { super(params); - 
this.searchClient = searchClient; + this.queryClient = queryClient; this.responseCache = responseCache; this.licenseService = licenseService; this.rateLimiterService = rateLimiterService; + this.searchOperator = searchOperator; Spark.get("/public/api/", (rq, rsp) -> { rsp.redirect("https://memex.marginalia.nu/projects/edge/api.gmi"); @@ -102,8 +105,9 @@ public class ApiService extends Service { logger.info(queryMarker, "{} Search {}", license.key, query); - return searchClient.query(Context.fromRequest(request), query, count, index) - .blockingFirst().withLicense(license.getLicense()); + return searchOperator + .query(Context.fromRequest(request), query, count, index) + .withLicense(license.getLicense()); } private int intParam(Request request, String name, int defaultValue) { diff --git a/code/api/search-api/src/main/java/nu/marginalia/search/client/model/ApiSearchResult.java b/code/services-application/api-service/src/main/java/nu/marginalia/api/model/ApiSearchResult.java similarity index 89% rename from code/api/search-api/src/main/java/nu/marginalia/search/client/model/ApiSearchResult.java rename to code/services-application/api-service/src/main/java/nu/marginalia/api/model/ApiSearchResult.java index bedc3046..f4e0147f 100644 --- a/code/api/search-api/src/main/java/nu/marginalia/search/client/model/ApiSearchResult.java +++ b/code/services-application/api-service/src/main/java/nu/marginalia/api/model/ApiSearchResult.java @@ -1,4 +1,4 @@ -package nu.marginalia.search.client.model; +package nu.marginalia.api.model; import lombok.AllArgsConstructor; import lombok.Getter; diff --git a/code/api/search-api/src/main/java/nu/marginalia/search/client/model/ApiSearchResultQueryDetails.java b/code/services-application/api-service/src/main/java/nu/marginalia/api/model/ApiSearchResultQueryDetails.java similarity index 83% rename from code/api/search-api/src/main/java/nu/marginalia/search/client/model/ApiSearchResultQueryDetails.java rename to 
code/services-application/api-service/src/main/java/nu/marginalia/api/model/ApiSearchResultQueryDetails.java index 431b195f..8b665fc5 100644 --- a/code/api/search-api/src/main/java/nu/marginalia/search/client/model/ApiSearchResultQueryDetails.java +++ b/code/services-application/api-service/src/main/java/nu/marginalia/api/model/ApiSearchResultQueryDetails.java @@ -1,4 +1,4 @@ -package nu.marginalia.search.client.model; +package nu.marginalia.api.model; import lombok.AllArgsConstructor; import lombok.Getter; diff --git a/code/api/search-api/src/main/java/nu/marginalia/search/client/model/ApiSearchResults.java b/code/services-application/api-service/src/main/java/nu/marginalia/api/model/ApiSearchResults.java similarity index 86% rename from code/api/search-api/src/main/java/nu/marginalia/search/client/model/ApiSearchResults.java rename to code/services-application/api-service/src/main/java/nu/marginalia/api/model/ApiSearchResults.java index 688e9e91..b1438692 100644 --- a/code/api/search-api/src/main/java/nu/marginalia/search/client/model/ApiSearchResults.java +++ b/code/services-application/api-service/src/main/java/nu/marginalia/api/model/ApiSearchResults.java @@ -1,4 +1,4 @@ -package nu.marginalia.search.client.model; +package nu.marginalia.api.model; import lombok.AllArgsConstructor; import lombok.Getter; diff --git a/code/services-application/api-service/src/main/java/nu/marginalia/api/svc/ResponseCache.java b/code/services-application/api-service/src/main/java/nu/marginalia/api/svc/ResponseCache.java index 032ad9b4..0f0a4a07 100644 --- a/code/services-application/api-service/src/main/java/nu/marginalia/api/svc/ResponseCache.java +++ b/code/services-application/api-service/src/main/java/nu/marginalia/api/svc/ResponseCache.java @@ -3,20 +3,12 @@ package nu.marginalia.api.svc; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.google.inject.Singleton; -import nu.marginalia.api.model.ApiLicense; -import 
nu.marginalia.search.client.model.ApiSearchResults; +import nu.marginalia.api.model.*; import java.time.Duration; import java.util.Optional; -/** This response cache exists entirely to help SearXNG with its rate limiting. - * For some reason they're hitting the API with like 5-12 identical requests. - *

- * I've submitted an issue, they were like nah mang it works fine must - * be something else ¯\_(ツ)_/¯. - *

- * So we're going to cache the API responses for a short while to mitigate the - * impact of such shotgun queries on the ratelimit. +/** This response cache exists entirely to help clients with its rate limiting. */ @Singleton public class ResponseCache { diff --git a/code/services-application/api-service/src/test/java/nu/marginalia/api/svc/ResponseCacheTest.java b/code/services-application/api-service/src/test/java/nu/marginalia/api/svc/ResponseCacheTest.java index fc257650..9cc01d9c 100644 --- a/code/services-application/api-service/src/test/java/nu/marginalia/api/svc/ResponseCacheTest.java +++ b/code/services-application/api-service/src/test/java/nu/marginalia/api/svc/ResponseCacheTest.java @@ -1,7 +1,6 @@ package nu.marginalia.api.svc; -import nu.marginalia.api.model.ApiLicense; -import nu.marginalia.search.client.model.ApiSearchResults; +import nu.marginalia.api.model.*; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; diff --git a/code/services-application/search-service/build.gradle b/code/services-application/search-service/build.gradle index 064818d1..249e6219 100644 --- a/code/services-application/search-service/build.gradle +++ b/code/services-application/search-service/build.gradle @@ -35,7 +35,6 @@ dependencies { implementation project(':code:api:assistant-api') implementation project(':code:api:query-api') implementation project(':code:api:index-api') - implementation project(':code:api:search-api') implementation project(':code:common:service-discovery') implementation project(':code:common:service-client') implementation project(':code:common:renderer') diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchOperator.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchOperator.java index 4b67e823..9b90ddd9 100644 --- 
a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchOperator.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchOperator.java @@ -61,18 +61,6 @@ public class SearchOperator { this.searchUnitConversionService = searchUnitConversionService; } - public List doApiSearch(Context ctx, - UserSearchParameters params) { - - // TODO: This shouldn't route through search-service! - var queryParams = paramFactory.forRegularSearch(params); - var queryResponse = queryClient.search(ctx, queryParams); - - logger.info(queryMarker, "Human terms (API): {}", Strings.join(queryResponse.searchTermsHuman(), ',')); - - return searchQueryService.getResultsFromQuery(queryResponse); - } - public List doSiteSearch(Context ctx, String domain) { diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchService.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchService.java index d9263a43..af3735e1 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchService.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchService.java @@ -24,10 +24,8 @@ public class SearchService extends Service { private final WebsiteUrl websiteUrl; private final StaticResources staticResources; - private final FileStorageService fileStorageService; private static final Logger logger = LoggerFactory.getLogger(SearchService.class); - private final ServiceEventLog eventLog; @SneakyThrows @Inject @@ -38,16 +36,12 @@ public class SearchService extends Service { SearchErrorPageService errorPageService, SearchAddToCrawlQueueService addToCrawlQueueService, SearchFlagSiteService flagSiteService, - SearchQueryService searchQueryService, - SearchApiQueryService apiQueryService, - FileStorageService fileStorageService + SearchQueryService searchQueryService ) { super(params); - this.eventLog = 
params.eventLog; this.websiteUrl = websiteUrl; this.staticResources = staticResources; - this.fileStorageService = fileStorageService; Spark.staticFiles.expireTime(600); @@ -55,7 +49,6 @@ public class SearchService extends Service { Gson gson = GsonFactory.get(); - Spark.get("/api/search", apiQueryService::apiSearch, gson::toJson); Spark.get("/public/search", searchQueryService::pathSearch); Spark.get("/public/site-search/:site/*", this::siteSearchRedir); Spark.get("/public/", frontPageService::render); diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchApiQueryService.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchApiQueryService.java deleted file mode 100644 index 37094f66..00000000 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchApiQueryService.java +++ /dev/null @@ -1,108 +0,0 @@ -package nu.marginalia.search.svc; - -import com.google.common.base.Strings; -import com.google.inject.Inject; -import lombok.SneakyThrows; -import nu.marginalia.db.DomainBlacklist; -import nu.marginalia.index.client.model.results.SearchResultKeywordScore; -import nu.marginalia.search.client.model.ApiSearchResultQueryDetails; -import nu.marginalia.model.idx.WordMetadata; -import nu.marginalia.search.SearchOperator; -import nu.marginalia.search.model.UrlDetails; -import nu.marginalia.search.client.model.ApiSearchResult; -import nu.marginalia.search.client.model.ApiSearchResults; -import nu.marginalia.search.model.SearchProfile; -import nu.marginalia.client.Context; -import nu.marginalia.search.command.SearchJsParameter; -import nu.marginalia.search.model.UserSearchParameters; -import spark.Request; -import spark.Response; - -import java.util.ArrayList; -import java.util.List; -import java.util.Set; -import java.util.stream.Collectors; - -public class SearchApiQueryService { - private SearchOperator searchOperator; - private final DomainBlacklist 
blacklist; - - @Inject - public SearchApiQueryService( - SearchOperator searchOperator, - DomainBlacklist blacklist - ) { - this.searchOperator = searchOperator; - this.blacklist = blacklist; - } - - @SneakyThrows - public Object apiSearch(Request request, Response response) { - - final var ctx = Context.fromRequest(request); - final String queryParam = request.queryParams("query"); - final int limit; - SearchProfile profile = SearchProfile.YOLO; - - String count = request.queryParamOrDefault("count", "20"); - limit = Integer.parseInt(count); - - String index = request.queryParamOrDefault("index", "0"); - if (!Strings.isNullOrEmpty(index)) { - profile = switch (index) { - case "0" -> SearchProfile.YOLO; - case "1" -> SearchProfile.MODERN; - case "2" -> SearchProfile.DEFAULT; - case "3" -> SearchProfile.CORPO_CLEAN; - case "5" -> SearchProfile.YOLO; - case "blogosphere" -> SearchProfile.BLOGOSPHERE; - default -> SearchProfile.CORPO_CLEAN; - }; - } - - final String humanQuery = queryParam.trim(); - - var results = searchOperator.doApiSearch(ctx, new UserSearchParameters(humanQuery, profile, SearchJsParameter.DEFAULT)); - - results.removeIf(details -> blacklist.isBlacklisted(details.domainId)); - - return new ApiSearchResults("RESTRICTED", humanQuery, results.stream().map(this::convert).limit(limit).collect(Collectors.toList())); - } - - ApiSearchResult convert(UrlDetails url) { - List<List<ApiSearchResultQueryDetails>> details = new ArrayList<>(); - if (url.resultItem != null) { - var bySet = url.resultItem.keywordScores.stream().collect(Collectors.groupingBy(SearchResultKeywordScore::subquery)); - - outer: - for (var entries : bySet.values()) { - List<ApiSearchResultQueryDetails> lst = new ArrayList<>(); - for (var entry : entries) { - var metadata = new WordMetadata(entry.encodedWordMetadata()); - if (metadata.isEmpty()) - continue outer; - - Set<String> flags = metadata.flagSet().stream().map(Object::toString).collect(Collectors.toSet()); - lst.add(new ApiSearchResultQueryDetails(entry.keyword, Long.bitCount(metadata.positions()), 
flags)); - } - details.add(lst); - } - } - - return new ApiSearchResult( - url.url.toString(), - url.getTitle(), - url.getDescription(), - sanitizeNaN(url.getTermScore(), -100), - details - ); - } - - private double sanitizeNaN(double value, double alternative) { - if (!Double.isFinite(value)) { - return alternative; - } - return value; - } - -} diff --git a/code/services-core/control-service/build.gradle b/code/services-core/control-service/build.gradle index 373c1a6f..c1e18501 100644 --- a/code/services-core/control-service/build.gradle +++ b/code/services-core/control-service/build.gradle @@ -32,11 +32,12 @@ dependencies { implementation project(':code:libraries:message-queue') implementation project(':code:common:service-discovery') implementation project(':code:common:service-client') - implementation project(':code:api:search-api') implementation project(':code:api:index-api') + implementation project(':code:api:query-api') implementation project(':code:api:process-mqapi') implementation project(':code:features-search:screenshots') implementation project(':code:features-index:index-journal') + implementation project(':code:features-index:index-query') implementation project(':code:process-models:crawl-spec') implementation libs.bundles.slf4j diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlService.java b/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlService.java index 305c74f0..dcd7496b 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlService.java +++ b/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlService.java @@ -3,7 +3,6 @@ package nu.marginalia.control; import com.google.gson.Gson; import com.google.inject.Inject; import gnu.trove.list.array.TIntArrayList; -import nu.marginalia.client.Context; import nu.marginalia.client.ServiceMonitors; import nu.marginalia.control.actor.Actor; import 
nu.marginalia.control.model.*; @@ -11,11 +10,9 @@ import nu.marginalia.control.svc.*; import nu.marginalia.db.storage.model.FileStorageId; import nu.marginalia.db.storage.model.FileStorageType; import nu.marginalia.model.EdgeDomain; -import nu.marginalia.model.EdgeUrl; import nu.marginalia.model.gson.GsonFactory; import nu.marginalia.renderer.RendererFactory; import nu.marginalia.screenshot.ScreenshotService; -import nu.marginalia.search.client.SearchClient; import nu.marginalia.service.server.*; import org.eclipse.jetty.util.StringUtil; import org.slf4j.Logger; @@ -40,8 +37,8 @@ public class ControlService extends Service { private final ApiKeyService apiKeyService; private final DomainComplaintService domainComplaintService; private final ControlBlacklistService blacklistService; + private final SearchToBanService searchToBanService; private final RandomExplorationService randomExplorationService; - private final SearchClient searchClient; private final ControlActorService controlActorService; private final StaticResources staticResources; private final MessageQueueService messageQueueService; @@ -63,8 +60,8 @@ public class ControlService extends Service { ControlBlacklistService blacklistService, ControlActionsService controlActionsService, ScreenshotService screenshotService, - RandomExplorationService randomExplorationService, - SearchClient searchClient + SearchToBanService searchToBanService, + RandomExplorationService randomExplorationService ) throws IOException { super(params); @@ -74,8 +71,8 @@ public class ControlService extends Service { this.apiKeyService = apiKeyService; this.domainComplaintService = domainComplaintService; this.blacklistService = blacklistService; + this.searchToBanService = searchToBanService; this.randomExplorationService = randomExplorationService; - this.searchClient = searchClient; var indexRenderer = rendererFactory.renderer("control/index"); var eventsRenderer = rendererFactory.renderer("control/events"); @@ -176,8 +173,8 @@ 
public class ControlService extends Service { Spark.get("/public/blacklist", this::blacklistModel, blacklistRenderer::render); Spark.post("/public/blacklist", this::updateBlacklist, redirectToBlacklist); - Spark.get("/public/search-to-ban", this::searchToBanModel, searchToBanRenderer::render); - Spark.post("/public/search-to-ban", this::searchToBanModel, searchToBanRenderer::render); + Spark.get("/public/search-to-ban", searchToBanService::handle, searchToBanRenderer::render); + Spark.post("/public/search-to-ban", searchToBanService::handle, searchToBanRenderer::render); // API Keys @@ -252,37 +249,6 @@ public class ControlService extends Service { return Map.of("blacklist", blacklistService.lastNAdditions(100)); } - private Object searchToBanModel(Request request, Response response) { - String q = request.queryParams("q"); - - if (Objects.equals(request.requestMethod(), "POST")) { - request.params().forEach((k,v) -> System.out.println(k + " -- " + v)); - List bannedUrls = new ArrayList<>(); - - String query = request.queryParams("query"); - for (var param : request.queryParams()) { - if ("query".equals(param)) { - continue; - } - EdgeUrl.parse(param).ifPresent(url -> - blacklistService.addToBlacklist(url.domain, query) - ); - bannedUrls.add(param); - } - - request.queryParams().forEach(System.out::println); - q = query; - } - - if (q == null || q.isBlank()) { - return Map.of(); - } else { - return searchClient - .query(Context.fromRequest(request), q, 200, 5) - .blockingFirst(); - } - } - private Object updateBlacklist(Request request, Response response) { var domain = new EdgeDomain(request.queryParams("domain")); if ("add".equals(request.queryParams("act"))) { diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/actor/task/ConvertAndLoadActor.java b/code/services-core/control-service/src/main/java/nu/marginalia/control/actor/task/ConvertAndLoadActor.java index a2789be3..2525656c 100644 --- 
a/code/services-core/control-service/src/main/java/nu/marginalia/control/actor/task/ConvertAndLoadActor.java +++ b/code/services-core/control-service/src/main/java/nu/marginalia/control/actor/task/ConvertAndLoadActor.java @@ -26,8 +26,6 @@ import nu.marginalia.mq.outbox.MqOutbox; import nu.marginalia.actor.prototype.AbstractActorPrototype; import nu.marginalia.actor.state.ActorState; import nu.marginalia.actor.state.ActorResumeBehavior; -import nu.marginalia.search.client.SearchClient; -import nu.marginalia.search.client.SearchMqEndpoints; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -57,7 +55,6 @@ public class ConvertAndLoadActor extends AbstractActorPrototype { private final MqOutbox mqLoaderOutbox; private final MqOutbox mqIndexConstructorOutbox; private final MqOutbox indexOutbox; - private final MqOutbox searchOutbox; private final FileStorageService storageService; private final BackupService backupService; private final Gson gson; @@ -83,7 +80,6 @@ public class ConvertAndLoadActor extends AbstractActorPrototype { ProcessOutboxes processOutboxes, FileStorageService storageService, IndexClient indexClient, - SearchClient searchClient, BackupService backupService, Gson gson ) @@ -91,7 +87,6 @@ public class ConvertAndLoadActor extends AbstractActorPrototype { super(stateFactory); this.processWatcher = processWatcher; this.indexOutbox = indexClient.outbox(); - this.searchOutbox = searchClient.outbox(); this.mqConverterOutbox = processOutboxes.getConverterOutbox(); this.mqLoaderOutbox = processOutboxes.getLoaderOutbox(); this.mqIndexConstructorOutbox = processOutboxes.getIndexConstructorOutbox(); diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/svc/ControlBlacklistService.java b/code/services-core/control-service/src/main/java/nu/marginalia/control/svc/ControlBlacklistService.java index d23a06e2..5d692f52 100644 --- 
a/code/services-core/control-service/src/main/java/nu/marginalia/control/svc/ControlBlacklistService.java +++ b/code/services-core/control-service/src/main/java/nu/marginalia/control/svc/ControlBlacklistService.java @@ -4,6 +4,8 @@ import com.google.inject.Inject; import com.zaxxer.hikari.HikariDataSource; import nu.marginalia.control.model.BlacklistedDomainModel; import nu.marginalia.model.EdgeDomain; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.sql.SQLException; import java.util.ArrayList; @@ -12,6 +14,7 @@ import java.util.List; public class ControlBlacklistService { private final HikariDataSource dataSource; + private final Logger logger = LoggerFactory.getLogger(getClass()); @Inject public ControlBlacklistService(HikariDataSource dataSource) { @@ -19,6 +22,8 @@ public class ControlBlacklistService { } public void addToBlacklist(EdgeDomain domain, String comment) { + logger.info("Blacklisting {} -- {}", domain, comment); + try (var conn = dataSource.getConnection(); var stmt = conn.prepareStatement(""" INSERT IGNORE INTO EC_DOMAIN_BLACKLIST (URL_DOMAIN, COMMENT) VALUES (?, ?) @@ -33,6 +38,8 @@ public class ControlBlacklistService { } public void removeFromBlacklist(EdgeDomain domain) { + logger.info("Un-blacklisting {}", domain); + try (var conn = dataSource.getConnection(); var stmt = conn.prepareStatement(""" DELETE FROM EC_DOMAIN_BLACKLIST WHERE URL_DOMAIN=? 
diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/svc/SearchToBanService.java b/code/services-core/control-service/src/main/java/nu/marginalia/control/svc/SearchToBanService.java
new file mode 100644
index 00000000..c4fb4e2c
--- /dev/null
+++ b/code/services-core/control-service/src/main/java/nu/marginalia/control/svc/SearchToBanService.java
@@ -0,0 +1,88 @@
+package nu.marginalia.control.svc;
+
+import com.google.inject.Inject;
+import nu.marginalia.client.Context;
+import nu.marginalia.index.client.model.query.SearchSetIdentifier;
+import nu.marginalia.index.query.limit.QueryLimits;
+import nu.marginalia.model.EdgeUrl;
+import nu.marginalia.query.client.QueryClient;
+import nu.marginalia.query.model.QueryParams;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import spark.Request;
+import spark.Response;
+
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * Backs the "search to ban" view of the control service: runs a search query and,
+ * on POST, blacklists the domain of every URL submitted with the form.
+ */
+public class SearchToBanService {
+    private final ControlBlacklistService blacklistService;
+    private final QueryClient queryClient;
+    private final Logger logger = LoggerFactory.getLogger(getClass());
+
+    @Inject
+    public SearchToBanService(ControlBlacklistService blacklistService,
+                              QueryClient queryClient)
+    {
+
+        this.blacklistService = blacklistService;
+        this.queryClient = queryClient;
+    }
+
+    /**
+     * Spark route handler shared by GET and POST.
+     * <p>
+     * On POST the submitted parameters are blacklisted first, and the search is then
+     * re-run from the {@code query} parameter; otherwise the search term is read from {@code q}.
+     *
+     * @return an empty map when no search term was given, otherwise the query client's result
+     */
+    public Object handle(Request request, Response response) {
+        if (Objects.equals(request.requestMethod(), "POST")) {
+            executeBlacklisting(request);
+
+            return findResults(Context.fromRequest(request), request.queryParams("query"));
+        }
+
+        return findResults(Context.fromRequest(request), request.queryParams("q"));
+    }
+
+    /** Runs the search for {@code q}; a null or blank term yields an empty model. */
+    private Object findResults(Context ctx, String q) {
+        if (q == null || q.isBlank()) {
+            return Map.of();
+        } else {
+            return executeQuery(ctx, q);
+        }
+    }
+
+    /**
+     * Blacklists the domain of every parameter name that parses as a URL,
+     * recording the {@code query} parameter as the blacklist comment.
+     */
+    private void executeBlacklisting(Request request) {
+        String query = request.queryParams("query");
+        for (var param : request.queryParams()) {
+            // Parameterized logging: no string concatenation unless INFO is enabled
+            logger.info("{}: {}", param, request.queryParams(param));
+            if ("query".equals(param)) {
+                continue;
+            }
+            EdgeUrl.parse(param).ifPresent(url ->
+                blacklistService.addToBlacklist(url.domain, query)
+            );
+        }
+    }
+
+    /** Sends {@code query} through the query client with fixed limits and {@code SearchSetIdentifier.NONE}. */
+    private Object executeQuery(Context ctx, String query) {
+        return queryClient.search(ctx, new QueryParams(
+                query, new QueryLimits(2, 200, 250, 8192),
+                SearchSetIdentifier.NONE
+        ));
+    }
+}
diff --git a/code/services-core/control-service/src/main/resources/templates/control/search-to-ban.hdb b/code/services-core/control-service/src/main/resources/templates/control/search-to-ban.hdb
index 68fd94ef..371f9fe3 100644
--- a/code/services-core/control-service/src/main/resources/templates/control/search-to-ban.hdb
+++ b/code/services-core/control-service/src/main/resources/templates/control/search-to-ban.hdb
@@ -11,7 +11,7 @@

Search to Ban


- +
{{#unless results}}

This utility lets you use the search engine to find spammy results, and ban them @@ -21,7 +21,7 @@


- + {{#each results}} diff --git a/settings.gradle b/settings.gradle index 29e76e76..1e9087af 100644 --- a/settings.gradle +++ b/settings.gradle @@ -45,7 +45,6 @@ include 'code:features-index:index-forward' include 'code:features-index:index-reverse' include 'code:features-index:domain-ranking' -include 'code:api:search-api' include 'code:api:query-api' include 'code:api:index-api' include 'code:api:assistant-api'
{{title}}