From 8d0af9548ba3ec29078c85683dac330f9289b057 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Mon, 26 Feb 2024 13:53:12 +0100 Subject: [PATCH] (search) Bot mitigation Add the ability to indicate to the search service that a request is malicious, and to poison the results by providing randomly reordered old results instead. --- .../nu/marginalia/search/SearchOperator.java | 56 +++++++++++++++---- .../search/command/SearchParameters.java | 13 +++-- .../search/svc/SearchQueryService.java | 3 +- 3 files changed, 53 insertions(+), 19 deletions(-) diff --git a/code/services-application/search-service/java/nu/marginalia/search/SearchOperator.java b/code/services-application/search-service/java/nu/marginalia/search/SearchOperator.java index d3141159..e07186b6 100644 --- a/code/services-application/search-service/java/nu/marginalia/search/SearchOperator.java +++ b/code/services-application/search-service/java/nu/marginalia/search/SearchOperator.java @@ -20,6 +20,7 @@ import org.slf4j.Marker; import org.slf4j.MarkerFactory; import javax.annotation.Nullable; +import java.lang.ref.WeakReference; import java.time.Duration; import java.util.*; import java.util.concurrent.Future; @@ -87,31 +88,62 @@ public class SearchOperator { return searchQueryService.getResultsFromQuery(queryResponse); } + private volatile WeakReference> oldResults = new WeakReference<>(Collections.emptyList()); + public DecoratedSearchResults doSearch(SearchParameters userParams) { Future eval = searchUnitConversionService.tryEval(userParams.query()); - var queryParams = paramFactory.forRegularSearch(userParams); - var queryResponse = queryClient.search(queryParams); - List queryResults = searchQueryService.getResultsFromQuery(queryResponse); + List clusteredResults; + QueryResponse queryResponse; + List problems; + String evalResult; + String focusDomain; - logger.info(queryMarker, "Human terms: {}", Strings.join(queryResponse.searchTermsHuman(), ',')); - logger.info(queryMarker, "Search Result Count: 
{}", queryResults.size()); + if (userParams.poisonResults() && Math.random() > 0.1) { - String evalResult = getFutureOrDefault(eval, ""); + // For botnet users, we return random old query results. This is to make + // it harder for them to figure out if they are being rate limited. - List clusteredResults = SearchResultClusterer - .selectStrategy(queryResponse) - .clusterResults(queryResults, 25); + clusteredResults = new ArrayList<>(Objects.requireNonNullElse(oldResults.get(), List.of())); + + // Shuffle the results to make it harder to distinguish + Collections.shuffle(clusteredResults); + + problems = List.of(); + evalResult = ""; + focusDomain = ""; + } else { + var queryParams = paramFactory.forRegularSearch(userParams); + queryResponse = queryClient.search(queryParams); + var queryResults = searchQueryService.getResultsFromQuery(queryResponse); + + logger.info(queryMarker, "Human terms: {}", Strings.join(queryResponse.searchTermsHuman(), ',')); + logger.info(queryMarker, "Search Result Count: {}", queryResults.size()); + + evalResult = getFutureOrDefault(eval, ""); + + clusteredResults = SearchResultClusterer + .selectStrategy(queryResponse) + .clusterResults(queryResults, 25); + + focusDomain = queryResponse.domain(); + problems = getProblems(evalResult, queryResults, queryResponse); + + if (userParams.poisonResults()) { + // Save the results to feed to the botnet + oldResults = new WeakReference<>(clusteredResults); + } + } return DecoratedSearchResults.builder() .params(userParams) - .problems(getProblems(evalResult, queryResults, queryResponse)) + .problems(problems) .evalResult(evalResult) .results(clusteredResults) .filters(new SearchFilters(websiteUrl, userParams)) - .focusDomain(queryResponse.domain()) - .focusDomainId(getDomainId(queryResponse.domain())) + .focusDomain(focusDomain) + .focusDomainId(getDomainId(focusDomain)) .build(); } diff --git a/code/services-application/search-service/java/nu/marginalia/search/command/SearchParameters.java 
b/code/services-application/search-service/java/nu/marginalia/search/command/SearchParameters.java index fcb90f13..9c47419d 100644 --- a/code/services-application/search-service/java/nu/marginalia/search/command/SearchParameters.java +++ b/code/services-application/search-service/java/nu/marginalia/search/command/SearchParameters.java @@ -16,29 +16,30 @@ public record SearchParameters(String query, SearchJsParameter js, SearchRecentParameter recent, SearchTitleParameter searchTitle, - SearchAdtechParameter adtech + SearchAdtechParameter adtech, + boolean poisonResults ) { public String profileStr() { return profile.filterId; } public SearchParameters withProfile(SearchProfile profile) { - return new SearchParameters(query, profile, js, recent, searchTitle, adtech); + return new SearchParameters(query, profile, js, recent, searchTitle, adtech, poisonResults); } public SearchParameters withJs(SearchJsParameter js) { - return new SearchParameters(query, profile, js, recent, searchTitle, adtech); + return new SearchParameters(query, profile, js, recent, searchTitle, adtech, poisonResults); } public SearchParameters withAdtech(SearchAdtechParameter adtech) { - return new SearchParameters(query, profile, js, recent, searchTitle, adtech); + return new SearchParameters(query, profile, js, recent, searchTitle, adtech, poisonResults); } public SearchParameters withRecent(SearchRecentParameter recent) { - return new SearchParameters(query, profile, js, recent, searchTitle, adtech); + return new SearchParameters(query, profile, js, recent, searchTitle, adtech, poisonResults); } public SearchParameters withTitle(SearchTitleParameter title) { - return new SearchParameters(query, profile, js, recent, title, adtech); + return new SearchParameters(query, profile, js, recent, title, adtech, poisonResults); } public String renderUrl(WebsiteUrl baseUrl) { diff --git a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchQueryService.java 
b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchQueryService.java index 948b0af7..3cc9166d 100644 --- a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchQueryService.java +++ b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchQueryService.java @@ -57,7 +57,8 @@ public class SearchQueryService { SearchJsParameter.parse(request.queryParams("js")), SearchRecentParameter.parse(request.queryParams("recent")), SearchTitleParameter.parse(request.queryParams("searchTitle")), - SearchAdtechParameter.parse(request.queryParams("adtech")) + SearchAdtechParameter.parse(request.queryParams("adtech")), + "1".equals(request.headers("X-Poison-Results")) ); } catch (Exception ex) {