mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
(search) Bot mitigation
Add the ability to indicate to the search service that a request is malicious, and to poison the results by providing randomly reordered old results instead.
This commit is contained in:
parent
67aa20ea2c
commit
8d0af9548b
@ -20,6 +20,7 @@ import org.slf4j.Marker;
|
||||
import org.slf4j.MarkerFactory;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.lang.ref.WeakReference;
|
||||
import java.time.Duration;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.Future;
|
||||
@ -87,31 +88,62 @@ public class SearchOperator {
|
||||
return searchQueryService.getResultsFromQuery(queryResponse);
|
||||
}
|
||||
|
||||
private volatile WeakReference<List<ClusteredUrlDetails>> oldResults = new WeakReference<>(Collections.emptyList());
|
||||
|
||||
public DecoratedSearchResults doSearch(SearchParameters userParams) {
|
||||
|
||||
Future<String> eval = searchUnitConversionService.tryEval(userParams.query());
|
||||
var queryParams = paramFactory.forRegularSearch(userParams);
|
||||
var queryResponse = queryClient.search(queryParams);
|
||||
|
||||
List<UrlDetails> queryResults = searchQueryService.getResultsFromQuery(queryResponse);
|
||||
List<ClusteredUrlDetails> clusteredResults;
|
||||
QueryResponse queryResponse;
|
||||
List<String> problems;
|
||||
String evalResult;
|
||||
String focusDomain;
|
||||
|
||||
if (userParams.poisonResults() && Math.random() > 0.1) {
|
||||
|
||||
// For botnet users, we return random old query results. This is to make
|
||||
// it harder for them to figure out if they are being rate limited.
|
||||
|
||||
clusteredResults = new ArrayList<>(Objects.requireNonNullElse(oldResults.get(), List.of()));
|
||||
|
||||
// Shuffle the results to make it harder to distinguish
|
||||
Collections.shuffle(clusteredResults);
|
||||
|
||||
problems = List.of();
|
||||
evalResult = "";
|
||||
focusDomain = "";
|
||||
} else {
|
||||
var queryParams = paramFactory.forRegularSearch(userParams);
|
||||
queryResponse = queryClient.search(queryParams);
|
||||
var queryResults = searchQueryService.getResultsFromQuery(queryResponse);
|
||||
|
||||
logger.info(queryMarker, "Human terms: {}", Strings.join(queryResponse.searchTermsHuman(), ','));
|
||||
logger.info(queryMarker, "Search Result Count: {}", queryResults.size());
|
||||
|
||||
String evalResult = getFutureOrDefault(eval, "");
|
||||
evalResult = getFutureOrDefault(eval, "");
|
||||
|
||||
List<ClusteredUrlDetails> clusteredResults = SearchResultClusterer
|
||||
clusteredResults = SearchResultClusterer
|
||||
.selectStrategy(queryResponse)
|
||||
.clusterResults(queryResults, 25);
|
||||
|
||||
focusDomain = queryResponse.domain();
|
||||
problems = getProblems(evalResult, queryResults, queryResponse);
|
||||
|
||||
if (userParams.poisonResults()) {
|
||||
// Save the results to feed to the botnet
|
||||
oldResults = new WeakReference<>(clusteredResults);
|
||||
}
|
||||
}
|
||||
|
||||
return DecoratedSearchResults.builder()
|
||||
.params(userParams)
|
||||
.problems(getProblems(evalResult, queryResults, queryResponse))
|
||||
.problems(problems)
|
||||
.evalResult(evalResult)
|
||||
.results(clusteredResults)
|
||||
.filters(new SearchFilters(websiteUrl, userParams))
|
||||
.focusDomain(queryResponse.domain())
|
||||
.focusDomainId(getDomainId(queryResponse.domain()))
|
||||
.focusDomain(focusDomain)
|
||||
.focusDomainId(getDomainId(focusDomain))
|
||||
.build();
|
||||
}
|
||||
|
||||
|
@ -16,29 +16,30 @@ public record SearchParameters(String query,
|
||||
SearchJsParameter js,
|
||||
SearchRecentParameter recent,
|
||||
SearchTitleParameter searchTitle,
|
||||
SearchAdtechParameter adtech
|
||||
SearchAdtechParameter adtech,
|
||||
boolean poisonResults
|
||||
) {
|
||||
public String profileStr() {
|
||||
return profile.filterId;
|
||||
}
|
||||
|
||||
public SearchParameters withProfile(SearchProfile profile) {
|
||||
return new SearchParameters(query, profile, js, recent, searchTitle, adtech);
|
||||
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, poisonResults);
|
||||
}
|
||||
|
||||
public SearchParameters withJs(SearchJsParameter js) {
|
||||
return new SearchParameters(query, profile, js, recent, searchTitle, adtech);
|
||||
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, poisonResults);
|
||||
}
|
||||
public SearchParameters withAdtech(SearchAdtechParameter adtech) {
|
||||
return new SearchParameters(query, profile, js, recent, searchTitle, adtech);
|
||||
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, poisonResults);
|
||||
}
|
||||
|
||||
public SearchParameters withRecent(SearchRecentParameter recent) {
|
||||
return new SearchParameters(query, profile, js, recent, searchTitle, adtech);
|
||||
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, poisonResults);
|
||||
}
|
||||
|
||||
public SearchParameters withTitle(SearchTitleParameter title) {
|
||||
return new SearchParameters(query, profile, js, recent, title, adtech);
|
||||
return new SearchParameters(query, profile, js, recent, title, adtech, poisonResults);
|
||||
}
|
||||
|
||||
public String renderUrl(WebsiteUrl baseUrl) {
|
||||
|
@ -57,7 +57,8 @@ public class SearchQueryService {
|
||||
SearchJsParameter.parse(request.queryParams("js")),
|
||||
SearchRecentParameter.parse(request.queryParams("recent")),
|
||||
SearchTitleParameter.parse(request.queryParams("searchTitle")),
|
||||
SearchAdtechParameter.parse(request.queryParams("adtech"))
|
||||
SearchAdtechParameter.parse(request.queryParams("adtech")),
|
||||
"1".equals(request.headers("X-Poison-Results"))
|
||||
);
|
||||
}
|
||||
catch (Exception ex) {
|
||||
|
Loading…
Reference in New Issue
Block a user