mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
(search) Bot mitigation
Add the ability to indicate to the search service that a request is malicious, and to poison the results by providing randomly reordered old results instead.
This commit is contained in:
parent
67aa20ea2c
commit
8d0af9548b
@ -20,6 +20,7 @@ import org.slf4j.Marker;
|
|||||||
import org.slf4j.MarkerFactory;
|
import org.slf4j.MarkerFactory;
|
||||||
|
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
|
import java.lang.ref.WeakReference;
|
||||||
import java.time.Duration;
|
import java.time.Duration;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.concurrent.Future;
|
import java.util.concurrent.Future;
|
||||||
@ -87,31 +88,62 @@ public class SearchOperator {
|
|||||||
return searchQueryService.getResultsFromQuery(queryResponse);
|
return searchQueryService.getResultsFromQuery(queryResponse);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private volatile WeakReference<List<ClusteredUrlDetails>> oldResults = new WeakReference<>(Collections.emptyList());
|
||||||
|
|
||||||
public DecoratedSearchResults doSearch(SearchParameters userParams) {
|
public DecoratedSearchResults doSearch(SearchParameters userParams) {
|
||||||
|
|
||||||
Future<String> eval = searchUnitConversionService.tryEval(userParams.query());
|
Future<String> eval = searchUnitConversionService.tryEval(userParams.query());
|
||||||
var queryParams = paramFactory.forRegularSearch(userParams);
|
|
||||||
var queryResponse = queryClient.search(queryParams);
|
|
||||||
|
|
||||||
List<UrlDetails> queryResults = searchQueryService.getResultsFromQuery(queryResponse);
|
List<ClusteredUrlDetails> clusteredResults;
|
||||||
|
QueryResponse queryResponse;
|
||||||
|
List<String> problems;
|
||||||
|
String evalResult;
|
||||||
|
String focusDomain;
|
||||||
|
|
||||||
logger.info(queryMarker, "Human terms: {}", Strings.join(queryResponse.searchTermsHuman(), ','));
|
if (userParams.poisonResults() && Math.random() > 0.1) {
|
||||||
logger.info(queryMarker, "Search Result Count: {}", queryResults.size());
|
|
||||||
|
|
||||||
String evalResult = getFutureOrDefault(eval, "");
|
// For botnet users, we return random old query results. This is to make
|
||||||
|
// it harder for them to figure out if they are being rate limited.
|
||||||
|
|
||||||
List<ClusteredUrlDetails> clusteredResults = SearchResultClusterer
|
clusteredResults = new ArrayList<>(Objects.requireNonNullElse(oldResults.get(), List.of()));
|
||||||
.selectStrategy(queryResponse)
|
|
||||||
.clusterResults(queryResults, 25);
|
// Shuffle the results to make it harder to distinguish
|
||||||
|
Collections.shuffle(clusteredResults);
|
||||||
|
|
||||||
|
problems = List.of();
|
||||||
|
evalResult = "";
|
||||||
|
focusDomain = "";
|
||||||
|
} else {
|
||||||
|
var queryParams = paramFactory.forRegularSearch(userParams);
|
||||||
|
queryResponse = queryClient.search(queryParams);
|
||||||
|
var queryResults = searchQueryService.getResultsFromQuery(queryResponse);
|
||||||
|
|
||||||
|
logger.info(queryMarker, "Human terms: {}", Strings.join(queryResponse.searchTermsHuman(), ','));
|
||||||
|
logger.info(queryMarker, "Search Result Count: {}", queryResults.size());
|
||||||
|
|
||||||
|
evalResult = getFutureOrDefault(eval, "");
|
||||||
|
|
||||||
|
clusteredResults = SearchResultClusterer
|
||||||
|
.selectStrategy(queryResponse)
|
||||||
|
.clusterResults(queryResults, 25);
|
||||||
|
|
||||||
|
focusDomain = queryResponse.domain();
|
||||||
|
problems = getProblems(evalResult, queryResults, queryResponse);
|
||||||
|
|
||||||
|
if (userParams.poisonResults()) {
|
||||||
|
// Save the results to feed to the botnet
|
||||||
|
oldResults = new WeakReference<>(clusteredResults);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return DecoratedSearchResults.builder()
|
return DecoratedSearchResults.builder()
|
||||||
.params(userParams)
|
.params(userParams)
|
||||||
.problems(getProblems(evalResult, queryResults, queryResponse))
|
.problems(problems)
|
||||||
.evalResult(evalResult)
|
.evalResult(evalResult)
|
||||||
.results(clusteredResults)
|
.results(clusteredResults)
|
||||||
.filters(new SearchFilters(websiteUrl, userParams))
|
.filters(new SearchFilters(websiteUrl, userParams))
|
||||||
.focusDomain(queryResponse.domain())
|
.focusDomain(focusDomain)
|
||||||
.focusDomainId(getDomainId(queryResponse.domain()))
|
.focusDomainId(getDomainId(focusDomain))
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -16,29 +16,30 @@ public record SearchParameters(String query,
|
|||||||
SearchJsParameter js,
|
SearchJsParameter js,
|
||||||
SearchRecentParameter recent,
|
SearchRecentParameter recent,
|
||||||
SearchTitleParameter searchTitle,
|
SearchTitleParameter searchTitle,
|
||||||
SearchAdtechParameter adtech
|
SearchAdtechParameter adtech,
|
||||||
|
boolean poisonResults
|
||||||
) {
|
) {
|
||||||
public String profileStr() {
|
public String profileStr() {
|
||||||
return profile.filterId;
|
return profile.filterId;
|
||||||
}
|
}
|
||||||
|
|
||||||
public SearchParameters withProfile(SearchProfile profile) {
|
public SearchParameters withProfile(SearchProfile profile) {
|
||||||
return new SearchParameters(query, profile, js, recent, searchTitle, adtech);
|
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, poisonResults);
|
||||||
}
|
}
|
||||||
|
|
||||||
public SearchParameters withJs(SearchJsParameter js) {
|
public SearchParameters withJs(SearchJsParameter js) {
|
||||||
return new SearchParameters(query, profile, js, recent, searchTitle, adtech);
|
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, poisonResults);
|
||||||
}
|
}
|
||||||
public SearchParameters withAdtech(SearchAdtechParameter adtech) {
|
public SearchParameters withAdtech(SearchAdtechParameter adtech) {
|
||||||
return new SearchParameters(query, profile, js, recent, searchTitle, adtech);
|
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, poisonResults);
|
||||||
}
|
}
|
||||||
|
|
||||||
public SearchParameters withRecent(SearchRecentParameter recent) {
|
public SearchParameters withRecent(SearchRecentParameter recent) {
|
||||||
return new SearchParameters(query, profile, js, recent, searchTitle, adtech);
|
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, poisonResults);
|
||||||
}
|
}
|
||||||
|
|
||||||
public SearchParameters withTitle(SearchTitleParameter title) {
|
public SearchParameters withTitle(SearchTitleParameter title) {
|
||||||
return new SearchParameters(query, profile, js, recent, title, adtech);
|
return new SearchParameters(query, profile, js, recent, title, adtech, poisonResults);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String renderUrl(WebsiteUrl baseUrl) {
|
public String renderUrl(WebsiteUrl baseUrl) {
|
||||||
|
@ -57,7 +57,8 @@ public class SearchQueryService {
|
|||||||
SearchJsParameter.parse(request.queryParams("js")),
|
SearchJsParameter.parse(request.queryParams("js")),
|
||||||
SearchRecentParameter.parse(request.queryParams("recent")),
|
SearchRecentParameter.parse(request.queryParams("recent")),
|
||||||
SearchTitleParameter.parse(request.queryParams("searchTitle")),
|
SearchTitleParameter.parse(request.queryParams("searchTitle")),
|
||||||
SearchAdtechParameter.parse(request.queryParams("adtech"))
|
SearchAdtechParameter.parse(request.queryParams("adtech")),
|
||||||
|
"1".equals(request.headers("X-Poison-Results"))
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
catch (Exception ex) {
|
catch (Exception ex) {
|
||||||
|
Loading…
Reference in New Issue
Block a user