(search) Absorb SearchQueryIndexService into SearchOperator, and clean up SearchOperator

This commit is contained in:
Viktor Lofgren 2024-08-22 11:49:29 +02:00
parent 557bdaa694
commit 2db0e446cb
2 changed files with 63 additions and 114 deletions

View File

@ -7,14 +7,19 @@ import nu.marginalia.WebsiteUrl;
import nu.marginalia.api.math.MathClient;
import nu.marginalia.api.searchquery.QueryClient;
import nu.marginalia.api.searchquery.model.query.QueryResponse;
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
import nu.marginalia.bbpc.BrailleBlockPunchCards;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.crawl.DomainIndexingState;
import nu.marginalia.search.command.SearchParameters;
import nu.marginalia.search.model.ClusteredUrlDetails;
import nu.marginalia.search.model.DecoratedSearchResults;
import nu.marginalia.search.model.SearchFilters;
import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.search.svc.SearchQueryIndexService;
import nu.marginalia.search.results.UrlDeduplicator;
import nu.marginalia.search.svc.SearchQueryCountService;
import nu.marginalia.search.svc.SearchUnitConversionService;
import org.apache.logging.log4j.util.Strings;
import org.slf4j.Logger;
@ -23,9 +28,10 @@ import org.slf4j.Marker;
import org.slf4j.MarkerFactory;
import javax.annotation.Nullable;
import java.lang.ref.WeakReference;
import java.time.Duration;
import java.util.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
@ -41,30 +47,30 @@ public class SearchOperator {
private final MathClient mathClient;
private final DbDomainQueries domainQueries;
private final QueryClient queryClient;
private final SearchQueryIndexService searchQueryService;
private final SearchQueryParamFactory paramFactory;
private final WebsiteUrl websiteUrl;
private final SearchUnitConversionService searchUnitConversionService;
private final SearchQueryCountService searchVisitorCount;
@Inject
public SearchOperator(MathClient mathClient,
DbDomainQueries domainQueries,
QueryClient queryClient,
SearchQueryIndexService searchQueryService,
SearchQueryParamFactory paramFactory,
WebsiteUrl websiteUrl,
SearchUnitConversionService searchUnitConversionService)
SearchUnitConversionService searchUnitConversionService,
SearchQueryCountService searchVisitorCount
)
{
this.mathClient = mathClient;
this.domainQueries = domainQueries;
this.queryClient = queryClient;
this.searchQueryService = searchQueryService;
this.paramFactory = paramFactory;
this.websiteUrl = websiteUrl;
this.searchUnitConversionService = searchUnitConversionService;
this.searchVisitorCount = searchVisitorCount;
}
public List<UrlDetails> doSiteSearch(String domain,
@ -74,7 +80,7 @@ public class SearchOperator {
var queryParams = paramFactory.forSiteSearch(domain, domainId, count);
var queryResponse = queryClient.search(queryParams);
return searchQueryService.getResultsFromQuery(queryResponse);
return getResultsFromQuery(queryResponse);
}
public List<UrlDetails> doBacklinkSearch(String domain) {
@ -82,63 +88,35 @@ public class SearchOperator {
var queryParams = paramFactory.forBacklinkSearch(domain);
var queryResponse = queryClient.search(queryParams);
return searchQueryService.getResultsFromQuery(queryResponse);
return getResultsFromQuery(queryResponse);
}
public List<UrlDetails> doLinkSearch(String source, String dest) {
var queryParams = paramFactory.forLinkSearch(source, dest);
var queryResponse = queryClient.search(queryParams);
return searchQueryService.getResultsFromQuery(queryResponse);
return getResultsFromQuery(queryResponse);
}
private volatile WeakReference<List<ClusteredUrlDetails>> oldResults = new WeakReference<>(Collections.emptyList());
public DecoratedSearchResults doSearch(SearchParameters userParams) {
Future<String> eval = searchUnitConversionService.tryEval(userParams.query());
List<ClusteredUrlDetails> clusteredResults;
QueryResponse queryResponse;
List<String> problems;
String evalResult;
String focusDomain;
if (userParams.poisonResults() && Math.random() > 0.1) {
// For botnet users, we return random old query results. This is to make
// it harder for them to figure out if they are being rate limited.
clusteredResults = new ArrayList<>(Objects.requireNonNullElse(oldResults.get(), List.of()));
// Shuffle the results to make it harder to distinguish
Collections.shuffle(clusteredResults);
problems = List.of();
evalResult = "";
focusDomain = "";
} else {
var queryParams = paramFactory.forRegularSearch(userParams);
queryResponse = queryClient.search(queryParams);
var queryResults = searchQueryService.getResultsFromQuery(queryResponse);
QueryResponse queryResponse = queryClient.search(queryParams);
var queryResults = getResultsFromQuery(queryResponse);
logger.info(queryMarker, "Human terms: {}", Strings.join(queryResponse.searchTermsHuman(), ','));
logger.info(queryMarker, "Search Result Count: {}", queryResults.size());
evalResult = getFutureOrDefault(eval, "");
String evalResult = getFutureOrDefault(eval, "");
clusteredResults = SearchResultClusterer
List<ClusteredUrlDetails> clusteredResults = SearchResultClusterer
.selectStrategy(queryResponse)
.clusterResults(queryResults, 25);
focusDomain = queryResponse.domain();
problems = getProblems(evalResult, queryResults, queryResponse);
if (userParams.poisonResults()) {
// Save the results to feed to the botnet
oldResults = new WeakReference<>(clusteredResults);
}
}
String focusDomain = queryResponse.domain();
List<String> problems = getProblems(evalResult, queryResults, queryResponse);
return DecoratedSearchResults.builder()
.params(userParams)
@ -151,6 +129,41 @@ public class SearchOperator {
.build();
}
public List<UrlDetails> getResultsFromQuery(QueryResponse queryResponse) {
final QueryLimits limits = queryResponse.specs().queryLimits;
final UrlDeduplicator deduplicator = new UrlDeduplicator(limits.resultsByDomain());
// Update the query count (this is what you see on the front page)
searchVisitorCount.registerQuery();
return queryResponse.results().stream()
.filter(deduplicator::shouldRetain)
.limit(limits.resultsTotal())
.map(SearchOperator::createDetails)
.toList();
}
private static UrlDetails createDetails(DecoratedSearchResultItem item) {
return new UrlDetails(
item.documentId(),
item.domainId(),
item.url,
item.title,
item.description,
item.format,
item.features,
DomainIndexingState.ACTIVE,
item.rankingScore, // termScore
item.resultsFromDomain,
BrailleBlockPunchCards.printBits(item.bestPositions, 64),
Long.bitCount(item.bestPositions),
item.rawIndexResult,
item.rawIndexResult.keywordScores
);
}
private <T> T getFutureOrDefault(@Nullable Future<T> fut, T defaultValue) {
if (fut == null || fut.isCancelled()) {
return defaultValue;
@ -214,6 +227,4 @@ public class SearchOperator {
return STR."\"\{term}\" could be spelled \{suggestionsStr}";
}
}

View File

@ -1,62 +0,0 @@
package nu.marginalia.search.svc;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.api.searchquery.model.query.QueryResponse;
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
import nu.marginalia.bbpc.BrailleBlockPunchCards;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.model.crawl.DomainIndexingState;
import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.search.results.UrlDeduplicator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.Marker;
import org.slf4j.MarkerFactory;
import java.util.List;
@Singleton
public class SearchQueryIndexService {
private final SearchQueryCountService searchVisitorCount;
private final Marker queryMarker = MarkerFactory.getMarker("QUERY");
private final Logger logger = LoggerFactory.getLogger(getClass());
@Inject
public SearchQueryIndexService(SearchQueryCountService searchVisitorCount) {
this.searchVisitorCount = searchVisitorCount;
}
public List<UrlDetails> getResultsFromQuery(QueryResponse queryResponse) {
final QueryLimits limits = queryResponse.specs().queryLimits;
final UrlDeduplicator deduplicator = new UrlDeduplicator(limits.resultsByDomain());
// Update the query count (this is what you see on the front page)
searchVisitorCount.registerQuery();
return queryResponse.results().stream()
.filter(deduplicator::shouldRetain)
.limit(limits.resultsTotal())
.map(SearchQueryIndexService::createDetails)
.toList();
}
private static UrlDetails createDetails(DecoratedSearchResultItem item) {
return new UrlDetails(
item.documentId(),
item.domainId(),
item.url,
item.title,
item.description,
item.format,
item.features,
DomainIndexingState.ACTIVE,
item.rankingScore, // termScore
item.resultsFromDomain,
BrailleBlockPunchCards.printBits(item.bestPositions, 64),
Long.bitCount(item.bestPositions),
item.rawIndexResult,
item.rawIndexResult.keywordScores
);
}
}