mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(search) Absorb SearchQueryIndexService into SearchOperator, and clean up SearchOperator
This commit is contained in:
parent
557bdaa694
commit
2db0e446cb
@ -7,14 +7,19 @@ import nu.marginalia.WebsiteUrl;
|
|||||||
import nu.marginalia.api.math.MathClient;
|
import nu.marginalia.api.math.MathClient;
|
||||||
import nu.marginalia.api.searchquery.QueryClient;
|
import nu.marginalia.api.searchquery.QueryClient;
|
||||||
import nu.marginalia.api.searchquery.model.query.QueryResponse;
|
import nu.marginalia.api.searchquery.model.query.QueryResponse;
|
||||||
|
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
|
||||||
|
import nu.marginalia.bbpc.BrailleBlockPunchCards;
|
||||||
import nu.marginalia.db.DbDomainQueries;
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
|
import nu.marginalia.index.query.limit.QueryLimits;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
|
import nu.marginalia.model.crawl.DomainIndexingState;
|
||||||
import nu.marginalia.search.command.SearchParameters;
|
import nu.marginalia.search.command.SearchParameters;
|
||||||
import nu.marginalia.search.model.ClusteredUrlDetails;
|
import nu.marginalia.search.model.ClusteredUrlDetails;
|
||||||
import nu.marginalia.search.model.DecoratedSearchResults;
|
import nu.marginalia.search.model.DecoratedSearchResults;
|
||||||
import nu.marginalia.search.model.SearchFilters;
|
import nu.marginalia.search.model.SearchFilters;
|
||||||
import nu.marginalia.search.model.UrlDetails;
|
import nu.marginalia.search.model.UrlDetails;
|
||||||
import nu.marginalia.search.svc.SearchQueryIndexService;
|
import nu.marginalia.search.results.UrlDeduplicator;
|
||||||
|
import nu.marginalia.search.svc.SearchQueryCountService;
|
||||||
import nu.marginalia.search.svc.SearchUnitConversionService;
|
import nu.marginalia.search.svc.SearchUnitConversionService;
|
||||||
import org.apache.logging.log4j.util.Strings;
|
import org.apache.logging.log4j.util.Strings;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
@ -23,9 +28,10 @@ import org.slf4j.Marker;
|
|||||||
import org.slf4j.MarkerFactory;
|
import org.slf4j.MarkerFactory;
|
||||||
|
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
import java.lang.ref.WeakReference;
|
|
||||||
import java.time.Duration;
|
import java.time.Duration;
|
||||||
import java.util.*;
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.concurrent.Future;
|
import java.util.concurrent.Future;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
@ -41,30 +47,30 @@ public class SearchOperator {
|
|||||||
private final MathClient mathClient;
|
private final MathClient mathClient;
|
||||||
private final DbDomainQueries domainQueries;
|
private final DbDomainQueries domainQueries;
|
||||||
private final QueryClient queryClient;
|
private final QueryClient queryClient;
|
||||||
private final SearchQueryIndexService searchQueryService;
|
|
||||||
private final SearchQueryParamFactory paramFactory;
|
private final SearchQueryParamFactory paramFactory;
|
||||||
private final WebsiteUrl websiteUrl;
|
private final WebsiteUrl websiteUrl;
|
||||||
private final SearchUnitConversionService searchUnitConversionService;
|
private final SearchUnitConversionService searchUnitConversionService;
|
||||||
|
private final SearchQueryCountService searchVisitorCount;
|
||||||
|
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public SearchOperator(MathClient mathClient,
|
public SearchOperator(MathClient mathClient,
|
||||||
DbDomainQueries domainQueries,
|
DbDomainQueries domainQueries,
|
||||||
QueryClient queryClient,
|
QueryClient queryClient,
|
||||||
SearchQueryIndexService searchQueryService,
|
|
||||||
SearchQueryParamFactory paramFactory,
|
SearchQueryParamFactory paramFactory,
|
||||||
WebsiteUrl websiteUrl,
|
WebsiteUrl websiteUrl,
|
||||||
SearchUnitConversionService searchUnitConversionService)
|
SearchUnitConversionService searchUnitConversionService,
|
||||||
|
SearchQueryCountService searchVisitorCount
|
||||||
|
)
|
||||||
{
|
{
|
||||||
|
|
||||||
this.mathClient = mathClient;
|
this.mathClient = mathClient;
|
||||||
this.domainQueries = domainQueries;
|
this.domainQueries = domainQueries;
|
||||||
this.queryClient = queryClient;
|
this.queryClient = queryClient;
|
||||||
|
|
||||||
this.searchQueryService = searchQueryService;
|
|
||||||
this.paramFactory = paramFactory;
|
this.paramFactory = paramFactory;
|
||||||
this.websiteUrl = websiteUrl;
|
this.websiteUrl = websiteUrl;
|
||||||
this.searchUnitConversionService = searchUnitConversionService;
|
this.searchUnitConversionService = searchUnitConversionService;
|
||||||
|
this.searchVisitorCount = searchVisitorCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<UrlDetails> doSiteSearch(String domain,
|
public List<UrlDetails> doSiteSearch(String domain,
|
||||||
@ -74,7 +80,7 @@ public class SearchOperator {
|
|||||||
var queryParams = paramFactory.forSiteSearch(domain, domainId, count);
|
var queryParams = paramFactory.forSiteSearch(domain, domainId, count);
|
||||||
var queryResponse = queryClient.search(queryParams);
|
var queryResponse = queryClient.search(queryParams);
|
||||||
|
|
||||||
return searchQueryService.getResultsFromQuery(queryResponse);
|
return getResultsFromQuery(queryResponse);
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<UrlDetails> doBacklinkSearch(String domain) {
|
public List<UrlDetails> doBacklinkSearch(String domain) {
|
||||||
@ -82,63 +88,35 @@ public class SearchOperator {
|
|||||||
var queryParams = paramFactory.forBacklinkSearch(domain);
|
var queryParams = paramFactory.forBacklinkSearch(domain);
|
||||||
var queryResponse = queryClient.search(queryParams);
|
var queryResponse = queryClient.search(queryParams);
|
||||||
|
|
||||||
return searchQueryService.getResultsFromQuery(queryResponse);
|
return getResultsFromQuery(queryResponse);
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<UrlDetails> doLinkSearch(String source, String dest) {
|
public List<UrlDetails> doLinkSearch(String source, String dest) {
|
||||||
var queryParams = paramFactory.forLinkSearch(source, dest);
|
var queryParams = paramFactory.forLinkSearch(source, dest);
|
||||||
var queryResponse = queryClient.search(queryParams);
|
var queryResponse = queryClient.search(queryParams);
|
||||||
|
|
||||||
return searchQueryService.getResultsFromQuery(queryResponse);
|
return getResultsFromQuery(queryResponse);
|
||||||
}
|
}
|
||||||
|
|
||||||
private volatile WeakReference<List<ClusteredUrlDetails>> oldResults = new WeakReference<>(Collections.emptyList());
|
|
||||||
|
|
||||||
public DecoratedSearchResults doSearch(SearchParameters userParams) {
|
public DecoratedSearchResults doSearch(SearchParameters userParams) {
|
||||||
|
|
||||||
Future<String> eval = searchUnitConversionService.tryEval(userParams.query());
|
Future<String> eval = searchUnitConversionService.tryEval(userParams.query());
|
||||||
|
|
||||||
List<ClusteredUrlDetails> clusteredResults;
|
var queryParams = paramFactory.forRegularSearch(userParams);
|
||||||
QueryResponse queryResponse;
|
QueryResponse queryResponse = queryClient.search(queryParams);
|
||||||
List<String> problems;
|
var queryResults = getResultsFromQuery(queryResponse);
|
||||||
String evalResult;
|
|
||||||
String focusDomain;
|
|
||||||
|
|
||||||
if (userParams.poisonResults() && Math.random() > 0.1) {
|
logger.info(queryMarker, "Human terms: {}", Strings.join(queryResponse.searchTermsHuman(), ','));
|
||||||
|
logger.info(queryMarker, "Search Result Count: {}", queryResults.size());
|
||||||
|
|
||||||
// For botnet users, we return random old query results. This is to make
|
String evalResult = getFutureOrDefault(eval, "");
|
||||||
// it harder for them to figure out if they are being rate limited.
|
|
||||||
|
|
||||||
clusteredResults = new ArrayList<>(Objects.requireNonNullElse(oldResults.get(), List.of()));
|
List<ClusteredUrlDetails> clusteredResults = SearchResultClusterer
|
||||||
|
.selectStrategy(queryResponse)
|
||||||
|
.clusterResults(queryResults, 25);
|
||||||
|
|
||||||
// Shuffle the results to make it harder to distinguish
|
String focusDomain = queryResponse.domain();
|
||||||
Collections.shuffle(clusteredResults);
|
List<String> problems = getProblems(evalResult, queryResults, queryResponse);
|
||||||
|
|
||||||
problems = List.of();
|
|
||||||
evalResult = "";
|
|
||||||
focusDomain = "";
|
|
||||||
} else {
|
|
||||||
var queryParams = paramFactory.forRegularSearch(userParams);
|
|
||||||
queryResponse = queryClient.search(queryParams);
|
|
||||||
var queryResults = searchQueryService.getResultsFromQuery(queryResponse);
|
|
||||||
|
|
||||||
logger.info(queryMarker, "Human terms: {}", Strings.join(queryResponse.searchTermsHuman(), ','));
|
|
||||||
logger.info(queryMarker, "Search Result Count: {}", queryResults.size());
|
|
||||||
|
|
||||||
evalResult = getFutureOrDefault(eval, "");
|
|
||||||
|
|
||||||
clusteredResults = SearchResultClusterer
|
|
||||||
.selectStrategy(queryResponse)
|
|
||||||
.clusterResults(queryResults, 25);
|
|
||||||
|
|
||||||
focusDomain = queryResponse.domain();
|
|
||||||
problems = getProblems(evalResult, queryResults, queryResponse);
|
|
||||||
|
|
||||||
if (userParams.poisonResults()) {
|
|
||||||
// Save the results to feed to the botnet
|
|
||||||
oldResults = new WeakReference<>(clusteredResults);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return DecoratedSearchResults.builder()
|
return DecoratedSearchResults.builder()
|
||||||
.params(userParams)
|
.params(userParams)
|
||||||
@ -151,6 +129,41 @@ public class SearchOperator {
|
|||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public List<UrlDetails> getResultsFromQuery(QueryResponse queryResponse) {
|
||||||
|
final QueryLimits limits = queryResponse.specs().queryLimits;
|
||||||
|
final UrlDeduplicator deduplicator = new UrlDeduplicator(limits.resultsByDomain());
|
||||||
|
|
||||||
|
// Update the query count (this is what you see on the front page)
|
||||||
|
searchVisitorCount.registerQuery();
|
||||||
|
|
||||||
|
return queryResponse.results().stream()
|
||||||
|
.filter(deduplicator::shouldRetain)
|
||||||
|
.limit(limits.resultsTotal())
|
||||||
|
.map(SearchOperator::createDetails)
|
||||||
|
.toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static UrlDetails createDetails(DecoratedSearchResultItem item) {
|
||||||
|
return new UrlDetails(
|
||||||
|
item.documentId(),
|
||||||
|
item.domainId(),
|
||||||
|
item.url,
|
||||||
|
item.title,
|
||||||
|
item.description,
|
||||||
|
item.format,
|
||||||
|
item.features,
|
||||||
|
DomainIndexingState.ACTIVE,
|
||||||
|
item.rankingScore, // termScore
|
||||||
|
item.resultsFromDomain,
|
||||||
|
BrailleBlockPunchCards.printBits(item.bestPositions, 64),
|
||||||
|
Long.bitCount(item.bestPositions),
|
||||||
|
item.rawIndexResult,
|
||||||
|
item.rawIndexResult.keywordScores
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private <T> T getFutureOrDefault(@Nullable Future<T> fut, T defaultValue) {
|
private <T> T getFutureOrDefault(@Nullable Future<T> fut, T defaultValue) {
|
||||||
if (fut == null || fut.isCancelled()) {
|
if (fut == null || fut.isCancelled()) {
|
||||||
return defaultValue;
|
return defaultValue;
|
||||||
@ -214,6 +227,4 @@ public class SearchOperator {
|
|||||||
return STR."\"\{term}\" could be spelled \{suggestionsStr}";
|
return STR."\"\{term}\" could be spelled \{suggestionsStr}";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,62 +0,0 @@
|
|||||||
package nu.marginalia.search.svc;
|
|
||||||
|
|
||||||
import com.google.inject.Inject;
|
|
||||||
import com.google.inject.Singleton;
|
|
||||||
import nu.marginalia.api.searchquery.model.query.QueryResponse;
|
|
||||||
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
|
|
||||||
import nu.marginalia.bbpc.BrailleBlockPunchCards;
|
|
||||||
import nu.marginalia.index.query.limit.QueryLimits;
|
|
||||||
import nu.marginalia.model.crawl.DomainIndexingState;
|
|
||||||
import nu.marginalia.search.model.UrlDetails;
|
|
||||||
import nu.marginalia.search.results.UrlDeduplicator;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
import org.slf4j.Marker;
|
|
||||||
import org.slf4j.MarkerFactory;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
@Singleton
|
|
||||||
public class SearchQueryIndexService {
|
|
||||||
private final SearchQueryCountService searchVisitorCount;
|
|
||||||
private final Marker queryMarker = MarkerFactory.getMarker("QUERY");
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
|
||||||
|
|
||||||
@Inject
|
|
||||||
public SearchQueryIndexService(SearchQueryCountService searchVisitorCount) {
|
|
||||||
this.searchVisitorCount = searchVisitorCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<UrlDetails> getResultsFromQuery(QueryResponse queryResponse) {
|
|
||||||
final QueryLimits limits = queryResponse.specs().queryLimits;
|
|
||||||
final UrlDeduplicator deduplicator = new UrlDeduplicator(limits.resultsByDomain());
|
|
||||||
|
|
||||||
// Update the query count (this is what you see on the front page)
|
|
||||||
searchVisitorCount.registerQuery();
|
|
||||||
|
|
||||||
return queryResponse.results().stream()
|
|
||||||
.filter(deduplicator::shouldRetain)
|
|
||||||
.limit(limits.resultsTotal())
|
|
||||||
.map(SearchQueryIndexService::createDetails)
|
|
||||||
.toList();
|
|
||||||
}
|
|
||||||
|
|
||||||
private static UrlDetails createDetails(DecoratedSearchResultItem item) {
|
|
||||||
return new UrlDetails(
|
|
||||||
item.documentId(),
|
|
||||||
item.domainId(),
|
|
||||||
item.url,
|
|
||||||
item.title,
|
|
||||||
item.description,
|
|
||||||
item.format,
|
|
||||||
item.features,
|
|
||||||
DomainIndexingState.ACTIVE,
|
|
||||||
item.rankingScore, // termScore
|
|
||||||
item.resultsFromDomain,
|
|
||||||
BrailleBlockPunchCards.printBits(item.bestPositions, 64),
|
|
||||||
Long.bitCount(item.bestPositions),
|
|
||||||
item.rawIndexResult,
|
|
||||||
item.rawIndexResult.keywordScores
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
Reference in New Issue
Block a user