(search) Restrict site-search by passing domain id along with the site:-term

This helps these queries handle domains that do not have a subdomain: without the domain id, a site search for such a domain would also drag up results from its subdomains, because subdomains are additionally given the special site:-keyword for their corresponding parent domain.
This commit is contained in:
Viktor Lofgren 2024-07-30 21:41:07 +02:00
parent ec600b967d
commit f19148132a
3 changed files with 16 additions and 11 deletions

View File

@ -6,11 +6,14 @@ import lombok.SneakyThrows;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.api.math.MathClient;
import nu.marginalia.api.searchquery.QueryClient;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.api.searchquery.model.query.QueryResponse;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.search.command.SearchParameters;
import nu.marginalia.search.model.*;
import nu.marginalia.search.model.ClusteredUrlDetails;
import nu.marginalia.search.model.DecoratedSearchResults;
import nu.marginalia.search.model.SearchFilters;
import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.search.svc.SearchQueryIndexService;
import nu.marginalia.search.svc.SearchUnitConversionService;
import org.apache.logging.log4j.util.Strings;
@ -65,9 +68,10 @@ public class SearchOperator {
}
public List<UrlDetails> doSiteSearch(String domain,
int domainId,
int count) {
var queryParams = paramFactory.forSiteSearch(domain, count);
var queryParams = paramFactory.forSiteSearch(domain, domainId, count);
var queryResponse = queryClient.search(queryParams);
return searchQueryService.getResultsFromQuery(queryResponse);

View File

@ -1,12 +1,12 @@
package nu.marginalia.search;
import nu.marginalia.api.searchquery.model.query.SearchSetIdentifier;
import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.api.searchquery.model.query.SearchQuery;
import nu.marginalia.api.searchquery.model.query.SearchSetIdentifier;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.search.command.SearchParameters;
import java.util.List;
@ -42,7 +42,7 @@ public class SearchQueryParamFactory {
}
public QueryParams forSiteSearch(String domain, int count) {
public QueryParams forSiteSearch(String domain, int domainId, int count) {
return new QueryParams("site:"+domain,
null,
List.of(),
@ -53,7 +53,7 @@ public class SearchQueryParamFactory {
SpecificationLimit.none(),
SpecificationLimit.none(),
SpecificationLimit.none(),
List.of(),
List.of(domainId),
new QueryLimits(count, count, 100, 512),
SearchSetIdentifier.NONE.name(),
QueryStrategy.AUTO,

View File

@ -5,13 +5,13 @@ import nu.marginalia.api.domains.DomainInfoClient;
import nu.marginalia.api.domains.model.DomainInformation;
import nu.marginalia.api.domains.model.SimilarDomain;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.feedlot.FeedlotClient;
import nu.marginalia.feedlot.model.FeedItems;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.renderer.RendererFactory;
import nu.marginalia.screenshot.ScreenshotService;
import nu.marginalia.search.SearchOperator;
import nu.marginalia.feedlot.FeedlotClient;
import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.search.svc.SearchFlagSiteService.FlagSiteFormData;
import org.slf4j.Logger;
@ -153,7 +153,7 @@ public class SearchSiteInfoService {
linkingDomainsFuture = domainInfoClient.linkedDomains(domainId, 25);
}
List<UrlDetails> sampleResults = searchOperator.doSiteSearch(domainName, 5);
List<UrlDetails> sampleResults = searchOperator.doSiteSearch(domainName, domainId,5);
if (!sampleResults.isEmpty()) {
url = sampleResults.getFirst().url.withPathAndParam("/", null).toString();
}
@ -195,9 +195,10 @@ public class SearchSiteInfoService {
}
private Docs listDocs(String domainName) {
int domainId = domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1);
return new Docs(domainName,
domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1),
searchOperator.doSiteSearch(domainName, 100));
searchOperator.doSiteSearch(domainName, domainId, 100));
}
public record Docs(Map<String, Boolean> view,