(search) Restrict site-search by passing domain id along with the site:-term

This helps site-search queries for a domain that has no subdomain of its own: without the domain id restriction, such a query also drags up results from the domain's subdomains, because those are likewise given the special site:-keyword of their parent domain.
This commit is contained in:
Viktor Lofgren 2024-07-30 21:41:07 +02:00
parent ec600b967d
commit f19148132a
3 changed files with 16 additions and 11 deletions

View File

@ -6,11 +6,14 @@ import lombok.SneakyThrows;
import nu.marginalia.WebsiteUrl; import nu.marginalia.WebsiteUrl;
import nu.marginalia.api.math.MathClient; import nu.marginalia.api.math.MathClient;
import nu.marginalia.api.searchquery.QueryClient; import nu.marginalia.api.searchquery.QueryClient;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.api.searchquery.model.query.QueryResponse; import nu.marginalia.api.searchquery.model.query.QueryResponse;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.search.command.SearchParameters; import nu.marginalia.search.command.SearchParameters;
import nu.marginalia.search.model.*; import nu.marginalia.search.model.ClusteredUrlDetails;
import nu.marginalia.search.model.DecoratedSearchResults;
import nu.marginalia.search.model.SearchFilters;
import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.search.svc.SearchQueryIndexService; import nu.marginalia.search.svc.SearchQueryIndexService;
import nu.marginalia.search.svc.SearchUnitConversionService; import nu.marginalia.search.svc.SearchUnitConversionService;
import org.apache.logging.log4j.util.Strings; import org.apache.logging.log4j.util.Strings;
@ -65,9 +68,10 @@ public class SearchOperator {
} }
public List<UrlDetails> doSiteSearch(String domain, public List<UrlDetails> doSiteSearch(String domain,
int domainId,
int count) { int count) {
var queryParams = paramFactory.forSiteSearch(domain, count); var queryParams = paramFactory.forSiteSearch(domain, domainId, count);
var queryResponse = queryClient.search(queryParams); var queryResponse = queryClient.search(queryParams);
return searchQueryService.getResultsFromQuery(queryResponse); return searchQueryService.getResultsFromQuery(queryResponse);

View File

@ -1,12 +1,12 @@
package nu.marginalia.search; package nu.marginalia.search;
import nu.marginalia.api.searchquery.model.query.SearchSetIdentifier; import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.api.searchquery.model.query.SearchQuery; import nu.marginalia.api.searchquery.model.query.SearchQuery;
import nu.marginalia.api.searchquery.model.query.SearchSetIdentifier;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.index.query.limit.QueryLimits; import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit; import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.search.command.SearchParameters; import nu.marginalia.search.command.SearchParameters;
import java.util.List; import java.util.List;
@ -42,7 +42,7 @@ public class SearchQueryParamFactory {
} }
public QueryParams forSiteSearch(String domain, int count) { public QueryParams forSiteSearch(String domain, int domainId, int count) {
return new QueryParams("site:"+domain, return new QueryParams("site:"+domain,
null, null,
List.of(), List.of(),
@ -53,7 +53,7 @@ public class SearchQueryParamFactory {
SpecificationLimit.none(), SpecificationLimit.none(),
SpecificationLimit.none(), SpecificationLimit.none(),
SpecificationLimit.none(), SpecificationLimit.none(),
List.of(), List.of(domainId),
new QueryLimits(count, count, 100, 512), new QueryLimits(count, count, 100, 512),
SearchSetIdentifier.NONE.name(), SearchSetIdentifier.NONE.name(),
QueryStrategy.AUTO, QueryStrategy.AUTO,

View File

@ -5,13 +5,13 @@ import nu.marginalia.api.domains.DomainInfoClient;
import nu.marginalia.api.domains.model.DomainInformation; import nu.marginalia.api.domains.model.DomainInformation;
import nu.marginalia.api.domains.model.SimilarDomain; import nu.marginalia.api.domains.model.SimilarDomain;
import nu.marginalia.db.DbDomainQueries; import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.feedlot.FeedlotClient;
import nu.marginalia.feedlot.model.FeedItems; import nu.marginalia.feedlot.model.FeedItems;
import nu.marginalia.model.EdgeDomain; import nu.marginalia.model.EdgeDomain;
import nu.marginalia.renderer.MustacheRenderer; import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.renderer.RendererFactory; import nu.marginalia.renderer.RendererFactory;
import nu.marginalia.screenshot.ScreenshotService; import nu.marginalia.screenshot.ScreenshotService;
import nu.marginalia.search.SearchOperator; import nu.marginalia.search.SearchOperator;
import nu.marginalia.feedlot.FeedlotClient;
import nu.marginalia.search.model.UrlDetails; import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.search.svc.SearchFlagSiteService.FlagSiteFormData; import nu.marginalia.search.svc.SearchFlagSiteService.FlagSiteFormData;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -153,7 +153,7 @@ public class SearchSiteInfoService {
linkingDomainsFuture = domainInfoClient.linkedDomains(domainId, 25); linkingDomainsFuture = domainInfoClient.linkedDomains(domainId, 25);
} }
List<UrlDetails> sampleResults = searchOperator.doSiteSearch(domainName, 5); List<UrlDetails> sampleResults = searchOperator.doSiteSearch(domainName, domainId,5);
if (!sampleResults.isEmpty()) { if (!sampleResults.isEmpty()) {
url = sampleResults.getFirst().url.withPathAndParam("/", null).toString(); url = sampleResults.getFirst().url.withPathAndParam("/", null).toString();
} }
@ -195,9 +195,10 @@ public class SearchSiteInfoService {
} }
private Docs listDocs(String domainName) { private Docs listDocs(String domainName) {
int domainId = domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1);
return new Docs(domainName, return new Docs(domainName,
domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1), domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1),
searchOperator.doSiteSearch(domainName, 100)); searchOperator.doSiteSearch(domainName, domainId, 100));
} }
public record Docs(Map<String, Boolean> view, public record Docs(Map<String, Boolean> view,