(query-service) Move query parsing from search-service to the new query service.

This commit is contained in:
Viktor Lofgren 2023-10-09 13:27:44 +02:00
parent 94c882af7d
commit 97e17282ab
24 changed files with 294 additions and 187 deletions

View File

@ -14,6 +14,7 @@ dependencies {
implementation project(':code:api:index-api') implementation project(':code:api:index-api')
implementation project(':code:common:config') implementation project(':code:common:config')
implementation project(':code:libraries:message-queue') implementation project(':code:libraries:message-queue')
implementation project(':code:features-index:index-query')
implementation project(':code:common:service-discovery') implementation project(':code:common:service-discovery')
implementation project(':code:common:service-client') implementation project(':code:common:service-client')

View File

@ -12,6 +12,8 @@ import nu.marginalia.index.client.model.results.SearchResultSet;
import nu.marginalia.model.gson.GsonFactory; import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.mq.MessageQueueFactory; import nu.marginalia.mq.MessageQueueFactory;
import nu.marginalia.mq.outbox.MqOutbox; import nu.marginalia.mq.outbox.MqOutbox;
import nu.marginalia.query.model.QueryParams;
import nu.marginalia.query.model.QueryResponse;
import nu.marginalia.service.descriptor.ServiceDescriptors; import nu.marginalia.service.descriptor.ServiceDescriptors;
import nu.marginalia.service.id.ServiceId; import nu.marginalia.service.id.ServiceId;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -24,6 +26,7 @@ import java.util.UUID;
public class QueryClient extends AbstractDynamicClient { public class QueryClient extends AbstractDynamicClient {
private static final Summary wmsa_search_index_api_delegate_time = Summary.build().name("wmsa_search_index_api_delegate_time").help("-").register(); private static final Summary wmsa_search_index_api_delegate_time = Summary.build().name("wmsa_search_index_api_delegate_time").help("-").register();
private static final Summary wmsa_search_index_api_search_time = Summary.build().name("wmsa_search_index_api_search_time").help("-").register();
private final Logger logger = LoggerFactory.getLogger(getClass()); private final Logger logger = LoggerFactory.getLogger(getClass());
@ -49,6 +52,12 @@ public class QueryClient extends AbstractDynamicClient {
() -> this.postGet(ctx, "/delegate/", specs, SearchResultSet.class).blockingFirst() () -> this.postGet(ctx, "/delegate/", specs, SearchResultSet.class).blockingFirst()
); );
} }
/**
 * Submits the given query parameters to the "/search/" endpoint and blocks
 * until the first response has been received. The whole call is timed by
 * the wmsa_search_index_api_search_time summary.
 *
 * @param ctx    request context handed on to postGet
 * @param params query parameters handed on to postGet
 * @return the first QueryResponse produced by the request
 */
@CheckReturnValue
public QueryResponse search(Context ctx, QueryParams params) {
    return wmsa_search_index_api_search_time.time(() -> {
        var pendingResponse = this.postGet(ctx, "/search/", params, QueryResponse.class);
        return pendingResponse.blockingFirst();
    });
}
public MqOutbox outbox() { public MqOutbox outbox() {
return outbox; return outbox;
} }

View File

@ -0,0 +1,21 @@
package nu.marginalia.query.model;
import nu.marginalia.index.client.model.query.SearchSpecification;
import java.util.*;
/**
 * Immutable holder for the outcome of query processing: the search
 * specification, the human-readable search terms, and an optional domain.
 */
public class ProcessedQuery {
    /** The search specification produced for this query. */
    public final SearchSpecification specs;

    /** Search terms in human-readable form; empty when only specs were supplied. */
    public final List<String> searchTermsHuman;

    /** Domain associated with the query; null when only specs were supplied. */
    public final String domain;

    /**
     * Creates a processed query that carries nothing but a specification:
     * the human search terms are empty and the domain is null.
     *
     * @param justSpecs the search specification
     */
    public ProcessedQuery(SearchSpecification justSpecs) {
        this(justSpecs, List.of(), null);
    }

    /**
     * Creates a fully populated processed query. The arguments are stored
     * as given, without copying.
     *
     * @param specs            the search specification
     * @param searchTermsHuman search terms in human-readable form
     * @param domain           domain associated with the query, may be null
     */
    public ProcessedQuery(SearchSpecification specs, List<String> searchTermsHuman, String domain) {
        this.domain = domain;
        this.searchTermsHuman = searchTermsHuman;
        this.specs = specs;
    }
}

View File

@ -0,0 +1,26 @@
package nu.marginalia.query.model;
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
import nu.marginalia.index.client.model.query.SearchSpecification;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.SpecificationLimit;
import java.util.List;
/**
 * Parameters for a query sent to the query service.
 * <p>
 * List-valued components are never null after construction: a null list is
 * replaced by an empty list, so consumers can iterate or {@code addAll}
 * them without null checks. Non-null lists are stored as given (not copied).
 *
 * @param humanQuery    the query string as entered by the user
 * @param nearDomain    NOTE(review): not read by the query-building code
 *                      visible in this change; meaning to be confirmed
 * @param tacitIncludes terms added to the include-list of every subquery
 * @param tacitExcludes terms added to the exclude-list of every subquery
 * @param tacitPriority terms added to the priority-list of every subquery
 * @param tacitAdvice   terms added to the advice-list of every subquery
 * @param quality       initial quality limit for the query
 * @param year          initial year limit for the query
 * @param size          initial size limit for the query
 * @param rank          initial rank limit for the query
 * @param domainIds     domain ids passed on to the search specification
 * @param limits        result and timeout limits for the query
 * @param identifier    search set the query is run against
 */
public record QueryParams(
        String humanQuery,
        String nearDomain,
        List<String> tacitIncludes,
        List<String> tacitExcludes,
        List<String> tacitPriority,
        List<String> tacitAdvice,
        SpecificationLimit quality,
        SpecificationLimit year,
        SpecificationLimit size,
        SpecificationLimit rank,
        List<Integer> domainIds,
        QueryLimits limits,
        SearchSetIdentifier identifier
)
{
    /**
     * Normalizes absent (null) lists to empty lists. Callers are known to
     * pass null for lists they do not care about, and the query builder
     * feeds these lists straight into {@code addAll}, which rejects null.
     */
    public QueryParams {
        tacitIncludes = tacitIncludes == null ? List.of() : tacitIncludes;
        tacitExcludes = tacitExcludes == null ? List.of() : tacitExcludes;
        tacitPriority = tacitPriority == null ? List.of() : tacitPriority;
        tacitAdvice = tacitAdvice == null ? List.of() : tacitAdvice;
        domainIds = domainIds == null ? List.of() : domainIds;
    }
}

View File

@ -0,0 +1,23 @@
package nu.marginalia.query.model;
import nu.marginalia.index.client.model.query.SearchSpecification;
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Response of the query service: the specification that was executed, the
 * results, the human-readable search terms, any problems, and an optional
 * domain.
 */
public record QueryResponse(SearchSpecification specs,
                            List<DecoratedSearchResultItem> results,
                            List<String> searchTermsHuman,
                            List<String> problems,
                            String domain)
{
    /**
     * Collects the include-terms of every subquery in the specification.
     *
     * @return a new mutable set holding each distinct include-term once
     */
    public Set<String> getAllKeywords() {
        final Set<String> includeTerms = new HashSet<>(100);

        specs.subqueries.forEach(subquery -> includeTerms.addAll(subquery.searchTermsInclude));

        return includeTerms;
    }
}

View File

@ -1,4 +1,7 @@
package nu.marginalia.index.query.limit; package nu.marginalia.index.query.limit;
public record QueryLimits(int resultsByDomain, int resultsTotal, int timeoutMs, int fetchSize) { public record QueryLimits(int resultsByDomain, int resultsTotal, int timeoutMs, int fetchSize) {
/**
 * Returns a copy of these limits in which the per-domain result limit is
 * raised to the total result limit; timeout and fetch size are unchanged.
 *
 * @return new limits with resultsByDomain equal to resultsTotal
 */
public QueryLimits forSingleDomain() {
    final int perDomainLimit = resultsTotal;

    return new QueryLimits(perDomainLimit, resultsTotal, timeoutMs, fetchSize);
}
} }

View File

@ -36,7 +36,6 @@ public class Token {
case EXCLUDE_TERM: visitor.onExcludeTerm(this); break; case EXCLUDE_TERM: visitor.onExcludeTerm(this); break;
case PRIORTY_TERM: visitor.onPriorityTerm(this); break; case PRIORTY_TERM: visitor.onPriorityTerm(this); break;
case ADVICE_TERM: visitor.onAdviceTerm(this); break; case ADVICE_TERM: visitor.onAdviceTerm(this); break;
case NEAR_TERM: visitor.onNearTerm(this); break;
case LITERAL_TERM: visitor.onLiteralTerm(this); break; case LITERAL_TERM: visitor.onLiteralTerm(this); break;
case YEAR_TERM: visitor.onYearTerm(this); break; case YEAR_TERM: visitor.onYearTerm(this); break;

View File

@ -6,8 +6,6 @@ public interface TokenVisitor {
void onExcludeTerm(Token token); void onExcludeTerm(Token token);
void onPriorityTerm(Token token); void onPriorityTerm(Token token);
void onAdviceTerm(Token token); void onAdviceTerm(Token token);
void onNearTerm(Token token);
void onYearTerm(Token token); void onYearTerm(Token token);
void onSizeTerm(Token token); void onSizeTerm(Token token);
void onRankTerm(Token token); void onRankTerm(Token token);

View File

@ -28,7 +28,12 @@ dependencies {
implementation project(':code:common:service') implementation project(':code:common:service')
implementation project(':code:common:service-client') implementation project(':code:common:service-client')
implementation project(':code:api:index-api') implementation project(':code:api:index-api')
implementation project(':code:api:query-api')
implementation project(':code:common:service-discovery') implementation project(':code:common:service-discovery')
implementation project(':code:features-search:query-parser')
implementation project(':code:features-index:index-query')
implementation project(':code:libraries:language-processing')
implementation project(':code:libraries:term-frequency-dict')
implementation libs.bundles.slf4j implementation libs.bundles.slf4j
@ -39,6 +44,7 @@ dependencies {
implementation libs.guice implementation libs.guice
implementation libs.protobuf implementation libs.protobuf
implementation libs.rxjava implementation libs.rxjava
implementation libs.bundles.mariadb
testImplementation libs.bundles.slf4j.test testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit testImplementation libs.bundles.junit

View File

@ -2,10 +2,13 @@ package nu.marginalia.query;
import com.google.gson.Gson; import com.google.gson.Gson;
import com.google.inject.AbstractModule; import com.google.inject.AbstractModule;
import nu.marginalia.LanguageModels;
import nu.marginalia.WmsaHome;
import nu.marginalia.model.gson.GsonFactory; import nu.marginalia.model.gson.GsonFactory;
public class QueryModule extends AbstractModule { public class QueryModule extends AbstractModule {
public void configure() { public void configure() {
bind(LanguageModels.class).toInstance(WmsaHome.getLanguageModels());
bind(Gson.class).toProvider(GsonFactory::get); bind(Gson.class).toProvider(GsonFactory::get);
} }
} }

View File

@ -6,27 +6,54 @@ import nu.marginalia.client.Context;
import nu.marginalia.index.client.IndexClient; import nu.marginalia.index.client.IndexClient;
import nu.marginalia.index.client.model.query.SearchSpecification; import nu.marginalia.index.client.model.query.SearchSpecification;
import nu.marginalia.index.client.model.results.SearchResultSet; import nu.marginalia.index.client.model.results.SearchResultSet;
import nu.marginalia.query.model.QueryParams;
import nu.marginalia.query.model.QueryResponse;
import nu.marginalia.query.svc.QueryFactory;
import nu.marginalia.service.server.BaseServiceParams; import nu.marginalia.service.server.BaseServiceParams;
import nu.marginalia.service.server.Service; import nu.marginalia.service.server.Service;
import spark.Request; import spark.Request;
import spark.Response; import spark.Response;
import spark.Spark; import spark.Spark;
import java.util.List;
public class QueryService extends Service { public class QueryService extends Service {
private final IndexClient indexClient; private final IndexClient indexClient;
private final Gson gson; private final Gson gson;
private final QueryFactory queryFactory;
@Inject @Inject
public QueryService(BaseServiceParams params, public QueryService(BaseServiceParams params,
IndexClient indexClient, IndexClient indexClient,
Gson gson) Gson gson,
QueryFactory queryFactory)
{ {
super(params); super(params);
this.indexClient = indexClient; this.indexClient = indexClient;
this.gson = gson; this.gson = gson;
this.queryFactory = queryFactory;
Spark.post("/delegate/", this::delegateToIndex, gson::toJson); Spark.post("/delegate/", this::delegateToIndex, gson::toJson);
Spark.post("/search/", this::search, gson::toJson);
}
private Object search(Request request, Response response) {
String json = request.body();
QueryParams params = gson.fromJson(json, QueryParams.class);
var query = queryFactory.createQuery(params);
var rsp = executeQuery(Context.fromRequest(request), query.specs);
response.type("application/json");
return new QueryResponse(
query.specs,
rsp.results,
query.searchTermsHuman,
List.of(),
query.domain
);
} }
private SearchResultSet delegateToIndex(Request request, Response response) { private SearchResultSet delegateToIndex(Request request, Response response) {
@ -35,7 +62,10 @@ public class QueryService extends Service {
response.type("application/json"); response.type("application/json");
return indexClient.query(Context.fromRequest(request), specsSet); return executeQuery(Context.fromRequest(request), specsSet);
} }
/**
 * Runs the given search specification through the index client.
 *
 * @param ctx   request context passed to the index client
 * @param query the search specification to execute
 * @return the result set returned by the index client
 */
private SearchResultSet executeQuery(Context ctx, SearchSpecification query) {
    final SearchResultSet resultSet = indexClient.query(ctx, query);

    return resultSet;
}
} }

View File

@ -1,4 +1,4 @@
package nu.marginalia.search.query; package nu.marginalia.query.svc;
import com.google.inject.Inject; import com.google.inject.Inject;
import com.google.inject.Singleton; import com.google.inject.Singleton;
@ -6,34 +6,29 @@ import nu.marginalia.LanguageModels;
import nu.marginalia.index.client.model.query.SearchSpecification; import nu.marginalia.index.client.model.query.SearchSpecification;
import nu.marginalia.index.client.model.query.SearchSubquery; import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.index.client.model.results.ResultRankingParameters; import nu.marginalia.index.client.model.results.ResultRankingParameters;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.language.EnglishDictionary; import nu.marginalia.language.EnglishDictionary;
import nu.marginalia.language.WordPatterns;
import nu.marginalia.ngrams.NGramBloomFilter; import nu.marginalia.ngrams.NGramBloomFilter;
import nu.marginalia.term_frequency_dict.TermFrequencyDict; import nu.marginalia.query.model.QueryParams;
import nu.marginalia.query.model.ProcessedQuery;
import nu.marginalia.query_parser.QueryParser; import nu.marginalia.query_parser.QueryParser;
import nu.marginalia.query_parser.QueryPermutation; import nu.marginalia.query_parser.QueryPermutation;
import nu.marginalia.query_parser.QueryVariants; import nu.marginalia.query_parser.QueryVariants;
import nu.marginalia.query_parser.token.Token; import nu.marginalia.query_parser.token.Token;
import nu.marginalia.query_parser.token.TokenType; import nu.marginalia.query_parser.token.TokenType;
import nu.marginalia.search.db.DbNearDomainsQuery; import nu.marginalia.term_frequency_dict.TermFrequencyDict;
import nu.marginalia.search.model.SearchProfile;
import nu.marginalia.search.query.model.SearchQuery;
import nu.marginalia.search.query.model.UserSearchParameters;
import nu.marginalia.language.WordPatterns;
import org.eclipse.jetty.http.HttpStatus;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import spark.Spark;
import java.util.*; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
@Singleton @Singleton
public class QueryFactory { public class QueryFactory {
private final Logger logger = LoggerFactory.getLogger(getClass()); private final Logger logger = LoggerFactory.getLogger(getClass());
private final DbNearDomainsQuery dbNearDomainsQuery;
private static final int RETAIN_QUERY_VARIANT_COUNT = 5; private static final int RETAIN_QUERY_VARIANT_COUNT = 5;
private final ThreadLocal<QueryVariants> queryVariants; private final ThreadLocal<QueryVariants> queryVariants;
@ -45,10 +40,7 @@ public class QueryFactory {
public QueryFactory(LanguageModels lm, public QueryFactory(LanguageModels lm,
TermFrequencyDict dict, TermFrequencyDict dict,
EnglishDictionary englishDictionary, EnglishDictionary englishDictionary,
NGramBloomFilter nGramBloomFilter, NGramBloomFilter nGramBloomFilter) {
DbNearDomainsQuery dbNearDomainsQuery) {
this.dbNearDomainsQuery = dbNearDomainsQuery;
this.queryVariants = ThreadLocal.withInitial(() -> new QueryVariants(lm ,dict, nGramBloomFilter, englishDictionary)); this.queryVariants = ThreadLocal.withInitial(() -> new QueryVariants(lm ,dict, nGramBloomFilter, englishDictionary));
} }
@ -60,7 +52,7 @@ public class QueryFactory {
return new QueryPermutation(queryVariants.get()); return new QueryPermutation(queryVariants.get());
} }
public SearchQuery createQuery(UserSearchParameters params) { public ProcessedQuery createQuery(QueryParams params) {
final var processedQuery = createQuery(getQueryPermutation(), params); final var processedQuery = createQuery(getQueryPermutation(), params);
final List<SearchSubquery> subqueries = processedQuery.specs.subqueries; final List<SearchSubquery> subqueries = processedQuery.specs.subqueries;
@ -72,59 +64,25 @@ public class QueryFactory {
return processedQuery; return processedQuery;
} }
public SearchQuery createQuery(SearchProfile profile,
int limitPerDomain,
int limitTotal,
String... termsInclude)
{
List<SearchSubquery> sqs = new ArrayList<>();
sqs.add(new SearchSubquery(
Arrays.asList(termsInclude),
Collections.emptyList(),
Collections.emptyList(),
Collections.emptyList(),
Collections.emptyList()));
var specs = SearchSpecification.builder()
.subqueries(sqs)
.domains(Collections.emptyList())
.searchSetIdentifier(profile.searchSetIdentifier)
.queryLimits(new QueryLimits(limitPerDomain, limitTotal, 250, 8192))
.humanQuery("")
.year(SpecificationLimit.none())
.size(SpecificationLimit.none())
.rank(SpecificationLimit.none())
.rankingParams(ResultRankingParameters.sensibleDefaults())
.quality(SpecificationLimit.none())
.queryStrategy(QueryStrategy.AUTO)
.build();
return new SearchQuery(specs);
}
private void trimArray(List<?> arr, int maxSize) { private void trimArray(List<?> arr, int maxSize) {
if (arr.size() > maxSize) { if (arr.size() > maxSize) {
arr.subList(0, arr.size() - maxSize).clear(); arr.subList(0, arr.size() - maxSize).clear();
} }
} }
public SearchQuery createQuery(QueryPermutation queryPermutation, public ProcessedQuery createQuery(QueryPermutation queryPermutation,
UserSearchParameters params) QueryParams params)
{ {
final var query = params.humanQuery(); final var query = params.humanQuery();
final var profile = params.profile();
if (query.length() > 1000) { if (query.length() > 1000) {
Spark.halt(HttpStatus.BAD_REQUEST_400, "That's too much, man"); throw new IllegalArgumentException("Query too long");
} }
List<String> searchTermsHuman = new ArrayList<>(); List<String> searchTermsHuman = new ArrayList<>();
List<String> problems = new ArrayList<>(); List<String> problems = new ArrayList<>();
String domain = null;
String near = null,
domain = null;
var basicQuery = queryParser.parse(query); var basicQuery = queryParser.parse(query);
@ -134,7 +92,7 @@ public class QueryFactory {
} }
QueryLimitsAccumulator qualityLimits = new QueryLimitsAccumulator(profile); QueryLimitsAccumulator qualityLimits = new QueryLimitsAccumulator(params);
for (Token t : basicQuery) { for (Token t : basicQuery) {
if (t.type == TokenType.QUOT_TERM || t.type == TokenType.LITERAL_TERM) { if (t.type == TokenType.QUOT_TERM || t.type == TokenType.LITERAL_TERM) {
@ -153,50 +111,46 @@ public class QueryFactory {
List<SearchSubquery> subqueries = new ArrayList<>(); List<SearchSubquery> subqueries = new ArrayList<>();
for (var parts : queryPermutations) { for (var parts : queryPermutations) {
QuerySearchTermsAccumulator termsAccumulator = new QuerySearchTermsAccumulator(profile, parts); QuerySearchTermsAccumulator termsAccumulator = new QuerySearchTermsAccumulator(parts);
SearchSubquery subquery = termsAccumulator.createSubquery(); SearchSubquery subquery = termsAccumulator.createSubquery();
near = termsAccumulator.near;
domain = termsAccumulator.domain; domain = termsAccumulator.domain;
params.profile().addTacitTerms(subquery);
params.jsSetting().addTacitTerms(subquery);
subqueries.add(subquery); subqueries.add(subquery);
} }
List<Integer> domains = Collections.emptyList(); List<Integer> domainIds = params.domainIds();
if (near != null) { var limits = params.limits();
if (domain == null) { // Disable limits on number of results per domain if we're searching with a site:-type term
domains = dbNearDomainsQuery.getRelatedDomains(near, problems::add);
}
}
int domainLimit;
if (domain != null) { if (domain != null) {
domainLimit = 1000; limits = limits.forSingleDomain();
} else {
domainLimit = 2;
} }
var specsBuilder = SearchSpecification.builder() var specsBuilder = SearchSpecification.builder()
.subqueries(subqueries) .subqueries(subqueries)
.queryLimits(new QueryLimits(domainLimit, 100, 250, 4096))
.humanQuery(query) .humanQuery(query)
.quality(qualityLimits.qualityLimit) .quality(qualityLimits.qualityLimit)
.year(qualityLimits.year) .year(qualityLimits.year)
.size(qualityLimits.size) .size(qualityLimits.size)
.rank(qualityLimits.rank) .rank(qualityLimits.rank)
.domains(domains) .domains(domainIds)
.queryLimits(limits)
.searchSetIdentifier(params.identifier())
.rankingParams(ResultRankingParameters.sensibleDefaults()) .rankingParams(ResultRankingParameters.sensibleDefaults())
.queryStrategy(qualityLimits.queryStrategy) .queryStrategy(qualityLimits.queryStrategy);
.searchSetIdentifier(profile.searchSetIdentifier);
SearchSpecification specs = specsBuilder.build(); SearchSpecification specs = specsBuilder.build();
return new SearchQuery(specs, searchTermsHuman, domain); for (var sq : specs.subqueries) {
sq.searchTermsAdvice.addAll(params.tacitAdvice());
sq.searchTermsPriority.addAll(params.tacitPriority());
sq.searchTermsInclude.addAll(params.tacitIncludes());
sq.searchTermsExclude.addAll(params.tacitExcludes());
}
return new ProcessedQuery(specs, searchTermsHuman, domain);
} }

View File

@ -1,10 +1,10 @@
package nu.marginalia.search.query; package nu.marginalia.query.svc;
import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit; import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.query.model.QueryParams;
import nu.marginalia.query_parser.token.Token; import nu.marginalia.query_parser.token.Token;
import nu.marginalia.query_parser.token.TokenVisitor; import nu.marginalia.query_parser.token.TokenVisitor;
import nu.marginalia.search.model.SearchProfile;
public class QueryLimitsAccumulator implements TokenVisitor { public class QueryLimitsAccumulator implements TokenVisitor {
public SpecificationLimit qualityLimit; public SpecificationLimit qualityLimit;
@ -14,11 +14,11 @@ public class QueryLimitsAccumulator implements TokenVisitor {
public QueryStrategy queryStrategy = QueryStrategy.AUTO; public QueryStrategy queryStrategy = QueryStrategy.AUTO;
public QueryLimitsAccumulator(SearchProfile profile) { public QueryLimitsAccumulator(QueryParams params) {
qualityLimit = profile.getQualityLimit(); qualityLimit = params.quality();
year = profile.getYearLimit(); year = params.year();
size = profile.getSizeLimit(); size = params.size();
rank = SpecificationLimit.none(); rank = params.rank();
} }
private SpecificationLimit parseSpecificationLimit(String str) { private SpecificationLimit parseSpecificationLimit(String str) {
@ -89,7 +89,4 @@ public class QueryLimitsAccumulator implements TokenVisitor {
@Override @Override
public void onAdviceTerm(Token token) {} public void onAdviceTerm(Token token) {}
@Override
public void onNearTerm(Token token) {}
} }

View File

@ -1,10 +1,10 @@
package nu.marginalia.search.query; package nu.marginalia.query.svc;
import nu.marginalia.index.client.model.query.SearchSubquery; import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.language.WordPatterns; import nu.marginalia.language.WordPatterns;
import nu.marginalia.query.model.QueryParams;
import nu.marginalia.query_parser.token.Token; import nu.marginalia.query_parser.token.Token;
import nu.marginalia.query_parser.token.TokenVisitor; import nu.marginalia.query_parser.token.TokenVisitor;
import nu.marginalia.search.model.SearchProfile;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
@ -18,16 +18,13 @@ public class QuerySearchTermsAccumulator implements TokenVisitor {
public List<String> searchTermsPriority = new ArrayList<>(); public List<String> searchTermsPriority = new ArrayList<>();
public List<List<String>> searchTermCoherences = new ArrayList<>(); public List<List<String>> searchTermCoherences = new ArrayList<>();
public String near;
public String domain; public String domain;
public SearchSubquery createSubquery() { public SearchSubquery createSubquery() {
return new SearchSubquery(searchTermsInclude, searchTermsExclude, searchTermsAdvice, searchTermsPriority, searchTermCoherences); return new SearchSubquery(searchTermsInclude, searchTermsExclude, searchTermsAdvice, searchTermsPriority, searchTermCoherences);
} }
public QuerySearchTermsAccumulator(SearchProfile profile, List<Token> parts) { public QuerySearchTermsAccumulator(List<Token> parts) {
near = profile.getNearDomain();
for (Token t : parts) { for (Token t : parts) {
t.visit(this); t.visit(this);
} }
@ -100,11 +97,6 @@ public class QuerySearchTermsAccumulator implements TokenVisitor {
} }
} }
@Override
public void onNearTerm(Token token) {
near = token.str;
}
@Override @Override
public void onYearTerm(Token token) { public void onYearTerm(Token token) {

View File

@ -1,14 +1,15 @@
package nu.marginalia.search.query; package nu.marginalia.query.svc;
import nu.marginalia.WmsaHome; import nu.marginalia.WmsaHome;
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
import nu.marginalia.index.client.model.query.SearchSpecification;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.index.query.limit.SpecificationLimitType; import nu.marginalia.index.query.limit.SpecificationLimitType;
import nu.marginalia.language.EnglishDictionary; import nu.marginalia.language.EnglishDictionary;
import nu.marginalia.index.client.model.query.SearchSpecification;
import nu.marginalia.ngrams.NGramBloomFilter; import nu.marginalia.ngrams.NGramBloomFilter;
import nu.marginalia.query.model.QueryParams;
import nu.marginalia.term_frequency_dict.TermFrequencyDict; import nu.marginalia.term_frequency_dict.TermFrequencyDict;
import nu.marginalia.search.command.SearchJsParameter;
import nu.marginalia.search.model.SearchProfile;
import nu.marginalia.search.query.model.UserSearchParameters;
import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@ -30,15 +31,24 @@ public class QueryFactoryTest {
queryFactory = new QueryFactory(lm, queryFactory = new QueryFactory(lm,
tfd, tfd,
new EnglishDictionary(tfd), new EnglishDictionary(tfd),
new NGramBloomFilter(lm), new NGramBloomFilter(lm)
null
); );
} }
public SearchSpecification parseAndGetSpecs(String query) { public SearchSpecification parseAndGetSpecs(String query) {
return queryFactory.createQuery( return queryFactory.createQuery(
new UserSearchParameters(query, SearchProfile.CORPO, SearchJsParameter.DEFAULT) new QueryParams(query, null,
).specs; List.of(),
List.of(),
List.of(),
List.of(),
SpecificationLimit.none(),
SpecificationLimit.none(),
SpecificationLimit.none(),
SpecificationLimit.none(),
null,
new QueryLimits(100, 100, 100, 100),
SearchSetIdentifier.BLOGS)).specs;
} }
@Test @Test

View File

@ -7,12 +7,12 @@ import io.reactivex.rxjava3.schedulers.Schedulers;
import nu.marginalia.assistant.client.AssistantClient; import nu.marginalia.assistant.client.AssistantClient;
import nu.marginalia.model.EdgeDomain; import nu.marginalia.model.EdgeDomain;
import nu.marginalia.db.DbDomainQueries; import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.query.client.QueryClient;
import nu.marginalia.query.model.QueryResponse;
import nu.marginalia.search.model.UrlDetails; import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.client.Context; import nu.marginalia.client.Context;
import nu.marginalia.search.model.DecoratedSearchResults; import nu.marginalia.search.model.DecoratedSearchResults;
import nu.marginalia.search.query.QueryFactory; import nu.marginalia.search.model.UserSearchParameters;
import nu.marginalia.search.query.model.SearchQuery;
import nu.marginalia.search.query.model.UserSearchParameters;
import nu.marginalia.search.svc.SearchQueryIndexService; import nu.marginalia.search.svc.SearchQueryIndexService;
import nu.marginalia.search.svc.SearchUnitConversionService; import nu.marginalia.search.svc.SearchUnitConversionService;
import org.apache.logging.log4j.util.Strings; import org.apache.logging.log4j.util.Strings;
@ -37,58 +37,71 @@ public class SearchOperator {
private final AssistantClient assistantClient; private final AssistantClient assistantClient;
private final DbDomainQueries domainQueries; private final DbDomainQueries domainQueries;
private final QueryFactory queryFactory; private final QueryClient queryClient;
private final SearchQueryIndexService searchQueryService; private final SearchQueryIndexService searchQueryService;
private final SearchQueryParamFactory paramFactory;
private final SearchUnitConversionService searchUnitConversionService; private final SearchUnitConversionService searchUnitConversionService;
@Inject @Inject
public SearchOperator(AssistantClient assistantClient, public SearchOperator(AssistantClient assistantClient,
DbDomainQueries domainQueries, DbDomainQueries domainQueries,
QueryFactory queryFactory, QueryClient queryClient,
SearchQueryIndexService searchQueryService, SearchQueryIndexService searchQueryService,
SearchUnitConversionService searchUnitConversionService) { SearchQueryParamFactory paramFactory,
SearchUnitConversionService searchUnitConversionService)
{
this.assistantClient = assistantClient; this.assistantClient = assistantClient;
this.domainQueries = domainQueries; this.domainQueries = domainQueries;
this.queryFactory = queryFactory; this.queryClient = queryClient;
this.searchQueryService = searchQueryService; this.searchQueryService = searchQueryService;
this.paramFactory = paramFactory;
this.searchUnitConversionService = searchUnitConversionService; this.searchUnitConversionService = searchUnitConversionService;
} }
public List<UrlDetails> doApiSearch(Context ctx, public List<UrlDetails> doApiSearch(Context ctx,
UserSearchParameters params) { UserSearchParameters params) {
// TODO: This shouldn't route through search-service!
var queryParams = paramFactory.forRegularSearch(params);
var queryResponse = queryClient.search(ctx, queryParams);
SearchQuery processedQuery = queryFactory.createQuery(params); logger.info(queryMarker, "Human terms (API): {}", Strings.join(queryResponse.searchTermsHuman(), ','));
logger.info(queryMarker, "Human terms (API): {}", Strings.join(processedQuery.searchTermsHuman, ',')); return searchQueryService.getResultsFromQuery(queryResponse);
return searchQueryService.executeQuery(ctx, processedQuery.specs);
} }
public DecoratedSearchResults doSearch(Context ctx, UserSearchParameters params) { public List<UrlDetails> doSiteSearch(Context ctx,
String domain) {
Future<String> eval = searchUnitConversionService.tryEval(ctx, params.humanQuery()); var queryParams = paramFactory.forSiteSearch(domain);
SearchQuery processedQuery = queryFactory.createQuery(params); var queryResponse = queryClient.search(ctx, queryParams);
logger.info(queryMarker, "Human terms: {}", Strings.join(processedQuery.searchTermsHuman, ',')); return searchQueryService.getResultsFromQuery(queryResponse);
}
List<UrlDetails> queryResults = searchQueryService.executeQuery(ctx, processedQuery.specs); public DecoratedSearchResults doSearch(Context ctx, UserSearchParameters userParams) {
Future<String> eval = searchUnitConversionService.tryEval(ctx, userParams.humanQuery());
var queryParams = paramFactory.forRegularSearch(userParams);
var queryResponse = queryClient.search(ctx, queryParams);
List<UrlDetails> queryResults = searchQueryService.getResultsFromQuery(queryResponse);
logger.info(queryMarker, "Human terms: {}", Strings.join(queryResponse.searchTermsHuman(), ','));
logger.info(queryMarker, "Search Result Count: {}", queryResults.size()); logger.info(queryMarker, "Search Result Count: {}", queryResults.size());
String evalResult = getFutureOrDefault(eval, ""); String evalResult = getFutureOrDefault(eval, "");
return DecoratedSearchResults.builder() return DecoratedSearchResults.builder()
.params(params) .params(userParams)
.problems(getProblems(ctx, evalResult, queryResults, processedQuery)) .problems(getProblems(ctx, evalResult, queryResults, queryResponse))
.evalResult(evalResult) .evalResult(evalResult)
.results(queryResults) .results(queryResults)
.focusDomain(processedQuery.domain) .focusDomain(queryResponse.domain())
.focusDomainId(getDomainId(processedQuery.domain)) .focusDomainId(getDomainId(queryResponse.domain()))
.build(); .build();
} }
@ -113,20 +126,20 @@ public class SearchOperator {
return domainQueries.tryGetDomainId(new EdgeDomain(domain)).orElse(-1); return domainQueries.tryGetDomainId(new EdgeDomain(domain)).orElse(-1);
} }
private List<String> getProblems(Context ctx, String evalResult, List<UrlDetails> queryResults, SearchQuery processedQuery) { private List<String> getProblems(Context ctx, String evalResult, List<UrlDetails> queryResults, QueryResponse response) {
final List<String> problems = new ArrayList<>(processedQuery.problems); final List<String> problems = new ArrayList<>(response.problems());
boolean siteSearch = processedQuery.domain != null; boolean siteSearch = response.domain() != null;
if (!siteSearch) { if (!siteSearch) {
if (queryResults.size() <= 5 && null == evalResult) { if (queryResults.size() <= 5 && null == evalResult) {
spellCheckTerms(ctx, processedQuery).forEach(problems::add); spellCheckTerms(ctx, response).forEach(problems::add);
} }
if (queryResults.size() <= 5) { if (queryResults.size() <= 5) {
problems.add("Try rephrasing the query, changing the word order or using synonyms to get different results. <a href=\"https://memex.marginalia.nu/projects/edge/search-tips.gmi\">Tips</a>."); problems.add("Try rephrasing the query, changing the word order or using synonyms to get different results. <a href=\"https://memex.marginalia.nu/projects/edge/search-tips.gmi\">Tips</a>.");
} }
Set<String> representativeKeywords = processedQuery.getAllKeywords(); Set<String> representativeKeywords = response.getAllKeywords();
if (representativeKeywords.size()>1 && (representativeKeywords.contains("definition") || representativeKeywords.contains("define") || representativeKeywords.contains("meaning"))) if (representativeKeywords.size()>1 && (representativeKeywords.contains("definition") || representativeKeywords.contains("define") || representativeKeywords.contains("meaning")))
{ {
problems.add("Tip: Try using a query that looks like <tt>define:word</tt> if you want a dictionary definition"); problems.add("Tip: Try using a query that looks like <tt>define:word</tt> if you want a dictionary definition");
@ -137,8 +150,8 @@ public class SearchOperator {
} }
private Iterable<String> spellCheckTerms(Context ctx, SearchQuery disjointedQuery) { private Iterable<String> spellCheckTerms(Context ctx, QueryResponse response) {
return Observable.fromIterable(disjointedQuery.searchTermsHuman) return Observable.fromIterable(response.searchTermsHuman())
.subscribeOn(Schedulers.io()) .subscribeOn(Schedulers.io())
.flatMap(term -> assistantClient.spellCheck(ctx, term) .flatMap(term -> assistantClient.spellCheck(ctx, term)
.onErrorReturn(e -> Collections.emptyList()) .onErrorReturn(e -> Collections.emptyList())

View File

@ -0,0 +1,53 @@
package nu.marginalia.search;
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.query.model.QueryParams;
import nu.marginalia.search.model.UserSearchParameters;
import java.util.List;
/**
 * Builds the {@link QueryParams} objects this service passes along when it
 * issues a search, one factory method per kind of search.
 *
 * <p>NOTE(review): {@code QueryParams} is constructed positionally and its
 * declaration is not visible from this file; the local names below reflect
 * where each value comes from, not the constructor's parameter names.
 */
public class SearchQueryParamFactory {

    /**
     * Creates the parameters for a regular search driven by user input.
     *
     * <p>The user's profile supplies its tacit terms, its quality/year/size
     * limits and its search set identifier; the human query string is passed
     * through as given.
     *
     * @param userParams the user's search parameters (query string and profile)
     * @return the assembled query parameters
     */
    public QueryParams forRegularSearch(UserSearchParameters userParams) {
        // Let the profile write its tacit terms into a scratch subquery;
        // only the four term lists are read back out of it afterwards.
        var tacitTerms = new SearchSubquery();
        var profile = userParams.profile();
        profile.addTacitTerms(tacitTerms);

        var humanQuery = userParams.humanQuery();

        var includeTerms = tacitTerms.searchTermsInclude;
        var excludeTerms = tacitTerms.searchTermsExclude;
        var priorityTerms = tacitTerms.searchTermsPriority;
        var adviceTerms = tacitTerms.searchTermsAdvice;

        var qualityLimit = profile.getQualityLimit();
        var yearLimit = profile.getYearLimit();
        var sizeLimit = profile.getSizeLimit();
        var unrestricted = SpecificationLimit.none();

        // NOTE(review): the meaning of the four QueryLimits values is not
        // visible here -- confirm against QueryLimits before changing them.
        var limits = new QueryLimits(2, 100, 200, 8192);
        var searchSet = profile.searchSetIdentifier;

        return new QueryParams(
                humanQuery,
                null,
                includeTerms,
                excludeTerms,
                priorityTerms,
                adviceTerms,
                qualityLimit,
                yearLimit,
                sizeLimit,
                unrestricted,
                List.of(),
                limits,
                searchSet);
    }

    /**
     * Creates the parameters for a search expressed as a {@code site:} query
     * on the given domain.
     *
     * <p>No extra terms are supplied, all four specification limits are
     * {@link SpecificationLimit#none()}, and the search set is
     * {@link SearchSetIdentifier#NONE}.
     *
     * @param domain the domain name appended to the {@code site:} prefix
     * @return the assembled query parameters
     */
    public QueryParams forSiteSearch(String domain) {
        String siteQuery = "site:" + domain;

        return new QueryParams(
                siteQuery,
                null,
                List.of(), List.of(), List.of(), List.of(),
                SpecificationLimit.none(),
                SpecificationLimit.none(),
                SpecificationLimit.none(),
                SpecificationLimit.none(),
                List.of(),
                // NOTE(review): as above, the meaning of these values is not
                // visible from this file.
                new QueryLimits(100, 100, 100, 512),
                SearchSetIdentifier.NONE);
    }
}

View File

@ -8,7 +8,7 @@ import nu.marginalia.search.command.SearchCommandInterface;
import nu.marginalia.search.command.SearchParameters; import nu.marginalia.search.command.SearchParameters;
import nu.marginalia.search.model.DecoratedSearchResults; import nu.marginalia.search.model.DecoratedSearchResults;
import nu.marginalia.search.model.UrlDetails; import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.search.query.model.UserSearchParameters; import nu.marginalia.search.model.UserSearchParameters;
import nu.marginalia.renderer.MustacheRenderer; import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.renderer.RendererFactory; import nu.marginalia.renderer.RendererFactory;

View File

@ -3,12 +3,12 @@ package nu.marginalia.search.command.commands;
import com.google.inject.Inject; import com.google.inject.Inject;
import nu.marginalia.db.DbDomainQueries; import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.model.EdgeDomain; import nu.marginalia.model.EdgeDomain;
import nu.marginalia.search.SearchOperator;
import nu.marginalia.search.model.UrlDetails; import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.search.command.SearchCommandInterface; import nu.marginalia.search.command.SearchCommandInterface;
import nu.marginalia.search.command.SearchParameters; import nu.marginalia.search.command.SearchParameters;
import nu.marginalia.search.model.DomainInformation; import nu.marginalia.search.model.DomainInformation;
import nu.marginalia.search.model.SearchProfile; import nu.marginalia.search.model.SearchProfile;
import nu.marginalia.search.query.QueryFactory;
import nu.marginalia.search.siteinfo.DomainInformationService; import nu.marginalia.search.siteinfo.DomainInformationService;
import nu.marginalia.search.svc.SearchQueryIndexService; import nu.marginalia.search.svc.SearchQueryIndexService;
import nu.marginalia.client.Context; import nu.marginalia.client.Context;
@ -25,9 +25,9 @@ import java.util.regex.Pattern;
public class SiteListCommand implements SearchCommandInterface { public class SiteListCommand implements SearchCommandInterface {
private final DbDomainQueries domainQueries; private final DbDomainQueries domainQueries;
private final QueryFactory queryFactory;
private final DomainInformationService domainInformationService; private final DomainInformationService domainInformationService;
private final SearchQueryIndexService searchQueryIndexService; private final SearchQueryIndexService searchQueryIndexService;
private final SearchOperator searchOperator;
private final Logger logger = LoggerFactory.getLogger(getClass()); private final Logger logger = LoggerFactory.getLogger(getClass());
private final MustacheRenderer<DomainInformation> siteInfoRenderer; private final MustacheRenderer<DomainInformation> siteInfoRenderer;
@ -38,16 +38,16 @@ public class SiteListCommand implements SearchCommandInterface {
public SiteListCommand( public SiteListCommand(
DomainInformationService domainInformationService, DomainInformationService domainInformationService,
DbDomainQueries domainQueries, DbDomainQueries domainQueries,
QueryFactory queryFactory, RendererFactory rendererFactory, RendererFactory rendererFactory,
SearchQueryIndexService searchQueryIndexService) SearchQueryIndexService searchQueryIndexService, SearchOperator searchOperator)
throws IOException throws IOException
{ {
this.domainQueries = domainQueries; this.domainQueries = domainQueries;
this.domainInformationService = domainInformationService; this.domainInformationService = domainInformationService;
this.queryFactory = queryFactory;
siteInfoRenderer = rendererFactory.renderer("search/site-info"); siteInfoRenderer = rendererFactory.renderer("search/site-info");
this.searchQueryIndexService = searchQueryIndexService; this.searchQueryIndexService = searchQueryIndexService;
this.searchOperator = searchOperator;
} }
@Override @Override
@ -63,8 +63,8 @@ public class SiteListCommand implements SearchCommandInterface {
Path screenshotPath = null; Path screenshotPath = null;
int domainId = -1; int domainId = -1;
if (null != domain) { if (null != domain) {
var dumbQuery = queryFactory.createQuery(SearchProfile.CORPO, 100, 100, "site:"+domain); resultSet = searchOperator.doSiteSearch(ctx, domain.toString());
resultSet = searchQueryIndexService.executeQuery(ctx, dumbQuery.specs);
var maybeId = domainQueries.tryGetDomainId(domain); var maybeId = domainQueries.tryGetDomainId(domain);
if (maybeId.isPresent()) { if (maybeId.isPresent()) {
domainId = maybeId.getAsInt(); domainId = maybeId.getAsInt();

View File

@ -3,7 +3,6 @@ package nu.marginalia.search.model;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
import lombok.Builder; import lombok.Builder;
import lombok.Getter; import lombok.Getter;
import nu.marginalia.search.query.model.UserSearchParameters;
import java.util.List; import java.util.List;

View File

@ -1,4 +1,4 @@
package nu.marginalia.search.query.model; package nu.marginalia.search.model;
import nu.marginalia.search.command.SearchJsParameter; import nu.marginalia.search.command.SearchJsParameter;
import nu.marginalia.search.model.SearchProfile; import nu.marginalia.search.model.SearchProfile;

View File

@ -1,28 +0,0 @@
package nu.marginalia.search.query.model;
import lombok.AllArgsConstructor;
import nu.marginalia.index.client.model.query.SearchSpecification;
import java.util.*;
/**
 * Holder for a search specification together with the human-readable search
 * terms, any problems noted about the query, and an optional domain.
 *
 * <p>The field declaration order is significant: Lombok derives the generated
 * constructor's parameter order from it.
 */
@AllArgsConstructor
public class SearchQuery {
    /** The specification this query wraps. */
    public final SearchSpecification specs;

    /** Problems attached to the query, kept in sorted order. */
    public final Set<String> problems = new TreeSet<>();

    /** The search terms in their human-readable form. */
    public final List<String> searchTermsHuman;

    /** Domain associated with the query; left {@code null} by the single-argument constructor. */
    public String domain;

    /**
     * Creates a query from a bare specification, with an empty, mutable list
     * of human search terms.
     *
     * @param justSpecs the specification to wrap
     */
    public SearchQuery(SearchSpecification justSpecs) {
        this.specs = justSpecs;
        this.searchTermsHuman = new ArrayList<>();
    }

    /**
     * Collects the include-terms of every subquery in {@link #specs}.
     *
     * @return a new mutable set holding the union of all subqueries' include-terms
     */
    public Set<String> getAllKeywords() {
        final Set<String> allIncluded = new HashSet<>(100);

        for (var subquery : specs.subqueries) {
            allIncluded.addAll(subquery.searchTermsInclude);
        }

        return allIncluded;
    }
}

View File

@ -14,7 +14,7 @@ import nu.marginalia.search.client.model.ApiSearchResults;
import nu.marginalia.search.model.SearchProfile; import nu.marginalia.search.model.SearchProfile;
import nu.marginalia.client.Context; import nu.marginalia.client.Context;
import nu.marginalia.search.command.SearchJsParameter; import nu.marginalia.search.command.SearchJsParameter;
import nu.marginalia.search.query.model.UserSearchParameters; import nu.marginalia.search.model.UserSearchParameters;
import spark.Request; import spark.Request;
import spark.Response; import spark.Response;

View File

@ -5,6 +5,8 @@ import com.google.inject.Singleton;
import nu.marginalia.index.client.model.query.SearchSpecification; import nu.marginalia.index.client.model.query.SearchSpecification;
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem; import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
import nu.marginalia.query.client.QueryClient; import nu.marginalia.query.client.QueryClient;
import nu.marginalia.query.model.QueryParams;
import nu.marginalia.query.model.QueryResponse;
import nu.marginalia.search.model.UrlDetails; import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.search.results.SearchResultDecorator; import nu.marginalia.search.results.SearchResultDecorator;
import nu.marginalia.search.results.UrlDeduplicator; import nu.marginalia.search.results.UrlDeduplicator;
@ -15,7 +17,6 @@ import org.slf4j.Marker;
import org.slf4j.MarkerFactory; import org.slf4j.MarkerFactory;
import java.util.*; import java.util.*;
import java.util.regex.Pattern;
@Singleton @Singleton
public class SearchQueryIndexService { public class SearchQueryIndexService {
@ -40,12 +41,9 @@ public class SearchQueryIndexService {
} }
public List<UrlDetails> executeQuery(Context ctx, SearchSpecification specs) { public List<UrlDetails> getResultsFromQuery(QueryResponse queryResponse) {
// Send the query
final var queryResponse = queryClient.delegate(ctx, specs);
// Remove duplicates and other chaff // Remove duplicates and other chaff
final var results = limitAndDeduplicateResults(specs, queryResponse.results); final var results = limitAndDeduplicateResults(queryResponse.specs(), queryResponse.results());
// Update the query count (this is what you see on the front page) // Update the query count (this is what you see on the front page)
searchVisitorCount.registerQuery(); searchVisitorCount.registerQuery();