mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
(query-service) Move query parsing from search-service to the new query service.
This commit is contained in:
parent
94c882af7d
commit
97e17282ab
@ -14,6 +14,7 @@ dependencies {
|
||||
implementation project(':code:api:index-api')
|
||||
implementation project(':code:common:config')
|
||||
implementation project(':code:libraries:message-queue')
|
||||
implementation project(':code:features-index:index-query')
|
||||
implementation project(':code:common:service-discovery')
|
||||
implementation project(':code:common:service-client')
|
||||
|
||||
|
@ -12,6 +12,8 @@ import nu.marginalia.index.client.model.results.SearchResultSet;
|
||||
import nu.marginalia.model.gson.GsonFactory;
|
||||
import nu.marginalia.mq.MessageQueueFactory;
|
||||
import nu.marginalia.mq.outbox.MqOutbox;
|
||||
import nu.marginalia.query.model.QueryParams;
|
||||
import nu.marginalia.query.model.QueryResponse;
|
||||
import nu.marginalia.service.descriptor.ServiceDescriptors;
|
||||
import nu.marginalia.service.id.ServiceId;
|
||||
import org.slf4j.Logger;
|
||||
@ -24,6 +26,7 @@ import java.util.UUID;
|
||||
public class QueryClient extends AbstractDynamicClient {
|
||||
|
||||
private static final Summary wmsa_search_index_api_delegate_time = Summary.build().name("wmsa_search_index_api_delegate_time").help("-").register();
|
||||
private static final Summary wmsa_search_index_api_search_time = Summary.build().name("wmsa_search_index_api_search_time").help("-").register();
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
@ -49,6 +52,12 @@ public class QueryClient extends AbstractDynamicClient {
|
||||
() -> this.postGet(ctx, "/delegate/", specs, SearchResultSet.class).blockingFirst()
|
||||
);
|
||||
}
|
||||
@CheckReturnValue
|
||||
public QueryResponse search(Context ctx, QueryParams params) {
|
||||
return wmsa_search_index_api_search_time.time(
|
||||
() -> this.postGet(ctx, "/search/", params, QueryResponse.class).blockingFirst()
|
||||
);
|
||||
}
|
||||
public MqOutbox outbox() {
|
||||
return outbox;
|
||||
}
|
||||
|
@ -0,0 +1,21 @@
|
||||
package nu.marginalia.query.model;
|
||||
|
||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class ProcessedQuery {
|
||||
public final SearchSpecification specs;
|
||||
public final List<String> searchTermsHuman;
|
||||
public final String domain;
|
||||
|
||||
public ProcessedQuery(SearchSpecification specs, List<String> searchTermsHuman, String domain) {
|
||||
this.specs = specs;
|
||||
this.searchTermsHuman = searchTermsHuman;
|
||||
this.domain = domain;
|
||||
}
|
||||
|
||||
public ProcessedQuery(SearchSpecification justSpecs) {
|
||||
this(justSpecs, List.of(), null);
|
||||
}
|
||||
}
|
@ -0,0 +1,26 @@
|
||||
package nu.marginalia.query.model;
|
||||
|
||||
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
|
||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||
import nu.marginalia.index.query.limit.QueryLimits;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public record QueryParams(
|
||||
String humanQuery,
|
||||
String nearDomain,
|
||||
List<String> tacitIncludes,
|
||||
List<String> tacitExcludes,
|
||||
List<String> tacitPriority,
|
||||
List<String> tacitAdvice,
|
||||
SpecificationLimit quality,
|
||||
SpecificationLimit year,
|
||||
SpecificationLimit size,
|
||||
SpecificationLimit rank,
|
||||
List<Integer> domainIds,
|
||||
QueryLimits limits,
|
||||
SearchSetIdentifier identifier
|
||||
)
|
||||
{
|
||||
}
|
@ -0,0 +1,23 @@
|
||||
package nu.marginalia.query.model;
|
||||
|
||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
public record QueryResponse(SearchSpecification specs,
|
||||
List<DecoratedSearchResultItem> results,
|
||||
List<String> searchTermsHuman,
|
||||
List<String> problems,
|
||||
String domain)
|
||||
{
|
||||
public Set<String> getAllKeywords() {
|
||||
Set<String> keywords = new HashSet<>(100);
|
||||
for (var sq : specs.subqueries) {
|
||||
keywords.addAll(sq.searchTermsInclude);
|
||||
}
|
||||
return keywords;
|
||||
}
|
||||
}
|
@ -1,4 +1,7 @@
|
||||
package nu.marginalia.index.query.limit;
|
||||
|
||||
/**
 * Limits applied to an index query.
 *
 * @param resultsByDomain maximum number of results per domain
 * @param resultsTotal    maximum number of results overall
 * @param timeoutMs       timeout, in milliseconds
 * @param fetchSize       fetch size used by the query
 */
public record QueryLimits(int resultsByDomain, int resultsTotal, int timeoutMs, int fetchSize) {
    /**
     * Returns a copy in which the per-domain limit is raised to the total
     * limit, so a single domain may fill the whole result set. All other
     * limits are carried over unchanged.
     */
    public QueryLimits forSingleDomain() {
        final int perDomain = resultsTotal;
        return new QueryLimits(perDomain, resultsTotal, timeoutMs, fetchSize);
    }
}
|
||||
|
@ -36,7 +36,6 @@ public class Token {
|
||||
case EXCLUDE_TERM: visitor.onExcludeTerm(this); break;
|
||||
case PRIORTY_TERM: visitor.onPriorityTerm(this); break;
|
||||
case ADVICE_TERM: visitor.onAdviceTerm(this); break;
|
||||
case NEAR_TERM: visitor.onNearTerm(this); break;
|
||||
case LITERAL_TERM: visitor.onLiteralTerm(this); break;
|
||||
|
||||
case YEAR_TERM: visitor.onYearTerm(this); break;
|
||||
|
@ -6,8 +6,6 @@ public interface TokenVisitor {
|
||||
void onExcludeTerm(Token token);
|
||||
void onPriorityTerm(Token token);
|
||||
void onAdviceTerm(Token token);
|
||||
void onNearTerm(Token token);
|
||||
|
||||
void onYearTerm(Token token);
|
||||
void onSizeTerm(Token token);
|
||||
void onRankTerm(Token token);
|
||||
|
@ -28,7 +28,12 @@ dependencies {
|
||||
implementation project(':code:common:service')
|
||||
implementation project(':code:common:service-client')
|
||||
implementation project(':code:api:index-api')
|
||||
implementation project(':code:api:query-api')
|
||||
implementation project(':code:common:service-discovery')
|
||||
implementation project(':code:features-search:query-parser')
|
||||
implementation project(':code:features-index:index-query')
|
||||
implementation project(':code:libraries:language-processing')
|
||||
implementation project(':code:libraries:term-frequency-dict')
|
||||
|
||||
implementation libs.bundles.slf4j
|
||||
|
||||
@ -39,6 +44,7 @@ dependencies {
|
||||
implementation libs.guice
|
||||
implementation libs.protobuf
|
||||
implementation libs.rxjava
|
||||
implementation libs.bundles.mariadb
|
||||
|
||||
testImplementation libs.bundles.slf4j.test
|
||||
testImplementation libs.bundles.junit
|
||||
|
@ -2,10 +2,13 @@ package nu.marginalia.query;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import com.google.inject.AbstractModule;
|
||||
import nu.marginalia.LanguageModels;
|
||||
import nu.marginalia.WmsaHome;
|
||||
import nu.marginalia.model.gson.GsonFactory;
|
||||
|
||||
public class QueryModule extends AbstractModule {
|
||||
public void configure() {
|
||||
bind(LanguageModels.class).toInstance(WmsaHome.getLanguageModels());
|
||||
bind(Gson.class).toProvider(GsonFactory::get);
|
||||
}
|
||||
}
|
||||
|
@ -6,27 +6,54 @@ import nu.marginalia.client.Context;
|
||||
import nu.marginalia.index.client.IndexClient;
|
||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||
import nu.marginalia.index.client.model.results.SearchResultSet;
|
||||
import nu.marginalia.query.model.QueryParams;
|
||||
import nu.marginalia.query.model.QueryResponse;
|
||||
import nu.marginalia.query.svc.QueryFactory;
|
||||
import nu.marginalia.service.server.BaseServiceParams;
|
||||
import nu.marginalia.service.server.Service;
|
||||
import spark.Request;
|
||||
import spark.Response;
|
||||
import spark.Spark;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class QueryService extends Service {
|
||||
|
||||
private final IndexClient indexClient;
|
||||
private final Gson gson;
|
||||
private final QueryFactory queryFactory;
|
||||
|
||||
@Inject
|
||||
public QueryService(BaseServiceParams params,
|
||||
IndexClient indexClient,
|
||||
Gson gson)
|
||||
Gson gson,
|
||||
QueryFactory queryFactory)
|
||||
{
|
||||
super(params);
|
||||
this.indexClient = indexClient;
|
||||
this.gson = gson;
|
||||
this.queryFactory = queryFactory;
|
||||
|
||||
Spark.post("/delegate/", this::delegateToIndex, gson::toJson);
|
||||
Spark.post("/search/", this::search, gson::toJson);
|
||||
}
|
||||
|
||||
private Object search(Request request, Response response) {
|
||||
String json = request.body();
|
||||
QueryParams params = gson.fromJson(json, QueryParams.class);
|
||||
|
||||
var query = queryFactory.createQuery(params);
|
||||
var rsp = executeQuery(Context.fromRequest(request), query.specs);
|
||||
|
||||
response.type("application/json");
|
||||
|
||||
return new QueryResponse(
|
||||
query.specs,
|
||||
rsp.results,
|
||||
query.searchTermsHuman,
|
||||
List.of(),
|
||||
query.domain
|
||||
);
|
||||
}
|
||||
|
||||
private SearchResultSet delegateToIndex(Request request, Response response) {
|
||||
@ -35,7 +62,10 @@ public class QueryService extends Service {
|
||||
|
||||
response.type("application/json");
|
||||
|
||||
return indexClient.query(Context.fromRequest(request), specsSet);
|
||||
return executeQuery(Context.fromRequest(request), specsSet);
|
||||
}
|
||||
|
||||
private SearchResultSet executeQuery(Context ctx, SearchSpecification query) {
|
||||
return indexClient.query(ctx, query);
|
||||
}
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.search.query;
|
||||
package nu.marginalia.query.svc;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
@ -6,34 +6,29 @@ import nu.marginalia.LanguageModels;
|
||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||
import nu.marginalia.index.client.model.query.SearchSubquery;
|
||||
import nu.marginalia.index.client.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.index.query.limit.QueryLimits;
|
||||
import nu.marginalia.index.query.limit.QueryStrategy;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
import nu.marginalia.language.EnglishDictionary;
|
||||
import nu.marginalia.language.WordPatterns;
|
||||
import nu.marginalia.ngrams.NGramBloomFilter;
|
||||
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
||||
import nu.marginalia.query.model.QueryParams;
|
||||
import nu.marginalia.query.model.ProcessedQuery;
|
||||
import nu.marginalia.query_parser.QueryParser;
|
||||
import nu.marginalia.query_parser.QueryPermutation;
|
||||
import nu.marginalia.query_parser.QueryVariants;
|
||||
import nu.marginalia.query_parser.token.Token;
|
||||
import nu.marginalia.query_parser.token.TokenType;
|
||||
import nu.marginalia.search.db.DbNearDomainsQuery;
|
||||
import nu.marginalia.search.model.SearchProfile;
|
||||
import nu.marginalia.search.query.model.SearchQuery;
|
||||
import nu.marginalia.search.query.model.UserSearchParameters;
|
||||
import nu.marginalia.language.WordPatterns;
|
||||
import org.eclipse.jetty.http.HttpStatus;
|
||||
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import spark.Spark;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
@Singleton
|
||||
public class QueryFactory {
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
private final DbNearDomainsQuery dbNearDomainsQuery;
|
||||
|
||||
private static final int RETAIN_QUERY_VARIANT_COUNT = 5;
|
||||
private final ThreadLocal<QueryVariants> queryVariants;
|
||||
@ -45,10 +40,7 @@ public class QueryFactory {
|
||||
public QueryFactory(LanguageModels lm,
|
||||
TermFrequencyDict dict,
|
||||
EnglishDictionary englishDictionary,
|
||||
NGramBloomFilter nGramBloomFilter,
|
||||
DbNearDomainsQuery dbNearDomainsQuery) {
|
||||
this.dbNearDomainsQuery = dbNearDomainsQuery;
|
||||
|
||||
NGramBloomFilter nGramBloomFilter) {
|
||||
this.queryVariants = ThreadLocal.withInitial(() -> new QueryVariants(lm ,dict, nGramBloomFilter, englishDictionary));
|
||||
}
|
||||
|
||||
@ -60,7 +52,7 @@ public class QueryFactory {
|
||||
return new QueryPermutation(queryVariants.get());
|
||||
}
|
||||
|
||||
public SearchQuery createQuery(UserSearchParameters params) {
|
||||
public ProcessedQuery createQuery(QueryParams params) {
|
||||
final var processedQuery = createQuery(getQueryPermutation(), params);
|
||||
final List<SearchSubquery> subqueries = processedQuery.specs.subqueries;
|
||||
|
||||
@ -72,59 +64,25 @@ public class QueryFactory {
|
||||
return processedQuery;
|
||||
}
|
||||
|
||||
public SearchQuery createQuery(SearchProfile profile,
|
||||
int limitPerDomain,
|
||||
int limitTotal,
|
||||
String... termsInclude)
|
||||
{
|
||||
List<SearchSubquery> sqs = new ArrayList<>();
|
||||
|
||||
sqs.add(new SearchSubquery(
|
||||
Arrays.asList(termsInclude),
|
||||
Collections.emptyList(),
|
||||
Collections.emptyList(),
|
||||
Collections.emptyList(),
|
||||
Collections.emptyList()));
|
||||
|
||||
var specs = SearchSpecification.builder()
|
||||
.subqueries(sqs)
|
||||
.domains(Collections.emptyList())
|
||||
.searchSetIdentifier(profile.searchSetIdentifier)
|
||||
.queryLimits(new QueryLimits(limitPerDomain, limitTotal, 250, 8192))
|
||||
.humanQuery("")
|
||||
.year(SpecificationLimit.none())
|
||||
.size(SpecificationLimit.none())
|
||||
.rank(SpecificationLimit.none())
|
||||
.rankingParams(ResultRankingParameters.sensibleDefaults())
|
||||
.quality(SpecificationLimit.none())
|
||||
.queryStrategy(QueryStrategy.AUTO)
|
||||
.build();
|
||||
|
||||
return new SearchQuery(specs);
|
||||
}
|
||||
|
||||
private void trimArray(List<?> arr, int maxSize) {
|
||||
if (arr.size() > maxSize) {
|
||||
arr.subList(0, arr.size() - maxSize).clear();
|
||||
}
|
||||
}
|
||||
|
||||
public SearchQuery createQuery(QueryPermutation queryPermutation,
|
||||
UserSearchParameters params)
|
||||
public ProcessedQuery createQuery(QueryPermutation queryPermutation,
|
||||
QueryParams params)
|
||||
{
|
||||
final var query = params.humanQuery();
|
||||
final var profile = params.profile();
|
||||
|
||||
if (query.length() > 1000) {
|
||||
Spark.halt(HttpStatus.BAD_REQUEST_400, "That's too much, man");
|
||||
throw new IllegalArgumentException("Query too long");
|
||||
}
|
||||
|
||||
List<String> searchTermsHuman = new ArrayList<>();
|
||||
List<String> problems = new ArrayList<>();
|
||||
|
||||
|
||||
String near = null,
|
||||
domain = null;
|
||||
String domain = null;
|
||||
|
||||
var basicQuery = queryParser.parse(query);
|
||||
|
||||
@ -134,7 +92,7 @@ public class QueryFactory {
|
||||
}
|
||||
|
||||
|
||||
QueryLimitsAccumulator qualityLimits = new QueryLimitsAccumulator(profile);
|
||||
QueryLimitsAccumulator qualityLimits = new QueryLimitsAccumulator(params);
|
||||
|
||||
for (Token t : basicQuery) {
|
||||
if (t.type == TokenType.QUOT_TERM || t.type == TokenType.LITERAL_TERM) {
|
||||
@ -153,50 +111,46 @@ public class QueryFactory {
|
||||
List<SearchSubquery> subqueries = new ArrayList<>();
|
||||
|
||||
for (var parts : queryPermutations) {
|
||||
QuerySearchTermsAccumulator termsAccumulator = new QuerySearchTermsAccumulator(profile, parts);
|
||||
QuerySearchTermsAccumulator termsAccumulator = new QuerySearchTermsAccumulator(parts);
|
||||
|
||||
SearchSubquery subquery = termsAccumulator.createSubquery();
|
||||
|
||||
near = termsAccumulator.near;
|
||||
domain = termsAccumulator.domain;
|
||||
|
||||
params.profile().addTacitTerms(subquery);
|
||||
params.jsSetting().addTacitTerms(subquery);
|
||||
|
||||
subqueries.add(subquery);
|
||||
}
|
||||
|
||||
List<Integer> domains = Collections.emptyList();
|
||||
List<Integer> domainIds = params.domainIds();
|
||||
|
||||
if (near != null) {
|
||||
if (domain == null) {
|
||||
domains = dbNearDomainsQuery.getRelatedDomains(near, problems::add);
|
||||
}
|
||||
}
|
||||
|
||||
int domainLimit;
|
||||
var limits = params.limits();
|
||||
// Disable limits on number of results per domain if we're searching with a site:-type term
|
||||
if (domain != null) {
|
||||
domainLimit = 1000;
|
||||
} else {
|
||||
domainLimit = 2;
|
||||
limits = limits.forSingleDomain();
|
||||
}
|
||||
|
||||
var specsBuilder = SearchSpecification.builder()
|
||||
.subqueries(subqueries)
|
||||
.queryLimits(new QueryLimits(domainLimit, 100, 250, 4096))
|
||||
.humanQuery(query)
|
||||
.quality(qualityLimits.qualityLimit)
|
||||
.year(qualityLimits.year)
|
||||
.size(qualityLimits.size)
|
||||
.rank(qualityLimits.rank)
|
||||
.domains(domains)
|
||||
.domains(domainIds)
|
||||
.queryLimits(limits)
|
||||
.searchSetIdentifier(params.identifier())
|
||||
.rankingParams(ResultRankingParameters.sensibleDefaults())
|
||||
.queryStrategy(qualityLimits.queryStrategy)
|
||||
.searchSetIdentifier(profile.searchSetIdentifier);
|
||||
.queryStrategy(qualityLimits.queryStrategy);
|
||||
|
||||
SearchSpecification specs = specsBuilder.build();
|
||||
|
||||
return new SearchQuery(specs, searchTermsHuman, domain);
|
||||
for (var sq : specs.subqueries) {
|
||||
sq.searchTermsAdvice.addAll(params.tacitAdvice());
|
||||
sq.searchTermsPriority.addAll(params.tacitPriority());
|
||||
sq.searchTermsInclude.addAll(params.tacitIncludes());
|
||||
sq.searchTermsExclude.addAll(params.tacitExcludes());
|
||||
}
|
||||
|
||||
return new ProcessedQuery(specs, searchTermsHuman, domain);
|
||||
}
|
||||
|
||||
|
@ -1,10 +1,10 @@
|
||||
package nu.marginalia.search.query;
|
||||
package nu.marginalia.query.svc;
|
||||
|
||||
import nu.marginalia.index.query.limit.QueryStrategy;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
import nu.marginalia.query.model.QueryParams;
|
||||
import nu.marginalia.query_parser.token.Token;
|
||||
import nu.marginalia.query_parser.token.TokenVisitor;
|
||||
import nu.marginalia.search.model.SearchProfile;
|
||||
|
||||
public class QueryLimitsAccumulator implements TokenVisitor {
|
||||
public SpecificationLimit qualityLimit;
|
||||
@ -14,11 +14,11 @@ public class QueryLimitsAccumulator implements TokenVisitor {
|
||||
|
||||
public QueryStrategy queryStrategy = QueryStrategy.AUTO;
|
||||
|
||||
public QueryLimitsAccumulator(SearchProfile profile) {
|
||||
qualityLimit = profile.getQualityLimit();
|
||||
year = profile.getYearLimit();
|
||||
size = profile.getSizeLimit();
|
||||
rank = SpecificationLimit.none();
|
||||
public QueryLimitsAccumulator(QueryParams params) {
|
||||
qualityLimit = params.quality();
|
||||
year = params.year();
|
||||
size = params.size();
|
||||
rank = params.rank();
|
||||
}
|
||||
|
||||
private SpecificationLimit parseSpecificationLimit(String str) {
|
||||
@ -89,7 +89,4 @@ public class QueryLimitsAccumulator implements TokenVisitor {
|
||||
|
||||
@Override
|
||||
public void onAdviceTerm(Token token) {}
|
||||
|
||||
@Override
|
||||
public void onNearTerm(Token token) {}
|
||||
}
|
@ -1,10 +1,10 @@
|
||||
package nu.marginalia.search.query;
|
||||
package nu.marginalia.query.svc;
|
||||
|
||||
import nu.marginalia.index.client.model.query.SearchSubquery;
|
||||
import nu.marginalia.language.WordPatterns;
|
||||
import nu.marginalia.query.model.QueryParams;
|
||||
import nu.marginalia.query_parser.token.Token;
|
||||
import nu.marginalia.query_parser.token.TokenVisitor;
|
||||
import nu.marginalia.search.model.SearchProfile;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
@ -18,16 +18,13 @@ public class QuerySearchTermsAccumulator implements TokenVisitor {
|
||||
public List<String> searchTermsPriority = new ArrayList<>();
|
||||
public List<List<String>> searchTermCoherences = new ArrayList<>();
|
||||
|
||||
public String near;
|
||||
public String domain;
|
||||
|
||||
public SearchSubquery createSubquery() {
|
||||
return new SearchSubquery(searchTermsInclude, searchTermsExclude, searchTermsAdvice, searchTermsPriority, searchTermCoherences);
|
||||
}
|
||||
|
||||
public QuerySearchTermsAccumulator(SearchProfile profile, List<Token> parts) {
|
||||
near = profile.getNearDomain();
|
||||
|
||||
public QuerySearchTermsAccumulator(List<Token> parts) {
|
||||
for (Token t : parts) {
|
||||
t.visit(this);
|
||||
}
|
||||
@ -100,11 +97,6 @@ public class QuerySearchTermsAccumulator implements TokenVisitor {
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onNearTerm(Token token) {
|
||||
near = token.str;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onYearTerm(Token token) {
|
||||
|
@ -1,14 +1,15 @@
|
||||
package nu.marginalia.search.query;
|
||||
package nu.marginalia.query.svc;
|
||||
|
||||
import nu.marginalia.WmsaHome;
|
||||
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
|
||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||
import nu.marginalia.index.query.limit.QueryLimits;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimitType;
|
||||
import nu.marginalia.language.EnglishDictionary;
|
||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||
import nu.marginalia.ngrams.NGramBloomFilter;
|
||||
import nu.marginalia.query.model.QueryParams;
|
||||
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
||||
import nu.marginalia.search.command.SearchJsParameter;
|
||||
import nu.marginalia.search.model.SearchProfile;
|
||||
import nu.marginalia.search.query.model.UserSearchParameters;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
@ -30,15 +31,24 @@ public class QueryFactoryTest {
|
||||
queryFactory = new QueryFactory(lm,
|
||||
tfd,
|
||||
new EnglishDictionary(tfd),
|
||||
new NGramBloomFilter(lm),
|
||||
null
|
||||
new NGramBloomFilter(lm)
|
||||
);
|
||||
}
|
||||
|
||||
public SearchSpecification parseAndGetSpecs(String query) {
|
||||
return queryFactory.createQuery(
|
||||
new UserSearchParameters(query, SearchProfile.CORPO, SearchJsParameter.DEFAULT)
|
||||
).specs;
|
||||
new QueryParams(query, null,
|
||||
List.of(),
|
||||
List.of(),
|
||||
List.of(),
|
||||
List.of(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
null,
|
||||
new QueryLimits(100, 100, 100, 100),
|
||||
SearchSetIdentifier.BLOGS)).specs;
|
||||
}
|
||||
|
||||
@Test
|
@ -7,12 +7,12 @@ import io.reactivex.rxjava3.schedulers.Schedulers;
|
||||
import nu.marginalia.assistant.client.AssistantClient;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.db.DbDomainQueries;
|
||||
import nu.marginalia.query.client.QueryClient;
|
||||
import nu.marginalia.query.model.QueryResponse;
|
||||
import nu.marginalia.search.model.UrlDetails;
|
||||
import nu.marginalia.client.Context;
|
||||
import nu.marginalia.search.model.DecoratedSearchResults;
|
||||
import nu.marginalia.search.query.QueryFactory;
|
||||
import nu.marginalia.search.query.model.SearchQuery;
|
||||
import nu.marginalia.search.query.model.UserSearchParameters;
|
||||
import nu.marginalia.search.model.UserSearchParameters;
|
||||
import nu.marginalia.search.svc.SearchQueryIndexService;
|
||||
import nu.marginalia.search.svc.SearchUnitConversionService;
|
||||
import org.apache.logging.log4j.util.Strings;
|
||||
@ -37,58 +37,71 @@ public class SearchOperator {
|
||||
|
||||
private final AssistantClient assistantClient;
|
||||
private final DbDomainQueries domainQueries;
|
||||
private final QueryFactory queryFactory;
|
||||
|
||||
private final QueryClient queryClient;
|
||||
private final SearchQueryIndexService searchQueryService;
|
||||
private final SearchQueryParamFactory paramFactory;
|
||||
private final SearchUnitConversionService searchUnitConversionService;
|
||||
|
||||
|
||||
@Inject
|
||||
public SearchOperator(AssistantClient assistantClient,
|
||||
DbDomainQueries domainQueries,
|
||||
QueryFactory queryFactory,
|
||||
QueryClient queryClient,
|
||||
SearchQueryIndexService searchQueryService,
|
||||
SearchUnitConversionService searchUnitConversionService) {
|
||||
SearchQueryParamFactory paramFactory,
|
||||
SearchUnitConversionService searchUnitConversionService)
|
||||
{
|
||||
|
||||
this.assistantClient = assistantClient;
|
||||
this.domainQueries = domainQueries;
|
||||
this.queryFactory = queryFactory;
|
||||
this.queryClient = queryClient;
|
||||
|
||||
this.searchQueryService = searchQueryService;
|
||||
this.paramFactory = paramFactory;
|
||||
this.searchUnitConversionService = searchUnitConversionService;
|
||||
}
|
||||
|
||||
public List<UrlDetails> doApiSearch(Context ctx,
|
||||
UserSearchParameters params) {
|
||||
|
||||
// TODO: This shouldn't route through search-service!
|
||||
var queryParams = paramFactory.forRegularSearch(params);
|
||||
var queryResponse = queryClient.search(ctx, queryParams);
|
||||
|
||||
SearchQuery processedQuery = queryFactory.createQuery(params);
|
||||
logger.info(queryMarker, "Human terms (API): {}", Strings.join(queryResponse.searchTermsHuman(), ','));
|
||||
|
||||
logger.info(queryMarker, "Human terms (API): {}", Strings.join(processedQuery.searchTermsHuman, ','));
|
||||
|
||||
return searchQueryService.executeQuery(ctx, processedQuery.specs);
|
||||
return searchQueryService.getResultsFromQuery(queryResponse);
|
||||
}
|
||||
|
||||
public DecoratedSearchResults doSearch(Context ctx, UserSearchParameters params) {
|
||||
public List<UrlDetails> doSiteSearch(Context ctx,
|
||||
String domain) {
|
||||
|
||||
Future<String> eval = searchUnitConversionService.tryEval(ctx, params.humanQuery());
|
||||
SearchQuery processedQuery = queryFactory.createQuery(params);
|
||||
var queryParams = paramFactory.forSiteSearch(domain);
|
||||
var queryResponse = queryClient.search(ctx, queryParams);
|
||||
|
||||
logger.info(queryMarker, "Human terms: {}", Strings.join(processedQuery.searchTermsHuman, ','));
|
||||
return searchQueryService.getResultsFromQuery(queryResponse);
|
||||
}
|
||||
|
||||
List<UrlDetails> queryResults = searchQueryService.executeQuery(ctx, processedQuery.specs);
|
||||
public DecoratedSearchResults doSearch(Context ctx, UserSearchParameters userParams) {
|
||||
|
||||
Future<String> eval = searchUnitConversionService.tryEval(ctx, userParams.humanQuery());
|
||||
var queryParams = paramFactory.forRegularSearch(userParams);
|
||||
var queryResponse = queryClient.search(ctx, queryParams);
|
||||
|
||||
List<UrlDetails> queryResults = searchQueryService.getResultsFromQuery(queryResponse);
|
||||
|
||||
logger.info(queryMarker, "Human terms: {}", Strings.join(queryResponse.searchTermsHuman(), ','));
|
||||
logger.info(queryMarker, "Search Result Count: {}", queryResults.size());
|
||||
|
||||
String evalResult = getFutureOrDefault(eval, "");
|
||||
|
||||
return DecoratedSearchResults.builder()
|
||||
.params(params)
|
||||
.problems(getProblems(ctx, evalResult, queryResults, processedQuery))
|
||||
.params(userParams)
|
||||
.problems(getProblems(ctx, evalResult, queryResults, queryResponse))
|
||||
.evalResult(evalResult)
|
||||
.results(queryResults)
|
||||
.focusDomain(processedQuery.domain)
|
||||
.focusDomainId(getDomainId(processedQuery.domain))
|
||||
.focusDomain(queryResponse.domain())
|
||||
.focusDomainId(getDomainId(queryResponse.domain()))
|
||||
.build();
|
||||
}
|
||||
|
||||
@ -113,20 +126,20 @@ public class SearchOperator {
|
||||
return domainQueries.tryGetDomainId(new EdgeDomain(domain)).orElse(-1);
|
||||
}
|
||||
|
||||
private List<String> getProblems(Context ctx, String evalResult, List<UrlDetails> queryResults, SearchQuery processedQuery) {
|
||||
final List<String> problems = new ArrayList<>(processedQuery.problems);
|
||||
boolean siteSearch = processedQuery.domain != null;
|
||||
private List<String> getProblems(Context ctx, String evalResult, List<UrlDetails> queryResults, QueryResponse response) {
|
||||
final List<String> problems = new ArrayList<>(response.problems());
|
||||
boolean siteSearch = response.domain() != null;
|
||||
|
||||
if (!siteSearch) {
|
||||
if (queryResults.size() <= 5 && null == evalResult) {
|
||||
spellCheckTerms(ctx, processedQuery).forEach(problems::add);
|
||||
spellCheckTerms(ctx, response).forEach(problems::add);
|
||||
}
|
||||
|
||||
if (queryResults.size() <= 5) {
|
||||
problems.add("Try rephrasing the query, changing the word order or using synonyms to get different results. <a href=\"https://memex.marginalia.nu/projects/edge/search-tips.gmi\">Tips</a>.");
|
||||
}
|
||||
|
||||
Set<String> representativeKeywords = processedQuery.getAllKeywords();
|
||||
Set<String> representativeKeywords = response.getAllKeywords();
|
||||
if (representativeKeywords.size()>1 && (representativeKeywords.contains("definition") || representativeKeywords.contains("define") || representativeKeywords.contains("meaning")))
|
||||
{
|
||||
problems.add("Tip: Try using a query that looks like <tt>define:word</tt> if you want a dictionary definition");
|
||||
@ -137,8 +150,8 @@ public class SearchOperator {
|
||||
}
|
||||
|
||||
|
||||
private Iterable<String> spellCheckTerms(Context ctx, SearchQuery disjointedQuery) {
|
||||
return Observable.fromIterable(disjointedQuery.searchTermsHuman)
|
||||
private Iterable<String> spellCheckTerms(Context ctx, QueryResponse response) {
|
||||
return Observable.fromIterable(response.searchTermsHuman())
|
||||
.subscribeOn(Schedulers.io())
|
||||
.flatMap(term -> assistantClient.spellCheck(ctx, term)
|
||||
.onErrorReturn(e -> Collections.emptyList())
|
||||
|
@ -0,0 +1,53 @@
|
||||
package nu.marginalia.search;
|
||||
|
||||
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
|
||||
import nu.marginalia.index.client.model.query.SearchSubquery;
|
||||
import nu.marginalia.index.query.limit.QueryLimits;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
import nu.marginalia.query.model.QueryParams;
|
||||
import nu.marginalia.search.model.UserSearchParameters;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class SearchQueryParamFactory {
|
||||
|
||||
public QueryParams forRegularSearch(UserSearchParameters userParams) {
|
||||
SearchSubquery prototype = new SearchSubquery();
|
||||
var profile = userParams.profile();
|
||||
profile.addTacitTerms(prototype);
|
||||
|
||||
return new QueryParams(
|
||||
userParams.humanQuery(),
|
||||
null,
|
||||
prototype.searchTermsInclude,
|
||||
prototype.searchTermsExclude,
|
||||
prototype.searchTermsPriority,
|
||||
prototype.searchTermsAdvice,
|
||||
profile.getQualityLimit(),
|
||||
profile.getYearLimit(),
|
||||
profile.getSizeLimit(),
|
||||
SpecificationLimit.none(),
|
||||
List.of(),
|
||||
new QueryLimits(2, 100, 200, 8192),
|
||||
profile.searchSetIdentifier
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
public QueryParams forSiteSearch(String domain) {
|
||||
return new QueryParams("site:"+domain,
|
||||
null,
|
||||
List.of(),
|
||||
List.of(),
|
||||
List.of(),
|
||||
List.of(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
List.of(),
|
||||
new QueryLimits(100, 100, 100, 512),
|
||||
SearchSetIdentifier.NONE
|
||||
);
|
||||
}
|
||||
}
|
@ -8,7 +8,7 @@ import nu.marginalia.search.command.SearchCommandInterface;
|
||||
import nu.marginalia.search.command.SearchParameters;
|
||||
import nu.marginalia.search.model.DecoratedSearchResults;
|
||||
import nu.marginalia.search.model.UrlDetails;
|
||||
import nu.marginalia.search.query.model.UserSearchParameters;
|
||||
import nu.marginalia.search.model.UserSearchParameters;
|
||||
import nu.marginalia.renderer.MustacheRenderer;
|
||||
import nu.marginalia.renderer.RendererFactory;
|
||||
|
||||
|
@ -3,12 +3,12 @@ package nu.marginalia.search.command.commands;
|
||||
import com.google.inject.Inject;
|
||||
import nu.marginalia.db.DbDomainQueries;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.search.SearchOperator;
|
||||
import nu.marginalia.search.model.UrlDetails;
|
||||
import nu.marginalia.search.command.SearchCommandInterface;
|
||||
import nu.marginalia.search.command.SearchParameters;
|
||||
import nu.marginalia.search.model.DomainInformation;
|
||||
import nu.marginalia.search.model.SearchProfile;
|
||||
import nu.marginalia.search.query.QueryFactory;
|
||||
import nu.marginalia.search.siteinfo.DomainInformationService;
|
||||
import nu.marginalia.search.svc.SearchQueryIndexService;
|
||||
import nu.marginalia.client.Context;
|
||||
@ -25,9 +25,9 @@ import java.util.regex.Pattern;
|
||||
|
||||
public class SiteListCommand implements SearchCommandInterface {
|
||||
private final DbDomainQueries domainQueries;
|
||||
private final QueryFactory queryFactory;
|
||||
private final DomainInformationService domainInformationService;
|
||||
private final SearchQueryIndexService searchQueryIndexService;
|
||||
private final SearchOperator searchOperator;
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
private final MustacheRenderer<DomainInformation> siteInfoRenderer;
|
||||
@ -38,16 +38,16 @@ public class SiteListCommand implements SearchCommandInterface {
|
||||
public SiteListCommand(
|
||||
DomainInformationService domainInformationService,
|
||||
DbDomainQueries domainQueries,
|
||||
QueryFactory queryFactory, RendererFactory rendererFactory,
|
||||
SearchQueryIndexService searchQueryIndexService)
|
||||
RendererFactory rendererFactory,
|
||||
SearchQueryIndexService searchQueryIndexService, SearchOperator searchOperator)
|
||||
throws IOException
|
||||
{
|
||||
this.domainQueries = domainQueries;
|
||||
this.domainInformationService = domainInformationService;
|
||||
this.queryFactory = queryFactory;
|
||||
|
||||
siteInfoRenderer = rendererFactory.renderer("search/site-info");
|
||||
this.searchQueryIndexService = searchQueryIndexService;
|
||||
this.searchOperator = searchOperator;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -63,8 +63,8 @@ public class SiteListCommand implements SearchCommandInterface {
|
||||
Path screenshotPath = null;
|
||||
int domainId = -1;
|
||||
if (null != domain) {
|
||||
var dumbQuery = queryFactory.createQuery(SearchProfile.CORPO, 100, 100, "site:"+domain);
|
||||
resultSet = searchQueryIndexService.executeQuery(ctx, dumbQuery.specs);
|
||||
resultSet = searchOperator.doSiteSearch(ctx, domain.toString());
|
||||
|
||||
var maybeId = domainQueries.tryGetDomainId(domain);
|
||||
if (maybeId.isPresent()) {
|
||||
domainId = maybeId.getAsInt();
|
||||
|
@ -3,7 +3,6 @@ package nu.marginalia.search.model;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import nu.marginalia.search.query.model.UserSearchParameters;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.search.query.model;
|
||||
package nu.marginalia.search.model;
|
||||
|
||||
import nu.marginalia.search.command.SearchJsParameter;
|
||||
import nu.marginalia.search.model.SearchProfile;
|
@ -1,28 +0,0 @@
|
||||
package nu.marginalia.search.query.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
@AllArgsConstructor
|
||||
public class SearchQuery {
|
||||
public final SearchSpecification specs;
|
||||
|
||||
public final Set<String> problems = new TreeSet<>();
|
||||
public final List<String> searchTermsHuman;
|
||||
public String domain;
|
||||
|
||||
public SearchQuery(SearchSpecification justSpecs) {
|
||||
searchTermsHuman = new ArrayList<>();
|
||||
specs = justSpecs;
|
||||
}
|
||||
|
||||
public Set<String> getAllKeywords() {
|
||||
Set<String> keywords = new HashSet<>(100);
|
||||
for (var sq : specs.subqueries) {
|
||||
keywords.addAll(sq.searchTermsInclude);
|
||||
}
|
||||
return keywords;
|
||||
}
|
||||
}
|
@ -14,7 +14,7 @@ import nu.marginalia.search.client.model.ApiSearchResults;
|
||||
import nu.marginalia.search.model.SearchProfile;
|
||||
import nu.marginalia.client.Context;
|
||||
import nu.marginalia.search.command.SearchJsParameter;
|
||||
import nu.marginalia.search.query.model.UserSearchParameters;
|
||||
import nu.marginalia.search.model.UserSearchParameters;
|
||||
import spark.Request;
|
||||
import spark.Response;
|
||||
|
||||
|
@ -5,6 +5,8 @@ import com.google.inject.Singleton;
|
||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
|
||||
import nu.marginalia.query.client.QueryClient;
|
||||
import nu.marginalia.query.model.QueryParams;
|
||||
import nu.marginalia.query.model.QueryResponse;
|
||||
import nu.marginalia.search.model.UrlDetails;
|
||||
import nu.marginalia.search.results.SearchResultDecorator;
|
||||
import nu.marginalia.search.results.UrlDeduplicator;
|
||||
@ -15,7 +17,6 @@ import org.slf4j.Marker;
|
||||
import org.slf4j.MarkerFactory;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@Singleton
|
||||
public class SearchQueryIndexService {
|
||||
@ -40,12 +41,9 @@ public class SearchQueryIndexService {
|
||||
|
||||
}
|
||||
|
||||
public List<UrlDetails> executeQuery(Context ctx, SearchSpecification specs) {
|
||||
// Send the query
|
||||
final var queryResponse = queryClient.delegate(ctx, specs);
|
||||
|
||||
public List<UrlDetails> getResultsFromQuery(QueryResponse queryResponse) {
|
||||
// Remove duplicates and other chaff
|
||||
final var results = limitAndDeduplicateResults(specs, queryResponse.results);
|
||||
final var results = limitAndDeduplicateResults(queryResponse.specs(), queryResponse.results());
|
||||
|
||||
// Update the query count (this is what you see on the front page)
|
||||
searchVisitorCount.registerQuery();
|
||||
|
Loading…
Reference in New Issue
Block a user