mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(query-service) Move query parsing from search-service to the new query service.
This commit is contained in:
parent
94c882af7d
commit
97e17282ab
@ -14,6 +14,7 @@ dependencies {
|
|||||||
implementation project(':code:api:index-api')
|
implementation project(':code:api:index-api')
|
||||||
implementation project(':code:common:config')
|
implementation project(':code:common:config')
|
||||||
implementation project(':code:libraries:message-queue')
|
implementation project(':code:libraries:message-queue')
|
||||||
|
implementation project(':code:features-index:index-query')
|
||||||
implementation project(':code:common:service-discovery')
|
implementation project(':code:common:service-discovery')
|
||||||
implementation project(':code:common:service-client')
|
implementation project(':code:common:service-client')
|
||||||
|
|
||||||
|
@ -12,6 +12,8 @@ import nu.marginalia.index.client.model.results.SearchResultSet;
|
|||||||
import nu.marginalia.model.gson.GsonFactory;
|
import nu.marginalia.model.gson.GsonFactory;
|
||||||
import nu.marginalia.mq.MessageQueueFactory;
|
import nu.marginalia.mq.MessageQueueFactory;
|
||||||
import nu.marginalia.mq.outbox.MqOutbox;
|
import nu.marginalia.mq.outbox.MqOutbox;
|
||||||
|
import nu.marginalia.query.model.QueryParams;
|
||||||
|
import nu.marginalia.query.model.QueryResponse;
|
||||||
import nu.marginalia.service.descriptor.ServiceDescriptors;
|
import nu.marginalia.service.descriptor.ServiceDescriptors;
|
||||||
import nu.marginalia.service.id.ServiceId;
|
import nu.marginalia.service.id.ServiceId;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
@ -24,6 +26,7 @@ import java.util.UUID;
|
|||||||
public class QueryClient extends AbstractDynamicClient {
|
public class QueryClient extends AbstractDynamicClient {
|
||||||
|
|
||||||
private static final Summary wmsa_search_index_api_delegate_time = Summary.build().name("wmsa_search_index_api_delegate_time").help("-").register();
|
private static final Summary wmsa_search_index_api_delegate_time = Summary.build().name("wmsa_search_index_api_delegate_time").help("-").register();
|
||||||
|
private static final Summary wmsa_search_index_api_search_time = Summary.build().name("wmsa_search_index_api_search_time").help("-").register();
|
||||||
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
|
|
||||||
@ -49,6 +52,12 @@ public class QueryClient extends AbstractDynamicClient {
|
|||||||
() -> this.postGet(ctx, "/delegate/", specs, SearchResultSet.class).blockingFirst()
|
() -> this.postGet(ctx, "/delegate/", specs, SearchResultSet.class).blockingFirst()
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@CheckReturnValue
|
||||||
|
public QueryResponse search(Context ctx, QueryParams params) {
|
||||||
|
return wmsa_search_index_api_search_time.time(
|
||||||
|
() -> this.postGet(ctx, "/search/", params, QueryResponse.class).blockingFirst()
|
||||||
|
);
|
||||||
|
}
|
||||||
public MqOutbox outbox() {
|
public MqOutbox outbox() {
|
||||||
return outbox;
|
return outbox;
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,21 @@
|
|||||||
|
package nu.marginalia.query.model;
|
||||||
|
|
||||||
|
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
public class ProcessedQuery {
|
||||||
|
public final SearchSpecification specs;
|
||||||
|
public final List<String> searchTermsHuman;
|
||||||
|
public final String domain;
|
||||||
|
|
||||||
|
public ProcessedQuery(SearchSpecification specs, List<String> searchTermsHuman, String domain) {
|
||||||
|
this.specs = specs;
|
||||||
|
this.searchTermsHuman = searchTermsHuman;
|
||||||
|
this.domain = domain;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ProcessedQuery(SearchSpecification justSpecs) {
|
||||||
|
this(justSpecs, List.of(), null);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,26 @@
|
|||||||
|
package nu.marginalia.query.model;
|
||||||
|
|
||||||
|
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
|
||||||
|
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||||
|
import nu.marginalia.index.query.limit.QueryLimits;
|
||||||
|
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public record QueryParams(
|
||||||
|
String humanQuery,
|
||||||
|
String nearDomain,
|
||||||
|
List<String> tacitIncludes,
|
||||||
|
List<String> tacitExcludes,
|
||||||
|
List<String> tacitPriority,
|
||||||
|
List<String> tacitAdvice,
|
||||||
|
SpecificationLimit quality,
|
||||||
|
SpecificationLimit year,
|
||||||
|
SpecificationLimit size,
|
||||||
|
SpecificationLimit rank,
|
||||||
|
List<Integer> domainIds,
|
||||||
|
QueryLimits limits,
|
||||||
|
SearchSetIdentifier identifier
|
||||||
|
)
|
||||||
|
{
|
||||||
|
}
|
@ -0,0 +1,23 @@
|
|||||||
|
package nu.marginalia.query.model;
|
||||||
|
|
||||||
|
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||||
|
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
|
||||||
|
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
public record QueryResponse(SearchSpecification specs,
|
||||||
|
List<DecoratedSearchResultItem> results,
|
||||||
|
List<String> searchTermsHuman,
|
||||||
|
List<String> problems,
|
||||||
|
String domain)
|
||||||
|
{
|
||||||
|
public Set<String> getAllKeywords() {
|
||||||
|
Set<String> keywords = new HashSet<>(100);
|
||||||
|
for (var sq : specs.subqueries) {
|
||||||
|
keywords.addAll(sq.searchTermsInclude);
|
||||||
|
}
|
||||||
|
return keywords;
|
||||||
|
}
|
||||||
|
}
|
@ -1,4 +1,7 @@
|
|||||||
package nu.marginalia.index.query.limit;
|
package nu.marginalia.index.query.limit;
|
||||||
|
|
||||||
/**
 * Limits applied to a query.
 *
 * @param resultsByDomain maximum number of results per domain
 * @param resultsTotal    maximum number of results in total
 * @param timeoutMs       timeout in milliseconds
 * @param fetchSize       fetch size
 */
public record QueryLimits(int resultsByDomain, int resultsTotal, int timeoutMs, int fetchSize) {

    /**
     * Returns a copy of these limits in which the per-domain limit is raised
     * to the total limit, so a single domain may fill the whole result set.
     *
     * @return new limits with {@code resultsByDomain} equal to {@code resultsTotal}
     */
    public QueryLimits forSingleDomain() {
        final int perDomain = resultsTotal;
        return new QueryLimits(perDomain, resultsTotal, timeoutMs, fetchSize);
    }
}
|
||||||
|
@ -36,7 +36,6 @@ public class Token {
|
|||||||
case EXCLUDE_TERM: visitor.onExcludeTerm(this); break;
|
case EXCLUDE_TERM: visitor.onExcludeTerm(this); break;
|
||||||
case PRIORTY_TERM: visitor.onPriorityTerm(this); break;
|
case PRIORTY_TERM: visitor.onPriorityTerm(this); break;
|
||||||
case ADVICE_TERM: visitor.onAdviceTerm(this); break;
|
case ADVICE_TERM: visitor.onAdviceTerm(this); break;
|
||||||
case NEAR_TERM: visitor.onNearTerm(this); break;
|
|
||||||
case LITERAL_TERM: visitor.onLiteralTerm(this); break;
|
case LITERAL_TERM: visitor.onLiteralTerm(this); break;
|
||||||
|
|
||||||
case YEAR_TERM: visitor.onYearTerm(this); break;
|
case YEAR_TERM: visitor.onYearTerm(this); break;
|
||||||
|
@ -6,8 +6,6 @@ public interface TokenVisitor {
|
|||||||
void onExcludeTerm(Token token);
|
void onExcludeTerm(Token token);
|
||||||
void onPriorityTerm(Token token);
|
void onPriorityTerm(Token token);
|
||||||
void onAdviceTerm(Token token);
|
void onAdviceTerm(Token token);
|
||||||
void onNearTerm(Token token);
|
|
||||||
|
|
||||||
void onYearTerm(Token token);
|
void onYearTerm(Token token);
|
||||||
void onSizeTerm(Token token);
|
void onSizeTerm(Token token);
|
||||||
void onRankTerm(Token token);
|
void onRankTerm(Token token);
|
||||||
|
@ -28,7 +28,12 @@ dependencies {
|
|||||||
implementation project(':code:common:service')
|
implementation project(':code:common:service')
|
||||||
implementation project(':code:common:service-client')
|
implementation project(':code:common:service-client')
|
||||||
implementation project(':code:api:index-api')
|
implementation project(':code:api:index-api')
|
||||||
|
implementation project(':code:api:query-api')
|
||||||
implementation project(':code:common:service-discovery')
|
implementation project(':code:common:service-discovery')
|
||||||
|
implementation project(':code:features-search:query-parser')
|
||||||
|
implementation project(':code:features-index:index-query')
|
||||||
|
implementation project(':code:libraries:language-processing')
|
||||||
|
implementation project(':code:libraries:term-frequency-dict')
|
||||||
|
|
||||||
implementation libs.bundles.slf4j
|
implementation libs.bundles.slf4j
|
||||||
|
|
||||||
@ -39,6 +44,7 @@ dependencies {
|
|||||||
implementation libs.guice
|
implementation libs.guice
|
||||||
implementation libs.protobuf
|
implementation libs.protobuf
|
||||||
implementation libs.rxjava
|
implementation libs.rxjava
|
||||||
|
implementation libs.bundles.mariadb
|
||||||
|
|
||||||
testImplementation libs.bundles.slf4j.test
|
testImplementation libs.bundles.slf4j.test
|
||||||
testImplementation libs.bundles.junit
|
testImplementation libs.bundles.junit
|
||||||
|
@ -2,10 +2,13 @@ package nu.marginalia.query;
|
|||||||
|
|
||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
import com.google.inject.AbstractModule;
|
import com.google.inject.AbstractModule;
|
||||||
|
import nu.marginalia.LanguageModels;
|
||||||
|
import nu.marginalia.WmsaHome;
|
||||||
import nu.marginalia.model.gson.GsonFactory;
|
import nu.marginalia.model.gson.GsonFactory;
|
||||||
|
|
||||||
public class QueryModule extends AbstractModule {
|
public class QueryModule extends AbstractModule {
|
||||||
public void configure() {
|
public void configure() {
|
||||||
|
bind(LanguageModels.class).toInstance(WmsaHome.getLanguageModels());
|
||||||
bind(Gson.class).toProvider(GsonFactory::get);
|
bind(Gson.class).toProvider(GsonFactory::get);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6,27 +6,54 @@ import nu.marginalia.client.Context;
|
|||||||
import nu.marginalia.index.client.IndexClient;
|
import nu.marginalia.index.client.IndexClient;
|
||||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||||
import nu.marginalia.index.client.model.results.SearchResultSet;
|
import nu.marginalia.index.client.model.results.SearchResultSet;
|
||||||
|
import nu.marginalia.query.model.QueryParams;
|
||||||
|
import nu.marginalia.query.model.QueryResponse;
|
||||||
|
import nu.marginalia.query.svc.QueryFactory;
|
||||||
import nu.marginalia.service.server.BaseServiceParams;
|
import nu.marginalia.service.server.BaseServiceParams;
|
||||||
import nu.marginalia.service.server.Service;
|
import nu.marginalia.service.server.Service;
|
||||||
import spark.Request;
|
import spark.Request;
|
||||||
import spark.Response;
|
import spark.Response;
|
||||||
import spark.Spark;
|
import spark.Spark;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
public class QueryService extends Service {
|
public class QueryService extends Service {
|
||||||
|
|
||||||
private final IndexClient indexClient;
|
private final IndexClient indexClient;
|
||||||
private final Gson gson;
|
private final Gson gson;
|
||||||
|
private final QueryFactory queryFactory;
|
||||||
|
|
||||||
/**
 * Creates the service and registers its two POST endpoints:
 * {@code /delegate/} and {@code /search/}. Both render their result as JSON
 * through the injected {@link Gson} instance.
 *
 * @param params       base service parameters, passed to the superclass
 * @param indexClient  client used to execute queries against the index
 * @param gson         JSON (de)serializer for request and response bodies
 * @param queryFactory factory that turns query parameters into a processed query
 */
@Inject
public QueryService(BaseServiceParams params, IndexClient indexClient, Gson gson, QueryFactory queryFactory)
{
    super(params);

    this.queryFactory = queryFactory;
    this.gson = gson;
    this.indexClient = indexClient;

    Spark.post("/delegate/", this::delegateToIndex, gson::toJson);
    Spark.post("/search/", this::search, gson::toJson);
}
|
||||||
|
|
||||||
|
private Object search(Request request, Response response) {
|
||||||
|
String json = request.body();
|
||||||
|
QueryParams params = gson.fromJson(json, QueryParams.class);
|
||||||
|
|
||||||
|
var query = queryFactory.createQuery(params);
|
||||||
|
var rsp = executeQuery(Context.fromRequest(request), query.specs);
|
||||||
|
|
||||||
|
response.type("application/json");
|
||||||
|
|
||||||
|
return new QueryResponse(
|
||||||
|
query.specs,
|
||||||
|
rsp.results,
|
||||||
|
query.searchTermsHuman,
|
||||||
|
List.of(),
|
||||||
|
query.domain
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
private SearchResultSet delegateToIndex(Request request, Response response) {
|
private SearchResultSet delegateToIndex(Request request, Response response) {
|
||||||
@ -35,7 +62,10 @@ public class QueryService extends Service {
|
|||||||
|
|
||||||
response.type("application/json");
|
response.type("application/json");
|
||||||
|
|
||||||
return indexClient.query(Context.fromRequest(request), specsSet);
|
return executeQuery(Context.fromRequest(request), specsSet);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private SearchResultSet executeQuery(Context ctx, SearchSpecification query) {
|
||||||
|
return indexClient.query(ctx, query);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
package nu.marginalia.search.query;
|
package nu.marginalia.query.svc;
|
||||||
|
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.google.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
@ -6,34 +6,29 @@ import nu.marginalia.LanguageModels;
|
|||||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||||
import nu.marginalia.index.client.model.query.SearchSubquery;
|
import nu.marginalia.index.client.model.query.SearchSubquery;
|
||||||
import nu.marginalia.index.client.model.results.ResultRankingParameters;
|
import nu.marginalia.index.client.model.results.ResultRankingParameters;
|
||||||
import nu.marginalia.index.query.limit.QueryLimits;
|
|
||||||
import nu.marginalia.index.query.limit.QueryStrategy;
|
|
||||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
|
||||||
import nu.marginalia.language.EnglishDictionary;
|
import nu.marginalia.language.EnglishDictionary;
|
||||||
|
import nu.marginalia.language.WordPatterns;
|
||||||
import nu.marginalia.ngrams.NGramBloomFilter;
|
import nu.marginalia.ngrams.NGramBloomFilter;
|
||||||
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
import nu.marginalia.query.model.QueryParams;
|
||||||
|
import nu.marginalia.query.model.ProcessedQuery;
|
||||||
import nu.marginalia.query_parser.QueryParser;
|
import nu.marginalia.query_parser.QueryParser;
|
||||||
import nu.marginalia.query_parser.QueryPermutation;
|
import nu.marginalia.query_parser.QueryPermutation;
|
||||||
import nu.marginalia.query_parser.QueryVariants;
|
import nu.marginalia.query_parser.QueryVariants;
|
||||||
import nu.marginalia.query_parser.token.Token;
|
import nu.marginalia.query_parser.token.Token;
|
||||||
import nu.marginalia.query_parser.token.TokenType;
|
import nu.marginalia.query_parser.token.TokenType;
|
||||||
import nu.marginalia.search.db.DbNearDomainsQuery;
|
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
||||||
import nu.marginalia.search.model.SearchProfile;
|
|
||||||
import nu.marginalia.search.query.model.SearchQuery;
|
|
||||||
import nu.marginalia.search.query.model.UserSearchParameters;
|
|
||||||
import nu.marginalia.language.WordPatterns;
|
|
||||||
import org.eclipse.jetty.http.HttpStatus;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import spark.Spark;
|
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
@Singleton
|
@Singleton
|
||||||
public class QueryFactory {
|
public class QueryFactory {
|
||||||
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
private final DbNearDomainsQuery dbNearDomainsQuery;
|
|
||||||
|
|
||||||
private static final int RETAIN_QUERY_VARIANT_COUNT = 5;
|
private static final int RETAIN_QUERY_VARIANT_COUNT = 5;
|
||||||
private final ThreadLocal<QueryVariants> queryVariants;
|
private final ThreadLocal<QueryVariants> queryVariants;
|
||||||
@ -45,10 +40,7 @@ public class QueryFactory {
|
|||||||
public QueryFactory(LanguageModels lm,
|
public QueryFactory(LanguageModels lm,
|
||||||
TermFrequencyDict dict,
|
TermFrequencyDict dict,
|
||||||
EnglishDictionary englishDictionary,
|
EnglishDictionary englishDictionary,
|
||||||
NGramBloomFilter nGramBloomFilter,
|
NGramBloomFilter nGramBloomFilter) {
|
||||||
DbNearDomainsQuery dbNearDomainsQuery) {
|
|
||||||
this.dbNearDomainsQuery = dbNearDomainsQuery;
|
|
||||||
|
|
||||||
this.queryVariants = ThreadLocal.withInitial(() -> new QueryVariants(lm ,dict, nGramBloomFilter, englishDictionary));
|
this.queryVariants = ThreadLocal.withInitial(() -> new QueryVariants(lm ,dict, nGramBloomFilter, englishDictionary));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -60,7 +52,7 @@ public class QueryFactory {
|
|||||||
return new QueryPermutation(queryVariants.get());
|
return new QueryPermutation(queryVariants.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
public SearchQuery createQuery(UserSearchParameters params) {
|
public ProcessedQuery createQuery(QueryParams params) {
|
||||||
final var processedQuery = createQuery(getQueryPermutation(), params);
|
final var processedQuery = createQuery(getQueryPermutation(), params);
|
||||||
final List<SearchSubquery> subqueries = processedQuery.specs.subqueries;
|
final List<SearchSubquery> subqueries = processedQuery.specs.subqueries;
|
||||||
|
|
||||||
@ -72,59 +64,25 @@ public class QueryFactory {
|
|||||||
return processedQuery;
|
return processedQuery;
|
||||||
}
|
}
|
||||||
|
|
||||||
public SearchQuery createQuery(SearchProfile profile,
|
|
||||||
int limitPerDomain,
|
|
||||||
int limitTotal,
|
|
||||||
String... termsInclude)
|
|
||||||
{
|
|
||||||
List<SearchSubquery> sqs = new ArrayList<>();
|
|
||||||
|
|
||||||
sqs.add(new SearchSubquery(
|
|
||||||
Arrays.asList(termsInclude),
|
|
||||||
Collections.emptyList(),
|
|
||||||
Collections.emptyList(),
|
|
||||||
Collections.emptyList(),
|
|
||||||
Collections.emptyList()));
|
|
||||||
|
|
||||||
var specs = SearchSpecification.builder()
|
|
||||||
.subqueries(sqs)
|
|
||||||
.domains(Collections.emptyList())
|
|
||||||
.searchSetIdentifier(profile.searchSetIdentifier)
|
|
||||||
.queryLimits(new QueryLimits(limitPerDomain, limitTotal, 250, 8192))
|
|
||||||
.humanQuery("")
|
|
||||||
.year(SpecificationLimit.none())
|
|
||||||
.size(SpecificationLimit.none())
|
|
||||||
.rank(SpecificationLimit.none())
|
|
||||||
.rankingParams(ResultRankingParameters.sensibleDefaults())
|
|
||||||
.quality(SpecificationLimit.none())
|
|
||||||
.queryStrategy(QueryStrategy.AUTO)
|
|
||||||
.build();
|
|
||||||
|
|
||||||
return new SearchQuery(specs);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void trimArray(List<?> arr, int maxSize) {
|
private void trimArray(List<?> arr, int maxSize) {
|
||||||
if (arr.size() > maxSize) {
|
if (arr.size() > maxSize) {
|
||||||
arr.subList(0, arr.size() - maxSize).clear();
|
arr.subList(0, arr.size() - maxSize).clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public SearchQuery createQuery(QueryPermutation queryPermutation,
|
public ProcessedQuery createQuery(QueryPermutation queryPermutation,
|
||||||
UserSearchParameters params)
|
QueryParams params)
|
||||||
{
|
{
|
||||||
final var query = params.humanQuery();
|
final var query = params.humanQuery();
|
||||||
final var profile = params.profile();
|
|
||||||
|
|
||||||
if (query.length() > 1000) {
|
if (query.length() > 1000) {
|
||||||
Spark.halt(HttpStatus.BAD_REQUEST_400, "That's too much, man");
|
throw new IllegalArgumentException("Query too long");
|
||||||
}
|
}
|
||||||
|
|
||||||
List<String> searchTermsHuman = new ArrayList<>();
|
List<String> searchTermsHuman = new ArrayList<>();
|
||||||
List<String> problems = new ArrayList<>();
|
List<String> problems = new ArrayList<>();
|
||||||
|
|
||||||
|
String domain = null;
|
||||||
String near = null,
|
|
||||||
domain = null;
|
|
||||||
|
|
||||||
var basicQuery = queryParser.parse(query);
|
var basicQuery = queryParser.parse(query);
|
||||||
|
|
||||||
@ -134,7 +92,7 @@ public class QueryFactory {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
QueryLimitsAccumulator qualityLimits = new QueryLimitsAccumulator(profile);
|
QueryLimitsAccumulator qualityLimits = new QueryLimitsAccumulator(params);
|
||||||
|
|
||||||
for (Token t : basicQuery) {
|
for (Token t : basicQuery) {
|
||||||
if (t.type == TokenType.QUOT_TERM || t.type == TokenType.LITERAL_TERM) {
|
if (t.type == TokenType.QUOT_TERM || t.type == TokenType.LITERAL_TERM) {
|
||||||
@ -153,50 +111,46 @@ public class QueryFactory {
|
|||||||
List<SearchSubquery> subqueries = new ArrayList<>();
|
List<SearchSubquery> subqueries = new ArrayList<>();
|
||||||
|
|
||||||
for (var parts : queryPermutations) {
|
for (var parts : queryPermutations) {
|
||||||
QuerySearchTermsAccumulator termsAccumulator = new QuerySearchTermsAccumulator(profile, parts);
|
QuerySearchTermsAccumulator termsAccumulator = new QuerySearchTermsAccumulator(parts);
|
||||||
|
|
||||||
SearchSubquery subquery = termsAccumulator.createSubquery();
|
SearchSubquery subquery = termsAccumulator.createSubquery();
|
||||||
|
|
||||||
near = termsAccumulator.near;
|
|
||||||
domain = termsAccumulator.domain;
|
domain = termsAccumulator.domain;
|
||||||
|
|
||||||
params.profile().addTacitTerms(subquery);
|
|
||||||
params.jsSetting().addTacitTerms(subquery);
|
|
||||||
|
|
||||||
subqueries.add(subquery);
|
subqueries.add(subquery);
|
||||||
}
|
}
|
||||||
|
|
||||||
List<Integer> domains = Collections.emptyList();
|
List<Integer> domainIds = params.domainIds();
|
||||||
|
|
||||||
if (near != null) {
|
var limits = params.limits();
|
||||||
if (domain == null) {
|
// Disable limits on number of results per domain if we're searching with a site:-type term
|
||||||
domains = dbNearDomainsQuery.getRelatedDomains(near, problems::add);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int domainLimit;
|
|
||||||
if (domain != null) {
|
if (domain != null) {
|
||||||
domainLimit = 1000;
|
limits = limits.forSingleDomain();
|
||||||
} else {
|
|
||||||
domainLimit = 2;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var specsBuilder = SearchSpecification.builder()
|
var specsBuilder = SearchSpecification.builder()
|
||||||
.subqueries(subqueries)
|
.subqueries(subqueries)
|
||||||
.queryLimits(new QueryLimits(domainLimit, 100, 250, 4096))
|
|
||||||
.humanQuery(query)
|
.humanQuery(query)
|
||||||
.quality(qualityLimits.qualityLimit)
|
.quality(qualityLimits.qualityLimit)
|
||||||
.year(qualityLimits.year)
|
.year(qualityLimits.year)
|
||||||
.size(qualityLimits.size)
|
.size(qualityLimits.size)
|
||||||
.rank(qualityLimits.rank)
|
.rank(qualityLimits.rank)
|
||||||
.domains(domains)
|
.domains(domainIds)
|
||||||
|
.queryLimits(limits)
|
||||||
|
.searchSetIdentifier(params.identifier())
|
||||||
.rankingParams(ResultRankingParameters.sensibleDefaults())
|
.rankingParams(ResultRankingParameters.sensibleDefaults())
|
||||||
.queryStrategy(qualityLimits.queryStrategy)
|
.queryStrategy(qualityLimits.queryStrategy);
|
||||||
.searchSetIdentifier(profile.searchSetIdentifier);
|
|
||||||
|
|
||||||
SearchSpecification specs = specsBuilder.build();
|
SearchSpecification specs = specsBuilder.build();
|
||||||
|
|
||||||
return new SearchQuery(specs, searchTermsHuman, domain);
|
for (var sq : specs.subqueries) {
|
||||||
|
sq.searchTermsAdvice.addAll(params.tacitAdvice());
|
||||||
|
sq.searchTermsPriority.addAll(params.tacitPriority());
|
||||||
|
sq.searchTermsInclude.addAll(params.tacitIncludes());
|
||||||
|
sq.searchTermsExclude.addAll(params.tacitExcludes());
|
||||||
|
}
|
||||||
|
|
||||||
|
return new ProcessedQuery(specs, searchTermsHuman, domain);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1,10 +1,10 @@
|
|||||||
package nu.marginalia.search.query;
|
package nu.marginalia.query.svc;
|
||||||
|
|
||||||
import nu.marginalia.index.query.limit.QueryStrategy;
|
import nu.marginalia.index.query.limit.QueryStrategy;
|
||||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||||
|
import nu.marginalia.query.model.QueryParams;
|
||||||
import nu.marginalia.query_parser.token.Token;
|
import nu.marginalia.query_parser.token.Token;
|
||||||
import nu.marginalia.query_parser.token.TokenVisitor;
|
import nu.marginalia.query_parser.token.TokenVisitor;
|
||||||
import nu.marginalia.search.model.SearchProfile;
|
|
||||||
|
|
||||||
public class QueryLimitsAccumulator implements TokenVisitor {
|
public class QueryLimitsAccumulator implements TokenVisitor {
|
||||||
public SpecificationLimit qualityLimit;
|
public SpecificationLimit qualityLimit;
|
||||||
@ -14,11 +14,11 @@ public class QueryLimitsAccumulator implements TokenVisitor {
|
|||||||
|
|
||||||
public QueryStrategy queryStrategy = QueryStrategy.AUTO;
|
public QueryStrategy queryStrategy = QueryStrategy.AUTO;
|
||||||
|
|
||||||
/**
 * Seeds the accumulator with the limits carried by the query parameters.
 *
 * @param params the query parameters supplying the initial quality, year,
 *               size and rank limits
 */
public QueryLimitsAccumulator(QueryParams params) {
    this.rank = params.rank();
    this.size = params.size();
    this.year = params.year();
    this.qualityLimit = params.quality();
}
|
||||||
|
|
||||||
private SpecificationLimit parseSpecificationLimit(String str) {
|
private SpecificationLimit parseSpecificationLimit(String str) {
|
||||||
@ -89,7 +89,4 @@ public class QueryLimitsAccumulator implements TokenVisitor {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onAdviceTerm(Token token) {}
|
public void onAdviceTerm(Token token) {}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void onNearTerm(Token token) {}
|
|
||||||
}
|
}
|
@ -1,10 +1,10 @@
|
|||||||
package nu.marginalia.search.query;
|
package nu.marginalia.query.svc;
|
||||||
|
|
||||||
import nu.marginalia.index.client.model.query.SearchSubquery;
|
import nu.marginalia.index.client.model.query.SearchSubquery;
|
||||||
import nu.marginalia.language.WordPatterns;
|
import nu.marginalia.language.WordPatterns;
|
||||||
|
import nu.marginalia.query.model.QueryParams;
|
||||||
import nu.marginalia.query_parser.token.Token;
|
import nu.marginalia.query_parser.token.Token;
|
||||||
import nu.marginalia.query_parser.token.TokenVisitor;
|
import nu.marginalia.query_parser.token.TokenVisitor;
|
||||||
import nu.marginalia.search.model.SearchProfile;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
@ -18,16 +18,13 @@ public class QuerySearchTermsAccumulator implements TokenVisitor {
|
|||||||
public List<String> searchTermsPriority = new ArrayList<>();
|
public List<String> searchTermsPriority = new ArrayList<>();
|
||||||
public List<List<String>> searchTermCoherences = new ArrayList<>();
|
public List<List<String>> searchTermCoherences = new ArrayList<>();
|
||||||
|
|
||||||
public String near;
|
|
||||||
public String domain;
|
public String domain;
|
||||||
|
|
||||||
public SearchSubquery createSubquery() {
|
public SearchSubquery createSubquery() {
|
||||||
return new SearchSubquery(searchTermsInclude, searchTermsExclude, searchTermsAdvice, searchTermsPriority, searchTermCoherences);
|
return new SearchSubquery(searchTermsInclude, searchTermsExclude, searchTermsAdvice, searchTermsPriority, searchTermCoherences);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Creates an accumulator and immediately lets every token visit it, in list order.
 *
 * @param parts the tokens to accumulate terms from
 */
public QuerySearchTermsAccumulator(List<Token> parts) {
    parts.forEach(token -> token.visit(this));
}
|
||||||
@ -100,11 +97,6 @@ public class QuerySearchTermsAccumulator implements TokenVisitor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void onNearTerm(Token token) {
|
|
||||||
near = token.str;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onYearTerm(Token token) {
|
public void onYearTerm(Token token) {
|
||||||
|
|
@ -1,14 +1,15 @@
|
|||||||
package nu.marginalia.search.query;
|
package nu.marginalia.query.svc;
|
||||||
|
|
||||||
import nu.marginalia.WmsaHome;
|
import nu.marginalia.WmsaHome;
|
||||||
|
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
|
||||||
|
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||||
|
import nu.marginalia.index.query.limit.QueryLimits;
|
||||||
|
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||||
import nu.marginalia.index.query.limit.SpecificationLimitType;
|
import nu.marginalia.index.query.limit.SpecificationLimitType;
|
||||||
import nu.marginalia.language.EnglishDictionary;
|
import nu.marginalia.language.EnglishDictionary;
|
||||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
|
||||||
import nu.marginalia.ngrams.NGramBloomFilter;
|
import nu.marginalia.ngrams.NGramBloomFilter;
|
||||||
|
import nu.marginalia.query.model.QueryParams;
|
||||||
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
||||||
import nu.marginalia.search.command.SearchJsParameter;
|
|
||||||
import nu.marginalia.search.model.SearchProfile;
|
|
||||||
import nu.marginalia.search.query.model.UserSearchParameters;
|
|
||||||
import org.junit.jupiter.api.BeforeAll;
|
import org.junit.jupiter.api.BeforeAll;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
@ -30,15 +31,24 @@ public class QueryFactoryTest {
|
|||||||
queryFactory = new QueryFactory(lm,
|
queryFactory = new QueryFactory(lm,
|
||||||
tfd,
|
tfd,
|
||||||
new EnglishDictionary(tfd),
|
new EnglishDictionary(tfd),
|
||||||
new NGramBloomFilter(lm),
|
new NGramBloomFilter(lm)
|
||||||
null
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
public SearchSpecification parseAndGetSpecs(String query) {
|
public SearchSpecification parseAndGetSpecs(String query) {
|
||||||
return queryFactory.createQuery(
|
return queryFactory.createQuery(
|
||||||
new UserSearchParameters(query, SearchProfile.CORPO, SearchJsParameter.DEFAULT)
|
new QueryParams(query, null,
|
||||||
).specs;
|
List.of(),
|
||||||
|
List.of(),
|
||||||
|
List.of(),
|
||||||
|
List.of(),
|
||||||
|
SpecificationLimit.none(),
|
||||||
|
SpecificationLimit.none(),
|
||||||
|
SpecificationLimit.none(),
|
||||||
|
SpecificationLimit.none(),
|
||||||
|
null,
|
||||||
|
new QueryLimits(100, 100, 100, 100),
|
||||||
|
SearchSetIdentifier.BLOGS)).specs;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
@ -7,12 +7,12 @@ import io.reactivex.rxjava3.schedulers.Schedulers;
|
|||||||
import nu.marginalia.assistant.client.AssistantClient;
|
import nu.marginalia.assistant.client.AssistantClient;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.db.DbDomainQueries;
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
|
import nu.marginalia.query.client.QueryClient;
|
||||||
|
import nu.marginalia.query.model.QueryResponse;
|
||||||
import nu.marginalia.search.model.UrlDetails;
|
import nu.marginalia.search.model.UrlDetails;
|
||||||
import nu.marginalia.client.Context;
|
import nu.marginalia.client.Context;
|
||||||
import nu.marginalia.search.model.DecoratedSearchResults;
|
import nu.marginalia.search.model.DecoratedSearchResults;
|
||||||
import nu.marginalia.search.query.QueryFactory;
|
import nu.marginalia.search.model.UserSearchParameters;
|
||||||
import nu.marginalia.search.query.model.SearchQuery;
|
|
||||||
import nu.marginalia.search.query.model.UserSearchParameters;
|
|
||||||
import nu.marginalia.search.svc.SearchQueryIndexService;
|
import nu.marginalia.search.svc.SearchQueryIndexService;
|
||||||
import nu.marginalia.search.svc.SearchUnitConversionService;
|
import nu.marginalia.search.svc.SearchUnitConversionService;
|
||||||
import org.apache.logging.log4j.util.Strings;
|
import org.apache.logging.log4j.util.Strings;
|
||||||
@ -37,58 +37,71 @@ public class SearchOperator {
|
|||||||
|
|
||||||
private final AssistantClient assistantClient;
|
private final AssistantClient assistantClient;
|
||||||
private final DbDomainQueries domainQueries;
|
private final DbDomainQueries domainQueries;
|
||||||
private final QueryFactory queryFactory;
|
private final QueryClient queryClient;
|
||||||
|
|
||||||
private final SearchQueryIndexService searchQueryService;
|
private final SearchQueryIndexService searchQueryService;
|
||||||
|
private final SearchQueryParamFactory paramFactory;
|
||||||
private final SearchUnitConversionService searchUnitConversionService;
|
private final SearchUnitConversionService searchUnitConversionService;
|
||||||
|
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public SearchOperator(AssistantClient assistantClient,
|
public SearchOperator(AssistantClient assistantClient,
|
||||||
DbDomainQueries domainQueries,
|
DbDomainQueries domainQueries,
|
||||||
QueryFactory queryFactory,
|
QueryClient queryClient,
|
||||||
SearchQueryIndexService searchQueryService,
|
SearchQueryIndexService searchQueryService,
|
||||||
SearchUnitConversionService searchUnitConversionService) {
|
SearchQueryParamFactory paramFactory,
|
||||||
|
SearchUnitConversionService searchUnitConversionService)
|
||||||
|
{
|
||||||
|
|
||||||
this.assistantClient = assistantClient;
|
this.assistantClient = assistantClient;
|
||||||
this.domainQueries = domainQueries;
|
this.domainQueries = domainQueries;
|
||||||
this.queryFactory = queryFactory;
|
this.queryClient = queryClient;
|
||||||
|
|
||||||
this.searchQueryService = searchQueryService;
|
this.searchQueryService = searchQueryService;
|
||||||
|
this.paramFactory = paramFactory;
|
||||||
this.searchUnitConversionService = searchUnitConversionService;
|
this.searchUnitConversionService = searchUnitConversionService;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<UrlDetails> doApiSearch(Context ctx,
|
public List<UrlDetails> doApiSearch(Context ctx,
|
||||||
UserSearchParameters params) {
|
UserSearchParameters params) {
|
||||||
|
|
||||||
|
// TODO: This shouldn't route through search-service!
|
||||||
|
var queryParams = paramFactory.forRegularSearch(params);
|
||||||
|
var queryResponse = queryClient.search(ctx, queryParams);
|
||||||
|
|
||||||
SearchQuery processedQuery = queryFactory.createQuery(params);
|
logger.info(queryMarker, "Human terms (API): {}", Strings.join(queryResponse.searchTermsHuman(), ','));
|
||||||
|
|
||||||
logger.info(queryMarker, "Human terms (API): {}", Strings.join(processedQuery.searchTermsHuman, ','));
|
return searchQueryService.getResultsFromQuery(queryResponse);
|
||||||
|
|
||||||
return searchQueryService.executeQuery(ctx, processedQuery.specs);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public DecoratedSearchResults doSearch(Context ctx, UserSearchParameters params) {
|
public List<UrlDetails> doSiteSearch(Context ctx,
|
||||||
|
String domain) {
|
||||||
|
|
||||||
Future<String> eval = searchUnitConversionService.tryEval(ctx, params.humanQuery());
|
var queryParams = paramFactory.forSiteSearch(domain);
|
||||||
SearchQuery processedQuery = queryFactory.createQuery(params);
|
var queryResponse = queryClient.search(ctx, queryParams);
|
||||||
|
|
||||||
logger.info(queryMarker, "Human terms: {}", Strings.join(processedQuery.searchTermsHuman, ','));
|
return searchQueryService.getResultsFromQuery(queryResponse);
|
||||||
|
}
|
||||||
|
|
||||||
List<UrlDetails> queryResults = searchQueryService.executeQuery(ctx, processedQuery.specs);
|
public DecoratedSearchResults doSearch(Context ctx, UserSearchParameters userParams) {
|
||||||
|
|
||||||
|
Future<String> eval = searchUnitConversionService.tryEval(ctx, userParams.humanQuery());
|
||||||
|
var queryParams = paramFactory.forRegularSearch(userParams);
|
||||||
|
var queryResponse = queryClient.search(ctx, queryParams);
|
||||||
|
|
||||||
|
List<UrlDetails> queryResults = searchQueryService.getResultsFromQuery(queryResponse);
|
||||||
|
|
||||||
|
logger.info(queryMarker, "Human terms: {}", Strings.join(queryResponse.searchTermsHuman(), ','));
|
||||||
logger.info(queryMarker, "Search Result Count: {}", queryResults.size());
|
logger.info(queryMarker, "Search Result Count: {}", queryResults.size());
|
||||||
|
|
||||||
String evalResult = getFutureOrDefault(eval, "");
|
String evalResult = getFutureOrDefault(eval, "");
|
||||||
|
|
||||||
return DecoratedSearchResults.builder()
|
return DecoratedSearchResults.builder()
|
||||||
.params(params)
|
.params(userParams)
|
||||||
.problems(getProblems(ctx, evalResult, queryResults, processedQuery))
|
.problems(getProblems(ctx, evalResult, queryResults, queryResponse))
|
||||||
.evalResult(evalResult)
|
.evalResult(evalResult)
|
||||||
.results(queryResults)
|
.results(queryResults)
|
||||||
.focusDomain(processedQuery.domain)
|
.focusDomain(queryResponse.domain())
|
||||||
.focusDomainId(getDomainId(processedQuery.domain))
|
.focusDomainId(getDomainId(queryResponse.domain()))
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -113,20 +126,20 @@ public class SearchOperator {
|
|||||||
return domainQueries.tryGetDomainId(new EdgeDomain(domain)).orElse(-1);
|
return domainQueries.tryGetDomainId(new EdgeDomain(domain)).orElse(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<String> getProblems(Context ctx, String evalResult, List<UrlDetails> queryResults, SearchQuery processedQuery) {
|
private List<String> getProblems(Context ctx, String evalResult, List<UrlDetails> queryResults, QueryResponse response) {
|
||||||
final List<String> problems = new ArrayList<>(processedQuery.problems);
|
final List<String> problems = new ArrayList<>(response.problems());
|
||||||
boolean siteSearch = processedQuery.domain != null;
|
boolean siteSearch = response.domain() != null;
|
||||||
|
|
||||||
if (!siteSearch) {
|
if (!siteSearch) {
|
||||||
if (queryResults.size() <= 5 && null == evalResult) {
|
if (queryResults.size() <= 5 && null == evalResult) {
|
||||||
spellCheckTerms(ctx, processedQuery).forEach(problems::add);
|
spellCheckTerms(ctx, response).forEach(problems::add);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (queryResults.size() <= 5) {
|
if (queryResults.size() <= 5) {
|
||||||
problems.add("Try rephrasing the query, changing the word order or using synonyms to get different results. <a href=\"https://memex.marginalia.nu/projects/edge/search-tips.gmi\">Tips</a>.");
|
problems.add("Try rephrasing the query, changing the word order or using synonyms to get different results. <a href=\"https://memex.marginalia.nu/projects/edge/search-tips.gmi\">Tips</a>.");
|
||||||
}
|
}
|
||||||
|
|
||||||
Set<String> representativeKeywords = processedQuery.getAllKeywords();
|
Set<String> representativeKeywords = response.getAllKeywords();
|
||||||
if (representativeKeywords.size()>1 && (representativeKeywords.contains("definition") || representativeKeywords.contains("define") || representativeKeywords.contains("meaning")))
|
if (representativeKeywords.size()>1 && (representativeKeywords.contains("definition") || representativeKeywords.contains("define") || representativeKeywords.contains("meaning")))
|
||||||
{
|
{
|
||||||
problems.add("Tip: Try using a query that looks like <tt>define:word</tt> if you want a dictionary definition");
|
problems.add("Tip: Try using a query that looks like <tt>define:word</tt> if you want a dictionary definition");
|
||||||
@ -137,8 +150,8 @@ public class SearchOperator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private Iterable<String> spellCheckTerms(Context ctx, SearchQuery disjointedQuery) {
|
private Iterable<String> spellCheckTerms(Context ctx, QueryResponse response) {
|
||||||
return Observable.fromIterable(disjointedQuery.searchTermsHuman)
|
return Observable.fromIterable(response.searchTermsHuman())
|
||||||
.subscribeOn(Schedulers.io())
|
.subscribeOn(Schedulers.io())
|
||||||
.flatMap(term -> assistantClient.spellCheck(ctx, term)
|
.flatMap(term -> assistantClient.spellCheck(ctx, term)
|
||||||
.onErrorReturn(e -> Collections.emptyList())
|
.onErrorReturn(e -> Collections.emptyList())
|
||||||
|
@ -0,0 +1,53 @@
|
|||||||
|
package nu.marginalia.search;
|
||||||
|
|
||||||
|
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
|
||||||
|
import nu.marginalia.index.client.model.query.SearchSubquery;
|
||||||
|
import nu.marginalia.index.query.limit.QueryLimits;
|
||||||
|
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||||
|
import nu.marginalia.query.model.QueryParams;
|
||||||
|
import nu.marginalia.search.model.UserSearchParameters;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class SearchQueryParamFactory {
|
||||||
|
|
||||||
|
public QueryParams forRegularSearch(UserSearchParameters userParams) {
|
||||||
|
SearchSubquery prototype = new SearchSubquery();
|
||||||
|
var profile = userParams.profile();
|
||||||
|
profile.addTacitTerms(prototype);
|
||||||
|
|
||||||
|
return new QueryParams(
|
||||||
|
userParams.humanQuery(),
|
||||||
|
null,
|
||||||
|
prototype.searchTermsInclude,
|
||||||
|
prototype.searchTermsExclude,
|
||||||
|
prototype.searchTermsPriority,
|
||||||
|
prototype.searchTermsAdvice,
|
||||||
|
profile.getQualityLimit(),
|
||||||
|
profile.getYearLimit(),
|
||||||
|
profile.getSizeLimit(),
|
||||||
|
SpecificationLimit.none(),
|
||||||
|
List.of(),
|
||||||
|
new QueryLimits(2, 100, 200, 8192),
|
||||||
|
profile.searchSetIdentifier
|
||||||
|
);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public QueryParams forSiteSearch(String domain) {
|
||||||
|
return new QueryParams("site:"+domain,
|
||||||
|
null,
|
||||||
|
List.of(),
|
||||||
|
List.of(),
|
||||||
|
List.of(),
|
||||||
|
List.of(),
|
||||||
|
SpecificationLimit.none(),
|
||||||
|
SpecificationLimit.none(),
|
||||||
|
SpecificationLimit.none(),
|
||||||
|
SpecificationLimit.none(),
|
||||||
|
List.of(),
|
||||||
|
new QueryLimits(100, 100, 100, 512),
|
||||||
|
SearchSetIdentifier.NONE
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
@ -8,7 +8,7 @@ import nu.marginalia.search.command.SearchCommandInterface;
|
|||||||
import nu.marginalia.search.command.SearchParameters;
|
import nu.marginalia.search.command.SearchParameters;
|
||||||
import nu.marginalia.search.model.DecoratedSearchResults;
|
import nu.marginalia.search.model.DecoratedSearchResults;
|
||||||
import nu.marginalia.search.model.UrlDetails;
|
import nu.marginalia.search.model.UrlDetails;
|
||||||
import nu.marginalia.search.query.model.UserSearchParameters;
|
import nu.marginalia.search.model.UserSearchParameters;
|
||||||
import nu.marginalia.renderer.MustacheRenderer;
|
import nu.marginalia.renderer.MustacheRenderer;
|
||||||
import nu.marginalia.renderer.RendererFactory;
|
import nu.marginalia.renderer.RendererFactory;
|
||||||
|
|
||||||
|
@ -3,12 +3,12 @@ package nu.marginalia.search.command.commands;
|
|||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import nu.marginalia.db.DbDomainQueries;
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
|
import nu.marginalia.search.SearchOperator;
|
||||||
import nu.marginalia.search.model.UrlDetails;
|
import nu.marginalia.search.model.UrlDetails;
|
||||||
import nu.marginalia.search.command.SearchCommandInterface;
|
import nu.marginalia.search.command.SearchCommandInterface;
|
||||||
import nu.marginalia.search.command.SearchParameters;
|
import nu.marginalia.search.command.SearchParameters;
|
||||||
import nu.marginalia.search.model.DomainInformation;
|
import nu.marginalia.search.model.DomainInformation;
|
||||||
import nu.marginalia.search.model.SearchProfile;
|
import nu.marginalia.search.model.SearchProfile;
|
||||||
import nu.marginalia.search.query.QueryFactory;
|
|
||||||
import nu.marginalia.search.siteinfo.DomainInformationService;
|
import nu.marginalia.search.siteinfo.DomainInformationService;
|
||||||
import nu.marginalia.search.svc.SearchQueryIndexService;
|
import nu.marginalia.search.svc.SearchQueryIndexService;
|
||||||
import nu.marginalia.client.Context;
|
import nu.marginalia.client.Context;
|
||||||
@ -25,9 +25,9 @@ import java.util.regex.Pattern;
|
|||||||
|
|
||||||
public class SiteListCommand implements SearchCommandInterface {
|
public class SiteListCommand implements SearchCommandInterface {
|
||||||
private final DbDomainQueries domainQueries;
|
private final DbDomainQueries domainQueries;
|
||||||
private final QueryFactory queryFactory;
|
|
||||||
private final DomainInformationService domainInformationService;
|
private final DomainInformationService domainInformationService;
|
||||||
private final SearchQueryIndexService searchQueryIndexService;
|
private final SearchQueryIndexService searchQueryIndexService;
|
||||||
|
private final SearchOperator searchOperator;
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
|
|
||||||
private final MustacheRenderer<DomainInformation> siteInfoRenderer;
|
private final MustacheRenderer<DomainInformation> siteInfoRenderer;
|
||||||
@ -38,16 +38,16 @@ public class SiteListCommand implements SearchCommandInterface {
|
|||||||
public SiteListCommand(
|
public SiteListCommand(
|
||||||
DomainInformationService domainInformationService,
|
DomainInformationService domainInformationService,
|
||||||
DbDomainQueries domainQueries,
|
DbDomainQueries domainQueries,
|
||||||
QueryFactory queryFactory, RendererFactory rendererFactory,
|
RendererFactory rendererFactory,
|
||||||
SearchQueryIndexService searchQueryIndexService)
|
SearchQueryIndexService searchQueryIndexService, SearchOperator searchOperator)
|
||||||
throws IOException
|
throws IOException
|
||||||
{
|
{
|
||||||
this.domainQueries = domainQueries;
|
this.domainQueries = domainQueries;
|
||||||
this.domainInformationService = domainInformationService;
|
this.domainInformationService = domainInformationService;
|
||||||
this.queryFactory = queryFactory;
|
|
||||||
|
|
||||||
siteInfoRenderer = rendererFactory.renderer("search/site-info");
|
siteInfoRenderer = rendererFactory.renderer("search/site-info");
|
||||||
this.searchQueryIndexService = searchQueryIndexService;
|
this.searchQueryIndexService = searchQueryIndexService;
|
||||||
|
this.searchOperator = searchOperator;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -63,8 +63,8 @@ public class SiteListCommand implements SearchCommandInterface {
|
|||||||
Path screenshotPath = null;
|
Path screenshotPath = null;
|
||||||
int domainId = -1;
|
int domainId = -1;
|
||||||
if (null != domain) {
|
if (null != domain) {
|
||||||
var dumbQuery = queryFactory.createQuery(SearchProfile.CORPO, 100, 100, "site:"+domain);
|
resultSet = searchOperator.doSiteSearch(ctx, domain.toString());
|
||||||
resultSet = searchQueryIndexService.executeQuery(ctx, dumbQuery.specs);
|
|
||||||
var maybeId = domainQueries.tryGetDomainId(domain);
|
var maybeId = domainQueries.tryGetDomainId(domain);
|
||||||
if (maybeId.isPresent()) {
|
if (maybeId.isPresent()) {
|
||||||
domainId = maybeId.getAsInt();
|
domainId = maybeId.getAsInt();
|
||||||
|
@ -3,7 +3,6 @@ package nu.marginalia.search.model;
|
|||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Builder;
|
import lombok.Builder;
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
import nu.marginalia.search.query.model.UserSearchParameters;
|
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
package nu.marginalia.search.query.model;
|
package nu.marginalia.search.model;
|
||||||
|
|
||||||
import nu.marginalia.search.command.SearchJsParameter;
|
import nu.marginalia.search.command.SearchJsParameter;
|
||||||
import nu.marginalia.search.model.SearchProfile;
|
import nu.marginalia.search.model.SearchProfile;
|
@ -1,28 +0,0 @@
|
|||||||
package nu.marginalia.search.query.model;
|
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
|
||||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
@AllArgsConstructor
|
|
||||||
public class SearchQuery {
|
|
||||||
public final SearchSpecification specs;
|
|
||||||
|
|
||||||
public final Set<String> problems = new TreeSet<>();
|
|
||||||
public final List<String> searchTermsHuman;
|
|
||||||
public String domain;
|
|
||||||
|
|
||||||
public SearchQuery(SearchSpecification justSpecs) {
|
|
||||||
searchTermsHuman = new ArrayList<>();
|
|
||||||
specs = justSpecs;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Set<String> getAllKeywords() {
|
|
||||||
Set<String> keywords = new HashSet<>(100);
|
|
||||||
for (var sq : specs.subqueries) {
|
|
||||||
keywords.addAll(sq.searchTermsInclude);
|
|
||||||
}
|
|
||||||
return keywords;
|
|
||||||
}
|
|
||||||
}
|
|
@ -14,7 +14,7 @@ import nu.marginalia.search.client.model.ApiSearchResults;
|
|||||||
import nu.marginalia.search.model.SearchProfile;
|
import nu.marginalia.search.model.SearchProfile;
|
||||||
import nu.marginalia.client.Context;
|
import nu.marginalia.client.Context;
|
||||||
import nu.marginalia.search.command.SearchJsParameter;
|
import nu.marginalia.search.command.SearchJsParameter;
|
||||||
import nu.marginalia.search.query.model.UserSearchParameters;
|
import nu.marginalia.search.model.UserSearchParameters;
|
||||||
import spark.Request;
|
import spark.Request;
|
||||||
import spark.Response;
|
import spark.Response;
|
||||||
|
|
||||||
|
@ -5,6 +5,8 @@ import com.google.inject.Singleton;
|
|||||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||||
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
|
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
|
||||||
import nu.marginalia.query.client.QueryClient;
|
import nu.marginalia.query.client.QueryClient;
|
||||||
|
import nu.marginalia.query.model.QueryParams;
|
||||||
|
import nu.marginalia.query.model.QueryResponse;
|
||||||
import nu.marginalia.search.model.UrlDetails;
|
import nu.marginalia.search.model.UrlDetails;
|
||||||
import nu.marginalia.search.results.SearchResultDecorator;
|
import nu.marginalia.search.results.SearchResultDecorator;
|
||||||
import nu.marginalia.search.results.UrlDeduplicator;
|
import nu.marginalia.search.results.UrlDeduplicator;
|
||||||
@ -15,7 +17,6 @@ import org.slf4j.Marker;
|
|||||||
import org.slf4j.MarkerFactory;
|
import org.slf4j.MarkerFactory;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
@Singleton
|
@Singleton
|
||||||
public class SearchQueryIndexService {
|
public class SearchQueryIndexService {
|
||||||
@ -40,12 +41,9 @@ public class SearchQueryIndexService {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<UrlDetails> executeQuery(Context ctx, SearchSpecification specs) {
|
public List<UrlDetails> getResultsFromQuery(QueryResponse queryResponse) {
|
||||||
// Send the query
|
|
||||||
final var queryResponse = queryClient.delegate(ctx, specs);
|
|
||||||
|
|
||||||
// Remove duplicates and other chaff
|
// Remove duplicates and other chaff
|
||||||
final var results = limitAndDeduplicateResults(specs, queryResponse.results);
|
final var results = limitAndDeduplicateResults(queryResponse.specs(), queryResponse.results());
|
||||||
|
|
||||||
// Update the query count (this is what you see on the front page)
|
// Update the query count (this is what you see on the front page)
|
||||||
searchVisitorCount.registerQuery();
|
searchVisitorCount.registerQuery();
|
||||||
|
Loading…
Reference in New Issue
Block a user