From 2b811fb42203cebd82892e1dbbbf311bd6df2791 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Fri, 19 Apr 2024 11:00:56 +0200 Subject: [PATCH] (qs) Basic query debug feature --- .../results/ResultRankingParameters.java | 11 +- .../searchquery/QueryGRPCService.java | 20 ++- .../searchquery/svc/QueryFactory.java | 5 +- .../query/svc/QueryFactoryTest.java | 2 +- .../marginalia/query/QueryBasicInterface.java | 103 +++++++++++++-- .../nu/marginalia/query/QueryService.java | 6 +- .../resources/templates/qdebug.hdb | 121 ++++++++++++++++++ 7 files changed, 246 insertions(+), 22 deletions(-) create mode 100644 code/services-core/query-service/resources/templates/qdebug.hdb diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/ResultRankingParameters.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/ResultRankingParameters.java index 04a5f8e2..e54a994d 100644 --- a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/ResultRankingParameters.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/ResultRankingParameters.java @@ -1,11 +1,12 @@ package nu.marginalia.api.searchquery.model.results; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.ToString; +import lombok.*; -@Builder @AllArgsConstructor @ToString @EqualsAndHashCode +@Builder +@AllArgsConstructor +@ToString +@EqualsAndHashCode +@Getter // getter for the mustache template engine's behalf public class ResultRankingParameters { /** Tuning for BM25 when applied to full document matches */ diff --git a/code/functions/search-query/java/nu/marginalia/functions/searchquery/QueryGRPCService.java b/code/functions/search-query/java/nu/marginalia/functions/searchquery/QueryGRPCService.java index d2cdd27d..8ebcabc4 100644 --- a/code/functions/search-query/java/nu/marginalia/functions/searchquery/QueryGRPCService.java +++ b/code/functions/search-query/java/nu/marginalia/functions/searchquery/QueryGRPCService.java @@ -6,7 +6,9 @@ import io.grpc.stub.StreamObserver; import io.prometheus.client.Histogram; import lombok.SneakyThrows; import nu.marginalia.api.searchquery.*; +import nu.marginalia.api.searchquery.model.query.ProcessedQuery; import nu.marginalia.api.searchquery.model.query.QueryParams; +import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; import nu.marginalia.db.DomainBlacklist; import nu.marginalia.index.api.IndexClient; import nu.marginalia.functions.searchquery.svc.QueryFactory; @@ -51,7 +53,7 @@ public class QueryGRPCService extends QueryApiGrpc.QueryApiImplBase { Integer.toString(request.getQueryLimits().getResultsTotal())) .time(() -> { var params = QueryProtobufCodec.convertRequest(request); - var query = queryFactory.createQuery(params); + var query = queryFactory.createQuery(params, null); RpcIndexQuery indexRequest = QueryProtobufCodec.convertQuery(request, query); List bestItems = executeQueries(indexRequest, request.getQueryLimits().getResultsTotal()); @@ -81,16 +83,26 @@ public class QueryGRPCService extends QueryApiGrpc.QueryApiImplBase { return blacklist.isBlacklisted(UrlIdCodec.getDomainId(item.getRawItem().getCombinedId())); } - public List executeDirect(String originalQuery, QueryParams params, int count) { - var query = queryFactory.createQuery(params); + public DetailedDirectResult executeDirect( + String originalQuery, + QueryParams params, + ResultRankingParameters rankingParameters, + int count) { - return executeQueries( + var query = queryFactory.createQuery(params, rankingParameters); + + var items = executeQueries( QueryProtobufCodec.convertQuery(originalQuery, query), count) .stream().map(QueryProtobufCodec::convertQueryResult) .toList(); + + return new DetailedDirectResult(query, items); } + public record DetailedDirectResult(ProcessedQuery processedQuery, + List result) {} + @SneakyThrows List executeQueries(RpcIndexQuery indexRequest, int totalSize) { var results = indexClient.executeQueries(indexRequest); diff --git a/code/functions/search-query/java/nu/marginalia/functions/searchquery/svc/QueryFactory.java b/code/functions/search-query/java/nu/marginalia/functions/searchquery/svc/QueryFactory.java index ab4018ef..908eb2e2 100644 --- a/code/functions/search-query/java/nu/marginalia/functions/searchquery/svc/QueryFactory.java +++ b/code/functions/search-query/java/nu/marginalia/functions/searchquery/svc/QueryFactory.java @@ -17,6 +17,7 @@ import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.annotation.Nullable; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -37,7 +38,8 @@ public class QueryFactory { - public ProcessedQuery createQuery(QueryParams params) { + public ProcessedQuery createQuery(QueryParams params, + @Nullable ResultRankingParameters rankingParams) { final var query = params.humanQuery(); if (query.length() > 1000) { @@ -156,6 +158,7 @@ public class QueryFactory { .year(year) .size(size) .rank(rank) + .rankingParams(rankingParams) .domains(domainIds) .queryLimits(limits) .searchSetIdentifier(params.identifier()) diff --git a/code/functions/search-query/test/nu/marginalia/query/svc/QueryFactoryTest.java b/code/functions/search-query/test/nu/marginalia/query/svc/QueryFactoryTest.java index 319b4095..1576fd85 100644 --- a/code/functions/search-query/test/nu/marginalia/query/svc/QueryFactoryTest.java +++ b/code/functions/search-query/test/nu/marginalia/query/svc/QueryFactoryTest.java @@ -51,7 +51,7 @@ public class QueryFactoryTest { new QueryLimits(100, 100, 100, 100), "NONE", QueryStrategy.AUTO, - ResultRankingParameters.TemporalBias.NONE)).specs; + ResultRankingParameters.TemporalBias.NONE), null).specs; } diff --git a/code/services-core/query-service/java/nu/marginalia/query/QueryBasicInterface.java b/code/services-core/query-service/java/nu/marginalia/query/QueryBasicInterface.java index dc0ae2a0..916f5176 100644 --- a/code/services-core/query-service/java/nu/marginalia/query/QueryBasicInterface.java +++ b/code/services-core/query-service/java/nu/marginalia/query/QueryBasicInterface.java @@ -1,7 +1,10 @@ package nu.marginalia.query; +import com.google.common.base.Strings; import com.google.gson.Gson; import com.google.inject.Inject; +import nu.marginalia.api.searchquery.model.results.Bm25Parameters; +import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; import nu.marginalia.functions.searchquery.QueryGRPCService; import nu.marginalia.index.query.limit.QueryLimits; import nu.marginalia.model.gson.GsonFactory; @@ -15,7 +18,8 @@ import java.io.IOException; import java.util.Map; public class QueryBasicInterface { - private final MustacheRenderer renderer; + private final MustacheRenderer basicRenderer; + private final MustacheRenderer qdebugRenderer; private final Gson gson = GsonFactory.get(); private final QueryGRPCService queryGRPCService; @@ -25,35 +29,114 @@ public class QueryBasicInterface { QueryGRPCService queryGRPCService ) throws IOException { - this.renderer = rendererFactory.renderer("search"); + this.basicRenderer = rendererFactory.renderer("search"); + this.qdebugRenderer = rendererFactory.renderer("qdebug"); this.queryGRPCService = queryGRPCService; } - public Object handle(Request request, Response response) { - String queryParam = request.queryParams("q"); - if (queryParam == null) { - return renderer.render(new Object()); + public Object handleBasic(Request request, Response response) { + String queryParams = request.queryParams("q"); + if (queryParams == null) { + return basicRenderer.render(new Object()); } int count = request.queryParams("count") == null ? 10 : Integer.parseInt(request.queryParams("count")); int domainCount = request.queryParams("domainCount") == null ? 5 : Integer.parseInt(request.queryParams("domainCount")); String set = request.queryParams("set") == null ? "" : request.queryParams("set"); - var params = new QueryParams(queryParam, new QueryLimits( + var params = new QueryParams(queryParams, new QueryLimits( domainCount, count, 250, 8192 ), set); - var results = queryGRPCService.executeDirect(queryParam, params, count); + var detailedDirectResult = queryGRPCService.executeDirect(queryParams, + params, + ResultRankingParameters.sensibleDefaults(), + count); + + var results = detailedDirectResult.result(); if (request.headers("Accept").contains("application/json")) { response.type("application/json"); return gson.toJson(results); } else { - return renderer.render( - Map.of("query", queryParam, + return basicRenderer.render( + Map.of("query", queryParams, "results", results) ); } } + + public Object handleAdvanced(Request request, Response response) { + String queryString = request.queryParams("q"); + if (queryString == null) { + // Show the default query form if no query is given + return qdebugRenderer.render(Map.of("rankingParams", ResultRankingParameters.sensibleDefaults()) + ); + } + + int count = request.queryParams("count") == null ? 10 : Integer.parseInt(request.queryParams("count")); + int domainCount = request.queryParams("domainCount") == null ? 5 : Integer.parseInt(request.queryParams("domainCount")); + String set = request.queryParams("set") == null ? "" : request.queryParams("set"); + + var queryParams = new QueryParams(queryString, new QueryLimits( + domainCount, count, 250, 8192 + ), set); + + var rankingParams = rankingParamsFromRequest(request); + + var detailedDirectResult = queryGRPCService.executeDirect(queryString, + queryParams, + rankingParams, + count); + + var results = detailedDirectResult.result(); + + return qdebugRenderer.render( + Map.of("query", queryString, + "specs", detailedDirectResult.processedQuery().specs, + "rankingParams", rankingParams, // we can't grab this from the specs as it will null the object if it's the default values + "results", results) + ); + } + + private ResultRankingParameters rankingParamsFromRequest(Request request) { + var sensibleDefaults = ResultRankingParameters.sensibleDefaults(); + + return ResultRankingParameters.builder() + .domainRankBonus(doubleFromRequest(request, "domainRankBonus", sensibleDefaults.domainRankBonus)) + .qualityPenalty(doubleFromRequest(request, "qualityPenalty", sensibleDefaults.qualityPenalty)) + .shortDocumentThreshold(intFromRequest(request, "shortDocumentThreshold", sensibleDefaults.shortDocumentThreshold)) + .shortDocumentPenalty(doubleFromRequest(request, "shortDocumentPenalty", sensibleDefaults.shortDocumentPenalty)) + .tcfJaccardWeight(doubleFromRequest(request, "tcfJaccardWeight", sensibleDefaults.tcfJaccardWeight)) + .tcfOverlapWeight(doubleFromRequest(request, "tcfOverlapWeight", sensibleDefaults.tcfOverlapWeight)) + .fullParams(new Bm25Parameters( + doubleFromRequest(request, "fullParams.k1", sensibleDefaults.fullParams.k()), + doubleFromRequest(request, "fullParams.b", sensibleDefaults.fullParams.b()) + )) + .prioParams(new Bm25Parameters( + doubleFromRequest(request, "prioParams.k1", sensibleDefaults.prioParams.k()), + doubleFromRequest(request, "prioParams.b", sensibleDefaults.prioParams.b()) + )) + .temporalBias(ResultRankingParameters.TemporalBias.valueOf(stringFromRequest(request, "temporalBias", sensibleDefaults.temporalBias.toString()))) + .temporalBiasWeight(doubleFromRequest(request, "temporalBiasWeight", sensibleDefaults.temporalBiasWeight)) + .shortSentenceThreshold(intFromRequest(request, "shortSentenceThreshold", sensibleDefaults.shortSentenceThreshold)) + .shortSentencePenalty(doubleFromRequest(request, "shortSentencePenalty", sensibleDefaults.shortSentencePenalty)) + .bm25FullWeight(doubleFromRequest(request, "bm25FullWeight", sensibleDefaults.bm25FullWeight)) + .bm25NgramWeight(doubleFromRequest(request, "bm25NgramWeight", sensibleDefaults.bm25NgramWeight)) + .bm25PrioWeight(doubleFromRequest(request, "bm25PrioWeight", sensibleDefaults.bm25PrioWeight)) + .build(); + } + + double doubleFromRequest(Request request, String param, double defaultValue) { + return Strings.isNullOrEmpty(request.queryParams(param)) ? defaultValue : Double.parseDouble(request.queryParams(param)); + } + + int intFromRequest(Request request, String param, int defaultValue) { + return Strings.isNullOrEmpty(request.queryParams(param)) ? defaultValue : Integer.parseInt(request.queryParams(param)); + } + + String stringFromRequest(Request request, String param, String defaultValue) { + return Strings.isNullOrEmpty(request.queryParams(param)) ? defaultValue : request.queryParams(param); + } } diff --git a/code/services-core/query-service/java/nu/marginalia/query/QueryService.java b/code/services-core/query-service/java/nu/marginalia/query/QueryService.java index d8a9c526..5a2cc82f 100644 --- a/code/services-core/query-service/java/nu/marginalia/query/QueryService.java +++ b/code/services-core/query-service/java/nu/marginalia/query/QueryService.java @@ -31,7 +31,11 @@ public class QueryService extends Service { List.of(queryGRPCService, domainLinksService)); - Spark.get("/public/search", queryBasicInterface::handle); + Spark.get("/public/search", queryBasicInterface::handleBasic); + + if (!Boolean.getBoolean("noQdebug")) { + Spark.get("/public/qdebug", queryBasicInterface::handleAdvanced); + } Spark.exception(Exception.class, (e, request, response) -> { response.status(500); diff --git a/code/services-core/query-service/resources/templates/qdebug.hdb b/code/services-core/query-service/resources/templates/qdebug.hdb new file mode 100644 index 00000000..f9c3917f --- /dev/null +++ b/code/services-core/query-service/resources/templates/qdebug.hdb @@ -0,0 +1,121 @@ + + + + + + + Query Service + + +
+

Query Debug Service

+
+
+
+
+
+
+ + {{#with rankingParams}} + +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ + {{/with}} +
+ + + +{{#if specs.query.compiledQuery}} +
+

Specs

+ + + + + + +{{#each specs.query.searchTermCoherences}} + + + + +{{/each}} +
Compiled Query{{specs.query.compiledQuery}}
Search Terms Include{{#each specs.query.searchTermsInclude}} {{.}} {{/each}}
Search Terms Exclude{{#each specs.query.searchTermsExclude}} {{.}} {{/each}}
Search Terms Advice{{#each specs.query.searchTermsAdvice}} {{.}} {{/each}}
Search Terms Priority{{#each specs.query.searchTermsPriority}} {{.}} {{/each}}
Coherence Requirement + {{#each .}} + {{.}} + {{/each}} +
+{{/if}} + +{{#if results}} +
+

Results

+{{#each results}} +
+ {{title}} +
{{url}}
+

{{description}}

+ +
dataHash: {{dataHash}} wordsTotal: {{wordsTotal}} bestPositions: {{bestPositions}} rankingScore: {{rankingScore}} urlQuality: {{urlQuality}}
+
+{{/each}} +{{/if}} + +
+ + \ No newline at end of file