(qs) Basic query debug feature

This commit is contained in:
Viktor Lofgren 2024-04-19 11:00:56 +02:00
parent 36cc62c10c
commit 2b811fb422
7 changed files with 246 additions and 22 deletions

View File

@ -1,11 +1,12 @@
package nu.marginalia.api.searchquery.model.results;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.EqualsAndHashCode;
import lombok.ToString;
import lombok.*;
@Builder @AllArgsConstructor @ToString @EqualsAndHashCode
@Builder
@AllArgsConstructor
@ToString
@EqualsAndHashCode
@Getter // getter for the mustache template engine's behalf
public class ResultRankingParameters {
/** Tuning for BM25 when applied to full document matches */

View File

@ -6,7 +6,9 @@ import io.grpc.stub.StreamObserver;
import io.prometheus.client.Histogram;
import lombok.SneakyThrows;
import nu.marginalia.api.searchquery.*;
import nu.marginalia.api.searchquery.model.query.ProcessedQuery;
import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.db.DomainBlacklist;
import nu.marginalia.index.api.IndexClient;
import nu.marginalia.functions.searchquery.svc.QueryFactory;
@ -51,7 +53,7 @@ public class QueryGRPCService extends QueryApiGrpc.QueryApiImplBase {
Integer.toString(request.getQueryLimits().getResultsTotal()))
.time(() -> {
var params = QueryProtobufCodec.convertRequest(request);
var query = queryFactory.createQuery(params);
var query = queryFactory.createQuery(params, null);
RpcIndexQuery indexRequest = QueryProtobufCodec.convertQuery(request, query);
List<RpcDecoratedResultItem> bestItems = executeQueries(indexRequest, request.getQueryLimits().getResultsTotal());
@ -81,16 +83,26 @@ public class QueryGRPCService extends QueryApiGrpc.QueryApiImplBase {
return blacklist.isBlacklisted(UrlIdCodec.getDomainId(item.getRawItem().getCombinedId()));
}
public List<DecoratedSearchResultItem> executeDirect(String originalQuery, QueryParams params, int count) {
var query = queryFactory.createQuery(params);
public DetailedDirectResult executeDirect(
String originalQuery,
QueryParams params,
ResultRankingParameters rankingParameters,
int count) {
return executeQueries(
var query = queryFactory.createQuery(params, rankingParameters);
var items = executeQueries(
QueryProtobufCodec.convertQuery(originalQuery, query),
count)
.stream().map(QueryProtobufCodec::convertQueryResult)
.toList();
return new DetailedDirectResult(query, items);
}
/**
 * Return value of {@code executeDirect}: the processed query that was built for the
 * request, bundled with the result items obtained by executing it.
 *
 * @param processedQuery the query object produced by the query factory
 * @param result         the result items after conversion by the protobuf codec
 */
public record DetailedDirectResult(ProcessedQuery processedQuery,
List<DecoratedSearchResultItem> result) {}
@SneakyThrows
List<RpcDecoratedResultItem> executeQueries(RpcIndexQuery indexRequest, int totalSize) {
var results = indexClient.executeQueries(indexRequest);

View File

@ -17,6 +17,7 @@ import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@ -37,7 +38,8 @@ public class QueryFactory {
public ProcessedQuery createQuery(QueryParams params) {
public ProcessedQuery createQuery(QueryParams params,
@Nullable ResultRankingParameters rankingParams) {
final var query = params.humanQuery();
if (query.length() > 1000) {
@ -156,6 +158,7 @@ public class QueryFactory {
.year(year)
.size(size)
.rank(rank)
.rankingParams(rankingParams)
.domains(domainIds)
.queryLimits(limits)
.searchSetIdentifier(params.identifier())

View File

@ -51,7 +51,7 @@ public class QueryFactoryTest {
new QueryLimits(100, 100, 100, 100),
"NONE",
QueryStrategy.AUTO,
ResultRankingParameters.TemporalBias.NONE)).specs;
ResultRankingParameters.TemporalBias.NONE), null).specs;
}

View File

@ -1,7 +1,10 @@
package nu.marginalia.query;
import com.google.common.base.Strings;
import com.google.gson.Gson;
import com.google.inject.Inject;
import nu.marginalia.api.searchquery.model.results.Bm25Parameters;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.functions.searchquery.QueryGRPCService;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.model.gson.GsonFactory;
@ -15,7 +18,8 @@ import java.io.IOException;
import java.util.Map;
public class QueryBasicInterface {
private final MustacheRenderer<Object> renderer;
private final MustacheRenderer<Object> basicRenderer;
private final MustacheRenderer<Object> qdebugRenderer;
private final Gson gson = GsonFactory.get();
private final QueryGRPCService queryGRPCService;
@ -25,35 +29,114 @@ public class QueryBasicInterface {
QueryGRPCService queryGRPCService
) throws IOException
{
this.renderer = rendererFactory.renderer("search");
this.basicRenderer = rendererFactory.renderer("search");
this.qdebugRenderer = rendererFactory.renderer("qdebug");
this.queryGRPCService = queryGRPCService;
}
public Object handle(Request request, Response response) {
String queryParam = request.queryParams("q");
if (queryParam == null) {
return renderer.render(new Object());
public Object handleBasic(Request request, Response response) {
String queryParams = request.queryParams("q");
if (queryParams == null) {
return basicRenderer.render(new Object());
}
int count = request.queryParams("count") == null ? 10 : Integer.parseInt(request.queryParams("count"));
int domainCount = request.queryParams("domainCount") == null ? 5 : Integer.parseInt(request.queryParams("domainCount"));
String set = request.queryParams("set") == null ? "" : request.queryParams("set");
var params = new QueryParams(queryParam, new QueryLimits(
var params = new QueryParams(queryParams, new QueryLimits(
domainCount, count, 250, 8192
), set);
var results = queryGRPCService.executeDirect(queryParam, params, count);
var detailedDirectResult = queryGRPCService.executeDirect(queryParams,
params,
ResultRankingParameters.sensibleDefaults(),
count);
var results = detailedDirectResult.result();
if (request.headers("Accept").contains("application/json")) {
response.type("application/json");
return gson.toJson(results);
}
else {
return renderer.render(
Map.of("query", queryParam,
return basicRenderer.render(
Map.of("query", queryParams,
"results", results)
);
}
}
/**
 * Handles the query debug page.
 * <p>
 * Without a {@code q} parameter, the debug form is rendered pre-filled with the default
 * ranking parameters. With a query, the ranking parameters are read from the request,
 * the query is executed directly against the query service, and the form is rendered
 * again together with the processed query specs and the results.
 *
 * @param request  the incoming request; reads {@code q}, {@code count}, {@code domainCount},
 *                 {@code set} and the individual ranking parameter fields
 * @param response the outgoing response (not modified here)
 * @return the rendered debug page
 * @throws NumberFormatException if a numeric parameter is present but not parseable
 */
public Object handleAdvanced(Request request, Response response) {
    String queryString = request.queryParams("q");
    if (queryString == null) {
        // Show the default query form if no query is given
        return qdebugRenderer.render(
                Map.of("rankingParams", ResultRankingParameters.sensibleDefaults())
        );
    }

    // Go through the shared helpers so that a blank parameter (e.g. "count=")
    // falls back to the default instead of failing in Integer.parseInt("")
    int count = intFromRequest(request, "count", 10);
    int domainCount = intFromRequest(request, "domainCount", 5);
    String set = stringFromRequest(request, "set", "");

    var queryParams = new QueryParams(queryString,
            new QueryLimits(domainCount, count, 250, 8192),
            set);
    var rankingParams = rankingParamsFromRequest(request);

    var detailedDirectResult = queryGRPCService.executeDirect(queryString,
            queryParams,
            rankingParams,
            count);
    var results = detailedDirectResult.result();

    return qdebugRenderer.render(
            Map.of("query", queryString,
                    "specs", detailedDirectResult.processedQuery().specs,
                    // Pass the request's ranking parameters directly; they can't be taken
                    // from the specs, where they are nulled when equal to the defaults
                    "rankingParams", rankingParams,
                    "results", results)
    );
}
/**
 * Assembles a {@link ResultRankingParameters} from the request's query string.
 * <p>
 * Each field is looked up under its own parameter name; a parameter that is missing or
 * empty takes its value from {@link ResultRankingParameters#sensibleDefaults()}.
 *
 * @param request the request carrying the ranking parameter overrides
 * @return the ranking parameters to use for this request
 * @throws NumberFormatException    if a numeric parameter is present but malformed
 * @throws IllegalArgumentException if {@code temporalBias} does not name a known bias
 */
private ResultRankingParameters rankingParamsFromRequest(Request request) {
    final var defaults = ResultRankingParameters.sensibleDefaults();
    final var builder = ResultRankingParameters.builder();

    // Document-level bonuses and penalties
    builder.domainRankBonus(doubleFromRequest(request, "domainRankBonus", defaults.domainRankBonus));
    builder.qualityPenalty(doubleFromRequest(request, "qualityPenalty", defaults.qualityPenalty));
    builder.shortDocumentThreshold(intFromRequest(request, "shortDocumentThreshold", defaults.shortDocumentThreshold));
    builder.shortDocumentPenalty(doubleFromRequest(request, "shortDocumentPenalty", defaults.shortDocumentPenalty));

    // Term coherence weights
    builder.tcfJaccardWeight(doubleFromRequest(request, "tcfJaccardWeight", defaults.tcfJaccardWeight));
    builder.tcfOverlapWeight(doubleFromRequest(request, "tcfOverlapWeight", defaults.tcfOverlapWeight));

    // BM25 tuning; note that the form field is called "k1" while the accessor is k()
    builder.fullParams(new Bm25Parameters(
            doubleFromRequest(request, "fullParams.k1", defaults.fullParams.k()),
            doubleFromRequest(request, "fullParams.b", defaults.fullParams.b())
    ));
    builder.prioParams(new Bm25Parameters(
            doubleFromRequest(request, "prioParams.k1", defaults.prioParams.k()),
            doubleFromRequest(request, "prioParams.b", defaults.prioParams.b())
    ));

    // Temporal bias is transmitted as the enum constant's name
    String temporalBiasName = stringFromRequest(request, "temporalBias", defaults.temporalBias.toString());
    builder.temporalBias(ResultRankingParameters.TemporalBias.valueOf(temporalBiasName));
    builder.temporalBiasWeight(doubleFromRequest(request, "temporalBiasWeight", defaults.temporalBiasWeight));

    // Sentence-level penalties and BM25 weights
    builder.shortSentenceThreshold(intFromRequest(request, "shortSentenceThreshold", defaults.shortSentenceThreshold));
    builder.shortSentencePenalty(doubleFromRequest(request, "shortSentencePenalty", defaults.shortSentencePenalty));
    builder.bm25FullWeight(doubleFromRequest(request, "bm25FullWeight", defaults.bm25FullWeight));
    builder.bm25NgramWeight(doubleFromRequest(request, "bm25NgramWeight", defaults.bm25NgramWeight));
    builder.bm25PrioWeight(doubleFromRequest(request, "bm25PrioWeight", defaults.bm25PrioWeight));

    return builder.build();
}
/**
 * Reads a query parameter as a {@code double}.
 *
 * @param request      the request to read from
 * @param param        name of the query parameter
 * @param defaultValue value to return when the parameter is absent or empty
 * @return the parsed parameter value, or {@code defaultValue}
 * @throws NumberFormatException if the parameter is present but not a parseable double
 */
double doubleFromRequest(Request request, String param, double defaultValue) {
    final String raw = request.queryParams(param);
    if (Strings.isNullOrEmpty(raw)) {
        return defaultValue;
    }
    return Double.parseDouble(raw);
}
/**
 * Reads a query parameter as an {@code int}.
 *
 * @param request      the request to read from
 * @param param        name of the query parameter
 * @param defaultValue value to return when the parameter is absent or empty
 * @return the parsed parameter value, or {@code defaultValue}
 * @throws NumberFormatException if the parameter is present but not a parseable integer
 */
int intFromRequest(Request request, String param, int defaultValue) {
    final String raw = request.queryParams(param);
    if (Strings.isNullOrEmpty(raw)) {
        return defaultValue;
    }
    return Integer.parseInt(raw);
}
/**
 * Reads a query parameter as a string.
 *
 * @param request      the request to read from
 * @param param        name of the query parameter
 * @param defaultValue value to return when the parameter is absent or empty
 * @return the parameter value, or {@code defaultValue}
 */
String stringFromRequest(Request request, String param, String defaultValue) {
    final String raw = request.queryParams(param);
    if (Strings.isNullOrEmpty(raw)) {
        return defaultValue;
    }
    return raw;
}
}

View File

@ -31,7 +31,11 @@ public class QueryService extends Service {
List.of(queryGRPCService, domainLinksService));
Spark.get("/public/search", queryBasicInterface::handle);
Spark.get("/public/search", queryBasicInterface::handleBasic);
if (!Boolean.getBoolean("noQdebug")) {
Spark.get("/public/qdebug", queryBasicInterface::handleAdvanced);
}
Spark.exception(Exception.class, (e, request, response) -> {
response.status(500);

View File

@ -0,0 +1,121 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-T3c6CoIi6uLrA9TneNEoa7RxnatzjcDSCmG1MXxSR1GAsXEV/Dwwykc2MPK8M2HN" crossorigin="anonymous">
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Query Service</title>
</head>
<body>
<div class="container">
<h1 class="my-3">Query Debug Service</h1>
<form action="/qdebug" method="get">
<div class="form-group"><label for="q">Search Query</label></div>
<div class="row my-2">
<div class="col-sm-8"><input type="text" class="form-control" id="q" name="q" value="{{query}}"></div>
<div class="col-sm-2"><button type="submit" class="btn btn-primary">Submit</button></div>
</div>
{{#with rankingParams}}
<!-- params -->
<div class="row my-2">
<div class="col-sm-2"><label for="domainRankBonus">Domain Rank Bonus</label></div>
<div class="col-sm-2"><input type="text" class="form-control" id="domainRankBonus" name="domainRankBonus" value="{{domainRankBonus}}"></div>
<div class="col-sm-2"><label for="qualityPenalty">Quality Penalty</label></div>
<div class="col-sm-2"><input type="text" class="form-control" id="qualityPenalty" name="qualityPenalty" value="{{qualityPenalty}}"></div>
</div>
<div class="row my-2">
<div class="col-sm-2"><label for="shortDocumentThreshold">Short Document Threshold</label></div>
<div class="col-sm-2"><input type="number" class="form-control" id="shortDocumentThreshold" name="shortDocumentThreshold" value="{{shortDocumentThreshold}}"></div>
<div class="col-sm-2"><label for="shortDocumentPenalty">Short Document Penalty</label></div>
<div class="col-sm-2"><input type="text" class="form-control" id="shortDocumentPenalty" name="shortDocumentPenalty" value="{{shortDocumentPenalty}}"></div>
</div>
<div class="row my-2">
<div class="col-sm-2"><label for="tcfJaccardWeight">TCF Jaccard Weight</label></div>
<div class="col-sm-2"><input type="text" class="form-control" id="tcfJaccardWeight" name="tcfJaccardWeight" value="{{tcfJaccardWeight}}"></div>
<div class="col-sm-2"><label for="tcfOverlapWeight">TCF Overlap Weight</label></div>
<div class="col-sm-2"><input type="text" class="form-control" id="tcfOverlapWeight" name="tcfOverlapWeight" value="{{tcfOverlapWeight}}"></div>
</div>
<div class="row my-2">
<div class="col-sm-2"><label for="fullParams.k1">Full Params K1</label></div>
<div class="col-sm-2"><input type="text" class="form-control" id="fullParams.k1" name="fullParams.k1" value="{{fullParams.k}}"></div>
<div class="col-sm-2"><label for="fullParams.b">Full Params B</label></div>
<div class="col-sm-2"><input type="text" class="form-control" id="fullParams.b" name="fullParams.b" value="{{fullParams.b}}"></div>
</div>
<div class="row my-2">
<div class="col-sm-2"><label for="prioParams.k1">Prio Params K1</label></div>
<div class="col-sm-2"><input type="text" class="form-control" id="prioParams.k1" name="prioParams.k1" value="{{prioParams.k}}"></div>
<div class="col-sm-2"><label for="prioParams.b">Prio Params B</label></div>
<div class="col-sm-2"><input type="text" class="form-control" id="prioParams.b" name="prioParams.b" value="{{prioParams.b}}"></div>
</div>
<div class="row my-2">
<div class="col-sm-2"><label for="temporalBias">Temporal Bias</label></div>
<div class="col-sm-2">
<select class="form-select" id="temporalBias" name="temporalBias">
<option value="NONE" {{#eq temporalBias.name "NONE"}}selected{{/eq}}>NONE</option>
<option value="RECENT" {{#eq temporalBias.name "RECENT"}}selected{{/eq}}>RECENT</option>
<option value="OLD" {{#eq temporalBias.name "OLD"}}selected{{/eq}}>OLD</option>
</select>
</div>
<div class="col-sm-2"><label for="temporalBiasWeight">Temporal Bias Weight</label></div>
<div class="col-sm-2"><input type="text" class="form-control" id="temporalBiasWeight" name="temporalBiasWeight" value="{{temporalBiasWeight}}"></div>
</div>
<div class="row my-2">
<div class="col-sm-2"><label for="shortSentenceThreshold">Short Sentence Threshold</label></div>
<div class="col-sm-2"><input type="number" min="1" max="4" step="1" class="form-control" id="shortSentenceThreshold" name="shortSentenceThreshold" value="{{shortSentenceThreshold}}"></div>
<div class="col-sm-2"><label for="shortSentencePenalty">Short Sentence Penalty</label></div>
<div class="col-sm-2"><input type="text" class="form-control" id="shortSentencePenalty" name="shortSentencePenalty" value="{{shortSentencePenalty}}"></div>
</div>
<div class="row my-2">
<div class="col-sm-2"><label for="bm25FullWeight">BM25 Full Weight</label></div>
<div class="col-sm-2"><input type="text" class="form-control" id="bm25FullWeight" name="bm25FullWeight" value="{{bm25FullWeight}}"></div>
<div class="col-sm-2"><label for="bm25NgramWeight">BM25 Ngram Weight</label></div>
<div class="col-sm-2"><input type="text" class="form-control" id="bm25NgramWeight" name="bm25NgramWeight" value="{{bm25NgramWeight}}"></div>
<div class="col-sm-2"><label for="bm25PrioWeight">BM25 Prio Weight</label></div>
<div class="col-sm-2"><input type="text" class="form-control" id="bm25PrioWeight" name="bm25PrioWeight" value="{{bm25PrioWeight}}"></div>
</div>
{{/with}}
</form>
{{#if specs.query.compiledQuery}}
<hr>
<h2 class="my-3">Specs</h2>
<table class="table">
<tr> <th title="infix notation query language expression">Compiled Query</th><td>{{specs.query.compiledQuery}}</td> </tr>
<tr> <th title="mandatory terms">Search Terms Include</th><td>{{#each specs.query.searchTermsInclude}} {{.}} {{/each}}</td> </tr>
<tr> <th title="terms that must be absent">Search Terms Exclude</th><td>{{#each specs.query.searchTermsExclude}} {{.}} {{/each}}</td> </tr>
<tr> <th title="mandatory terms, no effect on ranking">Search Terms Advice</th><td>{{#each specs.query.searchTermsAdvice}} {{.}} {{/each}}</td> </tr>
<tr> <th title="not mandatory, affects ranking">Search Terms Priority</th><td>{{#each specs.query.searchTermsPriority}} {{.}} {{/each}}</td> </tr>
{{#each specs.query.searchTermCoherences}}
<tr>
<th title="terms must appear close by">Coherence Requirement</th>
<td>
{{#each .}}
{{.}}
{{/each}}
</td>
</tr>
{{/each}}
</table>
{{/if}}
{{#if results}}
<hr>
<h2 class="my-3">Results</h2>
{{#each results}}
<div class="mb-3">
<a href="{{url}}">{{title}}</a>
<div><small class="text-muted">{{url}}</small></div>
<p>{{description}}</p>
<div><small class="text-muted">dataHash: {{dataHash}} wordsTotal: {{wordsTotal}} bestPositions: {{bestPositions}} rankingScore: {{rankingScore}} urlQuality: {{urlQuality}}</small></div>
</div>
{{/each}}
{{/if}}
</div>
</body>
</html>