(qs) Additional info in query debug UI

This commit is contained in:
Viktor Lofgren 2024-04-19 11:46:27 +02:00
parent e79ab0c70e
commit eb74d08f2a
16 changed files with 250 additions and 18 deletions

View File

@ -97,7 +97,8 @@ public class IndexProtobufCodec {
params.getTcfJaccardWeight(),
params.getTcfOverlapWeight(),
ResultRankingParameters.TemporalBias.valueOf(params.getTemporalBias().getBias().name()),
params.getTemporalBiasWeight()
params.getTemporalBiasWeight(),
params.getExportDebugData()
);
}
@ -124,7 +125,8 @@ public class IndexProtobufCodec {
.setBm25PrioWeight(rankingParams.bm25PrioWeight)
.setTcfOverlapWeight(rankingParams.tcfOverlapWeight)
.setTcfJaccardWeight(rankingParams.tcfJaccardWeight)
.setTemporalBiasWeight(rankingParams.temporalBiasWeight);
.setTemporalBiasWeight(rankingParams.temporalBiasWeight)
.setExportDebugData(rankingParams.exportDebugData);
if (temporalBias != null && temporalBias.getBias() != RpcTemporalBias.Bias.NONE) {
builder.setTemporalBias(temporalBias);

View File

@ -6,6 +6,9 @@ import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingDetails;
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingInputs;
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingOutputs;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.api.searchquery.model.query.ProcessedQuery;
@ -126,7 +129,51 @@ public class QueryProtobufCodec {
results.getDataHash(),
results.getWordsTotal(),
results.getBestPositions(),
results.getRankingScore()
results.getRankingScore(),
convertRankingDetails(results.getRankingDetails())
);
}
/**
 * Converts the RPC representation of a result's ranking details into the
 * model-side {@link ResultRankingDetails}.
 *
 * @param rankingDetails the protobuf message read from the decorated result item
 * @return the converted details, or {@code null} when the message carries
 *         neither inputs nor outputs (i.e. no debug data was exported)
 */
private static ResultRankingDetails convertRankingDetails(RpcResultRankingDetails rankingDetails) {
    // Protobuf getters for message-typed fields never return null; an unset
    // field comes back as the default instance.  A plain null check therefore
    // never fires, and every result would get a zero-filled details object.
    // Use the generated presence checks to detect "not set" instead.
    if (rankingDetails == null
            || !(rankingDetails.hasInputs() || rankingDetails.hasOutput())) {
        return null;
    }

    return new ResultRankingDetails(
            convertRankingInputs(rankingDetails.getInputs()),
            convertRankingOutputs(rankingDetails.getOutput())
    );
}
/**
 * Copies every field of the RPC ranking-outputs message into a
 * {@link ResultRankingOutputs} record.
 *
 * @param outputs the protobuf message to read from
 * @return a record holding the same thirteen values
 */
private static ResultRankingOutputs convertRankingOutputs(RpcResultRankingOutputs outputs) {
    // The constructor is positional: the argument order below must follow the
    // component order declared on the ResultRankingOutputs record.
    return new ResultRankingOutputs(
            outputs.getAverageSentenceLengthPenalty(), outputs.getQualityPenalty(),
            outputs.getRankingBonus(), outputs.getTopologyBonus(),
            outputs.getDocumentLengthPenalty(), outputs.getTemporalBias(),
            outputs.getFlagsPenalty(), outputs.getOverallPart(),
            outputs.getTcfOverlap(), outputs.getTcfJaccard(),
            outputs.getBM25F(), outputs.getBM25N(), outputs.getBM25P());
}
/**
 * Copies every field of the RPC ranking-inputs message into a
 * {@link ResultRankingInputs} record.
 *
 * @param inputs the protobuf message to read from
 * @return a record holding the same seven values
 */
private static ResultRankingInputs convertRankingInputs(RpcResultRankingInputs inputs) {
    // Read each field into a named local so the positional constructor call
    // below is easy to check against the record's component order.
    final int rank = inputs.getRank();
    final int asl = inputs.getAsl();
    final int quality = inputs.getQuality();
    final int size = inputs.getSize();
    final int flagsPenalty = inputs.getFlagsPenalty();
    final int topology = inputs.getTopology();
    final int year = inputs.getYear();

    return new ResultRankingInputs(rank, asl, quality, size, flagsPenalty, topology, year);
}
@ -209,7 +256,8 @@ public class QueryProtobufCodec {
rpcDecoratedResultItem.getDataHash(),
rpcDecoratedResultItem.getWordsTotal(),
rpcDecoratedResultItem.getBestPositions(),
rpcDecoratedResultItem.getRankingScore()
rpcDecoratedResultItem.getRankingScore(),
convertRankingDetails(rpcDecoratedResultItem.getRankingDetails())
);
}

View File

@ -2,6 +2,7 @@ package nu.marginalia.api.searchquery.model.results;
import lombok.Getter;
import lombok.ToString;
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingDetails;
import nu.marginalia.model.EdgeUrl;
import org.jetbrains.annotations.NotNull;
@ -33,6 +34,9 @@ public class DecoratedSearchResultItem implements Comparable<DecoratedSearchResu
public final long bestPositions;
public final double rankingScore;
@Nullable
public ResultRankingDetails rankingDetails;
public long documentId() {
return rawIndexResult.getDocumentId();
}
@ -67,7 +71,10 @@ public class DecoratedSearchResultItem implements Comparable<DecoratedSearchResu
long dataHash,
int wordsTotal,
long bestPositions,
double rankingScore)
double rankingScore,
@Nullable
ResultRankingDetails rankingDetails
)
{
this.rawIndexResult = rawIndexResult;
this.url = url;
@ -81,6 +88,7 @@ public class DecoratedSearchResultItem implements Comparable<DecoratedSearchResu
this.wordsTotal = wordsTotal;
this.bestPositions = bestPositions;
this.rankingScore = rankingScore;
this.rankingDetails = rankingDetails;
}
@Override

View File

@ -41,6 +41,8 @@ public class ResultRankingParameters {
public TemporalBias temporalBias;
public double temporalBiasWeight;
public boolean exportDebugData;
public static ResultRankingParameters sensibleDefaults() {
return builder()
.fullParams(new Bm25Parameters(1.2, 0.5))
@ -58,6 +60,7 @@ public class ResultRankingParameters {
.tcfJaccardWeight(1)
.temporalBias(TemporalBias.NONE)
.temporalBiasWeight(1. / (5.))
.exportDebugData(false)
.build();
}

View File

@ -0,0 +1,7 @@
package nu.marginalia.api.searchquery.model.results.debug;
/**
 * Pairs the inputs and the outputs recorded for a single search result's
 * ranking calculation, for display in the query debug UI.
 *
 * @param inputs  the values the ranking calculation started from
 * @param outputs the individual components the ranking calculation produced
 */
public record ResultRankingDetails(
        ResultRankingInputs inputs,
        ResultRankingOutputs outputs
) {}

View File

@ -0,0 +1,3 @@
package nu.marginalia.api.searchquery.model.results.debug;
/**
 * Integer inputs captured from a single search result's ranking calculation,
 * exported as debug data.
 *
 * @param rank         rank input
 * @param asl          presumably the average sentence length -- TODO confirm
 * @param quality      quality input
 * @param size         size input
 * @param flagsPenalty flags penalty input
 * @param topology     topology input
 * @param year         year input
 */
public record ResultRankingInputs(
        int rank,
        int asl,
        int quality,
        int size,
        int flagsPenalty,
        int topology,
        int year
) {
}

View File

@ -0,0 +1,17 @@
package nu.marginalia.api.searchquery.model.results.debug;
/**
 * Individual components produced by a single search result's ranking
 * calculation, exported as debug data.
 *
 * <p>The component order is part of the contract: the record is built with
 * positional constructor calls.
 */
public record ResultRankingOutputs(
        double averageSentenceLengthPenalty, double qualityPenalty,
        double rankingBonus, double topologyBonus,
        double documentLengthPenalty, double temporalBias,
        double flagsPenalty, double overallPart,
        double tcfOverlap, double tcfJaccard,
        double bM25F, double bM25N, double bM25P
) {}

View File

@ -92,6 +92,7 @@ message RpcDecoratedResultItem {
int32 wordsTotal = 10;
double rankingScore = 11; // The ranking score of this search result item, lower is better
int64 bestPositions = 12;
RpcResultRankingDetails rankingDetails = 13; // optional, only present if exportDebugData is true in RpcResultRankingParameters
}
/** A raw index-service view of a search result */
@ -136,6 +137,38 @@ message RpcResultRankingParameters {
double tcfJaccardWeight = 15;
RpcTemporalBias temporalBias = 16;
double temporalBiasWeight = 17;
bool exportDebugData = 18;
}
/* Debug breakdown of a single result's ranking calculation; carried in
 * RpcDecoratedResultItem.rankingDetails */
message RpcResultRankingDetails {
RpcResultRankingInputs inputs = 1; // values the ranking calculation started from
RpcResultRankingOutputs output = 2; // components the ranking calculation produced (note: singular field name)
}
/* Integer inputs of the ranking calculation, exported as debug data */
message RpcResultRankingInputs {
int32 rank = 1;
int32 asl = 2; // presumably average sentence length -- TODO confirm
int32 quality = 3;
int32 size = 4;
int32 flagsPenalty = 5;
int32 topology = 6;
int32 year = 7;
}
/* Individual components produced by the ranking calculation, exported as debug data */
message RpcResultRankingOutputs {
double averageSentenceLengthPenalty = 1;
double qualityPenalty = 2;
double rankingBonus = 3;
double topologyBonus = 4;
double documentLengthPenalty = 5;
double temporalBias = 6;
double flagsPenalty = 7;
double overallPart = 8;
double tcfOverlap = 9;
double tcfJaccard = 10;
// NOTE(review): the three BM25 fields presumably correspond to the full / ngram / prio
// BM25 weights in RpcResultRankingParameters -- confirm
double bM25F = 11;
double bM25N = 12;
double bM25P = 13;
}
/* Defines a single subquery */

View File

@ -11,7 +11,6 @@ import org.apache.commons.lang3.StringUtils;
import java.util.*;
import java.util.regex.Pattern;
import java.util.stream.IntStream;
/** Responsible for expanding a query, that is creating alternative branches of query execution
* to increase the number of results

View File

@ -14,6 +14,9 @@ import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
import nu.marginalia.api.searchquery.model.compiled.CqDataInt;
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
import nu.marginalia.api.searchquery.model.results.*;
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingDetails;
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingInputs;
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingOutputs;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.index.index.StatefulIndex;
import nu.marginalia.index.model.SearchParameters;
@ -160,6 +163,12 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
.setBestPositions(result.bestPositions)
.setRawItem(rawItem);
var rankingDetails = convertRankingDetails(result.rankingDetails);
if (rankingDetails != null) {
logger.info(queryMarker, "Ranking details: {}", rankingDetails);
decoratedBuilder.setRankingDetails(rankingDetails);
}
if (result.pubYear != null) {
decoratedBuilder.setPubYear(result.pubYear);
}
@ -174,6 +183,47 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
}
}
/**
 * Builds the RPC message for a result's ranking details.
 *
 * @param rankingDetails the model-side details, may be {@code null}
 * @return the corresponding protobuf message, or {@code null} when there are
 *         no details to convert
 */
private RpcResultRankingDetails convertRankingDetails(ResultRankingDetails rankingDetails) {
    if (rankingDetails != null) {
        var detailsBuilder = RpcResultRankingDetails.newBuilder();
        detailsBuilder.setInputs(convertRankingInputs(rankingDetails.inputs()));
        detailsBuilder.setOutput(convertRankingOutput(rankingDetails.outputs()));
        return detailsBuilder.build();
    }

    return null;
}
/**
 * Copies every component of a {@link ResultRankingOutputs} record into its
 * RPC message counterpart.
 *
 * @param outputs the model-side record to read from
 * @return the built protobuf message
 */
private RpcResultRankingOutputs convertRankingOutput(ResultRankingOutputs outputs) {
    var rpcOutputs = RpcResultRankingOutputs.newBuilder();

    rpcOutputs.setAverageSentenceLengthPenalty(outputs.averageSentenceLengthPenalty());
    rpcOutputs.setQualityPenalty(outputs.qualityPenalty());
    rpcOutputs.setRankingBonus(outputs.rankingBonus());
    rpcOutputs.setTopologyBonus(outputs.topologyBonus());
    rpcOutputs.setDocumentLengthPenalty(outputs.documentLengthPenalty());
    rpcOutputs.setTemporalBias(outputs.temporalBias());
    rpcOutputs.setFlagsPenalty(outputs.flagsPenalty());
    rpcOutputs.setOverallPart(outputs.overallPart());
    rpcOutputs.setTcfOverlap(outputs.tcfOverlap());
    rpcOutputs.setTcfJaccard(outputs.tcfJaccard());
    rpcOutputs.setBM25F(outputs.bM25F());
    rpcOutputs.setBM25N(outputs.bM25N());
    rpcOutputs.setBM25P(outputs.bM25P());

    return rpcOutputs.build();
}
/**
 * Copies every component of a {@link ResultRankingInputs} record into its
 * RPC message counterpart.
 *
 * @param inputs the model-side record to read from
 * @return the built protobuf message
 */
private RpcResultRankingInputs convertRankingInputs(ResultRankingInputs inputs) {
    var rpcInputs = RpcResultRankingInputs.newBuilder();

    rpcInputs.setRank(inputs.rank());
    rpcInputs.setAsl(inputs.asl());
    rpcInputs.setQuality(inputs.quality());
    rpcInputs.setSize(inputs.size());
    rpcInputs.setFlagsPenalty(inputs.flagsPenalty());
    rpcInputs.setTopology(inputs.topology());
    rpcInputs.setYear(inputs.year());

    return rpcInputs.build();
}
// exists for test access
@SneakyThrows
SearchResultSet justQuery(SearchSpecification specsSet) {

View File

@ -110,7 +110,8 @@ public class IndexResultValuationContext {
docMetadata,
htmlFeatures,
5000, // use a dummy value here as it's not present in the index
rankingContext);
rankingContext,
null);
if (searchResult.hasPrioTerm) {
score = 0.75 * score;

View File

@ -13,6 +13,7 @@ import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggre
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingDetails;
import nu.marginalia.index.index.StatefulIndex;
import nu.marginalia.index.model.SearchParameters;
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
@ -25,6 +26,7 @@ import org.slf4j.LoggerFactory;
import java.sql.SQLException;
import java.util.*;
import java.util.function.Consumer;
@Singleton
public class IndexResultValuatorService {
@ -155,6 +157,17 @@ public class IndexResultValuatorService {
DocdbUrlDetail docData,
CompiledQueryLong wordMetas,
ResultRankingContext rankingContext) {
ResultRankingDetailsExtractor detailsExtractor = new ResultRankingDetailsExtractor();
Consumer<ResultRankingDetails> detailConsumer = rankingContext.params.exportDebugData ? detailsExtractor::set : null;
double score = resultValuator.calculateSearchResultValue(wordMetas,
result.encodedDocMetadata,
result.htmlFeatures,
docData.wordsTotal(),
rankingContext,
detailConsumer);
return new DecoratedSearchResultItem(
result,
docData.url(),
@ -167,15 +180,22 @@ public class IndexResultValuatorService {
docData.dataHash(),
docData.wordsTotal(),
bestPositions(wordMetas),
resultValuator.calculateSearchResultValue(wordMetas,
result.encodedDocMetadata,
result.htmlFeatures,
docData.wordsTotal(),
rankingContext)
score,
detailsExtractor.get()
);
}
/**
 * Single-slot mutable holder for a {@link ResultRankingDetails}.  Its
 * {@code set} method can be handed out as a consumer, and the stored value is
 * read back afterwards with {@code get}; the slot stays {@code null} if
 * {@code set} is never called.
 */
private static class ResultRankingDetailsExtractor {
    /** The most recently stored details; null until {@link #set} is called. */
    private ResultRankingDetails captured;

    /** Stores the given details, replacing any previous value. */
    public void set(ResultRankingDetails value) {
        captured = value;
    }

    /** Returns the stored details, or null if nothing has been stored. */
    public ResultRankingDetails get() {
        return captured;
    }
}
private long bestPositions(CompiledQueryLong wordMetas) {
LongSet positionsSet = CompiledQueryAggregates.positionsAggregate(wordMetas, WordMetadata::decodePositions);

View File

@ -3,6 +3,9 @@ package nu.marginalia.ranking.results;
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingDetails;
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingInputs;
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingOutputs;
import nu.marginalia.model.crawl.HtmlFeature;
import nu.marginalia.model.crawl.PubDate;
import nu.marginalia.model.idx.DocumentFlags;
@ -14,6 +17,9 @@ import com.google.inject.Singleton;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.util.function.Consumer;
@Singleton
public class ResultValuator {
final static double scalingFactor = 500.;
@ -31,7 +37,9 @@ public class ResultValuator {
long documentMetadata,
int features,
int length,
ResultRankingContext ctx)
ResultRankingContext ctx,
@Nullable Consumer<ResultRankingDetails> detailsConsumer
)
{
if (wordMeta.isEmpty())
return Double.MAX_VALUE;
@ -84,6 +92,36 @@ public class ResultValuator {
double overallPartPositive = Math.max(0, overallPart);
double overallPartNegative = -Math.min(0, overallPart);
if (null != detailsConsumer) {
var details = new ResultRankingDetails(
new ResultRankingInputs(
rank,
asl,
quality,
size,
flagsPenalty,
topology,
year
),
new ResultRankingOutputs(
averageSentenceLengthPenalty,
qualityPenalty,
rankingBonus,
topologyBonus,
documentLengthPenalty,
temporalBias,
flagsPenalty,
overallPart,
tcfOverlap,
tcfJaccard,
bM25F,
bM25N,
bM25P)
);
detailsConsumer.accept(details);
}
// Renormalize to 0...15, where 0 is the best possible score;
// this is a historical artifact of the original ranking function
return normalize(

View File

@ -62,16 +62,17 @@ class ResultValuatorTest {
when(dict.getTermFreq("bob")).thenReturn(10);
ResultRankingContext context = new ResultRankingContext(100000,
ResultRankingParameters.sensibleDefaults(),
new BitSet(),
frequencyData,
frequencyData);
long docMeta = docMetadata(0, 2010, 5, EnumSet.noneOf(DocumentFlags.class));
int features = 0;
double titleOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, docMeta, features, 10_000, context);
double titleLongOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, docMeta, features, 10_000, context);
double highCountNoTitle = valuator.calculateSearchResultValue(highCountNoTitleSet, docMeta, features, 10_000, context);
double highCountSubject = valuator.calculateSearchResultValue(highCountSubjectSet, docMeta, features, 10_000, context);
double titleOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, docMeta, features, 10_000, context, null);
double titleLongOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, docMeta, features, 10_000, context, null);
double highCountNoTitle = valuator.calculateSearchResultValue(highCountNoTitleSet, docMeta, features, 10_000, context, null);
double highCountSubject = valuator.calculateSearchResultValue(highCountSubjectSet, docMeta, features, 10_000, context, null);
System.out.println(titleOnlyLowCount);
System.out.println(titleLongOnlyLowCount);

View File

@ -125,6 +125,7 @@ public class QueryBasicInterface {
.bm25FullWeight(doubleFromRequest(request, "bm25FullWeight", sensibleDefaults.bm25FullWeight))
.bm25NgramWeight(doubleFromRequest(request, "bm25NgramWeight", sensibleDefaults.bm25NgramWeight))
.bm25PrioWeight(doubleFromRequest(request, "bm25PrioWeight", sensibleDefaults.bm25PrioWeight))
.exportDebugData(true)
.build();
}

View File

@ -112,6 +112,7 @@
<p>{{description}}</p>
<div><small class="text-muted">dataHash: {{dataHash}} wordsTotal: {{wordsTotal}} bestPositions: {{bestPositions}} rankingScore: {{rankingScore}} urlQuality: {{urlQuality}}</small></div>
<div>{{rankingDetails}}</div>
</div>
{{/each}}
{{/if}}