mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(qs) Additional info in query debug UI
This commit is contained in:
parent
e79ab0c70e
commit
eb74d08f2a
@ -97,7 +97,8 @@ public class IndexProtobufCodec {
|
||||
params.getTcfJaccardWeight(),
|
||||
params.getTcfOverlapWeight(),
|
||||
ResultRankingParameters.TemporalBias.valueOf(params.getTemporalBias().getBias().name()),
|
||||
params.getTemporalBiasWeight()
|
||||
params.getTemporalBiasWeight(),
|
||||
params.getExportDebugData()
|
||||
);
|
||||
}
|
||||
|
||||
@ -124,7 +125,8 @@ public class IndexProtobufCodec {
|
||||
.setBm25PrioWeight(rankingParams.bm25PrioWeight)
|
||||
.setTcfOverlapWeight(rankingParams.tcfOverlapWeight)
|
||||
.setTcfJaccardWeight(rankingParams.tcfJaccardWeight)
|
||||
.setTemporalBiasWeight(rankingParams.temporalBiasWeight);
|
||||
.setTemporalBiasWeight(rankingParams.temporalBiasWeight)
|
||||
.setExportDebugData(rankingParams.exportDebugData);
|
||||
|
||||
if (temporalBias != null && temporalBias.getBias() != RpcTemporalBias.Bias.NONE) {
|
||||
builder.setTemporalBias(temporalBias);
|
||||
|
@ -6,6 +6,9 @@ import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
|
||||
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
|
||||
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingDetails;
|
||||
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingInputs;
|
||||
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingOutputs;
|
||||
import nu.marginalia.index.query.limit.QueryStrategy;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.api.searchquery.model.query.ProcessedQuery;
|
||||
@ -126,7 +129,51 @@ public class QueryProtobufCodec {
|
||||
results.getDataHash(),
|
||||
results.getWordsTotal(),
|
||||
results.getBestPositions(),
|
||||
results.getRankingScore()
|
||||
results.getRankingScore(),
|
||||
convertRankingDetails(results.getRankingDetails())
|
||||
);
|
||||
}
|
||||
|
||||
private static ResultRankingDetails convertRankingDetails(RpcResultRankingDetails rankingDetails) {
|
||||
if (rankingDetails == null)
|
||||
return null;
|
||||
var inputs = rankingDetails.getInputs();
|
||||
var outputs = rankingDetails.getOutput();
|
||||
|
||||
return new ResultRankingDetails(
|
||||
convertRankingInputs(inputs),
|
||||
convertRankingOutputs(outputs)
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
private static ResultRankingOutputs convertRankingOutputs(RpcResultRankingOutputs outputs) {
|
||||
return new ResultRankingOutputs(
|
||||
outputs.getAverageSentenceLengthPenalty(),
|
||||
outputs.getQualityPenalty(),
|
||||
outputs.getRankingBonus(),
|
||||
outputs.getTopologyBonus(),
|
||||
outputs.getDocumentLengthPenalty(),
|
||||
outputs.getTemporalBias(),
|
||||
outputs.getFlagsPenalty(),
|
||||
outputs.getOverallPart(),
|
||||
outputs.getTcfOverlap(),
|
||||
outputs.getTcfJaccard(),
|
||||
outputs.getBM25F(),
|
||||
outputs.getBM25N(),
|
||||
outputs.getBM25P()
|
||||
);
|
||||
}
|
||||
|
||||
private static ResultRankingInputs convertRankingInputs(RpcResultRankingInputs inputs) {
|
||||
return new ResultRankingInputs(
|
||||
inputs.getRank(),
|
||||
inputs.getAsl(),
|
||||
inputs.getQuality(),
|
||||
inputs.getSize(),
|
||||
inputs.getFlagsPenalty(),
|
||||
inputs.getTopology(),
|
||||
inputs.getYear()
|
||||
);
|
||||
}
|
||||
|
||||
@ -209,7 +256,8 @@ public class QueryProtobufCodec {
|
||||
rpcDecoratedResultItem.getDataHash(),
|
||||
rpcDecoratedResultItem.getWordsTotal(),
|
||||
rpcDecoratedResultItem.getBestPositions(),
|
||||
rpcDecoratedResultItem.getRankingScore()
|
||||
rpcDecoratedResultItem.getRankingScore(),
|
||||
convertRankingDetails(rpcDecoratedResultItem.getRankingDetails())
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -2,6 +2,7 @@ package nu.marginalia.api.searchquery.model.results;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.ToString;
|
||||
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingDetails;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
|
||||
@ -33,6 +34,9 @@ public class DecoratedSearchResultItem implements Comparable<DecoratedSearchResu
|
||||
public final long bestPositions;
|
||||
public final double rankingScore;
|
||||
|
||||
@Nullable
|
||||
public ResultRankingDetails rankingDetails;
|
||||
|
||||
public long documentId() {
|
||||
return rawIndexResult.getDocumentId();
|
||||
}
|
||||
@ -67,7 +71,10 @@ public class DecoratedSearchResultItem implements Comparable<DecoratedSearchResu
|
||||
long dataHash,
|
||||
int wordsTotal,
|
||||
long bestPositions,
|
||||
double rankingScore)
|
||||
double rankingScore,
|
||||
@Nullable
|
||||
ResultRankingDetails rankingDetails
|
||||
)
|
||||
{
|
||||
this.rawIndexResult = rawIndexResult;
|
||||
this.url = url;
|
||||
@ -81,6 +88,7 @@ public class DecoratedSearchResultItem implements Comparable<DecoratedSearchResu
|
||||
this.wordsTotal = wordsTotal;
|
||||
this.bestPositions = bestPositions;
|
||||
this.rankingScore = rankingScore;
|
||||
this.rankingDetails = rankingDetails;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -41,6 +41,8 @@ public class ResultRankingParameters {
|
||||
public TemporalBias temporalBias;
|
||||
public double temporalBiasWeight;
|
||||
|
||||
public boolean exportDebugData;
|
||||
|
||||
public static ResultRankingParameters sensibleDefaults() {
|
||||
return builder()
|
||||
.fullParams(new Bm25Parameters(1.2, 0.5))
|
||||
@ -58,6 +60,7 @@ public class ResultRankingParameters {
|
||||
.tcfJaccardWeight(1)
|
||||
.temporalBias(TemporalBias.NONE)
|
||||
.temporalBiasWeight(1. / (5.))
|
||||
.exportDebugData(false)
|
||||
.build();
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,7 @@
|
||||
package nu.marginalia.api.searchquery.model.results.debug;
|
||||
|
||||
public record ResultRankingDetails(ResultRankingInputs inputs, ResultRankingOutputs outputs)
|
||||
{
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,3 @@
|
||||
package nu.marginalia.api.searchquery.model.results.debug;
|
||||
|
||||
/**
 * Integer inputs captured by the result valuator for one document, exported
 * for the query debug view.
 *
 * <p>NOTE(review): the component meanings below are inferred from their names
 * and should be confirmed against the valuator code.</p>
 *
 * @param rank         presumably the document's rank value
 * @param asl          presumably the average sentence length
 * @param quality      presumably the document quality rating
 * @param size         presumably the document size
 * @param flagsPenalty penalty derived from document flags
 * @param topology     presumably the topology value of the document
 * @param year         presumably the publication year
 */
public record ResultRankingInputs(
        int rank,
        int asl,
        int quality,
        int size,
        int flagsPenalty,
        int topology,
        int year
) {
}
|
@ -0,0 +1,17 @@
|
||||
package nu.marginalia.api.searchquery.model.results.debug;
|
||||
|
||||
/**
 * The individual score components computed by the result valuator for one
 * document, exported for the query debug view.
 *
 * <p>NOTE(review): how these components are combined into the final ranking
 * score is not visible here; see the valuator for the actual formula.</p>
 *
 * @param averageSentenceLengthPenalty penalty attributed to average sentence length
 * @param qualityPenalty               penalty attributed to document quality
 * @param rankingBonus                 bonus attributed to the document's rank
 * @param topologyBonus                bonus attributed to topology
 * @param documentLengthPenalty        penalty attributed to document length
 * @param temporalBias                 temporal bias component
 * @param flagsPenalty                 penalty attributed to document flags
 * @param overallPart                  the overall (document-level) part of the score
 * @param tcfOverlap                   term-coherence overlap component
 * @param tcfJaccard                   term-coherence Jaccard component
 * @param bM25F                        BM25 component (presumably "full" — confirm)
 * @param bM25N                        BM25 component (presumably "ngram" — confirm)
 * @param bM25P                        BM25 component (presumably "priority" — confirm)
 */
public record ResultRankingOutputs(
        double averageSentenceLengthPenalty,
        double qualityPenalty,
        double rankingBonus,
        double topologyBonus,
        double documentLengthPenalty,
        double temporalBias,
        double flagsPenalty,
        double overallPart,
        double tcfOverlap,
        double tcfJaccard,
        double bM25F,
        double bM25N,
        double bM25P
) {}
|
@ -92,6 +92,7 @@ message RpcDecoratedResultItem {
|
||||
int32 wordsTotal = 10;
|
||||
double rankingScore = 11; // The ranking score of this search result item, lower is better
|
||||
int64 bestPositions = 12;
|
||||
RpcResultRankingDetails rankingDetails = 13; // optional, only present if exportDebugData is true in RpcResultRankingParameters
|
||||
}
|
||||
|
||||
/** A raw index-service view of a search result */
|
||||
@ -136,6 +137,38 @@ message RpcResultRankingParameters {
|
||||
double tcfJaccardWeight = 15;
|
||||
RpcTemporalBias temporalBias = 16;
|
||||
double temporalBiasWeight = 17;
|
||||
bool exportDebugData = 18;
|
||||
}
|
||||
|
||||
/* Debug information describing how a result's ranking score was derived;
 * carried in RpcDecoratedResultItem.rankingDetails */
message RpcResultRankingDetails {
    RpcResultRankingInputs inputs = 1;   // values read by the ranking calculation
    RpcResultRankingOutputs output = 2;  // score components it produced
}
|
||||
|
||||
/* Integer inputs to the ranking calculation for one document (debug data) */
message RpcResultRankingInputs {
    int32 rank = 1;
    int32 asl = 2;           // presumably average sentence length -- confirm
    int32 quality = 3;
    int32 size = 4;
    int32 flagsPenalty = 5;
    int32 topology = 6;
    int32 year = 7;
}
|
||||
|
||||
/* Individual score components produced by the ranking calculation for one
 * document (debug data) */
message RpcResultRankingOutputs {
    // Document-level penalties and bonuses
    double averageSentenceLengthPenalty = 1;
    double qualityPenalty = 2;
    double rankingBonus = 3;
    double topologyBonus = 4;
    double documentLengthPenalty = 5;
    double temporalBias = 6;
    double flagsPenalty = 7;
    double overallPart = 8;

    // Term-level components
    double tcfOverlap = 9;
    double tcfJaccard = 10;
    double bM25F = 11;
    double bM25N = 12;
    double bM25P = 13;
}
|
||||
|
||||
/* Defines a single subquery */
|
||||
|
@ -11,7 +11,6 @@ import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
/** Responsible for expanding a query, that is creating alternative branches of query execution
|
||||
* to increase the number of results
|
||||
|
@ -14,6 +14,9 @@ import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
|
||||
import nu.marginalia.api.searchquery.model.compiled.CqDataInt;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
|
||||
import nu.marginalia.api.searchquery.model.results.*;
|
||||
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingDetails;
|
||||
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingInputs;
|
||||
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingOutputs;
|
||||
import nu.marginalia.array.buffer.LongQueryBuffer;
|
||||
import nu.marginalia.index.index.StatefulIndex;
|
||||
import nu.marginalia.index.model.SearchParameters;
|
||||
@ -160,6 +163,12 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
|
||||
.setBestPositions(result.bestPositions)
|
||||
.setRawItem(rawItem);
|
||||
|
||||
var rankingDetails = convertRankingDetails(result.rankingDetails);
|
||||
if (rankingDetails != null) {
|
||||
logger.info(queryMarker, "Ranking details: {}", rankingDetails);
|
||||
decoratedBuilder.setRankingDetails(rankingDetails);
|
||||
}
|
||||
|
||||
if (result.pubYear != null) {
|
||||
decoratedBuilder.setPubYear(result.pubYear);
|
||||
}
|
||||
@ -174,6 +183,47 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
|
||||
}
|
||||
}
|
||||
|
||||
private RpcResultRankingDetails convertRankingDetails(ResultRankingDetails rankingDetails) {
|
||||
if (rankingDetails == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return RpcResultRankingDetails.newBuilder()
|
||||
.setInputs(convertRankingInputs(rankingDetails.inputs()))
|
||||
.setOutput(convertRankingOutput(rankingDetails.outputs()))
|
||||
.build();
|
||||
}
|
||||
|
||||
private RpcResultRankingOutputs convertRankingOutput(ResultRankingOutputs outputs) {
|
||||
return RpcResultRankingOutputs.newBuilder()
|
||||
.setAverageSentenceLengthPenalty(outputs.averageSentenceLengthPenalty())
|
||||
.setQualityPenalty(outputs.qualityPenalty())
|
||||
.setRankingBonus(outputs.rankingBonus())
|
||||
.setTopologyBonus(outputs.topologyBonus())
|
||||
.setDocumentLengthPenalty(outputs.documentLengthPenalty())
|
||||
.setTemporalBias(outputs.temporalBias())
|
||||
.setFlagsPenalty(outputs.flagsPenalty())
|
||||
.setOverallPart(outputs.overallPart())
|
||||
.setTcfOverlap(outputs.tcfOverlap())
|
||||
.setTcfJaccard(outputs.tcfJaccard())
|
||||
.setBM25F(outputs.bM25F())
|
||||
.setBM25N(outputs.bM25N())
|
||||
.setBM25P(outputs.bM25P())
|
||||
.build();
|
||||
}
|
||||
|
||||
private RpcResultRankingInputs convertRankingInputs(ResultRankingInputs inputs) {
|
||||
return RpcResultRankingInputs.newBuilder()
|
||||
.setRank(inputs.rank())
|
||||
.setAsl(inputs.asl())
|
||||
.setQuality(inputs.quality())
|
||||
.setSize(inputs.size())
|
||||
.setFlagsPenalty(inputs.flagsPenalty())
|
||||
.setTopology(inputs.topology())
|
||||
.setYear(inputs.year())
|
||||
.build();
|
||||
}
|
||||
|
||||
// exists for test access
|
||||
@SneakyThrows
|
||||
SearchResultSet justQuery(SearchSpecification specsSet) {
|
||||
|
@ -110,7 +110,8 @@ public class IndexResultValuationContext {
|
||||
docMetadata,
|
||||
htmlFeatures,
|
||||
5000, // use a dummy value here as it's not present in the index
|
||||
rankingContext);
|
||||
rankingContext,
|
||||
null);
|
||||
|
||||
if (searchResult.hasPrioTerm) {
|
||||
score = 0.75 * score;
|
||||
|
@ -13,6 +13,7 @@ import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggre
|
||||
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
|
||||
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
|
||||
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingDetails;
|
||||
import nu.marginalia.index.index.StatefulIndex;
|
||||
import nu.marginalia.index.model.SearchParameters;
|
||||
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
|
||||
@ -25,6 +26,7 @@ import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.sql.SQLException;
|
||||
import java.util.*;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
@Singleton
|
||||
public class IndexResultValuatorService {
|
||||
@ -155,6 +157,17 @@ public class IndexResultValuatorService {
|
||||
DocdbUrlDetail docData,
|
||||
CompiledQueryLong wordMetas,
|
||||
ResultRankingContext rankingContext) {
|
||||
|
||||
ResultRankingDetailsExtractor detailsExtractor = new ResultRankingDetailsExtractor();
|
||||
Consumer<ResultRankingDetails> detailConsumer = rankingContext.params.exportDebugData ? detailsExtractor::set : null;
|
||||
|
||||
double score = resultValuator.calculateSearchResultValue(wordMetas,
|
||||
result.encodedDocMetadata,
|
||||
result.htmlFeatures,
|
||||
docData.wordsTotal(),
|
||||
rankingContext,
|
||||
detailConsumer);
|
||||
|
||||
return new DecoratedSearchResultItem(
|
||||
result,
|
||||
docData.url(),
|
||||
@ -167,15 +180,22 @@ public class IndexResultValuatorService {
|
||||
docData.dataHash(),
|
||||
docData.wordsTotal(),
|
||||
bestPositions(wordMetas),
|
||||
|
||||
resultValuator.calculateSearchResultValue(wordMetas,
|
||||
result.encodedDocMetadata,
|
||||
result.htmlFeatures,
|
||||
docData.wordsTotal(),
|
||||
rankingContext)
|
||||
score,
|
||||
detailsExtractor.get()
|
||||
);
|
||||
}
|
||||
|
||||
private static class ResultRankingDetailsExtractor {
|
||||
private ResultRankingDetails value = null;
|
||||
|
||||
public ResultRankingDetails get() {
|
||||
return value;
|
||||
}
|
||||
public void set(ResultRankingDetails value) {
|
||||
this.value = value;
|
||||
}
|
||||
}
|
||||
|
||||
private long bestPositions(CompiledQueryLong wordMetas) {
|
||||
LongSet positionsSet = CompiledQueryAggregates.positionsAggregate(wordMetas, WordMetadata::decodePositions);
|
||||
|
||||
|
@ -3,6 +3,9 @@ package nu.marginalia.ranking.results;
|
||||
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingDetails;
|
||||
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingInputs;
|
||||
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingOutputs;
|
||||
import nu.marginalia.model.crawl.HtmlFeature;
|
||||
import nu.marginalia.model.crawl.PubDate;
|
||||
import nu.marginalia.model.idx.DocumentFlags;
|
||||
@ -14,6 +17,9 @@ import com.google.inject.Singleton;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
@Singleton
|
||||
public class ResultValuator {
|
||||
final static double scalingFactor = 500.;
|
||||
@ -31,7 +37,9 @@ public class ResultValuator {
|
||||
long documentMetadata,
|
||||
int features,
|
||||
int length,
|
||||
ResultRankingContext ctx)
|
||||
ResultRankingContext ctx,
|
||||
@Nullable Consumer<ResultRankingDetails> detailsConsumer
|
||||
)
|
||||
{
|
||||
if (wordMeta.isEmpty())
|
||||
return Double.MAX_VALUE;
|
||||
@ -84,6 +92,36 @@ public class ResultValuator {
|
||||
double overallPartPositive = Math.max(0, overallPart);
|
||||
double overallPartNegative = -Math.min(0, overallPart);
|
||||
|
||||
if (null != detailsConsumer) {
|
||||
var details = new ResultRankingDetails(
|
||||
new ResultRankingInputs(
|
||||
rank,
|
||||
asl,
|
||||
quality,
|
||||
size,
|
||||
flagsPenalty,
|
||||
topology,
|
||||
year
|
||||
),
|
||||
new ResultRankingOutputs(
|
||||
averageSentenceLengthPenalty,
|
||||
qualityPenalty,
|
||||
rankingBonus,
|
||||
topologyBonus,
|
||||
documentLengthPenalty,
|
||||
temporalBias,
|
||||
flagsPenalty,
|
||||
overallPart,
|
||||
tcfOverlap,
|
||||
tcfJaccard,
|
||||
bM25F,
|
||||
bM25N,
|
||||
bM25P)
|
||||
);
|
||||
|
||||
detailsConsumer.accept(details);
|
||||
}
|
||||
|
||||
// Renormalize to 0...15, where 0 is the best possible score;
|
||||
// this is a historical artifact of the original ranking function
|
||||
return normalize(
|
||||
|
@ -62,16 +62,17 @@ class ResultValuatorTest {
|
||||
when(dict.getTermFreq("bob")).thenReturn(10);
|
||||
ResultRankingContext context = new ResultRankingContext(100000,
|
||||
ResultRankingParameters.sensibleDefaults(),
|
||||
new BitSet(),
|
||||
frequencyData,
|
||||
frequencyData);
|
||||
|
||||
long docMeta = docMetadata(0, 2010, 5, EnumSet.noneOf(DocumentFlags.class));
|
||||
int features = 0;
|
||||
|
||||
double titleOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, docMeta, features, 10_000, context);
|
||||
double titleLongOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, docMeta, features, 10_000, context);
|
||||
double highCountNoTitle = valuator.calculateSearchResultValue(highCountNoTitleSet, docMeta, features, 10_000, context);
|
||||
double highCountSubject = valuator.calculateSearchResultValue(highCountSubjectSet, docMeta, features, 10_000, context);
|
||||
double titleOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, docMeta, features, 10_000, context, null);
|
||||
double titleLongOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, docMeta, features, 10_000, context, null);
|
||||
double highCountNoTitle = valuator.calculateSearchResultValue(highCountNoTitleSet, docMeta, features, 10_000, context, null);
|
||||
double highCountSubject = valuator.calculateSearchResultValue(highCountSubjectSet, docMeta, features, 10_000, context, null);
|
||||
|
||||
System.out.println(titleOnlyLowCount);
|
||||
System.out.println(titleLongOnlyLowCount);
|
||||
|
@ -125,6 +125,7 @@ public class QueryBasicInterface {
|
||||
.bm25FullWeight(doubleFromRequest(request, "bm25FullWeight", sensibleDefaults.bm25FullWeight))
|
||||
.bm25NgramWeight(doubleFromRequest(request, "bm25NgramWeight", sensibleDefaults.bm25NgramWeight))
|
||||
.bm25PrioWeight(doubleFromRequest(request, "bm25PrioWeight", sensibleDefaults.bm25PrioWeight))
|
||||
.exportDebugData(true)
|
||||
.build();
|
||||
}
|
||||
|
||||
|
@ -112,6 +112,7 @@
|
||||
<p>{{description}}</p>
|
||||
|
||||
<div><small class="text-muted">dataHash: {{dataHash}} wordsTotal: {{wordsTotal}} bestPositions: {{bestPositions}} rankingScore: {{rankingScore}} urlQuality: {{urlQuality}}</small></div>
|
||||
<div>{{rankingDetails}}</div>
|
||||
</div>
|
||||
{{/each}}
|
||||
{{/if}}
|
||||
|
Loading…
Reference in New Issue
Block a user