(wip) Repair qdebug utility and show new ranking details

This commit is contained in:
Viktor Lofgren 2024-08-09 12:57:25 +02:00
parent 7babdb87d5
commit 2e89b55593
18 changed files with 361 additions and 149 deletions

View File

@ -4,9 +4,6 @@ import nu.marginalia.api.searchquery.model.query.SearchCoherenceConstraint;
import nu.marginalia.api.searchquery.model.query.SearchQuery;
import nu.marginalia.api.searchquery.model.results.Bm25Parameters;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingDetails;
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingInputs;
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingOutputs;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.index.query.limit.SpecificationLimitType;
@ -147,43 +144,4 @@ public class IndexProtobufCodec {
return builder.build();
}
/**
 * Converts model-side ranking details into their protobuf message form.
 *
 * @param rankingDetails the details to convert; may be {@code null}
 * @return the populated protobuf message, or {@code null} when
 *         {@code rankingDetails} is {@code null}
 */
public static RpcResultRankingDetails convertRankingDetails(ResultRankingDetails rankingDetails) {
    if (rankingDetails != null) {
        var detailsBuilder = RpcResultRankingDetails.newBuilder();
        detailsBuilder.setInputs(convertRankingInputs(rankingDetails.inputs()));
        detailsBuilder.setOutput(convertRankingOutput(rankingDetails.outputs()));
        return detailsBuilder.build();
    }

    // Nothing to convert; callers are expected to check for null
    return null;
}
/**
 * Copies every component of the ranking outputs record onto a new
 * protobuf outputs message.
 *
 * @param outputs the model-side ranking outputs
 * @return the equivalent protobuf message
 */
private static RpcResultRankingOutputs convertRankingOutput(ResultRankingOutputs outputs) {
    var rpcOutputs = RpcResultRankingOutputs.newBuilder();

    rpcOutputs.setAverageSentenceLengthPenalty(outputs.averageSentenceLengthPenalty());
    rpcOutputs.setQualityPenalty(outputs.qualityPenalty());
    rpcOutputs.setRankingBonus(outputs.rankingBonus());
    rpcOutputs.setTopologyBonus(outputs.topologyBonus());
    rpcOutputs.setDocumentLengthPenalty(outputs.documentLengthPenalty());
    rpcOutputs.setTemporalBias(outputs.temporalBias());
    rpcOutputs.setFlagsPenalty(outputs.flagsPenalty());
    rpcOutputs.setOverallPart(outputs.overallPart());
    rpcOutputs.setTcfAvgDist(outputs.tcfAvgDist());
    rpcOutputs.setTcfFirstPosition(outputs.tcfFirstPosition());
    // The record calls this component bm25; the message field is bm25Part
    rpcOutputs.setBm25Part(outputs.bm25());

    return rpcOutputs.build();
}
/**
 * Copies every component of the ranking inputs record onto a new
 * protobuf inputs message.
 *
 * @param inputs the model-side ranking inputs
 * @return the equivalent protobuf message
 */
private static RpcResultRankingInputs convertRankingInputs(ResultRankingInputs inputs) {
    var rpcInputs = RpcResultRankingInputs.newBuilder();

    rpcInputs.setRank(inputs.rank());
    rpcInputs.setAsl(inputs.asl());
    rpcInputs.setQuality(inputs.quality());
    rpcInputs.setSize(inputs.size());
    rpcInputs.setTopology(inputs.topology());
    rpcInputs.setYear(inputs.year());
    rpcInputs.addAllFlags(inputs.flags());

    return rpcInputs.build();
}
}

View File

@ -9,13 +9,17 @@ import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
import nu.marginalia.api.searchquery.model.results.debug.DebugFactor;
import nu.marginalia.api.searchquery.model.results.debug.DebugFactorGroup;
import nu.marginalia.api.searchquery.model.results.debug.DebugTermFactorGroup;
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingDetails;
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingInputs;
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingOutputs;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.model.EdgeUrl;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
public class QueryProtobufCodec {
@ -138,45 +142,109 @@ public class QueryProtobufCodec {
private static ResultRankingDetails convertRankingDetails(RpcResultRankingDetails rankingDetails) {
if (rankingDetails == null)
return null;
var inputs = rankingDetails.getInputs();
var outputs = rankingDetails.getOutput();
var docData = rankingDetails.getDocumentOutputs();
var termData = rankingDetails.getTermOutputs();
return new ResultRankingDetails(
convertRankingInputs(inputs),
convertRankingOutputs(outputs)
convertDocumentOutputs(docData),
convertTermData(termData)
);
}
private static ResultRankingOutputs convertRankingOutputs(RpcResultRankingOutputs outputs) {
return new ResultRankingOutputs(
outputs.getAverageSentenceLengthPenalty(),
outputs.getQualityPenalty(),
outputs.getRankingBonus(),
outputs.getTopologyBonus(),
outputs.getDocumentLengthPenalty(),
outputs.getTemporalBias(),
outputs.getFlagsPenalty(),
outputs.getOverallPart(),
outputs.getBm25Part(),
outputs.getTcfAvgDist(),
outputs.getTcfFirstPosition()
private static List<DebugTermFactorGroup> convertTermData(RpcResultTermRankingOutputs termData) {
Map<String, Long> termIdByName = new HashMap<>();
Map<String, List<DebugFactor>> factorsByTerm = new HashMap<>();
);
for (int i = 0; i < termData.getTermCount(); i++) {
termIdByName.put(termData.getTerm(i), termData.getTermId(i));
factorsByTerm.computeIfAbsent(termData.getTerm(i), k -> new ArrayList<>())
.add(new DebugFactor(termData.getFactor(i), termData.getValue(i)));
}
Map<String, List<DebugFactorGroup>> factorGroupsByTerm = new HashMap<>();
for (var entry : factorsByTerm.entrySet()) {
String term = entry.getKey();
var factorsList = entry.getValue();
Map<String, List<DebugFactor>> factorsByGroup = new HashMap<>();
for (var factor : factorsList) {
String[] parts = factor.factor().split("\\.");
String group, name;
if (parts.length != 2) {
group = "unknown";
name = parts[0];
} else {
group = parts[0];
name = parts[1];
}
factorsByGroup.computeIfAbsent(group, k -> new ArrayList<>())
.add(new DebugFactor(name, factor.value()));
}
factorsByGroup.forEach((groupName, groupData) -> {
factorGroupsByTerm.computeIfAbsent(term, k -> new ArrayList<>())
.add(new DebugFactorGroup(groupName, groupData));
});
}
List<DebugTermFactorGroup> groups = new ArrayList<>();
for (var entry : factorGroupsByTerm.entrySet()) {
groups.add(new DebugTermFactorGroup(entry.getKey(), termIdByName.get(entry.getKey()), entry.getValue()));
}
return groups;
}
private static ResultRankingInputs convertRankingInputs(RpcResultRankingInputs inputs) {
return new ResultRankingInputs(
inputs.getRank(),
inputs.getAsl(),
inputs.getQuality(),
inputs.getSize(),
inputs.getTopology(),
inputs.getYear(),
inputs.getFlagsList()
);
/**
 * Regroups the flat, index-aligned document-level debug output into
 * {@link DebugFactorGroup}s keyed by group name.
 * <p>
 * Entry {@code i} of the message is the pair {@code (factor(i), value(i))}.
 * Factor names are expected to have the form {@code "group.name"}; any
 * name that does not split into exactly two dot-separated parts is filed
 * under the group {@code "unknown"} with its full, unmodified name.
 * <p>
 * Groups, and the factors within them, are returned in the order they
 * first occur in the message.
 *
 * @param docData the document-level ranking outputs from the index service
 * @return one entry per distinct group; empty if the message has no factors
 */
private static List<DebugFactorGroup> convertDocumentOutputs(RpcResultDocumentRankingOutputs docData) {
    // Linked map keeps the emission order of the groups stable in the debug view
    Map<String, List<DebugFactor>> factorsByGroup = new LinkedHashMap<>();

    for (int i = 0; i < docData.getFactorCount(); i++) {
        String factorName = docData.getFactor(i);
        String value = docData.getValue(i);

        String[] parts = factorName.split("\\.");
        String group, name;
        if (parts.length != 2) {
            group = "unknown";
            name = factorName;
        }
        else {
            group = parts[0];
            name = parts[1];
        }

        factorsByGroup.computeIfAbsent(group, k -> new ArrayList<>())
                .add(new DebugFactor(name, value));
    }

    List<DebugFactorGroup> groups = new ArrayList<>(factorsByGroup.size());
    for (var entry : factorsByGroup.entrySet()) {
        groups.add(new DebugFactorGroup(entry.getKey(), entry.getValue()));
    }

    return groups;
}
private static SearchResultItem convertRawResult(RpcRawResultItem rawItem) {
var keywordScores = new ArrayList<SearchResultKeywordScore>(rawItem.getKeywordScoresCount());
@ -189,6 +257,7 @@ public class QueryProtobufCodec {
rawItem.getHtmlFeatures(),
keywordScores,
rawItem.getHasPriorityTerms(),
null, // Not set
Double.NaN // Not set
);
}

View File

@ -2,6 +2,7 @@ package nu.marginalia.api.searchquery.model.results;
import lombok.AllArgsConstructor;
import lombok.Getter;
import nu.marginalia.api.searchquery.model.results.debug.DebugRankingFactors;
import nu.marginalia.model.id.UrlIdCodec;
import org.jetbrains.annotations.NotNull;
@ -27,6 +28,8 @@ public class SearchResultItem implements Comparable<SearchResultItem> {
public boolean hasPrioTerm;
public DebugRankingFactors debugRankingFactors;
public SearchResultItem(long combinedId,
long encodedDocMetadata,
int htmlFeatures) {

View File

@ -0,0 +1,4 @@
package nu.marginalia.api.searchquery.model.results.debug;
/**
 * A single named debug value produced while ranking a result.
 *
 * @param factor the factor name
 * @param value  the factor's value, already rendered as a string
 */
public record DebugFactor(
        String factor,
        String value
) {}

View File

@ -0,0 +1,5 @@
package nu.marginalia.api.searchquery.model.results.debug;
import java.util.List;
/**
 * A named cluster of debug factors.
 *
 * @param name    the group name
 * @param factors the factors belonging to the group
 */
public record DebugFactorGroup(
        String name,
        List<DebugFactor> factors
) {
}

View File

@ -0,0 +1,38 @@
package nu.marginalia.api.searchquery.model.results.debug;
import it.unimi.dsi.fastutil.ints.IntIterator;
import java.util.ArrayList;
import java.util.List;
import java.util.StringJoiner;
public class DebugRankingFactors {
private final List<DebugFactor> documentFactors = new ArrayList<>();
private final List<DebugTermFactor> termFactors = new ArrayList<>();
public DebugRankingFactors() {}
public void addDocumentFactor(String factor, String value) {
documentFactors.add(new DebugFactor(factor, value));
}
public void addTermFactor(long termId, String factor, String value) {
termFactors.add(new DebugTermFactor(termId, null, factor, value));
}
public void addTermFactor(long termId, String factor, IntIterator sequenceIter) {
if (!sequenceIter.hasNext()) return;
StringJoiner joiner = new StringJoiner(",");
while (sequenceIter.hasNext()) {
joiner.add(String.valueOf(sequenceIter.nextInt()));
}
termFactors.add(new DebugTermFactor(termId, null, factor, joiner.toString()));
}
public List<DebugFactor> getDocumentFactors() {
return documentFactors;
}
public List<DebugTermFactor> getTermFactors() {
return termFactors;
}
}

View File

@ -0,0 +1,4 @@
package nu.marginalia.api.searchquery.model.results.debug;
/**
 * A single named debug value attached to one query term.
 *
 * @param termId the id of the term
 * @param term   the term string; may be {@code null} when only the id is known
 * @param factor the factor name
 * @param value  the factor's value, already rendered as a string
 */
public record DebugTermFactor(
        long termId,
        String term,
        String factor,
        String value
) {}

View File

@ -0,0 +1,6 @@
package nu.marginalia.api.searchquery.model.results.debug;
import java.util.List;
/**
 * All debug factor groups recorded for one query term.
 *
 * @param term       the term string
 * @param termId     the id of the term
 * @param factorList the term's factors, clustered into named groups
 */
public record DebugTermFactorGroup(
        String term,
        long termId,
        List<DebugFactorGroup> factorList
) {}

View File

@ -1,6 +1,9 @@
package nu.marginalia.api.searchquery.model.results.debug;
public record ResultRankingDetails(ResultRankingInputs inputs, ResultRankingOutputs outputs)
import java.util.List;
/**
 * Debug details explaining how a result was ranked.
 *
 * @param docFactorGroups  document-level factors, clustered into named groups
 * @param termFactorGroups per-term factors, one entry per term
 */
public record ResultRankingDetails(
        List<DebugFactorGroup> docFactorGroups,
        List<DebugTermFactorGroup> termFactorGroups
) {}

View File

@ -1,5 +0,0 @@
package nu.marginalia.api.searchquery.model.results.debug;
import java.util.List;
/**
 * The per-document values fed into the ranking function.
 * NOTE(review): units and ranges of the individual components are not
 * visible here; confirm against the code that populates them.
 */
public record ResultRankingInputs(
        int rank,
        int asl,
        int quality,
        int size,
        int topology,
        int year,
        List<String> flags
) {}

View File

@ -1,16 +0,0 @@
package nu.marginalia.api.searchquery.model.results.debug;
/**
 * The individual terms computed by the ranking function for one result.
 * NOTE(review): how these parts combine into the final score is not
 * visible here; confirm against the score calculator.
 */
public record ResultRankingOutputs(
        double averageSentenceLengthPenalty,
        double qualityPenalty,
        double rankingBonus,
        double topologyBonus,
        double documentLengthPenalty,
        double temporalBias,
        double flagsPenalty,
        double overallPart,
        double bm25,
        double tcfAvgDist,
        double tcfFirstPosition
) {}

View File

@ -143,8 +143,8 @@ message RpcResultRankingParameters {
}
message RpcResultRankingDetails {
RpcResultRankingInputs inputs = 1;
RpcResultRankingOutputs output = 2;
RpcResultDocumentRankingOutputs documentOutputs = 1;
RpcResultTermRankingOutputs termOutputs = 2;
}
message RpcResultRankingInputs {
@ -158,19 +158,16 @@ message RpcResultRankingInputs {
}
/** Summary of the output of the ranking function */
message RpcResultRankingOutputs {
double averageSentenceLengthPenalty = 1;
double qualityPenalty = 2;
double rankingBonus = 3;
double topologyBonus = 4;
double documentLengthPenalty = 5;
double temporalBias = 6;
double flagsPenalty = 7;
double overallPart = 8;
double bm25Part = 9;
// 10-14 unused
double tcfAvgDist = 15;
double tcfFirstPosition = 16;
/** Document-level debug output of the ranking function, as two
 *  index-aligned lists: value[i] belongs to factor[i]. */
message RpcResultDocumentRankingOutputs {
// Factor name; the consumer splits it on '.' into "group.name"
repeated string factor = 1;
// Factor value, pre-rendered as a string
repeated string value = 2;
}
/** Per-term debug output of the ranking function, as four index-aligned
 *  lists: entry i is the tuple (termId[i], term[i], factor[i], value[i]).
 *  A term with several factors is repeated once per factor. */
message RpcResultTermRankingOutputs {
// Id of the term the factor applies to
repeated int64 termId = 1;
// Human-readable term, looked up from the query by the producer
repeated string term = 2;
// Factor name; the consumer splits it on '.' into "group.name"
repeated string factor = 3;
// Factor value, pre-rendered as a string
repeated string value = 4;
}
/* Defines a single subquery */

View File

@ -118,7 +118,13 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
.labels(nodeName, "GRPC")
.time(() -> {
// Perform the search
return executeSearch(params);
try {
return executeSearch(params);
}
catch (Exception ex) {
logger.error("Error in handling request", ex);
return List.of();
}
});
// Prometheus bookkeeping
@ -286,7 +292,7 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
awaitCompletion();
// Return the best results
return resultValuator.selectBestResults(parameters, resultHeap);
return resultValuator.selectBestResults(parameters, resultRankingContext, resultHeap);
}
/** Wait for all tasks to complete */
@ -399,6 +405,7 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
}
}
}
private boolean execute() throws InterruptedException {
long start = System.currentTimeMillis();
@ -417,7 +424,7 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
stallTime.addAndGet(System.currentTimeMillis() - start);
resultHeap.addAll(
resultValuator.rankResults(parameters, rankingContext, resultIds)
resultValuator.rankResults(parameters, false, rankingContext, resultIds)
);
}

View File

@ -6,13 +6,13 @@ import gnu.trove.list.TLongList;
import gnu.trove.list.array.TLongArrayList;
import gnu.trove.map.hash.TObjectLongHashMap;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import nu.marginalia.api.searchquery.RpcDecoratedResultItem;
import nu.marginalia.api.searchquery.RpcRawResultItem;
import nu.marginalia.api.searchquery.RpcResultKeywordScore;
import nu.marginalia.api.searchquery.*;
import nu.marginalia.api.searchquery.model.compiled.CompiledQuery;
import nu.marginalia.api.searchquery.model.compiled.CqDataLong;
import nu.marginalia.api.searchquery.model.query.SearchQuery;
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
import nu.marginalia.api.searchquery.model.results.debug.DebugRankingFactors;
import nu.marginalia.index.index.CombinedIndexReader;
import nu.marginalia.index.index.StatefulIndex;
import nu.marginalia.index.model.SearchParameters;
@ -48,6 +48,7 @@ public class IndexResultRankingService {
}
public List<SearchResultItem> rankResults(SearchParameters params,
boolean exportDebugData,
ResultRankingContext rankingContext,
CombinedDocIdList resultIds)
{
@ -99,10 +100,19 @@ public class IndexResultRankingService {
continue;
}
// Calculate the preliminary score
var score = resultRanker.calculateScore(arena, resultIds.at(i), searchTerms, flags, positions);
if (score != null) {
results.add(score);
if (!exportDebugData) {
var score = resultRanker.calculateScore(arena, null, resultIds.at(i), searchTerms, flags, positions);
if (score != null) {
results.add(score);
}
}
else {
var rankingFactors = new DebugRankingFactors();
var score = resultRanker.calculateScore(arena, rankingFactors, resultIds.at(i), searchTerms, flags, positions);
if (score != null) {
score.debugRankingFactors = rankingFactors;
results.add(score);
}
}
}
@ -112,6 +122,7 @@ public class IndexResultRankingService {
public List<RpcDecoratedResultItem> selectBestResults(SearchParameters params,
ResultRankingContext resultRankingContext,
Collection<SearchResultItem> results) throws SQLException {
var domainCountFilter = new IndexResultDomainDeduplicator(params.limitByDomain);
@ -136,6 +147,25 @@ public class IndexResultRankingService {
}
}
// If we're exporting debug data from the ranking, we need to re-run the ranking calculation
// for the selected results, as this would be comically expensive to do for all the results we
// discard along the way
if (params.rankingParams.exportDebugData) {
var combinedIdsList = new LongArrayList(resultsList.size());
for (var item : resultsList) {
combinedIdsList.add(item.combinedId);
}
resultsList.clear();
resultsList.addAll(this.rankResults(
params,
true,
resultRankingContext,
new CombinedDocIdList(combinedIdsList))
);
}
// Fetch the document details for the selected results in one go, from the local document database
// for this index partition
Map<Long, DocdbUrlDetail> detailsById = new HashMap<>(idsList.size());
@ -189,11 +219,45 @@ public class IndexResultRankingService {
decoratedBuilder.setPubYear(docData.pubYear());
}
/* FIXME
var rankingDetails = IndexProtobufCodec.convertRankingDetails(result.rankingDetails);
if (rankingDetails != null) {
decoratedBuilder.setRankingDetails(rankingDetails);
}*/
if (result.debugRankingFactors != null) {
var debugFactors = result.debugRankingFactors;
var detailsBuilder = RpcResultRankingDetails.newBuilder();
var documentOutputs = RpcResultDocumentRankingOutputs.newBuilder();
for (var factor : debugFactors.getDocumentFactors()) {
documentOutputs.addFactor(factor.factor());
documentOutputs.addValue(factor.value());
}
detailsBuilder.setDocumentOutputs(documentOutputs);
var termOutputs = RpcResultTermRankingOutputs.newBuilder();
CqDataLong termIds = params.compiledQueryIds.data;;
for (var entry : debugFactors.getTermFactors()) {
String term = "[ERROR IN LOOKUP]";
// CURSED: This is a linear search, but the number of terms is small, and it's in a debug path
for (int i = 0; i < termIds.size(); i++) {
if (termIds.get(i) == entry.termId()) {
term = params.compiledQuery.at(i);
break;
}
}
termOutputs
.addTermId(entry.termId())
.addTerm(term)
.addFactor(entry.factor())
.addValue(entry.value());
}
detailsBuilder.setTermOutputs(termOutputs);
decoratedBuilder.setRankingDetails(detailsBuilder);
}
resultItems.add(decoratedBuilder.build());
}
return resultItems;

View File

@ -7,6 +7,7 @@ import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
import nu.marginalia.api.searchquery.model.results.debug.DebugRankingFactors;
import nu.marginalia.index.forward.spans.DocumentSpans;
import nu.marginalia.index.index.CombinedIndexReader;
import nu.marginalia.index.index.StatefulIndex;
@ -57,6 +58,7 @@ public class IndexResultScoreCalculator {
@Nullable
public SearchResultItem calculateScore(Arena arena,
@Nullable DebugRankingFactors rankingFactors,
long combinedId,
QuerySearchTerms searchTerms,
long[] wordFlags,
@ -88,6 +90,8 @@ public class IndexResultScoreCalculator {
DocumentSpans spans = index.getDocumentSpans(arena, docId);
double score = calculateSearchResultValue(
rankingFactors,
searchTerms,
wordFlagsQuery,
positionsQuery,
docMetadata,
@ -157,7 +161,9 @@ public class IndexResultScoreCalculator {
return true;
}
public double calculateSearchResultValue(CompiledQueryLong wordFlagsQuery,
public double calculateSearchResultValue(DebugRankingFactors rankingFactors,
QuerySearchTerms searchTerms,
CompiledQueryLong wordFlagsQuery,
CompiledQuery<CodedSequence> positionsQuery,
long documentMetadata,
int features,
@ -344,12 +350,82 @@ public class IndexResultScoreCalculator {
+ verbatimMatchScore
+ keywordMinDistFac;
double tcfAvgDist = rankingParams.tcfAvgDist * (1.0 / calculateAvgMinDistance(positionsQuery, ctx));
double tcfFirstPosition = rankingParams.tcfFirstPosition * (1.0 / Math.sqrt(firstPosition));
double bM25 = rankingParams.bm25Weight * wordFlagsQuery.root.visit(new Bm25GraphVisitor(rankingParams.bm25Params, weightedCounts, length, ctx));
double bFlags = rankingParams.bm25Weight * wordFlagsQuery.root.visit(new TermFlagsGraphVisitor(rankingParams.bm25Params, wordFlagsQuery.data, weightedCounts, ctx));
if (rankingFactors != null) {
rankingFactors.addDocumentFactor("overall.averageSentenceLengthPenalty", Double.toString(averageSentenceLengthPenalty));
rankingFactors.addDocumentFactor("overall.documentLengthPenalty", Double.toString(documentLengthPenalty));
rankingFactors.addDocumentFactor("overall.qualityPenalty", Double.toString(qualityPenalty));
rankingFactors.addDocumentFactor("overall.rankingBonus", Double.toString(rankingBonus));
rankingFactors.addDocumentFactor("overall.topologyBonus", Double.toString(topologyBonus));
rankingFactors.addDocumentFactor("overall.temporalBias", Double.toString(temporalBias));
rankingFactors.addDocumentFactor("overall.flagsPenalty", Double.toString(flagsPenalty));
rankingFactors.addDocumentFactor("overall.verbatimMatchScore", Double.toString(verbatimMatchScore));
rankingFactors.addDocumentFactor("overall.keywordMinDistFac", Double.toString(keywordMinDistFac));
rankingFactors.addDocumentFactor("tcf.avgDist", Double.toString(tcfAvgDist));
rankingFactors.addDocumentFactor("tcf.firstPosition", Double.toString(tcfFirstPosition));
rankingFactors.addDocumentFactor("bm25.main", Double.toString(bM25));
rankingFactors.addDocumentFactor("bm25.flags", Double.toString(bFlags));
for (int i = 0; i < searchTerms.termIdsAll.size(); i++) {
long termId = searchTerms.termIdsAll.at(i);
rankingFactors.addTermFactor(termId, "factor.weightedCount", Double.toString(weightedCounts[i]));
byte flags = (byte) wordFlagsQuery.at(i);
for (var flag : WordFlags.values()) {
if (flag.isPresent(flags)) {
rankingFactors.addTermFactor(termId, "flags." + flag.name(), "true");
}
}
if (verbatimMatchInAnchor) {
rankingFactors.addTermFactor(termId, "verbatim.anchor", "true");
}
if (verbatimMatchInBody) {
rankingFactors.addTermFactor(termId, "verbatim.body", "true");
}
if (verbatimMatchInCode) {
rankingFactors.addTermFactor(termId, "verbatim.code", "true");
}
if (verbatimMatchInExtLink) {
rankingFactors.addTermFactor(termId, "verbatim.extLink", "true");
}
if (verbatimMatchInHeading) {
rankingFactors.addTermFactor(termId, "verbatim.heading", "true");
}
if (verbatimMatchInNav) {
rankingFactors.addTermFactor(termId, "verbatim.nav", "true");
}
if (verbatimMatchInTitle) {
rankingFactors.addTermFactor(termId, "verbatim.title", "true");
}
rankingFactors.addTermFactor(termId, "unordered.title", Integer.toString(unorderedMatchInTitleCount));
rankingFactors.addTermFactor(termId, "unordered.heading", Integer.toString(unorderedMatchInHeadingCount));
if (positions[i] != null) {
rankingFactors.addTermFactor(termId, "positions.all", positions[i].iterator());
rankingFactors.addTermFactor(termId, "positions.title", SequenceOperations.findIntersections(spans.title.iterator(), positions[i].iterator()).iterator());
rankingFactors.addTermFactor(termId, "positions.heading", SequenceOperations.findIntersections(spans.heading.iterator(), positions[i].iterator()).iterator());
rankingFactors.addTermFactor(termId, "positions.anchor", SequenceOperations.findIntersections(spans.anchor.iterator(), positions[i].iterator()).iterator());
rankingFactors.addTermFactor(termId, "positions.code", SequenceOperations.findIntersections(spans.code.iterator(), positions[i].iterator()).iterator());
rankingFactors.addTermFactor(termId, "positions.nav", SequenceOperations.findIntersections(spans.nav.iterator(), positions[i].iterator()).iterator());
rankingFactors.addTermFactor(termId, "positions.externalLinkText", SequenceOperations.findIntersections(spans.externalLinkText.iterator(), positions[i].iterator()).iterator());
}
}
}
// Renormalize to 0...15, where 0 is the best possible score;
// this is a historical artifact of the original ranking function
double ret = normalize(

View File

@ -25,7 +25,7 @@ class IndexResultDomainDeduplicatorTest {
}
/**
 * Builds a minimal result item whose combined id encodes the given domain
 * and ordinal; every other field is zero, empty, {@code false},
 * {@code null} or {@code NaN}.
 * NOTE(review): the {@code null} argument presumably fills the
 * {@code debugRankingFactors} field of {@code SearchResultItem}; confirm
 * against the constructor's parameter order.
 */
SearchResultItem forId(int domain, int ordinal) {
    return new SearchResultItem(UrlIdCodec.encodeId(domain, ordinal), 0, 0, List.of(), false, null, Double.NaN);
}
}

View File

@ -3,12 +3,12 @@ package nu.marginalia.query;
import com.google.common.base.Strings;
import com.google.gson.Gson;
import com.google.inject.Inject;
import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.api.searchquery.model.results.Bm25Parameters;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.functions.searchquery.QueryGRPCService;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.renderer.RendererFactory;
import spark.Request;
@ -82,7 +82,7 @@ public class QueryBasicInterface {
domainCount, count, 250, 8192
), set);
var rankingParams = rankingParamsFromRequest(request);
var rankingParams = debugRankingParamsFromRequest(request);
var detailedDirectResult = queryGRPCService.executeDirect(
queryString, queryParams, rankingParams
@ -98,7 +98,7 @@ public class QueryBasicInterface {
);
}
private ResultRankingParameters rankingParamsFromRequest(Request request) {
private ResultRankingParameters debugRankingParamsFromRequest(Request request) {
var sensibleDefaults = ResultRankingParameters.sensibleDefaults();
return ResultRankingParameters.builder()

View File

@ -102,27 +102,26 @@
<p>{{description}}</p>
<div><small class="text-muted">dataHash: {{dataHash}} wordsTotal: {{wordsTotal}} bestPositions: {{bestPositions}} rankingScore: {{rankingScore}} urlQuality: {{urlQuality}}</small></div>
{{#with rankingDetails.inputs}}
<div><small class="text-muted">Rank: {{rank}}</small></div>
<div><small class="text-muted">ASL: {{asl}}</small></div>
<div><small class="text-muted">Quality: {{quality}}</small></div>
<div><small class="text-muted">Size: {{size}}</small></div>
<div><small class="text-muted">Topology: {{topology}}</small></div>
<div><small class="text-muted">Year: {{year}}</small></div>
<div><small class="text-muted">Flags: {{#each flags}} {{.}} {{/each}}</small></div>
{{#with rankingDetails.docFactorGroups}}
{{#each .}}
<div><small>{{name}}</small></div>
{{#each factors}}
<div><small class="text-muted">{{factor}}: {{value}}</small></div>
{{/each}}
{{/each}}
{{/with}}
{{#with rankingDetails.outputs}}
<div><small class="text-muted">Average Sentence Length Penalty: {{averageSentenceLengthPenalty}}</small></div>
<div><small class="text-muted">Quality Penalty: {{qualityPenalty}}</small></div>
<div><small class="text-muted">Ranking Bonus: {{rankingBonus}}</small></div>
<div><small class="text-muted">Topology Bonus: {{topologyBonus}}</small></div>
<div><small class="text-muted">Document Length Penalty: {{documentLengthPenalty}}</small></div>
<div><small class="text-muted">Temporal Bias: {{temporalBias}}</small></div>
<div><small class="text-muted">Flags Penalty: {{flagsPenalty}}</small></div>
<div><small class="text-muted">Overall Part: {{overallPart}}</small></div>
<div><small class="text-muted">TCF Avg Distance: {{tcfAvgDist}}</small></div>
<div><small class="text-muted">TCF First Position: {{tcfFirstPosition}}</small></div>
<div><small class="text-muted">BM25: {{bM25}}</small></div>
{{#with rankingDetails.termFactorGroups}}
{{#each .}}
<div>{{termId}}:{{term}}</div>
{{#each factorList}}
<div>{{name}}</div>
{{#each factors}}
<div><small class="text-muted">{{factor}}: {{value}}</small></div>
{{/each}}
{{/each}}
{{/each}}
{{/with}}
</div>