(index) Retire count operation, clean up index code.

This commit is contained in:
Viktor Lofgren 2024-02-25 12:46:30 +01:00
parent 823ca73a3f
commit 427f3e922f
19 changed files with 18 additions and 77 deletions

View File

@ -34,7 +34,6 @@ public class QueryProtobufCodec {
builder.setYear(IndexProtobufCodec.convertSpecLimit(query.specs.year)); builder.setYear(IndexProtobufCodec.convertSpecLimit(query.specs.year));
builder.setSize(IndexProtobufCodec.convertSpecLimit(query.specs.size)); builder.setSize(IndexProtobufCodec.convertSpecLimit(query.specs.size));
builder.setRank(IndexProtobufCodec.convertSpecLimit(query.specs.rank)); builder.setRank(IndexProtobufCodec.convertSpecLimit(query.specs.rank));
builder.setDomainCount(IndexProtobufCodec.convertSpecLimit(query.specs.domainCount));
builder.setQueryLimits(IndexProtobufCodec.convertQueryLimits(query.specs.queryLimits)); builder.setQueryLimits(IndexProtobufCodec.convertQueryLimits(query.specs.queryLimits));
@ -63,7 +62,6 @@ public class QueryProtobufCodec {
builder.setYear(IndexProtobufCodec.convertSpecLimit(query.specs.year)); builder.setYear(IndexProtobufCodec.convertSpecLimit(query.specs.year));
builder.setSize(IndexProtobufCodec.convertSpecLimit(query.specs.size)); builder.setSize(IndexProtobufCodec.convertSpecLimit(query.specs.size));
builder.setRank(IndexProtobufCodec.convertSpecLimit(query.specs.rank)); builder.setRank(IndexProtobufCodec.convertSpecLimit(query.specs.rank));
builder.setDomainCount(IndexProtobufCodec.convertSpecLimit(query.specs.domainCount));
builder.setQueryLimits(IndexProtobufCodec.convertQueryLimits(query.specs.queryLimits)); builder.setQueryLimits(IndexProtobufCodec.convertQueryLimits(query.specs.queryLimits));
@ -92,7 +90,6 @@ public class QueryProtobufCodec {
IndexProtobufCodec.convertSpecLimit(request.getYear()), IndexProtobufCodec.convertSpecLimit(request.getYear()),
IndexProtobufCodec.convertSpecLimit(request.getSize()), IndexProtobufCodec.convertSpecLimit(request.getSize()),
IndexProtobufCodec.convertSpecLimit(request.getRank()), IndexProtobufCodec.convertSpecLimit(request.getRank()),
IndexProtobufCodec.convertSpecLimit(request.getDomainCount()),
request.getDomainIdsList(), request.getDomainIdsList(),
IndexProtobufCodec.convertQueryLimits(request.getQueryLimits()), IndexProtobufCodec.convertQueryLimits(request.getQueryLimits()),
request.getSearchSetIdentifier(), request.getSearchSetIdentifier(),
@ -174,7 +171,6 @@ public class QueryProtobufCodec {
IndexProtobufCodec.convertSpecLimit(specs.getYear()), IndexProtobufCodec.convertSpecLimit(specs.getYear()),
IndexProtobufCodec.convertSpecLimit(specs.getSize()), IndexProtobufCodec.convertSpecLimit(specs.getSize()),
IndexProtobufCodec.convertSpecLimit(specs.getRank()), IndexProtobufCodec.convertSpecLimit(specs.getRank()),
IndexProtobufCodec.convertSpecLimit(specs.getDomainCount()),
IndexProtobufCodec.convertQueryLimits(specs.getQueryLimits()), IndexProtobufCodec.convertQueryLimits(specs.getQueryLimits()),
QueryStrategy.valueOf(specs.getQueryStrategy()), QueryStrategy.valueOf(specs.getQueryStrategy()),
IndexProtobufCodec.convertRankingParameterss(specs.getParameters()) IndexProtobufCodec.convertRankingParameterss(specs.getParameters())

View File

@ -19,7 +19,6 @@ public record QueryParams(
SpecificationLimit year, SpecificationLimit year,
SpecificationLimit size, SpecificationLimit size,
SpecificationLimit rank, SpecificationLimit rank,
SpecificationLimit domainCount,
List<Integer> domainIds, List<Integer> domainIds,
QueryLimits limits, QueryLimits limits,
String identifier, String identifier,
@ -37,7 +36,6 @@ public record QueryParams(
SpecificationLimit.none(), SpecificationLimit.none(),
SpecificationLimit.none(), SpecificationLimit.none(),
SpecificationLimit.none(), SpecificationLimit.none(),
SpecificationLimit.none(),
List.of(), List.of(),
limits, limits,
identifier, identifier,

View File

@ -24,8 +24,6 @@ public class SearchSpecification {
public final SpecificationLimit size; public final SpecificationLimit size;
public final SpecificationLimit rank; public final SpecificationLimit rank;
public final SpecificationLimit domainCount;
public final QueryLimits queryLimits; public final QueryLimits queryLimits;
public final QueryStrategy queryStrategy; public final QueryStrategy queryStrategy;

View File

@ -25,7 +25,6 @@ message RpcQsQuery {
RpcSpecLimit year = 8; RpcSpecLimit year = 8;
RpcSpecLimit size = 9; RpcSpecLimit size = 9;
RpcSpecLimit rank = 10; RpcSpecLimit rank = 10;
RpcSpecLimit domainCount = 11;
repeated int32 domainIds = 12; repeated int32 domainIds = 12;
RpcQueryLimits queryLimits = 13; RpcQueryLimits queryLimits = 13;
string searchSetIdentifier = 14; string searchSetIdentifier = 14;
@ -61,7 +60,6 @@ message RpcIndexQuery {
RpcSpecLimit year = 6; RpcSpecLimit year = 6;
RpcSpecLimit size = 7; RpcSpecLimit size = 7;
RpcSpecLimit rank = 8; RpcSpecLimit rank = 8;
RpcSpecLimit domainCount = 9;
RpcQueryLimits queryLimits = 10; RpcQueryLimits queryLimits = 10;
string queryStrategy = 11; // Named query configuration string queryStrategy = 11; // Named query configuration
RpcResultRankingParameters parameters = 12; RpcResultRankingParameters parameters = 12;

View File

@ -82,8 +82,6 @@ public class QueryParser {
entity.replace(new Token(TokenType.SIZE_TERM, t.str.substring(4), t.displayStr)); entity.replace(new Token(TokenType.SIZE_TERM, t.str.substring(4), t.displayStr));
} else if (t.str.startsWith("rank") && t.str.matches("rank[=><]\\d+")) { } else if (t.str.startsWith("rank") && t.str.matches("rank[=><]\\d+")) {
entity.replace(new Token(TokenType.RANK_TERM, t.str.substring(4), t.displayStr)); entity.replace(new Token(TokenType.RANK_TERM, t.str.substring(4), t.displayStr));
} else if (t.str.startsWith("count") && t.str.matches("count[=><]\\d+")) {
entity.replace(new Token(TokenType.DOMAIN_COUNT_TERM, t.str.substring(5), t.displayStr));
} else if (t.str.startsWith("qs=")) { } else if (t.str.startsWith("qs=")) {
entity.replace(new Token(TokenType.QS_TERM, t.str.substring(3), t.displayStr)); entity.replace(new Token(TokenType.QS_TERM, t.str.substring(3), t.displayStr));
} else if (t.str.contains(":")) { } else if (t.str.contains(":")) {

View File

@ -40,7 +40,6 @@ public class Token {
case YEAR_TERM: visitor.onYearTerm(this); break; case YEAR_TERM: visitor.onYearTerm(this); break;
case RANK_TERM: visitor.onRankTerm(this); break; case RANK_TERM: visitor.onRankTerm(this); break;
case DOMAIN_COUNT_TERM: visitor.onDomainCountTerm(this); break;
case SIZE_TERM: visitor.onSizeTerm(this); break; case SIZE_TERM: visitor.onSizeTerm(this); break;
case QS_TERM: visitor.onQsTerm(this); break; case QS_TERM: visitor.onQsTerm(this); break;

View File

@ -16,7 +16,6 @@ public enum TokenType implements Predicate<Token> {
YEAR_TERM, YEAR_TERM,
SIZE_TERM, SIZE_TERM,
RANK_TERM, RANK_TERM,
DOMAIN_COUNT_TERM,
NEAR_TERM, NEAR_TERM,
QS_TERM, QS_TERM,

View File

@ -9,7 +9,6 @@ public interface TokenVisitor {
void onYearTerm(Token token); void onYearTerm(Token token);
void onSizeTerm(Token token); void onSizeTerm(Token token);
void onRankTerm(Token token); void onRankTerm(Token token);
void onDomainCountTerm(Token token);
void onQualityTerm(Token token); void onQualityTerm(Token token);
void onQsTerm(Token token); void onQsTerm(Token token);
} }

View File

@ -127,7 +127,6 @@ public class QueryFactory {
.subqueries(subqueries) .subqueries(subqueries)
.humanQuery(query) .humanQuery(query)
.quality(qualityLimits.qualityLimit) .quality(qualityLimits.qualityLimit)
.domainCount(qualityLimits.domainCount)
.year(qualityLimits.year) .year(qualityLimits.year)
.size(qualityLimits.size) .size(qualityLimits.size)
.rank(qualityLimits.rank) .rank(qualityLimits.rank)

View File

@ -11,7 +11,6 @@ public class QueryLimitsAccumulator implements TokenVisitor {
public SpecificationLimit year; public SpecificationLimit year;
public SpecificationLimit size; public SpecificationLimit size;
public SpecificationLimit rank; public SpecificationLimit rank;
public SpecificationLimit domainCount;
public QueryStrategy queryStrategy = QueryStrategy.AUTO; public QueryStrategy queryStrategy = QueryStrategy.AUTO;
@ -20,7 +19,6 @@ public class QueryLimitsAccumulator implements TokenVisitor {
year = params.year(); year = params.year();
size = params.size(); size = params.size();
rank = params.rank(); rank = params.rank();
domainCount = params.domainCount();
} }
private SpecificationLimit parseSpecificationLimit(String str) { private SpecificationLimit parseSpecificationLimit(String str) {
@ -67,11 +65,6 @@ public class QueryLimitsAccumulator implements TokenVisitor {
rank = parseSpecificationLimit(token.str); rank = parseSpecificationLimit(token.str);
} }
@Override
public void onDomainCountTerm(Token token) {
domainCount = parseSpecificationLimit(token.str);
}
@Override @Override
public void onQualityTerm(Token token) { public void onQualityTerm(Token token) {
qualityLimit = parseSpecificationLimit(token.str); qualityLimit = parseSpecificationLimit(token.str);

View File

@ -103,8 +103,6 @@ public class QuerySearchTermsAccumulator implements TokenVisitor {
@Override @Override
public void onRankTerm(Token token) {} public void onRankTerm(Token token) {}
@Override @Override
public void onDomainCountTerm(Token token) {}
@Override
public void onQualityTerm(Token token) {} public void onQualityTerm(Token token) {}
@Override @Override
public void onQsTerm(Token token) {} public void onQsTerm(Token token) {}

View File

@ -48,7 +48,6 @@ public class QueryFactoryTest {
SpecificationLimit.none(), SpecificationLimit.none(),
SpecificationLimit.none(), SpecificationLimit.none(),
SpecificationLimit.none(), SpecificationLimit.none(),
SpecificationLimit.none(),
null, null,
new QueryLimits(100, 100, 100, 100), new QueryLimits(100, 100, 100, 100),
"NONE", "NONE",

View File

@ -79,7 +79,6 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
private final IndexQueryService indexQueryService; private final IndexQueryService indexQueryService;
private final IndexResultValuatorService resultValuator; private final IndexResultValuatorService resultValuator;
private final int nodeId;
private final String nodeName; private final String nodeName;
private final int indexValuationThreads = Integer.getInteger("index.valuationThreads", 8); private final int indexValuationThreads = Integer.getInteger("index.valuationThreads", 8);
@ -91,7 +90,7 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
IndexQueryService indexQueryService, IndexQueryService indexQueryService,
IndexResultValuatorService resultValuator) IndexResultValuatorService resultValuator)
{ {
this.nodeId = serviceConfiguration.node(); var nodeId = serviceConfiguration.node();
this.nodeName = Integer.toString(nodeId); this.nodeName = Integer.toString(nodeId);
this.index = index; this.index = index;
this.searchSetsService = searchSetsService; this.searchSetsService = searchSetsService;
@ -107,6 +106,8 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
try { try {
var params = new SearchParameters(request, getSearchSet(request)); var params = new SearchParameters(request, getSearchSet(request));
long endTime = System.currentTimeMillis() + request.getQueryLimits().getTimeoutMs();
SearchResultSet results = wmsa_query_time SearchResultSet results = wmsa_query_time
.labels(nodeName, "GRPC") .labels(nodeName, "GRPC")
.time(() -> { .time(() -> {
@ -119,7 +120,7 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
.labels(nodeName, "GRPC") .labels(nodeName, "GRPC")
.set(params.getDataCost()); .set(params.getDataCost());
if (!params.hasTimeLeft()) { if (System.currentTimeMillis() >= endTime) {
wmsa_query_timeouts wmsa_query_timeouts
.labels(nodeName, "GRPC") .labels(nodeName, "GRPC")
.inc(); .inc();

View File

@ -10,8 +10,6 @@ import nu.marginalia.index.query.limit.SpecificationLimit;
* @param year The year limit. * @param year The year limit.
* @param size The size limit. Eliminates results from domains that do not satisfy the size criteria. * @param size The size limit. Eliminates results from domains that do not satisfy the size criteria.
* @param rank The rank limit. Eliminates results from domains that do not satisfy the domain rank criteria. * @param rank The rank limit. Eliminates results from domains that do not satisfy the domain rank criteria.
* @param domainCount The domain count limit. Filters out results from domains that do not contain enough
* documents that match the query.
* @param searchSet The search set. Limits the search to a set of domains. * @param searchSet The search set. Limits the search to a set of domains.
* @param queryStrategy The query strategy. May impose additional constraints on the query, such as requiring * @param queryStrategy The query strategy. May impose additional constraints on the query, such as requiring
* the keywords to appear in the title, or in the domain. * the keywords to appear in the title, or in the domain.
@ -20,7 +18,6 @@ public record QueryParams(SpecificationLimit qualityLimit,
SpecificationLimit year, SpecificationLimit year,
SpecificationLimit size, SpecificationLimit size,
SpecificationLimit rank, SpecificationLimit rank,
SpecificationLimit domainCount,
SearchSet searchSet, SearchSet searchSet,
QueryStrategy queryStrategy QueryStrategy queryStrategy
) )

View File

@ -52,7 +52,6 @@ public class SearchParameters {
specsSet.year, specsSet.year,
specsSet.size, specsSet.size,
specsSet.rank, specsSet.rank,
specsSet.domainCount,
searchSet, searchSet,
specsSet.queryStrategy); specsSet.queryStrategy);
@ -80,7 +79,6 @@ public class SearchParameters {
convertSpecLimit(request.getYear()), convertSpecLimit(request.getYear()),
convertSpecLimit(request.getSize()), convertSpecLimit(request.getSize()),
convertSpecLimit(request.getRank()), convertSpecLimit(request.getRank()),
convertSpecLimit(request.getDomainCount()),
searchSet, searchSet,
QueryStrategy.valueOf(request.getQueryStrategy())); QueryStrategy.valueOf(request.getQueryStrategy()));

View File

@ -19,6 +19,7 @@ import org.slf4j.LoggerFactory;
import java.sql.SQLException; import java.sql.SQLException;
import java.util.*; import java.util.*;
import java.util.function.Consumer;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@Singleton @Singleton
@ -77,33 +78,15 @@ public class IndexResultValuatorService {
for (var item : results) { for (var item : results) {
if (domainCountFilter.test(item)) { if (domainCountFilter.test(item)) {
// It's important that this filter runs across all results, not just the top N
if (resultsList.size() < params.limitTotal) {
resultsList.add(item); resultsList.add(item);
} }
} }
if (!params.queryParams.domainCount().isNone()) {
// Remove items that don't meet the domain count requirement
// This isn't perfect because the domain count is calculated
// after the results are sorted
resultsList.removeIf(item -> !params.queryParams.domainCount().test(domainCountFilter.getCount(item)));
} }
if (resultsList.size() > params.limitTotal) { for (var item : resultsList) {
// This can't be made a stream limit() operation because we need domainCountFilter item.resultsFromDomain = domainCountFilter.getCount(item);
// to run over the entire list to provide accurate statistics
resultsList.subList(params.limitTotal, resultsList.size()).clear();
}
// populate results with the total number of results encountered from
// the same domain so this information can be presented to the user
for (var result : resultsList) {
result.resultsFromDomain = domainCountFilter.getCount(result);
}
LongArrayList idsList = new LongArrayList(resultsList.size());
for (var result : resultsList) {
idsList.add(result.getCombinedId());
} }
return decorateAndRerank(resultsList, rankingContext); return decorateAndRerank(resultsList, rankingContext);
@ -125,23 +108,19 @@ public class IndexResultValuatorService {
for (var item : documentDbReader.getUrlDetails(idsList)) for (var item : documentDbReader.getUrlDetails(idsList))
urlDetailsById.put(item.urlId(), item); urlDetailsById.put(item.urlId(), item);
List<DecoratedSearchResultItem> decoratedItems = new ArrayList<>(); List<DecoratedSearchResultItem> resultItems = new ArrayList<>(rawResults.size());
for (var result : rawResults) { for (var result : rawResults) {
var docData = urlDetailsById.get(result.getDocumentId()); var id = result.getDocumentId();
var docData = urlDetailsById.get(id);
if (null == docData) { if (docData == null) {
logger.warn("No data for document id {}", result.getDocumentId()); logger.warn("No document data for id {}", id);
continue; continue;
} }
decoratedItems.add(createCombinedItem(result, docData, rankingContext)); resultItems.add(createCombinedItem(result, docData, rankingContext));
} }
return resultItems;
if (decoratedItems.size() != rawResults.size())
logger.warn("Result list shrunk during decoration?");
decoratedItems.sort(Comparator.naturalOrder());
return decoratedItems;
} }
private DecoratedSearchResultItem createCombinedItem(SearchResultItem result, private DecoratedSearchResultItem createCombinedItem(SearchResultItem result,

View File

@ -120,7 +120,6 @@ public class IndexQueryServiceIntegrationSmokeTest {
.quality(SpecificationLimit.none()) .quality(SpecificationLimit.none())
.size(SpecificationLimit.none()) .size(SpecificationLimit.none())
.rank(SpecificationLimit.none()) .rank(SpecificationLimit.none())
.domainCount(SpecificationLimit.none())
.rankingParams(ResultRankingParameters.sensibleDefaults()) .rankingParams(ResultRankingParameters.sensibleDefaults())
.domains(new ArrayList<>()) .domains(new ArrayList<>())
.searchSetIdentifier("NONE") .searchSetIdentifier("NONE")
@ -164,7 +163,6 @@ public class IndexQueryServiceIntegrationSmokeTest {
.quality(SpecificationLimit.none()) .quality(SpecificationLimit.none())
.size(SpecificationLimit.none()) .size(SpecificationLimit.none())
.rank(SpecificationLimit.none()) .rank(SpecificationLimit.none())
.domainCount(SpecificationLimit.none())
.rankingParams(ResultRankingParameters.sensibleDefaults()) .rankingParams(ResultRankingParameters.sensibleDefaults())
.queryStrategy(QueryStrategy.SENTENCE) .queryStrategy(QueryStrategy.SENTENCE)
.domains(List.of(2)) .domains(List.of(2))
@ -201,7 +199,6 @@ public class IndexQueryServiceIntegrationSmokeTest {
.year(SpecificationLimit.equals(1998)) .year(SpecificationLimit.equals(1998))
.size(SpecificationLimit.none()) .size(SpecificationLimit.none())
.rank(SpecificationLimit.none()) .rank(SpecificationLimit.none())
.domainCount(SpecificationLimit.none())
.queryStrategy(QueryStrategy.SENTENCE) .queryStrategy(QueryStrategy.SENTENCE)
.searchSetIdentifier("NONE") .searchSetIdentifier("NONE")
.rankingParams(ResultRankingParameters.sensibleDefaults()) .rankingParams(ResultRankingParameters.sensibleDefaults())

View File

@ -422,7 +422,6 @@ public class IndexQueryServiceIntegrationTest {
.quality(SpecificationLimit.none()) .quality(SpecificationLimit.none())
.size(SpecificationLimit.none()) .size(SpecificationLimit.none())
.rank(SpecificationLimit.none()) .rank(SpecificationLimit.none())
.domainCount(SpecificationLimit.none())
.rankingParams(ResultRankingParameters.sensibleDefaults()) .rankingParams(ResultRankingParameters.sensibleDefaults())
.domains(new ArrayList<>()) .domains(new ArrayList<>())
.searchSetIdentifier("NONE") .searchSetIdentifier("NONE")

View File

@ -33,7 +33,6 @@ public class SearchQueryParamFactory {
profile.getYearLimit(), profile.getYearLimit(),
profile.getSizeLimit(), profile.getSizeLimit(),
SpecificationLimit.none(), SpecificationLimit.none(),
SpecificationLimit.none(),
List.of(), List.of(),
new QueryLimits(5, 100, 200, 8192), new QueryLimits(5, 100, 200, 8192),
profile.searchSetIdentifier.name(), profile.searchSetIdentifier.name(),
@ -54,7 +53,6 @@ public class SearchQueryParamFactory {
SpecificationLimit.none(), SpecificationLimit.none(),
SpecificationLimit.none(), SpecificationLimit.none(),
SpecificationLimit.none(), SpecificationLimit.none(),
SpecificationLimit.none(),
List.of(), List.of(),
new QueryLimits(count, count, 100, 512), new QueryLimits(count, count, 100, 512),
SearchSetIdentifier.NONE.name(), SearchSetIdentifier.NONE.name(),
@ -74,7 +72,6 @@ public class SearchQueryParamFactory {
SpecificationLimit.none(), SpecificationLimit.none(),
SpecificationLimit.none(), SpecificationLimit.none(),
SpecificationLimit.none(), SpecificationLimit.none(),
SpecificationLimit.none(),
List.of(), List.of(),
new QueryLimits(100, 100, 100, 512), new QueryLimits(100, 100, 100, 512),
SearchSetIdentifier.NONE.name(), SearchSetIdentifier.NONE.name(),
@ -94,7 +91,6 @@ public class SearchQueryParamFactory {
SpecificationLimit.none(), SpecificationLimit.none(),
SpecificationLimit.none(), SpecificationLimit.none(),
SpecificationLimit.none(), SpecificationLimit.none(),
SpecificationLimit.none(),
List.of(), List.of(),
new QueryLimits(100, 100, 100, 512), new QueryLimits(100, 100, 100, 512),
SearchSetIdentifier.NONE.name(), SearchSetIdentifier.NONE.name(),