Merge pull request #119 from MarginaliaSearch/result-pagination

Add pagination support for the search results
This commit is contained in:
Viktor 2024-09-25 14:29:24 +02:00 committed by GitHub
commit 9224176202
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 228 additions and 60 deletions

View File

@ -99,7 +99,8 @@ public class QueryProtobufCodec {
IndexProtobufCodec.convertQueryLimits(request.getQueryLimits()),
request.getSearchSetIdentifier(),
QueryStrategy.valueOf(request.getQueryStrategy()),
ResultRankingParameters.TemporalBias.valueOf(request.getTemporalBias().getBias().name())
ResultRankingParameters.TemporalBias.valueOf(request.getTemporalBias().getBias().name()),
request.getPagination().getPage()
);
}
@ -107,14 +108,22 @@ public class QueryProtobufCodec {
public static QueryResponse convertQueryResponse(RpcQsResponse query) {
var results = new ArrayList<DecoratedSearchResultItem>(query.getResultsCount());
for (int i = 0; i < query.getResultsCount(); i++)
for (int i = 0; i < query.getResultsCount(); i++) {
results.add(convertDecoratedResult(query.getResults(i)));
}
// Derive the number of result pages from the pagination block of the response.
var requestPagination = query.getPagination();
int totalResults = requestPagination.getTotalResults();
int pageSize = requestPagination.getPageSize();
// A non-positive page size (e.g. a response that carries no pagination block; presumably
// the field then reads as 0 -- TODO confirm against the proto syntax version) would make
// the division below throw ArithmeticException, so report zero pages in that case.
// Otherwise use ceiling division, widened to long so that the rounding term cannot
// overflow int for very large page sizes.
int totalPages = pageSize > 0
        ? (int) ((totalResults + (long) pageSize - 1) / pageSize)
        : 0;
return new QueryResponse(
convertSearchSpecification(query.getSpecs()),
results,
query.getSearchTermsHumanList(),
query.getProblemsList(),
query.getPagination().getPage(),
totalPages,
query.getDomain()
);
}
@ -304,6 +313,10 @@ public class QueryProtobufCodec {
.setQueryStrategy(params.queryStrategy().name())
.setTemporalBias(RpcTemporalBias.newBuilder()
.setBias(RpcTemporalBias.Bias.valueOf(params.temporalBias().name()))
.build())
.setPagination(RpcQsQueryPagination.newBuilder()
.setPage(params.page())
.setPageSize(Math.min(100, params.limits().resultsTotal()))
.build());
if (params.nearDomain() != null)

View File

@ -4,6 +4,7 @@ import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit;
import javax.annotation.Nullable;
import java.util.List;
@ -23,7 +24,8 @@ public record QueryParams(
QueryLimits limits,
String identifier,
QueryStrategy queryStrategy,
ResultRankingParameters.TemporalBias temporalBias
ResultRankingParameters.TemporalBias temporalBias,
int page
)
{
public QueryParams(String query, QueryLimits limits, String identifier) {
@ -40,7 +42,8 @@ public record QueryParams(
limits,
identifier,
QueryStrategy.AUTO,
ResultRankingParameters.TemporalBias.NONE
ResultRankingParameters.TemporalBias.NONE,
1 // page
);
}
}

View File

@ -11,6 +11,8 @@ public record QueryResponse(SearchSpecification specs,
List<DecoratedSearchResultItem> results,
List<String> searchTermsHuman,
List<String> problems,
int currentPage,
int totalPages,
@Nullable String domain)
{
public Set<String> getAllKeywords() {

View File

@ -30,6 +30,8 @@ message RpcQsQuery {
string searchSetIdentifier = 14;
string queryStrategy = 15; // Named query configuration
RpcTemporalBias temporalBias = 16;
RpcQsQueryPagination pagination = 17;
}
/* Query service query response */
@ -39,6 +41,19 @@ message RpcQsResponse {
repeated string searchTermsHuman = 3;
repeated string problems = 4;
string domain = 5;
RpcQsResultPagination pagination = 6;
}
/* Pagination requested by the client as part of RpcQsQuery */
message RpcQsQueryPagination {
int32 page = 1; // Requested page number, 1-based
int32 pageSize = 2; // Number of results per page
}
/* Pagination information returned with RpcQsResponse */
message RpcQsResultPagination {
int32 page = 1; // Page number of the returned results, echoed from the request
int32 pageSize = 2; // Page size, echoed from the request
int32 totalResults = 3; // Number of results available across all pages; clients derive the page count from this
}
message RpcTemporalBias {

View File

@ -8,13 +8,13 @@ import io.prometheus.client.Histogram;
import nu.marginalia.api.searchquery.*;
import nu.marginalia.api.searchquery.model.query.ProcessedQuery;
import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.index.api.IndexClient;
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
import java.util.List;
@Singleton
public class QueryGRPCService extends QueryApiGrpc.QueryApiImplBase {
@ -49,17 +49,30 @@ public class QueryGRPCService extends QueryApiGrpc.QueryApiImplBase {
.labels(Integer.toString(request.getQueryLimits().getTimeoutMs()),
Integer.toString(request.getQueryLimits().getResultsTotal()))
.time(() -> {
var params = QueryProtobufCodec.convertRequest(request);
var query = queryFactory.createQuery(params, ResultRankingParameters.sensibleDefaults());
var indexRequest = QueryProtobufCodec.convertQuery(request, query);
var requestPagination = request.getPagination();
IndexClient.Pagination pagination = new IndexClient.Pagination(
requestPagination.getPage(),
requestPagination.getPageSize());
// Execute the query on the index partitions
List<RpcDecoratedResultItem> bestItems = indexClient.executeQueries(indexRequest);
IndexClient.AggregateQueryResponse response = indexClient.executeQueries(indexRequest, pagination);
// Convert results to response and send it back
var responseBuilder = RpcQsResponse.newBuilder()
.addAllResults(bestItems)
.addAllResults(response.results())
.setPagination(
RpcQsResultPagination.newBuilder()
.setPage(requestPagination.getPage())
.setPageSize(requestPagination.getPageSize())
.setTotalResults(response.totalResults())
)
.setSpecs(indexRequest)
.addAllSearchTermsHuman(query.searchTermsHuman);
@ -77,18 +90,22 @@ public class QueryGRPCService extends QueryApiGrpc.QueryApiImplBase {
}
public record DetailedDirectResult(ProcessedQuery processedQuery,
List<DecoratedSearchResultItem> result) {}
List<DecoratedSearchResultItem> result,
int totalResults) {}
/** Local query execution, without GRPC. */
public DetailedDirectResult executeDirect(
String originalQuery,
QueryParams params,
IndexClient.Pagination pagination,
ResultRankingParameters rankingParameters) {
var query = queryFactory.createQuery(params, rankingParameters);
var items = indexClient.executeQueries(QueryProtobufCodec.convertQuery(originalQuery, query));
IndexClient.AggregateQueryResponse response = indexClient.executeQueries(QueryProtobufCodec.convertQuery(originalQuery, query), pagination);
return new DetailedDirectResult(query, Lists.transform(items, QueryProtobufCodec::convertQueryResult));
return new DetailedDirectResult(query,
Lists.transform(response.results(), QueryProtobufCodec::convertQueryResult),
response.totalResults());
}
}

View File

@ -17,10 +17,14 @@ import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import static java.lang.Math.clamp;
@Singleton
public class IndexClient {
private static final Logger logger = LoggerFactory.getLogger(IndexClient.class);
@ -39,17 +43,23 @@ public class IndexClient {
private static final Comparator<RpcDecoratedResultItem> comparator =
Comparator.comparing(RpcDecoratedResultItem::getRankingScore);
/** The window of results to return from a query.
 *
 * @param page     the requested page number, 1-indexed
 * @param pageSize the number of results per page
 */
public record Pagination(int page, int pageSize) {}
/** The combined outcome of a query, cut down to a single page.
 *
 * @param results      the results that fall on the requested page
 * @param page         the page number that was requested
 * @param totalResults the number of results that remained after sorting and blacklist
 *                     filtering, before the page was cut out
 */
public record AggregateQueryResponse(List<RpcDecoratedResultItem> results,
int page,
int totalResults
) {}
/** Execute a query on the index partitions and return the combined results. */
@SneakyThrows
public List<RpcDecoratedResultItem> executeQueries(RpcIndexQuery indexRequest) {
var futures =
public AggregateQueryResponse executeQueries(RpcIndexQuery indexRequest, Pagination pagination) {
List<CompletableFuture<Iterator<RpcDecoratedResultItem>>> futures =
channelPool.call(IndexApiGrpc.IndexApiBlockingStub::query)
.async(executor)
.runEach(indexRequest);
final int resultsTotal = indexRequest.getQueryLimits().getResultsTotal();
final int resultsUpperBound = resultsTotal * channelPool.getNumNodes();
final int requestedMaxResults = indexRequest.getQueryLimits().getResultsTotal();
final int resultsUpperBound = requestedMaxResults * channelPool.getNumNodes();
List<RpcDecoratedResultItem> results = new ArrayList<>(resultsUpperBound);
@ -66,12 +76,17 @@ public class IndexClient {
results.sort(comparator);
results.removeIf(this::isBlacklisted);
// Keep only as many results as were requested
if (results.size() > resultsTotal) {
results = results.subList(0, resultsTotal);
}
int numReceivedResults = results.size();
return results;
// Pagination is 1-indexed; translate the page number into a half-open [from, to) window
// over the sorted result list. The offsets are computed as long so that a very large
// page number cannot overflow int and wrap around to a bogus offset.
long indexStart = (pagination.page() - 1L) * pagination.pageSize();
long indexEnd = indexStart + pagination.pageSize();
// Both bounds are clamped into [0, size()]. The lower bound is allowed to equal size(),
// which yields an empty page: clamping it to size() - 1 instead makes clamp() throw
// IllegalArgumentException for an empty result list (min 0 > max -1), and returns the
// last item rather than nothing for pages past the end. The upper bound is clamped to be
// no smaller than the lower bound so subList() never sees from > to.
int fromIndex = clamp(indexStart, 0, results.size());
int toIndex = clamp(indexEnd, fromIndex, results.size());
results = results.subList(fromIndex, toIndex);
return new AggregateQueryResponse(results, pagination.page(), numReceivedResults);
}
private boolean isBlacklisted(RpcDecoratedResultItem item) {

View File

@ -37,6 +37,7 @@ import java.util.Set;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
@Singleton
public class SearchOperator {
@ -132,6 +133,14 @@ public class SearchOperator {
List<String> problems = getProblems(evalResult, queryResults, queryResponse);
List<DecoratedSearchResults.Page> resultPages = IntStream.rangeClosed(1, queryResponse.totalPages())
.mapToObj(number -> new DecoratedSearchResults.Page(
number,
number == userParams.page(),
userParams.withPage(number).renderUrl(websiteUrl)
))
.toList();
// Return the results to the user
return DecoratedSearchResults.builder()
.params(userParams)
@ -141,6 +150,7 @@ public class SearchOperator {
.filters(new SearchFilters(websiteUrl, userParams))
.focusDomain(focusDomain)
.focusDomainId(focusDomainId)
.resultPages(resultPages)
.build();
}

View File

@ -36,7 +36,8 @@ public class SearchQueryParamFactory {
new QueryLimits(5, 100, 200, 8192),
profile.searchSetIdentifier.name(),
userParams.strategy(),
userParams.temporalBias()
userParams.temporalBias(),
userParams.page()
);
}
@ -56,7 +57,8 @@ public class SearchQueryParamFactory {
new QueryLimits(count, count, 100, 512),
SearchSetIdentifier.NONE.name(),
QueryStrategy.AUTO,
ResultRankingParameters.TemporalBias.NONE
ResultRankingParameters.TemporalBias.NONE,
1
);
}
@ -75,7 +77,8 @@ public class SearchQueryParamFactory {
new QueryLimits(100, 100, 100, 512),
SearchSetIdentifier.NONE.name(),
QueryStrategy.AUTO,
ResultRankingParameters.TemporalBias.NONE
ResultRankingParameters.TemporalBias.NONE,
1
);
}
@ -94,7 +97,8 @@ public class SearchQueryParamFactory {
new QueryLimits(100, 100, 100, 512),
SearchSetIdentifier.NONE.name(),
QueryStrategy.AUTO,
ResultRankingParameters.TemporalBias.NONE
ResultRankingParameters.TemporalBias.NONE,
1
);
}
}

View File

@ -5,9 +5,11 @@ import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.search.model.SearchProfile;
import spark.Request;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
import static nu.marginalia.search.command.SearchRecentParameter.RECENT;
@ -17,40 +19,60 @@ public record SearchParameters(String query,
SearchRecentParameter recent,
SearchTitleParameter searchTitle,
SearchAdtechParameter adtech,
boolean poisonResults,
boolean newFilter
boolean newFilter,
int page
) {
/** Builds the search parameters from a query string and the query parameters of an HTTP request.
 * <p>
 * The profile, js, recent, searchTitle and adtech settings are parsed from the request
 * parameters of the same names. {@code newFilter} is true only when the {@code newfilter}
 * parameter is exactly {@code "true"}. {@code page} defaults to 1 when the parameter is absent.
 *
 * @param queryString the search query
 * @param request     the incoming request to read the remaining parameters from
 * @throws NumberFormatException if the {@code page} parameter is present but not an integer
 */
public SearchParameters(String queryString, Request request) {
this(
queryString,
SearchProfile.getSearchProfile(request.queryParams("profile")),
SearchJsParameter.parse(request.queryParams("js")),
SearchRecentParameter.parse(request.queryParams("recent")),
SearchTitleParameter.parse(request.queryParams("searchTitle")),
SearchAdtechParameter.parse(request.queryParams("adtech")),
"true".equals(request.queryParams("newfilter")),
Integer.parseInt(Objects.requireNonNullElse(request.queryParams("page"), "1"))
);
}
public String profileStr() {
return profile.filterId;
}
public SearchParameters withProfile(SearchProfile profile) {
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, poisonResults, true);
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, true, page);
}
public SearchParameters withJs(SearchJsParameter js) {
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, poisonResults, true);
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, true, page);
}
public SearchParameters withAdtech(SearchAdtechParameter adtech) {
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, poisonResults, true);
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, true, page);
}
public SearchParameters withRecent(SearchRecentParameter recent) {
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, poisonResults, true);
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, true, page);
}
public SearchParameters withTitle(SearchTitleParameter title) {
return new SearchParameters(query, profile, js, recent, title, adtech, poisonResults, true);
return new SearchParameters(query, profile, js, recent, title, adtech, true, page);
}
/** Returns a copy of these parameters that points at the given result page.
 * <p>
 * Unlike the other {@code with...} methods, which set {@code newFilter} to true,
 * this one sets it to false.
 *
 * @param page the page number to use in the copy
 */
public SearchParameters withPage(int page) {
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, false, page);
}
public String renderUrl(WebsiteUrl baseUrl) {
String path = String.format("/search?query=%s&profile=%s&js=%s&adtech=%s&recent=%s&searchTitle=%s&newfilter=true",
String path = String.format("/search?query=%s&profile=%s&js=%s&adtech=%s&recent=%s&searchTitle=%s&newfilter=%s&page=%d",
URLEncoder.encode(query, StandardCharsets.UTF_8),
URLEncoder.encode(profile.filterId, StandardCharsets.UTF_8),
URLEncoder.encode(js.value, StandardCharsets.UTF_8),
URLEncoder.encode(adtech.value, StandardCharsets.UTF_8),
URLEncoder.encode(recent.value, StandardCharsets.UTF_8),
URLEncoder.encode(searchTitle.value, StandardCharsets.UTF_8)
URLEncoder.encode(searchTitle.value, StandardCharsets.UTF_8),
Boolean.valueOf(newFilter).toString(),
page
);
return baseUrl.withPath(path);

View File

@ -25,6 +25,10 @@ public class DecoratedSearchResults {
private final int focusDomainId;
private final SearchFilters filters;
private final List<Page> resultPages;
/** A single entry in the page navigation rendered below the search results.
 *
 * @param number  the page number shown to the user
 * @param current whether this entry is the page currently being displayed
 * @param href    the URL that leads to this page
 */
public record Page(int number, boolean current, String href) {}
// These are used by the search form, they look unused in the IDE but are used by the mustache template,
// DO NOT REMOVE THEM
public int getResultCount() { return results.size(); }
@ -34,5 +38,7 @@ public class DecoratedSearchResults {
public String getAdtech() { return params.adtech().value; }
public String getRecent() { return params.recent().value; }
public String getSearchTitle() { return params.searchTitle().value; }
public int page() { return params.page(); }
public Boolean isNewFilter() { return params.newFilter(); }
}

View File

@ -3,8 +3,8 @@ package nu.marginalia.search.svc;
import com.google.inject.Inject;
import lombok.SneakyThrows;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.search.command.*;
import nu.marginalia.search.model.SearchProfile;
import nu.marginalia.search.command.CommandEvaluator;
import nu.marginalia.search.command.SearchParameters;
import nu.marginalia.search.exceptions.RedirectException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -52,15 +52,7 @@ public class SearchQueryService {
throw new RedirectException(websiteUrl.url());
}
return new SearchParameters(queryParam.trim(),
SearchProfile.getSearchProfile(request.queryParams("profile")),
SearchJsParameter.parse(request.queryParams("js")),
SearchRecentParameter.parse(request.queryParams("recent")),
SearchTitleParameter.parse(request.queryParams("searchTitle")),
SearchAdtechParameter.parse(request.queryParams("adtech")),
"1".equals(request.headers("X-Poison-Results")),
"true".equals(request.queryParams("newfilter"))
);
return new SearchParameters(queryParam.trim(), request);
}
catch (Exception ex) {
// Bots keep sending bad requests, suppress the error otherwise it will

View File

@ -794,6 +794,25 @@ footer {
}
}
// A single page-number link in the search result pagination.
.page-link {
padding-top: 0.25ch;
padding-bottom: 0.25ch;
padding-left: 0.5ch;
padding-right: 0.5ch;
margin-right: 0.5ch;
font-size: 12pt;
border: 1px solid var(--clr-border);
background-color: var(--clr-bg-highlight);
color: var(--clr-text-ui) !important;
text-decoration: none;
}
// The link for the page currently being viewed gets a different border and background.
.page-link.active {
border: 1px solid var(--clr-text-ui);
background-color: var(--clr-bg-ui);
}
// The search results page is very confusing on text-based browsers, so we add a hr to separate the search results. This is
// hidden on modern browsers via CSS.

View File

@ -4,7 +4,7 @@
Search The Internet
</h1>
<div id="suggestions-anchor"></div>
<input autofocus type="text" id="query" name="query" placeholder="Search..." value="{{query}}">
<input {{#unless query}}autofocus{{/unless}} type="text" id="query" name="query" placeholder="Search..." value="{{query}}">
<input type="hidden" name="js" value="{{js}}">
<input type="hidden" name="adtech" value="{{adtech}}">
<input type="hidden" name="searchTitle" value="{{searchTitle}}">

View File

@ -53,6 +53,12 @@
{{/with}}
{{/if}}
{{/each}}
{{!-- Page navigation: one link per entry in resultPages. The current page is rendered
      without an href and with the "active" class; href is emitted unescaped. --}}
<nav aria-label="pagination">
{{#each resultPages}}
<a {{#unless current}}href="{{{href}}}"{{/unless}} class="page-link {{#if current}}active{{/if}}">{{number}}</a>
{{/each}}
</nav>
</section>
{{#with filters}}

View File

@ -7,6 +7,7 @@ import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.api.searchquery.model.results.Bm25Parameters;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.functions.searchquery.QueryGRPCService;
import nu.marginalia.index.api.IndexClient;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.renderer.MustacheRenderer;
@ -15,8 +16,14 @@ import spark.Request;
import spark.Response;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import static java.lang.Integer.min;
import static java.lang.Integer.parseInt;
import static java.util.Objects.requireNonNullElse;
public class QueryBasicInterface {
private final MustacheRenderer<Object> basicRenderer;
private final MustacheRenderer<Object> qdebugRenderer;
@ -34,38 +41,53 @@ public class QueryBasicInterface {
this.queryGRPCService = queryGRPCService;
}
/** Handle the basic search endpoint exposed in the bare-bones search interface. */
public Object handleBasic(Request request, Response response) {
String queryParams = request.queryParams("q");
if (queryParams == null) {
String queryString = request.queryParams("q");
if (queryString == null) {
return basicRenderer.render(new Object());
}
int count = request.queryParams("count") == null ? 10 : Integer.parseInt(request.queryParams("count"));
int domainCount = request.queryParams("domainCount") == null ? 5 : Integer.parseInt(request.queryParams("domainCount"));
String set = request.queryParams("set") == null ? "" : request.queryParams("set");
int count = parseInt(requireNonNullElse(request.queryParams("count"), "10"));
int page = parseInt(requireNonNullElse(request.queryParams("page"), "1"));
int domainCount = parseInt(requireNonNullElse(request.queryParams("domainCount"), "5"));
String set = requireNonNullElse(request.queryParams("set"), "");
var params = new QueryParams(queryParams, new QueryLimits(
domainCount, count, 250, 8192
var params = new QueryParams(queryString, new QueryLimits(
domainCount, min(100, count * 10), 250, 8192
), set);
var pagination = new IndexClient.Pagination(page, count);
var detailedDirectResult = queryGRPCService.executeDirect(
queryParams, params, ResultRankingParameters.sensibleDefaults()
queryString,
params,
pagination,
ResultRankingParameters.sensibleDefaults()
);
var results = detailedDirectResult.result();
// Build one navigation entry per result page.
List<PaginationInfoPage> paginationInfo = new ArrayList<>();
// The page size comes straight from the "count" request parameter, so it may be zero or
// negative; in that case no page entries are produced instead of dividing by zero.
// Ceiling division is used so that a final, partially filled page still gets an entry;
// the arithmetic is widened to long so the rounding term cannot overflow int.
int resultsPerPage = pagination.pageSize();
int pageCount = resultsPerPage > 0
        ? (int) ((detailedDirectResult.totalResults() + (long) resultsPerPage - 1) / resultsPerPage)
        : 0;
for (int i = 1; i <= pageCount; i++) {
    paginationInfo.add(new PaginationInfoPage(i, i == pagination.page()));
}
if (request.headers("Accept").contains("application/json")) {
response.type("application/json");
return gson.toJson(results);
}
else {
return basicRenderer.render(
Map.of("query", queryParams,
Map.of("query", queryString,
"pages", paginationInfo,
"results", results)
);
}
}
/** Handle the qdebug endpoint, which allows for query debugging and ranking parameter tuning. */
public Object handleAdvanced(Request request, Response response) {
String queryString = request.queryParams("q");
if (queryString == null) {
@ -74,18 +96,24 @@ public class QueryBasicInterface {
);
}
int count = request.queryParams("count") == null ? 10 : Integer.parseInt(request.queryParams("count"));
int domainCount = request.queryParams("domainCount") == null ? 5 : Integer.parseInt(request.queryParams("domainCount"));
String set = request.queryParams("set") == null ? "" : request.queryParams("set");
int count = parseInt(requireNonNullElse(request.queryParams("count"), "10"));
int page = parseInt(requireNonNullElse(request.queryParams("page"), "1"));
int domainCount = parseInt(requireNonNullElse(request.queryParams("domainCount"), "5"));
String set = requireNonNullElse(request.queryParams("set"), "");
var queryParams = new QueryParams(queryString, new QueryLimits(
domainCount, count, 250, 8192
domainCount, min(100, count * 10), 250, 8192
), set);
var pagination = new IndexClient.Pagination(page, count);
var rankingParams = debugRankingParamsFromRequest(request);
var detailedDirectResult = queryGRPCService.executeDirect(
queryString, queryParams, rankingParams
queryString,
queryParams,
pagination,
rankingParams
);
var results = detailedDirectResult.result();
@ -127,10 +155,12 @@ public class QueryBasicInterface {
}
int intFromRequest(Request request, String param, int defaultValue) {
return Strings.isNullOrEmpty(request.queryParams(param)) ? defaultValue : Integer.parseInt(request.queryParams(param));
return Strings.isNullOrEmpty(request.queryParams(param)) ? defaultValue : parseInt(request.queryParams(param));
}
String stringFromRequest(Request request, String param, String defaultValue) {
return Strings.isNullOrEmpty(request.queryParams(param)) ? defaultValue : request.queryParams(param);
}
/** One entry in the page navigation handed to the basic search template.
 *
 * @param number  the page number
 * @param current whether this is the page that was requested
 */
record PaginationInfoPage(int number, boolean current) {}
}

View File

@ -24,6 +24,20 @@
<div><small class="text-muted">{{url}}</small></div>
<p>{{description}}</p>
</div>
{{/each}}
{{!-- Page navigation: each page is a small GET form that resubmits the query with the
      chosen page number. The form lives inside the list item, since a list may only
      contain list items as direct children. --}}
<nav aria-label="pagination">
<ul class="pagination">
{{#each pages}}
<li class="page-item {{#if current}}active{{/if}}">
<form action="/search">
<input type="hidden" name="q" value="{{query}}">
<input type="hidden" name="page" value="{{number}}">
<input type="submit" class="page-link" value="{{number}}">
</form>
</li>
{{/each}}
</ul>
</nav>
{{/if}}
</div>