From 66b3e71e56d1266cf247ca735f7757f8090e79a0 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Thu, 15 Feb 2024 13:39:51 +0100 Subject: [PATCH] (search) Expose more search options This change set updates the query APIs to enable the search service to add additional criteria, such as QueryStrategy and TemporalBias. The QueryStrategy makes it possible to, e.g., require that a match is in the title of a result, and TemporalBias enables penalizing results that are not within a particular time period. These options are added to the search interface. The old 'recent results' filter is modified to use TemporalBias, and a new filter 'Search In Title' is added as well. The vintage filter is modified to add a temporal bias for the past. --- .../index/client/IndexProtobufCodec.java | 22 ++++++---- .../results/ResultRankingParameters.java | 2 +- .../src/main/protobuf/index-api.proto | 20 +++++---- .../index/client/IndexProtobufCodecTest.java | 3 +- .../marginalia/query/QueryProtobufCodec.java | 23 ++++++++--- .../marginalia/query/model/QueryParams.java | 10 ++++- .../search/SearchQueryParamFactory.java | 22 ++++++---- .../search/command/SearchParameters.java | 41 ++++++++++++++++--- .../search/command/SearchRecentParameter.java | 12 ------ .../search/command/SearchTitleParameter.java | 21 ++++++++++ .../search/model/SearchFilters.java | 34 +++++++++++++-- .../search/svc/SearchQueryService.java | 5 ++- .../templates/search/parts/search-filters.hdb | 10 ++++- .../templates/search/parts/search-form.hdb | 1 + .../command/commands/BangCommandTest.java | 2 +- .../query/svc/QueryFactoryTest.java | 6 ++- 16 files changed, 176 insertions(+), 58 deletions(-) create mode 100644 code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchTitleParameter.java diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/IndexProtobufCodec.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/IndexProtobufCodec.java index 1178ea2d..633da26e 100644 
--- a/code/api/index-api/src/main/java/nu/marginalia/index/client/IndexProtobufCodec.java +++ b/code/api/index-api/src/main/java/nu/marginalia/index/client/IndexProtobufCodec.java @@ -88,14 +88,15 @@ public class IndexProtobufCodec { params.getBm25FullWeight(), params.getBm25PrioWeight(), params.getTcfWeight(), - ResultRankingParameters.TemporalBias.valueOf(params.getTemporalBias().name()), + ResultRankingParameters.TemporalBias.valueOf(params.getTemporalBias().getBias().name()), params.getTemporalBiasWeight() ); }; - public static RpcResultRankingParameters convertRankingParameterss(ResultRankingParameters rankingParams) { - return - RpcResultRankingParameters.newBuilder() + public static RpcResultRankingParameters convertRankingParameterss(ResultRankingParameters rankingParams, + RpcTemporalBias temporalBias) + { + var builder = RpcResultRankingParameters.newBuilder() .setFullB(rankingParams.fullParams.b()) .setFullK(rankingParams.fullParams.k()) .setPrioB(rankingParams.prioParams.b()) @@ -109,9 +110,16 @@ public class IndexProtobufCodec { .setBm25FullWeight(rankingParams.bm25FullWeight) .setBm25PrioWeight(rankingParams.bm25PrioWeight) .setTcfWeight(rankingParams.tcfWeight) - .setTemporalBias(RpcResultRankingParameters.TEMPORAL_BIAS.valueOf(rankingParams.temporalBias.name())) - .setTemporalBiasWeight(rankingParams.temporalBiasWeight) - .build(); + .setTemporalBiasWeight(rankingParams.temporalBiasWeight); + if (temporalBias != null && temporalBias.getBias() != RpcTemporalBias.Bias.NONE) { + builder.setTemporalBias(temporalBias); + } + else { + builder.setTemporalBias(RpcTemporalBias.newBuilder() + .setBias(RpcTemporalBias.Bias.valueOf(rankingParams.temporalBias.name()))); + } + + return builder.build(); } } diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/ResultRankingParameters.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/ResultRankingParameters.java index a77d9a9a..b7bb9075 100644 --- 
a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/ResultRankingParameters.java +++ b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/ResultRankingParameters.java @@ -52,7 +52,7 @@ public class ResultRankingParameters { .bm25PrioWeight(1.) .tcfWeight(2.) .temporalBias(TemporalBias.NONE) - .temporalBiasWeight(1. / (10.)) + .temporalBiasWeight(1. / (5.)) .build(); } diff --git a/code/api/index-api/src/main/protobuf/index-api.proto b/code/api/index-api/src/main/protobuf/index-api.proto index e7b5ae69..ad05152e 100644 --- a/code/api/index-api/src/main/protobuf/index-api.proto +++ b/code/api/index-api/src/main/protobuf/index-api.proto @@ -51,7 +51,8 @@ message RpcQsQuery { repeated int32 domainIds = 12; RpcQueryLimits queryLimits = 13; string searchSetIdentifier = 14; - + string queryStrategy = 15; // Named query configuration + RpcTemporalBias temporalBias = 16; } /* Query service query response */ @@ -63,6 +64,15 @@ message RpcQsResponse { string domain = 5; } +message RpcTemporalBias { + enum Bias { + NONE = 0; + RECENT = 1; + OLD = 2; + } + Bias bias = 1; +} + /* Index service query request */ message RpcIndexQuery { repeated RpcSubquery subqueries = 1; @@ -146,14 +156,8 @@ message RpcResultRankingParameters { double bm25FullWeight = 11; double bm25PrioWeight = 12; double tcfWeight = 13; - TEMPORAL_BIAS temporalBias = 14; + RpcTemporalBias temporalBias = 14; double temporalBiasWeight = 15; - - enum TEMPORAL_BIAS { - NONE = 0; - RECENT = 1; // Prefer recent documents - OLD = 2; // Prefer older documents - } } /* Defines a single subquery */ diff --git a/code/api/index-api/src/test/java/nu/marginalia/index/client/IndexProtobufCodecTest.java b/code/api/index-api/src/test/java/nu/marginalia/index/client/IndexProtobufCodecTest.java index 36e85429..2ea6d9ea 100644 --- a/code/api/index-api/src/test/java/nu/marginalia/index/client/IndexProtobufCodecTest.java +++ 
b/code/api/index-api/src/test/java/nu/marginalia/index/client/IndexProtobufCodecTest.java @@ -1,5 +1,6 @@ package nu.marginalia.index.client; +import nu.marginalia.index.api.RpcTemporalBias; import nu.marginalia.index.client.model.query.SearchSubquery; import nu.marginalia.index.client.model.results.ResultRankingParameters; import nu.marginalia.index.query.limit.QueryLimits; @@ -23,7 +24,7 @@ class IndexProtobufCodecTest { @Test public void testRankingParameters() { verifyIsIdentityTransformation(ResultRankingParameters.sensibleDefaults(), - p -> IndexProtobufCodec.convertRankingParameterss(IndexProtobufCodec.convertRankingParameterss(p))); + p -> IndexProtobufCodec.convertRankingParameterss(IndexProtobufCodec.convertRankingParameterss(p, null))); } @Test diff --git a/code/api/query-api/src/main/java/nu/marginalia/query/QueryProtobufCodec.java b/code/api/query-api/src/main/java/nu/marginalia/query/QueryProtobufCodec.java index 36b16a55..d24a727e 100644 --- a/code/api/query-api/src/main/java/nu/marginalia/query/QueryProtobufCodec.java +++ b/code/api/query-api/src/main/java/nu/marginalia/query/QueryProtobufCodec.java @@ -3,10 +3,10 @@ package nu.marginalia.query; import lombok.SneakyThrows; import nu.marginalia.index.api.*; import nu.marginalia.index.client.IndexProtobufCodec; -import nu.marginalia.index.client.model.query.SearchSetIdentifier; import nu.marginalia.index.client.model.query.SearchSpecification; import nu.marginalia.index.client.model.query.SearchSubquery; import nu.marginalia.index.client.model.results.DecoratedSearchResultItem; +import nu.marginalia.index.client.model.results.ResultRankingParameters; import nu.marginalia.index.client.model.results.SearchResultItem; import nu.marginalia.index.client.model.results.SearchResultKeywordScore; import nu.marginalia.index.query.limit.QueryStrategy; @@ -41,8 +41,14 @@ public class QueryProtobufCodec { builder.setDomainCount(convertSpecLimit(query.specs.domainCount)); 
builder.setQueryLimits(IndexProtobufCodec.convertQueryLimits(query.specs.queryLimits)); - builder.setQueryStrategy(query.specs.queryStrategy.name()); - builder.setParameters(IndexProtobufCodec.convertRankingParameterss(query.specs.rankingParams)); + + // Query strategy may be overridden by the query, but if not, use the one from the request + if (query.specs.queryStrategy != null && query.specs.queryStrategy != QueryStrategy.AUTO) + builder.setQueryStrategy(query.specs.queryStrategy.name()); + else + builder.setQueryStrategy(request.getQueryStrategy()); + + builder.setParameters(IndexProtobufCodec.convertRankingParameterss(query.specs.rankingParams, request.getTemporalBias())); return builder.build(); } @@ -62,7 +68,10 @@ public class QueryProtobufCodec { convertSpecLimit(request.getDomainCount()), request.getDomainIdsList(), IndexProtobufCodec.convertQueryLimits(request.getQueryLimits()), - request.getSearchSetIdentifier()); + request.getSearchSetIdentifier(), + QueryStrategy.valueOf(request.getQueryStrategy()), + ResultRankingParameters.TemporalBias.valueOf(request.getTemporalBias().getBias().name()) + ); } @@ -159,7 +168,11 @@ public class QueryProtobufCodec { .setYear(convertSpecLimit(params.year())) .setSize(convertSpecLimit(params.size())) .setRank(convertSpecLimit(params.rank())) - .setSearchSetIdentifier(params.identifier()); + .setSearchSetIdentifier(params.identifier()) + .setQueryStrategy(params.queryStrategy().name()) + .setTemporalBias(RpcTemporalBias.newBuilder() + .setBias(RpcTemporalBias.Bias.valueOf(params.temporalBias().name())) + .build()); if (params.nearDomain() != null) builder.setNearDomain(params.nearDomain()); diff --git a/code/api/query-api/src/main/java/nu/marginalia/query/model/QueryParams.java b/code/api/query-api/src/main/java/nu/marginalia/query/model/QueryParams.java index 35ca1828..d588ac2f 100644 --- a/code/api/query-api/src/main/java/nu/marginalia/query/model/QueryParams.java +++ 
b/code/api/query-api/src/main/java/nu/marginalia/query/model/QueryParams.java @@ -1,6 +1,8 @@ package nu.marginalia.query.model; +import nu.marginalia.index.client.model.results.ResultRankingParameters; import nu.marginalia.index.query.limit.QueryLimits; +import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.query.limit.SpecificationLimit; import javax.annotation.Nullable; @@ -21,7 +23,9 @@ public record QueryParams( SpecificationLimit domainCount, List domainIds, QueryLimits limits, - String identifier + String identifier, + QueryStrategy queryStrategy, + ResultRankingParameters.TemporalBias temporalBias ) { public QueryParams(String query, QueryLimits limits, String identifier) { @@ -37,7 +41,9 @@ public record QueryParams( SpecificationLimit.none(), List.of(), limits, - identifier + identifier, + QueryStrategy.AUTO, + ResultRankingParameters.TemporalBias.NONE ); } } diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchQueryParamFactory.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchQueryParamFactory.java index 38c3961e..456fad91 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchQueryParamFactory.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchQueryParamFactory.java @@ -2,7 +2,9 @@ package nu.marginalia.search; import nu.marginalia.index.client.model.query.SearchSetIdentifier; import nu.marginalia.index.client.model.query.SearchSubquery; +import nu.marginalia.index.client.model.results.ResultRankingParameters; import nu.marginalia.index.query.limit.QueryLimits; +import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.query.limit.SpecificationLimit; import nu.marginalia.query.model.QueryParams; import nu.marginalia.search.command.SearchParameters; @@ -19,8 +21,6 @@ public class SearchQueryParamFactory { 
userParams.js().addTacitTerms(prototype); userParams.adtech().addTacitTerms(prototype); - SpecificationLimit yearLimit = userParams.recent().yearLimit(); - return new QueryParams( userParams.query(), null, @@ -29,13 +29,15 @@ public class SearchQueryParamFactory { prototype.searchTermsPriority, prototype.searchTermsAdvice, profile.getQualityLimit(), - yearLimit, + SpecificationLimit.none(), profile.getSizeLimit(), SpecificationLimit.none(), SpecificationLimit.none(), List.of(), new QueryLimits(5, 100, 200, 8192), - profile.searchSetIdentifier.name() + profile.searchSetIdentifier.name(), + userParams.strategy(), + userParams.temporalBias() ); } @@ -54,7 +56,9 @@ public class SearchQueryParamFactory { SpecificationLimit.none(), List.of(), new QueryLimits(count, count, 100, 512), - SearchSetIdentifier.NONE.name() + SearchSetIdentifier.NONE.name(), + QueryStrategy.AUTO, + ResultRankingParameters.TemporalBias.NONE ); } @@ -72,7 +76,9 @@ public class SearchQueryParamFactory { SpecificationLimit.none(), List.of(), new QueryLimits(100, 100, 100, 512), - SearchSetIdentifier.NONE.name() + SearchSetIdentifier.NONE.name(), + QueryStrategy.AUTO, + ResultRankingParameters.TemporalBias.NONE ); } @@ -90,7 +96,9 @@ public class SearchQueryParamFactory { SpecificationLimit.none(), List.of(), new QueryLimits(100, 100, 100, 512), - SearchSetIdentifier.NONE.name() + SearchSetIdentifier.NONE.name(), + QueryStrategy.AUTO, + ResultRankingParameters.TemporalBias.NONE ); } } diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchParameters.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchParameters.java index 865fe785..7a55c1cc 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchParameters.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchParameters.java @@ -1,15 +1,20 @@ package 
nu.marginalia.search.command; import nu.marginalia.WebsiteUrl; +import nu.marginalia.index.client.model.results.ResultRankingParameters; +import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.search.model.SearchProfile; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; +import static nu.marginalia.search.command.SearchRecentParameter.RECENT; + public record SearchParameters(String query, SearchProfile profile, SearchJsParameter js, SearchRecentParameter recent, + SearchTitleParameter title, SearchAdtechParameter adtech ) { public String profileStr() { @@ -17,29 +22,53 @@ public record SearchParameters(String query, } public SearchParameters withProfile(SearchProfile profile) { - return new SearchParameters(query, profile, js, recent, adtech); + return new SearchParameters(query, profile, js, recent, title, adtech); } public SearchParameters withJs(SearchJsParameter js) { - return new SearchParameters(query, profile, js, recent, adtech); + return new SearchParameters(query, profile, js, recent, title, adtech); } public SearchParameters withAdtech(SearchAdtechParameter adtech) { - return new SearchParameters(query, profile, js, recent, adtech); + return new SearchParameters(query, profile, js, recent, title, adtech); } public SearchParameters withRecent(SearchRecentParameter recent) { - return new SearchParameters(query, profile, js, recent, adtech); + return new SearchParameters(query, profile, js, recent, title, adtech); + } + + public SearchParameters withTitle(SearchTitleParameter title) { + return new SearchParameters(query, profile, js, recent, title, adtech); } public String renderUrl(WebsiteUrl baseUrl) { - String path = String.format("/search?query=%s&profile=%s&js=%s&adtech=%s&recent=%s", + String path = String.format("/search?query=%s&profile=%s&js=%s&adtech=%s&recent=%s&searchTitle=%s", URLEncoder.encode(query, StandardCharsets.UTF_8), URLEncoder.encode(profile.filterId, StandardCharsets.UTF_8), 
URLEncoder.encode(js.value, StandardCharsets.UTF_8), URLEncoder.encode(adtech.value, StandardCharsets.UTF_8), - URLEncoder.encode(recent.value, StandardCharsets.UTF_8) + URLEncoder.encode(recent.value, StandardCharsets.UTF_8), + URLEncoder.encode(title.value, StandardCharsets.UTF_8) ); return baseUrl.withPath(path); } + + public ResultRankingParameters.TemporalBias temporalBias() { + if (recent == RECENT) { + return ResultRankingParameters.TemporalBias.RECENT; + } + else if (profile == SearchProfile.VINTAGE) { + return ResultRankingParameters.TemporalBias.OLD; + } + + return ResultRankingParameters.TemporalBias.NONE; + } + + public QueryStrategy strategy() { + if (title == SearchTitleParameter.TITLE) { + return QueryStrategy.REQUIRE_FIELD_TITLE; + } + + return QueryStrategy.AUTO; + } } diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchRecentParameter.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchRecentParameter.java index c6c17453..9b1223b5 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchRecentParameter.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchRecentParameter.java @@ -1,11 +1,6 @@ package nu.marginalia.search.command; -import nu.marginalia.index.client.model.query.SearchSubquery; -import nu.marginalia.index.query.limit.SpecificationLimit; - import javax.annotation.Nullable; -import java.time.LocalDateTime; -import java.util.Arrays; public enum SearchRecentParameter { DEFAULT("default"), @@ -23,11 +18,4 @@ public enum SearchRecentParameter { return DEFAULT; } - public SpecificationLimit yearLimit() { - if (this == RECENT) { - return SpecificationLimit.greaterThan(LocalDateTime.now().getYear() - 1); - } else { - return SpecificationLimit.none(); - } - } } diff --git 
a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchTitleParameter.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchTitleParameter.java new file mode 100644 index 00000000..ca1f4ccb --- /dev/null +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchTitleParameter.java @@ -0,0 +1,21 @@ +package nu.marginalia.search.command; + +import javax.annotation.Nullable; + +public enum SearchTitleParameter { + DEFAULT("default"), + TITLE("title"); + + public final String value; + + SearchTitleParameter(String value) { + this.value = value; + } + + public static SearchTitleParameter parse(@Nullable String value) { + if (TITLE.value.equals(value)) return TITLE; + + return DEFAULT; + } + +} diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/model/SearchFilters.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/model/SearchFilters.java index 1f2895af..9e02ff2b 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/model/SearchFilters.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/model/SearchFilters.java @@ -2,10 +2,7 @@ package nu.marginalia.search.model; import lombok.Getter; import nu.marginalia.WebsiteUrl; -import nu.marginalia.search.command.SearchAdtechParameter; -import nu.marginalia.search.command.SearchJsParameter; -import nu.marginalia.search.command.SearchParameters; -import nu.marginalia.search.command.SearchRecentParameter; +import nu.marginalia.search.command.*; import java.util.List; @@ -23,6 +20,8 @@ public class SearchFilters { public final ReduceAdtechOption reduceAdtechOption; @Getter public final ShowRecentOption showRecentOption; + @Getter + public final SearchTitleOption searchTitleOption; @Getter public final List> filterGroups; @@ -35,6 +34,7 @@ public class SearchFilters { 
removeJsOption = new RemoveJsOption(parameters); reduceAdtechOption = new ReduceAdtechOption(parameters); showRecentOption = new ShowRecentOption(parameters); + searchTitleOption = new SearchTitleOption(parameters); currentFilter = parameters.profile().filterId; @@ -141,6 +141,32 @@ public class SearchFilters { } } + public class SearchTitleOption { + private final SearchTitleParameter value; + + @Getter + public final String url; + + public boolean isSet() { + return value.equals(SearchTitleParameter.TITLE); + } + + public String name() { + return "Search In Title"; + } + + public SearchTitleOption(SearchParameters parameters) { + this.value = parameters.title(); + + var toggledValue = switch (parameters.title()) { + case TITLE -> SearchTitleParameter.DEFAULT; + default -> SearchTitleParameter.TITLE; + }; + + this.url = parameters.withTitle(toggledValue).renderUrl(SearchFilters.this.url); + } + } + public class Filter { @Getter public final String displayName; diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryService.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryService.java index 5b4e5c3a..906c4caf 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryService.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryService.java @@ -1,7 +1,6 @@ package nu.marginalia.search.svc; import com.google.inject.Inject; -import io.prometheus.client.Histogram; import lombok.SneakyThrows; import nu.marginalia.WebsiteUrl; import nu.marginalia.search.command.*; @@ -61,7 +60,9 @@ public class SearchQueryService { SearchProfile.getSearchProfile(request.queryParams("profile")), SearchJsParameter.parse(request.queryParams("js")), SearchRecentParameter.parse(request.queryParams("recent")), - SearchAdtechParameter.parse(request.queryParams("adtech"))); + 
SearchTitleParameter.parse(request.queryParams("searchTitle")), + SearchAdtechParameter.parse(request.queryParams("adtech")) + ); } catch (Exception ex) { // Bots keep sending bad requests, suppress the error otherwise it will diff --git a/code/services-application/search-service/src/main/resources/templates/search/parts/search-filters.hdb b/code/services-application/search-service/src/main/resources/templates/search/parts/search-filters.hdb index 64452d63..efb020cf 100644 --- a/code/services-application/search-service/src/main/resources/templates/search/parts/search-filters.hdb +++ b/code/services-application/search-service/src/main/resources/templates/search/parts/search-filters.hdb @@ -17,7 +17,15 @@ {{/with}} {{#with showRecentOption}} - + {{/with}} + {{#with searchTitleOption}} +