diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchSubquery.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchSubquery.java index 61a37b41..166327e3 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchSubquery.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchSubquery.java @@ -1,9 +1,12 @@ package nu.marginalia.wmsa.edge.model.search; -import lombok.*; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.ToString; import nu.marginalia.wmsa.edge.index.model.IndexBlock; import java.util.List; +import java.util.concurrent.CopyOnWriteArrayList; @ToString @Getter @@ -23,7 +26,10 @@ public class EdgeSearchSubquery { } public EdgeSearchSubquery withBlock(IndexBlock block) { - return new EdgeSearchSubquery(searchTermsInclude, searchTermsExclude, block); + return new EdgeSearchSubquery( + new CopyOnWriteArrayList<>(searchTermsInclude), + new CopyOnWriteArrayList<>(searchTermsExclude), + block); } public int termSize() { diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchOperator.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchOperator.java index d801965d..7ba8f167 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchOperator.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchOperator.java @@ -88,7 +88,7 @@ public class EdgeSearchOperator { } public DecoratedSearchResults doSearch(Context ctx, EdgeUserSearchParameters params, @Nullable Future eval) { - Observable definitions = getWikiArticle(ctx, params.getHumanQuery()); + Observable definitions = getWikiArticle(ctx, params.humanQuery()); EdgeSearchQuery processedQuery = queryFactory.createQuery(params); logger.info("Human terms: {}", Strings.join(processedQuery.searchTermsHuman, ',')); @@ -98,7 +98,7 @@ public class EdgeSearchOperator { String evalResult = getEvalResult(eval); return new DecoratedSearchResults(params, - getProblems(ctx, params.getHumanQuery(), evalResult, queryResults, processedQuery), + getProblems(ctx, params.humanQuery(), evalResult, queryResults, processedQuery), evalResult, definitions.onErrorReturn((e) -> new WikiArticles()).blockingFirst(), queryResults.resultSet, diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchProfile.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchProfile.java index 212d09ab..c08d9ca3 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchProfile.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchProfile.java @@ -3,49 +3,39 @@ package nu.marginalia.wmsa.edge.search; import nu.marginalia.wmsa.edge.index.model.IndexBlock; import java.util.Arrays; -import java.util.Collections; import java.util.List; import java.util.stream.Collectors; public enum EdgeSearchProfile { DEFAULT("default", - Collections.emptyList(), List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords), 0, 1), MODERN("modern", - Collections.emptyList(), List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords), 2), CORPO("corpo", - Collections.emptyList(), List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords), 4, 5, 6, 7), YOLO("yolo", - Collections.emptyList(), List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords), 0, 2, 1, 3, 4, 6), CORPO_CLEAN("corpo-clean", - Collections.emptyList(), List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords), 4, 5), ACADEMIA("academia", - Collections.emptyList(), List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords), 3), ; public final String name; - public final List additionalSearchTerm; public final List buckets; public final List indexBlocks; EdgeSearchProfile(String name, - List additionalSearchTerm, List indexBlocks, int... buckets) { this.name = name; - this.additionalSearchTerm = additionalSearchTerm; this.indexBlocks = indexBlocks; this.buckets = Arrays.stream(buckets).boxed().collect(Collectors.toList()); } diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchService.java index 42830179..24e7eba4 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchService.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchService.java @@ -15,6 +15,7 @@ import nu.marginalia.wmsa.configuration.server.MetricsServer; import nu.marginalia.wmsa.configuration.server.Service; import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient; import nu.marginalia.wmsa.edge.search.command.CommandEvaluator; +import nu.marginalia.wmsa.edge.search.command.SearchJsParameter; import nu.marginalia.wmsa.edge.search.command.SearchParameters; import nu.marginalia.wmsa.edge.search.exceptions.RedirectException; import nu.marginalia.wmsa.edge.search.query.model.EdgeUserSearchParameters; @@ -130,7 +131,7 @@ public class EdgeSearchService extends Service { final String humanQuery = queryParam.trim(); - var results = searchOperator.doApiSearch(ctx, new EdgeUserSearchParameters(humanQuery, profile, "")); + var results = searchOperator.doApiSearch(ctx, new EdgeUserSearchParameters(humanQuery, profile, SearchJsParameter.DEFAULT)); return new ApiSearchResults("RESTRICTED", humanQuery, results.stream().map(ApiSearchResult::new).limit(limit).collect(Collectors.toList())); } @@ -151,7 +152,9 @@ public class EdgeSearchService extends Service { var params = new SearchParameters( EdgeSearchProfile.getSearchProfile(profileStr), - Optional.ofNullable(request.queryParams("js")).orElse("default")); + SearchJsParameter.parse(request.queryParams("js")) + ); + try { return searchCommandEvaulator.eval(ctx, params, humanQuery); } diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/command/SearchJsParameter.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/command/SearchJsParameter.java new file mode 100644 index 00000000..29367549 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/command/SearchJsParameter.java @@ -0,0 +1,24 @@ +package nu.marginalia.wmsa.edge.search.command; + +import javax.annotation.Nullable; + +public enum SearchJsParameter { + DEFAULT("default"), + DENY_JS("no-js", "js:true"), + REQUIRE_JS("yes-js", "js:false"); + + public final String value; + public final String[] implictExcludeSearchTerms; + + SearchJsParameter(String value, String... implictExcludeSearchTerms) { + this.value = value; + this.implictExcludeSearchTerms = implictExcludeSearchTerms; + } + + public static SearchJsParameter parse(@Nullable String value) { + if (DENY_JS.value.equals(value)) return DENY_JS; + if (REQUIRE_JS.value.equals(value)) return REQUIRE_JS; + + return DEFAULT; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/command/SearchParameters.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/command/SearchParameters.java index dc6ae832..33ebb1bc 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/command/SearchParameters.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/command/SearchParameters.java @@ -2,7 +2,7 @@ package nu.marginalia.wmsa.edge.search.command; import nu.marginalia.wmsa.edge.search.EdgeSearchProfile; -public record SearchParameters(EdgeSearchProfile profile, String js) { +public record SearchParameters(EdgeSearchProfile profile, SearchJsParameter js) { public String profileStr() { return profile.name; } diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/command/commands/SearchCommand.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/command/commands/SearchCommand.java index 66a4e056..67df810a 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/command/commands/SearchCommand.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/command/commands/SearchCommand.java @@ -43,7 +43,8 @@ public class SearchCommand implements SearchCommandInterface { public Optional process(Context ctx, SearchParameters parameters, String query) { @CheckForNull Future eval = unitConversion.tryEval(ctx, query); - DecoratedSearchResults results = searchOperator.doSearch(ctx, new EdgeUserSearchParameters(query, parameters.profile(), parameters.js()), eval); + EdgeUserSearchParameters params = new EdgeUserSearchParameters(query, parameters.profile(), parameters.js()); + DecoratedSearchResults results = searchOperator.doSearch(ctx, params, eval); results.getResults().removeIf(detail -> blacklist.isBlacklisted(dataStoreDao.getDomainId(detail.url.domain))); diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/model/DecoratedSearchResults.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/model/DecoratedSearchResults.java index bb0016cb..30288099 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/model/DecoratedSearchResults.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/model/DecoratedSearchResults.java @@ -20,12 +20,12 @@ public class DecoratedSearchResults { private final int focusDomainId; public String getQuery() { - return params.humanQuery; + return params.humanQuery(); } public String getProfile() { - return params.getProfile().name; + return params.profile().name; } public String getJs() { - return params.jsSetting; + return params.jsSetting().value; } } diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/query/QueryFactory.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/query/QueryFactory.java index 1d77a9d0..53d80cd6 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/query/QueryFactory.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/query/QueryFactory.java @@ -2,9 +2,9 @@ package nu.marginalia.wmsa.edge.search.query; import com.google.inject.Inject; import com.google.inject.Singleton; -import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; import nu.marginalia.util.language.WordPatterns; import nu.marginalia.util.language.conf.LanguageModels; +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; import nu.marginalia.wmsa.edge.index.model.IndexBlock; import nu.marginalia.wmsa.edge.model.search.EdgeSearchSpecification; import nu.marginalia.wmsa.edge.model.search.EdgeSearchSubquery; @@ -39,15 +39,26 @@ public class QueryFactory { } public EdgeSearchQuery createQuery(EdgeUserSearchParameters params) { - final var profile = params.getProfile(); - final var jsSetting = params.getJsSetting(); - + final var profile = params.profile(); final var processedQuery = createQuery(getParser(), params); processedQuery.specs.experimental = EdgeSearchProfile.CORPO.equals(profile); processedQuery.specs.stagger = EdgeSearchProfile.YOLO.equals(profile); - List subqueries = new ArrayList<>(processedQuery.specs.subqueries.size() * profile.indexBlocks.size()); + final var newSubqueries = reevaluateSubqueries(processedQuery, params); + + processedQuery.specs.subqueries.clear(); + processedQuery.specs.subqueries.addAll(newSubqueries); + + return processedQuery; + } + + private List reevaluateSubqueries(EdgeSearchQuery processedQuery, EdgeUserSearchParameters params) { + final var jsSetting = params.jsSetting(); + final var profile = params.profile(); + + List subqueries = + new ArrayList<>(processedQuery.specs.subqueries.size() * profile.indexBlocks.size()); for (var sq : processedQuery.specs.subqueries) { for (var block : profile.indexBlocks) { @@ -55,28 +66,19 @@ public class QueryFactory { } } - processedQuery.specs.subqueries.clear(); - processedQuery.specs.subqueries.addAll(subqueries); - - processedQuery.specs.subqueries.forEach(sq -> { - sq.searchTermsInclude.addAll(profile.additionalSearchTerm); - if (jsSetting.equals("yes-js")) { - sq.searchTermsExclude.add("js:false"); - } - if (jsSetting.equals("no-js")) { - sq.searchTermsExclude.add("js:true"); - } + subqueries.forEach(sq -> { + sq.searchTermsExclude.addAll(Arrays.asList(jsSetting.implictExcludeSearchTerms)); }); - processedQuery.specs.subqueries.sort(Comparator.comparing(sq -> -sq.termSize()*2.3 + sq.block.sortOrder)); + subqueries.sort(Comparator.comparing(sq -> -sq.termSize()*2.3 + sq.block.sortOrder)); - return processedQuery; + return subqueries; } public EdgeSearchQuery createQuery(QueryParser queryParser, EdgeUserSearchParameters params) { - final var query = params.humanQuery; - final var profile = params.getProfile(); + final var query = params.humanQuery(); + final var profile = params.profile(); if (query.length() > 1000) { Spark.halt(HttpStatus.BAD_REQUEST_400, "That's too much, man"); diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/query/model/EdgeUserSearchParameters.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/query/model/EdgeUserSearchParameters.java index ee58f099..3bc35c90 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/query/model/EdgeUserSearchParameters.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/query/model/EdgeUserSearchParameters.java @@ -1,12 +1,7 @@ package nu.marginalia.wmsa.edge.search.query.model; -import lombok.AllArgsConstructor; -import lombok.Getter; import nu.marginalia.wmsa.edge.search.EdgeSearchProfile; +import nu.marginalia.wmsa.edge.search.command.SearchJsParameter; -@AllArgsConstructor @Getter -public class EdgeUserSearchParameters { - public final String humanQuery; - public final EdgeSearchProfile profile; - public final String jsSetting; +public record EdgeUserSearchParameters (String humanQuery, EdgeSearchProfile profile, SearchJsParameter jsSetting){ }