Fix exclude term duplication from js flag.

This commit is contained in:
vlofgren 2022-07-28 14:51:51 +02:00
parent 0903d9f727
commit fd1f3f796e
10 changed files with 69 additions and 48 deletions

View File

@ -1,9 +1,12 @@
package nu.marginalia.wmsa.edge.model.search;
import lombok.*;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.ToString;
import nu.marginalia.wmsa.edge.index.model.IndexBlock;
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
@ToString
@Getter
@ -23,7 +26,10 @@ public class EdgeSearchSubquery {
}
public EdgeSearchSubquery withBlock(IndexBlock block) {
return new EdgeSearchSubquery(searchTermsInclude, searchTermsExclude, block);
return new EdgeSearchSubquery(
new CopyOnWriteArrayList<>(searchTermsInclude),
new CopyOnWriteArrayList<>(searchTermsExclude),
block);
}
public int termSize() {

View File

@ -88,7 +88,7 @@ public class EdgeSearchOperator {
}
public DecoratedSearchResults doSearch(Context ctx, EdgeUserSearchParameters params, @Nullable Future<String> eval) {
Observable<WikiArticles> definitions = getWikiArticle(ctx, params.getHumanQuery());
Observable<WikiArticles> definitions = getWikiArticle(ctx, params.humanQuery());
EdgeSearchQuery processedQuery = queryFactory.createQuery(params);
logger.info("Human terms: {}", Strings.join(processedQuery.searchTermsHuman, ','));
@ -98,7 +98,7 @@ public class EdgeSearchOperator {
String evalResult = getEvalResult(eval);
return new DecoratedSearchResults(params,
getProblems(ctx, params.getHumanQuery(), evalResult, queryResults, processedQuery),
getProblems(ctx, params.humanQuery(), evalResult, queryResults, processedQuery),
evalResult,
definitions.onErrorReturn((e) -> new WikiArticles()).blockingFirst(),
queryResults.resultSet,

View File

@ -3,49 +3,39 @@ package nu.marginalia.wmsa.edge.search;
import nu.marginalia.wmsa.edge.index.model.IndexBlock;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
public enum EdgeSearchProfile {
DEFAULT("default",
Collections.emptyList(),
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
0, 1),
MODERN("modern",
Collections.emptyList(),
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
2),
CORPO("corpo",
Collections.emptyList(),
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
4, 5, 6, 7),
YOLO("yolo",
Collections.emptyList(),
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
0, 2, 1, 3, 4, 6),
CORPO_CLEAN("corpo-clean",
Collections.emptyList(),
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
4, 5),
ACADEMIA("academia",
Collections.emptyList(),
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
3),
;
public final String name;
public final List<String> additionalSearchTerm;
public final List<Integer> buckets;
public final List<IndexBlock> indexBlocks;
EdgeSearchProfile(String name,
List<String> additionalSearchTerm,
List<IndexBlock> indexBlocks,
int... buckets) {
this.name = name;
this.additionalSearchTerm = additionalSearchTerm;
this.indexBlocks = indexBlocks;
this.buckets = Arrays.stream(buckets).boxed().collect(Collectors.toList());
}

View File

@ -15,6 +15,7 @@ import nu.marginalia.wmsa.configuration.server.MetricsServer;
import nu.marginalia.wmsa.configuration.server.Service;
import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient;
import nu.marginalia.wmsa.edge.search.command.CommandEvaluator;
import nu.marginalia.wmsa.edge.search.command.SearchJsParameter;
import nu.marginalia.wmsa.edge.search.command.SearchParameters;
import nu.marginalia.wmsa.edge.search.exceptions.RedirectException;
import nu.marginalia.wmsa.edge.search.query.model.EdgeUserSearchParameters;
@ -130,7 +131,7 @@ public class EdgeSearchService extends Service {
final String humanQuery = queryParam.trim();
var results = searchOperator.doApiSearch(ctx, new EdgeUserSearchParameters(humanQuery, profile, ""));
var results = searchOperator.doApiSearch(ctx, new EdgeUserSearchParameters(humanQuery, profile, SearchJsParameter.DEFAULT));
return new ApiSearchResults("RESTRICTED", humanQuery, results.stream().map(ApiSearchResult::new).limit(limit).collect(Collectors.toList()));
}
@ -151,7 +152,9 @@ public class EdgeSearchService extends Service {
var params = new SearchParameters(
EdgeSearchProfile.getSearchProfile(profileStr),
Optional.ofNullable(request.queryParams("js")).orElse("default"));
SearchJsParameter.parse(request.queryParams("js"))
);
try {
return searchCommandEvaulator.eval(ctx, params, humanQuery);
}

View File

@ -0,0 +1,24 @@
package nu.marginalia.wmsa.edge.search.command;
import javax.annotation.Nullable;
/**
 * The "js" search setting, expressed as an enum instead of a raw string.
 *
 * <p>Each constant carries the textual form of the setting ({@link #value})
 * together with the search terms that are implicitly excluded when the
 * setting is active.
 */
public enum SearchJsParameter {
    /** Textual value {@code "default"}; carries no implicit exclude terms. */
    DEFAULT("default"),
    /** Textual value {@code "no-js"}; implicitly excludes the term {@code "js:true"}. */
    DENY_JS("no-js", "js:true"),
    /** Textual value {@code "yes-js"}; implicitly excludes the term {@code "js:false"}. */
    REQUIRE_JS("yes-js", "js:false");

    /** Textual form of this setting, as matched by {@link #parse(String)}. */
    public final String value;

    /**
     * Search terms implicitly excluded by this setting; empty for {@link #DEFAULT}.
     * The spelling of the field name is kept as-is because it is part of the
     * public surface of this enum.
     */
    public final String[] implictExcludeSearchTerms;

    SearchJsParameter(String value, String... implictExcludeSearchTerms) {
        this.value = value;
        this.implictExcludeSearchTerms = implictExcludeSearchTerms;
    }

    /**
     * Resolves a textual setting to its enum constant.
     *
     * @param value the textual setting; may be {@code null}
     * @return the constant whose {@link #value} equals {@code value}, or
     *         {@link #DEFAULT} when {@code value} is {@code null} or matches
     *         no constant
     */
    public static SearchJsParameter parse(@Nullable String value) {
        // equals() is invoked on the constant's own non-null string, so a
        // null argument simply matches nothing and falls through to DEFAULT.
        for (SearchJsParameter candidate : values()) {
            if (candidate.value.equals(value)) {
                return candidate;
            }
        }
        return DEFAULT;
    }
}

View File

@ -2,7 +2,7 @@ package nu.marginalia.wmsa.edge.search.command;
import nu.marginalia.wmsa.edge.search.EdgeSearchProfile;
public record SearchParameters(EdgeSearchProfile profile, String js) {
public record SearchParameters(EdgeSearchProfile profile, SearchJsParameter js) {
public String profileStr() {
return profile.name;
}

View File

@ -43,7 +43,8 @@ public class SearchCommand implements SearchCommandInterface {
public Optional<Object> process(Context ctx, SearchParameters parameters, String query) {
@CheckForNull Future<String> eval = unitConversion.tryEval(ctx, query);
DecoratedSearchResults results = searchOperator.doSearch(ctx, new EdgeUserSearchParameters(query, parameters.profile(), parameters.js()), eval);
EdgeUserSearchParameters params = new EdgeUserSearchParameters(query, parameters.profile(), parameters.js());
DecoratedSearchResults results = searchOperator.doSearch(ctx, params, eval);
results.getResults().removeIf(detail -> blacklist.isBlacklisted(dataStoreDao.getDomainId(detail.url.domain)));

View File

@ -20,12 +20,12 @@ public class DecoratedSearchResults {
private final int focusDomainId;
public String getQuery() {
return params.humanQuery;
return params.humanQuery();
}
public String getProfile() {
return params.getProfile().name;
return params.profile().name;
}
public String getJs() {
return params.jsSetting;
return params.jsSetting().value;
}
}

View File

@ -2,9 +2,9 @@ package nu.marginalia.wmsa.edge.search.query;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.wmsa.edge.assistant.dict.NGramDict;
import nu.marginalia.util.language.WordPatterns;
import nu.marginalia.util.language.conf.LanguageModels;
import nu.marginalia.wmsa.edge.assistant.dict.NGramDict;
import nu.marginalia.wmsa.edge.index.model.IndexBlock;
import nu.marginalia.wmsa.edge.model.search.EdgeSearchSpecification;
import nu.marginalia.wmsa.edge.model.search.EdgeSearchSubquery;
@ -39,15 +39,26 @@ public class QueryFactory {
}
public EdgeSearchQuery createQuery(EdgeUserSearchParameters params) {
final var profile = params.getProfile();
final var jsSetting = params.getJsSetting();
final var profile = params.profile();
final var processedQuery = createQuery(getParser(), params);
processedQuery.specs.experimental = EdgeSearchProfile.CORPO.equals(profile);
processedQuery.specs.stagger = EdgeSearchProfile.YOLO.equals(profile);
List<EdgeSearchSubquery> subqueries = new ArrayList<>(processedQuery.specs.subqueries.size() * profile.indexBlocks.size());
final var newSubqueries = reevaluateSubqueries(processedQuery, params);
processedQuery.specs.subqueries.clear();
processedQuery.specs.subqueries.addAll(newSubqueries);
return processedQuery;
}
private List<EdgeSearchSubquery> reevaluateSubqueries(EdgeSearchQuery processedQuery, EdgeUserSearchParameters params) {
final var jsSetting = params.jsSetting();
final var profile = params.profile();
List<EdgeSearchSubquery> subqueries =
new ArrayList<>(processedQuery.specs.subqueries.size() * profile.indexBlocks.size());
for (var sq : processedQuery.specs.subqueries) {
for (var block : profile.indexBlocks) {
@ -55,28 +66,19 @@ public class QueryFactory {
}
}
processedQuery.specs.subqueries.clear();
processedQuery.specs.subqueries.addAll(subqueries);
processedQuery.specs.subqueries.forEach(sq -> {
sq.searchTermsInclude.addAll(profile.additionalSearchTerm);
if (jsSetting.equals("yes-js")) {
sq.searchTermsExclude.add("js:false");
}
if (jsSetting.equals("no-js")) {
sq.searchTermsExclude.add("js:true");
}
subqueries.forEach(sq -> {
sq.searchTermsExclude.addAll(Arrays.asList(jsSetting.implictExcludeSearchTerms));
});
processedQuery.specs.subqueries.sort(Comparator.comparing(sq -> -sq.termSize()*2.3 + sq.block.sortOrder));
subqueries.sort(Comparator.comparing(sq -> -sq.termSize()*2.3 + sq.block.sortOrder));
return processedQuery;
return subqueries;
}
public EdgeSearchQuery createQuery(QueryParser queryParser, EdgeUserSearchParameters params) {
final var query = params.humanQuery;
final var profile = params.getProfile();
final var query = params.humanQuery();
final var profile = params.profile();
if (query.length() > 1000) {
Spark.halt(HttpStatus.BAD_REQUEST_400, "That's too much, man");

View File

@ -1,12 +1,7 @@
package nu.marginalia.wmsa.edge.search.query.model;
import lombok.AllArgsConstructor;
import lombok.Getter;
import nu.marginalia.wmsa.edge.search.EdgeSearchProfile;
import nu.marginalia.wmsa.edge.search.command.SearchJsParameter;
@AllArgsConstructor @Getter
public class EdgeUserSearchParameters {
public final String humanQuery;
public final EdgeSearchProfile profile;
public final String jsSetting;
public record EdgeUserSearchParameters (String humanQuery, EdgeSearchProfile profile, SearchJsParameter jsSetting){
}