mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
Fix exclude term duplication from js flag.
This commit is contained in:
parent
0903d9f727
commit
fd1f3f796e
@ -1,9 +1,12 @@
|
||||
package nu.marginalia.wmsa.edge.model.search;
|
||||
|
||||
import lombok.*;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.ToString;
|
||||
import nu.marginalia.wmsa.edge.index.model.IndexBlock;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.concurrent.CopyOnWriteArrayList;
|
||||
|
||||
@ToString
|
||||
@Getter
|
||||
@ -23,7 +26,10 @@ public class EdgeSearchSubquery {
|
||||
}
|
||||
|
||||
public EdgeSearchSubquery withBlock(IndexBlock block) {
|
||||
return new EdgeSearchSubquery(searchTermsInclude, searchTermsExclude, block);
|
||||
return new EdgeSearchSubquery(
|
||||
new CopyOnWriteArrayList<>(searchTermsInclude),
|
||||
new CopyOnWriteArrayList<>(searchTermsExclude),
|
||||
block);
|
||||
}
|
||||
|
||||
public int termSize() {
|
||||
|
@ -88,7 +88,7 @@ public class EdgeSearchOperator {
|
||||
}
|
||||
|
||||
public DecoratedSearchResults doSearch(Context ctx, EdgeUserSearchParameters params, @Nullable Future<String> eval) {
|
||||
Observable<WikiArticles> definitions = getWikiArticle(ctx, params.getHumanQuery());
|
||||
Observable<WikiArticles> definitions = getWikiArticle(ctx, params.humanQuery());
|
||||
EdgeSearchQuery processedQuery = queryFactory.createQuery(params);
|
||||
|
||||
logger.info("Human terms: {}", Strings.join(processedQuery.searchTermsHuman, ','));
|
||||
@ -98,7 +98,7 @@ public class EdgeSearchOperator {
|
||||
String evalResult = getEvalResult(eval);
|
||||
|
||||
return new DecoratedSearchResults(params,
|
||||
getProblems(ctx, params.getHumanQuery(), evalResult, queryResults, processedQuery),
|
||||
getProblems(ctx, params.humanQuery(), evalResult, queryResults, processedQuery),
|
||||
evalResult,
|
||||
definitions.onErrorReturn((e) -> new WikiArticles()).blockingFirst(),
|
||||
queryResults.resultSet,
|
||||
|
@ -3,49 +3,39 @@ package nu.marginalia.wmsa.edge.search;
|
||||
import nu.marginalia.wmsa.edge.index.model.IndexBlock;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public enum EdgeSearchProfile {
|
||||
DEFAULT("default",
|
||||
Collections.emptyList(),
|
||||
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
|
||||
0, 1),
|
||||
MODERN("modern",
|
||||
Collections.emptyList(),
|
||||
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
|
||||
2),
|
||||
CORPO("corpo",
|
||||
Collections.emptyList(),
|
||||
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
|
||||
4, 5, 6, 7),
|
||||
YOLO("yolo",
|
||||
Collections.emptyList(),
|
||||
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
|
||||
0, 2, 1, 3, 4, 6),
|
||||
CORPO_CLEAN("corpo-clean",
|
||||
Collections.emptyList(),
|
||||
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
|
||||
4, 5),
|
||||
ACADEMIA("academia",
|
||||
Collections.emptyList(),
|
||||
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
|
||||
3),
|
||||
;
|
||||
|
||||
|
||||
public final String name;
|
||||
public final List<String> additionalSearchTerm;
|
||||
public final List<Integer> buckets;
|
||||
public final List<IndexBlock> indexBlocks;
|
||||
|
||||
EdgeSearchProfile(String name,
|
||||
List<String> additionalSearchTerm,
|
||||
List<IndexBlock> indexBlocks,
|
||||
int... buckets) {
|
||||
this.name = name;
|
||||
this.additionalSearchTerm = additionalSearchTerm;
|
||||
this.indexBlocks = indexBlocks;
|
||||
this.buckets = Arrays.stream(buckets).boxed().collect(Collectors.toList());
|
||||
}
|
||||
|
@ -15,6 +15,7 @@ import nu.marginalia.wmsa.configuration.server.MetricsServer;
|
||||
import nu.marginalia.wmsa.configuration.server.Service;
|
||||
import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient;
|
||||
import nu.marginalia.wmsa.edge.search.command.CommandEvaluator;
|
||||
import nu.marginalia.wmsa.edge.search.command.SearchJsParameter;
|
||||
import nu.marginalia.wmsa.edge.search.command.SearchParameters;
|
||||
import nu.marginalia.wmsa.edge.search.exceptions.RedirectException;
|
||||
import nu.marginalia.wmsa.edge.search.query.model.EdgeUserSearchParameters;
|
||||
@ -130,7 +131,7 @@ public class EdgeSearchService extends Service {
|
||||
|
||||
final String humanQuery = queryParam.trim();
|
||||
|
||||
var results = searchOperator.doApiSearch(ctx, new EdgeUserSearchParameters(humanQuery, profile, ""));
|
||||
var results = searchOperator.doApiSearch(ctx, new EdgeUserSearchParameters(humanQuery, profile, SearchJsParameter.DEFAULT));
|
||||
|
||||
return new ApiSearchResults("RESTRICTED", humanQuery, results.stream().map(ApiSearchResult::new).limit(limit).collect(Collectors.toList()));
|
||||
}
|
||||
@ -151,7 +152,9 @@ public class EdgeSearchService extends Service {
|
||||
|
||||
var params = new SearchParameters(
|
||||
EdgeSearchProfile.getSearchProfile(profileStr),
|
||||
Optional.ofNullable(request.queryParams("js")).orElse("default"));
|
||||
SearchJsParameter.parse(request.queryParams("js"))
|
||||
);
|
||||
|
||||
try {
|
||||
return searchCommandEvaulator.eval(ctx, params, humanQuery);
|
||||
}
|
||||
|
@ -0,0 +1,24 @@
|
||||
package nu.marginalia.wmsa.edge.search.command;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
/**
 * The JavaScript filter setting of a search request.
 *
 * Each constant pairs the string form of the setting ({@link #value}) with the
 * search terms that the setting implicitly excludes
 * ({@link #implictExcludeSearchTerms}).
 */
public enum SearchJsParameter {
|
||||
// No implicit exclude terms.
DEFAULT("default"),
|
||||
// "no-js": excludes the term "js:true".
DENY_JS("no-js", "js:true"),
|
||||
// "yes-js": excludes the term "js:false".
REQUIRE_JS("yes-js", "js:false");
|
||||
|
||||
/** String form of the setting; this is what {@link #parse} compares against. */
public final String value;
|
||||
/**
 * Search terms to exclude when this setting is active; empty for DEFAULT.
 * The array is exposed directly, so callers must not modify it.
 * NOTE(review): the name misspells "implicit"; it is public, so renaming it
 * would require updating every user of the field.
 */
public final String[] implictExcludeSearchTerms;
|
||||
|
||||
/**
 * @param value string form of the setting
 * @param implictExcludeSearchTerms zero or more search terms to exclude
 */
SearchJsParameter(String value, String... implictExcludeSearchTerms) {
|
||||
this.value = value;
|
||||
this.implictExcludeSearchTerms = implictExcludeSearchTerms;
|
||||
}
|
||||
|
||||
/**
 * Maps the string form of a setting to its constant.
 *
 * @param value the string to interpret; may be null
 * @return DENY_JS or REQUIRE_JS on an exact, case-sensitive match of their
 *         {@link #value}; DEFAULT for anything else, including null
 */
public static SearchJsParameter parse(@Nullable String value) {
|
||||
// equals() is called on the constant's value, so a null argument simply fails to match.
if (DENY_JS.value.equals(value)) return DENY_JS;
|
||||
if (REQUIRE_JS.value.equals(value)) return REQUIRE_JS;
|
||||
|
||||
// Unrecognized or missing input falls back to DEFAULT.
return DEFAULT;
|
||||
}
|
||||
}
|
@ -2,7 +2,7 @@ package nu.marginalia.wmsa.edge.search.command;
|
||||
|
||||
import nu.marginalia.wmsa.edge.search.EdgeSearchProfile;
|
||||
|
||||
public record SearchParameters(EdgeSearchProfile profile, String js) {
|
||||
public record SearchParameters(EdgeSearchProfile profile, SearchJsParameter js) {
|
||||
public String profileStr() {
|
||||
return profile.name;
|
||||
}
|
||||
|
@ -43,7 +43,8 @@ public class SearchCommand implements SearchCommandInterface {
|
||||
public Optional<Object> process(Context ctx, SearchParameters parameters, String query) {
|
||||
@CheckForNull Future<String> eval = unitConversion.tryEval(ctx, query);
|
||||
|
||||
DecoratedSearchResults results = searchOperator.doSearch(ctx, new EdgeUserSearchParameters(query, parameters.profile(), parameters.js()), eval);
|
||||
EdgeUserSearchParameters params = new EdgeUserSearchParameters(query, parameters.profile(), parameters.js());
|
||||
DecoratedSearchResults results = searchOperator.doSearch(ctx, params, eval);
|
||||
|
||||
results.getResults().removeIf(detail -> blacklist.isBlacklisted(dataStoreDao.getDomainId(detail.url.domain)));
|
||||
|
||||
|
@ -20,12 +20,12 @@ public class DecoratedSearchResults {
|
||||
private final int focusDomainId;
|
||||
|
||||
public String getQuery() {
|
||||
return params.humanQuery;
|
||||
return params.humanQuery();
|
||||
}
|
||||
public String getProfile() {
|
||||
return params.getProfile().name;
|
||||
return params.profile().name;
|
||||
}
|
||||
public String getJs() {
|
||||
return params.jsSetting;
|
||||
return params.jsSetting().value;
|
||||
}
|
||||
}
|
||||
|
@ -2,9 +2,9 @@ package nu.marginalia.wmsa.edge.search.query;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.wmsa.edge.assistant.dict.NGramDict;
|
||||
import nu.marginalia.util.language.WordPatterns;
|
||||
import nu.marginalia.util.language.conf.LanguageModels;
|
||||
import nu.marginalia.wmsa.edge.assistant.dict.NGramDict;
|
||||
import nu.marginalia.wmsa.edge.index.model.IndexBlock;
|
||||
import nu.marginalia.wmsa.edge.model.search.EdgeSearchSpecification;
|
||||
import nu.marginalia.wmsa.edge.model.search.EdgeSearchSubquery;
|
||||
@ -39,15 +39,26 @@ public class QueryFactory {
|
||||
}
|
||||
|
||||
public EdgeSearchQuery createQuery(EdgeUserSearchParameters params) {
|
||||
final var profile = params.getProfile();
|
||||
final var jsSetting = params.getJsSetting();
|
||||
|
||||
final var profile = params.profile();
|
||||
final var processedQuery = createQuery(getParser(), params);
|
||||
|
||||
processedQuery.specs.experimental = EdgeSearchProfile.CORPO.equals(profile);
|
||||
processedQuery.specs.stagger = EdgeSearchProfile.YOLO.equals(profile);
|
||||
|
||||
List<EdgeSearchSubquery> subqueries = new ArrayList<>(processedQuery.specs.subqueries.size() * profile.indexBlocks.size());
|
||||
final var newSubqueries = reevaluateSubqueries(processedQuery, params);
|
||||
|
||||
processedQuery.specs.subqueries.clear();
|
||||
processedQuery.specs.subqueries.addAll(newSubqueries);
|
||||
|
||||
return processedQuery;
|
||||
}
|
||||
|
||||
private List<EdgeSearchSubquery> reevaluateSubqueries(EdgeSearchQuery processedQuery, EdgeUserSearchParameters params) {
|
||||
final var jsSetting = params.jsSetting();
|
||||
final var profile = params.profile();
|
||||
|
||||
List<EdgeSearchSubquery> subqueries =
|
||||
new ArrayList<>(processedQuery.specs.subqueries.size() * profile.indexBlocks.size());
|
||||
|
||||
for (var sq : processedQuery.specs.subqueries) {
|
||||
for (var block : profile.indexBlocks) {
|
||||
@ -55,28 +66,19 @@ public class QueryFactory {
|
||||
}
|
||||
}
|
||||
|
||||
processedQuery.specs.subqueries.clear();
|
||||
processedQuery.specs.subqueries.addAll(subqueries);
|
||||
|
||||
processedQuery.specs.subqueries.forEach(sq -> {
|
||||
sq.searchTermsInclude.addAll(profile.additionalSearchTerm);
|
||||
if (jsSetting.equals("yes-js")) {
|
||||
sq.searchTermsExclude.add("js:false");
|
||||
}
|
||||
if (jsSetting.equals("no-js")) {
|
||||
sq.searchTermsExclude.add("js:true");
|
||||
}
|
||||
subqueries.forEach(sq -> {
|
||||
sq.searchTermsExclude.addAll(Arrays.asList(jsSetting.implictExcludeSearchTerms));
|
||||
});
|
||||
|
||||
processedQuery.specs.subqueries.sort(Comparator.comparing(sq -> -sq.termSize()*2.3 + sq.block.sortOrder));
|
||||
subqueries.sort(Comparator.comparing(sq -> -sq.termSize()*2.3 + sq.block.sortOrder));
|
||||
|
||||
return processedQuery;
|
||||
return subqueries;
|
||||
}
|
||||
|
||||
|
||||
public EdgeSearchQuery createQuery(QueryParser queryParser, EdgeUserSearchParameters params) {
|
||||
final var query = params.humanQuery;
|
||||
final var profile = params.getProfile();
|
||||
final var query = params.humanQuery();
|
||||
final var profile = params.profile();
|
||||
|
||||
if (query.length() > 1000) {
|
||||
Spark.halt(HttpStatus.BAD_REQUEST_400, "That's too much, man");
|
||||
|
@ -1,12 +1,7 @@
|
||||
package nu.marginalia.wmsa.edge.search.query.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import nu.marginalia.wmsa.edge.search.EdgeSearchProfile;
|
||||
import nu.marginalia.wmsa.edge.search.command.SearchJsParameter;
|
||||
|
||||
@AllArgsConstructor @Getter
|
||||
public class EdgeUserSearchParameters {
|
||||
public final String humanQuery;
|
||||
public final EdgeSearchProfile profile;
|
||||
public final String jsSetting;
|
||||
/**
 * Immutable bundle of the inputs for one user search.
 *
 * @param humanQuery the query text as entered by the user
 * @param profile    the search profile to query with
 * @param jsSetting  the JavaScript filter setting for the search
 */
public record EdgeUserSearchParameters (String humanQuery, EdgeSearchProfile profile, SearchJsParameter jsSetting){
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user