mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
Fix exclude term duplication from js flag.
This commit is contained in:
parent
0903d9f727
commit
fd1f3f796e
@ -1,9 +1,12 @@
|
|||||||
package nu.marginalia.wmsa.edge.model.search;
|
package nu.marginalia.wmsa.edge.model.search;
|
||||||
|
|
||||||
import lombok.*;
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.ToString;
|
||||||
import nu.marginalia.wmsa.edge.index.model.IndexBlock;
|
import nu.marginalia.wmsa.edge.index.model.IndexBlock;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.concurrent.CopyOnWriteArrayList;
|
||||||
|
|
||||||
@ToString
|
@ToString
|
||||||
@Getter
|
@Getter
|
||||||
@ -23,7 +26,10 @@ public class EdgeSearchSubquery {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Returns a new EdgeSearchSubquery that carries this subquery's include and
 * exclude terms, paired with the given index block.
 *
 * This span is a side-by-side diff: the first signature/return is the version
 * before the commit, the second is the version after it.
 *
 * @param block the index block passed to the new subquery's constructor
 * @return a newly constructed EdgeSearchSubquery
 */
public EdgeSearchSubquery withBlock(IndexBlock block) {
|
public EdgeSearchSubquery withBlock(IndexBlock block) {
|
||||||
// Before: the new subquery receives this object's own list instances, so
// both objects refer to the same include/exclude lists.
return new EdgeSearchSubquery(searchTermsInclude, searchTermsExclude, block);
|
// After: each list is copied into a fresh CopyOnWriteArrayList, so adding a
// term to the returned subquery's lists does not touch this subquery's lists.
// NOTE(review): presumably this is what the commit title ("Fix exclude term
// duplication from js flag") refers to, since QueryFactory calls addAll on
// every subquery's searchTermsExclude; the withBlock call site is not visible
// in this diff, so confirm.
return new EdgeSearchSubquery(
|
||||||
|
new CopyOnWriteArrayList<>(searchTermsInclude),
|
||||||
|
new CopyOnWriteArrayList<>(searchTermsExclude),
|
||||||
|
block);
|
||||||
}
|
}
|
||||||
|
|
||||||
public int termSize() {
|
public int termSize() {
|
||||||
|
@ -88,7 +88,7 @@ public class EdgeSearchOperator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public DecoratedSearchResults doSearch(Context ctx, EdgeUserSearchParameters params, @Nullable Future<String> eval) {
|
public DecoratedSearchResults doSearch(Context ctx, EdgeUserSearchParameters params, @Nullable Future<String> eval) {
|
||||||
Observable<WikiArticles> definitions = getWikiArticle(ctx, params.getHumanQuery());
|
Observable<WikiArticles> definitions = getWikiArticle(ctx, params.humanQuery());
|
||||||
EdgeSearchQuery processedQuery = queryFactory.createQuery(params);
|
EdgeSearchQuery processedQuery = queryFactory.createQuery(params);
|
||||||
|
|
||||||
logger.info("Human terms: {}", Strings.join(processedQuery.searchTermsHuman, ','));
|
logger.info("Human terms: {}", Strings.join(processedQuery.searchTermsHuman, ','));
|
||||||
@ -98,7 +98,7 @@ public class EdgeSearchOperator {
|
|||||||
String evalResult = getEvalResult(eval);
|
String evalResult = getEvalResult(eval);
|
||||||
|
|
||||||
return new DecoratedSearchResults(params,
|
return new DecoratedSearchResults(params,
|
||||||
getProblems(ctx, params.getHumanQuery(), evalResult, queryResults, processedQuery),
|
getProblems(ctx, params.humanQuery(), evalResult, queryResults, processedQuery),
|
||||||
evalResult,
|
evalResult,
|
||||||
definitions.onErrorReturn((e) -> new WikiArticles()).blockingFirst(),
|
definitions.onErrorReturn((e) -> new WikiArticles()).blockingFirst(),
|
||||||
queryResults.resultSet,
|
queryResults.resultSet,
|
||||||
|
@ -3,49 +3,39 @@ package nu.marginalia.wmsa.edge.search;
|
|||||||
import nu.marginalia.wmsa.edge.index.model.IndexBlock;
|
import nu.marginalia.wmsa.edge.index.model.IndexBlock;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
public enum EdgeSearchProfile {
|
public enum EdgeSearchProfile {
|
||||||
DEFAULT("default",
|
DEFAULT("default",
|
||||||
Collections.emptyList(),
|
|
||||||
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
|
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
|
||||||
0, 1),
|
0, 1),
|
||||||
MODERN("modern",
|
MODERN("modern",
|
||||||
Collections.emptyList(),
|
|
||||||
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
|
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
|
||||||
2),
|
2),
|
||||||
CORPO("corpo",
|
CORPO("corpo",
|
||||||
Collections.emptyList(),
|
|
||||||
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
|
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
|
||||||
4, 5, 6, 7),
|
4, 5, 6, 7),
|
||||||
YOLO("yolo",
|
YOLO("yolo",
|
||||||
Collections.emptyList(),
|
|
||||||
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
|
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
|
||||||
0, 2, 1, 3, 4, 6),
|
0, 2, 1, 3, 4, 6),
|
||||||
CORPO_CLEAN("corpo-clean",
|
CORPO_CLEAN("corpo-clean",
|
||||||
Collections.emptyList(),
|
|
||||||
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
|
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
|
||||||
4, 5),
|
4, 5),
|
||||||
ACADEMIA("academia",
|
ACADEMIA("academia",
|
||||||
Collections.emptyList(),
|
|
||||||
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
|
List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords),
|
||||||
3),
|
3),
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|
||||||
public final String name;
|
public final String name;
|
||||||
public final List<String> additionalSearchTerm;
|
|
||||||
public final List<Integer> buckets;
|
public final List<Integer> buckets;
|
||||||
public final List<IndexBlock> indexBlocks;
|
public final List<IndexBlock> indexBlocks;
|
||||||
|
|
||||||
EdgeSearchProfile(String name,
|
EdgeSearchProfile(String name,
|
||||||
List<String> additionalSearchTerm,
|
|
||||||
List<IndexBlock> indexBlocks,
|
List<IndexBlock> indexBlocks,
|
||||||
int... buckets) {
|
int... buckets) {
|
||||||
this.name = name;
|
this.name = name;
|
||||||
this.additionalSearchTerm = additionalSearchTerm;
|
|
||||||
this.indexBlocks = indexBlocks;
|
this.indexBlocks = indexBlocks;
|
||||||
this.buckets = Arrays.stream(buckets).boxed().collect(Collectors.toList());
|
this.buckets = Arrays.stream(buckets).boxed().collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
@ -15,6 +15,7 @@ import nu.marginalia.wmsa.configuration.server.MetricsServer;
|
|||||||
import nu.marginalia.wmsa.configuration.server.Service;
|
import nu.marginalia.wmsa.configuration.server.Service;
|
||||||
import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient;
|
import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient;
|
||||||
import nu.marginalia.wmsa.edge.search.command.CommandEvaluator;
|
import nu.marginalia.wmsa.edge.search.command.CommandEvaluator;
|
||||||
|
import nu.marginalia.wmsa.edge.search.command.SearchJsParameter;
|
||||||
import nu.marginalia.wmsa.edge.search.command.SearchParameters;
|
import nu.marginalia.wmsa.edge.search.command.SearchParameters;
|
||||||
import nu.marginalia.wmsa.edge.search.exceptions.RedirectException;
|
import nu.marginalia.wmsa.edge.search.exceptions.RedirectException;
|
||||||
import nu.marginalia.wmsa.edge.search.query.model.EdgeUserSearchParameters;
|
import nu.marginalia.wmsa.edge.search.query.model.EdgeUserSearchParameters;
|
||||||
@ -130,7 +131,7 @@ public class EdgeSearchService extends Service {
|
|||||||
|
|
||||||
final String humanQuery = queryParam.trim();
|
final String humanQuery = queryParam.trim();
|
||||||
|
|
||||||
var results = searchOperator.doApiSearch(ctx, new EdgeUserSearchParameters(humanQuery, profile, ""));
|
var results = searchOperator.doApiSearch(ctx, new EdgeUserSearchParameters(humanQuery, profile, SearchJsParameter.DEFAULT));
|
||||||
|
|
||||||
return new ApiSearchResults("RESTRICTED", humanQuery, results.stream().map(ApiSearchResult::new).limit(limit).collect(Collectors.toList()));
|
return new ApiSearchResults("RESTRICTED", humanQuery, results.stream().map(ApiSearchResult::new).limit(limit).collect(Collectors.toList()));
|
||||||
}
|
}
|
||||||
@ -151,7 +152,9 @@ public class EdgeSearchService extends Service {
|
|||||||
|
|
||||||
var params = new SearchParameters(
|
var params = new SearchParameters(
|
||||||
EdgeSearchProfile.getSearchProfile(profileStr),
|
EdgeSearchProfile.getSearchProfile(profileStr),
|
||||||
Optional.ofNullable(request.queryParams("js")).orElse("default"));
|
SearchJsParameter.parse(request.queryParams("js"))
|
||||||
|
);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
return searchCommandEvaulator.eval(ctx, params, humanQuery);
|
return searchCommandEvaulator.eval(ctx, params, humanQuery);
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,24 @@
|
|||||||
|
package nu.marginalia.wmsa.edge.search.command;
|
||||||
|
|
||||||
|
import javax.annotation.Nullable;
|
||||||
|
|
||||||
|
/**
 * Typed form of the "js" search setting.
 *
 * Each constant carries the string form of the setting ({@link #value}) and
 * zero or more search terms that are to be added to a query's exclude list
 * when the setting is active.
 */
public enum SearchJsParameter {
|
||||||
|
// No implicit exclude terms (the varargs array is empty).
DEFAULT("default"),
|
||||||
|
// "no-js": excludes the term "js:true".
DENY_JS("no-js", "js:true"),
|
||||||
|
// "yes-js": excludes the term "js:false".
REQUIRE_JS("yes-js", "js:false");
|
||||||
|
|
||||||
|
// String form of the setting; this is what parse() compares against.
public final String value;
|
||||||
|
// Terms to add to a query's exclude list for this setting.
// NOTE(review): the name is spelled "implict" (sic); renaming it would also
// require updating every reader of this public field. The array itself is
// mutable and shared by all users of the constant, so callers should treat
// it as read-only.
public final String[] implictExcludeSearchTerms;
|
||||||
|
|
||||||
|
// Stores the string value and the (possibly empty) varargs array as given.
SearchJsParameter(String value, String... implictExcludeSearchTerms) {
|
||||||
|
this.value = value;
|
||||||
|
this.implictExcludeSearchTerms = implictExcludeSearchTerms;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Maps a raw setting string to a constant.
 *
 * The comparison is an exact String.equals match against "no-js" and
 * "yes-js". Any other input, including null and the literal "default",
 * yields DEFAULT; no exception is thrown for unrecognized values.
 *
 * @param value the raw setting string, may be null
 * @return DENY_JS, REQUIRE_JS, or DEFAULT
 */
public static SearchJsParameter parse(@Nullable String value) {
|
||||||
|
// Constant-first equals keeps a null argument from throwing.
if (DENY_JS.value.equals(value)) return DENY_JS;
|
||||||
|
if (REQUIRE_JS.value.equals(value)) return REQUIRE_JS;
|
||||||
|
|
||||||
|
return DEFAULT;
|
||||||
|
}
|
||||||
|
}
|
@ -2,7 +2,7 @@ package nu.marginalia.wmsa.edge.search.command;
|
|||||||
|
|
||||||
import nu.marginalia.wmsa.edge.search.EdgeSearchProfile;
|
import nu.marginalia.wmsa.edge.search.EdgeSearchProfile;
|
||||||
|
|
||||||
public record SearchParameters(EdgeSearchProfile profile, String js) {
|
public record SearchParameters(EdgeSearchProfile profile, SearchJsParameter js) {
|
||||||
public String profileStr() {
|
public String profileStr() {
|
||||||
return profile.name;
|
return profile.name;
|
||||||
}
|
}
|
||||||
|
@ -43,7 +43,8 @@ public class SearchCommand implements SearchCommandInterface {
|
|||||||
public Optional<Object> process(Context ctx, SearchParameters parameters, String query) {
|
public Optional<Object> process(Context ctx, SearchParameters parameters, String query) {
|
||||||
@CheckForNull Future<String> eval = unitConversion.tryEval(ctx, query);
|
@CheckForNull Future<String> eval = unitConversion.tryEval(ctx, query);
|
||||||
|
|
||||||
DecoratedSearchResults results = searchOperator.doSearch(ctx, new EdgeUserSearchParameters(query, parameters.profile(), parameters.js()), eval);
|
EdgeUserSearchParameters params = new EdgeUserSearchParameters(query, parameters.profile(), parameters.js());
|
||||||
|
DecoratedSearchResults results = searchOperator.doSearch(ctx, params, eval);
|
||||||
|
|
||||||
results.getResults().removeIf(detail -> blacklist.isBlacklisted(dataStoreDao.getDomainId(detail.url.domain)));
|
results.getResults().removeIf(detail -> blacklist.isBlacklisted(dataStoreDao.getDomainId(detail.url.domain)));
|
||||||
|
|
||||||
|
@ -20,12 +20,12 @@ public class DecoratedSearchResults {
|
|||||||
private final int focusDomainId;
|
private final int focusDomainId;
|
||||||
|
|
||||||
public String getQuery() {
|
public String getQuery() {
|
||||||
return params.humanQuery;
|
return params.humanQuery();
|
||||||
}
|
}
|
||||||
public String getProfile() {
|
public String getProfile() {
|
||||||
return params.getProfile().name;
|
return params.profile().name;
|
||||||
}
|
}
|
||||||
public String getJs() {
|
public String getJs() {
|
||||||
return params.jsSetting;
|
return params.jsSetting().value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2,9 +2,9 @@ package nu.marginalia.wmsa.edge.search.query;
|
|||||||
|
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.google.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
import nu.marginalia.wmsa.edge.assistant.dict.NGramDict;
|
|
||||||
import nu.marginalia.util.language.WordPatterns;
|
import nu.marginalia.util.language.WordPatterns;
|
||||||
import nu.marginalia.util.language.conf.LanguageModels;
|
import nu.marginalia.util.language.conf.LanguageModels;
|
||||||
|
import nu.marginalia.wmsa.edge.assistant.dict.NGramDict;
|
||||||
import nu.marginalia.wmsa.edge.index.model.IndexBlock;
|
import nu.marginalia.wmsa.edge.index.model.IndexBlock;
|
||||||
import nu.marginalia.wmsa.edge.model.search.EdgeSearchSpecification;
|
import nu.marginalia.wmsa.edge.model.search.EdgeSearchSpecification;
|
||||||
import nu.marginalia.wmsa.edge.model.search.EdgeSearchSubquery;
|
import nu.marginalia.wmsa.edge.model.search.EdgeSearchSubquery;
|
||||||
@ -39,15 +39,26 @@ public class QueryFactory {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public EdgeSearchQuery createQuery(EdgeUserSearchParameters params) {
|
public EdgeSearchQuery createQuery(EdgeUserSearchParameters params) {
|
||||||
final var profile = params.getProfile();
|
final var profile = params.profile();
|
||||||
final var jsSetting = params.getJsSetting();
|
|
||||||
|
|
||||||
final var processedQuery = createQuery(getParser(), params);
|
final var processedQuery = createQuery(getParser(), params);
|
||||||
|
|
||||||
processedQuery.specs.experimental = EdgeSearchProfile.CORPO.equals(profile);
|
processedQuery.specs.experimental = EdgeSearchProfile.CORPO.equals(profile);
|
||||||
processedQuery.specs.stagger = EdgeSearchProfile.YOLO.equals(profile);
|
processedQuery.specs.stagger = EdgeSearchProfile.YOLO.equals(profile);
|
||||||
|
|
||||||
List<EdgeSearchSubquery> subqueries = new ArrayList<>(processedQuery.specs.subqueries.size() * profile.indexBlocks.size());
|
final var newSubqueries = reevaluateSubqueries(processedQuery, params);
|
||||||
|
|
||||||
|
processedQuery.specs.subqueries.clear();
|
||||||
|
processedQuery.specs.subqueries.addAll(newSubqueries);
|
||||||
|
|
||||||
|
return processedQuery;
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<EdgeSearchSubquery> reevaluateSubqueries(EdgeSearchQuery processedQuery, EdgeUserSearchParameters params) {
|
||||||
|
final var jsSetting = params.jsSetting();
|
||||||
|
final var profile = params.profile();
|
||||||
|
|
||||||
|
List<EdgeSearchSubquery> subqueries =
|
||||||
|
new ArrayList<>(processedQuery.specs.subqueries.size() * profile.indexBlocks.size());
|
||||||
|
|
||||||
for (var sq : processedQuery.specs.subqueries) {
|
for (var sq : processedQuery.specs.subqueries) {
|
||||||
for (var block : profile.indexBlocks) {
|
for (var block : profile.indexBlocks) {
|
||||||
@ -55,28 +66,19 @@ public class QueryFactory {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
processedQuery.specs.subqueries.clear();
|
subqueries.forEach(sq -> {
|
||||||
processedQuery.specs.subqueries.addAll(subqueries);
|
sq.searchTermsExclude.addAll(Arrays.asList(jsSetting.implictExcludeSearchTerms));
|
||||||
|
|
||||||
processedQuery.specs.subqueries.forEach(sq -> {
|
|
||||||
sq.searchTermsInclude.addAll(profile.additionalSearchTerm);
|
|
||||||
if (jsSetting.equals("yes-js")) {
|
|
||||||
sq.searchTermsExclude.add("js:false");
|
|
||||||
}
|
|
||||||
if (jsSetting.equals("no-js")) {
|
|
||||||
sq.searchTermsExclude.add("js:true");
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
|
|
||||||
processedQuery.specs.subqueries.sort(Comparator.comparing(sq -> -sq.termSize()*2.3 + sq.block.sortOrder));
|
subqueries.sort(Comparator.comparing(sq -> -sq.termSize()*2.3 + sq.block.sortOrder));
|
||||||
|
|
||||||
return processedQuery;
|
return subqueries;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public EdgeSearchQuery createQuery(QueryParser queryParser, EdgeUserSearchParameters params) {
|
public EdgeSearchQuery createQuery(QueryParser queryParser, EdgeUserSearchParameters params) {
|
||||||
final var query = params.humanQuery;
|
final var query = params.humanQuery();
|
||||||
final var profile = params.getProfile();
|
final var profile = params.profile();
|
||||||
|
|
||||||
if (query.length() > 1000) {
|
if (query.length() > 1000) {
|
||||||
Spark.halt(HttpStatus.BAD_REQUEST_400, "That's too much, man");
|
Spark.halt(HttpStatus.BAD_REQUEST_400, "That's too much, man");
|
||||||
|
@ -1,12 +1,7 @@
|
|||||||
package nu.marginalia.wmsa.edge.search.query.model;
|
package nu.marginalia.wmsa.edge.search.query.model;
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
|
||||||
import lombok.Getter;
|
|
||||||
import nu.marginalia.wmsa.edge.search.EdgeSearchProfile;
|
import nu.marginalia.wmsa.edge.search.EdgeSearchProfile;
|
||||||
|
import nu.marginalia.wmsa.edge.search.command.SearchJsParameter;
|
||||||
|
|
||||||
@AllArgsConstructor @Getter
|
public record EdgeUserSearchParameters (String humanQuery, EdgeSearchProfile profile, SearchJsParameter jsSetting){
|
||||||
public class EdgeUserSearchParameters {
|
|
||||||
public final String humanQuery;
|
|
||||||
public final EdgeSearchProfile profile;
|
|
||||||
public final String jsSetting;
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user