mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(query) Tidy up code
This commit is contained in:
parent
02df421c94
commit
6973712480
@ -36,8 +36,8 @@ public class SearchQuery {
|
||||
@Deprecated // why does this exist?
|
||||
private double value = 0;
|
||||
|
||||
public static SearchQueryBuilder builder(String compiledQuery) {
|
||||
return new SearchQueryBuilder(compiledQuery);
|
||||
public static SearchQueryBuilder builder() {
|
||||
return new SearchQueryBuilder();
|
||||
}
|
||||
|
||||
public SearchQuery() {
|
||||
@ -86,15 +86,19 @@ public class SearchQuery {
|
||||
}
|
||||
|
||||
public static class SearchQueryBuilder {
|
||||
private final String compiledQuery;
|
||||
private List<String> searchTermsInclude = new ArrayList<>();
|
||||
private List<String> searchTermsExclude = new ArrayList<>();
|
||||
private List<String> searchTermsAdvice = new ArrayList<>();
|
||||
private List<String> searchTermsPriority = new ArrayList<>();
|
||||
private List<SearchCoherenceConstraint> searchTermCoherences = new ArrayList<>();
|
||||
private String compiledQuery;
|
||||
public final List<String> searchTermsInclude = new ArrayList<>();
|
||||
public final List<String> searchTermsExclude = new ArrayList<>();
|
||||
public final List<String> searchTermsAdvice = new ArrayList<>();
|
||||
public final List<String> searchTermsPriority = new ArrayList<>();
|
||||
public final List<SearchCoherenceConstraint> searchTermCoherences = new ArrayList<>();
|
||||
|
||||
private SearchQueryBuilder(String compiledQuery) {
|
||||
this.compiledQuery = compiledQuery;
|
||||
private SearchQueryBuilder() {
|
||||
}
|
||||
|
||||
public SearchQueryBuilder compiledQuery(String query) {
|
||||
this.compiledQuery = query;
|
||||
return this;
|
||||
}
|
||||
|
||||
public SearchQueryBuilder include(String... terms) {
|
||||
@ -117,7 +121,7 @@ public class SearchQuery {
|
||||
return this;
|
||||
}
|
||||
|
||||
public SearchQueryBuilder coherences(SearchCoherenceConstraint constraint) {
|
||||
public SearchQueryBuilder coherenceConstraint(SearchCoherenceConstraint constraint) {
|
||||
searchTermCoherences.add(constraint);
|
||||
return this;
|
||||
}
|
||||
@ -125,5 +129,13 @@ public class SearchQuery {
|
||||
public SearchQuery build() {
|
||||
return new SearchQuery(compiledQuery, searchTermsInclude, searchTermsExclude, searchTermsAdvice, searchTermsPriority, searchTermCoherences);
|
||||
}
|
||||
|
||||
/** If there are no ranking terms, promote the advice terms to ranking terms */
|
||||
public void promoteNonRankingTerms() {
|
||||
if (searchTermsInclude.isEmpty() && !searchTermsAdvice.isEmpty()) {
|
||||
searchTermsInclude.addAll(searchTermsAdvice);
|
||||
searchTermsAdvice.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
package nu.marginalia.functions.searchquery.query_parser;
|
||||
|
||||
import nu.marginalia.functions.searchquery.query_parser.token.QueryToken;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
import nu.marginalia.language.WordPatterns;
|
||||
import nu.marginalia.util.transform_list.TransformList;
|
||||
|
||||
@ -104,15 +105,19 @@ public class QueryParser {
|
||||
String str = t.str();
|
||||
|
||||
if (str.startsWith("q") && str.matches("q[=><]\\d+")) {
|
||||
entity.replace(new QueryToken.QualityTerm(str.substring(1)));
|
||||
var limit = parseSpecificationLimit(str.substring(1));
|
||||
entity.replace(new QueryToken.QualityTerm(limit, str));
|
||||
} else if (str.startsWith("near:")) {
|
||||
entity.replace(new QueryToken.NearTerm(str.substring(5)));
|
||||
} else if (str.startsWith("year") && str.matches("year[=><]\\d{4}")) {
|
||||
entity.replace(new QueryToken.YearTerm(str.substring(4)));
|
||||
var limit = parseSpecificationLimit(str.substring(4));
|
||||
entity.replace(new QueryToken.YearTerm(limit, str));
|
||||
} else if (str.startsWith("size") && str.matches("size[=><]\\d+")) {
|
||||
entity.replace(new QueryToken.SizeTerm(str.substring(4)));
|
||||
var limit = parseSpecificationLimit(str.substring(4));
|
||||
entity.replace(new QueryToken.SizeTerm(limit, str));
|
||||
} else if (str.startsWith("rank") && str.matches("rank[=><]\\d+")) {
|
||||
entity.replace(new QueryToken.RankTerm(str.substring(4)));
|
||||
var limit = parseSpecificationLimit(str.substring(4));
|
||||
entity.replace(new QueryToken.RankTerm(limit, str));
|
||||
} else if (str.startsWith("qs=")) {
|
||||
entity.replace(new QueryToken.QsTerm(str.substring(3)));
|
||||
} else if (str.contains(":")) {
|
||||
@ -120,6 +125,21 @@ public class QueryParser {
|
||||
}
|
||||
}
|
||||
|
||||
private static SpecificationLimit parseSpecificationLimit(String str) {
|
||||
int startChar = str.charAt(0);
|
||||
|
||||
int val = Integer.parseInt(str.substring(1));
|
||||
if (startChar == '=') {
|
||||
return SpecificationLimit.equals(val);
|
||||
} else if (startChar == '<') {
|
||||
return SpecificationLimit.lessThan(val);
|
||||
} else if (startChar == '>') {
|
||||
return SpecificationLimit.greaterThan(val);
|
||||
} else {
|
||||
return SpecificationLimit.none();
|
||||
}
|
||||
}
|
||||
|
||||
private static void handleAdvisoryTerms(TransformList<QueryToken>.Entity entity) {
|
||||
var t = entity.value();
|
||||
if (t instanceof QueryToken.LParen) {
|
||||
|
@ -1,6 +1,8 @@
|
||||
package nu.marginalia.functions.searchquery.query_parser.token;
|
||||
|
||||
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
|
||||
public sealed interface QueryToken {
|
||||
String str();
|
||||
String displayStr();
|
||||
@ -11,25 +13,18 @@ public sealed interface QueryToken {
|
||||
record AdviceTerm(String str, String displayStr) implements QueryToken {}
|
||||
record PriorityTerm(String str, String displayStr) implements QueryToken {}
|
||||
|
||||
record QualityTerm(String str) implements QueryToken {
|
||||
public String displayStr() {
|
||||
return "q" + str;
|
||||
}
|
||||
record QualityTerm(SpecificationLimit limit, String displayStr) implements QueryToken {
|
||||
public String str() { return displayStr; }
|
||||
|
||||
}
|
||||
record YearTerm(String str) implements QueryToken {
|
||||
public String displayStr() {
|
||||
return "year" + str;
|
||||
}
|
||||
record YearTerm(SpecificationLimit limit, String displayStr) implements QueryToken {
|
||||
public String str() { return displayStr; }
|
||||
}
|
||||
record SizeTerm(String str) implements QueryToken {
|
||||
public String displayStr() {
|
||||
return "size" + str;
|
||||
}
|
||||
record SizeTerm(SpecificationLimit limit, String displayStr) implements QueryToken {
|
||||
public String str() { return displayStr; }
|
||||
}
|
||||
record RankTerm(String str) implements QueryToken {
|
||||
public String displayStr() {
|
||||
return "rank" + str;
|
||||
}
|
||||
record RankTerm(SpecificationLimit limit, String displayStr) implements QueryToken {
|
||||
public String str() { return displayStr; }
|
||||
}
|
||||
record NearTerm(String str) implements QueryToken {
|
||||
public String displayStr() {
|
||||
|
@ -53,11 +53,7 @@ public class QueryFactory {
|
||||
basicQuery.clear();
|
||||
}
|
||||
|
||||
List<String> searchTermsExclude = new ArrayList<>();
|
||||
List<String> searchTermsInclude = new ArrayList<>();
|
||||
List<String> searchTermsAdvice = new ArrayList<>();
|
||||
List<String> searchTermsPriority = new ArrayList<>();
|
||||
List<SearchCoherenceConstraint> searchTermCoherences = new ArrayList<>();
|
||||
SearchQuery.SearchQueryBuilder queryBuilder = SearchQuery.builder();
|
||||
|
||||
SpecificationLimit qualityLimit = SpecificationLimit.none();
|
||||
SpecificationLimit year = SpecificationLimit.none();
|
||||
@ -77,51 +73,48 @@ public class QueryFactory {
|
||||
|
||||
if (parts.length > 1) {
|
||||
// Require that the terms appear in sequence
|
||||
searchTermCoherences.add(SearchCoherenceConstraint.mandatory(parts));
|
||||
queryBuilder.coherenceConstraint(SearchCoherenceConstraint.mandatory(parts));
|
||||
|
||||
// Construct a regular query from the parts in the quoted string
|
||||
searchTermsInclude.addAll(Arrays.asList(parts));
|
||||
queryBuilder.include(parts);
|
||||
|
||||
// Prefer that the actual n-gram is present
|
||||
searchTermsPriority.add(str);
|
||||
queryBuilder.priority(str);
|
||||
}
|
||||
else {
|
||||
// If the quoted word is a single word, we don't need to do more than include it in the search
|
||||
searchTermsInclude.add(str);
|
||||
queryBuilder.include(str);
|
||||
}
|
||||
}
|
||||
|
||||
case QueryToken.LiteralTerm(String str, String displayStr) -> {
|
||||
analyzeSearchTerm(problems, str, displayStr);
|
||||
searchTermsHuman.addAll(Arrays.asList(displayStr.split("\\s+")));
|
||||
|
||||
searchTermsInclude.add(str);
|
||||
queryBuilder.include(str);
|
||||
}
|
||||
|
||||
|
||||
case QueryToken.ExcludeTerm(String str, String displayStr) -> searchTermsExclude.add(str);
|
||||
case QueryToken.PriorityTerm(String str, String displayStr) -> searchTermsPriority.add(str);
|
||||
case QueryToken.ExcludeTerm(String str, String displayStr) -> queryBuilder.exclude(str);
|
||||
case QueryToken.PriorityTerm(String str, String displayStr) -> queryBuilder.priority(str);
|
||||
case QueryToken.AdviceTerm(String str, String displayStr) -> {
|
||||
searchTermsAdvice.add(str);
|
||||
queryBuilder.advice(str);
|
||||
|
||||
if (str.toLowerCase().startsWith("site:")) {
|
||||
domain = str.substring("site:".length());
|
||||
}
|
||||
}
|
||||
|
||||
case QueryToken.YearTerm(String str) -> year = parseSpecificationLimit(str);
|
||||
case QueryToken.SizeTerm(String str) -> size = parseSpecificationLimit(str);
|
||||
case QueryToken.RankTerm(String str) -> rank = parseSpecificationLimit(str);
|
||||
case QueryToken.QualityTerm(String str) -> qualityLimit = parseSpecificationLimit(str);
|
||||
case QueryToken.YearTerm(SpecificationLimit limit, String displayStr) -> year = limit;
|
||||
case QueryToken.SizeTerm(SpecificationLimit limit, String displayStr) -> size = limit;
|
||||
case QueryToken.RankTerm(SpecificationLimit limit, String displayStr) -> rank = limit;
|
||||
case QueryToken.QualityTerm(SpecificationLimit limit, String displayStr) -> qualityLimit = limit;
|
||||
case QueryToken.QsTerm(String str) -> queryStrategy = parseQueryStrategy(str);
|
||||
|
||||
default -> {}
|
||||
}
|
||||
}
|
||||
|
||||
if (searchTermsInclude.isEmpty() && !searchTermsAdvice.isEmpty()) {
|
||||
searchTermsInclude.addAll(searchTermsAdvice);
|
||||
searchTermsAdvice.clear();
|
||||
}
|
||||
queryBuilder.promoteNonRankingTerms();
|
||||
|
||||
List<Integer> domainIds = params.domainIds();
|
||||
|
||||
@ -131,25 +124,18 @@ public class QueryFactory {
|
||||
limits = limits.forSingleDomain();
|
||||
}
|
||||
|
||||
var expansion = queryExpansion.expandQuery(searchTermsInclude);
|
||||
var expansion = queryExpansion.expandQuery(queryBuilder.searchTermsInclude);
|
||||
|
||||
// Query expansion may produce suggestions for coherence constraints,
|
||||
// add these to the query
|
||||
for (var coh : expansion.extraCoherences()) {
|
||||
searchTermCoherences.add(SearchCoherenceConstraint.optional(coh));
|
||||
queryBuilder.coherenceConstraint(SearchCoherenceConstraint.optional(coh));
|
||||
}
|
||||
|
||||
var searchQuery = new SearchQuery(
|
||||
expansion.compiledQuery(),
|
||||
searchTermsInclude,
|
||||
searchTermsExclude,
|
||||
searchTermsAdvice,
|
||||
searchTermsPriority,
|
||||
searchTermCoherences
|
||||
);
|
||||
queryBuilder.compiledQuery(expansion.compiledQuery());
|
||||
|
||||
var specsBuilder = SearchSpecification.builder()
|
||||
.query(searchQuery)
|
||||
.query(queryBuilder.build())
|
||||
.humanQuery(query)
|
||||
.quality(qualityLimit)
|
||||
.year(year)
|
||||
@ -180,20 +166,7 @@ public class QueryFactory {
|
||||
problems.add("Search term \"" + displayStr + "\" too long");
|
||||
}
|
||||
}
|
||||
private SpecificationLimit parseSpecificationLimit(String str) {
|
||||
int startChar = str.charAt(0);
|
||||
|
||||
int val = Integer.parseInt(str.substring(1));
|
||||
if (startChar == '=') {
|
||||
return SpecificationLimit.equals(val);
|
||||
} else if (startChar == '<') {
|
||||
return SpecificationLimit.lessThan(val);
|
||||
} else if (startChar == '>') {
|
||||
return SpecificationLimit.greaterThan(val);
|
||||
} else {
|
||||
return SpecificationLimit.none();
|
||||
}
|
||||
}
|
||||
|
||||
private QueryStrategy parseQueryStrategy(String str) {
|
||||
return switch (str.toUpperCase()) {
|
||||
@ -208,14 +181,4 @@ public class QueryFactory {
|
||||
default -> QueryStrategy.AUTO;
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
private boolean anyPartIsStopWord(String[] parts) {
|
||||
for (String part : parts) {
|
||||
if (WordPatterns.isStopWord(part)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
package nu.marginalia.query.svc;
|
||||
|
||||
import nu.marginalia.WmsaHome;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchCoherenceConstraint;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.functions.searchquery.query_parser.QueryExpansion;
|
||||
@ -57,7 +58,12 @@ public class QueryFactoryTest {
|
||||
|
||||
@Test
|
||||
void qsec10() {
|
||||
try (var lines = Files.lines(Path.of("/home/vlofgren/Exports/qsec10/webis-qsec-10-training-set/webis-qsec-10-training-set-queries.txt"))) {
|
||||
Path webis = Path.of("/home/vlofgren/Exports/qsec10/webis-qsec-10-training-set/webis-qsec-10-training-set-queries.txt");
|
||||
|
||||
if (!Files.exists(webis))
|
||||
return;
|
||||
|
||||
try (var lines = Files.lines(webis)) {
|
||||
lines.limit(1000).forEach(line -> {
|
||||
String[] parts = line.split("\t");
|
||||
if (parts.length == 2) {
|
||||
@ -129,15 +135,15 @@ public class QueryFactoryTest {
|
||||
{
|
||||
// the is a stopword, so it should generate an ngram search term
|
||||
var specs = parseAndGetSpecs("\"the shining\"");
|
||||
assertEquals("the_shining", specs.query.compiledQuery);
|
||||
assertEquals("( shining | the_shining )", specs.query.compiledQuery);
|
||||
}
|
||||
|
||||
{
|
||||
// tde isn't a stopword, so we should get the normal behavior
|
||||
var specs = parseAndGetSpecs("\"tde shining\"");
|
||||
assertEquals("( shining tde | tde_shining )", specs.query.compiledQuery);
|
||||
assertEquals(List.of("tde_shining"), specs.query.searchTermsAdvice);
|
||||
assertEquals(List.of(List.of("tde", "shining")), specs.query.searchTermCoherences);
|
||||
assertEquals(List.of("tde_shining"), specs.query.searchTermsPriority);
|
||||
assertEquals(List.of(new SearchCoherenceConstraint(true, List.of("tde", "shining"))), specs.query.searchTermCoherences);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -169,7 +169,8 @@ public class IndexQueryServiceIntegrationSmokeTest {
|
||||
.domains(new ArrayList<>())
|
||||
.searchSetIdentifier("NONE")
|
||||
.query(
|
||||
SearchQuery.builder("2")
|
||||
SearchQuery.builder()
|
||||
.compiledQuery("2")
|
||||
.include("2")
|
||||
.build()
|
||||
).build()
|
||||
|
Loading…
Reference in New Issue
Block a user