mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
(query) Tidy up code
This commit is contained in:
parent
02df421c94
commit
6973712480
@ -36,8 +36,8 @@ public class SearchQuery {
|
|||||||
@Deprecated // why does this exist?
|
@Deprecated // why does this exist?
|
||||||
private double value = 0;
|
private double value = 0;
|
||||||
|
|
||||||
public static SearchQueryBuilder builder(String compiledQuery) {
|
public static SearchQueryBuilder builder() {
|
||||||
return new SearchQueryBuilder(compiledQuery);
|
return new SearchQueryBuilder();
|
||||||
}
|
}
|
||||||
|
|
||||||
public SearchQuery() {
|
public SearchQuery() {
|
||||||
@ -86,15 +86,19 @@ public class SearchQuery {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static class SearchQueryBuilder {
|
public static class SearchQueryBuilder {
|
||||||
private final String compiledQuery;
|
private String compiledQuery;
|
||||||
private List<String> searchTermsInclude = new ArrayList<>();
|
public final List<String> searchTermsInclude = new ArrayList<>();
|
||||||
private List<String> searchTermsExclude = new ArrayList<>();
|
public final List<String> searchTermsExclude = new ArrayList<>();
|
||||||
private List<String> searchTermsAdvice = new ArrayList<>();
|
public final List<String> searchTermsAdvice = new ArrayList<>();
|
||||||
private List<String> searchTermsPriority = new ArrayList<>();
|
public final List<String> searchTermsPriority = new ArrayList<>();
|
||||||
private List<SearchCoherenceConstraint> searchTermCoherences = new ArrayList<>();
|
public final List<SearchCoherenceConstraint> searchTermCoherences = new ArrayList<>();
|
||||||
|
|
||||||
private SearchQueryBuilder(String compiledQuery) {
|
private SearchQueryBuilder() {
|
||||||
this.compiledQuery = compiledQuery;
|
}
|
||||||
|
|
||||||
|
public SearchQueryBuilder compiledQuery(String query) {
|
||||||
|
this.compiledQuery = query;
|
||||||
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public SearchQueryBuilder include(String... terms) {
|
public SearchQueryBuilder include(String... terms) {
|
||||||
@ -117,7 +121,7 @@ public class SearchQuery {
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public SearchQueryBuilder coherences(SearchCoherenceConstraint constraint) {
|
public SearchQueryBuilder coherenceConstraint(SearchCoherenceConstraint constraint) {
|
||||||
searchTermCoherences.add(constraint);
|
searchTermCoherences.add(constraint);
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
@ -125,5 +129,13 @@ public class SearchQuery {
|
|||||||
public SearchQuery build() {
|
public SearchQuery build() {
|
||||||
return new SearchQuery(compiledQuery, searchTermsInclude, searchTermsExclude, searchTermsAdvice, searchTermsPriority, searchTermCoherences);
|
return new SearchQuery(compiledQuery, searchTermsInclude, searchTermsExclude, searchTermsAdvice, searchTermsPriority, searchTermCoherences);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** If there are no ranking terms, promote the advice terms to ranking terms */
|
||||||
|
public void promoteNonRankingTerms() {
|
||||||
|
if (searchTermsInclude.isEmpty() && !searchTermsAdvice.isEmpty()) {
|
||||||
|
searchTermsInclude.addAll(searchTermsAdvice);
|
||||||
|
searchTermsAdvice.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package nu.marginalia.functions.searchquery.query_parser;
|
package nu.marginalia.functions.searchquery.query_parser;
|
||||||
|
|
||||||
import nu.marginalia.functions.searchquery.query_parser.token.QueryToken;
|
import nu.marginalia.functions.searchquery.query_parser.token.QueryToken;
|
||||||
|
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||||
import nu.marginalia.language.WordPatterns;
|
import nu.marginalia.language.WordPatterns;
|
||||||
import nu.marginalia.util.transform_list.TransformList;
|
import nu.marginalia.util.transform_list.TransformList;
|
||||||
|
|
||||||
@ -104,15 +105,19 @@ public class QueryParser {
|
|||||||
String str = t.str();
|
String str = t.str();
|
||||||
|
|
||||||
if (str.startsWith("q") && str.matches("q[=><]\\d+")) {
|
if (str.startsWith("q") && str.matches("q[=><]\\d+")) {
|
||||||
entity.replace(new QueryToken.QualityTerm(str.substring(1)));
|
var limit = parseSpecificationLimit(str.substring(1));
|
||||||
|
entity.replace(new QueryToken.QualityTerm(limit, str));
|
||||||
} else if (str.startsWith("near:")) {
|
} else if (str.startsWith("near:")) {
|
||||||
entity.replace(new QueryToken.NearTerm(str.substring(5)));
|
entity.replace(new QueryToken.NearTerm(str.substring(5)));
|
||||||
} else if (str.startsWith("year") && str.matches("year[=><]\\d{4}")) {
|
} else if (str.startsWith("year") && str.matches("year[=><]\\d{4}")) {
|
||||||
entity.replace(new QueryToken.YearTerm(str.substring(4)));
|
var limit = parseSpecificationLimit(str.substring(4));
|
||||||
|
entity.replace(new QueryToken.YearTerm(limit, str));
|
||||||
} else if (str.startsWith("size") && str.matches("size[=><]\\d+")) {
|
} else if (str.startsWith("size") && str.matches("size[=><]\\d+")) {
|
||||||
entity.replace(new QueryToken.SizeTerm(str.substring(4)));
|
var limit = parseSpecificationLimit(str.substring(4));
|
||||||
|
entity.replace(new QueryToken.SizeTerm(limit, str));
|
||||||
} else if (str.startsWith("rank") && str.matches("rank[=><]\\d+")) {
|
} else if (str.startsWith("rank") && str.matches("rank[=><]\\d+")) {
|
||||||
entity.replace(new QueryToken.RankTerm(str.substring(4)));
|
var limit = parseSpecificationLimit(str.substring(4));
|
||||||
|
entity.replace(new QueryToken.RankTerm(limit, str));
|
||||||
} else if (str.startsWith("qs=")) {
|
} else if (str.startsWith("qs=")) {
|
||||||
entity.replace(new QueryToken.QsTerm(str.substring(3)));
|
entity.replace(new QueryToken.QsTerm(str.substring(3)));
|
||||||
} else if (str.contains(":")) {
|
} else if (str.contains(":")) {
|
||||||
@ -120,6 +125,21 @@ public class QueryParser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static SpecificationLimit parseSpecificationLimit(String str) {
|
||||||
|
int startChar = str.charAt(0);
|
||||||
|
|
||||||
|
int val = Integer.parseInt(str.substring(1));
|
||||||
|
if (startChar == '=') {
|
||||||
|
return SpecificationLimit.equals(val);
|
||||||
|
} else if (startChar == '<') {
|
||||||
|
return SpecificationLimit.lessThan(val);
|
||||||
|
} else if (startChar == '>') {
|
||||||
|
return SpecificationLimit.greaterThan(val);
|
||||||
|
} else {
|
||||||
|
return SpecificationLimit.none();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static void handleAdvisoryTerms(TransformList<QueryToken>.Entity entity) {
|
private static void handleAdvisoryTerms(TransformList<QueryToken>.Entity entity) {
|
||||||
var t = entity.value();
|
var t = entity.value();
|
||||||
if (t instanceof QueryToken.LParen) {
|
if (t instanceof QueryToken.LParen) {
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
package nu.marginalia.functions.searchquery.query_parser.token;
|
package nu.marginalia.functions.searchquery.query_parser.token;
|
||||||
|
|
||||||
|
|
||||||
|
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||||
|
|
||||||
public sealed interface QueryToken {
|
public sealed interface QueryToken {
|
||||||
String str();
|
String str();
|
||||||
String displayStr();
|
String displayStr();
|
||||||
@ -11,25 +13,18 @@ public sealed interface QueryToken {
|
|||||||
record AdviceTerm(String str, String displayStr) implements QueryToken {}
|
record AdviceTerm(String str, String displayStr) implements QueryToken {}
|
||||||
record PriorityTerm(String str, String displayStr) implements QueryToken {}
|
record PriorityTerm(String str, String displayStr) implements QueryToken {}
|
||||||
|
|
||||||
record QualityTerm(String str) implements QueryToken {
|
record QualityTerm(SpecificationLimit limit, String displayStr) implements QueryToken {
|
||||||
public String displayStr() {
|
public String str() { return displayStr; }
|
||||||
return "q" + str;
|
|
||||||
}
|
}
|
||||||
|
record YearTerm(SpecificationLimit limit, String displayStr) implements QueryToken {
|
||||||
|
public String str() { return displayStr; }
|
||||||
}
|
}
|
||||||
record YearTerm(String str) implements QueryToken {
|
record SizeTerm(SpecificationLimit limit, String displayStr) implements QueryToken {
|
||||||
public String displayStr() {
|
public String str() { return displayStr; }
|
||||||
return "year" + str;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
record SizeTerm(String str) implements QueryToken {
|
|
||||||
public String displayStr() {
|
|
||||||
return "size" + str;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
record RankTerm(String str) implements QueryToken {
|
|
||||||
public String displayStr() {
|
|
||||||
return "rank" + str;
|
|
||||||
}
|
}
|
||||||
|
record RankTerm(SpecificationLimit limit, String displayStr) implements QueryToken {
|
||||||
|
public String str() { return displayStr; }
|
||||||
}
|
}
|
||||||
record NearTerm(String str) implements QueryToken {
|
record NearTerm(String str) implements QueryToken {
|
||||||
public String displayStr() {
|
public String displayStr() {
|
||||||
|
@ -53,11 +53,7 @@ public class QueryFactory {
|
|||||||
basicQuery.clear();
|
basicQuery.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
List<String> searchTermsExclude = new ArrayList<>();
|
SearchQuery.SearchQueryBuilder queryBuilder = SearchQuery.builder();
|
||||||
List<String> searchTermsInclude = new ArrayList<>();
|
|
||||||
List<String> searchTermsAdvice = new ArrayList<>();
|
|
||||||
List<String> searchTermsPriority = new ArrayList<>();
|
|
||||||
List<SearchCoherenceConstraint> searchTermCoherences = new ArrayList<>();
|
|
||||||
|
|
||||||
SpecificationLimit qualityLimit = SpecificationLimit.none();
|
SpecificationLimit qualityLimit = SpecificationLimit.none();
|
||||||
SpecificationLimit year = SpecificationLimit.none();
|
SpecificationLimit year = SpecificationLimit.none();
|
||||||
@ -77,51 +73,48 @@ public class QueryFactory {
|
|||||||
|
|
||||||
if (parts.length > 1) {
|
if (parts.length > 1) {
|
||||||
// Require that the terms appear in sequence
|
// Require that the terms appear in sequence
|
||||||
searchTermCoherences.add(SearchCoherenceConstraint.mandatory(parts));
|
queryBuilder.coherenceConstraint(SearchCoherenceConstraint.mandatory(parts));
|
||||||
|
|
||||||
// Construct a regular query from the parts in the quoted string
|
// Construct a regular query from the parts in the quoted string
|
||||||
searchTermsInclude.addAll(Arrays.asList(parts));
|
queryBuilder.include(parts);
|
||||||
|
|
||||||
// Prefer that the actual n-gram is present
|
// Prefer that the actual n-gram is present
|
||||||
searchTermsPriority.add(str);
|
queryBuilder.priority(str);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// If the quoted word is a single word, we don't need to do more than include it in the search
|
// If the quoted word is a single word, we don't need to do more than include it in the search
|
||||||
searchTermsInclude.add(str);
|
queryBuilder.include(str);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
case QueryToken.LiteralTerm(String str, String displayStr) -> {
|
case QueryToken.LiteralTerm(String str, String displayStr) -> {
|
||||||
analyzeSearchTerm(problems, str, displayStr);
|
analyzeSearchTerm(problems, str, displayStr);
|
||||||
searchTermsHuman.addAll(Arrays.asList(displayStr.split("\\s+")));
|
searchTermsHuman.addAll(Arrays.asList(displayStr.split("\\s+")));
|
||||||
|
|
||||||
searchTermsInclude.add(str);
|
queryBuilder.include(str);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case QueryToken.ExcludeTerm(String str, String displayStr) -> queryBuilder.exclude(str);
|
||||||
case QueryToken.ExcludeTerm(String str, String displayStr) -> searchTermsExclude.add(str);
|
case QueryToken.PriorityTerm(String str, String displayStr) -> queryBuilder.priority(str);
|
||||||
case QueryToken.PriorityTerm(String str, String displayStr) -> searchTermsPriority.add(str);
|
|
||||||
case QueryToken.AdviceTerm(String str, String displayStr) -> {
|
case QueryToken.AdviceTerm(String str, String displayStr) -> {
|
||||||
searchTermsAdvice.add(str);
|
queryBuilder.advice(str);
|
||||||
|
|
||||||
if (str.toLowerCase().startsWith("site:")) {
|
if (str.toLowerCase().startsWith("site:")) {
|
||||||
domain = str.substring("site:".length());
|
domain = str.substring("site:".length());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
case QueryToken.YearTerm(String str) -> year = parseSpecificationLimit(str);
|
case QueryToken.YearTerm(SpecificationLimit limit, String displayStr) -> year = limit;
|
||||||
case QueryToken.SizeTerm(String str) -> size = parseSpecificationLimit(str);
|
case QueryToken.SizeTerm(SpecificationLimit limit, String displayStr) -> size = limit;
|
||||||
case QueryToken.RankTerm(String str) -> rank = parseSpecificationLimit(str);
|
case QueryToken.RankTerm(SpecificationLimit limit, String displayStr) -> rank = limit;
|
||||||
case QueryToken.QualityTerm(String str) -> qualityLimit = parseSpecificationLimit(str);
|
case QueryToken.QualityTerm(SpecificationLimit limit, String displayStr) -> qualityLimit = limit;
|
||||||
case QueryToken.QsTerm(String str) -> queryStrategy = parseQueryStrategy(str);
|
case QueryToken.QsTerm(String str) -> queryStrategy = parseQueryStrategy(str);
|
||||||
|
|
||||||
default -> {}
|
default -> {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (searchTermsInclude.isEmpty() && !searchTermsAdvice.isEmpty()) {
|
queryBuilder.promoteNonRankingTerms();
|
||||||
searchTermsInclude.addAll(searchTermsAdvice);
|
|
||||||
searchTermsAdvice.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
List<Integer> domainIds = params.domainIds();
|
List<Integer> domainIds = params.domainIds();
|
||||||
|
|
||||||
@ -131,25 +124,18 @@ public class QueryFactory {
|
|||||||
limits = limits.forSingleDomain();
|
limits = limits.forSingleDomain();
|
||||||
}
|
}
|
||||||
|
|
||||||
var expansion = queryExpansion.expandQuery(searchTermsInclude);
|
var expansion = queryExpansion.expandQuery(queryBuilder.searchTermsInclude);
|
||||||
|
|
||||||
// Query expansion may produce suggestions for coherence constraints,
|
// Query expansion may produce suggestions for coherence constraints,
|
||||||
// add these to the query
|
// add these to the query
|
||||||
for (var coh : expansion.extraCoherences()) {
|
for (var coh : expansion.extraCoherences()) {
|
||||||
searchTermCoherences.add(SearchCoherenceConstraint.optional(coh));
|
queryBuilder.coherenceConstraint(SearchCoherenceConstraint.optional(coh));
|
||||||
}
|
}
|
||||||
|
|
||||||
var searchQuery = new SearchQuery(
|
queryBuilder.compiledQuery(expansion.compiledQuery());
|
||||||
expansion.compiledQuery(),
|
|
||||||
searchTermsInclude,
|
|
||||||
searchTermsExclude,
|
|
||||||
searchTermsAdvice,
|
|
||||||
searchTermsPriority,
|
|
||||||
searchTermCoherences
|
|
||||||
);
|
|
||||||
|
|
||||||
var specsBuilder = SearchSpecification.builder()
|
var specsBuilder = SearchSpecification.builder()
|
||||||
.query(searchQuery)
|
.query(queryBuilder.build())
|
||||||
.humanQuery(query)
|
.humanQuery(query)
|
||||||
.quality(qualityLimit)
|
.quality(qualityLimit)
|
||||||
.year(year)
|
.year(year)
|
||||||
@ -180,20 +166,7 @@ public class QueryFactory {
|
|||||||
problems.add("Search term \"" + displayStr + "\" too long");
|
problems.add("Search term \"" + displayStr + "\" too long");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
private SpecificationLimit parseSpecificationLimit(String str) {
|
|
||||||
int startChar = str.charAt(0);
|
|
||||||
|
|
||||||
int val = Integer.parseInt(str.substring(1));
|
|
||||||
if (startChar == '=') {
|
|
||||||
return SpecificationLimit.equals(val);
|
|
||||||
} else if (startChar == '<') {
|
|
||||||
return SpecificationLimit.lessThan(val);
|
|
||||||
} else if (startChar == '>') {
|
|
||||||
return SpecificationLimit.greaterThan(val);
|
|
||||||
} else {
|
|
||||||
return SpecificationLimit.none();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private QueryStrategy parseQueryStrategy(String str) {
|
private QueryStrategy parseQueryStrategy(String str) {
|
||||||
return switch (str.toUpperCase()) {
|
return switch (str.toUpperCase()) {
|
||||||
@ -208,14 +181,4 @@ public class QueryFactory {
|
|||||||
default -> QueryStrategy.AUTO;
|
default -> QueryStrategy.AUTO;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private boolean anyPartIsStopWord(String[] parts) {
|
|
||||||
for (String part : parts) {
|
|
||||||
if (WordPatterns.isStopWord(part)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package nu.marginalia.query.svc;
|
package nu.marginalia.query.svc;
|
||||||
|
|
||||||
import nu.marginalia.WmsaHome;
|
import nu.marginalia.WmsaHome;
|
||||||
|
import nu.marginalia.api.searchquery.model.query.SearchCoherenceConstraint;
|
||||||
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
|
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
|
||||||
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
||||||
import nu.marginalia.functions.searchquery.query_parser.QueryExpansion;
|
import nu.marginalia.functions.searchquery.query_parser.QueryExpansion;
|
||||||
@ -57,7 +58,12 @@ public class QueryFactoryTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
void qsec10() {
|
void qsec10() {
|
||||||
try (var lines = Files.lines(Path.of("/home/vlofgren/Exports/qsec10/webis-qsec-10-training-set/webis-qsec-10-training-set-queries.txt"))) {
|
Path webis = Path.of("/home/vlofgren/Exports/qsec10/webis-qsec-10-training-set/webis-qsec-10-training-set-queries.txt");
|
||||||
|
|
||||||
|
if (!Files.exists(webis))
|
||||||
|
return;
|
||||||
|
|
||||||
|
try (var lines = Files.lines(webis)) {
|
||||||
lines.limit(1000).forEach(line -> {
|
lines.limit(1000).forEach(line -> {
|
||||||
String[] parts = line.split("\t");
|
String[] parts = line.split("\t");
|
||||||
if (parts.length == 2) {
|
if (parts.length == 2) {
|
||||||
@ -129,15 +135,15 @@ public class QueryFactoryTest {
|
|||||||
{
|
{
|
||||||
// the is a stopword, so it should generate an ngram search term
|
// the is a stopword, so it should generate an ngram search term
|
||||||
var specs = parseAndGetSpecs("\"the shining\"");
|
var specs = parseAndGetSpecs("\"the shining\"");
|
||||||
assertEquals("the_shining", specs.query.compiledQuery);
|
assertEquals("( shining | the_shining )", specs.query.compiledQuery);
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
// tde isn't a stopword, so we should get the normal behavior
|
// tde isn't a stopword, so we should get the normal behavior
|
||||||
var specs = parseAndGetSpecs("\"tde shining\"");
|
var specs = parseAndGetSpecs("\"tde shining\"");
|
||||||
assertEquals("( shining tde | tde_shining )", specs.query.compiledQuery);
|
assertEquals("( shining tde | tde_shining )", specs.query.compiledQuery);
|
||||||
assertEquals(List.of("tde_shining"), specs.query.searchTermsAdvice);
|
assertEquals(List.of("tde_shining"), specs.query.searchTermsPriority);
|
||||||
assertEquals(List.of(List.of("tde", "shining")), specs.query.searchTermCoherences);
|
assertEquals(List.of(new SearchCoherenceConstraint(true, List.of("tde", "shining"))), specs.query.searchTermCoherences);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -169,7 +169,8 @@ public class IndexQueryServiceIntegrationSmokeTest {
|
|||||||
.domains(new ArrayList<>())
|
.domains(new ArrayList<>())
|
||||||
.searchSetIdentifier("NONE")
|
.searchSetIdentifier("NONE")
|
||||||
.query(
|
.query(
|
||||||
SearchQuery.builder("2")
|
SearchQuery.builder()
|
||||||
|
.compiledQuery("2")
|
||||||
.include("2")
|
.include("2")
|
||||||
.build()
|
.build()
|
||||||
).build()
|
).build()
|
||||||
|
Loading…
Reference in New Issue
Block a user