mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(query-parsing) Correct handling of trailing parentheses
This commit is contained in:
parent
99b3b00b68
commit
50ba8fd099
@ -37,20 +37,19 @@ public class QueryParser {
|
|||||||
query = noisePattern.matcher(query).replaceAll(" ");
|
query = noisePattern.matcher(query).replaceAll(" ");
|
||||||
|
|
||||||
int chr = -1;
|
int chr = -1;
|
||||||
int prevChr = -1;
|
int parenDepth = 0;
|
||||||
for (int i = 0; i < query.length(); i++) {
|
for (int i = 0; i < query.length(); i++) {
|
||||||
prevChr = chr;
|
|
||||||
chr = query.charAt(i);
|
chr = query.charAt(i);
|
||||||
|
|
||||||
boolean escape = prevChr == '\\';
|
if ('(' == chr) {
|
||||||
|
parenDepth++;
|
||||||
if (!escape && '(' == chr) {
|
|
||||||
tokens.add(new QueryToken.LParen());
|
tokens.add(new QueryToken.LParen());
|
||||||
}
|
}
|
||||||
else if (!escape && ')' == chr && prevChr != '(') { // special case to deal with queries like "strlen()"
|
else if (')' == chr) {
|
||||||
|
parenDepth--;
|
||||||
tokens.add(new QueryToken.RParen());
|
tokens.add(new QueryToken.RParen());
|
||||||
}
|
}
|
||||||
else if (!escape && '"' == chr) {
|
else if ('"' == chr) {
|
||||||
int end = query.indexOf('"', i+1);
|
int end = query.indexOf('"', i+1);
|
||||||
|
|
||||||
if (end == -1) {
|
if (end == -1) {
|
||||||
@ -61,17 +60,30 @@ public class QueryParser {
|
|||||||
|
|
||||||
i = end;
|
i = end;
|
||||||
}
|
}
|
||||||
else if (!escape && '-' == chr) {
|
else if ('-' == chr) {
|
||||||
tokens.add(new QueryToken.Minus());
|
tokens.add(new QueryToken.Minus());
|
||||||
}
|
}
|
||||||
else if (!escape && '?' == chr) {
|
else if ('?' == chr) {
|
||||||
tokens.add(new QueryToken.QMark());
|
tokens.add(new QueryToken.QMark());
|
||||||
}
|
}
|
||||||
else if (!Character.isSpaceChar(chr)) {
|
else if (!Character.isSpaceChar(chr)) {
|
||||||
|
|
||||||
|
// search for the end of the term
|
||||||
int end = i+1;
|
int end = i+1;
|
||||||
|
int prevC = -1;
|
||||||
|
int c = -1;
|
||||||
for (; end < query.length(); end++) {
|
for (; end < query.length(); end++) {
|
||||||
if (query.charAt(end) == ' ' || query.charAt(end) == ')')
|
prevC = c;
|
||||||
|
c = query.charAt(end);
|
||||||
|
|
||||||
|
if (prevC == '\\')
|
||||||
|
continue;
|
||||||
|
if (c == ' ')
|
||||||
|
break;
|
||||||
|
|
||||||
|
// special case to deal with possible RPAREN token at the end,
|
||||||
|
// but we don't want to break if it's likely part of the search term
|
||||||
|
if (c == '(' && prevC != ')' && parenDepth > 0)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -243,4 +243,13 @@ public class QueryFactoryTest {
|
|||||||
System.out.println("Time: " + (System.currentTimeMillis() - start));
|
System.out.println("Time: " + (System.currentTimeMillis() - start));
|
||||||
System.out.println(subquery);
|
System.out.println(subquery);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testParsing() {
|
||||||
|
long start = System.currentTimeMillis();
|
||||||
|
var subquery = parseAndGetSpecs("strlen()");
|
||||||
|
assertEquals("strlen", subquery.query.compiledQuery);
|
||||||
|
System.out.println("Time: " + (System.currentTimeMillis() - start));
|
||||||
|
System.out.println(subquery);
|
||||||
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user