(query-parsing) Drop search term elements that aren't indexed by the search engine

This commit is contained in:
Viktor Lofgren 2024-09-03 11:21:01 +02:00
parent bb5d946c26
commit 8290c19e24
3 changed files with 19 additions and 8 deletions

View File

@ -61,10 +61,20 @@ public class QueryParser {
if (str.isBlank())
return;
if (str.endsWith(":") || str.endsWith(".")) {
// Remove trailing punctuation
int lastChar = str.charAt(str.length() - 1);
if (":.,!?$".indexOf(lastChar) >= 0)
entity.replace(new QueryToken.LiteralTerm(str.substring(0, str.length() - 1), lt.displayStr()));
}
// Remove term elements that aren't indexed by the search engine
if (str.endsWith("()"))
entity.replace(new QueryToken.LiteralTerm(str.substring(0, str.length() - 2), lt.displayStr()));
if (str.startsWith("$"))
entity.replace(new QueryToken.LiteralTerm(str.substring(1), lt.displayStr()));
if (entity.isBlank()) {
entity.remove();
}
}
private static void createNegatedTerms(TransformList<QueryToken>.Entity first, TransformList<QueryToken>.Entity second) {

View File

@ -1,5 +1,7 @@
package nu.marginalia.util.transform_list;
import nu.marginalia.functions.searchquery.query_parser.token.QueryToken;
import java.util.List;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
@ -30,7 +32,7 @@ import java.util.function.Predicate;
* </pre>
* </code>
*/
public class TransformList<T> {
public class TransformList<T extends QueryToken> {
private final List<T> backingList;
public TransformList(List<T> backingList) {
@ -138,6 +140,10 @@ public class TransformList<T> {
value = newValue;
}
/**
 * Reports whether the current value carries no usable text.
 *
 * @return {@code true} when {@code value} is {@code null}, or when
 *         {@code value.str().isBlank()} holds; {@code false} otherwise
 */
public boolean isBlank() {
    // A missing value counts as blank; checking it first also avoids
    // dereferencing null in the str() call below.
    if (value == null) {
        return true;
    }
    return value.str().isBlank();
}
/**
 * Sets {@code action} to {@code Action.REMOVE}.
 * <p>
 * NOTE(review): presumably this marks the element for deletion when the
 * pending actions are applied; the code that consumes {@code action} is
 * not visible here, so confirm against the rest of the class.
 */
public void remove() {
action = Action.REMOVE;
}

View File

@ -1,11 +1,6 @@
package nu.marginalia.language;
/** Logic for deciding which words are eligible to be keywords.
* <p/>
* This is in dire need of oversight. Here be towering dragons with names,
* a skull next to their HP bar, and their own Mick Gordon soundtrack just
* for the battle.
*
*/
public class WordPatterns {
public static final int MIN_WORD_LENGTH = 1;