(query) Modify tokenizer to match the behavior of the sentence extractor

The query tokenizer and the sentence extractor must normalize terms the same way; otherwise a query like "plato's republic" won't match the indexed keywords, since the sentence extractor strips the possessive from them.
This commit is contained in:
Viktor Lofgren 2024-04-17 17:54:32 +02:00
parent 44b33798f3
commit a09c84e1b8

View File

@@ -2,6 +2,7 @@ package nu.marginalia.functions.searchquery.query_parser;
import nu.marginalia.functions.searchquery.query_parser.token.QueryToken;
import nu.marginalia.language.encoding.AsciiFlattener;
import nu.marginalia.language.sentence.SentenceExtractorStringUtils;
import java.util.ArrayList;
import java.util.List;
@@ -54,7 +55,7 @@ public class QueryTokenizer {
}
String displayStr = query.substring(i, end);
String str = displayStr.toLowerCase();
String str = SentenceExtractorStringUtils.toLowerCaseStripPossessive(displayStr);
tokens.add(new QueryToken.LiteralTerm(str, displayStr));