(query) Modify tokenizer to match the behavior of the sentence extractor

The query tokenizer and the sentence extractor must normalize terms the same way; otherwise a query like "plato's republic" won't match the indexed keywords, because the sentence extractor strips the possessive at index time.
2025-02-23 21:18:58 +00:00 · 2024-04-17 17:54:32 +02:00 · 2024-04-17 17:54:32 +02:00 · a09c84e1b8
commit a09c84e1b8
parent 44b33798f3
1 changed file with 2 additions and 1 deletion
--- a/code/functions/search-query/java/nu/marginalia/functions/searchquery/query_parser/QueryTokenizer.java
+++ b/code/functions/search-query/java/nu/marginalia/functions/searchquery/query_parser/QueryTokenizer.java
@@ -2,6 +2,7 @@ package nu.marginalia.functions.searchquery.query_parser;

 import nu.marginalia.functions.searchquery.query_parser.token.QueryToken;
 import nu.marginalia.language.encoding.AsciiFlattener;
+import nu.marginalia.language.sentence.SentenceExtractorStringUtils;

 import java.util.ArrayList;
 import java.util.List;
@@ -54,7 +55,7 @@ public class QueryTokenizer {
                }

                String displayStr = query.substring(i, end);
-                String str = displayStr.toLowerCase();
+                String str = SentenceExtractorStringUtils.toLowerCaseStripPossessive(displayStr);

                tokens.add(new QueryToken.LiteralTerm(str, displayStr));