mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(query) Modify tokenizer to match the behavior of the sentence extractor
The query tokenizer's normalization must match the sentence extractor's; otherwise a query like "plato's republic" won't match the indexed keywords, since the sentence extractor strips the possessive from them.
This commit is contained in:
parent
44b33798f3
commit
a09c84e1b8
@ -2,6 +2,7 @@ package nu.marginalia.functions.searchquery.query_parser;
|
||||
|
||||
import nu.marginalia.functions.searchquery.query_parser.token.QueryToken;
|
||||
import nu.marginalia.language.encoding.AsciiFlattener;
|
||||
import nu.marginalia.language.sentence.SentenceExtractorStringUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
@ -54,7 +55,7 @@ public class QueryTokenizer {
|
||||
}
|
||||
|
||||
String displayStr = query.substring(i, end);
|
||||
String str = displayStr.toLowerCase();
|
||||
String str = SentenceExtractorStringUtils.toLowerCaseStripPossessive(displayStr);
|
||||
|
||||
tokens.add(new QueryToken.LiteralTerm(str, displayStr));
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user