mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
(WIP) Partial integration of new query expansion code into the query-service
This commit is contained in:
parent
07e4d7ec6d
commit
00ef4f9803
@ -1,17 +1,14 @@
|
|||||||
package nu.marginalia.functions.searchquery.query_parser;
|
package nu.marginalia.functions.searchquery.query_parser;
|
||||||
|
|
||||||
import ca.rmen.porterstemmer.PorterStemmer;
|
|
||||||
import lombok.AllArgsConstructor;
|
|
||||||
import lombok.EqualsAndHashCode;
|
|
||||||
import lombok.Getter;
|
|
||||||
import lombok.ToString;
|
|
||||||
import nu.marginalia.functions.searchquery.query_parser.token.Token;
|
import nu.marginalia.functions.searchquery.query_parser.token.Token;
|
||||||
import nu.marginalia.functions.searchquery.query_parser.token.TokenType;
|
import nu.marginalia.functions.searchquery.query_parser.token.TokenType;
|
||||||
|
import nu.marginalia.functions.searchquery.query_parser.variant.QueryVariant;
|
||||||
|
import nu.marginalia.functions.searchquery.query_parser.variant.QueryVariantSet;
|
||||||
|
import nu.marginalia.functions.searchquery.query_parser.variant.QueryWord;
|
||||||
import nu.marginalia.util.language.EnglishDictionary;
|
import nu.marginalia.util.language.EnglishDictionary;
|
||||||
import nu.marginalia.LanguageModels;
|
import nu.marginalia.LanguageModels;
|
||||||
import nu.marginalia.keyword.KeywordExtractor;
|
import nu.marginalia.keyword.KeywordExtractor;
|
||||||
import nu.marginalia.language.sentence.SentenceExtractor;
|
import nu.marginalia.language.sentence.SentenceExtractor;
|
||||||
import nu.marginalia.util.ngrams.NGramBloomFilter;
|
|
||||||
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
||||||
import nu.marginalia.language.model.DocumentSentence;
|
import nu.marginalia.language.model.DocumentSentence;
|
||||||
import nu.marginalia.language.model.WordSpan;
|
import nu.marginalia.language.model.WordSpan;
|
||||||
@ -22,17 +19,13 @@ import java.util.regex.Pattern;
|
|||||||
public class QueryVariants {
|
public class QueryVariants {
|
||||||
private final KeywordExtractor keywordExtractor;
|
private final KeywordExtractor keywordExtractor;
|
||||||
private final TermFrequencyDict dict;
|
private final TermFrequencyDict dict;
|
||||||
private final PorterStemmer ps = new PorterStemmer();
|
|
||||||
|
|
||||||
private final NGramBloomFilter nGramBloomFilter;
|
|
||||||
private final EnglishDictionary englishDictionary;
|
private final EnglishDictionary englishDictionary;
|
||||||
private final ThreadLocal<SentenceExtractor> sentenceExtractor;
|
private final ThreadLocal<SentenceExtractor> sentenceExtractor;
|
||||||
|
|
||||||
public QueryVariants(LanguageModels lm,
|
public QueryVariants(LanguageModels lm,
|
||||||
TermFrequencyDict dict,
|
TermFrequencyDict dict,
|
||||||
NGramBloomFilter nGramBloomFilter,
|
|
||||||
EnglishDictionary englishDictionary) {
|
EnglishDictionary englishDictionary) {
|
||||||
this.nGramBloomFilter = nGramBloomFilter;
|
|
||||||
this.englishDictionary = englishDictionary;
|
this.englishDictionary = englishDictionary;
|
||||||
this.keywordExtractor = new KeywordExtractor();
|
this.keywordExtractor = new KeywordExtractor();
|
||||||
this.sentenceExtractor = ThreadLocal.withInitial(() -> new SentenceExtractor(lm));
|
this.sentenceExtractor = ThreadLocal.withInitial(() -> new SentenceExtractor(lm));
|
||||||
@ -40,33 +33,6 @@ public class QueryVariants {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
final Pattern numWordBoundary = Pattern.compile("[0-9][a-zA-Z]|[a-zA-Z][0-9]");
|
|
||||||
final Pattern dashBoundary = Pattern.compile("-");
|
|
||||||
|
|
||||||
@AllArgsConstructor
|
|
||||||
private static class Word {
|
|
||||||
public final String stemmed;
|
|
||||||
public final String word;
|
|
||||||
public final String wordOriginal;
|
|
||||||
}
|
|
||||||
|
|
||||||
@AllArgsConstructor @Getter @ToString @EqualsAndHashCode
|
|
||||||
public static class QueryVariant {
|
|
||||||
public final List<String> terms;
|
|
||||||
public final double value;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Getter @ToString
|
|
||||||
public static class QueryVariantSet {
|
|
||||||
final List<QueryVariant> faithful = new ArrayList<>();
|
|
||||||
final List<QueryVariant> alternative = new ArrayList<>();
|
|
||||||
|
|
||||||
final List<Token> nonLiterals = new ArrayList<>();
|
|
||||||
|
|
||||||
public boolean isEmpty() {
|
|
||||||
return faithful.isEmpty() && alternative.isEmpty() && nonLiterals.isEmpty();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public QueryVariantSet getQueryVariants(List<Token> query) {
|
public QueryVariantSet getQueryVariants(List<Token> query) {
|
||||||
final JoinedQueryAndNonLiteralTokens joinedQuery = joinQuery(query);
|
final JoinedQueryAndNonLiteralTokens joinedQuery = joinQuery(query);
|
||||||
@ -108,19 +74,11 @@ public class QueryVariants {
|
|||||||
byStart.put(0, elongatedFirstWords);
|
byStart.put(0, elongatedFirstWords);
|
||||||
}
|
}
|
||||||
|
|
||||||
final List<List<Word>> goodSpans = getWordSpans(byStart, sentence, livingSpans);
|
final List<List<QueryWord>> goodSpans = getWordSpans(byStart, sentence, livingSpans);
|
||||||
|
|
||||||
List<List<String>> faithfulQueries = new ArrayList<>();
|
List<List<String>> faithfulQueries = new ArrayList<>();
|
||||||
List<List<String>> alternativeQueries = new ArrayList<>();
|
List<List<String>> alternativeQueries = new ArrayList<>();
|
||||||
|
|
||||||
for (var ls : goodSpans) {
|
|
||||||
faithfulQueries.addAll(createTokens(ls));
|
|
||||||
}
|
|
||||||
|
|
||||||
for (var span : goodSpans) {
|
|
||||||
alternativeQueries.addAll(joinTerms(span));
|
|
||||||
}
|
|
||||||
|
|
||||||
for (var ls : goodSpans) {
|
for (var ls : goodSpans) {
|
||||||
var last = ls.get(ls.size() - 1);
|
var last = ls.get(ls.size() - 1);
|
||||||
|
|
||||||
@ -174,105 +132,8 @@ public class QueryVariants {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Collection<List<String>> createTokens(List<Word> ls) {
|
private List<List<QueryWord>> getWordSpans(TreeMap<Integer, List<WordSpan>> byStart, DocumentSentence sentence, List<ArrayList<WordSpan>> livingSpans) {
|
||||||
List<String> asTokens = new ArrayList<>();
|
List<List<QueryWord>> goodSpans = new ArrayList<>();
|
||||||
List<List<String>> ret = new ArrayList<>();
|
|
||||||
|
|
||||||
|
|
||||||
boolean dash = false;
|
|
||||||
boolean num = false;
|
|
||||||
|
|
||||||
for (var span : ls) {
|
|
||||||
dash |= dashBoundary.matcher(span.word).find();
|
|
||||||
num |= numWordBoundary.matcher(span.word).find();
|
|
||||||
if (ls.size() == 1 || !isOmittableWord(span.word)) {
|
|
||||||
asTokens.add(span.word);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ret.add(asTokens);
|
|
||||||
|
|
||||||
if (dash) {
|
|
||||||
ret.addAll(combineDashWords(ls));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (num) {
|
|
||||||
ret.addAll(splitWordNum(ls));
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean isOmittableWord(String word) {
|
|
||||||
return switch (word) {
|
|
||||||
case "vs", "or", "and", "versus", "is", "the", "why", "when", "if", "who", "are", "am" -> true;
|
|
||||||
default -> false;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
private Collection<? extends List<String>> splitWordNum(List<Word> ls) {
|
|
||||||
List<String> asTokens2 = new ArrayList<>();
|
|
||||||
|
|
||||||
boolean num = false;
|
|
||||||
|
|
||||||
for (var span : ls) {
|
|
||||||
var wordMatcher = numWordBoundary.matcher(span.word);
|
|
||||||
var stemmedMatcher = numWordBoundary.matcher(span.stemmed);
|
|
||||||
|
|
||||||
int ws = 0;
|
|
||||||
int ss = 0;
|
|
||||||
boolean didSplit = false;
|
|
||||||
while (wordMatcher.find(ws) && stemmedMatcher.find(ss)) {
|
|
||||||
ws = wordMatcher.start()+1;
|
|
||||||
ss = stemmedMatcher.start()+1;
|
|
||||||
if (nGramBloomFilter.isKnownNGram(splitAtNumBoundary(span.word, stemmedMatcher.start(), "_"))
|
|
||||||
|| nGramBloomFilter.isKnownNGram(splitAtNumBoundary(span.word, stemmedMatcher.start(), "-")))
|
|
||||||
{
|
|
||||||
String combined = splitAtNumBoundary(span.word, wordMatcher.start(), "_");
|
|
||||||
asTokens2.add(combined);
|
|
||||||
didSplit = true;
|
|
||||||
num = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!didSplit) {
|
|
||||||
asTokens2.add(span.word);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (num) {
|
|
||||||
return List.of(asTokens2);
|
|
||||||
}
|
|
||||||
return Collections.emptyList();
|
|
||||||
}
|
|
||||||
|
|
||||||
private Collection<? extends List<String>> combineDashWords(List<Word> ls) {
|
|
||||||
List<String> asTokens2 = new ArrayList<>();
|
|
||||||
boolean dash = false;
|
|
||||||
|
|
||||||
for (var span : ls) {
|
|
||||||
var matcher = dashBoundary.matcher(span.word);
|
|
||||||
if (matcher.find() && nGramBloomFilter.isKnownNGram(ps.stemWord(dashBoundary.matcher(span.word).replaceAll("")))) {
|
|
||||||
dash = true;
|
|
||||||
String combined = dashBoundary.matcher(span.word).replaceAll("");
|
|
||||||
asTokens2.add(combined);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
asTokens2.add(span.word);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (dash) {
|
|
||||||
return List.of(asTokens2);
|
|
||||||
}
|
|
||||||
return Collections.emptyList();
|
|
||||||
}
|
|
||||||
|
|
||||||
private String splitAtNumBoundary(String in, int splitPoint, String joiner) {
|
|
||||||
return in.substring(0, splitPoint+1) + joiner + in.substring(splitPoint+1);
|
|
||||||
}
|
|
||||||
|
|
||||||
private List<List<Word>> getWordSpans(TreeMap<Integer, List<WordSpan>> byStart, DocumentSentence sentence, List<ArrayList<WordSpan>> livingSpans) {
|
|
||||||
List<List<Word>> goodSpans = new ArrayList<>();
|
|
||||||
for (int i = 0; i < 1; i++) {
|
for (int i = 0; i < 1; i++) {
|
||||||
var spans = byStart.get(i);
|
var spans = byStart.get(i);
|
||||||
|
|
||||||
@ -298,9 +159,9 @@ public class QueryVariants {
|
|||||||
int end = span.get(span.size()-1).end;
|
int end = span.get(span.size()-1).end;
|
||||||
|
|
||||||
if (end == sentence.length()) {
|
if (end == sentence.length()) {
|
||||||
var gs = new ArrayList<Word>(span.size());
|
var gs = new ArrayList<QueryWord>(span.size());
|
||||||
for (var s : span) {
|
for (var s : span) {
|
||||||
gs.add(new Word(sentence.constructStemmedWordFromSpan(s), sentence.constructWordFromSpan(s),
|
gs.add(new QueryWord(sentence.constructStemmedWordFromSpan(s), sentence.constructWordFromSpan(s),
|
||||||
s.size() == 1 ? sentence.words[s.start] : ""));
|
s.size() == 1 ? sentence.words[s.start] : ""));
|
||||||
}
|
}
|
||||||
goodSpans.add(gs);
|
goodSpans.add(gs);
|
||||||
@ -325,38 +186,6 @@ public class QueryVariants {
|
|||||||
return goodSpans;
|
return goodSpans;
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<List<String>> joinTerms(List<Word> span) {
|
|
||||||
List<List<String>> ret = new ArrayList<>();
|
|
||||||
|
|
||||||
for (int i = 0; i < span.size()-1; i++) {
|
|
||||||
var a = span.get(i);
|
|
||||||
var b = span.get(i+1);
|
|
||||||
|
|
||||||
var stemmed = ps.stemWord(a.word + b.word);
|
|
||||||
|
|
||||||
double scoreCombo = dict.getTermFreqStemmed(stemmed);
|
|
||||||
if (scoreCombo > 10000) {
|
|
||||||
List<String> asTokens = new ArrayList<>();
|
|
||||||
|
|
||||||
for (int j = 0; j < i; j++) {
|
|
||||||
var word = span.get(j).word;
|
|
||||||
asTokens.add(word);
|
|
||||||
}
|
|
||||||
{
|
|
||||||
var word = a.word + b.word;
|
|
||||||
asTokens.add(word);
|
|
||||||
}
|
|
||||||
for (int j = i+2; j < span.size(); j++) {
|
|
||||||
var word = span.get(j).word;
|
|
||||||
asTokens.add(word);
|
|
||||||
}
|
|
||||||
|
|
||||||
ret.add(asTokens);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
private JoinedQueryAndNonLiteralTokens joinQuery(List<Token> query) {
|
private JoinedQueryAndNonLiteralTokens joinQuery(List<Token> query) {
|
||||||
StringJoiner s = new StringJoiner(" ");
|
StringJoiner s = new StringJoiner(" ");
|
||||||
|
@ -0,0 +1,7 @@
|
|||||||
|
package nu.marginalia.functions.searchquery.query_parser.variant;
|
||||||
|
|
||||||
|
import nu.marginalia.functions.searchquery.query_parser.variant.model.QWordGraph;
|
||||||
|
|
||||||
|
public interface ExpansionStrategy {
|
||||||
|
void expand(QWordGraph graph);
|
||||||
|
}
|
@ -0,0 +1,111 @@
|
|||||||
|
package nu.marginalia.functions.searchquery.query_parser.variant;
|
||||||
|
|
||||||
|
import ca.rmen.porterstemmer.PorterStemmer;
|
||||||
|
import nu.marginalia.functions.searchquery.query_parser.variant.model.QWord;
|
||||||
|
import nu.marginalia.functions.searchquery.query_parser.variant.model.QWordGraph;
|
||||||
|
import nu.marginalia.functions.searchquery.segmentation.NgramLexicon;
|
||||||
|
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
public class QueryExpansion {
|
||||||
|
private static final PorterStemmer ps = new PorterStemmer();
|
||||||
|
private final TermFrequencyDict dict;
|
||||||
|
private final NgramLexicon lexicon;
|
||||||
|
List<ExpansionStrategy> expansionStrategies = List.of(
|
||||||
|
this::joinDashes,
|
||||||
|
this::splitWordNum,
|
||||||
|
this::joinTerms,
|
||||||
|
this::createSegments
|
||||||
|
);
|
||||||
|
|
||||||
|
public QueryExpansion(TermFrequencyDict dict,
|
||||||
|
NgramLexicon lexicon
|
||||||
|
) {
|
||||||
|
this.dict = dict;
|
||||||
|
this.lexicon = lexicon;
|
||||||
|
}
|
||||||
|
|
||||||
|
public QWordGraph expandQuery(List<String> words) {
|
||||||
|
|
||||||
|
QWordGraph graph = new QWordGraph(words);
|
||||||
|
|
||||||
|
for (var strategy : expansionStrategies) {
|
||||||
|
strategy.expand(graph);
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final Pattern dashPattern = Pattern.compile("-");
|
||||||
|
private static final Pattern numWordBoundary = Pattern.compile("[0-9][a-zA-Z]|[a-zA-Z][0-9]");
|
||||||
|
|
||||||
|
// Turn 'lawn-chair' into 'lawnchair'
|
||||||
|
public void joinDashes(QWordGraph graph) {
|
||||||
|
for (var qw : graph) {
|
||||||
|
if (qw.word().contains("-")) {
|
||||||
|
var joined = StringUtils.join(dashPattern.split(qw.word()));
|
||||||
|
graph.addVariant(qw, joined);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Turn 'MP3' into 'MP-3'
|
||||||
|
public void splitWordNum(QWordGraph graph) {
|
||||||
|
for (var qw : graph) {
|
||||||
|
var matcher = numWordBoundary.matcher(qw.word());
|
||||||
|
if (matcher.matches()) {
|
||||||
|
var joined = StringUtils.join(dashPattern.split(qw.word()), '-');
|
||||||
|
graph.addVariant(qw, joined);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Turn 'lawn chair' into 'lawnchair'
|
||||||
|
public void joinTerms(QWordGraph graph) {
|
||||||
|
QWord prev = null;
|
||||||
|
|
||||||
|
for (var qw : graph) {
|
||||||
|
if (prev != null) {
|
||||||
|
var joinedWord = prev.word() + qw.word();
|
||||||
|
var joinedStemmed = ps.stemWord(joinedWord);
|
||||||
|
|
||||||
|
var scoreA = dict.getTermFreqStemmed(prev.stemmed());
|
||||||
|
var scoreB = dict.getTermFreqStemmed(qw.stemmed());
|
||||||
|
|
||||||
|
var scoreCombo = dict.getTermFreqStemmed(joinedStemmed);
|
||||||
|
|
||||||
|
if (scoreCombo > scoreA + scoreB || scoreCombo > 1000) {
|
||||||
|
graph.addVariantForSpan(prev, qw, joinedWord);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
prev = qw;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void createSegments(QWordGraph graph) {
|
||||||
|
List<QWord> nodes = new ArrayList<>();
|
||||||
|
|
||||||
|
for (var qw : graph) {
|
||||||
|
nodes.add(qw);
|
||||||
|
}
|
||||||
|
|
||||||
|
String[] words = nodes.stream().map(QWord::word).toArray(String[]::new);
|
||||||
|
|
||||||
|
for (int length = 2; length < Math.min(10, words.length); length++) {
|
||||||
|
for (var segment : lexicon.findSegments(length, words)) {
|
||||||
|
int start = segment.start();
|
||||||
|
int end = segment.start() + segment.length();
|
||||||
|
var word = StringUtils.join(words, "_", start, end);
|
||||||
|
|
||||||
|
graph.addVariantForSpan(nodes.get(start), nodes.get(end), word);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,17 @@
|
|||||||
|
package nu.marginalia.functions.searchquery.query_parser.variant;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.EqualsAndHashCode;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.ToString;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
@AllArgsConstructor
|
||||||
|
@Getter
|
||||||
|
@ToString
|
||||||
|
@EqualsAndHashCode
|
||||||
|
public class QueryVariant {
|
||||||
|
public final List<String> terms;
|
||||||
|
public final double value;
|
||||||
|
}
|
@ -0,0 +1,21 @@
|
|||||||
|
package nu.marginalia.functions.searchquery.query_parser.variant;
|
||||||
|
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.ToString;
|
||||||
|
import nu.marginalia.functions.searchquery.query_parser.token.Token;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@ToString
|
||||||
|
public class QueryVariantSet {
|
||||||
|
public final List<QueryVariant> faithful = new ArrayList<>();
|
||||||
|
public final List<QueryVariant> alternative = new ArrayList<>();
|
||||||
|
|
||||||
|
public final List<Token> nonLiterals = new ArrayList<>();
|
||||||
|
|
||||||
|
public boolean isEmpty() {
|
||||||
|
return faithful.isEmpty() && alternative.isEmpty() && nonLiterals.isEmpty();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,10 @@
|
|||||||
|
package nu.marginalia.functions.searchquery.query_parser.variant;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
|
||||||
|
/**
 * Immutable triple of strings describing one word of a query: a stemmed form,
 * the word itself, and an original form.
 */
public class QueryWord {
    public final String stemmed;
    public final String word;
    public final String wordOriginal;

    /**
     * Assigns each argument to the field of the same name.
     */
    public QueryWord(String stemmed, String word, String wordOriginal) {
        this.stemmed = stemmed;
        this.word = word;
        this.wordOriginal = wordOriginal;
    }
}
|
@ -0,0 +1,8 @@
|
|||||||
|
package nu.marginalia.functions.searchquery.query_parser.variant;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public interface VariantStrategy {
|
||||||
|
Collection<? extends List<String>> constructVariants(List<QueryWord> ls);
|
||||||
|
}
|
@ -0,0 +1,47 @@
|
|||||||
|
package nu.marginalia.functions.searchquery.query_parser.variant.model;
|
||||||
|
|
||||||
|
import ca.rmen.porterstemmer.PorterStemmer;
|
||||||
|
|
||||||
|
public record QWord(
|
||||||
|
int ord,
|
||||||
|
boolean variant,
|
||||||
|
String stemmed,
|
||||||
|
String word,
|
||||||
|
String original)
|
||||||
|
{
|
||||||
|
|
||||||
|
// These are special words that are not in the input, but are added to the graph,
|
||||||
|
// note the space around the ^ and $, to avoid collisions with real words
|
||||||
|
private static final String BEG_MARKER = " ^ ";
|
||||||
|
private static final String END_MARKER = " $ ";
|
||||||
|
|
||||||
|
private static final PorterStemmer ps = new PorterStemmer();
|
||||||
|
|
||||||
|
public boolean isBeg() {
|
||||||
|
return word.equals(BEG_MARKER);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isEnd() {
|
||||||
|
return word.equals(END_MARKER);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static QWord beg() {
|
||||||
|
return new QWord(Integer.MIN_VALUE, false, BEG_MARKER, BEG_MARKER, BEG_MARKER);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static QWord end() {
|
||||||
|
return new QWord(Integer.MAX_VALUE, false, END_MARKER, END_MARKER, END_MARKER);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isOriginal() {
|
||||||
|
return !variant;
|
||||||
|
}
|
||||||
|
|
||||||
|
public QWord(int ord, String word) {
|
||||||
|
this(ord, false, ps.stemWord(word), word, word);
|
||||||
|
}
|
||||||
|
|
||||||
|
public QWord(int ord, QWord original, String word) {
|
||||||
|
this(ord, true, ps.stemWord(word), word, original.original);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,236 @@
|
|||||||
|
package nu.marginalia.functions.searchquery.query_parser.variant.model;
|
||||||
|
|
||||||
|
import org.jetbrains.annotations.NotNull;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
/** Graph structure for constructing query variants. The graph should be a directed acyclic graph,
|
||||||
|
* with a single start node and a single end node, denoted by QWord.beg() and QWord.end() respectively.
|
||||||
|
* <p></p>
|
||||||
|
* Naively, every path from the start to the end node should represent a valid query variant, although in
|
||||||
|
* practice it is desirable to be clever about how to evaluate the paths, to avoid combinatorial explosion.
|
||||||
|
*/
|
||||||
|
public class QWordGraph implements Iterable<QWord> {
|
||||||
|
|
||||||
|
|
||||||
|
public record QWordGraphLink(QWord from, QWord to) {
|
||||||
|
}
|
||||||
|
|
||||||
|
private final List<QWordGraphLink> links = new ArrayList<>();
|
||||||
|
private final Map<QWord, List<QWord>> fromTo = new HashMap<>();
|
||||||
|
private final Map<QWord, List<QWord>> toFrom = new HashMap<>();
|
||||||
|
|
||||||
|
private int wordId = 0;
|
||||||
|
|
||||||
|
public QWordGraph(String... words) {
|
||||||
|
this(List.of(words));
|
||||||
|
}
|
||||||
|
|
||||||
|
public QWordGraph(List<String> words) {
|
||||||
|
QWord beg = QWord.beg();
|
||||||
|
QWord end = QWord.end();
|
||||||
|
|
||||||
|
var prev = beg;
|
||||||
|
|
||||||
|
for (String s : words) {
|
||||||
|
var word = new QWord(wordId++, s);
|
||||||
|
addLink(prev, word);
|
||||||
|
prev = word;
|
||||||
|
}
|
||||||
|
|
||||||
|
addLink(prev, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addVariant(QWord original, String word) {
|
||||||
|
var siblings = getVariants(original);
|
||||||
|
if (siblings.stream().anyMatch(w -> w.word().equals(word)))
|
||||||
|
return;
|
||||||
|
|
||||||
|
var newWord = new QWord(wordId++, original, word);
|
||||||
|
|
||||||
|
for (var prev : getPrev(original))
|
||||||
|
addLink(prev, newWord);
|
||||||
|
for (var next : getNext(original))
|
||||||
|
addLink(newWord, next);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addVariantForSpan(QWord first, QWord last, String word) {
|
||||||
|
var newWord = new QWord(wordId++, first, word);
|
||||||
|
|
||||||
|
for (var prev : getPrev(first))
|
||||||
|
addLink(prev, newWord);
|
||||||
|
for (var next : getNext(last))
|
||||||
|
addLink(newWord, next);
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<QWord> getVariants(QWord original) {
|
||||||
|
var prevNext = getPrev(original).stream()
|
||||||
|
.flatMap(prev -> getNext(prev).stream())
|
||||||
|
.collect(Collectors.toSet());
|
||||||
|
|
||||||
|
return getNext(original).stream()
|
||||||
|
.flatMap(next -> getPrev(next).stream())
|
||||||
|
.filter(prevNext::contains)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void addLink(QWord from, QWord to) {
|
||||||
|
links.add(new QWordGraphLink(from, to));
|
||||||
|
fromTo.computeIfAbsent(from, k -> new ArrayList<>()).add(to);
|
||||||
|
toFrom.computeIfAbsent(to, k -> new ArrayList<>()).add(from);
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<QWordGraphLink> links() {
|
||||||
|
return Collections.unmodifiableList(links);
|
||||||
|
}
|
||||||
|
public List<QWord> nodes() {
|
||||||
|
return links.stream()
|
||||||
|
.flatMap(l -> Stream.of(l.from(), l.to()))
|
||||||
|
.sorted(Comparator.comparing(QWord::ord))
|
||||||
|
.distinct()
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public List<QWord> getNext(QWord word) {
|
||||||
|
return fromTo.getOrDefault(word, List.of());
|
||||||
|
}
|
||||||
|
public List<QWord> getNextOriginal(QWord word) {
|
||||||
|
return fromTo.getOrDefault(word, List.of())
|
||||||
|
.stream()
|
||||||
|
.filter(QWord::isOriginal)
|
||||||
|
.toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<QWord> getPrev(QWord word) {
|
||||||
|
return toFrom.getOrDefault(word, List.of());
|
||||||
|
}
|
||||||
|
public List<QWord> getPrevOriginal(QWord word) {
|
||||||
|
return toFrom.getOrDefault(word, List.of())
|
||||||
|
.stream()
|
||||||
|
.filter(QWord::isOriginal)
|
||||||
|
.toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns true if removing the word would disconnect the graph
|
||||||
|
// so that there is no path from 'begin' to 'end'. This is useful
|
||||||
|
// in breaking up the graph into smaller component subgraphs, and
|
||||||
|
// understanding which vertexes can be re-ordered without changing
|
||||||
|
// the semantics of the encoded query.
|
||||||
|
public boolean isBypassed(QWord word, QWord begin, QWord end) {
|
||||||
|
assert word.isOriginal() : "Can only bypass original words";
|
||||||
|
|
||||||
|
Set<QWord> edge = new HashSet<>();
|
||||||
|
Set<QWord> visited = new HashSet<>();
|
||||||
|
|
||||||
|
edge.add(begin);
|
||||||
|
|
||||||
|
while (!edge.isEmpty()) {
|
||||||
|
Set<QWord> next = new HashSet<>();
|
||||||
|
|
||||||
|
for (var w : edge) {
|
||||||
|
// Skip the word we're trying find a bypassing route for
|
||||||
|
if (w.ord() == word.ord())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (Objects.equals(w, end))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
next.addAll(getNext(w));
|
||||||
|
}
|
||||||
|
|
||||||
|
next.removeAll(visited);
|
||||||
|
visited.addAll(next);
|
||||||
|
edge = next;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns a set of all nodes that are between 'begin' and 'end' in the graph,
|
||||||
|
* including the terminal nodes. This is useful for breaking up the graph into
|
||||||
|
* smaller components that can be evaluated in any order.
|
||||||
|
* <p></p>
|
||||||
|
* It is assumed that there is a path from 'begin' to 'end' in the graph, and no
|
||||||
|
* other paths that bypass 'end'.
|
||||||
|
* <p></p>
|
||||||
|
* The nodes are returned in the order they are encountered in a breadth-first search.
|
||||||
|
*/
|
||||||
|
public List<QWord> nodesBetween(QWord begin, QWord end) {
|
||||||
|
List<QWord> edge = new ArrayList<>();
|
||||||
|
List<QWord> visited = new ArrayList<>();
|
||||||
|
|
||||||
|
edge.add(begin);
|
||||||
|
|
||||||
|
while (!edge.isEmpty()) {
|
||||||
|
List<QWord> next = new ArrayList<>();
|
||||||
|
|
||||||
|
for (var w : edge) {
|
||||||
|
if (Objects.equals(w, end))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
assert (!w.isEnd() && end.isEnd()) : "Graph has a path beyond the specified end vertex";
|
||||||
|
|
||||||
|
next.addAll(getNext(w));
|
||||||
|
}
|
||||||
|
|
||||||
|
next.removeAll(visited);
|
||||||
|
visited.addAll(next);
|
||||||
|
edge = next;
|
||||||
|
}
|
||||||
|
|
||||||
|
return visited;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns a list of subgraphs that are connected on the path from
|
||||||
|
* 'begin' to 'end'. This is useful for breaking up the graph into
|
||||||
|
* smaller components that can be evaluated in any order.
|
||||||
|
* <p></p>
|
||||||
|
* The subgraphs are specified by their predecessor and successor nodes,
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public List<QWordGraphLink> getSubgraphs(QWord begin, QWord end) {
|
||||||
|
// Short-circuit for the common and simple case
|
||||||
|
if (getNext(begin).equals(List.of(end)))
|
||||||
|
return List.of(new QWordGraphLink(begin, end));
|
||||||
|
|
||||||
|
List<QWordGraphLink> subgraphs = new ArrayList<>();
|
||||||
|
|
||||||
|
List<QWord> points = nodesBetween(begin, end)
|
||||||
|
.stream()
|
||||||
|
.filter(w -> isBypassed(w, begin, end))
|
||||||
|
.toList();
|
||||||
|
|
||||||
|
for (int i = 0; i < points.size() - 1; i++) {
|
||||||
|
var a = points.get(i);
|
||||||
|
var b = points.get(i+1);
|
||||||
|
|
||||||
|
subgraphs.add(new QWordGraphLink(a, b));
|
||||||
|
}
|
||||||
|
|
||||||
|
return subgraphs;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@NotNull
|
||||||
|
@Override
|
||||||
|
public Iterator<QWord> iterator() {
|
||||||
|
return new Iterator<>() {
|
||||||
|
QWord pos = QWord.beg();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasNext() {
|
||||||
|
return !pos.isEnd();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public QWord next() {
|
||||||
|
pos = getNextOriginal(pos).get(0);
|
||||||
|
return pos;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,40 @@
|
|||||||
|
package nu.marginalia.functions.searchquery.query_parser.variant.strategy;
|
||||||
|
|
||||||
|
import nu.marginalia.functions.searchquery.query_parser.variant.QueryWord;
|
||||||
|
import nu.marginalia.functions.searchquery.query_parser.variant.VariantStrategy;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
/** Variant strategy that combines word that have dashes, as sometimes lawn-chair
|
||||||
|
* gets spelled lawnchair */
|
||||||
|
public class CombineDashes implements VariantStrategy {
|
||||||
|
final Pattern dashBoundary = Pattern.compile("-");
|
||||||
|
|
||||||
|
public CombineDashes() {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Collection<? extends List<String>> constructVariants(List<QueryWord> words) {
|
||||||
|
List<String> asTokens2 = new ArrayList<>();
|
||||||
|
boolean dash = false;
|
||||||
|
|
||||||
|
for (var span : words) {
|
||||||
|
var matcher = dashBoundary.matcher(span.word);
|
||||||
|
if (matcher.find()) {
|
||||||
|
String combined = dashBoundary.matcher(span.word).replaceAll("");
|
||||||
|
asTokens2.add(combined);
|
||||||
|
}
|
||||||
|
|
||||||
|
asTokens2.add(span.word);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dash) {
|
||||||
|
return List.of(asTokens2);
|
||||||
|
}
|
||||||
|
return Collections.emptyList();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,58 @@
|
|||||||
|
package nu.marginalia.functions.searchquery.query_parser.variant.strategy;
|
||||||
|
|
||||||
|
import ca.rmen.porterstemmer.PorterStemmer;
|
||||||
|
import nu.marginalia.functions.searchquery.query_parser.variant.QueryWord;
|
||||||
|
import nu.marginalia.functions.searchquery.query_parser.variant.VariantStrategy;
|
||||||
|
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/** Variant strategy that merges tokens that are adjacent, where the combined token
|
||||||
|
* has a high term frequency. That way we match 'lawnchair' with 'lawn chair' */
|
||||||
|
public class JoinTerms implements VariantStrategy {
|
||||||
|
private final TermFrequencyDict dict;
|
||||||
|
private final PorterStemmer ps;
|
||||||
|
|
||||||
|
public JoinTerms(TermFrequencyDict dict, PorterStemmer ps) {
|
||||||
|
this.dict = dict;
|
||||||
|
this.ps = ps;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Collection<? extends List<String>> constructVariants(List<QueryWord> span) {
|
||||||
|
List<List<String>> ret = new ArrayList<>();
|
||||||
|
|
||||||
|
for (int i = 0; i < span.size()-1; i++) {
|
||||||
|
var a = span.get(i);
|
||||||
|
var b = span.get(i+1);
|
||||||
|
|
||||||
|
var stemmed = ps.stemWord(a.word + b.word);
|
||||||
|
|
||||||
|
double scoreCombo = dict.getTermFreqStemmed(stemmed);
|
||||||
|
|
||||||
|
if (scoreCombo > 10000) {
|
||||||
|
List<String> asTokens = new ArrayList<>();
|
||||||
|
|
||||||
|
for (int j = 0; j < i; j++) {
|
||||||
|
var word = span.get(j).word;
|
||||||
|
asTokens.add(word);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
var word = a.word + b.word;
|
||||||
|
asTokens.add(word);
|
||||||
|
}
|
||||||
|
for (int j = i+2; j < span.size(); j++) {
|
||||||
|
var word = span.get(j).word;
|
||||||
|
asTokens.add(word);
|
||||||
|
}
|
||||||
|
|
||||||
|
ret.add(asTokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,65 @@
|
|||||||
|
package nu.marginalia.functions.searchquery.query_parser.variant.strategy;
|
||||||
|
|
||||||
|
import nu.marginalia.functions.searchquery.query_parser.variant.QueryWord;
|
||||||
|
import nu.marginalia.functions.searchquery.query_parser.variant.VariantStrategy;
|
||||||
|
import nu.marginalia.util.ngrams.NGramBloomFilter;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
/** Variant strategy that splits tokens at the boundary between a number and a word.
|
||||||
|
*/
|
||||||
|
public class SplitWordNum implements VariantStrategy {
|
||||||
|
|
||||||
|
|
||||||
|
final Pattern numWordBoundary = Pattern.compile("[0-9][a-zA-Z]|[a-zA-Z][0-9]");
|
||||||
|
private final NGramBloomFilter nGramBloomFilter;
|
||||||
|
|
||||||
|
public SplitWordNum(NGramBloomFilter nGramBloomFilter) {
|
||||||
|
this.nGramBloomFilter = nGramBloomFilter;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Collection<? extends List<String>> constructVariants(List<QueryWord> ls) {
|
||||||
|
List<String> asTokens2 = new ArrayList<>();
|
||||||
|
|
||||||
|
boolean num = false;
|
||||||
|
|
||||||
|
for (var span : ls) {
|
||||||
|
var wordMatcher = numWordBoundary.matcher(span.word);
|
||||||
|
var stemmedMatcher = numWordBoundary.matcher(span.stemmed);
|
||||||
|
|
||||||
|
int ws = 0;
|
||||||
|
int ss = 0;
|
||||||
|
boolean didSplit = false;
|
||||||
|
while (wordMatcher.find(ws) && stemmedMatcher.find(ss)) {
|
||||||
|
ws = wordMatcher.start()+1;
|
||||||
|
ss = stemmedMatcher.start()+1;
|
||||||
|
if (nGramBloomFilter.isKnownNGram(splitAtNumBoundary(span.word, stemmedMatcher.start(), "_"))
|
||||||
|
|| nGramBloomFilter.isKnownNGram(splitAtNumBoundary(span.word, stemmedMatcher.start(), "-")))
|
||||||
|
{
|
||||||
|
String combined = splitAtNumBoundary(span.word, wordMatcher.start(), "_");
|
||||||
|
asTokens2.add(combined);
|
||||||
|
didSplit = true;
|
||||||
|
num = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!didSplit) {
|
||||||
|
asTokens2.add(span.word);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (num) {
|
||||||
|
return List.of(asTokens2);
|
||||||
|
}
|
||||||
|
return Collections.emptyList();
|
||||||
|
}
|
||||||
|
|
||||||
|
private String splitAtNumBoundary(String in, int splitPoint, String joiner) {
|
||||||
|
return in.substring(0, splitPoint+1) + joiner + in.substring(splitPoint+1);
|
||||||
|
}
|
||||||
|
}
|
@ -8,7 +8,6 @@ import nu.marginalia.api.searchquery.model.query.SearchSubquery;
|
|||||||
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
||||||
import nu.marginalia.util.language.EnglishDictionary;
|
import nu.marginalia.util.language.EnglishDictionary;
|
||||||
import nu.marginalia.language.WordPatterns;
|
import nu.marginalia.language.WordPatterns;
|
||||||
import nu.marginalia.util.ngrams.NGramBloomFilter;
|
|
||||||
import nu.marginalia.api.searchquery.model.query.QueryParams;
|
import nu.marginalia.api.searchquery.model.query.QueryParams;
|
||||||
import nu.marginalia.api.searchquery.model.query.ProcessedQuery;
|
import nu.marginalia.api.searchquery.model.query.ProcessedQuery;
|
||||||
import nu.marginalia.functions.searchquery.query_parser.QueryParser;
|
import nu.marginalia.functions.searchquery.query_parser.QueryParser;
|
||||||
@ -37,9 +36,8 @@ public class QueryFactory {
|
|||||||
@Inject
|
@Inject
|
||||||
public QueryFactory(LanguageModels lm,
|
public QueryFactory(LanguageModels lm,
|
||||||
TermFrequencyDict dict,
|
TermFrequencyDict dict,
|
||||||
EnglishDictionary englishDictionary,
|
EnglishDictionary englishDictionary) {
|
||||||
NGramBloomFilter nGramBloomFilter) {
|
this.queryVariants = ThreadLocal.withInitial(() -> new QueryVariants(lm ,dict, englishDictionary));
|
||||||
this.queryVariants = ThreadLocal.withInitial(() -> new QueryVariants(lm ,dict, nGramBloomFilter, englishDictionary));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -79,7 +77,7 @@ public class QueryFactory {
|
|||||||
|
|
||||||
String domain = null;
|
String domain = null;
|
||||||
|
|
||||||
var basicQuery = queryParser.parse(query);
|
List<Token> basicQuery = queryParser.parse(query);
|
||||||
|
|
||||||
if (basicQuery.size() >= 12) {
|
if (basicQuery.size() >= 12) {
|
||||||
problems.add("Your search query is too long");
|
problems.add("Your search query is too long");
|
||||||
@ -108,10 +106,9 @@ public class QueryFactory {
|
|||||||
for (var parts : queryPermutations) {
|
for (var parts : queryPermutations) {
|
||||||
QuerySearchTermsAccumulator termsAccumulator = new QuerySearchTermsAccumulator(parts);
|
QuerySearchTermsAccumulator termsAccumulator = new QuerySearchTermsAccumulator(parts);
|
||||||
|
|
||||||
SearchSubquery subquery = termsAccumulator.createSubquery();
|
|
||||||
|
|
||||||
domain = termsAccumulator.domain;
|
domain = termsAccumulator.domain;
|
||||||
|
|
||||||
|
SearchSubquery subquery = termsAccumulator.createSubquery();
|
||||||
subqueries.add(subquery);
|
subqueries.add(subquery);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -0,0 +1,33 @@
|
|||||||
|
package nu.marginalia.functions.searchquery.query_parser.variant.model;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
class QWordGraphTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAddConstructor() {
|
||||||
|
QWordGraph graph = new QWordGraph("hello", "world");
|
||||||
|
|
||||||
|
System.out.println(graph.isBypassed(graph.nodes().get(1), QWord.beg(), QWord.end()));
|
||||||
|
System.out.println(graph.isBypassed(graph.nodes().get(2), QWord.beg(), QWord.end()));
|
||||||
|
graph.links().forEach(System.out::println);
|
||||||
|
System.out.println("--");
|
||||||
|
graph.nodes().forEach(System.out::println);
|
||||||
|
System.out.println("--");
|
||||||
|
graph.addVariant(graph.nodes().get(1), "sup");
|
||||||
|
System.out.println(graph.isBypassed(graph.nodes().get(1), QWord.beg(), QWord.end()));
|
||||||
|
System.out.println(graph.isBypassed(graph.nodes().get(2), QWord.beg(), QWord.end()));
|
||||||
|
System.out.println("--");
|
||||||
|
graph.links().forEach(System.out::println);
|
||||||
|
System.out.println("--");
|
||||||
|
graph.nodes().forEach(System.out::println);
|
||||||
|
|
||||||
|
graph.addVariantForSpan(graph.nodes().get(1), graph.nodes().get(2), "heyall");
|
||||||
|
System.out.println(graph.isBypassed(graph.nodes().get(1), QWord.beg(), QWord.end()));
|
||||||
|
System.out.println(graph.isBypassed(graph.nodes().get(2), QWord.beg(), QWord.end()));
|
||||||
|
System.out.println("--");
|
||||||
|
graph.links().forEach(System.out::println);
|
||||||
|
System.out.println("--");
|
||||||
|
graph.nodes().forEach(System.out::println);
|
||||||
|
}
|
||||||
|
}
|
@ -32,8 +32,7 @@ public class QueryFactoryTest {
|
|||||||
|
|
||||||
queryFactory = new QueryFactory(lm,
|
queryFactory = new QueryFactory(lm,
|
||||||
tfd,
|
tfd,
|
||||||
new EnglishDictionary(tfd),
|
new EnglishDictionary(tfd)
|
||||||
new NGramBloomFilter(lm)
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user