mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(query) Always generate an ngram alternative, and suppress generation of multiple identical query branches
This commit is contained in:
parent
5165cf6d15
commit
64baa41e64
@ -11,6 +11,7 @@ import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/** Responsible for expanding a query, that is creating alternative branches of query execution
|
||||
* to increase the number of results
|
||||
@ -23,7 +24,8 @@ public class QueryExpansion {
|
||||
// The expansion strategies held by this class, given as method references on this instance.
// NOTE(review): this is a diff hunk -- the entry `this::joinTerms` without a trailing comma looks like
// the removed line, with `this::joinTerms,` and `this::ngramAll` as its replacement; confirm against the repository.
private final List<ExpansionStrategy> expansionStrategies = List.of(
|
||||
this::joinDashes,
|
||||
this::splitWordNum,
|
||||
this::joinTerms
|
||||
this::joinTerms,
|
||||
this::ngramAll
|
||||
);
|
||||
|
||||
@Inject
|
||||
@ -63,6 +65,22 @@ public class QueryExpansion {
|
||||
}
|
||||
|
||||
|
||||
public void ngramAll(QWordGraph graph) {
|
||||
List<QWord> parts = new ArrayList<>();
|
||||
|
||||
for (var qw : graph) {
|
||||
if (qw.isBeg() || qw.isEnd())
|
||||
continue;
|
||||
|
||||
parts.add(qw);
|
||||
}
|
||||
|
||||
if (parts.size() > 1) {
|
||||
graph.addVariantForSpan(parts.getFirst(), parts.getLast(),
|
||||
parts.stream().map(QWord::word).collect(Collectors.joining("_")));
|
||||
}
|
||||
}
|
||||
|
||||
// Turn 'MP3' into 'MP-3'
|
||||
public void splitWordNum(QWordGraph graph) {
|
||||
for (var qw : graph) {
|
||||
|
@ -2,6 +2,8 @@ package nu.marginalia.functions.searchquery.query_parser.model;
|
||||
|
||||
import ca.rmen.porterstemmer.PorterStemmer;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
public record QWord(
|
||||
int ord,
|
||||
boolean variant,
|
||||
@ -48,4 +50,22 @@ public record QWord(
|
||||
public String toString() {
|
||||
return STR."q{\{word}}";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
|
||||
QWord qWord = (QWord) o;
|
||||
return variant == qWord.variant && Objects.equals(word, qWord.word) && Objects.equals(stemmed, qWord.stemmed) && Objects.equals(isOriginal(), qWord.isOriginal());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int result = Boolean.hashCode(variant);
|
||||
result = 31 * result + Objects.hashCode(stemmed);
|
||||
result = 31 * result + Objects.hashCode(word);
|
||||
result = 31 * result + Objects.hashCode(isOriginal());
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
@ -19,8 +19,8 @@ public class QWordGraph implements Iterable<QWord> {
|
||||
public record QWordGraphLink(QWord from, QWord to) {}
|
||||
|
||||
private final List<QWordGraphLink> links = new ArrayList<>();
|
||||
private final Map<QWord, List<QWord>> fromTo = new HashMap<>();
|
||||
private final Map<QWord, List<QWord>> toFrom = new HashMap<>();
|
||||
private final Map<Integer, List<QWord>> fromTo = new HashMap<>();
|
||||
private final Map<Integer, List<QWord>> toFrom = new HashMap<>();
|
||||
|
||||
private int wordId = 0;
|
||||
|
||||
@ -79,8 +79,8 @@ public class QWordGraph implements Iterable<QWord> {
|
||||
|
||||
/** Adds a directed link between two words: the link is appended to the link list and
 *  registered in both the forward (fromTo) and reverse (toFrom) lookup maps.
 *  NOTE(review): this is a diff hunk -- the computeIfAbsent calls keyed by the QWord itself look like
 *  the removed lines and the ones keyed by ord() their replacements; confirm against the repository.
 */
public void addLink(QWord from, QWord to) {
|
||||
links.add(new QWordGraphLink(from, to));
|
||||
fromTo.computeIfAbsent(from, k -> new ArrayList<>()).add(to);
|
||||
toFrom.computeIfAbsent(to, k -> new ArrayList<>()).add(from);
|
||||
fromTo.computeIfAbsent(from.ord(), k -> new ArrayList<>()).add(to);
|
||||
toFrom.computeIfAbsent(to.ord(), k -> new ArrayList<>()).add(from);
|
||||
}
|
||||
|
||||
public List<QWordGraphLink> links() {
|
||||
@ -103,20 +103,20 @@ public class QWordGraph implements Iterable<QWord> {
|
||||
}
|
||||
|
||||
/** Returns the words registered in fromTo for the given word, or an empty list when there is no entry.
 *  NOTE(review): diff hunk -- the lookup by the QWord itself looks like the removed line and the
 *  lookup by word.ord() its replacement; confirm against the repository.
 */
public List<QWord> getNext(QWord word) {
|
||||
return fromTo.getOrDefault(word, List.of());
|
||||
return fromTo.getOrDefault(word.ord(), List.of());
|
||||
}
|
||||
/** Returns the words registered in fromTo for the given word, keeping only those for which
 *  isOriginal() holds; the result is empty when there is no entry.
 *  NOTE(review): diff hunk -- the lookup by the QWord itself looks like the removed line and the
 *  lookup by word.ord() its replacement; confirm against the repository.
 */
public List<QWord> getNextOriginal(QWord word) {
|
||||
return fromTo.getOrDefault(word, List.of())
|
||||
return fromTo.getOrDefault(word.ord(), List.of())
|
||||
.stream()
|
||||
.filter(QWord::isOriginal)
|
||||
.toList();
|
||||
}
|
||||
|
||||
/** Returns the words registered in toFrom for the given word, or an empty list when there is no entry.
 *  NOTE(review): diff hunk -- the lookup by the QWord itself looks like the removed line and the
 *  lookup by word.ord() its replacement; confirm against the repository.
 */
public List<QWord> getPrev(QWord word) {
|
||||
return toFrom.getOrDefault(word, List.of());
|
||||
return toFrom.getOrDefault(word.ord(), List.of());
|
||||
}
|
||||
public List<QWord> getPrevOriginal(QWord word) {
|
||||
return toFrom.getOrDefault(word, List.of())
|
||||
return toFrom.getOrDefault(word.ord(), List.of())
|
||||
.stream()
|
||||
.filter(QWord::isOriginal)
|
||||
.toList();
|
||||
|
Loading…
Reference in New Issue
Block a user