mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 13:19:02 +00:00
(query) Always generate an ngram alternative; suppress generation of multiple identical query branches
This commit is contained in:
parent
5165cf6d15
commit
64baa41e64
@ -11,6 +11,7 @@ import org.apache.commons.lang3.StringUtils;
|
|||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
/** Responsible for expanding a query, that is creating alternative branches of query execution
|
/** Responsible for expanding a query, that is creating alternative branches of query execution
|
||||||
* to increase the number of results
|
* to increase the number of results
|
||||||
@ -23,7 +24,8 @@ public class QueryExpansion {
|
|||||||
private final List<ExpansionStrategy> expansionStrategies = List.of(
|
private final List<ExpansionStrategy> expansionStrategies = List.of(
|
||||||
this::joinDashes,
|
this::joinDashes,
|
||||||
this::splitWordNum,
|
this::splitWordNum,
|
||||||
this::joinTerms
|
this::joinTerms,
|
||||||
|
this::ngramAll
|
||||||
);
|
);
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
@ -63,6 +65,22 @@ public class QueryExpansion {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void ngramAll(QWordGraph graph) {
|
||||||
|
List<QWord> parts = new ArrayList<>();
|
||||||
|
|
||||||
|
for (var qw : graph) {
|
||||||
|
if (qw.isBeg() || qw.isEnd())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
parts.add(qw);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (parts.size() > 1) {
|
||||||
|
graph.addVariantForSpan(parts.getFirst(), parts.getLast(),
|
||||||
|
parts.stream().map(QWord::word).collect(Collectors.joining("_")));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Turn 'MP3' into 'MP-3'
|
// Turn 'MP3' into 'MP-3'
|
||||||
public void splitWordNum(QWordGraph graph) {
|
public void splitWordNum(QWordGraph graph) {
|
||||||
for (var qw : graph) {
|
for (var qw : graph) {
|
||||||
|
@ -2,6 +2,8 @@ package nu.marginalia.functions.searchquery.query_parser.model;
|
|||||||
|
|
||||||
import ca.rmen.porterstemmer.PorterStemmer;
|
import ca.rmen.porterstemmer.PorterStemmer;
|
||||||
|
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
public record QWord(
|
public record QWord(
|
||||||
int ord,
|
int ord,
|
||||||
boolean variant,
|
boolean variant,
|
||||||
@ -48,4 +50,22 @@ public record QWord(
|
|||||||
public String toString() {
|
public String toString() {
|
||||||
return STR."q{\{word}}";
|
return STR."q{\{word}}";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (this == o) return true;
|
||||||
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
|
|
||||||
|
QWord qWord = (QWord) o;
|
||||||
|
return variant == qWord.variant && Objects.equals(word, qWord.word) && Objects.equals(stemmed, qWord.stemmed) && Objects.equals(isOriginal(), qWord.isOriginal());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
int result = Boolean.hashCode(variant);
|
||||||
|
result = 31 * result + Objects.hashCode(stemmed);
|
||||||
|
result = 31 * result + Objects.hashCode(word);
|
||||||
|
result = 31 * result + Objects.hashCode(isOriginal());
|
||||||
|
return result;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -19,8 +19,8 @@ public class QWordGraph implements Iterable<QWord> {
|
|||||||
public record QWordGraphLink(QWord from, QWord to) {}
|
public record QWordGraphLink(QWord from, QWord to) {}
|
||||||
|
|
||||||
private final List<QWordGraphLink> links = new ArrayList<>();
|
private final List<QWordGraphLink> links = new ArrayList<>();
|
||||||
private final Map<QWord, List<QWord>> fromTo = new HashMap<>();
|
private final Map<Integer, List<QWord>> fromTo = new HashMap<>();
|
||||||
private final Map<QWord, List<QWord>> toFrom = new HashMap<>();
|
private final Map<Integer, List<QWord>> toFrom = new HashMap<>();
|
||||||
|
|
||||||
private int wordId = 0;
|
private int wordId = 0;
|
||||||
|
|
||||||
@ -79,8 +79,8 @@ public class QWordGraph implements Iterable<QWord> {
|
|||||||
|
|
||||||
public void addLink(QWord from, QWord to) {
|
public void addLink(QWord from, QWord to) {
|
||||||
links.add(new QWordGraphLink(from, to));
|
links.add(new QWordGraphLink(from, to));
|
||||||
fromTo.computeIfAbsent(from, k -> new ArrayList<>()).add(to);
|
fromTo.computeIfAbsent(from.ord(), k -> new ArrayList<>()).add(to);
|
||||||
toFrom.computeIfAbsent(to, k -> new ArrayList<>()).add(from);
|
toFrom.computeIfAbsent(to.ord(), k -> new ArrayList<>()).add(from);
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<QWordGraphLink> links() {
|
public List<QWordGraphLink> links() {
|
||||||
@ -103,20 +103,20 @@ public class QWordGraph implements Iterable<QWord> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public List<QWord> getNext(QWord word) {
|
public List<QWord> getNext(QWord word) {
|
||||||
return fromTo.getOrDefault(word, List.of());
|
return fromTo.getOrDefault(word.ord(), List.of());
|
||||||
}
|
}
|
||||||
public List<QWord> getNextOriginal(QWord word) {
|
public List<QWord> getNextOriginal(QWord word) {
|
||||||
return fromTo.getOrDefault(word, List.of())
|
return fromTo.getOrDefault(word.ord(), List.of())
|
||||||
.stream()
|
.stream()
|
||||||
.filter(QWord::isOriginal)
|
.filter(QWord::isOriginal)
|
||||||
.toList();
|
.toList();
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<QWord> getPrev(QWord word) {
|
public List<QWord> getPrev(QWord word) {
|
||||||
return toFrom.getOrDefault(word, List.of());
|
return toFrom.getOrDefault(word.ord(), List.of());
|
||||||
}
|
}
|
||||||
public List<QWord> getPrevOriginal(QWord word) {
|
public List<QWord> getPrevOriginal(QWord word) {
|
||||||
return toFrom.getOrDefault(word, List.of())
|
return toFrom.getOrDefault(word.ord(), List.of())
|
||||||
.stream()
|
.stream()
|
||||||
.filter(QWord::isOriginal)
|
.filter(QWord::isOriginal)
|
||||||
.toList();
|
.toList();
|
||||||
|
Loading…
Reference in New Issue
Block a user