mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
(qs, WIP) Tidy it up a bit
This commit is contained in:
parent
eda926767e
commit
0bd1e15cce
@ -32,6 +32,8 @@ public class QWordPath {
|
|||||||
return nodes.stream();
|
return nodes.stream();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Construct a new path by projecting the path onto a set of nodes, such that
|
||||||
|
* the nodes in the new path are a subset of the provided nodes */
|
||||||
public QWordPath project(Set<QWord> nodes) {
|
public QWordPath project(Set<QWord> nodes) {
|
||||||
return new QWordPath(this.nodes.stream().filter(nodes::contains).collect(Collectors.toSet()));
|
return new QWordPath(this.nodes.stream().filter(nodes::contains).collect(Collectors.toSet()));
|
||||||
}
|
}
|
||||||
|
@ -21,17 +21,26 @@ class QWordPathsRenderer {
|
|||||||
return new QWordPathsRenderer(graph).render(graph.reachability());
|
return new QWordPathsRenderer(graph).render(graph.reachability());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Render the paths into a human-readable infix-style expression.
|
||||||
|
* <p></p>
|
||||||
|
* This method is recursive, but the recursion depth is limited by the
|
||||||
|
* maximum length of the paths, which is hard limited to a value typically around 10,
|
||||||
|
* so we don't need to worry about stack overflows here...
|
||||||
|
*/
|
||||||
String render(QWordGraph.ReachabilityData reachability) {
|
String render(QWordGraph.ReachabilityData reachability) {
|
||||||
if (paths.size() == 1) {
|
if (paths.size() == 1) {
|
||||||
return paths.iterator().next().stream().map(QWord::word).collect(Collectors.joining(" "));
|
return paths.iterator().next().stream().map(QWord::word).collect(Collectors.joining(" "));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Find the commonality of words in the paths
|
||||||
|
|
||||||
Map<QWord, Integer> commonality = paths.stream().flatMap(QWordPath::stream)
|
Map<QWord, Integer> commonality = paths.stream().flatMap(QWordPath::stream)
|
||||||
.collect(Collectors.groupingBy(w -> w, Collectors.summingInt(w -> 1)));
|
.collect(Collectors.groupingBy(w -> w, Collectors.summingInt(w -> 1)));
|
||||||
|
|
||||||
Set<QWord> commonToAll = new HashSet<>();
|
// Break the words into two categories: those that are common to all paths, and those that are not
|
||||||
Set<QWord> notCommonToAll = new HashSet<>();
|
|
||||||
|
|
||||||
|
List<QWord> commonToAll = new ArrayList<>();
|
||||||
|
Set<QWord> notCommonToAll = new HashSet<>();
|
||||||
commonality.forEach((k, v) -> {
|
commonality.forEach((k, v) -> {
|
||||||
if (v == paths.size()) {
|
if (v == paths.size()) {
|
||||||
commonToAll.add(k);
|
commonToAll.add(k);
|
||||||
@ -40,33 +49,32 @@ class QWordPathsRenderer {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
StringJoiner concat = new StringJoiner(" ");
|
StringJoiner resultJoiner = new StringJoiner(" ");
|
||||||
if (!commonToAll.isEmpty()) { // Case where one or more words are common to all paths
|
|
||||||
|
|
||||||
commonToAll.stream()
|
if (!commonToAll.isEmpty()) { // Case where one or more words are common to all paths
|
||||||
.sorted(reachability.topologicalComparator())
|
commonToAll.sort(reachability.topologicalComparator());
|
||||||
.map(QWord::word)
|
|
||||||
.forEach(concat::add);
|
for (var word : commonToAll) {
|
||||||
|
resultJoiner.add(word.word());
|
||||||
|
}
|
||||||
|
|
||||||
// Deal with the portion of the paths that do not all share a common word
|
// Deal with the portion of the paths that do not all share a common word
|
||||||
if (!notCommonToAll.isEmpty()) {
|
if (!notCommonToAll.isEmpty()) {
|
||||||
|
|
||||||
List<QWordPath> nonOverlappingPortions = new ArrayList<>();
|
List<QWordPath> nonOverlappingPortions = new ArrayList<>();
|
||||||
|
|
||||||
|
// Create a new path for each path that does not contain the common words we just printed
|
||||||
for (var path : paths) {
|
for (var path : paths) {
|
||||||
// Project the path onto the divergent nodes (i.e. remove common nodes)
|
|
||||||
var np = path.project(notCommonToAll);
|
var np = path.project(notCommonToAll);
|
||||||
if (np.isEmpty())
|
if (np.isEmpty())
|
||||||
continue;
|
continue;
|
||||||
nonOverlappingPortions.add(np);
|
nonOverlappingPortions.add(np);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nonOverlappingPortions.size() > 1) {
|
// Recurse into the non-overlapping portions
|
||||||
|
if (!nonOverlappingPortions.isEmpty()) {
|
||||||
var wp = new QWordPathsRenderer(nonOverlappingPortions);
|
var wp = new QWordPathsRenderer(nonOverlappingPortions);
|
||||||
concat.add(wp.render(reachability));
|
resultJoiner.add(wp.render(reachability));
|
||||||
} else if (!nonOverlappingPortions.isEmpty()) {
|
|
||||||
var wp = new QWordPathsRenderer(nonOverlappingPortions);
|
|
||||||
concat.add(wp.render(reachability));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (commonality.size() > 1) { // The case where no words are common to all paths
|
} else if (commonality.size() > 1) { // The case where no words are common to all paths
|
||||||
@ -79,6 +87,7 @@ class QWordPathsRenderer {
|
|||||||
// Mutable copy of the paths
|
// Mutable copy of the paths
|
||||||
List<QWordPath> allDivergentPaths = new ArrayList<>(paths);
|
List<QWordPath> allDivergentPaths = new ArrayList<>(paths);
|
||||||
|
|
||||||
|
// Break the paths into branches by the first common word they contain, in order of decreasing commonality
|
||||||
for (var commonWord : byCommonality) {
|
for (var commonWord : byCommonality) {
|
||||||
if (allDivergentPaths.isEmpty())
|
if (allDivergentPaths.isEmpty())
|
||||||
break;
|
break;
|
||||||
@ -91,10 +100,15 @@ class QWordPathsRenderer {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Remove the common word from the path
|
||||||
|
var newPath = path.without(commonWord);
|
||||||
|
|
||||||
pathsByCommonWord
|
pathsByCommonWord
|
||||||
.computeIfAbsent(commonWord, k -> new ArrayList<>())
|
.computeIfAbsent(commonWord, k -> new ArrayList<>())
|
||||||
.add(path.without(commonWord)); // Remove the common word from the path
|
.add(newPath);
|
||||||
|
|
||||||
|
// Remove the path from the list of divergent paths since we've now accounted for it and
|
||||||
|
// we don't want redundant branches:
|
||||||
iter.remove();
|
iter.remove();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -103,17 +117,17 @@ class QWordPathsRenderer {
|
|||||||
.sorted(Map.Entry.comparingByKey(reachability.topologicalComparator())) // Sort by topological order to ensure consistent output
|
.sorted(Map.Entry.comparingByKey(reachability.topologicalComparator())) // Sort by topological order to ensure consistent output
|
||||||
.map(e -> {
|
.map(e -> {
|
||||||
String commonWord = e.getKey().word();
|
String commonWord = e.getKey().word();
|
||||||
|
// Recurse into the branches:
|
||||||
String branchPart = new QWordPathsRenderer(e.getValue()).render(reachability);
|
String branchPart = new QWordPathsRenderer(e.getValue()).render(reachability);
|
||||||
return STR."\{commonWord} \{branchPart}";
|
return STR."\{commonWord} \{branchPart}";
|
||||||
})
|
})
|
||||||
.collect(Collectors.joining(" | ", " ( ", " ) "));
|
.collect(Collectors.joining(" | ", " ( ", " ) "));
|
||||||
|
|
||||||
concat.add(branches);
|
resultJoiner.add(branches);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove any double spaces that may have been introduced
|
// Remove any double spaces that may have been introduced
|
||||||
return concat.toString().replaceAll("\\s+", " ").trim();
|
return resultJoiner.toString().replaceAll("\\s+", " ").trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user