mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 13:19:02 +00:00

Look, this will make the git history look funny, but trimming unnecessary depth from the source tree is a very necessary sanity-preserving measure when dealing with a super-modularized codebase like this one. While it makes the project configuration a bit less conventional, it will save you several clicks every time you jump between modules. Which you'll do a lot, because it's *modul*ar. The src/main/java convention makes a lot of sense for a non-modular project though. This ain't that.
54 lines
1.7 KiB
Java
54 lines
1.7 KiB
Java
package nu.marginalia.search;
|
|
|
|
import nu.marginalia.api.searchquery.model.query.QueryResponse;
|
|
import nu.marginalia.search.model.ClusteredUrlDetails;
|
|
import nu.marginalia.search.model.UrlDetails;
|
|
|
|
import java.util.List;
|
|
import java.util.stream.Collectors;
|
|
|
|
/** Functions for clustering search results */
|
|
public class SearchResultClusterer {
|
|
private SearchResultClusterer() {}
|
|
|
|
public interface SearchResultClusterStrategy {
|
|
List<ClusteredUrlDetails> clusterResults(List<UrlDetails> results, int total);
|
|
}
|
|
|
|
public static SearchResultClusterStrategy selectStrategy(QueryResponse response) {
|
|
if (response.domain() != null && !response.domain().isBlank())
|
|
return SearchResultClusterer::noOp;
|
|
|
|
return SearchResultClusterer::byDomain;
|
|
}
|
|
|
|
/** No clustering, just return the results as is */
|
|
private static List<ClusteredUrlDetails> noOp(List<UrlDetails> results, int total) {
|
|
if (results.isEmpty())
|
|
return List.of();
|
|
|
|
return results.stream()
|
|
.map(ClusteredUrlDetails::new)
|
|
.toList();
|
|
}
|
|
|
|
/** Cluster the results by domain, and return the top "total" clusters
|
|
* sorted by the relevance of the best result
|
|
*/
|
|
private static List<ClusteredUrlDetails> byDomain(List<UrlDetails> results, int total) {
|
|
if (results.isEmpty())
|
|
return List.of();
|
|
|
|
return results.stream()
|
|
.collect(
|
|
Collectors.groupingBy(details -> details.domainId)
|
|
)
|
|
.values().stream()
|
|
.map(ClusteredUrlDetails::new)
|
|
.sorted()
|
|
.limit(total)
|
|
.toList();
|
|
}
|
|
|
|
}
|