MarginaliaSearch/code/services-application/search-service/java/nu/marginalia/search/SearchResultClusterer.java
Viktor Lofgren 1d34224416 (refac) Remove src/main from all source code paths.
Look, this will make the git history look funny, but trimming unnecessary depth from the source tree is a very necessary sanity-preserving measure when dealing with a super-modularized codebase like this one.

While it makes the project configuration a bit less conventional, it will save you several clicks every time you jump between modules.  Which you'll do a lot, because it's *modul*ar.  The src/main/java convention makes a lot of sense for a non-modular project though.  This ain't that.
2024-02-23 16:13:40 +01:00

54 lines
1.7 KiB
Java

package nu.marginalia.search;
import nu.marginalia.api.searchquery.model.query.QueryResponse;
import nu.marginalia.search.model.ClusteredUrlDetails;
import nu.marginalia.search.model.UrlDetails;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Static functions for turning a flat list of search results into a list of
 * {@link ClusteredUrlDetails}.
 * <p>
 * The class cannot be instantiated. Callers pick a strategy with
 * {@link #selectStrategy(QueryResponse)} and then apply it to their results.
 */
public class SearchResultClusterer {

    /** Not instantiable: this class only hosts static functions. */
    private SearchResultClusterer() {
    }

    /** A function that converts a list of search results into clustered results. */
    public interface SearchResultClusterStrategy {
        /**
         * @param results the search results to convert
         * @param total   the requested maximum number of clusters; whether this
         *                bound is applied depends on the strategy
         * @return the clustered results
         */
        List<ClusteredUrlDetails> clusterResults(List<UrlDetails> results, int total);
    }

    /**
     * Chooses the clustering strategy for a query response.
     *
     * @param response the query response whose {@code domain()} is inspected
     * @return the by-domain strategy when the response has no domain (null or
     *         blank); otherwise the strategy that performs no clustering
     */
    public static SearchResultClusterStrategy selectStrategy(QueryResponse response) {
        var domain = response.domain();

        if (domain == null || domain.isBlank()) {
            return SearchResultClusterer::byDomain;
        }

        // A domain is present on the response: leave the results unclustered.
        return SearchResultClusterer::noOp;
    }

    /**
     * Performs no clustering: each result is wrapped in its own
     * {@link ClusteredUrlDetails}, in the order given.
     * <p>
     * Note that {@code total} is not consulted by this strategy, so the
     * returned list has as many entries as {@code results}.
     */
    private static List<ClusteredUrlDetails> noOp(List<UrlDetails> results, int total) {
        if (results.isEmpty()) {
            return List.of();
        }

        return results.stream()
                .map(details -> new ClusteredUrlDetails(details))
                .toList();
    }

    /**
     * Groups the results by {@code domainId}, builds one
     * {@link ClusteredUrlDetails} per group, orders the clusters by their
     * natural ordering and keeps at most {@code total} of them.
     * <p>
     * The ordering itself is defined by {@link ClusteredUrlDetails}; per the
     * original documentation of this method it reflects the relevance of the
     * best result in each cluster.
     */
    private static List<ClusteredUrlDetails> byDomain(List<UrlDetails> results, int total) {
        if (results.isEmpty()) {
            return List.of();
        }

        // One entry per distinct domainId, each holding that domain's results.
        var resultsByDomain = results.stream()
                .collect(Collectors.groupingBy(details -> details.domainId));

        return resultsByDomain.values().stream()
                .map(ClusteredUrlDetails::new)
                .sorted()
                .limit(total)
                .toList();
    }
}