mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00

Look, this will make the git history look funny, but trimming unnecessary depth from the source tree is a very necessary sanity-preserving measure when dealing with a super-modularized codebase like this one. While it makes the project configuration a bit less conventional, it will save you several clicks every time you jump between modules. Which you'll do a lot, because it's *modul*ar. The src/main/java convention makes a lot of sense for a non-modular project though. This ain't that.
61 lines
2.0 KiB
Java
61 lines
2.0 KiB
Java
package nu.marginalia.ranking.domains;
|
|
|
|
import gnu.trove.list.TIntList;
|
|
import gnu.trove.list.array.TIntArrayList;
|
|
import nu.marginalia.ranking.domains.accumulator.RankingResultAccumulator;
|
|
import nu.marginalia.ranking.domains.data.GraphSource;
|
|
import nu.marginalia.ranking.domains.jgrapht.PersonalizedPageRank;
|
|
import org.jgrapht.Graph;
|
|
import org.jgrapht.alg.interfaces.VertexScoringAlgorithm;
|
|
import org.jgrapht.alg.scoring.PageRank;
|
|
|
|
import java.util.Comparator;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.function.Supplier;
|
|
|
|
public class PageRankDomainRanker implements RankingAlgorithm {
|
|
private final List<Integer> influenceSet;
|
|
private final Graph<Integer, ?> graph;
|
|
|
|
public PageRankDomainRanker(GraphSource source,
|
|
List<Integer> influenceSet)
|
|
{
|
|
this.influenceSet = influenceSet;
|
|
this.graph = source.getGraph();
|
|
}
|
|
|
|
public static PageRankDomainRanker forDomainNames(GraphSource source,
|
|
List<String> influenceSet)
|
|
{
|
|
return new PageRankDomainRanker(source, source.domainIds(influenceSet));
|
|
}
|
|
|
|
@Override
|
|
public <T> T calculate(int resultCount, Supplier<RankingResultAccumulator<T>> accumulatorP) {
|
|
VertexScoringAlgorithm<Integer, Double> pageRank;
|
|
|
|
if (influenceSet != null && !influenceSet.isEmpty()) {
|
|
pageRank = new PersonalizedPageRank<>(graph, influenceSet);
|
|
}
|
|
else {
|
|
pageRank = new PageRank<>(graph);
|
|
}
|
|
|
|
TIntList results = new TIntArrayList(resultCount);
|
|
pageRank.getScores().entrySet()
|
|
.stream()
|
|
.sorted(Comparator.comparing((Map.Entry<Integer, Double> e) -> -e.getValue()))
|
|
.limit(resultCount)
|
|
.map(Map.Entry::getKey)
|
|
.forEach(results::add);
|
|
|
|
var accumulator = accumulatorP.get();
|
|
for (int i = 0; i < results.size(); i++) {
|
|
accumulator.add(results.get(i), i);
|
|
}
|
|
return accumulator.get();
|
|
}
|
|
|
|
}
|