mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
Merge pull request 'Deduplicate domains in explore mode' (#42) from master into release
Reviewed-on: https://git.marginalia.nu/marginalia/marginalia.nu/pulls/42
This commit is contained in:
commit
36cbe80821
@ -16,8 +16,10 @@ import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@ -58,17 +60,25 @@ public class BrowseCommand implements SearchCommandInterface {
|
||||
String definePrefix = "browse:";
|
||||
String word = humanQuery.substring(definePrefix.length()).toLowerCase();
|
||||
|
||||
Set<String> domainHashes = new HashSet<>();
|
||||
|
||||
try {
|
||||
if ("random".equals(word)) {
|
||||
var results = edgeDataStoreDao.getRandomDomains(25, blacklist);
|
||||
results.removeIf(res -> !screenshotService.hasScreenshot(new EdgeId<>(res.domainId)));
|
||||
|
||||
results.removeIf(res ->
|
||||
!screenshotService.hasScreenshot(new EdgeId<>(res.domainId))
|
||||
|| !domainHashes.add(res.domainHash()));
|
||||
|
||||
return new BrowseResultSet(results);
|
||||
}
|
||||
else {
|
||||
var domain = edgeDataStoreDao.getDomainId(new EdgeDomain(word));
|
||||
var neighbors = edgeDataStoreDao.getDomainNeighborsAdjacent(domain, blacklist, 45);
|
||||
|
||||
neighbors.removeIf(res -> !screenshotService.hasScreenshot(new EdgeId<>(res.domainId)));
|
||||
neighbors.removeIf(res ->
|
||||
!screenshotService.hasScreenshot(new EdgeId<>(res.domainId))
|
||||
|| !domainHashes.add(res.domainHash()));
|
||||
|
||||
return new BrowseResultSet(neighbors);
|
||||
}
|
||||
|
@ -8,4 +8,12 @@ import nu.marginalia.wmsa.edge.model.EdgeUrl;
|
||||
public class BrowseResult {
|
||||
public final EdgeUrl url;
|
||||
public final int domainId;
|
||||
|
||||
public String domainHash() {
|
||||
var domain = url.domain;
|
||||
if ("www".equals(domain.subDomain)) {
|
||||
return domain.domain;
|
||||
}
|
||||
return domain.toString();
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user