diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDao.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDao.java index a66101dc..f2530c9f 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDao.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDao.java @@ -16,6 +16,8 @@ import java.util.Optional; public interface EdgeDataStoreDao { EdgeId getDomainId(EdgeDomain domain); + List getDomainNeighborsAdjacentCosine(EdgeId domainId, EdgeDomainBlacklist blacklist, int count); + List getDomainNeighborsAdjacent(EdgeId domainId, EdgeDomainBlacklist backlist, int count); List getRandomDomains(int count, EdgeDomainBlacklist backlist, int set); diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java index 44fe223b..7c0c681b 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java @@ -177,12 +177,6 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao { public List getDomainNeighborsAdjacent(EdgeId domainId, EdgeDomainBlacklist blacklist, int count) { final Set domains = new HashSet<>(count*3); - domains.addAll(getDomainNeighborsAdjacentCosine(domainId, blacklist, count)); - - if (domains.size() >= count) { - return new ArrayList<>(domains); - } - final String q = """ SELECT EC_DOMAIN.ID AS NEIGHBOR_ID, DOMAIN_NAME, COUNT(*) AS CNT FROM EC_DOMAIN_NEIGHBORS diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/command/commands/BrowseCommand.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/command/commands/BrowseCommand.java index f1db351e..c6434bdf 100644 --- 
a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/command/commands/BrowseCommand.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/command/commands/BrowseCommand.java @@ -23,7 +23,6 @@ import java.util.regex.Pattern; public class BrowseCommand implements SearchCommandInterface { private final EdgeDataStoreDao edgeDataStoreDao; - private final ScreenshotService screenshotService; private final EdgeDomainBlacklist blacklist; private final MustacheRenderer browseResultsRenderer; private final BrowseResultCleaner browseResultCleaner; @@ -32,14 +31,12 @@ public class BrowseCommand implements SearchCommandInterface { @Inject public BrowseCommand(EdgeDataStoreDao edgeDataStoreDao, - ScreenshotService screenshotService, EdgeDomainBlacklist blacklist, RendererFactory rendererFactory, BrowseResultCleaner browseResultCleaner) throws IOException { this.edgeDataStoreDao = edgeDataStoreDao; - this.screenshotService = screenshotService; this.blacklist = blacklist; this.browseResultCleaner = browseResultCleaner; @@ -61,33 +58,16 @@ public class BrowseCommand implements SearchCommandInterface { String definePrefix = "browse:"; String word = humanQuery.substring(definePrefix.length()).toLowerCase(); - Set domainHashes = new HashSet<>(); - try { if ("random".equals(word)) { - var results = edgeDataStoreDao.getRandomDomains(25, blacklist, 0); - - results.removeIf(browseResultCleaner.shouldRemoveResultPredicate()); - - return new BrowseResultSet(results); + return getRandomEntries(0); } if (word.startsWith("random:")) { int set = Integer.parseInt(word.split(":")[1]); - - var results = edgeDataStoreDao.getRandomDomains(25, blacklist, set); - - results.removeIf(browseResultCleaner.shouldRemoveResultPredicate()); - - return new BrowseResultSet(results); + return getRandomEntries(set); } else { - var domain = edgeDataStoreDao.getDomainId(new EdgeDomain(word)); - var neighbors = edgeDataStoreDao.getDomainNeighborsAdjacent(domain, blacklist, 45); - - 
neighbors.removeIf(browseResultCleaner.shouldRemoveResultPredicate()); - neighbors.sort(Comparator.comparing(BrowseResult::relatedness).reversed()); - - return new BrowseResultSet(neighbors); + return getRelatedEntries(word); } } catch (Exception ex) { @@ -96,4 +76,33 @@ public class BrowseCommand implements SearchCommandInterface { } } + private BrowseResultSet getRandomEntries(int set) { + var results = edgeDataStoreDao.getRandomDomains(25, blacklist, set); + + results.removeIf(browseResultCleaner.shouldRemoveResultPredicate()); + + return new BrowseResultSet(results); + } + + private BrowseResultSet getRelatedEntries(String word) { + var domain = edgeDataStoreDao.getDomainId(new EdgeDomain(word)); + + var neighbors = edgeDataStoreDao.getDomainNeighborsAdjacentCosine(domain, blacklist, 256); + neighbors.removeIf(browseResultCleaner.shouldRemoveResultPredicate()); + + // Fewer than 25 cosine neighbors survived filtering: merge in (de-duplicated via a set) the lower-quality getDomainNeighborsAdjacent results, then filter again + if (neighbors.size() < 25) { + Set allNeighbors = new HashSet<>(neighbors); + allNeighbors.addAll(edgeDataStoreDao.getDomainNeighborsAdjacent(domain, blacklist, 50)); + + neighbors.clear(); + neighbors.addAll(allNeighbors); + neighbors.removeIf(browseResultCleaner.shouldRemoveResultPredicate()); + } + + neighbors.sort(Comparator.comparing(BrowseResult::relatedness).reversed()); + + return new BrowseResultSet(neighbors); + } + }