Fetch more browse:domain-results. (#126)

Co-authored-by: vlofgren <vlofgren@gmail.com>
Co-authored-by: vlofgren <vlofgren@marginalia.nu>
Reviewed-on: https://git.marginalia.nu/marginalia/marginalia.nu/pulls/126
This commit is contained in:
Viktor Lofgren 2022-10-30 11:33:45 +01:00
parent 395da07abe
commit fd18240ec7
3 changed files with 34 additions and 29 deletions

View File

@ -16,6 +16,8 @@ import java.util.Optional;
public interface EdgeDataStoreDao { public interface EdgeDataStoreDao {
EdgeId<EdgeDomain> getDomainId(EdgeDomain domain); EdgeId<EdgeDomain> getDomainId(EdgeDomain domain);
List<BrowseResult> getDomainNeighborsAdjacentCosine(EdgeId<EdgeDomain> domainId, EdgeDomainBlacklist blacklist, int count);
List<BrowseResult> getDomainNeighborsAdjacent(EdgeId<EdgeDomain> domainId, EdgeDomainBlacklist backlist, int count); List<BrowseResult> getDomainNeighborsAdjacent(EdgeId<EdgeDomain> domainId, EdgeDomainBlacklist backlist, int count);
List<BrowseResult> getRandomDomains(int count, EdgeDomainBlacklist backlist, int set); List<BrowseResult> getRandomDomains(int count, EdgeDomainBlacklist backlist, int set);

View File

@ -177,12 +177,6 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao {
public List<BrowseResult> getDomainNeighborsAdjacent(EdgeId<EdgeDomain> domainId, EdgeDomainBlacklist blacklist, int count) { public List<BrowseResult> getDomainNeighborsAdjacent(EdgeId<EdgeDomain> domainId, EdgeDomainBlacklist blacklist, int count) {
final Set<BrowseResult> domains = new HashSet<>(count*3); final Set<BrowseResult> domains = new HashSet<>(count*3);
domains.addAll(getDomainNeighborsAdjacentCosine(domainId, blacklist, count));
if (domains.size() >= count) {
return new ArrayList<>(domains);
}
final String q = """ final String q = """
SELECT EC_DOMAIN.ID AS NEIGHBOR_ID, DOMAIN_NAME, COUNT(*) AS CNT SELECT EC_DOMAIN.ID AS NEIGHBOR_ID, DOMAIN_NAME, COUNT(*) AS CNT
FROM EC_DOMAIN_NEIGHBORS FROM EC_DOMAIN_NEIGHBORS

View File

@ -23,7 +23,6 @@ import java.util.regex.Pattern;
public class BrowseCommand implements SearchCommandInterface { public class BrowseCommand implements SearchCommandInterface {
private final EdgeDataStoreDao edgeDataStoreDao; private final EdgeDataStoreDao edgeDataStoreDao;
private final ScreenshotService screenshotService;
private final EdgeDomainBlacklist blacklist; private final EdgeDomainBlacklist blacklist;
private final MustacheRenderer<BrowseResultSet> browseResultsRenderer; private final MustacheRenderer<BrowseResultSet> browseResultsRenderer;
private final BrowseResultCleaner browseResultCleaner; private final BrowseResultCleaner browseResultCleaner;
@ -32,14 +31,12 @@ public class BrowseCommand implements SearchCommandInterface {
@Inject @Inject
public BrowseCommand(EdgeDataStoreDao edgeDataStoreDao, public BrowseCommand(EdgeDataStoreDao edgeDataStoreDao,
ScreenshotService screenshotService,
EdgeDomainBlacklist blacklist, EdgeDomainBlacklist blacklist,
RendererFactory rendererFactory, RendererFactory rendererFactory,
BrowseResultCleaner browseResultCleaner) BrowseResultCleaner browseResultCleaner)
throws IOException throws IOException
{ {
this.edgeDataStoreDao = edgeDataStoreDao; this.edgeDataStoreDao = edgeDataStoreDao;
this.screenshotService = screenshotService;
this.blacklist = blacklist; this.blacklist = blacklist;
this.browseResultCleaner = browseResultCleaner; this.browseResultCleaner = browseResultCleaner;
@ -61,33 +58,16 @@ public class BrowseCommand implements SearchCommandInterface {
String definePrefix = "browse:"; String definePrefix = "browse:";
String word = humanQuery.substring(definePrefix.length()).toLowerCase(); String word = humanQuery.substring(definePrefix.length()).toLowerCase();
Set<String> domainHashes = new HashSet<>();
try { try {
if ("random".equals(word)) { if ("random".equals(word)) {
var results = edgeDataStoreDao.getRandomDomains(25, blacklist, 0); return getRandomEntries(0);
results.removeIf(browseResultCleaner.shouldRemoveResultPredicate());
return new BrowseResultSet(results);
} }
if (word.startsWith("random:")) { if (word.startsWith("random:")) {
int set = Integer.parseInt(word.split(":")[1]); int set = Integer.parseInt(word.split(":")[1]);
return getRandomEntries(set);
var results = edgeDataStoreDao.getRandomDomains(25, blacklist, set);
results.removeIf(browseResultCleaner.shouldRemoveResultPredicate());
return new BrowseResultSet(results);
} }
else { else {
var domain = edgeDataStoreDao.getDomainId(new EdgeDomain(word)); return getRelatedEntries(word);
var neighbors = edgeDataStoreDao.getDomainNeighborsAdjacent(domain, blacklist, 45);
neighbors.removeIf(browseResultCleaner.shouldRemoveResultPredicate());
neighbors.sort(Comparator.comparing(BrowseResult::relatedness).reversed());
return new BrowseResultSet(neighbors);
} }
} }
catch (Exception ex) { catch (Exception ex) {
@ -96,4 +76,33 @@ public class BrowseCommand implements SearchCommandInterface {
} }
} }
/**
 * Builds the result set for a "browse:random" style query.
 *
 * Requests 25 random domains for the given set from the data store,
 * drops every entry rejected by the result cleaner, and wraps what
 * remains in a {@link BrowseResultSet}.
 *
 * @param set the set number forwarded to {@code getRandomDomains}
 * @return the filtered random domains
 */
private BrowseResultSet getRandomEntries(int set) {
    final var randomDomains = edgeDataStoreDao.getRandomDomains(25, blacklist, set);

    // Obtain a fresh removal predicate for this pass and filter the list in place.
    final var rejected = browseResultCleaner.shouldRemoveResultPredicate();
    randomDomains.removeIf(rejected);

    return new BrowseResultSet(randomDomains);
}
/**
 * Builds the result set for a "browse:some-domain" style query.
 *
 * The cosine-based neighbor lookup is tried first (requesting 256 entries).
 * If fewer than 25 entries survive cleaning, the results are merged with
 * those of the plain adjacency lookup (requesting 50 entries) and cleaned
 * again. The final list is ordered by descending relatedness.
 *
 * @param word the domain name taken from the query
 * @return the cleaned, relatedness-sorted neighbors of the domain
 */
private BrowseResultSet getRelatedEntries(String word) {
    final var domainId = edgeDataStoreDao.getDomainId(new EdgeDomain(word));

    final var related = edgeDataStoreDao.getDomainNeighborsAdjacentCosine(domainId, blacklist, 256);
    related.removeIf(browseResultCleaner.shouldRemoveResultPredicate());

    // Too few results: supplement with the alternative, lower-quality algorithm.
    final boolean needsSupplement = related.size() < 25;
    if (needsSupplement) {
        // Merge through a set so entries found by both lookups appear once.
        final Set<BrowseResult> merged = new HashSet<>(related);
        merged.addAll(edgeDataStoreDao.getDomainNeighborsAdjacent(domainId, blacklist, 50));

        related.clear();
        related.addAll(merged);

        // The supplemental entries have not been cleaned yet; request a
        // new predicate from the cleaner and filter the merged list.
        related.removeIf(browseResultCleaner.shouldRemoveResultPredicate());
    }

    final Comparator<BrowseResult> byRelatedness = Comparator.comparing(BrowseResult::relatedness);
    related.sort(byRelatedness.reversed());

    return new BrowseResultSet(related);
}
} }