From 395da07abea528054a50326d711c2789fd5b27e1 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Sun, 30 Oct 2022 10:56:01 +0100 Subject: [PATCH] Sort browse:-results by relatedness if possible (#125) Co-authored-by: vlofgren Co-authored-by: vlofgren Reviewed-on: https://git.marginalia.nu/marginalia/marginalia.nu/pulls/125 --- .../wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java | 16 +++++++++------- .../wmsa/edge/dating/DatingService.java | 2 +- .../search/command/commands/BrowseCommand.java | 7 +++---- .../wmsa/edge/search/model/BrowseResult.java | 7 +------ .../edge/search/results/BrowseResultCleaner.java | 2 +- 5 files changed, 15 insertions(+), 19 deletions(-) diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java index 13eb644a..44fe223b 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java @@ -140,7 +140,8 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao { String q = """ SELECT EC_DOMAIN.ID, - NV.NEIGHBOR_NAME + NV.NEIGHBOR_NAME, + NV.RELATEDNESS FROM EC_NEIGHBORS_VIEW NV INNER JOIN DATA_DOMAIN_SCREENSHOT ON DATA_DOMAIN_SCREENSHOT.DOMAIN_NAME=NV.NEIGHBOR_NAME INNER JOIN EC_DOMAIN ON EC_DOMAIN.ID=NV.NEIGHBOR_ID @@ -158,9 +159,10 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao { while (rsp.next() && domains.size() < count) { int id = rsp.getInt(1); String domain = rsp.getString(2); + double relatedness = rsp.getDouble(3); if (!blacklist.isBlacklisted(id)) { - domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id)); + domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, relatedness)); } } } @@ -209,7 +211,7 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao { String domain = rsp.getString(2); if (!blacklist.isBlacklisted(id)) { - domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id)); + domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0)); } } } @@ -238,7 +240,7 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao { String domain = rsp.getString(2); if (!blacklist.isBlacklisted(id)) { - domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id)); + domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0)); } } } @@ -270,7 +272,7 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao { String domain = rsp.getString(2); if (!blacklist.isBlacklisted(id)) { - domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id)); + domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0)); } } } @@ -307,7 +309,7 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao { String domain = rsp.getString(2); if (!blacklist.isBlacklisted(id)) { - domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id)); + domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0)); } } } @@ -343,7 +345,7 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao { int id = rsp.getInt(1); String domain = rsp.getString(2); - ret.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id)); + ret.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0)); } } } diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingService.java index 1193960b..2cebe7dc 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingService.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingService.java @@ -169,7 +169,7 @@ public class DatingService extends Service { @NotNull private BrowseResult findViableDomain(DatingSessionObject session, BrowseResult res) { - while (!screenshotService.hasScreenshot(new EdgeId<>(res.domainId)) || session.isRecent(res)) { + while (!screenshotService.hasScreenshot(new EdgeId<>(res.domainId())) || session.isRecent(res)) { res = session.next(edgeDataStoreDao, blacklist); } return res; diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/command/commands/BrowseCommand.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/command/commands/BrowseCommand.java index 68a3a47b..f1db351e 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/command/commands/BrowseCommand.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/command/commands/BrowseCommand.java @@ -8,6 +8,7 @@ import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist; import nu.marginalia.wmsa.edge.model.EdgeDomain; import nu.marginalia.wmsa.edge.search.command.SearchCommandInterface; import nu.marginalia.wmsa.edge.search.command.SearchParameters; +import nu.marginalia.wmsa.edge.search.model.BrowseResult; import nu.marginalia.wmsa.edge.search.model.BrowseResultSet; import nu.marginalia.wmsa.edge.search.results.BrowseResultCleaner; import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer; @@ -16,10 +17,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; -import java.util.HashSet; -import java.util.Map; -import java.util.Optional; -import java.util.Set; +import java.util.*; import java.util.function.Predicate; import java.util.regex.Pattern; @@ -87,6 +85,7 @@ public class BrowseCommand implements SearchCommandInterface { var neighbors = edgeDataStoreDao.getDomainNeighborsAdjacent(domain, blacklist, 45); neighbors.removeIf(browseResultCleaner.shouldRemoveResultPredicate()); + neighbors.sort(Comparator.comparing(BrowseResult::relatedness).reversed()); return new BrowseResultSet(neighbors); } diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/model/BrowseResult.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/model/BrowseResult.java index 948c3b07..3a65ac47 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/model/BrowseResult.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/model/BrowseResult.java @@ -1,13 +1,8 @@ package nu.marginalia.wmsa.edge.search.model; -import lombok.Data; -import lombok.EqualsAndHashCode; import nu.marginalia.wmsa.edge.model.EdgeUrl; -@Data @EqualsAndHashCode -public class BrowseResult { - public final EdgeUrl url; - public final int domainId; +public record BrowseResult (EdgeUrl url, int domainId, double relatedness) { public String domainHash() { var domain = url.domain; diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/results/BrowseResultCleaner.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/results/BrowseResultCleaner.java index 01774205..e178171c 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/results/BrowseResultCleaner.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/results/BrowseResultCleaner.java @@ -22,7 +22,7 @@ public class BrowseResultCleaner { public Predicate shouldRemoveResultPredicate() { Set domainHashes = new HashSet<>(100); - return (res) -> !screenshotService.hasScreenshot(new EdgeId<>(res.domainId)) + return (res) -> !screenshotService.hasScreenshot(new EdgeId<>(res.domainId())) || !domainHashes.add(res.domainHash()); } }