Sort results by relatedness where possible.

This commit is contained in:
vlofgren 2022-10-30 10:49:37 +01:00
parent 4a296f70e1
commit b97f425f7e
5 changed files with 15 additions and 19 deletions

View File

@@ -140,7 +140,8 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao {
String q = """
SELECT
EC_DOMAIN.ID,
-NV.NEIGHBOR_NAME
+NV.NEIGHBOR_NAME,
+NV.RELATEDNESS
FROM EC_NEIGHBORS_VIEW NV
INNER JOIN DATA_DOMAIN_SCREENSHOT ON DATA_DOMAIN_SCREENSHOT.DOMAIN_NAME=NV.NEIGHBOR_NAME
INNER JOIN EC_DOMAIN ON EC_DOMAIN.ID=NV.NEIGHBOR_ID
@@ -158,9 +159,10 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao {
while (rsp.next() && domains.size() < count) {
int id = rsp.getInt(1);
String domain = rsp.getString(2);
+double relatedness = rsp.getDouble(3);
if (!blacklist.isBlacklisted(id)) {
-domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id));
+domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, relatedness));
}
}
}
@@ -209,7 +211,7 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao {
String domain = rsp.getString(2);
if (!blacklist.isBlacklisted(id)) {
-domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id));
+domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0));
}
}
}
@@ -238,7 +240,7 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao {
String domain = rsp.getString(2);
if (!blacklist.isBlacklisted(id)) {
-domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id));
+domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0));
}
}
}
@@ -270,7 +272,7 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao {
String domain = rsp.getString(2);
if (!blacklist.isBlacklisted(id)) {
-domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id));
+domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0));
}
}
}
@@ -307,7 +309,7 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao {
String domain = rsp.getString(2);
if (!blacklist.isBlacklisted(id)) {
-domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id));
+domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0));
}
}
}
@@ -343,7 +345,7 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao {
int id = rsp.getInt(1);
String domain = rsp.getString(2);
-ret.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id));
+ret.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0));
}
}
}

View File

@@ -169,7 +169,7 @@ public class DatingService extends Service {
@NotNull
private BrowseResult findViableDomain(DatingSessionObject session, BrowseResult res) {
-while (!screenshotService.hasScreenshot(new EdgeId<>(res.domainId)) || session.isRecent(res)) {
+while (!screenshotService.hasScreenshot(new EdgeId<>(res.domainId())) || session.isRecent(res)) {
res = session.next(edgeDataStoreDao, blacklist);
}
return res;

View File

@@ -8,6 +8,7 @@ import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist;
import nu.marginalia.wmsa.edge.model.EdgeDomain;
import nu.marginalia.wmsa.edge.search.command.SearchCommandInterface;
import nu.marginalia.wmsa.edge.search.command.SearchParameters;
+import nu.marginalia.wmsa.edge.search.model.BrowseResult;
import nu.marginalia.wmsa.edge.search.model.BrowseResultSet;
import nu.marginalia.wmsa.edge.search.results.BrowseResultCleaner;
import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer;
@@ -16,10 +17,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Optional;
-import java.util.Set;
+import java.util.*;
import java.util.function.Predicate;
import java.util.regex.Pattern;
@@ -87,6 +85,7 @@ public class BrowseCommand implements SearchCommandInterface {
var neighbors = edgeDataStoreDao.getDomainNeighborsAdjacent(domain, blacklist, 45);
neighbors.removeIf(browseResultCleaner.shouldRemoveResultPredicate());
+neighbors.sort(Comparator.comparing(BrowseResult::relatedness).reversed());
return new BrowseResultSet(neighbors);
}

View File

@@ -1,13 +1,8 @@
package nu.marginalia.wmsa.edge.search.model;
-import lombok.Data;
-import lombok.EqualsAndHashCode;
import nu.marginalia.wmsa.edge.model.EdgeUrl;
-@Data @EqualsAndHashCode
-public class BrowseResult {
-public final EdgeUrl url;
-public final int domainId;
+public record BrowseResult (EdgeUrl url, int domainId, double relatedness) {
public String domainHash() {
var domain = url.domain;

View File

@@ -22,7 +22,7 @@ public class BrowseResultCleaner {
public Predicate<BrowseResult> shouldRemoveResultPredicate() {
Set<String> domainHashes = new HashSet<>(100);
-return (res) -> !screenshotService.hasScreenshot(new EdgeId<>(res.domainId))
+return (res) -> !screenshotService.hasScreenshot(new EdgeId<>(res.domainId()))
|| !domainHashes.add(res.domainHash());
}
}