mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
Prefer cosine similarity relatedness for browse:-queries. (#123)
Co-authored-by: vlofgren <vlofgren@gmail.com> Co-authored-by: vlofgren <vlofgren@marginalia.nu> Reviewed-on: https://git.marginalia.nu/marginalia/marginalia.nu/pulls/123
This commit is contained in:
parent
e676d8729e
commit
c559611185
@ -134,10 +134,53 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public List<BrowseResult> getDomainNeighborsAdjacentCosine(EdgeId<EdgeDomain> domainId, EdgeDomainBlacklist blacklist, int count) {
|
||||||
|
List<BrowseResult> domains = new ArrayList<>(count);
|
||||||
|
|
||||||
|
String q = """
|
||||||
|
SELECT
|
||||||
|
EC_DOMAIN.ID,
|
||||||
|
NV.NEIGHBOR_NAME
|
||||||
|
FROM EC_NEIGHBORS_VIEW NV
|
||||||
|
INNER JOIN DATA_DOMAIN_SCREENSHOT ON DATA_DOMAIN_SCREENSHOT.DOMAIN_NAME=NV.NEIGHBOR_NAME
|
||||||
|
INNER JOIN EC_DOMAIN ON EC_DOMAIN.ID=NV.NEIGHBOR_ID
|
||||||
|
WHERE NV.DOMAIN_ID=?
|
||||||
|
GROUP BY NV.NEIGHBOR_ID
|
||||||
|
ORDER BY NV.RELATEDNESS DESC
|
||||||
|
""";
|
||||||
|
|
||||||
|
try (var connection = dataSource.getConnection()) {
|
||||||
|
try (var stmt = connection.prepareStatement(q)) {
|
||||||
|
stmt.setFetchSize(count);
|
||||||
|
stmt.setInt(1, domainId.id());
|
||||||
|
stmt.setInt(2, count);
|
||||||
|
var rsp = stmt.executeQuery();
|
||||||
|
while (rsp.next() && domains.size() < count) {
|
||||||
|
int id = rsp.getInt(1);
|
||||||
|
String domain = rsp.getString(2);
|
||||||
|
|
||||||
|
if (!blacklist.isBlacklisted(id)) {
|
||||||
|
domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (SQLException throwables) {
|
||||||
|
throwables.printStackTrace();
|
||||||
|
}
|
||||||
|
|
||||||
|
return domains;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<BrowseResult> getDomainNeighborsAdjacent(EdgeId<EdgeDomain> domainId, EdgeDomainBlacklist blacklist, int count) {
|
public List<BrowseResult> getDomainNeighborsAdjacent(EdgeId<EdgeDomain> domainId, EdgeDomainBlacklist blacklist, int count) {
|
||||||
final Set<BrowseResult> domains = new HashSet<>(count*3);
|
final Set<BrowseResult> domains = new HashSet<>(count*3);
|
||||||
|
|
||||||
|
domains.addAll(getDomainNeighborsAdjacentCosine(domainId, blacklist, count));
|
||||||
|
|
||||||
|
if (domains.size() >= count) {
|
||||||
|
return new ArrayList<>(domains);
|
||||||
|
}
|
||||||
|
|
||||||
final String q = """
|
final String q = """
|
||||||
SELECT EC_DOMAIN.ID AS NEIGHBOR_ID, DOMAIN_NAME, COUNT(*) AS CNT
|
SELECT EC_DOMAIN.ID AS NEIGHBOR_ID, DOMAIN_NAME, COUNT(*) AS CNT
|
||||||
FROM EC_DOMAIN_NEIGHBORS
|
FROM EC_DOMAIN_NEIGHBORS
|
||||||
|
Loading…
Reference in New Issue
Block a user