2023-03-04 12:19:01 +00:00
|
|
|
package nu.marginalia.browse;
|
|
|
|
|
|
|
|
import com.google.inject.Inject;
|
|
|
|
import com.google.inject.Singleton;
|
|
|
|
import com.zaxxer.hikari.HikariDataSource;
|
|
|
|
import nu.marginalia.browse.model.BrowseResult;
|
2023-03-25 14:26:17 +00:00
|
|
|
import nu.marginalia.db.DomainBlacklist;
|
2024-09-27 11:45:54 +00:00
|
|
|
import nu.marginalia.model.EdgeDomain;
|
2023-03-04 12:19:01 +00:00
|
|
|
import org.slf4j.Logger;
|
|
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
|
|
|
|
import java.sql.SQLException;
|
2024-09-27 11:45:54 +00:00
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.List;
|
2023-03-04 12:19:01 +00:00
|
|
|
|
|
|
|
@Singleton
|
|
|
|
public class DbBrowseDomainsSimilarCosine {
|
|
|
|
|
|
|
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
|
|
|
private final HikariDataSource dataSource;
|
|
|
|
|
|
|
|
@Inject
|
|
|
|
public DbBrowseDomainsSimilarCosine(HikariDataSource dataSource) {
|
|
|
|
this.dataSource = dataSource;
|
|
|
|
}
|
|
|
|
|
2023-12-04 21:10:24 +00:00
|
|
|
public List<BrowseResult> getDomainNeighborsAdjacentCosineRequireScreenshot(int domainId, DomainBlacklist blacklist, int count) {
|
2023-03-04 12:19:01 +00:00
|
|
|
List<BrowseResult> domains = new ArrayList<>(count);
|
|
|
|
|
|
|
|
String q = """
|
|
|
|
SELECT
|
|
|
|
EC_DOMAIN.ID,
|
|
|
|
NV.NEIGHBOR_NAME,
|
2023-12-04 21:10:24 +00:00
|
|
|
NV.RELATEDNESS,
|
|
|
|
EC_DOMAIN.INDEXED
|
2023-03-04 12:19:01 +00:00
|
|
|
FROM EC_NEIGHBORS_VIEW NV
|
|
|
|
INNER JOIN DATA_DOMAIN_SCREENSHOT ON DATA_DOMAIN_SCREENSHOT.DOMAIN_NAME=NV.NEIGHBOR_NAME
|
|
|
|
INNER JOIN EC_DOMAIN ON EC_DOMAIN.ID=NV.NEIGHBOR_ID
|
|
|
|
WHERE NV.DOMAIN_ID=?
|
|
|
|
GROUP BY NV.NEIGHBOR_ID
|
|
|
|
ORDER BY NV.RELATEDNESS DESC
|
|
|
|
""";
|
|
|
|
|
|
|
|
try (var connection = dataSource.getConnection()) {
|
|
|
|
try (var stmt = connection.prepareStatement(q)) {
|
|
|
|
stmt.setFetchSize(count);
|
2023-08-24 15:46:02 +00:00
|
|
|
stmt.setInt(1, domainId);
|
2023-03-04 12:19:01 +00:00
|
|
|
stmt.setInt(2, count);
|
|
|
|
var rsp = stmt.executeQuery();
|
|
|
|
while (rsp.next() && domains.size() < count) {
|
|
|
|
int id = rsp.getInt(1);
|
|
|
|
String domain = rsp.getString(2);
|
|
|
|
double relatedness = rsp.getDouble(3);
|
2023-12-04 21:10:24 +00:00
|
|
|
boolean indexed = rsp.getBoolean("INDEXED");
|
2023-03-04 12:19:01 +00:00
|
|
|
|
|
|
|
if (!blacklist.isBlacklisted(id)) {
|
2024-09-27 11:45:54 +00:00
|
|
|
domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrlHttp(), id, relatedness, indexed));
|
2023-03-04 12:19:01 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} catch (SQLException throwables) {
|
|
|
|
throwables.printStackTrace();
|
|
|
|
}
|
|
|
|
|
|
|
|
return domains;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|