mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(system) Remove EdgeId<T> and similar objects
They seemed like a good idea at the time, but in practice they're wasting resources and not really providing the clarity I had hoped for.
This commit is contained in:
parent
c909120ae1
commit
1e6800565a
@ -2,8 +2,6 @@ package nu.marginalia.index.client.model.results;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.model.id.EdgeId;
|
||||
import nu.marginalia.model.id.UrlIdCodec;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
|
||||
|
@ -9,16 +9,16 @@ import com.google.inject.Singleton;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.model.id.EdgeId;
|
||||
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.Optional;
|
||||
import java.util.OptionalInt;
|
||||
|
||||
@Singleton
|
||||
public class DbDomainQueries {
|
||||
private final HikariDataSource dataSource;
|
||||
|
||||
private final Cache<EdgeDomain, EdgeId<EdgeDomain>> domainIdCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
|
||||
private final Cache<EdgeDomain, Integer> domainIdCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
|
||||
|
||||
@Inject
|
||||
public DbDomainQueries(HikariDataSource dataSource)
|
||||
@ -28,7 +28,7 @@ public class DbDomainQueries {
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public EdgeId<EdgeDomain> getDomainId(EdgeDomain domain) {
|
||||
public Integer getDomainId(EdgeDomain domain) {
|
||||
try (var connection = dataSource.getConnection()) {
|
||||
|
||||
return domainIdCache.get(domain, () -> {
|
||||
@ -36,7 +36,7 @@ public class DbDomainQueries {
|
||||
stmt.setString(1, domain.toString());
|
||||
var rsp = stmt.executeQuery();
|
||||
if (rsp.next()) {
|
||||
return new EdgeId<>(rsp.getInt(1));
|
||||
return rsp.getInt(1);
|
||||
}
|
||||
}
|
||||
throw new NoSuchElementException();
|
||||
@ -48,12 +48,12 @@ public class DbDomainQueries {
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
public Optional<EdgeId<EdgeDomain>> tryGetDomainId(EdgeDomain domain) {
|
||||
public OptionalInt tryGetDomainId(EdgeDomain domain) {
|
||||
|
||||
var maybe = Optional.ofNullable(domainIdCache.getIfPresent(domain));
|
||||
|
||||
if (maybe.isPresent())
|
||||
return maybe;
|
||||
Integer maybeId = domainIdCache.getIfPresent(domain);
|
||||
if (maybeId != null) {
|
||||
return OptionalInt.of(maybeId);
|
||||
}
|
||||
|
||||
try (var connection = dataSource.getConnection()) {
|
||||
|
||||
@ -61,25 +61,25 @@ public class DbDomainQueries {
|
||||
stmt.setString(1, domain.toString());
|
||||
var rsp = stmt.executeQuery();
|
||||
if (rsp.next()) {
|
||||
var id = new EdgeId<EdgeDomain>(rsp.getInt(1));
|
||||
var id = rsp.getInt(1);
|
||||
|
||||
domainIdCache.put(domain, id);
|
||||
return Optional.of(id);
|
||||
return OptionalInt.of(id);
|
||||
}
|
||||
}
|
||||
return Optional.empty();
|
||||
return OptionalInt.empty();
|
||||
}
|
||||
catch (UncheckedExecutionException ex) {
|
||||
return Optional.empty();
|
||||
return OptionalInt.empty();
|
||||
}
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
public Optional<EdgeDomain> getDomain(EdgeId<EdgeDomain> id) {
|
||||
public Optional<EdgeDomain> getDomain(int id) {
|
||||
try (var connection = dataSource.getConnection()) {
|
||||
|
||||
try (var stmt = connection.prepareStatement("SELECT DOMAIN_NAME FROM EC_DOMAIN WHERE ID=?")) {
|
||||
stmt.setInt(1, id.id());
|
||||
stmt.setInt(1, id);
|
||||
var rsp = stmt.executeQuery();
|
||||
if (rsp.next()) {
|
||||
return Optional.of(new EdgeDomain(rsp.getString(1)));
|
||||
|
@ -2,15 +2,10 @@ package nu.marginalia.db;
|
||||
|
||||
import com.google.inject.ImplementedBy;
|
||||
import gnu.trove.set.hash.TIntHashSet;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.model.id.EdgeId;
|
||||
|
||||
@ImplementedBy(DomainBlacklistImpl.class)
|
||||
public interface DomainBlacklist {
|
||||
boolean isBlacklisted(int domainId);
|
||||
default boolean isBlacklisted(EdgeId<EdgeDomain> domainId) {
|
||||
return isBlacklisted(domainId.id());
|
||||
}
|
||||
default TIntHashSet getSpamDomains() {
|
||||
return new TIntHashSet();
|
||||
}
|
||||
|
@ -1,8 +1,9 @@
|
||||
package nu.marginalia.db;
|
||||
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import gnu.trove.list.TIntList;
|
||||
import gnu.trove.list.array.TIntArrayList;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.model.id.EdgeIdList;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.slf4j.Logger;
|
||||
|
||||
@ -58,10 +59,10 @@ public class DomainTypes {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/** Retrieve the EdgeId of all domains of a certain type,
|
||||
/** Retrieve the domain id of all domains of a certain type,
|
||||
* ignoring entries that are not in the EC_DOMAIN table */
|
||||
public EdgeIdList<EdgeDomain> getKnownDomainsByType(Type type) {
|
||||
EdgeIdList<EdgeDomain> ret = new EdgeIdList<>();
|
||||
public TIntList getKnownDomainsByType(Type type) {
|
||||
TIntList ret = new TIntArrayList();
|
||||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
|
@ -6,7 +6,6 @@ import nu.marginalia.bigstring.BigString;
|
||||
import nu.marginalia.bigstring.CompressedBigString;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.model.id.EdgeId;
|
||||
|
||||
import java.net.URISyntaxException;
|
||||
|
||||
@ -24,8 +23,6 @@ public class GsonFactory {
|
||||
}
|
||||
})
|
||||
.registerTypeAdapter(EdgeDomain.class, (JsonDeserializer<EdgeDomain>) (json, typeOfT, context) -> new EdgeDomain(json.getAsString()))
|
||||
.registerTypeAdapter(EdgeId.class, (JsonDeserializer<EdgeId<?>>) (json, typeOfT, context) -> new EdgeId<>(json.getAsInt()))
|
||||
.registerTypeAdapter(EdgeId.class, (JsonSerializer<EdgeId<?>>) (src, typeOfSrc, context) -> new JsonPrimitive(src.id()))
|
||||
.registerTypeAdapter(BigString.class, (JsonDeserializer<BigString>) (json, typeOfT, context) -> BigString.encode(json.getAsString()))
|
||||
.registerTypeAdapter(BigString.class, (JsonSerializer<BigString>) (src, typeOfT, context) -> new JsonPrimitive(src.decode()))
|
||||
.registerTypeAdapter(CompressedBigString.class, (JsonSerializer<BigString>) (src, typeOfT, context) -> new JsonPrimitive(src.decode()))
|
||||
|
@ -1,11 +0,0 @@
|
||||
package nu.marginalia.model.id;
|
||||
|
||||
|
||||
/**
|
||||
* This exists entirely for strengthening the typing of IDs
|
||||
* Deprecated: We don't use this anymore
|
||||
* @param <T>
|
||||
*/
|
||||
@Deprecated
|
||||
public record EdgeId<T>(int id) {
|
||||
}
|
@ -1,35 +0,0 @@
|
||||
package nu.marginalia.model.id;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
@Deprecated
|
||||
public record EdgeIdArray<T> (int... values) implements EdgeIdCollection<T> {
|
||||
|
||||
public static <T> EdgeIdArray<T> gather(IntStream stream) {
|
||||
return new EdgeIdArray<>(stream.toArray());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] values() {
|
||||
return values;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isEmpty() {
|
||||
return values.length == 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return values.length;
|
||||
}
|
||||
|
||||
public int get(int idx) {
|
||||
return values[idx];
|
||||
}
|
||||
|
||||
public void sort() {
|
||||
Arrays.sort(values);
|
||||
}
|
||||
}
|
@ -1,29 +0,0 @@
|
||||
package nu.marginalia.model.id;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
@Deprecated
|
||||
public interface EdgeIdCollection<T> extends Iterable<EdgeId<T>> {
|
||||
int size();
|
||||
boolean isEmpty();
|
||||
int[] values();
|
||||
|
||||
default IntStream stream() {
|
||||
return Arrays.stream(values());
|
||||
}
|
||||
|
||||
default Iterator<EdgeId<T>> iterator() {
|
||||
return Arrays.stream(values()).mapToObj(EdgeId<T>::new).iterator();
|
||||
}
|
||||
default EdgeIdArray<T> asArray() {
|
||||
return new EdgeIdArray<>(values());
|
||||
}
|
||||
default EdgeIdList<T> asList() {
|
||||
return new EdgeIdList<>(values());
|
||||
}
|
||||
default EdgeIdSet<T> asSet() {
|
||||
return new EdgeIdSet<>(values());
|
||||
}
|
||||
}
|
@ -1,13 +0,0 @@
|
||||
package nu.marginalia.model.id;
|
||||
|
||||
import gnu.trove.TIntCollection;
|
||||
|
||||
@Deprecated
|
||||
public interface EdgeIdCollectionMutable<T> {
|
||||
TIntCollection underlyingCollection();
|
||||
|
||||
default void addAll(EdgeIdArray<T> other) { underlyingCollection().addAll(other.values()); }
|
||||
default void addAll(EdgeIdList<T> other) { underlyingCollection().addAll(other.list()); }
|
||||
default void addAll(EdgeIdCollection<T> other) { underlyingCollection().addAll(other.values()); }
|
||||
|
||||
}
|
@ -1,49 +0,0 @@
|
||||
package nu.marginalia.model.id;
|
||||
|
||||
import gnu.trove.TIntCollection;
|
||||
import gnu.trove.list.array.TIntArrayList;
|
||||
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
@Deprecated
|
||||
public record EdgeIdList<T> (TIntArrayList list) implements
|
||||
EdgeIdCollection<T>,
|
||||
EdgeIdCollectionMutable<T> {
|
||||
|
||||
public EdgeIdList(int... values) { this(new TIntArrayList(values)); }
|
||||
public static <T> EdgeIdList<T> gather(IntStream stream) {
|
||||
return stream.collect(EdgeIdList::new, EdgeIdList::add, EdgeIdList::addAll);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] values() {
|
||||
return list.toArray();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isEmpty() {
|
||||
return list.isEmpty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return list.size();
|
||||
}
|
||||
|
||||
public int get(int idx) {
|
||||
return list.get(idx);
|
||||
}
|
||||
|
||||
public void add(int id) {
|
||||
list.add(id);
|
||||
}
|
||||
|
||||
public void sort() {
|
||||
list.sort();
|
||||
}
|
||||
|
||||
@Override
|
||||
public TIntCollection underlyingCollection() {
|
||||
return list;
|
||||
}
|
||||
}
|
@ -1,53 +0,0 @@
|
||||
package nu.marginalia.model.id;
|
||||
|
||||
import gnu.trove.TIntCollection;
|
||||
import gnu.trove.set.hash.TIntHashSet;
|
||||
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
@Deprecated
|
||||
public record EdgeIdSet<T> (TIntHashSet set) implements EdgeIdCollection<T>, EdgeIdCollectionMutable<T> {
|
||||
|
||||
public EdgeIdSet(int... values) {
|
||||
this(new TIntHashSet(values.length, 0.5f, -1));
|
||||
|
||||
set.addAll(values);
|
||||
}
|
||||
|
||||
public EdgeIdSet(int initialCapacity, float loadFactor) {
|
||||
this(new TIntHashSet(initialCapacity, loadFactor, -1));
|
||||
}
|
||||
|
||||
@Override
|
||||
public TIntCollection underlyingCollection() {
|
||||
return set;
|
||||
}
|
||||
|
||||
public static <T> EdgeIdSet<T> gather(IntStream stream) {
|
||||
return new EdgeIdSet<>(stream.toArray());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] values() {
|
||||
return set.toArray();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isEmpty() {
|
||||
return set.isEmpty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return set.size();
|
||||
}
|
||||
|
||||
public boolean contains(int id) {
|
||||
return set.contains(id);
|
||||
}
|
||||
public boolean add(int id) {
|
||||
return set.add(id);
|
||||
}
|
||||
public boolean remove(int id) { return set.remove(id); }
|
||||
|
||||
}
|
@ -1,8 +1,5 @@
|
||||
package nu.marginalia.index.journal.model;
|
||||
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.model.id.EdgeId;
|
||||
import nu.marginalia.model.id.UrlIdCodec;
|
||||
|
||||
public record IndexJournalEntry(IndexJournalEntryHeader header, IndexJournalEntryData data) {
|
||||
|
@ -1,71 +0,0 @@
|
||||
package nu.marginalia.browse;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import nu.marginalia.browse.model.BrowseResult;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.model.id.EdgeIdCollection;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.sql.SQLException;
|
||||
import java.util.*;
|
||||
|
||||
@Singleton
|
||||
public class DbBrowseDomainsFromUrlId {
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
private final HikariDataSource dataSource;
|
||||
|
||||
@Inject
|
||||
public DbBrowseDomainsFromUrlId(HikariDataSource dataSource) {
|
||||
this.dataSource = dataSource;
|
||||
}
|
||||
|
||||
private <T> String idList(EdgeIdCollection<EdgeUrl> ids) {
|
||||
StringJoiner j = new StringJoiner(",", "(", ")");
|
||||
for (var id : ids.values()) {
|
||||
j.add(Integer.toString(id));
|
||||
}
|
||||
return j.toString();
|
||||
}
|
||||
|
||||
public List<BrowseResult> getBrowseResultFromUrlIds(EdgeIdCollection<EdgeUrl> urlIds) {
|
||||
if (urlIds.isEmpty())
|
||||
return Collections.emptyList();
|
||||
|
||||
List<BrowseResult> ret = new ArrayList<>(urlIds.size());
|
||||
|
||||
try (var conn = dataSource.getConnection()) {
|
||||
try (var stmt = conn.createStatement()) {
|
||||
|
||||
String inStmt = idList(urlIds);
|
||||
|
||||
var rsp = stmt.executeQuery("""
|
||||
SELECT DOMAIN_ID, DOMAIN_NAME
|
||||
FROM EC_URL_VIEW
|
||||
INNER JOIN DOMAIN_METADATA ON EC_URL_VIEW.DOMAIN_ID=DOMAIN_METADATA.ID
|
||||
WHERE
|
||||
KNOWN_URLS<5000
|
||||
AND QUALITY>-10
|
||||
AND EC_URL_VIEW.ID IN
|
||||
""" + inStmt); // this injection is safe, inStmt is derived from concatenating a list of integers
|
||||
while (rsp.next()) {
|
||||
int id = rsp.getInt(1);
|
||||
String domain = rsp.getString(2);
|
||||
|
||||
ret.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0));
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (SQLException ex) {
|
||||
logger.error("SQL error", ex);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -6,7 +6,6 @@ import com.zaxxer.hikari.HikariDataSource;
|
||||
import nu.marginalia.browse.model.BrowseResult;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.db.DomainBlacklist;
|
||||
import nu.marginalia.model.id.EdgeId;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@ -24,7 +23,7 @@ public class DbBrowseDomainsSimilarCosine {
|
||||
this.dataSource = dataSource;
|
||||
}
|
||||
|
||||
public List<BrowseResult> getDomainNeighborsAdjacentCosine(EdgeId<EdgeDomain> domainId, DomainBlacklist blacklist, int count) {
|
||||
public List<BrowseResult> getDomainNeighborsAdjacentCosine(int domainId, DomainBlacklist blacklist, int count) {
|
||||
List<BrowseResult> domains = new ArrayList<>(count);
|
||||
|
||||
String q = """
|
||||
@ -43,7 +42,7 @@ public class DbBrowseDomainsSimilarCosine {
|
||||
try (var connection = dataSource.getConnection()) {
|
||||
try (var stmt = connection.prepareStatement(q)) {
|
||||
stmt.setFetchSize(count);
|
||||
stmt.setInt(1, domainId.id());
|
||||
stmt.setInt(1, domainId);
|
||||
stmt.setInt(2, count);
|
||||
var rsp = stmt.executeQuery();
|
||||
while (rsp.next() && domains.size() < count) {
|
||||
|
@ -5,10 +5,7 @@ import com.google.inject.Singleton;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import nu.marginalia.browse.model.BrowseResult;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.db.DomainBlacklist;
|
||||
import nu.marginalia.model.id.EdgeId;
|
||||
import nu.marginalia.model.id.EdgeIdCollection;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@ -26,7 +23,7 @@ public class DbBrowseDomainsSimilarOldAlgo {
|
||||
this.dataSource = dataSource;
|
||||
}
|
||||
|
||||
public List<BrowseResult> getDomainNeighborsAdjacent(EdgeId<EdgeDomain> domainId, DomainBlacklist blacklist, int count) {
|
||||
public List<BrowseResult> getDomainNeighborsAdjacent(int domainId, DomainBlacklist blacklist, int count) {
|
||||
final Set<BrowseResult> domains = new HashSet<>(count*3);
|
||||
|
||||
final String q = """
|
||||
@ -49,7 +46,7 @@ public class DbBrowseDomainsSimilarOldAlgo {
|
||||
try (var connection = dataSource.getConnection()) {
|
||||
try (var stmt = connection.prepareStatement(q)) {
|
||||
stmt.setFetchSize(count);
|
||||
stmt.setInt(1, domainId.id());
|
||||
stmt.setInt(1, domainId);
|
||||
stmt.setInt(2, count);
|
||||
var rsp = stmt.executeQuery();
|
||||
while (rsp.next()) {
|
||||
@ -78,7 +75,7 @@ public class DbBrowseDomainsSimilarOldAlgo {
|
||||
try (var stmt = connection.prepareStatement(q2)) {
|
||||
|
||||
stmt.setFetchSize(count/2);
|
||||
stmt.setInt(1, domainId.id());
|
||||
stmt.setInt(1, domainId);
|
||||
stmt.setInt(2, count/2 - domains.size());
|
||||
var rsp = stmt.executeQuery();
|
||||
while (rsp.next() && domains.size() < count/2) {
|
||||
@ -109,7 +106,7 @@ public class DbBrowseDomainsSimilarOldAlgo {
|
||||
LIMIT ?""";
|
||||
try (var stmt = connection.prepareStatement(q3)) {
|
||||
stmt.setFetchSize(count/2);
|
||||
stmt.setInt(1, domainId.id());
|
||||
stmt.setInt(1, domainId);
|
||||
stmt.setInt(2, count/2 - domains.size());
|
||||
|
||||
var rsp = stmt.executeQuery();
|
||||
@ -165,49 +162,4 @@ public class DbBrowseDomainsSimilarOldAlgo {
|
||||
return domains;
|
||||
}
|
||||
|
||||
|
||||
private <T> String idList(EdgeIdCollection<EdgeUrl> ids) {
|
||||
StringJoiner j = new StringJoiner(",", "(", ")");
|
||||
for (var id : ids.values()) {
|
||||
j.add(Integer.toString(id));
|
||||
}
|
||||
return j.toString();
|
||||
}
|
||||
|
||||
public List<BrowseResult> getBrowseResultFromUrlIds(EdgeIdCollection<EdgeUrl> urlIds) {
|
||||
if (urlIds.isEmpty())
|
||||
return Collections.emptyList();
|
||||
|
||||
List<BrowseResult> ret = new ArrayList<>(urlIds.size());
|
||||
|
||||
try (var conn = dataSource.getConnection()) {
|
||||
try (var stmt = conn.createStatement()) {
|
||||
|
||||
String inStmt = idList(urlIds);
|
||||
|
||||
var rsp = stmt.executeQuery("""
|
||||
SELECT DOMAIN_ID, DOMAIN_NAME
|
||||
FROM EC_URL_VIEW
|
||||
INNER JOIN DOMAIN_METADATA ON EC_URL_VIEW.DOMAIN_ID=DOMAIN_METADATA.ID
|
||||
WHERE
|
||||
KNOWN_URLS<5000
|
||||
AND QUALITY>-10
|
||||
AND EC_URL_VIEW.ID IN
|
||||
""" + inStmt); // this injection is safe, inStmt is derived from concatenating a list of integers
|
||||
while (rsp.next()) {
|
||||
int id = rsp.getInt(1);
|
||||
String domain = rsp.getString(2);
|
||||
|
||||
ret.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0));
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (SQLException ex) {
|
||||
logger.error("SQL error", ex);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -4,9 +4,7 @@ import com.google.common.base.Strings;
|
||||
import com.google.inject.Inject;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.db.DbDomainQueries;
|
||||
import nu.marginalia.model.id.EdgeId;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
@ -30,7 +28,7 @@ public class ScreenshotService {
|
||||
this.dataSource = dataSource;
|
||||
}
|
||||
|
||||
public boolean hasScreenshot(EdgeId<EdgeDomain> domainId) {
|
||||
public boolean hasScreenshot(int domainId) {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var ps = conn.prepareStatement("""
|
||||
SELECT TRUE
|
||||
@ -38,7 +36,7 @@ public class ScreenshotService {
|
||||
INNER JOIN EC_DOMAIN ON EC_DOMAIN.DOMAIN_NAME=DATA_DOMAIN_SCREENSHOT.DOMAIN_NAME
|
||||
WHERE EC_DOMAIN.ID=?
|
||||
""")) {
|
||||
ps.setInt(1, domainId.id());
|
||||
ps.setInt(1, domainId);
|
||||
var rs = ps.executeQuery();
|
||||
if (rs.next()) {
|
||||
return rs.getBoolean(1);
|
||||
@ -86,7 +84,7 @@ public class ScreenshotService {
|
||||
|
||||
private Object serveSvgPlaceholder(Response response, int id) {
|
||||
|
||||
var name = domainQueries.getDomain(new EdgeId<>(id)).map(Object::toString)
|
||||
var name = domainQueries.getDomain(id).map(Object::toString)
|
||||
.orElse("[Screenshot Not Yet Captured]");
|
||||
|
||||
response.type("image/svg+xml");
|
||||
|
@ -15,9 +15,6 @@ import nu.marginalia.lexicon.KeywordLexicon;
|
||||
import nu.marginalia.lexicon.journal.KeywordLexiconJournal;
|
||||
import nu.marginalia.lexicon.journal.KeywordLexiconJournalMode;
|
||||
import nu.marginalia.model.idx.DocumentMetadata;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.model.id.EdgeId;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
@ -11,7 +11,6 @@ import nu.marginalia.db.storage.model.FileStorageId;
|
||||
import nu.marginalia.db.storage.model.FileStorageType;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.model.gson.GsonFactory;
|
||||
import nu.marginalia.model.id.EdgeIdList;
|
||||
import nu.marginalia.renderer.RendererFactory;
|
||||
import nu.marginalia.screenshot.ScreenshotService;
|
||||
import nu.marginalia.service.server.*;
|
||||
@ -224,7 +223,7 @@ public class ControlService extends Service {
|
||||
}
|
||||
});
|
||||
|
||||
randomExplorationService.removeRandomDomains(new EdgeIdList<>(idList.toArray()));
|
||||
randomExplorationService.removeRandomDomains(idList.toArray());
|
||||
|
||||
String after = request.queryParams("after");
|
||||
|
||||
|
@ -2,8 +2,6 @@ package nu.marginalia.control.svc;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.model.id.EdgeIdList;
|
||||
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
@ -18,7 +16,7 @@ public class RandomExplorationService {
|
||||
this.dataSource = dataSource;
|
||||
}
|
||||
|
||||
public void removeRandomDomains(EdgeIdList<EdgeDomain> ids) throws SQLException {
|
||||
public void removeRandomDomains(int[] ids) throws SQLException {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
DELETE FROM EC_RANDOM_DOMAINS
|
||||
@ -27,7 +25,7 @@ public class RandomExplorationService {
|
||||
"""))
|
||||
{
|
||||
for (var id : ids) {
|
||||
stmt.setInt(1, id.id());
|
||||
stmt.setInt(1, id);
|
||||
stmt.addBatch();
|
||||
}
|
||||
stmt.executeBatch();
|
||||
|
@ -2,13 +2,12 @@ package nu.marginalia.index.svc;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import gnu.trove.list.TIntList;
|
||||
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.db.DomainTypes;
|
||||
import nu.marginalia.index.IndexServicesFactory;
|
||||
import nu.marginalia.index.searchset.SearchSet;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.model.id.EdgeIdList;
|
||||
import nu.marginalia.ranking.ReversePageRank;
|
||||
import nu.marginalia.ranking.StandardPageRank;
|
||||
import nu.marginalia.ranking.accumulator.RankingResultHashMapAccumulator;
|
||||
@ -168,7 +167,7 @@ public class IndexSearchSetsService {
|
||||
|
||||
@SneakyThrows
|
||||
public void updateBlogsSet() {
|
||||
EdgeIdList<EdgeDomain> knownDomains = domainTypes.getKnownDomainsByType(DomainTypes.Type.BLOG);
|
||||
TIntList knownDomains = domainTypes.getKnownDomainsByType(DomainTypes.Type.BLOG);
|
||||
|
||||
if (knownDomains.isEmpty()) {
|
||||
// FIXME: We don't want to reload the entire list every time, but we do want to do it sometimes. Actor maybe?
|
||||
@ -177,7 +176,7 @@ public class IndexSearchSetsService {
|
||||
}
|
||||
|
||||
synchronized (this) {
|
||||
blogsSet = new RankingSearchSet(SearchSetIdentifier.BLOGS, blogsSet.source, new IntOpenHashSet(knownDomains.values()));
|
||||
blogsSet = new RankingSearchSet(SearchSetIdentifier.BLOGS, blogsSet.source, new IntOpenHashSet(knownDomains.toArray()));
|
||||
blogsSet.write();
|
||||
}
|
||||
}
|
||||
|
@ -106,16 +106,11 @@ public class SearchOperator {
|
||||
}
|
||||
|
||||
private int getDomainId(String domain) {
|
||||
int domainId = -1;
|
||||
try {
|
||||
if (domain != null) {
|
||||
return domainQueries.getDomainId(new EdgeDomain(domain)).id();
|
||||
}
|
||||
if (domain == null) {
|
||||
return -1;
|
||||
}
|
||||
catch (NoSuchElementException ex) {
|
||||
|
||||
}
|
||||
return domainId;
|
||||
return domainQueries.tryGetDomainId(new EdgeDomain(domain)).orElse(-1);
|
||||
}
|
||||
|
||||
private List<String> getProblems(Context ctx, String evalResult, List<UrlDetails> queryResults, SearchQuery processedQuery) {
|
||||
|
@ -10,7 +10,6 @@ import nu.marginalia.db.storage.model.FileStorageType;
|
||||
import nu.marginalia.linkdb.LinkdbReader;
|
||||
import nu.marginalia.model.gson.GsonFactory;
|
||||
import nu.marginalia.search.client.SearchMqEndpoints;
|
||||
import nu.marginalia.search.db.DbUrlDetailsQuery;
|
||||
import nu.marginalia.search.svc.SearchFrontPageService;
|
||||
import nu.marginalia.search.svc.*;
|
||||
import nu.marginalia.service.control.ServiceEventLog;
|
||||
@ -30,7 +29,6 @@ import java.nio.file.Path;
|
||||
public class SearchService extends Service {
|
||||
|
||||
private final WebsiteUrl websiteUrl;
|
||||
private final DbUrlDetailsQuery dbUrlDetailsQuery;
|
||||
private final StaticResources staticResources;
|
||||
private final FileStorageService fileStorageService;
|
||||
private final LinkdbReader linkdbReader;
|
||||
@ -42,7 +40,6 @@ public class SearchService extends Service {
|
||||
@Inject
|
||||
public SearchService(BaseServiceParams params,
|
||||
WebsiteUrl websiteUrl,
|
||||
DbUrlDetailsQuery dbUrlDetailsQuery,
|
||||
StaticResources staticResources,
|
||||
SearchFrontPageService frontPageService,
|
||||
SearchErrorPageService errorPageService,
|
||||
@ -57,7 +54,6 @@ public class SearchService extends Service {
|
||||
|
||||
this.eventLog = params.eventLog;
|
||||
this.websiteUrl = websiteUrl;
|
||||
this.dbUrlDetailsQuery = dbUrlDetailsQuery;
|
||||
this.staticResources = staticResources;
|
||||
this.fileStorageService = fileStorageService;
|
||||
this.linkdbReader = linkdbReader;
|
||||
@ -91,12 +87,6 @@ public class SearchService extends Service {
|
||||
Spark.awaitInitialization();
|
||||
}
|
||||
|
||||
@MqNotification(endpoint = SearchMqEndpoints.FLUSH_CACHES)
|
||||
public void flushCaches(String unusedArg) {
|
||||
logger.info("Flushing caches");
|
||||
dbUrlDetailsQuery.clearCaches();
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
@MqNotification(endpoint = SearchMqEndpoints.SWITCH_LINKDB)
|
||||
public void switchLinkdb(String unusedArg) {
|
||||
|
@ -61,13 +61,13 @@ public class SiteListCommand implements SearchCommandInterface {
|
||||
|
||||
List<UrlDetails> resultSet;
|
||||
Path screenshotPath = null;
|
||||
Integer domainId = -1;
|
||||
int domainId = -1;
|
||||
if (null != domain) {
|
||||
var dumbQuery = queryFactory.createQuery(SearchProfile.CORPO, 100, 100, "site:"+domain);
|
||||
resultSet = searchQueryIndexService.executeQuery(ctx, dumbQuery);
|
||||
var maybeId = domainQueries.tryGetDomainId(domain);
|
||||
if (maybeId.isPresent()) {
|
||||
domainId = maybeId.get().id();
|
||||
domainId = maybeId.getAsInt();
|
||||
screenshotPath = Path.of("/screenshot/" + domainId);
|
||||
}
|
||||
else {
|
||||
|
@ -1,112 +0,0 @@
|
||||
package nu.marginalia.search.db;
|
||||
|
||||
import com.google.common.base.Strings;
|
||||
import com.google.common.cache.Cache;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import com.google.inject.Inject;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.model.crawl.DomainIndexingState;
|
||||
import nu.marginalia.model.id.EdgeId;
|
||||
import nu.marginalia.model.id.EdgeIdCollection;
|
||||
import nu.marginalia.search.model.PageScoreAdjustment;
|
||||
import nu.marginalia.search.model.UrlDetails;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
public class DbUrlDetailsQuery {
|
||||
private final HikariDataSource dataSource;
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
private final Cache<EdgeUrl, EdgeId<EdgeUrl>> urlIdCache = CacheBuilder.newBuilder().maximumSize(100_000).build();
|
||||
|
||||
public static double QUALITY_LOWER_BOUND_CUTOFF = -15.;
|
||||
@Inject
|
||||
public DbUrlDetailsQuery(HikariDataSource dataSource)
|
||||
{
|
||||
this.dataSource = dataSource;
|
||||
}
|
||||
|
||||
|
||||
public synchronized void clearCaches()
|
||||
{
|
||||
urlIdCache.invalidateAll();
|
||||
}
|
||||
|
||||
private <T> String idList(EdgeIdCollection<EdgeUrl> ids) {
|
||||
StringJoiner j = new StringJoiner(",", "(", ")");
|
||||
for (var id : ids.values()) {
|
||||
j.add(Integer.toString(id));
|
||||
}
|
||||
return j.toString();
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
public List<UrlDetails> getUrlDetailsMulti(EdgeIdCollection<EdgeUrl> ids) {
|
||||
if (ids.isEmpty()) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
List<UrlDetails> result = new ArrayList<>(ids.size());
|
||||
|
||||
try (var connection = dataSource.getConnection()) {
|
||||
|
||||
String idString = idList(ids);
|
||||
|
||||
try (var stmt = connection.prepareStatement(
|
||||
"""
|
||||
SELECT ID, DOMAIN_ID, URL,
|
||||
TITLE, DESCRIPTION,
|
||||
QUALITY,
|
||||
WORDS_TOTAL, FORMAT, FEATURES,
|
||||
IP, DOMAIN_STATE,
|
||||
DATA_HASH
|
||||
FROM EC_URL_VIEW
|
||||
WHERE TITLE IS NOT NULL
|
||||
AND ID IN
|
||||
""" + idString)) {
|
||||
stmt.setFetchSize(ids.size());
|
||||
|
||||
var rsp = stmt.executeQuery();
|
||||
while (rsp.next()) {
|
||||
var val = new UrlDetails(rsp.getInt(1),
|
||||
rsp.getInt(2),
|
||||
new EdgeUrl(rsp.getString(3)),
|
||||
rsp.getString(4), // title
|
||||
rsp.getString(5), // description
|
||||
rsp.getDouble(6), // quality
|
||||
rsp.getInt(7), // wordsTotal
|
||||
rsp.getString(8), // format
|
||||
rsp.getInt(9), // features
|
||||
rsp.getString(10), // ip
|
||||
DomainIndexingState.valueOf(rsp.getString(11)), // domainState
|
||||
rsp.getLong(12), // dataHash
|
||||
PageScoreAdjustment.zero(), // urlQualityAdjustment
|
||||
Integer.MAX_VALUE, // rankingId
|
||||
Double.MAX_VALUE, // termScore
|
||||
1, // resultsFromSameDomain
|
||||
"", // positions
|
||||
null, // result item
|
||||
null // keyword scores
|
||||
);
|
||||
if (val.urlQuality <= QUALITY_LOWER_BOUND_CUTOFF
|
||||
&& Strings.isNullOrEmpty(val.description)
|
||||
&& val.url.path.length() > 1) {
|
||||
continue;
|
||||
}
|
||||
result.add(val);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
@ -4,7 +4,6 @@ import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.browse.model.BrowseResult;
|
||||
import nu.marginalia.screenshot.ScreenshotService;
|
||||
import nu.marginalia.model.id.EdgeId;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
@ -22,7 +21,7 @@ public class BrowseResultCleaner {
|
||||
public Predicate<BrowseResult> shouldRemoveResultPredicate() {
|
||||
Set<String> domainHashes = new HashSet<>(100);
|
||||
|
||||
return (res) -> !screenshotService.hasScreenshot(new EdgeId<>(res.domainId()))
|
||||
return (res) -> !screenshotService.hasScreenshot(res.domainId())
|
||||
|| !domainHashes.add(res.domainHash());
|
||||
}
|
||||
}
|
||||
|
@ -5,7 +5,6 @@ import lombok.SneakyThrows;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.model.crawl.DomainIndexingState;
|
||||
import nu.marginalia.db.DbDomainQueries;
|
||||
import nu.marginalia.model.id.EdgeId;
|
||||
import nu.marginalia.search.model.DomainInformation;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
@ -13,10 +12,7 @@ import org.slf4j.LoggerFactory;
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Singleton;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.*;
|
||||
|
||||
/*
|
||||
TODO: This class needs to be refactored, a lot of
|
||||
@ -42,10 +38,11 @@ public class DomainInformationService {
|
||||
|
||||
public Optional<DomainInformation> domainInfo(String site) {
|
||||
|
||||
EdgeId<EdgeDomain> domainId = getDomainFromPartial(site);
|
||||
if (domainId == null) {
|
||||
OptionalInt maybeDomainId = getDomainFromPartial(site);
|
||||
if (maybeDomainId.isEmpty()) {
|
||||
return Optional.empty();
|
||||
}
|
||||
int domainId = maybeDomainId.getAsInt();
|
||||
|
||||
Optional<EdgeDomain> domain = dbDomainQueries.getDomain(domainId);
|
||||
if (domain.isEmpty()) {
|
||||
@ -85,7 +82,7 @@ public class DomainInformationService {
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
private boolean inCrawlQueue(EdgeId<EdgeDomain> domainId) {
|
||||
private boolean inCrawlQueue(int domainId) {
|
||||
try (var connection = dataSource.getConnection()) {
|
||||
try (var stmt = connection.prepareStatement(
|
||||
"""
|
||||
@ -94,21 +91,15 @@ public class DomainInformationService {
|
||||
WHERE EC_DOMAIN.ID=?
|
||||
"""))
|
||||
{
|
||||
stmt.setInt(1, domainId.id());
|
||||
stmt.setInt(1, domainId);
|
||||
var rsp = stmt.executeQuery();
|
||||
return rsp.next();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private EdgeId<EdgeDomain> getDomainFromPartial(String site) {
|
||||
try {
|
||||
return dbDomainQueries.getDomainId(new EdgeDomain(site));
|
||||
}
|
||||
catch (Exception ex) {
|
||||
return null;
|
||||
}
|
||||
|
||||
private OptionalInt getDomainFromPartial(String site) {
|
||||
return dbDomainQueries.tryGetDomainId(new EdgeDomain(site));
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
@ -125,11 +116,11 @@ public class DomainInformationService {
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
public int getPagesKnown(EdgeId<EdgeDomain> domainId) {
|
||||
public int getPagesKnown(int domainId) {
|
||||
try (var connection = dataSource.getConnection()) {
|
||||
|
||||
try (var stmt = connection.prepareStatement("SELECT KNOWN_URLS FROM DOMAIN_METADATA WHERE ID=?")) {
|
||||
stmt.setInt(1, domainId.id());
|
||||
stmt.setInt(1, domainId);
|
||||
var rsp = stmt.executeQuery();
|
||||
if (rsp.next()) {
|
||||
return rsp.getInt(1);
|
||||
@ -142,11 +133,11 @@ public class DomainInformationService {
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
public int getPagesVisited(EdgeId<EdgeDomain> domainId) {
|
||||
public int getPagesVisited(int domainId) {
|
||||
try (var connection = dataSource.getConnection()) {
|
||||
|
||||
try (var stmt = connection.prepareStatement("SELECT VISITED_URLS FROM DOMAIN_METADATA WHERE ID=?")) {
|
||||
stmt.setInt(1, domainId.id());
|
||||
stmt.setInt(1, domainId);
|
||||
var rsp = stmt.executeQuery();
|
||||
if (rsp.next()) {
|
||||
return rsp.getInt(1);
|
||||
@ -160,11 +151,11 @@ public class DomainInformationService {
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public int getPagesIndexed(EdgeId<EdgeDomain> domainId) {
|
||||
public int getPagesIndexed(int domainId) {
|
||||
try (var connection = dataSource.getConnection()) {
|
||||
|
||||
try (var stmt = connection.prepareStatement("SELECT GOOD_URLS FROM DOMAIN_METADATA WHERE ID=?")) {
|
||||
stmt.setInt(1, domainId.id());
|
||||
stmt.setInt(1, domainId);
|
||||
var rsp = stmt.executeQuery();
|
||||
if (rsp.next()) {
|
||||
return rsp.getInt(1);
|
||||
@ -177,11 +168,11 @@ public class DomainInformationService {
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
public int getIncomingLinks(EdgeId<EdgeDomain> domainId) {
|
||||
public int getIncomingLinks(int domainId) {
|
||||
try (var connection = dataSource.getConnection()) {
|
||||
|
||||
try (var stmt = connection.prepareStatement("SELECT COUNT(ID) FROM EC_DOMAIN_LINK WHERE DEST_DOMAIN_ID=?")) {
|
||||
stmt.setInt(1, domainId.id());
|
||||
stmt.setInt(1, domainId);
|
||||
var rsp = stmt.executeQuery();
|
||||
if (rsp.next()) {
|
||||
return rsp.getInt(1);
|
||||
@ -193,11 +184,11 @@ public class DomainInformationService {
|
||||
}
|
||||
}
|
||||
@SneakyThrows
|
||||
public int getOutboundLinks(EdgeId<EdgeDomain> domainId) {
|
||||
public int getOutboundLinks(int domainId) {
|
||||
try (var connection = dataSource.getConnection()) {
|
||||
|
||||
try (var stmt = connection.prepareStatement("SELECT COUNT(ID) FROM EC_DOMAIN_LINK WHERE SOURCE_DOMAIN_ID=?")) {
|
||||
stmt.setInt(1, domainId.id());
|
||||
stmt.setInt(1, domainId);
|
||||
var rsp = stmt.executeQuery();
|
||||
if (rsp.next()) {
|
||||
return rsp.getInt(1);
|
||||
@ -210,11 +201,11 @@ public class DomainInformationService {
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
public double getDomainQuality(EdgeId<EdgeDomain> domainId) {
|
||||
public double getDomainQuality(int domainId) {
|
||||
try (var connection = dataSource.getConnection()) {
|
||||
|
||||
try (var stmt = connection.prepareStatement("SELECT QUALITY FROM EC_DOMAIN WHERE ID=?")) {
|
||||
stmt.setInt(1, domainId.id());
|
||||
stmt.setInt(1, domainId);
|
||||
var rsp = stmt.executeQuery();
|
||||
if (rsp.next()) {
|
||||
return rsp.getDouble(1);
|
||||
@ -226,11 +217,11 @@ public class DomainInformationService {
|
||||
}
|
||||
}
|
||||
|
||||
public DomainIndexingState getDomainState(EdgeId<EdgeDomain> domainId) {
|
||||
public DomainIndexingState getDomainState(int domainId) {
|
||||
try (var connection = dataSource.getConnection()) {
|
||||
|
||||
try (var stmt = connection.prepareStatement("SELECT STATE FROM EC_DOMAIN WHERE ID=?")) {
|
||||
stmt.setInt(1, domainId.id());
|
||||
stmt.setInt(1, domainId);
|
||||
var rsp = stmt.executeQuery();
|
||||
if (rsp.next()) {
|
||||
return DomainIndexingState.valueOf(rsp.getString(1));
|
||||
@ -244,11 +235,11 @@ public class DomainInformationService {
|
||||
return DomainIndexingState.ERROR;
|
||||
}
|
||||
|
||||
public List<EdgeDomain> getLinkingDomains(EdgeId<EdgeDomain> domainId) {
|
||||
public List<EdgeDomain> getLinkingDomains(int domainId) {
|
||||
try (var connection = dataSource.getConnection()) {
|
||||
List<EdgeDomain> results = new ArrayList<>(25);
|
||||
try (var stmt = connection.prepareStatement("SELECT SOURCE_DOMAIN FROM EC_RELATED_LINKS_VIEW WHERE DEST_DOMAIN_ID=? ORDER BY SOURCE_DOMAIN_ID LIMIT 25")) {
|
||||
stmt.setInt(1, domainId.id());
|
||||
stmt.setInt(1, domainId);
|
||||
var rsp = stmt.executeQuery();
|
||||
while (rsp.next()) {
|
||||
results.add(new EdgeDomain(rsp.getString(1)));
|
||||
@ -264,11 +255,11 @@ public class DomainInformationService {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
public double getRank(EdgeId<EdgeDomain> domainId) {
|
||||
public double getRank(int domainId) {
|
||||
try (var connection = dataSource.getConnection()) {
|
||||
|
||||
try (var stmt = connection.prepareStatement("SELECT IFNULL(RANK, 1) FROM EC_DOMAIN WHERE ID=?")) {
|
||||
stmt.setInt(1, domainId.id());
|
||||
stmt.setInt(1, domainId);
|
||||
var rsp = stmt.executeQuery();
|
||||
if (rsp.next()) {
|
||||
return rsp.getDouble(1);
|
||||
|
@ -4,7 +4,6 @@ import com.google.inject.Inject;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import nu.marginalia.WebsiteUrl;
|
||||
import nu.marginalia.db.DbDomainQueries;
|
||||
import nu.marginalia.model.id.EdgeId;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import spark.Request;
|
||||
@ -61,7 +60,7 @@ public class SearchAddToCrawlQueueService {
|
||||
}
|
||||
|
||||
private String getDomainName(int id) {
|
||||
var domain = domainQueries.getDomain(new EdgeId<>(id));
|
||||
var domain = domainQueries.getDomain(id);
|
||||
if (domain.isEmpty())
|
||||
Spark.halt(404);
|
||||
return domain.get().toString();
|
||||
|
@ -9,7 +9,6 @@ import nu.marginalia.db.DomainBlacklist;
|
||||
import nu.marginalia.renderer.MustacheRenderer;
|
||||
import nu.marginalia.renderer.RendererFactory;
|
||||
import nu.marginalia.screenshot.ScreenshotService;
|
||||
import nu.marginalia.model.id.EdgeId;
|
||||
import nu.marginalia.service.server.*;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import spark.Request;
|
||||
@ -156,7 +155,7 @@ public class DatingService extends Service {
|
||||
var session = sessionObjectOpt.get();
|
||||
|
||||
int id = Integer.parseInt(request.params("id"));
|
||||
BrowseResult res = session.nextSimilar(new EdgeId<>(id), browseSimilarCosine, blacklist);
|
||||
BrowseResult res = session.nextSimilar(id, browseSimilarCosine, blacklist);
|
||||
|
||||
res = findViableDomain(session, res);
|
||||
|
||||
@ -168,7 +167,7 @@ public class DatingService extends Service {
|
||||
|
||||
@NotNull
|
||||
private BrowseResult findViableDomain(DatingSessionObject session, BrowseResult res) {
|
||||
while (!screenshotService.hasScreenshot(new EdgeId<>(res.domainId())) || session.isRecent(res)) {
|
||||
while (!screenshotService.hasScreenshot(res.domainId()) || session.isRecent(res)) {
|
||||
res = session.next(browseRandom, blacklist);
|
||||
}
|
||||
return res;
|
||||
|
@ -3,9 +3,7 @@ package nu.marginalia.dating;
|
||||
import nu.marginalia.browse.DbBrowseDomainsRandom;
|
||||
import nu.marginalia.browse.DbBrowseDomainsSimilarCosine;
|
||||
import nu.marginalia.browse.model.BrowseResult;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.db.DomainBlacklist;
|
||||
import nu.marginalia.model.id.EdgeId;
|
||||
|
||||
import java.util.LinkedList;
|
||||
|
||||
@ -29,8 +27,8 @@ public class DatingSessionObject {
|
||||
return queue.pollFirst();
|
||||
}
|
||||
|
||||
public BrowseResult nextSimilar(EdgeId<EdgeDomain> id, DbBrowseDomainsSimilarCosine adjacent, DomainBlacklist blacklist) {
|
||||
adjacent.getDomainNeighborsAdjacentCosine(id, blacklist, 25).forEach(queue::addFirst);
|
||||
public BrowseResult nextSimilar(int domainId, DbBrowseDomainsSimilarCosine adjacent, DomainBlacklist blacklist) {
|
||||
adjacent.getDomainNeighborsAdjacentCosine(domainId, blacklist, 25).forEach(queue::addFirst);
|
||||
|
||||
while (queue.size() > MAX_QUEUE_SIZE) {
|
||||
queue.removeLast();
|
||||
|
@ -5,7 +5,6 @@ import lombok.SneakyThrows;
|
||||
import nu.marginalia.ProcessConfiguration;
|
||||
import nu.marginalia.db.DbDomainQueries;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.model.id.EdgeId;
|
||||
import nu.marginalia.process.control.ProcessHeartbeat;
|
||||
import nu.marginalia.service.module.DatabaseModule;
|
||||
|
||||
@ -40,8 +39,7 @@ public class WebsiteAdjacenciesCalculator {
|
||||
System.out.println(Arrays.toString(domainName));
|
||||
|
||||
int[] domainIds = Arrays.stream(domainName).map(EdgeDomain::new)
|
||||
.map(dataStoreDao::getDomainId)
|
||||
.mapToInt(EdgeId::id)
|
||||
.mapToInt(dataStoreDao::getDomainId)
|
||||
.map(domainAliases::deAlias)
|
||||
.toArray();
|
||||
|
||||
@ -49,7 +47,7 @@ public class WebsiteAdjacenciesCalculator {
|
||||
findAdjacentDtoS(domainId, similarities -> {
|
||||
for (var similarity : similarities.similarities()) {
|
||||
if (adjacenciesData.isIndexedDomain(similarity.domainId)) System.out.print("*");
|
||||
System.out.println(dataStoreDao.getDomain(new EdgeId<>(similarity.domainId)).map(Object::toString).orElse("") + " " + prettyPercent(similarity.value));
|
||||
System.out.println(dataStoreDao.getDomain(similarity.domainId).map(Object::toString).orElse("") + " " + prettyPercent(similarity.value));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user