(system) Remove EdgeId<T> and similar objects

They seemed like a good idea at the time, but in practice they're wasting resources and not really providing the clarity I had hoped for.
This commit is contained in:
Viktor Lofgren 2023-08-24 17:46:02 +02:00
parent c909120ae1
commit 1e6800565a
30 changed files with 74 additions and 548 deletions

View File

@ -2,8 +2,6 @@ package nu.marginalia.index.client.model.results;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
import lombok.Getter; import lombok.Getter;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.model.id.EdgeId;
import nu.marginalia.model.id.UrlIdCodec; import nu.marginalia.model.id.UrlIdCodec;
import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.NotNull;

View File

@ -9,16 +9,16 @@ import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource; import com.zaxxer.hikari.HikariDataSource;
import lombok.SneakyThrows; import lombok.SneakyThrows;
import nu.marginalia.model.EdgeDomain; import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.id.EdgeId;
import java.util.NoSuchElementException; import java.util.NoSuchElementException;
import java.util.Optional; import java.util.Optional;
import java.util.OptionalInt;
@Singleton @Singleton
public class DbDomainQueries { public class DbDomainQueries {
private final HikariDataSource dataSource; private final HikariDataSource dataSource;
private final Cache<EdgeDomain, EdgeId<EdgeDomain>> domainIdCache = CacheBuilder.newBuilder().maximumSize(10_000).build(); private final Cache<EdgeDomain, Integer> domainIdCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
@Inject @Inject
public DbDomainQueries(HikariDataSource dataSource) public DbDomainQueries(HikariDataSource dataSource)
@ -28,7 +28,7 @@ public class DbDomainQueries {
@SneakyThrows @SneakyThrows
public EdgeId<EdgeDomain> getDomainId(EdgeDomain domain) { public Integer getDomainId(EdgeDomain domain) {
try (var connection = dataSource.getConnection()) { try (var connection = dataSource.getConnection()) {
return domainIdCache.get(domain, () -> { return domainIdCache.get(domain, () -> {
@ -36,7 +36,7 @@ public class DbDomainQueries {
stmt.setString(1, domain.toString()); stmt.setString(1, domain.toString());
var rsp = stmt.executeQuery(); var rsp = stmt.executeQuery();
if (rsp.next()) { if (rsp.next()) {
return new EdgeId<>(rsp.getInt(1)); return rsp.getInt(1);
} }
} }
throw new NoSuchElementException(); throw new NoSuchElementException();
@ -48,12 +48,12 @@ public class DbDomainQueries {
} }
@SneakyThrows @SneakyThrows
public Optional<EdgeId<EdgeDomain>> tryGetDomainId(EdgeDomain domain) { public OptionalInt tryGetDomainId(EdgeDomain domain) {
var maybe = Optional.ofNullable(domainIdCache.getIfPresent(domain)); Integer maybeId = domainIdCache.getIfPresent(domain);
if (maybeId != null) {
if (maybe.isPresent()) return OptionalInt.of(maybeId);
return maybe; }
try (var connection = dataSource.getConnection()) { try (var connection = dataSource.getConnection()) {
@ -61,25 +61,25 @@ public class DbDomainQueries {
stmt.setString(1, domain.toString()); stmt.setString(1, domain.toString());
var rsp = stmt.executeQuery(); var rsp = stmt.executeQuery();
if (rsp.next()) { if (rsp.next()) {
var id = new EdgeId<EdgeDomain>(rsp.getInt(1)); var id = rsp.getInt(1);
domainIdCache.put(domain, id); domainIdCache.put(domain, id);
return Optional.of(id); return OptionalInt.of(id);
} }
} }
return Optional.empty(); return OptionalInt.empty();
} }
catch (UncheckedExecutionException ex) { catch (UncheckedExecutionException ex) {
return Optional.empty(); return OptionalInt.empty();
} }
} }
@SneakyThrows @SneakyThrows
public Optional<EdgeDomain> getDomain(EdgeId<EdgeDomain> id) { public Optional<EdgeDomain> getDomain(int id) {
try (var connection = dataSource.getConnection()) { try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement("SELECT DOMAIN_NAME FROM EC_DOMAIN WHERE ID=?")) { try (var stmt = connection.prepareStatement("SELECT DOMAIN_NAME FROM EC_DOMAIN WHERE ID=?")) {
stmt.setInt(1, id.id()); stmt.setInt(1, id);
var rsp = stmt.executeQuery(); var rsp = stmt.executeQuery();
if (rsp.next()) { if (rsp.next()) {
return Optional.of(new EdgeDomain(rsp.getString(1))); return Optional.of(new EdgeDomain(rsp.getString(1)));

View File

@ -2,15 +2,10 @@ package nu.marginalia.db;
import com.google.inject.ImplementedBy; import com.google.inject.ImplementedBy;
import gnu.trove.set.hash.TIntHashSet; import gnu.trove.set.hash.TIntHashSet;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.id.EdgeId;
@ImplementedBy(DomainBlacklistImpl.class) @ImplementedBy(DomainBlacklistImpl.class)
public interface DomainBlacklist { public interface DomainBlacklist {
boolean isBlacklisted(int domainId); boolean isBlacklisted(int domainId);
/**
 * Convenience overload that accepts a typed id wrapper: it unwraps the
 * integer id and delegates to {@link #isBlacklisted(int)}.
 *
 * @param domainId typed wrapper around the domain id
 * @return whatever {@link #isBlacklisted(int)} returns for the wrapped id
 */
default boolean isBlacklisted(EdgeId<EdgeDomain> domainId) {
return isBlacklisted(domainId.id());
}
default TIntHashSet getSpamDomains() { default TIntHashSet getSpamDomains() {
return new TIntHashSet(); return new TIntHashSet();
} }

View File

@ -1,8 +1,9 @@
package nu.marginalia.db; package nu.marginalia.db;
import com.zaxxer.hikari.HikariDataSource; import com.zaxxer.hikari.HikariDataSource;
import gnu.trove.list.TIntList;
import gnu.trove.list.array.TIntArrayList;
import nu.marginalia.model.EdgeDomain; import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.id.EdgeIdList;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -58,10 +59,10 @@ public class DomainTypes {
return ret; return ret;
} }
/** Retrieve the EdgeId of all domains of a certain type, /** Retrieve the domain id of all domains of a certain type,
* ignoring entries that are not in the EC_DOMAIN table */ * ignoring entries that are not in the EC_DOMAIN table */
public EdgeIdList<EdgeDomain> getKnownDomainsByType(Type type) { public TIntList getKnownDomainsByType(Type type) {
EdgeIdList<EdgeDomain> ret = new EdgeIdList<>(); TIntList ret = new TIntArrayList();
try (var conn = dataSource.getConnection(); try (var conn = dataSource.getConnection();
var stmt = conn.prepareStatement(""" var stmt = conn.prepareStatement("""

View File

@ -6,7 +6,6 @@ import nu.marginalia.bigstring.BigString;
import nu.marginalia.bigstring.CompressedBigString; import nu.marginalia.bigstring.CompressedBigString;
import nu.marginalia.model.EdgeDomain; import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.EdgeUrl; import nu.marginalia.model.EdgeUrl;
import nu.marginalia.model.id.EdgeId;
import java.net.URISyntaxException; import java.net.URISyntaxException;
@ -24,8 +23,6 @@ public class GsonFactory {
} }
}) })
.registerTypeAdapter(EdgeDomain.class, (JsonDeserializer<EdgeDomain>) (json, typeOfT, context) -> new EdgeDomain(json.getAsString())) .registerTypeAdapter(EdgeDomain.class, (JsonDeserializer<EdgeDomain>) (json, typeOfT, context) -> new EdgeDomain(json.getAsString()))
.registerTypeAdapter(EdgeId.class, (JsonDeserializer<EdgeId<?>>) (json, typeOfT, context) -> new EdgeId<>(json.getAsInt()))
.registerTypeAdapter(EdgeId.class, (JsonSerializer<EdgeId<?>>) (src, typeOfSrc, context) -> new JsonPrimitive(src.id()))
.registerTypeAdapter(BigString.class, (JsonDeserializer<BigString>) (json, typeOfT, context) -> BigString.encode(json.getAsString())) .registerTypeAdapter(BigString.class, (JsonDeserializer<BigString>) (json, typeOfT, context) -> BigString.encode(json.getAsString()))
.registerTypeAdapter(BigString.class, (JsonSerializer<BigString>) (src, typeOfT, context) -> new JsonPrimitive(src.decode())) .registerTypeAdapter(BigString.class, (JsonSerializer<BigString>) (src, typeOfT, context) -> new JsonPrimitive(src.decode()))
.registerTypeAdapter(CompressedBigString.class, (JsonSerializer<BigString>) (src, typeOfT, context) -> new JsonPrimitive(src.decode())) .registerTypeAdapter(CompressedBigString.class, (JsonSerializer<BigString>) (src, typeOfT, context) -> new JsonPrimitive(src.decode()))

View File

@ -1,11 +0,0 @@
package nu.marginalia.model.id;
/**
 * Wrapper around a raw integer id. This exists entirely for strengthening
 * the typing of IDs: the type parameter is not used by the record's single
 * {@code int} component and only tags the id at compile time.
 *
 * @param <T> tag type describing what the id refers to
 * @param id the wrapped integer id
 * @deprecated We don't use this anymore.
 */
@Deprecated
public record EdgeId<T>(int id) {
}

View File

@ -1,35 +0,0 @@
package nu.marginalia.model.id;
import java.util.Arrays;
import java.util.stream.IntStream;
/**
 * Id collection backed directly by an {@code int[]}.
 *
 * <p>The array handed to the constructor is stored as-is (no copy), is
 * returned as-is by {@link #values()}, and is reordered in place by
 * {@link #sort()}.
 *
 * @param <T> tag type describing what the ids refer to
 * @param values the backing array of ids
 */
@Deprecated
public record EdgeIdArray<T>(int... values) implements EdgeIdCollection<T> {

    /** Drains the given stream into a new array-backed collection. */
    public static <T> EdgeIdArray<T> gather(IntStream stream) {
        final int[] collected = stream.toArray();
        return new EdgeIdArray<>(collected);
    }

    /** Returns the backing array itself, not a copy. */
    @Override
    public int[] values() {
        return this.values;
    }

    /** True when the backing array has no elements. */
    @Override
    public boolean isEmpty() {
        return 0 == this.values.length;
    }

    /** Number of ids held, i.e. the length of the backing array. */
    @Override
    public int size() {
        return this.values.length;
    }

    /** Returns the id stored at position {@code idx} of the backing array. */
    public int get(int idx) {
        return this.values[idx];
    }

    /** Sorts the backing array in place, in ascending numeric order. */
    public void sort() {
        Arrays.sort(this.values);
    }
}

View File

@ -1,29 +0,0 @@
package nu.marginalia.model.id;
import java.util.Arrays;
import java.util.Iterator;
import java.util.stream.IntStream;
/**
 * Common read-side contract for the typed id containers. Implementations
 * supply {@link #size()}, {@link #isEmpty()} and {@link #values()}; the
 * default methods are all derived from {@link #values()}.
 *
 * @param <T> tag type describing what the ids refer to
 */
@Deprecated
public interface EdgeIdCollection<T> extends Iterable<EdgeId<T>> {

    /** Number of ids in the collection. */
    int size();

    /** Whether the collection holds no ids. */
    boolean isEmpty();

    /** The ids as a primitive array. */
    int[] values();

    /** Streams over the array returned by {@link #values()}. */
    default IntStream stream() {
        final int[] ids = values();
        return Arrays.stream(ids);
    }

    /** Iterates the ids, wrapping each one in an {@link EdgeId}. */
    default Iterator<EdgeId<T>> iterator() {
        return Arrays.stream(values())
                .mapToObj(id -> new EdgeId<T>(id))
                .iterator();
    }

    /** Wraps {@link #values()} in an {@link EdgeIdArray}. */
    default EdgeIdArray<T> asArray() {
        final int[] ids = values();
        return new EdgeIdArray<>(ids);
    }

    /** Builds an {@link EdgeIdList} from {@link #values()}. */
    default EdgeIdList<T> asList() {
        final int[] ids = values();
        return new EdgeIdList<>(ids);
    }

    /** Builds an {@link EdgeIdSet} from {@link #values()}. */
    default EdgeIdSet<T> asSet() {
        final int[] ids = values();
        return new EdgeIdSet<>(ids);
    }
}

View File

@ -1,13 +0,0 @@
package nu.marginalia.model.id;
import gnu.trove.TIntCollection;
/**
 * Write-side mix-in for the typed id containers: every {@code addAll}
 * overload forwards to the collection returned by
 * {@link #underlyingCollection()}.
 *
 * @param <T> tag type describing what the ids refer to
 */
@Deprecated
public interface EdgeIdCollectionMutable<T> {

    /** The primitive collection that receives the added ids. */
    TIntCollection underlyingCollection();

    /** Adds every id held by the given array-backed collection. */
    default void addAll(EdgeIdArray<T> other) {
        final TIntCollection target = underlyingCollection();
        target.addAll(other.values());
    }

    /** Adds every id held by the given list, passing its backing list directly. */
    default void addAll(EdgeIdList<T> other) {
        final TIntCollection target = underlyingCollection();
        target.addAll(other.list());
    }

    /** Adds every id reported by {@code other.values()}. */
    default void addAll(EdgeIdCollection<T> other) {
        final TIntCollection target = underlyingCollection();
        target.addAll(other.values());
    }
}

View File

@ -1,49 +0,0 @@
package nu.marginalia.model.id;
import gnu.trove.TIntCollection;
import gnu.trove.list.array.TIntArrayList;
import java.util.stream.IntStream;
/**
 * Growable id collection backed by a {@link TIntArrayList}.
 *
 * @param <T> tag type describing what the ids refer to
 * @param list the backing list; exposed as-is through the record accessor
 *             and {@link #underlyingCollection()}
 */
@Deprecated
public record EdgeIdList<T>(TIntArrayList list)
        implements EdgeIdCollection<T>, EdgeIdCollectionMutable<T> {

    /** Creates a list whose backing {@link TIntArrayList} is built from the given ids. */
    public EdgeIdList(int... values) {
        this(new TIntArrayList(values));
    }

    /** Collects the stream into a new list, one {@link #add(int)} per element. */
    public static <T> EdgeIdList<T> gather(IntStream stream) {
        return stream.collect(
                () -> new EdgeIdList<T>(),
                (acc, id) -> acc.add(id),
                (left, right) -> left.addAll(right));
    }

    /** Returns the result of {@code toArray()} on the backing list. */
    @Override
    public int[] values() {
        return this.list.toArray();
    }

    @Override
    public boolean isEmpty() {
        return this.list.isEmpty();
    }

    @Override
    public int size() {
        return this.list.size();
    }

    /** Returns the id at position {@code idx} of the backing list. */
    public int get(int idx) {
        return this.list.get(idx);
    }

    /** Appends an id to the backing list. */
    public void add(int id) {
        this.list.add(id);
    }

    /** Sorts the backing list in place. */
    public void sort() {
        this.list.sort();
    }

    /** The backing list itself, so bulk adds write straight into it. */
    @Override
    public TIntCollection underlyingCollection() {
        return this.list;
    }
}

View File

@ -1,53 +0,0 @@
package nu.marginalia.model.id;
import gnu.trove.TIntCollection;
import gnu.trove.set.hash.TIntHashSet;
import java.util.stream.IntStream;
/**
 * Id collection backed by a {@link TIntHashSet}.
 *
 * @param <T> tag type describing what the ids refer to
 * @param set the backing set; exposed as-is through the record accessor
 *            and {@link #underlyingCollection()}
 */
@Deprecated
public record EdgeIdSet<T>(TIntHashSet set)
        implements EdgeIdCollection<T>, EdgeIdCollectionMutable<T> {

    /**
     * Creates a set sized for the given ids (load factor 0.5, and -1 as the
     * third {@link TIntHashSet} constructor argument) and adds them all.
     */
    public EdgeIdSet(int... values) {
        this(new TIntHashSet(values.length, 0.5f, -1));
        this.set.addAll(values);
    }

    /** Creates an empty set with the given capacity and load factor. */
    public EdgeIdSet(int initialCapacity, float loadFactor) {
        this(new TIntHashSet(initialCapacity, loadFactor, -1));
    }

    /** The backing set itself, so bulk adds write straight into it. */
    @Override
    public TIntCollection underlyingCollection() {
        return this.set;
    }

    /** Drains the stream into an array and builds a set from it. */
    public static <T> EdgeIdSet<T> gather(IntStream stream) {
        final int[] collected = stream.toArray();
        return new EdgeIdSet<>(collected);
    }

    /** Returns the result of {@code toArray()} on the backing set. */
    @Override
    public int[] values() {
        return this.set.toArray();
    }

    @Override
    public boolean isEmpty() {
        return this.set.isEmpty();
    }

    @Override
    public int size() {
        return this.set.size();
    }

    /** Whether the backing set contains {@code id}. */
    public boolean contains(int id) {
        return this.set.contains(id);
    }

    /** Adds {@code id}; returns the backing set's {@code add} result. */
    public boolean add(int id) {
        return this.set.add(id);
    }

    /** Removes {@code id}; returns the backing set's {@code remove} result. */
    public boolean remove(int id) {
        return this.set.remove(id);
    }
}

View File

@ -1,8 +1,5 @@
package nu.marginalia.index.journal.model; package nu.marginalia.index.journal.model;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.model.id.EdgeId;
import nu.marginalia.model.id.UrlIdCodec; import nu.marginalia.model.id.UrlIdCodec;
public record IndexJournalEntry(IndexJournalEntryHeader header, IndexJournalEntryData data) { public record IndexJournalEntry(IndexJournalEntryHeader header, IndexJournalEntryData data) {

View File

@ -1,71 +0,0 @@
package nu.marginalia.browse;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.browse.model.BrowseResult;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.model.id.EdgeIdCollection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.SQLException;
import java.util.*;
/**
 * Resolves a collection of URL ids to browse results by querying
 * EC_URL_VIEW joined with DOMAIN_METADATA.
 */
@Singleton
public class DbBrowseDomainsFromUrlId {
    private final Logger logger = LoggerFactory.getLogger(getClass());
    private final HikariDataSource dataSource;

    @Inject
    public DbBrowseDomainsFromUrlId(HikariDataSource dataSource) {
        this.dataSource = dataSource;
    }

    /**
     * Renders the ids as a parenthesised, comma-separated SQL list,
     * e.g. {@code (1,2,3)}.
     */
    private String idList(EdgeIdCollection<EdgeUrl> ids) {
        StringJoiner j = new StringJoiner(",", "(", ")");
        for (int id : ids.values()) {
            j.add(Integer.toString(id));
        }
        return j.toString();
    }

    /**
     * Fetches one {@link BrowseResult} per row matching the given URL ids,
     * restricted to rows with KNOWN_URLS below 5000 and QUALITY above -10.
     *
     * <p>SQL errors are logged rather than propagated; in that case the rows
     * read so far (possibly none) are returned.
     *
     * @param urlIds ids to look up; an empty collection short-circuits to an
     *               empty list without touching the database
     * @return the browse results, each built from the row's domain name and
     *         domain id with a third constructor argument of 0
     */
    public List<BrowseResult> getBrowseResultFromUrlIds(EdgeIdCollection<EdgeUrl> urlIds) {
        if (urlIds.isEmpty())
            return Collections.emptyList();

        List<BrowseResult> ret = new ArrayList<>(urlIds.size());

        try (var conn = dataSource.getConnection();
             var stmt = conn.createStatement()) {

            String inStmt = idList(urlIds);

            // this injection is safe, inStmt is derived from concatenating a list of integers
            String query = """
                    SELECT DOMAIN_ID, DOMAIN_NAME
                    FROM EC_URL_VIEW
                    INNER JOIN DOMAIN_METADATA ON EC_URL_VIEW.DOMAIN_ID=DOMAIN_METADATA.ID
                    WHERE
                    KNOWN_URLS<5000
                    AND QUALITY>-10
                    AND EC_URL_VIEW.ID IN
                    """ + inStmt;

            // The result set is managed explicitly, like the connection and statement.
            try (var rsp = stmt.executeQuery(query)) {
                while (rsp.next()) {
                    int id = rsp.getInt(1);
                    String domain = rsp.getString(2);

                    ret.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0));
                }
            }
        }
        catch (SQLException ex) {
            logger.error("SQL error", ex);
        }

        return ret;
    }
}

View File

@ -6,7 +6,6 @@ import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.browse.model.BrowseResult; import nu.marginalia.browse.model.BrowseResult;
import nu.marginalia.model.EdgeDomain; import nu.marginalia.model.EdgeDomain;
import nu.marginalia.db.DomainBlacklist; import nu.marginalia.db.DomainBlacklist;
import nu.marginalia.model.id.EdgeId;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -24,7 +23,7 @@ public class DbBrowseDomainsSimilarCosine {
this.dataSource = dataSource; this.dataSource = dataSource;
} }
public List<BrowseResult> getDomainNeighborsAdjacentCosine(EdgeId<EdgeDomain> domainId, DomainBlacklist blacklist, int count) { public List<BrowseResult> getDomainNeighborsAdjacentCosine(int domainId, DomainBlacklist blacklist, int count) {
List<BrowseResult> domains = new ArrayList<>(count); List<BrowseResult> domains = new ArrayList<>(count);
String q = """ String q = """
@ -43,7 +42,7 @@ public class DbBrowseDomainsSimilarCosine {
try (var connection = dataSource.getConnection()) { try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(q)) { try (var stmt = connection.prepareStatement(q)) {
stmt.setFetchSize(count); stmt.setFetchSize(count);
stmt.setInt(1, domainId.id()); stmt.setInt(1, domainId);
stmt.setInt(2, count); stmt.setInt(2, count);
var rsp = stmt.executeQuery(); var rsp = stmt.executeQuery();
while (rsp.next() && domains.size() < count) { while (rsp.next() && domains.size() < count) {

View File

@ -5,10 +5,7 @@ import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource; import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.browse.model.BrowseResult; import nu.marginalia.browse.model.BrowseResult;
import nu.marginalia.model.EdgeDomain; import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.db.DomainBlacklist; import nu.marginalia.db.DomainBlacklist;
import nu.marginalia.model.id.EdgeId;
import nu.marginalia.model.id.EdgeIdCollection;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -26,7 +23,7 @@ public class DbBrowseDomainsSimilarOldAlgo {
this.dataSource = dataSource; this.dataSource = dataSource;
} }
public List<BrowseResult> getDomainNeighborsAdjacent(EdgeId<EdgeDomain> domainId, DomainBlacklist blacklist, int count) { public List<BrowseResult> getDomainNeighborsAdjacent(int domainId, DomainBlacklist blacklist, int count) {
final Set<BrowseResult> domains = new HashSet<>(count*3); final Set<BrowseResult> domains = new HashSet<>(count*3);
final String q = """ final String q = """
@ -49,7 +46,7 @@ public class DbBrowseDomainsSimilarOldAlgo {
try (var connection = dataSource.getConnection()) { try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(q)) { try (var stmt = connection.prepareStatement(q)) {
stmt.setFetchSize(count); stmt.setFetchSize(count);
stmt.setInt(1, domainId.id()); stmt.setInt(1, domainId);
stmt.setInt(2, count); stmt.setInt(2, count);
var rsp = stmt.executeQuery(); var rsp = stmt.executeQuery();
while (rsp.next()) { while (rsp.next()) {
@ -78,7 +75,7 @@ public class DbBrowseDomainsSimilarOldAlgo {
try (var stmt = connection.prepareStatement(q2)) { try (var stmt = connection.prepareStatement(q2)) {
stmt.setFetchSize(count/2); stmt.setFetchSize(count/2);
stmt.setInt(1, domainId.id()); stmt.setInt(1, domainId);
stmt.setInt(2, count/2 - domains.size()); stmt.setInt(2, count/2 - domains.size());
var rsp = stmt.executeQuery(); var rsp = stmt.executeQuery();
while (rsp.next() && domains.size() < count/2) { while (rsp.next() && domains.size() < count/2) {
@ -109,7 +106,7 @@ public class DbBrowseDomainsSimilarOldAlgo {
LIMIT ?"""; LIMIT ?""";
try (var stmt = connection.prepareStatement(q3)) { try (var stmt = connection.prepareStatement(q3)) {
stmt.setFetchSize(count/2); stmt.setFetchSize(count/2);
stmt.setInt(1, domainId.id()); stmt.setInt(1, domainId);
stmt.setInt(2, count/2 - domains.size()); stmt.setInt(2, count/2 - domains.size());
var rsp = stmt.executeQuery(); var rsp = stmt.executeQuery();
@ -165,49 +162,4 @@ public class DbBrowseDomainsSimilarOldAlgo {
return domains; return domains;
} }
/**
 * Renders the ids as a parenthesised, comma-separated SQL list,
 * e.g. {@code (1,2,3)}. The output contains only integers, commas and the
 * surrounding parentheses.
 */
private String idList(EdgeIdCollection<EdgeUrl> ids) {
    StringJoiner j = new StringJoiner(",", "(", ")");
    for (int id : ids.values()) {
        j.add(Integer.toString(id));
    }
    return j.toString();
}
/**
 * Fetches one {@link BrowseResult} per row matching the given URL ids,
 * restricted to rows with KNOWN_URLS below 5000 and QUALITY above -10.
 *
 * <p>SQL errors are logged rather than propagated; in that case the rows
 * read so far (possibly none) are returned.
 *
 * @param urlIds ids to look up; an empty collection short-circuits to an
 *               empty list without touching the database
 * @return the browse results, each built from the row's domain name and
 *         domain id with a third constructor argument of 0
 */
public List<BrowseResult> getBrowseResultFromUrlIds(EdgeIdCollection<EdgeUrl> urlIds) {
    if (urlIds.isEmpty())
        return Collections.emptyList();

    List<BrowseResult> ret = new ArrayList<>(urlIds.size());

    try (var conn = dataSource.getConnection();
         var stmt = conn.createStatement()) {

        String inStmt = idList(urlIds);

        // this injection is safe, inStmt is derived from concatenating a list of integers
        String query = """
                SELECT DOMAIN_ID, DOMAIN_NAME
                FROM EC_URL_VIEW
                INNER JOIN DOMAIN_METADATA ON EC_URL_VIEW.DOMAIN_ID=DOMAIN_METADATA.ID
                WHERE
                KNOWN_URLS<5000
                AND QUALITY>-10
                AND EC_URL_VIEW.ID IN
                """ + inStmt;

        // The result set is managed explicitly, like the connection and statement.
        try (var rsp = stmt.executeQuery(query)) {
            while (rsp.next()) {
                int id = rsp.getInt(1);
                String domain = rsp.getString(2);

                ret.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0));
            }
        }
    }
    catch (SQLException ex) {
        logger.error("SQL error", ex);
    }

    return ret;
}
} }

View File

@ -4,9 +4,7 @@ import com.google.common.base.Strings;
import com.google.inject.Inject; import com.google.inject.Inject;
import com.zaxxer.hikari.HikariDataSource; import com.zaxxer.hikari.HikariDataSource;
import lombok.SneakyThrows; import lombok.SneakyThrows;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.db.DbDomainQueries; import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.model.id.EdgeId;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -30,7 +28,7 @@ public class ScreenshotService {
this.dataSource = dataSource; this.dataSource = dataSource;
} }
public boolean hasScreenshot(EdgeId<EdgeDomain> domainId) { public boolean hasScreenshot(int domainId) {
try (var conn = dataSource.getConnection(); try (var conn = dataSource.getConnection();
var ps = conn.prepareStatement(""" var ps = conn.prepareStatement("""
SELECT TRUE SELECT TRUE
@ -38,7 +36,7 @@ public class ScreenshotService {
INNER JOIN EC_DOMAIN ON EC_DOMAIN.DOMAIN_NAME=DATA_DOMAIN_SCREENSHOT.DOMAIN_NAME INNER JOIN EC_DOMAIN ON EC_DOMAIN.DOMAIN_NAME=DATA_DOMAIN_SCREENSHOT.DOMAIN_NAME
WHERE EC_DOMAIN.ID=? WHERE EC_DOMAIN.ID=?
""")) { """)) {
ps.setInt(1, domainId.id()); ps.setInt(1, domainId);
var rs = ps.executeQuery(); var rs = ps.executeQuery();
if (rs.next()) { if (rs.next()) {
return rs.getBoolean(1); return rs.getBoolean(1);
@ -86,7 +84,7 @@ public class ScreenshotService {
private Object serveSvgPlaceholder(Response response, int id) { private Object serveSvgPlaceholder(Response response, int id) {
var name = domainQueries.getDomain(new EdgeId<>(id)).map(Object::toString) var name = domainQueries.getDomain(id).map(Object::toString)
.orElse("[Screenshot Not Yet Captured]"); .orElse("[Screenshot Not Yet Captured]");
response.type("image/svg+xml"); response.type("image/svg+xml");

View File

@ -15,9 +15,6 @@ import nu.marginalia.lexicon.KeywordLexicon;
import nu.marginalia.lexicon.journal.KeywordLexiconJournal; import nu.marginalia.lexicon.journal.KeywordLexiconJournal;
import nu.marginalia.lexicon.journal.KeywordLexiconJournalMode; import nu.marginalia.lexicon.journal.KeywordLexiconJournalMode;
import nu.marginalia.model.idx.DocumentMetadata; import nu.marginalia.model.idx.DocumentMetadata;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.model.id.EdgeId;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;

View File

@ -11,7 +11,6 @@ import nu.marginalia.db.storage.model.FileStorageId;
import nu.marginalia.db.storage.model.FileStorageType; import nu.marginalia.db.storage.model.FileStorageType;
import nu.marginalia.model.EdgeDomain; import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.gson.GsonFactory; import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.model.id.EdgeIdList;
import nu.marginalia.renderer.RendererFactory; import nu.marginalia.renderer.RendererFactory;
import nu.marginalia.screenshot.ScreenshotService; import nu.marginalia.screenshot.ScreenshotService;
import nu.marginalia.service.server.*; import nu.marginalia.service.server.*;
@ -224,7 +223,7 @@ public class ControlService extends Service {
} }
}); });
randomExplorationService.removeRandomDomains(new EdgeIdList<>(idList.toArray())); randomExplorationService.removeRandomDomains(idList.toArray());
String after = request.queryParams("after"); String after = request.queryParams("after");

View File

@ -2,8 +2,6 @@ package nu.marginalia.control.svc;
import com.google.inject.Inject; import com.google.inject.Inject;
import com.zaxxer.hikari.HikariDataSource; import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.id.EdgeIdList;
import java.sql.SQLException; import java.sql.SQLException;
import java.util.ArrayList; import java.util.ArrayList;
@ -18,7 +16,7 @@ public class RandomExplorationService {
this.dataSource = dataSource; this.dataSource = dataSource;
} }
public void removeRandomDomains(EdgeIdList<EdgeDomain> ids) throws SQLException { public void removeRandomDomains(int[] ids) throws SQLException {
try (var conn = dataSource.getConnection(); try (var conn = dataSource.getConnection();
var stmt = conn.prepareStatement(""" var stmt = conn.prepareStatement("""
DELETE FROM EC_RANDOM_DOMAINS DELETE FROM EC_RANDOM_DOMAINS
@ -27,7 +25,7 @@ public class RandomExplorationService {
""")) """))
{ {
for (var id : ids) { for (var id : ids) {
stmt.setInt(1, id.id()); stmt.setInt(1, id);
stmt.addBatch(); stmt.addBatch();
} }
stmt.executeBatch(); stmt.executeBatch();

View File

@ -2,13 +2,12 @@ package nu.marginalia.index.svc;
import com.google.inject.Inject; import com.google.inject.Inject;
import com.google.inject.Singleton; import com.google.inject.Singleton;
import gnu.trove.list.TIntList;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet; import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import lombok.SneakyThrows; import lombok.SneakyThrows;
import nu.marginalia.db.DomainTypes; import nu.marginalia.db.DomainTypes;
import nu.marginalia.index.IndexServicesFactory; import nu.marginalia.index.IndexServicesFactory;
import nu.marginalia.index.searchset.SearchSet; import nu.marginalia.index.searchset.SearchSet;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.id.EdgeIdList;
import nu.marginalia.ranking.ReversePageRank; import nu.marginalia.ranking.ReversePageRank;
import nu.marginalia.ranking.StandardPageRank; import nu.marginalia.ranking.StandardPageRank;
import nu.marginalia.ranking.accumulator.RankingResultHashMapAccumulator; import nu.marginalia.ranking.accumulator.RankingResultHashMapAccumulator;
@ -168,7 +167,7 @@ public class IndexSearchSetsService {
@SneakyThrows @SneakyThrows
public void updateBlogsSet() { public void updateBlogsSet() {
EdgeIdList<EdgeDomain> knownDomains = domainTypes.getKnownDomainsByType(DomainTypes.Type.BLOG); TIntList knownDomains = domainTypes.getKnownDomainsByType(DomainTypes.Type.BLOG);
if (knownDomains.isEmpty()) { if (knownDomains.isEmpty()) {
// FIXME: We don't want to reload the entire list every time, but we do want to do it sometimes. Actor maybe? // FIXME: We don't want to reload the entire list every time, but we do want to do it sometimes. Actor maybe?
@ -177,7 +176,7 @@ public class IndexSearchSetsService {
} }
synchronized (this) { synchronized (this) {
blogsSet = new RankingSearchSet(SearchSetIdentifier.BLOGS, blogsSet.source, new IntOpenHashSet(knownDomains.values())); blogsSet = new RankingSearchSet(SearchSetIdentifier.BLOGS, blogsSet.source, new IntOpenHashSet(knownDomains.toArray()));
blogsSet.write(); blogsSet.write();
} }
} }

View File

@ -106,16 +106,11 @@ public class SearchOperator {
} }
private int getDomainId(String domain) { private int getDomainId(String domain) {
int domainId = -1; if (domain == null) {
try { return -1;
if (domain != null) {
return domainQueries.getDomainId(new EdgeDomain(domain)).id();
}
} }
catch (NoSuchElementException ex) {
} return domainQueries.tryGetDomainId(new EdgeDomain(domain)).orElse(-1);
return domainId;
} }
private List<String> getProblems(Context ctx, String evalResult, List<UrlDetails> queryResults, SearchQuery processedQuery) { private List<String> getProblems(Context ctx, String evalResult, List<UrlDetails> queryResults, SearchQuery processedQuery) {

View File

@ -10,7 +10,6 @@ import nu.marginalia.db.storage.model.FileStorageType;
import nu.marginalia.linkdb.LinkdbReader; import nu.marginalia.linkdb.LinkdbReader;
import nu.marginalia.model.gson.GsonFactory; import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.search.client.SearchMqEndpoints; import nu.marginalia.search.client.SearchMqEndpoints;
import nu.marginalia.search.db.DbUrlDetailsQuery;
import nu.marginalia.search.svc.SearchFrontPageService; import nu.marginalia.search.svc.SearchFrontPageService;
import nu.marginalia.search.svc.*; import nu.marginalia.search.svc.*;
import nu.marginalia.service.control.ServiceEventLog; import nu.marginalia.service.control.ServiceEventLog;
@ -30,7 +29,6 @@ import java.nio.file.Path;
public class SearchService extends Service { public class SearchService extends Service {
private final WebsiteUrl websiteUrl; private final WebsiteUrl websiteUrl;
private final DbUrlDetailsQuery dbUrlDetailsQuery;
private final StaticResources staticResources; private final StaticResources staticResources;
private final FileStorageService fileStorageService; private final FileStorageService fileStorageService;
private final LinkdbReader linkdbReader; private final LinkdbReader linkdbReader;
@ -42,7 +40,6 @@ public class SearchService extends Service {
@Inject @Inject
public SearchService(BaseServiceParams params, public SearchService(BaseServiceParams params,
WebsiteUrl websiteUrl, WebsiteUrl websiteUrl,
DbUrlDetailsQuery dbUrlDetailsQuery,
StaticResources staticResources, StaticResources staticResources,
SearchFrontPageService frontPageService, SearchFrontPageService frontPageService,
SearchErrorPageService errorPageService, SearchErrorPageService errorPageService,
@ -57,7 +54,6 @@ public class SearchService extends Service {
this.eventLog = params.eventLog; this.eventLog = params.eventLog;
this.websiteUrl = websiteUrl; this.websiteUrl = websiteUrl;
this.dbUrlDetailsQuery = dbUrlDetailsQuery;
this.staticResources = staticResources; this.staticResources = staticResources;
this.fileStorageService = fileStorageService; this.fileStorageService = fileStorageService;
this.linkdbReader = linkdbReader; this.linkdbReader = linkdbReader;
@ -91,12 +87,6 @@ public class SearchService extends Service {
Spark.awaitInitialization(); Spark.awaitInitialization();
} }
/**
 * Handler registered for the FLUSH_CACHES endpoint: logs that a flush was
 * requested and clears the caches held by {@code dbUrlDetailsQuery}.
 *
 * @param unusedArg not read by this method
 */
@MqNotification(endpoint = SearchMqEndpoints.FLUSH_CACHES)
public void flushCaches(String unusedArg) {
logger.info("Flushing caches");
dbUrlDetailsQuery.clearCaches();
}
@SneakyThrows @SneakyThrows
@MqNotification(endpoint = SearchMqEndpoints.SWITCH_LINKDB) @MqNotification(endpoint = SearchMqEndpoints.SWITCH_LINKDB)
public void switchLinkdb(String unusedArg) { public void switchLinkdb(String unusedArg) {

View File

@ -61,13 +61,13 @@ public class SiteListCommand implements SearchCommandInterface {
List<UrlDetails> resultSet; List<UrlDetails> resultSet;
Path screenshotPath = null; Path screenshotPath = null;
Integer domainId = -1; int domainId = -1;
if (null != domain) { if (null != domain) {
var dumbQuery = queryFactory.createQuery(SearchProfile.CORPO, 100, 100, "site:"+domain); var dumbQuery = queryFactory.createQuery(SearchProfile.CORPO, 100, 100, "site:"+domain);
resultSet = searchQueryIndexService.executeQuery(ctx, dumbQuery); resultSet = searchQueryIndexService.executeQuery(ctx, dumbQuery);
var maybeId = domainQueries.tryGetDomainId(domain); var maybeId = domainQueries.tryGetDomainId(domain);
if (maybeId.isPresent()) { if (maybeId.isPresent()) {
domainId = maybeId.get().id(); domainId = maybeId.getAsInt();
screenshotPath = Path.of("/screenshot/" + domainId); screenshotPath = Path.of("/screenshot/" + domainId);
} }
else { else {

View File

@ -1,112 +0,0 @@
package nu.marginalia.search.db;
import com.google.common.base.Strings;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.inject.Inject;
import com.zaxxer.hikari.HikariDataSource;
import lombok.SneakyThrows;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.model.crawl.DomainIndexingState;
import nu.marginalia.model.id.EdgeId;
import nu.marginalia.model.id.EdgeIdCollection;
import nu.marginalia.search.model.PageScoreAdjustment;
import nu.marginalia.search.model.UrlDetails;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
/**
 * Loads {@link UrlDetails} rows from EC_URL_VIEW for a collection of URL ids.
 */
public class DbUrlDetailsQuery {
    private final HikariDataSource dataSource;
    private final Logger logger = LoggerFactory.getLogger(getClass());

    // Within this class the cache is only ever invalidated (see clearCaches);
    // nothing visible here puts entries into it.
    private final Cache<EdgeUrl, EdgeId<EdgeUrl>> urlIdCache = CacheBuilder.newBuilder().maximumSize(100_000).build();

    /**
     * Quality threshold used by {@link #getUrlDetailsMulti}: rows at or below
     * it are dropped when they also lack a description and have a path longer
     * than one character. Left non-final so existing code that assigns it
     * keeps compiling.
     */
    public static double QUALITY_LOWER_BOUND_CUTOFF = -15.;

    @Inject
    public DbUrlDetailsQuery(HikariDataSource dataSource)
    {
        this.dataSource = dataSource;
    }

    /** Drops every entry from the URL id cache. */
    public synchronized void clearCaches()
    {
        urlIdCache.invalidateAll();
    }

    /**
     * Renders the ids as a parenthesised, comma-separated SQL list,
     * e.g. {@code (1,2,3)}.
     */
    private String idList(EdgeIdCollection<EdgeUrl> ids) {
        StringJoiner j = new StringJoiner(",", "(", ")");
        for (int id : ids.values()) {
            j.add(Integer.toString(id));
        }
        return j.toString();
    }

    /**
     * Fetches the details of every titled URL among the given ids.
     *
     * <p>Rows whose quality is at or below {@link #QUALITY_LOWER_BOUND_CUTOFF},
     * whose description is null or empty, and whose URL path is longer than
     * one character are skipped. SQL failures propagate to the caller
     * (rethrown unchecked via {@code @SneakyThrows}).
     *
     * @param ids URL ids to look up; an empty collection short-circuits to an
     *            empty list without touching the database
     * @return the matching rows as {@link UrlDetails}
     */
    @SneakyThrows
    public List<UrlDetails> getUrlDetailsMulti(EdgeIdCollection<EdgeUrl> ids) {
        if (ids.isEmpty()) {
            return Collections.emptyList();
        }

        List<UrlDetails> result = new ArrayList<>(ids.size());

        try (var connection = dataSource.getConnection()) {

            // Only integers are concatenated into the statement text.
            String idString = idList(ids);

            try (var stmt = connection.prepareStatement(
                    """
                    SELECT ID, DOMAIN_ID, URL,
                    TITLE, DESCRIPTION,
                    QUALITY,
                    WORDS_TOTAL, FORMAT, FEATURES,
                    IP, DOMAIN_STATE,
                    DATA_HASH
                    FROM EC_URL_VIEW
                    WHERE TITLE IS NOT NULL
                    AND ID IN
                    """ + idString)) {
                stmt.setFetchSize(ids.size());

                // The result set is managed explicitly, like the connection and statement.
                try (var rsp = stmt.executeQuery()) {
                    while (rsp.next()) {
                        var val = new UrlDetails(rsp.getInt(1),
                                rsp.getInt(2),
                                new EdgeUrl(rsp.getString(3)),
                                rsp.getString(4), // title
                                rsp.getString(5), // description
                                rsp.getDouble(6), // quality
                                rsp.getInt(7), // wordsTotal
                                rsp.getString(8), // format
                                rsp.getInt(9), // features
                                rsp.getString(10), // ip
                                DomainIndexingState.valueOf(rsp.getString(11)), // domainState
                                rsp.getLong(12), // dataHash
                                PageScoreAdjustment.zero(), // urlQualityAdjustment
                                Integer.MAX_VALUE, // rankingId
                                Double.MAX_VALUE, // termScore
                                1, // resultsFromSameDomain
                                "", // positions
                                null, // result item
                                null // keyword scores
                        );

                        if (val.urlQuality <= QUALITY_LOWER_BOUND_CUTOFF
                                && Strings.isNullOrEmpty(val.description)
                                && val.url.path.length() > 1) {
                            continue;
                        }

                        result.add(val);
                    }
                }
            }
        }

        return result;
    }
}

View File

@ -4,7 +4,6 @@ import com.google.inject.Inject;
import com.google.inject.Singleton; import com.google.inject.Singleton;
import nu.marginalia.browse.model.BrowseResult; import nu.marginalia.browse.model.BrowseResult;
import nu.marginalia.screenshot.ScreenshotService; import nu.marginalia.screenshot.ScreenshotService;
import nu.marginalia.model.id.EdgeId;
import java.util.HashSet; import java.util.HashSet;
import java.util.Set; import java.util.Set;
@ -22,7 +21,7 @@ public class BrowseResultCleaner {
public Predicate<BrowseResult> shouldRemoveResultPredicate() { public Predicate<BrowseResult> shouldRemoveResultPredicate() {
Set<String> domainHashes = new HashSet<>(100); Set<String> domainHashes = new HashSet<>(100);
return (res) -> !screenshotService.hasScreenshot(new EdgeId<>(res.domainId())) return (res) -> !screenshotService.hasScreenshot(res.domainId())
|| !domainHashes.add(res.domainHash()); || !domainHashes.add(res.domainHash());
} }
} }

View File

@ -5,7 +5,6 @@ import lombok.SneakyThrows;
import nu.marginalia.model.EdgeDomain; import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.crawl.DomainIndexingState; import nu.marginalia.model.crawl.DomainIndexingState;
import nu.marginalia.db.DbDomainQueries; import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.model.id.EdgeId;
import nu.marginalia.search.model.DomainInformation; import nu.marginalia.search.model.DomainInformation;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -13,10 +12,7 @@ import org.slf4j.LoggerFactory;
import javax.inject.Inject; import javax.inject.Inject;
import javax.inject.Singleton; import javax.inject.Singleton;
import java.sql.SQLException; import java.sql.SQLException;
import java.util.ArrayList; import java.util.*;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
/* /*
TODO: This class needs to be refactored, a lot of TODO: This class needs to be refactored, a lot of
@ -42,10 +38,11 @@ public class DomainInformationService {
public Optional<DomainInformation> domainInfo(String site) { public Optional<DomainInformation> domainInfo(String site) {
EdgeId<EdgeDomain> domainId = getDomainFromPartial(site); OptionalInt maybeDomainId = getDomainFromPartial(site);
if (domainId == null) { if (maybeDomainId.isEmpty()) {
return Optional.empty(); return Optional.empty();
} }
int domainId = maybeDomainId.getAsInt();
Optional<EdgeDomain> domain = dbDomainQueries.getDomain(domainId); Optional<EdgeDomain> domain = dbDomainQueries.getDomain(domainId);
if (domain.isEmpty()) { if (domain.isEmpty()) {
@ -85,7 +82,7 @@ public class DomainInformationService {
} }
@SneakyThrows @SneakyThrows
private boolean inCrawlQueue(EdgeId<EdgeDomain> domainId) { private boolean inCrawlQueue(int domainId) {
try (var connection = dataSource.getConnection()) { try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement( try (var stmt = connection.prepareStatement(
""" """
@ -94,21 +91,15 @@ public class DomainInformationService {
WHERE EC_DOMAIN.ID=? WHERE EC_DOMAIN.ID=?
""")) """))
{ {
stmt.setInt(1, domainId.id()); stmt.setInt(1, domainId);
var rsp = stmt.executeQuery(); var rsp = stmt.executeQuery();
return rsp.next(); return rsp.next();
} }
} }
} }
private EdgeId<EdgeDomain> getDomainFromPartial(String site) { private OptionalInt getDomainFromPartial(String site) {
try { return dbDomainQueries.tryGetDomainId(new EdgeDomain(site));
return dbDomainQueries.getDomainId(new EdgeDomain(site));
}
catch (Exception ex) {
return null;
}
} }
@SneakyThrows @SneakyThrows
@ -125,11 +116,11 @@ public class DomainInformationService {
} }
@SneakyThrows @SneakyThrows
public int getPagesKnown(EdgeId<EdgeDomain> domainId) { public int getPagesKnown(int domainId) {
try (var connection = dataSource.getConnection()) { try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement("SELECT KNOWN_URLS FROM DOMAIN_METADATA WHERE ID=?")) { try (var stmt = connection.prepareStatement("SELECT KNOWN_URLS FROM DOMAIN_METADATA WHERE ID=?")) {
stmt.setInt(1, domainId.id()); stmt.setInt(1, domainId);
var rsp = stmt.executeQuery(); var rsp = stmt.executeQuery();
if (rsp.next()) { if (rsp.next()) {
return rsp.getInt(1); return rsp.getInt(1);
@ -142,11 +133,11 @@ public class DomainInformationService {
} }
@SneakyThrows @SneakyThrows
public int getPagesVisited(EdgeId<EdgeDomain> domainId) { public int getPagesVisited(int domainId) {
try (var connection = dataSource.getConnection()) { try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement("SELECT VISITED_URLS FROM DOMAIN_METADATA WHERE ID=?")) { try (var stmt = connection.prepareStatement("SELECT VISITED_URLS FROM DOMAIN_METADATA WHERE ID=?")) {
stmt.setInt(1, domainId.id()); stmt.setInt(1, domainId);
var rsp = stmt.executeQuery(); var rsp = stmt.executeQuery();
if (rsp.next()) { if (rsp.next()) {
return rsp.getInt(1); return rsp.getInt(1);
@ -160,11 +151,11 @@ public class DomainInformationService {
@SneakyThrows @SneakyThrows
public int getPagesIndexed(EdgeId<EdgeDomain> domainId) { public int getPagesIndexed(int domainId) {
try (var connection = dataSource.getConnection()) { try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement("SELECT GOOD_URLS FROM DOMAIN_METADATA WHERE ID=?")) { try (var stmt = connection.prepareStatement("SELECT GOOD_URLS FROM DOMAIN_METADATA WHERE ID=?")) {
stmt.setInt(1, domainId.id()); stmt.setInt(1, domainId);
var rsp = stmt.executeQuery(); var rsp = stmt.executeQuery();
if (rsp.next()) { if (rsp.next()) {
return rsp.getInt(1); return rsp.getInt(1);
@ -177,11 +168,11 @@ public class DomainInformationService {
} }
@SneakyThrows @SneakyThrows
public int getIncomingLinks(EdgeId<EdgeDomain> domainId) { public int getIncomingLinks(int domainId) {
try (var connection = dataSource.getConnection()) { try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement("SELECT COUNT(ID) FROM EC_DOMAIN_LINK WHERE DEST_DOMAIN_ID=?")) { try (var stmt = connection.prepareStatement("SELECT COUNT(ID) FROM EC_DOMAIN_LINK WHERE DEST_DOMAIN_ID=?")) {
stmt.setInt(1, domainId.id()); stmt.setInt(1, domainId);
var rsp = stmt.executeQuery(); var rsp = stmt.executeQuery();
if (rsp.next()) { if (rsp.next()) {
return rsp.getInt(1); return rsp.getInt(1);
@ -193,11 +184,11 @@ public class DomainInformationService {
} }
} }
@SneakyThrows @SneakyThrows
public int getOutboundLinks(EdgeId<EdgeDomain> domainId) { public int getOutboundLinks(int domainId) {
try (var connection = dataSource.getConnection()) { try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement("SELECT COUNT(ID) FROM EC_DOMAIN_LINK WHERE SOURCE_DOMAIN_ID=?")) { try (var stmt = connection.prepareStatement("SELECT COUNT(ID) FROM EC_DOMAIN_LINK WHERE SOURCE_DOMAIN_ID=?")) {
stmt.setInt(1, domainId.id()); stmt.setInt(1, domainId);
var rsp = stmt.executeQuery(); var rsp = stmt.executeQuery();
if (rsp.next()) { if (rsp.next()) {
return rsp.getInt(1); return rsp.getInt(1);
@ -210,11 +201,11 @@ public class DomainInformationService {
} }
@SneakyThrows @SneakyThrows
public double getDomainQuality(EdgeId<EdgeDomain> domainId) { public double getDomainQuality(int domainId) {
try (var connection = dataSource.getConnection()) { try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement("SELECT QUALITY FROM EC_DOMAIN WHERE ID=?")) { try (var stmt = connection.prepareStatement("SELECT QUALITY FROM EC_DOMAIN WHERE ID=?")) {
stmt.setInt(1, domainId.id()); stmt.setInt(1, domainId);
var rsp = stmt.executeQuery(); var rsp = stmt.executeQuery();
if (rsp.next()) { if (rsp.next()) {
return rsp.getDouble(1); return rsp.getDouble(1);
@ -226,11 +217,11 @@ public class DomainInformationService {
} }
} }
public DomainIndexingState getDomainState(EdgeId<EdgeDomain> domainId) { public DomainIndexingState getDomainState(int domainId) {
try (var connection = dataSource.getConnection()) { try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement("SELECT STATE FROM EC_DOMAIN WHERE ID=?")) { try (var stmt = connection.prepareStatement("SELECT STATE FROM EC_DOMAIN WHERE ID=?")) {
stmt.setInt(1, domainId.id()); stmt.setInt(1, domainId);
var rsp = stmt.executeQuery(); var rsp = stmt.executeQuery();
if (rsp.next()) { if (rsp.next()) {
return DomainIndexingState.valueOf(rsp.getString(1)); return DomainIndexingState.valueOf(rsp.getString(1));
@ -244,11 +235,11 @@ public class DomainInformationService {
return DomainIndexingState.ERROR; return DomainIndexingState.ERROR;
} }
public List<EdgeDomain> getLinkingDomains(EdgeId<EdgeDomain> domainId) { public List<EdgeDomain> getLinkingDomains(int domainId) {
try (var connection = dataSource.getConnection()) { try (var connection = dataSource.getConnection()) {
List<EdgeDomain> results = new ArrayList<>(25); List<EdgeDomain> results = new ArrayList<>(25);
try (var stmt = connection.prepareStatement("SELECT SOURCE_DOMAIN FROM EC_RELATED_LINKS_VIEW WHERE DEST_DOMAIN_ID=? ORDER BY SOURCE_DOMAIN_ID LIMIT 25")) { try (var stmt = connection.prepareStatement("SELECT SOURCE_DOMAIN FROM EC_RELATED_LINKS_VIEW WHERE DEST_DOMAIN_ID=? ORDER BY SOURCE_DOMAIN_ID LIMIT 25")) {
stmt.setInt(1, domainId.id()); stmt.setInt(1, domainId);
var rsp = stmt.executeQuery(); var rsp = stmt.executeQuery();
while (rsp.next()) { while (rsp.next()) {
results.add(new EdgeDomain(rsp.getString(1))); results.add(new EdgeDomain(rsp.getString(1)));
@ -264,11 +255,11 @@ public class DomainInformationService {
return Collections.emptyList(); return Collections.emptyList();
} }
public double getRank(EdgeId<EdgeDomain> domainId) { public double getRank(int domainId) {
try (var connection = dataSource.getConnection()) { try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement("SELECT IFNULL(RANK, 1) FROM EC_DOMAIN WHERE ID=?")) { try (var stmt = connection.prepareStatement("SELECT IFNULL(RANK, 1) FROM EC_DOMAIN WHERE ID=?")) {
stmt.setInt(1, domainId.id()); stmt.setInt(1, domainId);
var rsp = stmt.executeQuery(); var rsp = stmt.executeQuery();
if (rsp.next()) { if (rsp.next()) {
return rsp.getDouble(1); return rsp.getDouble(1);

View File

@ -4,7 +4,6 @@ import com.google.inject.Inject;
import com.zaxxer.hikari.HikariDataSource; import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.WebsiteUrl; import nu.marginalia.WebsiteUrl;
import nu.marginalia.db.DbDomainQueries; import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.model.id.EdgeId;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import spark.Request; import spark.Request;
@ -61,7 +60,7 @@ public class SearchAddToCrawlQueueService {
} }
private String getDomainName(int id) { private String getDomainName(int id) {
var domain = domainQueries.getDomain(new EdgeId<>(id)); var domain = domainQueries.getDomain(id);
if (domain.isEmpty()) if (domain.isEmpty())
Spark.halt(404); Spark.halt(404);
return domain.get().toString(); return domain.get().toString();

View File

@ -9,7 +9,6 @@ import nu.marginalia.db.DomainBlacklist;
import nu.marginalia.renderer.MustacheRenderer; import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.renderer.RendererFactory; import nu.marginalia.renderer.RendererFactory;
import nu.marginalia.screenshot.ScreenshotService; import nu.marginalia.screenshot.ScreenshotService;
import nu.marginalia.model.id.EdgeId;
import nu.marginalia.service.server.*; import nu.marginalia.service.server.*;
import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.NotNull;
import spark.Request; import spark.Request;
@ -156,7 +155,7 @@ public class DatingService extends Service {
var session = sessionObjectOpt.get(); var session = sessionObjectOpt.get();
int id = Integer.parseInt(request.params("id")); int id = Integer.parseInt(request.params("id"));
BrowseResult res = session.nextSimilar(new EdgeId<>(id), browseSimilarCosine, blacklist); BrowseResult res = session.nextSimilar(id, browseSimilarCosine, blacklist);
res = findViableDomain(session, res); res = findViableDomain(session, res);
@ -168,7 +167,7 @@ public class DatingService extends Service {
@NotNull @NotNull
private BrowseResult findViableDomain(DatingSessionObject session, BrowseResult res) { private BrowseResult findViableDomain(DatingSessionObject session, BrowseResult res) {
while (!screenshotService.hasScreenshot(new EdgeId<>(res.domainId())) || session.isRecent(res)) { while (!screenshotService.hasScreenshot(res.domainId()) || session.isRecent(res)) {
res = session.next(browseRandom, blacklist); res = session.next(browseRandom, blacklist);
} }
return res; return res;

View File

@ -3,9 +3,7 @@ package nu.marginalia.dating;
import nu.marginalia.browse.DbBrowseDomainsRandom; import nu.marginalia.browse.DbBrowseDomainsRandom;
import nu.marginalia.browse.DbBrowseDomainsSimilarCosine; import nu.marginalia.browse.DbBrowseDomainsSimilarCosine;
import nu.marginalia.browse.model.BrowseResult; import nu.marginalia.browse.model.BrowseResult;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.db.DomainBlacklist; import nu.marginalia.db.DomainBlacklist;
import nu.marginalia.model.id.EdgeId;
import java.util.LinkedList; import java.util.LinkedList;
@ -29,8 +27,8 @@ public class DatingSessionObject {
return queue.pollFirst(); return queue.pollFirst();
} }
public BrowseResult nextSimilar(EdgeId<EdgeDomain> id, DbBrowseDomainsSimilarCosine adjacent, DomainBlacklist blacklist) { public BrowseResult nextSimilar(int domainId, DbBrowseDomainsSimilarCosine adjacent, DomainBlacklist blacklist) {
adjacent.getDomainNeighborsAdjacentCosine(id, blacklist, 25).forEach(queue::addFirst); adjacent.getDomainNeighborsAdjacentCosine(domainId, blacklist, 25).forEach(queue::addFirst);
while (queue.size() > MAX_QUEUE_SIZE) { while (queue.size() > MAX_QUEUE_SIZE) {
queue.removeLast(); queue.removeLast();

View File

@ -5,7 +5,6 @@ import lombok.SneakyThrows;
import nu.marginalia.ProcessConfiguration; import nu.marginalia.ProcessConfiguration;
import nu.marginalia.db.DbDomainQueries; import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.model.EdgeDomain; import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.id.EdgeId;
import nu.marginalia.process.control.ProcessHeartbeat; import nu.marginalia.process.control.ProcessHeartbeat;
import nu.marginalia.service.module.DatabaseModule; import nu.marginalia.service.module.DatabaseModule;
@ -40,8 +39,7 @@ public class WebsiteAdjacenciesCalculator {
System.out.println(Arrays.toString(domainName)); System.out.println(Arrays.toString(domainName));
int[] domainIds = Arrays.stream(domainName).map(EdgeDomain::new) int[] domainIds = Arrays.stream(domainName).map(EdgeDomain::new)
.map(dataStoreDao::getDomainId) .mapToInt(dataStoreDao::getDomainId)
.mapToInt(EdgeId::id)
.map(domainAliases::deAlias) .map(domainAliases::deAlias)
.toArray(); .toArray();
@ -49,7 +47,7 @@ public class WebsiteAdjacenciesCalculator {
findAdjacentDtoS(domainId, similarities -> { findAdjacentDtoS(domainId, similarities -> {
for (var similarity : similarities.similarities()) { for (var similarity : similarities.similarities()) {
if (adjacenciesData.isIndexedDomain(similarity.domainId)) System.out.print("*"); if (adjacenciesData.isIndexedDomain(similarity.domainId)) System.out.print("*");
System.out.println(dataStoreDao.getDomain(new EdgeId<>(similarity.domainId)).map(Object::toString).orElse("") + " " + prettyPercent(similarity.value)); System.out.println(dataStoreDao.getDomain(similarity.domainId).map(Object::toString).orElse("") + " " + prettyPercent(similarity.value));
} }
}); });
} }