mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
(system) Remove EdgeId<T> and similar objects
They seemed like a good idea at the time, but in practice they're wasting resources and not really providing the clarity I had hoped.
This commit is contained in:
parent
c909120ae1
commit
1e6800565a
@ -2,8 +2,6 @@ package nu.marginalia.index.client.model.results;
|
|||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
import nu.marginalia.model.EdgeUrl;
|
|
||||||
import nu.marginalia.model.id.EdgeId;
|
|
||||||
import nu.marginalia.model.id.UrlIdCodec;
|
import nu.marginalia.model.id.UrlIdCodec;
|
||||||
import org.jetbrains.annotations.NotNull;
|
import org.jetbrains.annotations.NotNull;
|
||||||
|
|
||||||
|
@ -9,16 +9,16 @@ import com.google.inject.Singleton;
|
|||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.model.id.EdgeId;
|
|
||||||
|
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
import java.util.OptionalInt;
|
||||||
|
|
||||||
@Singleton
|
@Singleton
|
||||||
public class DbDomainQueries {
|
public class DbDomainQueries {
|
||||||
private final HikariDataSource dataSource;
|
private final HikariDataSource dataSource;
|
||||||
|
|
||||||
private final Cache<EdgeDomain, EdgeId<EdgeDomain>> domainIdCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
|
private final Cache<EdgeDomain, Integer> domainIdCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public DbDomainQueries(HikariDataSource dataSource)
|
public DbDomainQueries(HikariDataSource dataSource)
|
||||||
@ -28,7 +28,7 @@ public class DbDomainQueries {
|
|||||||
|
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public EdgeId<EdgeDomain> getDomainId(EdgeDomain domain) {
|
public Integer getDomainId(EdgeDomain domain) {
|
||||||
try (var connection = dataSource.getConnection()) {
|
try (var connection = dataSource.getConnection()) {
|
||||||
|
|
||||||
return domainIdCache.get(domain, () -> {
|
return domainIdCache.get(domain, () -> {
|
||||||
@ -36,7 +36,7 @@ public class DbDomainQueries {
|
|||||||
stmt.setString(1, domain.toString());
|
stmt.setString(1, domain.toString());
|
||||||
var rsp = stmt.executeQuery();
|
var rsp = stmt.executeQuery();
|
||||||
if (rsp.next()) {
|
if (rsp.next()) {
|
||||||
return new EdgeId<>(rsp.getInt(1));
|
return rsp.getInt(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
throw new NoSuchElementException();
|
throw new NoSuchElementException();
|
||||||
@ -48,12 +48,12 @@ public class DbDomainQueries {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public Optional<EdgeId<EdgeDomain>> tryGetDomainId(EdgeDomain domain) {
|
public OptionalInt tryGetDomainId(EdgeDomain domain) {
|
||||||
|
|
||||||
var maybe = Optional.ofNullable(domainIdCache.getIfPresent(domain));
|
Integer maybeId = domainIdCache.getIfPresent(domain);
|
||||||
|
if (maybeId != null) {
|
||||||
if (maybe.isPresent())
|
return OptionalInt.of(maybeId);
|
||||||
return maybe;
|
}
|
||||||
|
|
||||||
try (var connection = dataSource.getConnection()) {
|
try (var connection = dataSource.getConnection()) {
|
||||||
|
|
||||||
@ -61,25 +61,25 @@ public class DbDomainQueries {
|
|||||||
stmt.setString(1, domain.toString());
|
stmt.setString(1, domain.toString());
|
||||||
var rsp = stmt.executeQuery();
|
var rsp = stmt.executeQuery();
|
||||||
if (rsp.next()) {
|
if (rsp.next()) {
|
||||||
var id = new EdgeId<EdgeDomain>(rsp.getInt(1));
|
var id = rsp.getInt(1);
|
||||||
|
|
||||||
domainIdCache.put(domain, id);
|
domainIdCache.put(domain, id);
|
||||||
return Optional.of(id);
|
return OptionalInt.of(id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return Optional.empty();
|
return OptionalInt.empty();
|
||||||
}
|
}
|
||||||
catch (UncheckedExecutionException ex) {
|
catch (UncheckedExecutionException ex) {
|
||||||
return Optional.empty();
|
return OptionalInt.empty();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public Optional<EdgeDomain> getDomain(EdgeId<EdgeDomain> id) {
|
public Optional<EdgeDomain> getDomain(int id) {
|
||||||
try (var connection = dataSource.getConnection()) {
|
try (var connection = dataSource.getConnection()) {
|
||||||
|
|
||||||
try (var stmt = connection.prepareStatement("SELECT DOMAIN_NAME FROM EC_DOMAIN WHERE ID=?")) {
|
try (var stmt = connection.prepareStatement("SELECT DOMAIN_NAME FROM EC_DOMAIN WHERE ID=?")) {
|
||||||
stmt.setInt(1, id.id());
|
stmt.setInt(1, id);
|
||||||
var rsp = stmt.executeQuery();
|
var rsp = stmt.executeQuery();
|
||||||
if (rsp.next()) {
|
if (rsp.next()) {
|
||||||
return Optional.of(new EdgeDomain(rsp.getString(1)));
|
return Optional.of(new EdgeDomain(rsp.getString(1)));
|
||||||
|
@ -2,15 +2,10 @@ package nu.marginalia.db;
|
|||||||
|
|
||||||
import com.google.inject.ImplementedBy;
|
import com.google.inject.ImplementedBy;
|
||||||
import gnu.trove.set.hash.TIntHashSet;
|
import gnu.trove.set.hash.TIntHashSet;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
|
||||||
import nu.marginalia.model.id.EdgeId;
|
|
||||||
|
|
||||||
@ImplementedBy(DomainBlacklistImpl.class)
|
@ImplementedBy(DomainBlacklistImpl.class)
|
||||||
public interface DomainBlacklist {
|
public interface DomainBlacklist {
|
||||||
boolean isBlacklisted(int domainId);
|
boolean isBlacklisted(int domainId);
|
||||||
default boolean isBlacklisted(EdgeId<EdgeDomain> domainId) {
|
|
||||||
return isBlacklisted(domainId.id());
|
|
||||||
}
|
|
||||||
default TIntHashSet getSpamDomains() {
|
default TIntHashSet getSpamDomains() {
|
||||||
return new TIntHashSet();
|
return new TIntHashSet();
|
||||||
}
|
}
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
package nu.marginalia.db;
|
package nu.marginalia.db;
|
||||||
|
|
||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
|
import gnu.trove.list.TIntList;
|
||||||
|
import gnu.trove.list.array.TIntArrayList;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.model.id.EdgeIdList;
|
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
|
||||||
@ -58,10 +59,10 @@ public class DomainTypes {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Retrieve the EdgeId of all domains of a certain type,
|
/** Retrieve the domain id of all domains of a certain type,
|
||||||
* ignoring entries that are not in the EC_DOMAIN table */
|
* ignoring entries that are not in the EC_DOMAIN table */
|
||||||
public EdgeIdList<EdgeDomain> getKnownDomainsByType(Type type) {
|
public TIntList getKnownDomainsByType(Type type) {
|
||||||
EdgeIdList<EdgeDomain> ret = new EdgeIdList<>();
|
TIntList ret = new TIntArrayList();
|
||||||
|
|
||||||
try (var conn = dataSource.getConnection();
|
try (var conn = dataSource.getConnection();
|
||||||
var stmt = conn.prepareStatement("""
|
var stmt = conn.prepareStatement("""
|
||||||
|
@ -6,7 +6,6 @@ import nu.marginalia.bigstring.BigString;
|
|||||||
import nu.marginalia.bigstring.CompressedBigString;
|
import nu.marginalia.bigstring.CompressedBigString;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.model.EdgeUrl;
|
import nu.marginalia.model.EdgeUrl;
|
||||||
import nu.marginalia.model.id.EdgeId;
|
|
||||||
|
|
||||||
import java.net.URISyntaxException;
|
import java.net.URISyntaxException;
|
||||||
|
|
||||||
@ -24,8 +23,6 @@ public class GsonFactory {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
.registerTypeAdapter(EdgeDomain.class, (JsonDeserializer<EdgeDomain>) (json, typeOfT, context) -> new EdgeDomain(json.getAsString()))
|
.registerTypeAdapter(EdgeDomain.class, (JsonDeserializer<EdgeDomain>) (json, typeOfT, context) -> new EdgeDomain(json.getAsString()))
|
||||||
.registerTypeAdapter(EdgeId.class, (JsonDeserializer<EdgeId<?>>) (json, typeOfT, context) -> new EdgeId<>(json.getAsInt()))
|
|
||||||
.registerTypeAdapter(EdgeId.class, (JsonSerializer<EdgeId<?>>) (src, typeOfSrc, context) -> new JsonPrimitive(src.id()))
|
|
||||||
.registerTypeAdapter(BigString.class, (JsonDeserializer<BigString>) (json, typeOfT, context) -> BigString.encode(json.getAsString()))
|
.registerTypeAdapter(BigString.class, (JsonDeserializer<BigString>) (json, typeOfT, context) -> BigString.encode(json.getAsString()))
|
||||||
.registerTypeAdapter(BigString.class, (JsonSerializer<BigString>) (src, typeOfT, context) -> new JsonPrimitive(src.decode()))
|
.registerTypeAdapter(BigString.class, (JsonSerializer<BigString>) (src, typeOfT, context) -> new JsonPrimitive(src.decode()))
|
||||||
.registerTypeAdapter(CompressedBigString.class, (JsonSerializer<BigString>) (src, typeOfT, context) -> new JsonPrimitive(src.decode()))
|
.registerTypeAdapter(CompressedBigString.class, (JsonSerializer<BigString>) (src, typeOfT, context) -> new JsonPrimitive(src.decode()))
|
||||||
|
@ -1,11 +0,0 @@
|
|||||||
package nu.marginalia.model.id;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This exists entirely for strengthening the typing of IDs
|
|
||||||
* Deprecated: We don't use this anymore
|
|
||||||
* @param <T>
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public record EdgeId<T>(int id) {
|
|
||||||
}
|
|
@ -1,35 +0,0 @@
|
|||||||
package nu.marginalia.model.id;
|
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.stream.IntStream;
|
|
||||||
|
|
||||||
@Deprecated
|
|
||||||
public record EdgeIdArray<T> (int... values) implements EdgeIdCollection<T> {
|
|
||||||
|
|
||||||
public static <T> EdgeIdArray<T> gather(IntStream stream) {
|
|
||||||
return new EdgeIdArray<>(stream.toArray());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int[] values() {
|
|
||||||
return values;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean isEmpty() {
|
|
||||||
return values.length == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int size() {
|
|
||||||
return values.length;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int get(int idx) {
|
|
||||||
return values[idx];
|
|
||||||
}
|
|
||||||
|
|
||||||
public void sort() {
|
|
||||||
Arrays.sort(values);
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,29 +0,0 @@
|
|||||||
package nu.marginalia.model.id;
|
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.stream.IntStream;
|
|
||||||
|
|
||||||
@Deprecated
|
|
||||||
public interface EdgeIdCollection<T> extends Iterable<EdgeId<T>> {
|
|
||||||
int size();
|
|
||||||
boolean isEmpty();
|
|
||||||
int[] values();
|
|
||||||
|
|
||||||
default IntStream stream() {
|
|
||||||
return Arrays.stream(values());
|
|
||||||
}
|
|
||||||
|
|
||||||
default Iterator<EdgeId<T>> iterator() {
|
|
||||||
return Arrays.stream(values()).mapToObj(EdgeId<T>::new).iterator();
|
|
||||||
}
|
|
||||||
default EdgeIdArray<T> asArray() {
|
|
||||||
return new EdgeIdArray<>(values());
|
|
||||||
}
|
|
||||||
default EdgeIdList<T> asList() {
|
|
||||||
return new EdgeIdList<>(values());
|
|
||||||
}
|
|
||||||
default EdgeIdSet<T> asSet() {
|
|
||||||
return new EdgeIdSet<>(values());
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,13 +0,0 @@
|
|||||||
package nu.marginalia.model.id;
|
|
||||||
|
|
||||||
import gnu.trove.TIntCollection;
|
|
||||||
|
|
||||||
@Deprecated
|
|
||||||
public interface EdgeIdCollectionMutable<T> {
|
|
||||||
TIntCollection underlyingCollection();
|
|
||||||
|
|
||||||
default void addAll(EdgeIdArray<T> other) { underlyingCollection().addAll(other.values()); }
|
|
||||||
default void addAll(EdgeIdList<T> other) { underlyingCollection().addAll(other.list()); }
|
|
||||||
default void addAll(EdgeIdCollection<T> other) { underlyingCollection().addAll(other.values()); }
|
|
||||||
|
|
||||||
}
|
|
@ -1,49 +0,0 @@
|
|||||||
package nu.marginalia.model.id;
|
|
||||||
|
|
||||||
import gnu.trove.TIntCollection;
|
|
||||||
import gnu.trove.list.array.TIntArrayList;
|
|
||||||
|
|
||||||
import java.util.stream.IntStream;
|
|
||||||
|
|
||||||
@Deprecated
|
|
||||||
public record EdgeIdList<T> (TIntArrayList list) implements
|
|
||||||
EdgeIdCollection<T>,
|
|
||||||
EdgeIdCollectionMutable<T> {
|
|
||||||
|
|
||||||
public EdgeIdList(int... values) { this(new TIntArrayList(values)); }
|
|
||||||
public static <T> EdgeIdList<T> gather(IntStream stream) {
|
|
||||||
return stream.collect(EdgeIdList::new, EdgeIdList::add, EdgeIdList::addAll);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int[] values() {
|
|
||||||
return list.toArray();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean isEmpty() {
|
|
||||||
return list.isEmpty();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int size() {
|
|
||||||
return list.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
public int get(int idx) {
|
|
||||||
return list.get(idx);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void add(int id) {
|
|
||||||
list.add(id);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void sort() {
|
|
||||||
list.sort();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public TIntCollection underlyingCollection() {
|
|
||||||
return list;
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,53 +0,0 @@
|
|||||||
package nu.marginalia.model.id;
|
|
||||||
|
|
||||||
import gnu.trove.TIntCollection;
|
|
||||||
import gnu.trove.set.hash.TIntHashSet;
|
|
||||||
|
|
||||||
import java.util.stream.IntStream;
|
|
||||||
|
|
||||||
@Deprecated
|
|
||||||
public record EdgeIdSet<T> (TIntHashSet set) implements EdgeIdCollection<T>, EdgeIdCollectionMutable<T> {
|
|
||||||
|
|
||||||
public EdgeIdSet(int... values) {
|
|
||||||
this(new TIntHashSet(values.length, 0.5f, -1));
|
|
||||||
|
|
||||||
set.addAll(values);
|
|
||||||
}
|
|
||||||
|
|
||||||
public EdgeIdSet(int initialCapacity, float loadFactor) {
|
|
||||||
this(new TIntHashSet(initialCapacity, loadFactor, -1));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public TIntCollection underlyingCollection() {
|
|
||||||
return set;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static <T> EdgeIdSet<T> gather(IntStream stream) {
|
|
||||||
return new EdgeIdSet<>(stream.toArray());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int[] values() {
|
|
||||||
return set.toArray();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean isEmpty() {
|
|
||||||
return set.isEmpty();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int size() {
|
|
||||||
return set.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean contains(int id) {
|
|
||||||
return set.contains(id);
|
|
||||||
}
|
|
||||||
public boolean add(int id) {
|
|
||||||
return set.add(id);
|
|
||||||
}
|
|
||||||
public boolean remove(int id) { return set.remove(id); }
|
|
||||||
|
|
||||||
}
|
|
@ -1,8 +1,5 @@
|
|||||||
package nu.marginalia.index.journal.model;
|
package nu.marginalia.index.journal.model;
|
||||||
|
|
||||||
import nu.marginalia.model.EdgeDomain;
|
|
||||||
import nu.marginalia.model.EdgeUrl;
|
|
||||||
import nu.marginalia.model.id.EdgeId;
|
|
||||||
import nu.marginalia.model.id.UrlIdCodec;
|
import nu.marginalia.model.id.UrlIdCodec;
|
||||||
|
|
||||||
public record IndexJournalEntry(IndexJournalEntryHeader header, IndexJournalEntryData data) {
|
public record IndexJournalEntry(IndexJournalEntryHeader header, IndexJournalEntryData data) {
|
||||||
|
@ -1,71 +0,0 @@
|
|||||||
package nu.marginalia.browse;
|
|
||||||
|
|
||||||
import com.google.inject.Inject;
|
|
||||||
import com.google.inject.Singleton;
|
|
||||||
import com.zaxxer.hikari.HikariDataSource;
|
|
||||||
import nu.marginalia.browse.model.BrowseResult;
|
|
||||||
import nu.marginalia.model.EdgeDomain;
|
|
||||||
import nu.marginalia.model.EdgeUrl;
|
|
||||||
import nu.marginalia.model.id.EdgeIdCollection;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import java.sql.SQLException;
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
@Singleton
|
|
||||||
public class DbBrowseDomainsFromUrlId {
|
|
||||||
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
|
||||||
private final HikariDataSource dataSource;
|
|
||||||
|
|
||||||
@Inject
|
|
||||||
public DbBrowseDomainsFromUrlId(HikariDataSource dataSource) {
|
|
||||||
this.dataSource = dataSource;
|
|
||||||
}
|
|
||||||
|
|
||||||
private <T> String idList(EdgeIdCollection<EdgeUrl> ids) {
|
|
||||||
StringJoiner j = new StringJoiner(",", "(", ")");
|
|
||||||
for (var id : ids.values()) {
|
|
||||||
j.add(Integer.toString(id));
|
|
||||||
}
|
|
||||||
return j.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<BrowseResult> getBrowseResultFromUrlIds(EdgeIdCollection<EdgeUrl> urlIds) {
|
|
||||||
if (urlIds.isEmpty())
|
|
||||||
return Collections.emptyList();
|
|
||||||
|
|
||||||
List<BrowseResult> ret = new ArrayList<>(urlIds.size());
|
|
||||||
|
|
||||||
try (var conn = dataSource.getConnection()) {
|
|
||||||
try (var stmt = conn.createStatement()) {
|
|
||||||
|
|
||||||
String inStmt = idList(urlIds);
|
|
||||||
|
|
||||||
var rsp = stmt.executeQuery("""
|
|
||||||
SELECT DOMAIN_ID, DOMAIN_NAME
|
|
||||||
FROM EC_URL_VIEW
|
|
||||||
INNER JOIN DOMAIN_METADATA ON EC_URL_VIEW.DOMAIN_ID=DOMAIN_METADATA.ID
|
|
||||||
WHERE
|
|
||||||
KNOWN_URLS<5000
|
|
||||||
AND QUALITY>-10
|
|
||||||
AND EC_URL_VIEW.ID IN
|
|
||||||
""" + inStmt); // this injection is safe, inStmt is derived from concatenating a list of integers
|
|
||||||
while (rsp.next()) {
|
|
||||||
int id = rsp.getInt(1);
|
|
||||||
String domain = rsp.getString(2);
|
|
||||||
|
|
||||||
ret.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (SQLException ex) {
|
|
||||||
logger.error("SQL error", ex);
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
@ -6,7 +6,6 @@ import com.zaxxer.hikari.HikariDataSource;
|
|||||||
import nu.marginalia.browse.model.BrowseResult;
|
import nu.marginalia.browse.model.BrowseResult;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.db.DomainBlacklist;
|
import nu.marginalia.db.DomainBlacklist;
|
||||||
import nu.marginalia.model.id.EdgeId;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -24,7 +23,7 @@ public class DbBrowseDomainsSimilarCosine {
|
|||||||
this.dataSource = dataSource;
|
this.dataSource = dataSource;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<BrowseResult> getDomainNeighborsAdjacentCosine(EdgeId<EdgeDomain> domainId, DomainBlacklist blacklist, int count) {
|
public List<BrowseResult> getDomainNeighborsAdjacentCosine(int domainId, DomainBlacklist blacklist, int count) {
|
||||||
List<BrowseResult> domains = new ArrayList<>(count);
|
List<BrowseResult> domains = new ArrayList<>(count);
|
||||||
|
|
||||||
String q = """
|
String q = """
|
||||||
@ -43,7 +42,7 @@ public class DbBrowseDomainsSimilarCosine {
|
|||||||
try (var connection = dataSource.getConnection()) {
|
try (var connection = dataSource.getConnection()) {
|
||||||
try (var stmt = connection.prepareStatement(q)) {
|
try (var stmt = connection.prepareStatement(q)) {
|
||||||
stmt.setFetchSize(count);
|
stmt.setFetchSize(count);
|
||||||
stmt.setInt(1, domainId.id());
|
stmt.setInt(1, domainId);
|
||||||
stmt.setInt(2, count);
|
stmt.setInt(2, count);
|
||||||
var rsp = stmt.executeQuery();
|
var rsp = stmt.executeQuery();
|
||||||
while (rsp.next() && domains.size() < count) {
|
while (rsp.next() && domains.size() < count) {
|
||||||
|
@ -5,10 +5,7 @@ import com.google.inject.Singleton;
|
|||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import nu.marginalia.browse.model.BrowseResult;
|
import nu.marginalia.browse.model.BrowseResult;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.model.EdgeUrl;
|
|
||||||
import nu.marginalia.db.DomainBlacklist;
|
import nu.marginalia.db.DomainBlacklist;
|
||||||
import nu.marginalia.model.id.EdgeId;
|
|
||||||
import nu.marginalia.model.id.EdgeIdCollection;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -26,7 +23,7 @@ public class DbBrowseDomainsSimilarOldAlgo {
|
|||||||
this.dataSource = dataSource;
|
this.dataSource = dataSource;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<BrowseResult> getDomainNeighborsAdjacent(EdgeId<EdgeDomain> domainId, DomainBlacklist blacklist, int count) {
|
public List<BrowseResult> getDomainNeighborsAdjacent(int domainId, DomainBlacklist blacklist, int count) {
|
||||||
final Set<BrowseResult> domains = new HashSet<>(count*3);
|
final Set<BrowseResult> domains = new HashSet<>(count*3);
|
||||||
|
|
||||||
final String q = """
|
final String q = """
|
||||||
@ -49,7 +46,7 @@ public class DbBrowseDomainsSimilarOldAlgo {
|
|||||||
try (var connection = dataSource.getConnection()) {
|
try (var connection = dataSource.getConnection()) {
|
||||||
try (var stmt = connection.prepareStatement(q)) {
|
try (var stmt = connection.prepareStatement(q)) {
|
||||||
stmt.setFetchSize(count);
|
stmt.setFetchSize(count);
|
||||||
stmt.setInt(1, domainId.id());
|
stmt.setInt(1, domainId);
|
||||||
stmt.setInt(2, count);
|
stmt.setInt(2, count);
|
||||||
var rsp = stmt.executeQuery();
|
var rsp = stmt.executeQuery();
|
||||||
while (rsp.next()) {
|
while (rsp.next()) {
|
||||||
@ -78,7 +75,7 @@ public class DbBrowseDomainsSimilarOldAlgo {
|
|||||||
try (var stmt = connection.prepareStatement(q2)) {
|
try (var stmt = connection.prepareStatement(q2)) {
|
||||||
|
|
||||||
stmt.setFetchSize(count/2);
|
stmt.setFetchSize(count/2);
|
||||||
stmt.setInt(1, domainId.id());
|
stmt.setInt(1, domainId);
|
||||||
stmt.setInt(2, count/2 - domains.size());
|
stmt.setInt(2, count/2 - domains.size());
|
||||||
var rsp = stmt.executeQuery();
|
var rsp = stmt.executeQuery();
|
||||||
while (rsp.next() && domains.size() < count/2) {
|
while (rsp.next() && domains.size() < count/2) {
|
||||||
@ -109,7 +106,7 @@ public class DbBrowseDomainsSimilarOldAlgo {
|
|||||||
LIMIT ?""";
|
LIMIT ?""";
|
||||||
try (var stmt = connection.prepareStatement(q3)) {
|
try (var stmt = connection.prepareStatement(q3)) {
|
||||||
stmt.setFetchSize(count/2);
|
stmt.setFetchSize(count/2);
|
||||||
stmt.setInt(1, domainId.id());
|
stmt.setInt(1, domainId);
|
||||||
stmt.setInt(2, count/2 - domains.size());
|
stmt.setInt(2, count/2 - domains.size());
|
||||||
|
|
||||||
var rsp = stmt.executeQuery();
|
var rsp = stmt.executeQuery();
|
||||||
@ -165,49 +162,4 @@ public class DbBrowseDomainsSimilarOldAlgo {
|
|||||||
return domains;
|
return domains;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private <T> String idList(EdgeIdCollection<EdgeUrl> ids) {
|
|
||||||
StringJoiner j = new StringJoiner(",", "(", ")");
|
|
||||||
for (var id : ids.values()) {
|
|
||||||
j.add(Integer.toString(id));
|
|
||||||
}
|
|
||||||
return j.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<BrowseResult> getBrowseResultFromUrlIds(EdgeIdCollection<EdgeUrl> urlIds) {
|
|
||||||
if (urlIds.isEmpty())
|
|
||||||
return Collections.emptyList();
|
|
||||||
|
|
||||||
List<BrowseResult> ret = new ArrayList<>(urlIds.size());
|
|
||||||
|
|
||||||
try (var conn = dataSource.getConnection()) {
|
|
||||||
try (var stmt = conn.createStatement()) {
|
|
||||||
|
|
||||||
String inStmt = idList(urlIds);
|
|
||||||
|
|
||||||
var rsp = stmt.executeQuery("""
|
|
||||||
SELECT DOMAIN_ID, DOMAIN_NAME
|
|
||||||
FROM EC_URL_VIEW
|
|
||||||
INNER JOIN DOMAIN_METADATA ON EC_URL_VIEW.DOMAIN_ID=DOMAIN_METADATA.ID
|
|
||||||
WHERE
|
|
||||||
KNOWN_URLS<5000
|
|
||||||
AND QUALITY>-10
|
|
||||||
AND EC_URL_VIEW.ID IN
|
|
||||||
""" + inStmt); // this injection is safe, inStmt is derived from concatenating a list of integers
|
|
||||||
while (rsp.next()) {
|
|
||||||
int id = rsp.getInt(1);
|
|
||||||
String domain = rsp.getString(2);
|
|
||||||
|
|
||||||
ret.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (SQLException ex) {
|
|
||||||
logger.error("SQL error", ex);
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -4,9 +4,7 @@ import com.google.common.base.Strings;
|
|||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
|
||||||
import nu.marginalia.db.DbDomainQueries;
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
import nu.marginalia.model.id.EdgeId;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
@ -30,7 +28,7 @@ public class ScreenshotService {
|
|||||||
this.dataSource = dataSource;
|
this.dataSource = dataSource;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasScreenshot(EdgeId<EdgeDomain> domainId) {
|
public boolean hasScreenshot(int domainId) {
|
||||||
try (var conn = dataSource.getConnection();
|
try (var conn = dataSource.getConnection();
|
||||||
var ps = conn.prepareStatement("""
|
var ps = conn.prepareStatement("""
|
||||||
SELECT TRUE
|
SELECT TRUE
|
||||||
@ -38,7 +36,7 @@ public class ScreenshotService {
|
|||||||
INNER JOIN EC_DOMAIN ON EC_DOMAIN.DOMAIN_NAME=DATA_DOMAIN_SCREENSHOT.DOMAIN_NAME
|
INNER JOIN EC_DOMAIN ON EC_DOMAIN.DOMAIN_NAME=DATA_DOMAIN_SCREENSHOT.DOMAIN_NAME
|
||||||
WHERE EC_DOMAIN.ID=?
|
WHERE EC_DOMAIN.ID=?
|
||||||
""")) {
|
""")) {
|
||||||
ps.setInt(1, domainId.id());
|
ps.setInt(1, domainId);
|
||||||
var rs = ps.executeQuery();
|
var rs = ps.executeQuery();
|
||||||
if (rs.next()) {
|
if (rs.next()) {
|
||||||
return rs.getBoolean(1);
|
return rs.getBoolean(1);
|
||||||
@ -86,7 +84,7 @@ public class ScreenshotService {
|
|||||||
|
|
||||||
private Object serveSvgPlaceholder(Response response, int id) {
|
private Object serveSvgPlaceholder(Response response, int id) {
|
||||||
|
|
||||||
var name = domainQueries.getDomain(new EdgeId<>(id)).map(Object::toString)
|
var name = domainQueries.getDomain(id).map(Object::toString)
|
||||||
.orElse("[Screenshot Not Yet Captured]");
|
.orElse("[Screenshot Not Yet Captured]");
|
||||||
|
|
||||||
response.type("image/svg+xml");
|
response.type("image/svg+xml");
|
||||||
|
@ -15,9 +15,6 @@ import nu.marginalia.lexicon.KeywordLexicon;
|
|||||||
import nu.marginalia.lexicon.journal.KeywordLexiconJournal;
|
import nu.marginalia.lexicon.journal.KeywordLexiconJournal;
|
||||||
import nu.marginalia.lexicon.journal.KeywordLexiconJournalMode;
|
import nu.marginalia.lexicon.journal.KeywordLexiconJournalMode;
|
||||||
import nu.marginalia.model.idx.DocumentMetadata;
|
import nu.marginalia.model.idx.DocumentMetadata;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
|
||||||
import nu.marginalia.model.EdgeUrl;
|
|
||||||
import nu.marginalia.model.id.EdgeId;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
@ -11,7 +11,6 @@ import nu.marginalia.db.storage.model.FileStorageId;
|
|||||||
import nu.marginalia.db.storage.model.FileStorageType;
|
import nu.marginalia.db.storage.model.FileStorageType;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.model.gson.GsonFactory;
|
import nu.marginalia.model.gson.GsonFactory;
|
||||||
import nu.marginalia.model.id.EdgeIdList;
|
|
||||||
import nu.marginalia.renderer.RendererFactory;
|
import nu.marginalia.renderer.RendererFactory;
|
||||||
import nu.marginalia.screenshot.ScreenshotService;
|
import nu.marginalia.screenshot.ScreenshotService;
|
||||||
import nu.marginalia.service.server.*;
|
import nu.marginalia.service.server.*;
|
||||||
@ -224,7 +223,7 @@ public class ControlService extends Service {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
randomExplorationService.removeRandomDomains(new EdgeIdList<>(idList.toArray()));
|
randomExplorationService.removeRandomDomains(idList.toArray());
|
||||||
|
|
||||||
String after = request.queryParams("after");
|
String after = request.queryParams("after");
|
||||||
|
|
||||||
|
@ -2,8 +2,6 @@ package nu.marginalia.control.svc;
|
|||||||
|
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
|
||||||
import nu.marginalia.model.id.EdgeIdList;
|
|
||||||
|
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
@ -18,7 +16,7 @@ public class RandomExplorationService {
|
|||||||
this.dataSource = dataSource;
|
this.dataSource = dataSource;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void removeRandomDomains(EdgeIdList<EdgeDomain> ids) throws SQLException {
|
public void removeRandomDomains(int[] ids) throws SQLException {
|
||||||
try (var conn = dataSource.getConnection();
|
try (var conn = dataSource.getConnection();
|
||||||
var stmt = conn.prepareStatement("""
|
var stmt = conn.prepareStatement("""
|
||||||
DELETE FROM EC_RANDOM_DOMAINS
|
DELETE FROM EC_RANDOM_DOMAINS
|
||||||
@ -27,7 +25,7 @@ public class RandomExplorationService {
|
|||||||
"""))
|
"""))
|
||||||
{
|
{
|
||||||
for (var id : ids) {
|
for (var id : ids) {
|
||||||
stmt.setInt(1, id.id());
|
stmt.setInt(1, id);
|
||||||
stmt.addBatch();
|
stmt.addBatch();
|
||||||
}
|
}
|
||||||
stmt.executeBatch();
|
stmt.executeBatch();
|
||||||
|
@ -2,13 +2,12 @@ package nu.marginalia.index.svc;
|
|||||||
|
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.google.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
|
import gnu.trove.list.TIntList;
|
||||||
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
|
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
import nu.marginalia.db.DomainTypes;
|
import nu.marginalia.db.DomainTypes;
|
||||||
import nu.marginalia.index.IndexServicesFactory;
|
import nu.marginalia.index.IndexServicesFactory;
|
||||||
import nu.marginalia.index.searchset.SearchSet;
|
import nu.marginalia.index.searchset.SearchSet;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
|
||||||
import nu.marginalia.model.id.EdgeIdList;
|
|
||||||
import nu.marginalia.ranking.ReversePageRank;
|
import nu.marginalia.ranking.ReversePageRank;
|
||||||
import nu.marginalia.ranking.StandardPageRank;
|
import nu.marginalia.ranking.StandardPageRank;
|
||||||
import nu.marginalia.ranking.accumulator.RankingResultHashMapAccumulator;
|
import nu.marginalia.ranking.accumulator.RankingResultHashMapAccumulator;
|
||||||
@ -168,7 +167,7 @@ public class IndexSearchSetsService {
|
|||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void updateBlogsSet() {
|
public void updateBlogsSet() {
|
||||||
EdgeIdList<EdgeDomain> knownDomains = domainTypes.getKnownDomainsByType(DomainTypes.Type.BLOG);
|
TIntList knownDomains = domainTypes.getKnownDomainsByType(DomainTypes.Type.BLOG);
|
||||||
|
|
||||||
if (knownDomains.isEmpty()) {
|
if (knownDomains.isEmpty()) {
|
||||||
// FIXME: We don't want to reload the entire list every time, but we do want to do it sometimes. Actor maybe?
|
// FIXME: We don't want to reload the entire list every time, but we do want to do it sometimes. Actor maybe?
|
||||||
@ -177,7 +176,7 @@ public class IndexSearchSetsService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
synchronized (this) {
|
synchronized (this) {
|
||||||
blogsSet = new RankingSearchSet(SearchSetIdentifier.BLOGS, blogsSet.source, new IntOpenHashSet(knownDomains.values()));
|
blogsSet = new RankingSearchSet(SearchSetIdentifier.BLOGS, blogsSet.source, new IntOpenHashSet(knownDomains.toArray()));
|
||||||
blogsSet.write();
|
blogsSet.write();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -106,16 +106,11 @@ public class SearchOperator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private int getDomainId(String domain) {
|
private int getDomainId(String domain) {
|
||||||
int domainId = -1;
|
if (domain == null) {
|
||||||
try {
|
return -1;
|
||||||
if (domain != null) {
|
|
||||||
return domainQueries.getDomainId(new EdgeDomain(domain)).id();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
catch (NoSuchElementException ex) {
|
|
||||||
|
|
||||||
}
|
return domainQueries.tryGetDomainId(new EdgeDomain(domain)).orElse(-1);
|
||||||
return domainId;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<String> getProblems(Context ctx, String evalResult, List<UrlDetails> queryResults, SearchQuery processedQuery) {
|
private List<String> getProblems(Context ctx, String evalResult, List<UrlDetails> queryResults, SearchQuery processedQuery) {
|
||||||
|
@ -10,7 +10,6 @@ import nu.marginalia.db.storage.model.FileStorageType;
|
|||||||
import nu.marginalia.linkdb.LinkdbReader;
|
import nu.marginalia.linkdb.LinkdbReader;
|
||||||
import nu.marginalia.model.gson.GsonFactory;
|
import nu.marginalia.model.gson.GsonFactory;
|
||||||
import nu.marginalia.search.client.SearchMqEndpoints;
|
import nu.marginalia.search.client.SearchMqEndpoints;
|
||||||
import nu.marginalia.search.db.DbUrlDetailsQuery;
|
|
||||||
import nu.marginalia.search.svc.SearchFrontPageService;
|
import nu.marginalia.search.svc.SearchFrontPageService;
|
||||||
import nu.marginalia.search.svc.*;
|
import nu.marginalia.search.svc.*;
|
||||||
import nu.marginalia.service.control.ServiceEventLog;
|
import nu.marginalia.service.control.ServiceEventLog;
|
||||||
@ -30,7 +29,6 @@ import java.nio.file.Path;
|
|||||||
public class SearchService extends Service {
|
public class SearchService extends Service {
|
||||||
|
|
||||||
private final WebsiteUrl websiteUrl;
|
private final WebsiteUrl websiteUrl;
|
||||||
private final DbUrlDetailsQuery dbUrlDetailsQuery;
|
|
||||||
private final StaticResources staticResources;
|
private final StaticResources staticResources;
|
||||||
private final FileStorageService fileStorageService;
|
private final FileStorageService fileStorageService;
|
||||||
private final LinkdbReader linkdbReader;
|
private final LinkdbReader linkdbReader;
|
||||||
@ -42,7 +40,6 @@ public class SearchService extends Service {
|
|||||||
@Inject
|
@Inject
|
||||||
public SearchService(BaseServiceParams params,
|
public SearchService(BaseServiceParams params,
|
||||||
WebsiteUrl websiteUrl,
|
WebsiteUrl websiteUrl,
|
||||||
DbUrlDetailsQuery dbUrlDetailsQuery,
|
|
||||||
StaticResources staticResources,
|
StaticResources staticResources,
|
||||||
SearchFrontPageService frontPageService,
|
SearchFrontPageService frontPageService,
|
||||||
SearchErrorPageService errorPageService,
|
SearchErrorPageService errorPageService,
|
||||||
@ -57,7 +54,6 @@ public class SearchService extends Service {
|
|||||||
|
|
||||||
this.eventLog = params.eventLog;
|
this.eventLog = params.eventLog;
|
||||||
this.websiteUrl = websiteUrl;
|
this.websiteUrl = websiteUrl;
|
||||||
this.dbUrlDetailsQuery = dbUrlDetailsQuery;
|
|
||||||
this.staticResources = staticResources;
|
this.staticResources = staticResources;
|
||||||
this.fileStorageService = fileStorageService;
|
this.fileStorageService = fileStorageService;
|
||||||
this.linkdbReader = linkdbReader;
|
this.linkdbReader = linkdbReader;
|
||||||
@ -91,12 +87,6 @@ public class SearchService extends Service {
|
|||||||
Spark.awaitInitialization();
|
Spark.awaitInitialization();
|
||||||
}
|
}
|
||||||
|
|
||||||
@MqNotification(endpoint = SearchMqEndpoints.FLUSH_CACHES)
|
|
||||||
public void flushCaches(String unusedArg) {
|
|
||||||
logger.info("Flushing caches");
|
|
||||||
dbUrlDetailsQuery.clearCaches();
|
|
||||||
}
|
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
@MqNotification(endpoint = SearchMqEndpoints.SWITCH_LINKDB)
|
@MqNotification(endpoint = SearchMqEndpoints.SWITCH_LINKDB)
|
||||||
public void switchLinkdb(String unusedArg) {
|
public void switchLinkdb(String unusedArg) {
|
||||||
|
@ -61,13 +61,13 @@ public class SiteListCommand implements SearchCommandInterface {
|
|||||||
|
|
||||||
List<UrlDetails> resultSet;
|
List<UrlDetails> resultSet;
|
||||||
Path screenshotPath = null;
|
Path screenshotPath = null;
|
||||||
Integer domainId = -1;
|
int domainId = -1;
|
||||||
if (null != domain) {
|
if (null != domain) {
|
||||||
var dumbQuery = queryFactory.createQuery(SearchProfile.CORPO, 100, 100, "site:"+domain);
|
var dumbQuery = queryFactory.createQuery(SearchProfile.CORPO, 100, 100, "site:"+domain);
|
||||||
resultSet = searchQueryIndexService.executeQuery(ctx, dumbQuery);
|
resultSet = searchQueryIndexService.executeQuery(ctx, dumbQuery);
|
||||||
var maybeId = domainQueries.tryGetDomainId(domain);
|
var maybeId = domainQueries.tryGetDomainId(domain);
|
||||||
if (maybeId.isPresent()) {
|
if (maybeId.isPresent()) {
|
||||||
domainId = maybeId.get().id();
|
domainId = maybeId.getAsInt();
|
||||||
screenshotPath = Path.of("/screenshot/" + domainId);
|
screenshotPath = Path.of("/screenshot/" + domainId);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -1,112 +0,0 @@
|
|||||||
package nu.marginalia.search.db;
|
|
||||||
|
|
||||||
import com.google.common.base.Strings;
|
|
||||||
import com.google.common.cache.Cache;
|
|
||||||
import com.google.common.cache.CacheBuilder;
|
|
||||||
import com.google.inject.Inject;
|
|
||||||
import com.zaxxer.hikari.HikariDataSource;
|
|
||||||
import lombok.SneakyThrows;
|
|
||||||
import nu.marginalia.model.EdgeUrl;
|
|
||||||
import nu.marginalia.model.crawl.DomainIndexingState;
|
|
||||||
import nu.marginalia.model.id.EdgeId;
|
|
||||||
import nu.marginalia.model.id.EdgeIdCollection;
|
|
||||||
import nu.marginalia.search.model.PageScoreAdjustment;
|
|
||||||
import nu.marginalia.search.model.UrlDetails;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
|
|
||||||
public class DbUrlDetailsQuery {
|
|
||||||
private final HikariDataSource dataSource;
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
|
||||||
|
|
||||||
private final Cache<EdgeUrl, EdgeId<EdgeUrl>> urlIdCache = CacheBuilder.newBuilder().maximumSize(100_000).build();
|
|
||||||
|
|
||||||
public static double QUALITY_LOWER_BOUND_CUTOFF = -15.;
|
|
||||||
@Inject
|
|
||||||
public DbUrlDetailsQuery(HikariDataSource dataSource)
|
|
||||||
{
|
|
||||||
this.dataSource = dataSource;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public synchronized void clearCaches()
|
|
||||||
{
|
|
||||||
urlIdCache.invalidateAll();
|
|
||||||
}
|
|
||||||
|
|
||||||
private <T> String idList(EdgeIdCollection<EdgeUrl> ids) {
|
|
||||||
StringJoiner j = new StringJoiner(",", "(", ")");
|
|
||||||
for (var id : ids.values()) {
|
|
||||||
j.add(Integer.toString(id));
|
|
||||||
}
|
|
||||||
return j.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
@SneakyThrows
|
|
||||||
public List<UrlDetails> getUrlDetailsMulti(EdgeIdCollection<EdgeUrl> ids) {
|
|
||||||
if (ids.isEmpty()) {
|
|
||||||
return Collections.emptyList();
|
|
||||||
}
|
|
||||||
List<UrlDetails> result = new ArrayList<>(ids.size());
|
|
||||||
|
|
||||||
try (var connection = dataSource.getConnection()) {
|
|
||||||
|
|
||||||
String idString = idList(ids);
|
|
||||||
|
|
||||||
try (var stmt = connection.prepareStatement(
|
|
||||||
"""
|
|
||||||
SELECT ID, DOMAIN_ID, URL,
|
|
||||||
TITLE, DESCRIPTION,
|
|
||||||
QUALITY,
|
|
||||||
WORDS_TOTAL, FORMAT, FEATURES,
|
|
||||||
IP, DOMAIN_STATE,
|
|
||||||
DATA_HASH
|
|
||||||
FROM EC_URL_VIEW
|
|
||||||
WHERE TITLE IS NOT NULL
|
|
||||||
AND ID IN
|
|
||||||
""" + idString)) {
|
|
||||||
stmt.setFetchSize(ids.size());
|
|
||||||
|
|
||||||
var rsp = stmt.executeQuery();
|
|
||||||
while (rsp.next()) {
|
|
||||||
var val = new UrlDetails(rsp.getInt(1),
|
|
||||||
rsp.getInt(2),
|
|
||||||
new EdgeUrl(rsp.getString(3)),
|
|
||||||
rsp.getString(4), // title
|
|
||||||
rsp.getString(5), // description
|
|
||||||
rsp.getDouble(6), // quality
|
|
||||||
rsp.getInt(7), // wordsTotal
|
|
||||||
rsp.getString(8), // format
|
|
||||||
rsp.getInt(9), // features
|
|
||||||
rsp.getString(10), // ip
|
|
||||||
DomainIndexingState.valueOf(rsp.getString(11)), // domainState
|
|
||||||
rsp.getLong(12), // dataHash
|
|
||||||
PageScoreAdjustment.zero(), // urlQualityAdjustment
|
|
||||||
Integer.MAX_VALUE, // rankingId
|
|
||||||
Double.MAX_VALUE, // termScore
|
|
||||||
1, // resultsFromSameDomain
|
|
||||||
"", // positions
|
|
||||||
null, // result item
|
|
||||||
null // keyword scores
|
|
||||||
);
|
|
||||||
if (val.urlQuality <= QUALITY_LOWER_BOUND_CUTOFF
|
|
||||||
&& Strings.isNullOrEmpty(val.description)
|
|
||||||
&& val.url.path.length() > 1) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
result.add(val);
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
@ -4,7 +4,6 @@ import com.google.inject.Inject;
|
|||||||
import com.google.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
import nu.marginalia.browse.model.BrowseResult;
|
import nu.marginalia.browse.model.BrowseResult;
|
||||||
import nu.marginalia.screenshot.ScreenshotService;
|
import nu.marginalia.screenshot.ScreenshotService;
|
||||||
import nu.marginalia.model.id.EdgeId;
|
|
||||||
|
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
@ -22,7 +21,7 @@ public class BrowseResultCleaner {
|
|||||||
public Predicate<BrowseResult> shouldRemoveResultPredicate() {
|
public Predicate<BrowseResult> shouldRemoveResultPredicate() {
|
||||||
Set<String> domainHashes = new HashSet<>(100);
|
Set<String> domainHashes = new HashSet<>(100);
|
||||||
|
|
||||||
return (res) -> !screenshotService.hasScreenshot(new EdgeId<>(res.domainId()))
|
return (res) -> !screenshotService.hasScreenshot(res.domainId())
|
||||||
|| !domainHashes.add(res.domainHash());
|
|| !domainHashes.add(res.domainHash());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -5,7 +5,6 @@ import lombok.SneakyThrows;
|
|||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.model.crawl.DomainIndexingState;
|
import nu.marginalia.model.crawl.DomainIndexingState;
|
||||||
import nu.marginalia.db.DbDomainQueries;
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
import nu.marginalia.model.id.EdgeId;
|
|
||||||
import nu.marginalia.search.model.DomainInformation;
|
import nu.marginalia.search.model.DomainInformation;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
@ -13,10 +12,7 @@ import org.slf4j.LoggerFactory;
|
|||||||
import javax.inject.Inject;
|
import javax.inject.Inject;
|
||||||
import javax.inject.Singleton;
|
import javax.inject.Singleton;
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Optional;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
TODO: This class needs to be refactored, a lot of
|
TODO: This class needs to be refactored, a lot of
|
||||||
@ -42,10 +38,11 @@ public class DomainInformationService {
|
|||||||
|
|
||||||
public Optional<DomainInformation> domainInfo(String site) {
|
public Optional<DomainInformation> domainInfo(String site) {
|
||||||
|
|
||||||
EdgeId<EdgeDomain> domainId = getDomainFromPartial(site);
|
OptionalInt maybeDomainId = getDomainFromPartial(site);
|
||||||
if (domainId == null) {
|
if (maybeDomainId.isEmpty()) {
|
||||||
return Optional.empty();
|
return Optional.empty();
|
||||||
}
|
}
|
||||||
|
int domainId = maybeDomainId.getAsInt();
|
||||||
|
|
||||||
Optional<EdgeDomain> domain = dbDomainQueries.getDomain(domainId);
|
Optional<EdgeDomain> domain = dbDomainQueries.getDomain(domainId);
|
||||||
if (domain.isEmpty()) {
|
if (domain.isEmpty()) {
|
||||||
@ -85,7 +82,7 @@ public class DomainInformationService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
private boolean inCrawlQueue(EdgeId<EdgeDomain> domainId) {
|
private boolean inCrawlQueue(int domainId) {
|
||||||
try (var connection = dataSource.getConnection()) {
|
try (var connection = dataSource.getConnection()) {
|
||||||
try (var stmt = connection.prepareStatement(
|
try (var stmt = connection.prepareStatement(
|
||||||
"""
|
"""
|
||||||
@ -94,21 +91,15 @@ public class DomainInformationService {
|
|||||||
WHERE EC_DOMAIN.ID=?
|
WHERE EC_DOMAIN.ID=?
|
||||||
"""))
|
"""))
|
||||||
{
|
{
|
||||||
stmt.setInt(1, domainId.id());
|
stmt.setInt(1, domainId);
|
||||||
var rsp = stmt.executeQuery();
|
var rsp = stmt.executeQuery();
|
||||||
return rsp.next();
|
return rsp.next();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private EdgeId<EdgeDomain> getDomainFromPartial(String site) {
|
private OptionalInt getDomainFromPartial(String site) {
|
||||||
try {
|
return dbDomainQueries.tryGetDomainId(new EdgeDomain(site));
|
||||||
return dbDomainQueries.getDomainId(new EdgeDomain(site));
|
|
||||||
}
|
|
||||||
catch (Exception ex) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
@ -125,11 +116,11 @@ public class DomainInformationService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public int getPagesKnown(EdgeId<EdgeDomain> domainId) {
|
public int getPagesKnown(int domainId) {
|
||||||
try (var connection = dataSource.getConnection()) {
|
try (var connection = dataSource.getConnection()) {
|
||||||
|
|
||||||
try (var stmt = connection.prepareStatement("SELECT KNOWN_URLS FROM DOMAIN_METADATA WHERE ID=?")) {
|
try (var stmt = connection.prepareStatement("SELECT KNOWN_URLS FROM DOMAIN_METADATA WHERE ID=?")) {
|
||||||
stmt.setInt(1, domainId.id());
|
stmt.setInt(1, domainId);
|
||||||
var rsp = stmt.executeQuery();
|
var rsp = stmt.executeQuery();
|
||||||
if (rsp.next()) {
|
if (rsp.next()) {
|
||||||
return rsp.getInt(1);
|
return rsp.getInt(1);
|
||||||
@ -142,11 +133,11 @@ public class DomainInformationService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public int getPagesVisited(EdgeId<EdgeDomain> domainId) {
|
public int getPagesVisited(int domainId) {
|
||||||
try (var connection = dataSource.getConnection()) {
|
try (var connection = dataSource.getConnection()) {
|
||||||
|
|
||||||
try (var stmt = connection.prepareStatement("SELECT VISITED_URLS FROM DOMAIN_METADATA WHERE ID=?")) {
|
try (var stmt = connection.prepareStatement("SELECT VISITED_URLS FROM DOMAIN_METADATA WHERE ID=?")) {
|
||||||
stmt.setInt(1, domainId.id());
|
stmt.setInt(1, domainId);
|
||||||
var rsp = stmt.executeQuery();
|
var rsp = stmt.executeQuery();
|
||||||
if (rsp.next()) {
|
if (rsp.next()) {
|
||||||
return rsp.getInt(1);
|
return rsp.getInt(1);
|
||||||
@ -160,11 +151,11 @@ public class DomainInformationService {
|
|||||||
|
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public int getPagesIndexed(EdgeId<EdgeDomain> domainId) {
|
public int getPagesIndexed(int domainId) {
|
||||||
try (var connection = dataSource.getConnection()) {
|
try (var connection = dataSource.getConnection()) {
|
||||||
|
|
||||||
try (var stmt = connection.prepareStatement("SELECT GOOD_URLS FROM DOMAIN_METADATA WHERE ID=?")) {
|
try (var stmt = connection.prepareStatement("SELECT GOOD_URLS FROM DOMAIN_METADATA WHERE ID=?")) {
|
||||||
stmt.setInt(1, domainId.id());
|
stmt.setInt(1, domainId);
|
||||||
var rsp = stmt.executeQuery();
|
var rsp = stmt.executeQuery();
|
||||||
if (rsp.next()) {
|
if (rsp.next()) {
|
||||||
return rsp.getInt(1);
|
return rsp.getInt(1);
|
||||||
@ -177,11 +168,11 @@ public class DomainInformationService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public int getIncomingLinks(EdgeId<EdgeDomain> domainId) {
|
public int getIncomingLinks(int domainId) {
|
||||||
try (var connection = dataSource.getConnection()) {
|
try (var connection = dataSource.getConnection()) {
|
||||||
|
|
||||||
try (var stmt = connection.prepareStatement("SELECT COUNT(ID) FROM EC_DOMAIN_LINK WHERE DEST_DOMAIN_ID=?")) {
|
try (var stmt = connection.prepareStatement("SELECT COUNT(ID) FROM EC_DOMAIN_LINK WHERE DEST_DOMAIN_ID=?")) {
|
||||||
stmt.setInt(1, domainId.id());
|
stmt.setInt(1, domainId);
|
||||||
var rsp = stmt.executeQuery();
|
var rsp = stmt.executeQuery();
|
||||||
if (rsp.next()) {
|
if (rsp.next()) {
|
||||||
return rsp.getInt(1);
|
return rsp.getInt(1);
|
||||||
@ -193,11 +184,11 @@ public class DomainInformationService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public int getOutboundLinks(EdgeId<EdgeDomain> domainId) {
|
public int getOutboundLinks(int domainId) {
|
||||||
try (var connection = dataSource.getConnection()) {
|
try (var connection = dataSource.getConnection()) {
|
||||||
|
|
||||||
try (var stmt = connection.prepareStatement("SELECT COUNT(ID) FROM EC_DOMAIN_LINK WHERE SOURCE_DOMAIN_ID=?")) {
|
try (var stmt = connection.prepareStatement("SELECT COUNT(ID) FROM EC_DOMAIN_LINK WHERE SOURCE_DOMAIN_ID=?")) {
|
||||||
stmt.setInt(1, domainId.id());
|
stmt.setInt(1, domainId);
|
||||||
var rsp = stmt.executeQuery();
|
var rsp = stmt.executeQuery();
|
||||||
if (rsp.next()) {
|
if (rsp.next()) {
|
||||||
return rsp.getInt(1);
|
return rsp.getInt(1);
|
||||||
@ -210,11 +201,11 @@ public class DomainInformationService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public double getDomainQuality(EdgeId<EdgeDomain> domainId) {
|
public double getDomainQuality(int domainId) {
|
||||||
try (var connection = dataSource.getConnection()) {
|
try (var connection = dataSource.getConnection()) {
|
||||||
|
|
||||||
try (var stmt = connection.prepareStatement("SELECT QUALITY FROM EC_DOMAIN WHERE ID=?")) {
|
try (var stmt = connection.prepareStatement("SELECT QUALITY FROM EC_DOMAIN WHERE ID=?")) {
|
||||||
stmt.setInt(1, domainId.id());
|
stmt.setInt(1, domainId);
|
||||||
var rsp = stmt.executeQuery();
|
var rsp = stmt.executeQuery();
|
||||||
if (rsp.next()) {
|
if (rsp.next()) {
|
||||||
return rsp.getDouble(1);
|
return rsp.getDouble(1);
|
||||||
@ -226,11 +217,11 @@ public class DomainInformationService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public DomainIndexingState getDomainState(EdgeId<EdgeDomain> domainId) {
|
public DomainIndexingState getDomainState(int domainId) {
|
||||||
try (var connection = dataSource.getConnection()) {
|
try (var connection = dataSource.getConnection()) {
|
||||||
|
|
||||||
try (var stmt = connection.prepareStatement("SELECT STATE FROM EC_DOMAIN WHERE ID=?")) {
|
try (var stmt = connection.prepareStatement("SELECT STATE FROM EC_DOMAIN WHERE ID=?")) {
|
||||||
stmt.setInt(1, domainId.id());
|
stmt.setInt(1, domainId);
|
||||||
var rsp = stmt.executeQuery();
|
var rsp = stmt.executeQuery();
|
||||||
if (rsp.next()) {
|
if (rsp.next()) {
|
||||||
return DomainIndexingState.valueOf(rsp.getString(1));
|
return DomainIndexingState.valueOf(rsp.getString(1));
|
||||||
@ -244,11 +235,11 @@ public class DomainInformationService {
|
|||||||
return DomainIndexingState.ERROR;
|
return DomainIndexingState.ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<EdgeDomain> getLinkingDomains(EdgeId<EdgeDomain> domainId) {
|
public List<EdgeDomain> getLinkingDomains(int domainId) {
|
||||||
try (var connection = dataSource.getConnection()) {
|
try (var connection = dataSource.getConnection()) {
|
||||||
List<EdgeDomain> results = new ArrayList<>(25);
|
List<EdgeDomain> results = new ArrayList<>(25);
|
||||||
try (var stmt = connection.prepareStatement("SELECT SOURCE_DOMAIN FROM EC_RELATED_LINKS_VIEW WHERE DEST_DOMAIN_ID=? ORDER BY SOURCE_DOMAIN_ID LIMIT 25")) {
|
try (var stmt = connection.prepareStatement("SELECT SOURCE_DOMAIN FROM EC_RELATED_LINKS_VIEW WHERE DEST_DOMAIN_ID=? ORDER BY SOURCE_DOMAIN_ID LIMIT 25")) {
|
||||||
stmt.setInt(1, domainId.id());
|
stmt.setInt(1, domainId);
|
||||||
var rsp = stmt.executeQuery();
|
var rsp = stmt.executeQuery();
|
||||||
while (rsp.next()) {
|
while (rsp.next()) {
|
||||||
results.add(new EdgeDomain(rsp.getString(1)));
|
results.add(new EdgeDomain(rsp.getString(1)));
|
||||||
@ -264,11 +255,11 @@ public class DomainInformationService {
|
|||||||
return Collections.emptyList();
|
return Collections.emptyList();
|
||||||
}
|
}
|
||||||
|
|
||||||
public double getRank(EdgeId<EdgeDomain> domainId) {
|
public double getRank(int domainId) {
|
||||||
try (var connection = dataSource.getConnection()) {
|
try (var connection = dataSource.getConnection()) {
|
||||||
|
|
||||||
try (var stmt = connection.prepareStatement("SELECT IFNULL(RANK, 1) FROM EC_DOMAIN WHERE ID=?")) {
|
try (var stmt = connection.prepareStatement("SELECT IFNULL(RANK, 1) FROM EC_DOMAIN WHERE ID=?")) {
|
||||||
stmt.setInt(1, domainId.id());
|
stmt.setInt(1, domainId);
|
||||||
var rsp = stmt.executeQuery();
|
var rsp = stmt.executeQuery();
|
||||||
if (rsp.next()) {
|
if (rsp.next()) {
|
||||||
return rsp.getDouble(1);
|
return rsp.getDouble(1);
|
||||||
|
@ -4,7 +4,6 @@ import com.google.inject.Inject;
|
|||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import nu.marginalia.WebsiteUrl;
|
import nu.marginalia.WebsiteUrl;
|
||||||
import nu.marginalia.db.DbDomainQueries;
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
import nu.marginalia.model.id.EdgeId;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import spark.Request;
|
import spark.Request;
|
||||||
@ -61,7 +60,7 @@ public class SearchAddToCrawlQueueService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private String getDomainName(int id) {
|
private String getDomainName(int id) {
|
||||||
var domain = domainQueries.getDomain(new EdgeId<>(id));
|
var domain = domainQueries.getDomain(id);
|
||||||
if (domain.isEmpty())
|
if (domain.isEmpty())
|
||||||
Spark.halt(404);
|
Spark.halt(404);
|
||||||
return domain.get().toString();
|
return domain.get().toString();
|
||||||
|
@ -9,7 +9,6 @@ import nu.marginalia.db.DomainBlacklist;
|
|||||||
import nu.marginalia.renderer.MustacheRenderer;
|
import nu.marginalia.renderer.MustacheRenderer;
|
||||||
import nu.marginalia.renderer.RendererFactory;
|
import nu.marginalia.renderer.RendererFactory;
|
||||||
import nu.marginalia.screenshot.ScreenshotService;
|
import nu.marginalia.screenshot.ScreenshotService;
|
||||||
import nu.marginalia.model.id.EdgeId;
|
|
||||||
import nu.marginalia.service.server.*;
|
import nu.marginalia.service.server.*;
|
||||||
import org.jetbrains.annotations.NotNull;
|
import org.jetbrains.annotations.NotNull;
|
||||||
import spark.Request;
|
import spark.Request;
|
||||||
@ -156,7 +155,7 @@ public class DatingService extends Service {
|
|||||||
var session = sessionObjectOpt.get();
|
var session = sessionObjectOpt.get();
|
||||||
|
|
||||||
int id = Integer.parseInt(request.params("id"));
|
int id = Integer.parseInt(request.params("id"));
|
||||||
BrowseResult res = session.nextSimilar(new EdgeId<>(id), browseSimilarCosine, blacklist);
|
BrowseResult res = session.nextSimilar(id, browseSimilarCosine, blacklist);
|
||||||
|
|
||||||
res = findViableDomain(session, res);
|
res = findViableDomain(session, res);
|
||||||
|
|
||||||
@ -168,7 +167,7 @@ public class DatingService extends Service {
|
|||||||
|
|
||||||
@NotNull
|
@NotNull
|
||||||
private BrowseResult findViableDomain(DatingSessionObject session, BrowseResult res) {
|
private BrowseResult findViableDomain(DatingSessionObject session, BrowseResult res) {
|
||||||
while (!screenshotService.hasScreenshot(new EdgeId<>(res.domainId())) || session.isRecent(res)) {
|
while (!screenshotService.hasScreenshot(res.domainId()) || session.isRecent(res)) {
|
||||||
res = session.next(browseRandom, blacklist);
|
res = session.next(browseRandom, blacklist);
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
|
@ -3,9 +3,7 @@ package nu.marginalia.dating;
|
|||||||
import nu.marginalia.browse.DbBrowseDomainsRandom;
|
import nu.marginalia.browse.DbBrowseDomainsRandom;
|
||||||
import nu.marginalia.browse.DbBrowseDomainsSimilarCosine;
|
import nu.marginalia.browse.DbBrowseDomainsSimilarCosine;
|
||||||
import nu.marginalia.browse.model.BrowseResult;
|
import nu.marginalia.browse.model.BrowseResult;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
|
||||||
import nu.marginalia.db.DomainBlacklist;
|
import nu.marginalia.db.DomainBlacklist;
|
||||||
import nu.marginalia.model.id.EdgeId;
|
|
||||||
|
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
|
|
||||||
@ -29,8 +27,8 @@ public class DatingSessionObject {
|
|||||||
return queue.pollFirst();
|
return queue.pollFirst();
|
||||||
}
|
}
|
||||||
|
|
||||||
public BrowseResult nextSimilar(EdgeId<EdgeDomain> id, DbBrowseDomainsSimilarCosine adjacent, DomainBlacklist blacklist) {
|
public BrowseResult nextSimilar(int domainId, DbBrowseDomainsSimilarCosine adjacent, DomainBlacklist blacklist) {
|
||||||
adjacent.getDomainNeighborsAdjacentCosine(id, blacklist, 25).forEach(queue::addFirst);
|
adjacent.getDomainNeighborsAdjacentCosine(domainId, blacklist, 25).forEach(queue::addFirst);
|
||||||
|
|
||||||
while (queue.size() > MAX_QUEUE_SIZE) {
|
while (queue.size() > MAX_QUEUE_SIZE) {
|
||||||
queue.removeLast();
|
queue.removeLast();
|
||||||
|
@ -5,7 +5,6 @@ import lombok.SneakyThrows;
|
|||||||
import nu.marginalia.ProcessConfiguration;
|
import nu.marginalia.ProcessConfiguration;
|
||||||
import nu.marginalia.db.DbDomainQueries;
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.model.id.EdgeId;
|
|
||||||
import nu.marginalia.process.control.ProcessHeartbeat;
|
import nu.marginalia.process.control.ProcessHeartbeat;
|
||||||
import nu.marginalia.service.module.DatabaseModule;
|
import nu.marginalia.service.module.DatabaseModule;
|
||||||
|
|
||||||
@ -40,8 +39,7 @@ public class WebsiteAdjacenciesCalculator {
|
|||||||
System.out.println(Arrays.toString(domainName));
|
System.out.println(Arrays.toString(domainName));
|
||||||
|
|
||||||
int[] domainIds = Arrays.stream(domainName).map(EdgeDomain::new)
|
int[] domainIds = Arrays.stream(domainName).map(EdgeDomain::new)
|
||||||
.map(dataStoreDao::getDomainId)
|
.mapToInt(dataStoreDao::getDomainId)
|
||||||
.mapToInt(EdgeId::id)
|
|
||||||
.map(domainAliases::deAlias)
|
.map(domainAliases::deAlias)
|
||||||
.toArray();
|
.toArray();
|
||||||
|
|
||||||
@ -49,7 +47,7 @@ public class WebsiteAdjacenciesCalculator {
|
|||||||
findAdjacentDtoS(domainId, similarities -> {
|
findAdjacentDtoS(domainId, similarities -> {
|
||||||
for (var similarity : similarities.similarities()) {
|
for (var similarity : similarities.similarities()) {
|
||||||
if (adjacenciesData.isIndexedDomain(similarity.domainId)) System.out.print("*");
|
if (adjacenciesData.isIndexedDomain(similarity.domainId)) System.out.print("*");
|
||||||
System.out.println(dataStoreDao.getDomain(new EdgeId<>(similarity.domainId)).map(Object::toString).orElse("") + " " + prettyPercent(similarity.value));
|
System.out.println(dataStoreDao.getDomain(similarity.domainId).map(Object::toString).orElse("") + " " + prettyPercent(similarity.value));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user