From e9db8b6c1d9c50099253497e3e397a392f840e19 Mon Sep 17 00:00:00 2001
From: vlofgren
Date: Thu, 28 Jul 2022 18:58:51 +0200
Subject: [PATCH] Experimental domain-searching feature

---
Review notes (commentary placed after the three-dash separator; not part of
the commit message):

  What the patch does
  * EdgeDataStoreDaoImpl: the domain lookup query now ends with
    "ORDER BY RANK ASC", so the existing
    `while (rsp.next() && ret.size() < count)` loop keeps the first `count`
    rows in ascending RANK order rather than in whatever order the rows
    come back.
  * EdgeIndexClient.queryDomains: now takes a varargs list of
    specifications and returns a list of results. One POST to
    "/search-domain/" is issued per specification via concatMap, each with
    a one-second timeout. A request that fails or times out is dropped by
    onErrorComplete(), so the returned list can be shorter than `specs`.
  * EdgeSearchOperator.getDomainResults: builds one request per include
    term of every subquery that has no exclude terms and exactly one
    include term (previously only the first such subquery was used). The
    per-request results are flattened, de-duplicated with distinct(), and
    handed to getBrowseResultFromUrlIds(results, 5). With no matching
    subquery it returns an empty list without calling the index.

  Changes made in review
  * queryDomains: the concatMap lambda bound `s` but passed the whole
    `specs` array to postGet, so each request posted every specification
    instead of its own. It now passes `s`.
  * queryDomains: the return type is spelled out as
    List<EdgeDomainSearchResults>, matching the class token given to
    postGet and the `rs.results` access in getDomainResults.
  * getDomainResults: `results` is declared with `var`; the declared type
    in the reviewed copy was unreadable (`List>`).

  Caveats, to confirm before applying
  * The reviewed copy had lost its line breaks and indentation. Both are
    reconstructed here; the position of blank context lines and the
    indentation depth are best guesses.
  * Context and removed lines are reproduced as they appeared, which may
    be missing generic type arguments (e.g. `List`, `Future`). Check them
    against the target files.
  * The post-image blob ids on the `index` lines for EdgeIndexClient.java
    and EdgeSearchOperator.java predate the review changes above.

 .../edge/data/dao/EdgeDataStoreDaoImpl.java     |  2 +-
 .../wmsa/edge/index/client/EdgeIndexClient.java | 10 ++++++++--
 .../wmsa/edge/search/EdgeSearchOperator.java    | 17 +++++++++--------
 3 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java
index 08263759..8fff98c4 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java
@@ -277,7 +277,7 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao {
                 // this is safe, string cocatenation is of integers
                 String inStmt = urlId.stream().map(id -> Integer.toString(id.id())).collect(Collectors.joining(", ", "(", ")"));
 
-                var rsp = stmt.executeQuery("SELECT DOMAIN_ID, DOMAIN_NAME FROM EC_URL_VIEW INNER JOIN DOMAIN_METADATA ON EC_URL_VIEW.DOMAIN_ID=DOMAIN_METADATA.ID WHERE VISITED_URLS<500 AND QUALITY>-10 AND EC_URL_VIEW.ID IN " + inStmt);
+                var rsp = stmt.executeQuery("SELECT DOMAIN_ID, DOMAIN_NAME FROM EC_URL_VIEW INNER JOIN DOMAIN_METADATA ON EC_URL_VIEW.DOMAIN_ID=DOMAIN_METADATA.ID WHERE VISITED_URLS<500 AND QUALITY>-10 AND EC_URL_VIEW.ID IN " + inStmt + " ORDER BY RANK ASC");
                 while (rsp.next() && ret.size() < count) {
                     int id = rsp.getInt(1);
                     String domain = rsp.getString(2);
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/client/EdgeIndexClient.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/client/EdgeIndexClient.java
index 4a9c1737..5c9d65ad 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/client/EdgeIndexClient.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/client/EdgeIndexClient.java
@@ -65,8 +65,14 @@ public class EdgeIndexClient extends AbstractDynamicClient {
     }
 
     @CheckReturnValue
-    public EdgeDomainSearchResults queryDomains(Context ctx, EdgeDomainSearchSpecification specs) {
-        return this.postGet(ctx, "/search-domain/", specs, EdgeDomainSearchResults.class).blockingFirst();
+    public List<EdgeDomainSearchResults> queryDomains(Context ctx, EdgeDomainSearchSpecification... specs) {
+        return Observable.fromArray(specs)
+                .concatMap(s -> postGet(ctx, "/search-domain/", s, EdgeDomainSearchResults.class)
+                        .subscribeOn(Schedulers.io())
+                        .timeout(1, TimeUnit.SECONDS)
+                        .onErrorComplete())
+                .toList()
+                .blockingGet();
     }
 
 
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchOperator.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchOperator.java
index 008c82a1..fa540ce3 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchOperator.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchOperator.java
@@ -112,17 +112,18 @@ public class EdgeSearchOperator {
     }
 
     private List getDomainResults(Context ctx, EdgeSearchSpecification specs) {
-        List keywords = specs.subqueries.stream().filter(sq -> sq.searchTermsExclude.isEmpty() && sq.searchTermsInclude.size() == 1)
-                .findFirst().map(sq -> sq.searchTermsInclude).orElseGet(Collections::emptyList);
+        var requests = specs.subqueries.stream().filter(sq -> sq.searchTermsExclude.isEmpty() && sq.searchTermsInclude.size() == 1)
+                .flatMap(sq -> sq.searchTermsInclude.stream())
+                .map(keyword -> new EdgeDomainSearchSpecification(specs.buckets.get(0), IndexBlock.TitleKeywords, keyword, 1_000_000, 10, 20))
+                .toArray(EdgeDomainSearchSpecification[]::new);
 
-        if (keywords.size() == 1) {
-            var request = new EdgeDomainSearchSpecification(specs.buckets.get(0), IndexBlock.TitleKeywords, keywords.get(0), 1_000_000, 10, 20);
-            var response = indexClient.queryDomains(ctx, request);
+        if (requests.length == 0)
+            return Collections.emptyList();
 
-            return edgeDataStoreDao.getBrowseResultFromUrlIds(response.results, 5);
-        }
+        var results = indexClient.queryDomains(ctx, requests)
+                .stream().flatMap(rs -> rs.results.stream()).distinct().toList();
 
-        return Collections.emptyList();
+        return edgeDataStoreDao.getBrowseResultFromUrlIds(results, 5);
     }
 
     private String getEvalResult(@Nullable Future eval) {