Review notes (free text ahead of the first "diff --git" header; patch tools skip it):
  * EdgeIndexClient.queryDomains: the parameter changes from varargs to List, so
    Observable.fromArray(specs) would emit the whole list as ONE item and post it
    in a single /search-domain/ request. The hunk now switches to
    Observable.fromIterable(specs) so each specification is still sent separately.
  * NOTE(review): generic type arguments look stripped from this copy of the patch
    (e.g. "List> results", "Set> results"); they are left as found - confirm
    against the repository before applying.
  * NOTE(review): indentation and blank context lines were reconstructed from the
    hunk line counts; the EdgeSearchSubquery hunk is presumably a whitespace-only
    change whose exact difference is not recoverable here - confirm.

diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java
index 932cb207..ab0df876 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java
@@ -277,7 +277,7 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao {
                 // this is safe, string cocatenation is of integers
                 String inStmt = urlId.stream().map(id -> Integer.toString(id.id())).collect(Collectors.joining(", ", "(", ")"));
 
-                var rsp = stmt.executeQuery("SELECT DOMAIN_ID, DOMAIN_NAME FROM EC_URL_VIEW INNER JOIN DOMAIN_METADATA ON EC_URL_VIEW.DOMAIN_ID=DOMAIN_METADATA.ID WHERE VISITED_URLS<500 AND QUALITY>-10 AND EC_URL_VIEW.ID IN " + inStmt + " ORDER BY RANK ASC");
+                var rsp = stmt.executeQuery("SELECT DOMAIN_ID, DOMAIN_NAME FROM EC_URL_VIEW INNER JOIN DOMAIN_METADATA ON EC_URL_VIEW.DOMAIN_ID=DOMAIN_METADATA.ID WHERE VISITED_URLS<750 AND QUALITY>-10 AND EC_URL_VIEW.ID IN " + inStmt + " ORDER BY RANK ASC");
                 while (rsp.next()) {
                     int id = rsp.getInt(1);
                     String domain = rsp.getString(2);
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/client/EdgeIndexClient.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/client/EdgeIndexClient.java
index 91be4f93..01515e2c 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/client/EdgeIndexClient.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/client/EdgeIndexClient.java
@@ -65,7 +65,7 @@ public class EdgeIndexClient extends AbstractDynamicClient {
     }
 
     @CheckReturnValue
-    public List queryDomains(Context ctx, EdgeDomainSearchSpecification... specs) {
-        return Observable.fromArray(specs)
+    public List queryDomains(Context ctx, List specs) {
+        return Observable.fromIterable(specs)
                 .concatMap(s -> postGet(ctx, "/search-domain/", s, EdgeDomainSearchResults.class)
                         .subscribeOn(Schedulers.io())
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchSubquery.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchSubquery.java
index 166327e3..5d6d8aff 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchSubquery.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchSubquery.java
@@ -22,7 +22,7 @@ public class EdgeSearchSubquery {
         this.searchTermsInclude = searchTermsInclude;
         this.searchTermsExclude = searchTermsExclude;
         this.block = block;
-        this.termSize = (int) searchTermsInclude.stream().flatMapToInt(String::chars).filter(i -> '_'==i).count();
+        this.termSize = (int) searchTermsInclude.stream().flatMapToInt(String::chars).filter(i -> '_'==i).count();
     }
 
     public EdgeSearchSubquery withBlock(IndexBlock block) {
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchOperator.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchOperator.java
index 0002232b..fa509a62 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchOperator.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchOperator.java
@@ -112,22 +112,34 @@ public class EdgeSearchOperator {
     }
 
     private List getDomainResults(Context ctx, EdgeSearchSpecification specs) {
-        var requests = specs.subqueries.stream()
+
+        List buckets = specs.buckets.stream().limit(specs.stagger ? 2 : 1).toList();
+        List keywords = specs.subqueries.stream()
                 .filter(sq -> sq.searchTermsExclude.isEmpty() && sq.searchTermsInclude.size() == 1)
-                .flatMap(sq -> sq.searchTermsInclude.stream())
+                .map(sq -> sq.searchTermsInclude.get(0))
                 .distinct()
-                .flatMap(keyword ->
-                        specs.buckets.stream().map(bucket -> new EdgeDomainSearchSpecification(bucket, IndexBlock.Title, keyword, 2_000_000/specs.buckets.size(), 10, 25))
-                )
-                .toArray(EdgeDomainSearchSpecification[]::new);
+                .toList();
 
-        if (requests.length == 0)
+        List requests = new ArrayList<>(keywords.size() * buckets.size());
+
+        for (var keyword : keywords) {
+            for (var bucket : buckets) {
+                requests.add(new EdgeDomainSearchSpecification(bucket, IndexBlock.Title, keyword,
+                        1_000_000, 10, 25));
+            }
+        }
+
+        if (requests.isEmpty()) {
             return Collections.emptyList();
+        }
 
-        List> results = indexClient.queryDomains(ctx, requests)
-                .stream().flatMap(rs -> rs.results.stream()).distinct().toList();
+        Set> results = new LinkedHashSet<>();
 
-        return edgeDataStoreDao.getBrowseResultFromUrlIds(results);
+        for (var result : indexClient.queryDomains(ctx, requests)) {
+            results.addAll(result.results);
+        }
+
+        return edgeDataStoreDao.getBrowseResultFromUrlIds(new ArrayList<>(results));
     }
 
     private String getEvalResult(@Nullable Future eval) {