Merge pull request 'Experimental domain-searching feature' (#54) from master into release

Reviewed-on: https://git.marginalia.nu/marginalia/marginalia.nu/pulls/54
This commit is contained in:
Viktor Lofgren 2022-07-28 18:59:16 +02:00
commit 4650025105
3 changed files with 18 additions and 11 deletions

View File

@ -277,7 +277,7 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao {
// this is safe, string concatenation is of integers
String inStmt = urlId.stream().map(id -> Integer.toString(id.id())).collect(Collectors.joining(", ", "(", ")"));
var rsp = stmt.executeQuery("SELECT DOMAIN_ID, DOMAIN_NAME FROM EC_URL_VIEW INNER JOIN DOMAIN_METADATA ON EC_URL_VIEW.DOMAIN_ID=DOMAIN_METADATA.ID WHERE VISITED_URLS<500 AND QUALITY>-10 AND EC_URL_VIEW.ID IN " + inStmt);
var rsp = stmt.executeQuery("SELECT DOMAIN_ID, DOMAIN_NAME FROM EC_URL_VIEW INNER JOIN DOMAIN_METADATA ON EC_URL_VIEW.DOMAIN_ID=DOMAIN_METADATA.ID WHERE VISITED_URLS<500 AND QUALITY>-10 AND EC_URL_VIEW.ID IN " + inStmt + " ORDER BY RANK ASC");
while (rsp.next() && ret.size() < count) {
int id = rsp.getInt(1);
String domain = rsp.getString(2);

View File

@ -65,8 +65,14 @@ public class EdgeIndexClient extends AbstractDynamicClient {
}
@CheckReturnValue
public EdgeDomainSearchResults queryDomains(Context ctx, EdgeDomainSearchSpecification specs) {
return this.postGet(ctx, "/search-domain/", specs, EdgeDomainSearchResults.class).blockingFirst();
/**
 * Runs one domain search per specification against the index service and
 * collects the responses.
 *
 * <p>Requests are issued one after another in argument order
 * ({@code concatMap}). Each request is given one second to answer; a request
 * that times out or fails is dropped silently ({@code onErrorComplete}), so
 * the returned list may hold fewer entries than {@code specs}, possibly none.
 *
 * @param ctx   request context forwarded to {@code postGet}
 * @param specs the domain search specifications; one request is sent for each
 * @return the responses that arrived in time, in the order of their specifications
 */
public List<EdgeDomainSearchResults> queryDomains(Context ctx, EdgeDomainSearchSpecification... specs) {
    return Observable.fromArray(specs)
            // Post the individual specification, not the whole varargs array:
            // the endpoint is called once per element.
            .concatMap(spec -> postGet(ctx, "/search-domain/", spec, EdgeDomainSearchResults.class)
                    .subscribeOn(Schedulers.io())
                    .timeout(1, TimeUnit.SECONDS)
                    .onErrorComplete())
            .toList()
            .blockingGet();
}

View File

@ -112,17 +112,18 @@ public class EdgeSearchOperator {
}
/*
 * Looks up domains whose title keywords match the single-term parts of a query.
 *
 * NOTE(review): this span is a diff hunk shown without +/- markers, so the
 * superseded and the current statements are interleaved; the superseded ones
 * are marked below. The description here follows the current statements.
 *
 * Current behaviour: every subquery that has no excluded terms and exactly one
 * included term contributes that term as one EdgeDomainSearchSpecification
 * (built against the first entry of specs.buckets). If no subquery qualifies,
 * an empty list is returned without contacting the index. Otherwise all
 * requests are sent through indexClient.queryDomains, the returned URL ids are
 * flattened and de-duplicated, and handed to
 * edgeDataStoreDao.getBrowseResultFromUrlIds together with the argument 5
 * (presumably the maximum number of browse results - confirm against the DAO).
 *
 * NOTE(review): specs.buckets.get(0) assumes at least one bucket is present
 * whenever a subquery qualifies - confirm against callers.
 */
private List<BrowseResult> getDomainResults(Context ctx, EdgeSearchSpecification specs) {
// superseded: picked the term list of the first qualifying subquery only
List<String> keywords = specs.subqueries.stream().filter(sq -> sq.searchTermsExclude.isEmpty() && sq.searchTermsInclude.size() == 1)
.findFirst().map(sq -> sq.searchTermsInclude).orElseGet(Collections::emptyList);
// current: one request per included term of every qualifying subquery
var requests = specs.subqueries.stream().filter(sq -> sq.searchTermsExclude.isEmpty() && sq.searchTermsInclude.size() == 1)
.flatMap(sq -> sq.searchTermsInclude.stream())
.map(keyword -> new EdgeDomainSearchSpecification(specs.buckets.get(0), IndexBlock.TitleKeywords, keyword, 1_000_000, 10, 20))
.toArray(EdgeDomainSearchSpecification[]::new);
// superseded: single-request path (the following three statements)
if (keywords.size() == 1) {
var request = new EdgeDomainSearchSpecification(specs.buckets.get(0), IndexBlock.TitleKeywords, keywords.get(0), 1_000_000, 10, 20);
var response = indexClient.queryDomains(ctx, request);
// current: nothing to search for, skip the index round trip
if (requests.length == 0)
return Collections.emptyList();
// superseded: resolved the ids of the single response
return edgeDataStoreDao.getBrowseResultFromUrlIds(response.results, 5);
// superseded: end of the single-request branch
}
// current: merge the ids of all responses, dropping duplicates
List<EdgeId<EdgeUrl>> results = indexClient.queryDomains(ctx, requests)
.stream().flatMap(rs -> rs.results.stream()).distinct().toList();
// superseded: fallback when there was not exactly one keyword
return Collections.emptyList();
// current: resolve the merged ids to browse results
return edgeDataStoreDao.getBrowseResultFromUrlIds(results, 5);
}
private String getEvalResult(@Nullable Future<String> eval) {