mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
Merge pull request 'Experimental domain-searching feature' (#63) from master into release
Reviewed-on: https://git.marginalia.nu/marginalia/marginalia.nu/pulls/63
This commit is contained in:
commit
bbb0bf4b7e
@ -277,7 +277,7 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao {
|
||||
// this is safe, string concatenation is of integers
|
||||
String inStmt = urlId.stream().map(id -> Integer.toString(id.id())).collect(Collectors.joining(", ", "(", ")"));
|
||||
|
||||
var rsp = stmt.executeQuery("SELECT DOMAIN_ID, DOMAIN_NAME FROM EC_URL_VIEW INNER JOIN DOMAIN_METADATA ON EC_URL_VIEW.DOMAIN_ID=DOMAIN_METADATA.ID WHERE VISITED_URLS<500 AND QUALITY>-10 AND EC_URL_VIEW.ID IN " + inStmt + " ORDER BY RANK ASC");
|
||||
var rsp = stmt.executeQuery("SELECT DOMAIN_ID, DOMAIN_NAME FROM EC_URL_VIEW INNER JOIN DOMAIN_METADATA ON EC_URL_VIEW.DOMAIN_ID=DOMAIN_METADATA.ID WHERE VISITED_URLS<750 AND QUALITY>-10 AND EC_URL_VIEW.ID IN " + inStmt + " ORDER BY RANK ASC");
|
||||
while (rsp.next()) {
|
||||
int id = rsp.getInt(1);
|
||||
String domain = rsp.getString(2);
|
||||
|
@ -65,7 +65,7 @@ public class EdgeIndexClient extends AbstractDynamicClient {
|
||||
}
|
||||
|
||||
@CheckReturnValue
|
||||
public List<EdgeDomainSearchResults> queryDomains(Context ctx, EdgeDomainSearchSpecification... specs) {
|
||||
public List<EdgeDomainSearchResults> queryDomains(Context ctx, List<EdgeDomainSearchSpecification> specs) {
|
||||
return Observable.fromArray(specs)
|
||||
.concatMap(s -> postGet(ctx, "/search-domain/", s, EdgeDomainSearchResults.class)
|
||||
.subscribeOn(Schedulers.io())
|
||||
|
@ -22,7 +22,7 @@ public class EdgeSearchSubquery {
|
||||
this.searchTermsInclude = searchTermsInclude;
|
||||
this.searchTermsExclude = searchTermsExclude;
|
||||
this.block = block;
|
||||
this.termSize = (int) searchTermsInclude.stream().flatMapToInt(String::chars).filter(i -> '_'==i).count();
|
||||
this.termSize = (int) searchTermsInclude.stream().flatMapToInt(String::chars).filter(i -> '_'==i).count();
|
||||
}
|
||||
|
||||
public EdgeSearchSubquery withBlock(IndexBlock block) {
|
||||
|
@ -112,22 +112,34 @@ public class EdgeSearchOperator {
|
||||
}
|
||||
|
||||
private List<BrowseResult> getDomainResults(Context ctx, EdgeSearchSpecification specs) {
|
||||
var requests = specs.subqueries.stream()
|
||||
|
||||
List<Integer> buckets = specs.buckets.stream().limit(specs.stagger ? 2 : 1).toList();
|
||||
List<String> keywords = specs.subqueries.stream()
|
||||
.filter(sq -> sq.searchTermsExclude.isEmpty() && sq.searchTermsInclude.size() == 1)
|
||||
.flatMap(sq -> sq.searchTermsInclude.stream())
|
||||
.map(sq -> sq.searchTermsInclude.get(0))
|
||||
.distinct()
|
||||
.flatMap(keyword ->
|
||||
specs.buckets.stream().map(bucket -> new EdgeDomainSearchSpecification(bucket, IndexBlock.Title, keyword, 2_000_000/specs.buckets.size(), 10, 25))
|
||||
)
|
||||
.toArray(EdgeDomainSearchSpecification[]::new);
|
||||
.toList();
|
||||
|
||||
if (requests.length == 0)
|
||||
List<EdgeDomainSearchSpecification> requests = new ArrayList<>(keywords.size() * buckets.size());
|
||||
|
||||
for (var keyword : keywords) {
|
||||
for (var bucket : buckets) {
|
||||
requests.add(new EdgeDomainSearchSpecification(bucket, IndexBlock.Title, keyword,
|
||||
1_000_000, 10, 25));
|
||||
}
|
||||
}
|
||||
|
||||
if (requests.isEmpty()) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
List<EdgeId<EdgeUrl>> results = indexClient.queryDomains(ctx, requests)
|
||||
.stream().flatMap(rs -> rs.results.stream()).distinct().toList();
|
||||
Set<EdgeId<EdgeUrl>> results = new LinkedHashSet<>();
|
||||
|
||||
return edgeDataStoreDao.getBrowseResultFromUrlIds(results);
|
||||
for (var result : indexClient.queryDomains(ctx, requests)) {
|
||||
results.addAll(result.results);
|
||||
}
|
||||
|
||||
return edgeDataStoreDao.getBrowseResultFromUrlIds(new ArrayList<>(results));
|
||||
}
|
||||
|
||||
private String getEvalResult(@Nullable Future<String> eval) {
|
||||
|
Loading…
Reference in New Issue
Block a user