mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
Experimental domain-searching feature
This commit is contained in:
parent
3eb9eecac9
commit
145b02a736
@ -277,7 +277,7 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao {
|
|||||||
// this is safe, string concatenation is of integers
|
// this is safe, string concatenation is of integers
|
||||||
String inStmt = urlId.stream().map(id -> Integer.toString(id.id())).collect(Collectors.joining(", ", "(", ")"));
|
String inStmt = urlId.stream().map(id -> Integer.toString(id.id())).collect(Collectors.joining(", ", "(", ")"));
|
||||||
|
|
||||||
var rsp = stmt.executeQuery("SELECT DOMAIN_ID, DOMAIN_NAME FROM EC_URL_VIEW INNER JOIN DOMAIN_METADATA ON EC_URL_VIEW.DOMAIN_ID=DOMAIN_METADATA.ID WHERE VISITED_URLS<500 AND QUALITY>-10 AND EC_URL_VIEW.ID IN " + inStmt + " ORDER BY RANK ASC");
|
var rsp = stmt.executeQuery("SELECT DOMAIN_ID, DOMAIN_NAME FROM EC_URL_VIEW INNER JOIN DOMAIN_METADATA ON EC_URL_VIEW.DOMAIN_ID=DOMAIN_METADATA.ID WHERE VISITED_URLS<750 AND QUALITY>-10 AND EC_URL_VIEW.ID IN " + inStmt + " ORDER BY RANK ASC");
|
||||||
while (rsp.next()) {
|
while (rsp.next()) {
|
||||||
int id = rsp.getInt(1);
|
int id = rsp.getInt(1);
|
||||||
String domain = rsp.getString(2);
|
String domain = rsp.getString(2);
|
||||||
|
@ -65,7 +65,7 @@ public class EdgeIndexClient extends AbstractDynamicClient {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@CheckReturnValue
|
@CheckReturnValue
|
||||||
public List<EdgeDomainSearchResults> queryDomains(Context ctx, EdgeDomainSearchSpecification... specs) {
|
public List<EdgeDomainSearchResults> queryDomains(Context ctx, List<EdgeDomainSearchSpecification> specs) {
|
||||||
return Observable.fromArray(specs)
|
return Observable.fromArray(specs)
|
||||||
.concatMap(s -> postGet(ctx, "/search-domain/", s, EdgeDomainSearchResults.class)
|
.concatMap(s -> postGet(ctx, "/search-domain/", s, EdgeDomainSearchResults.class)
|
||||||
.subscribeOn(Schedulers.io())
|
.subscribeOn(Schedulers.io())
|
||||||
|
@ -112,22 +112,34 @@ public class EdgeSearchOperator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private List<BrowseResult> getDomainResults(Context ctx, EdgeSearchSpecification specs) {
|
private List<BrowseResult> getDomainResults(Context ctx, EdgeSearchSpecification specs) {
|
||||||
var requests = specs.subqueries.stream()
|
|
||||||
|
List<Integer> buckets = specs.buckets.stream().limit(specs.stagger ? 2 : 1).toList();
|
||||||
|
List<String> keywords = specs.subqueries.stream()
|
||||||
.filter(sq -> sq.searchTermsExclude.isEmpty() && sq.searchTermsInclude.size() == 1)
|
.filter(sq -> sq.searchTermsExclude.isEmpty() && sq.searchTermsInclude.size() == 1)
|
||||||
.flatMap(sq -> sq.searchTermsInclude.stream())
|
.map(sq -> sq.searchTermsInclude.get(0))
|
||||||
.distinct()
|
.distinct()
|
||||||
.flatMap(keyword ->
|
.toList();
|
||||||
specs.buckets.stream().map(bucket -> new EdgeDomainSearchSpecification(bucket, IndexBlock.Title, keyword, 2_000_000/specs.buckets.size(), 10, 25))
|
|
||||||
)
|
|
||||||
.toArray(EdgeDomainSearchSpecification[]::new);
|
|
||||||
|
|
||||||
if (requests.length == 0)
|
List<EdgeDomainSearchSpecification> requests = new ArrayList<>(keywords.size() * buckets.size());
|
||||||
|
|
||||||
|
for (var keyword : keywords) {
|
||||||
|
for (var bucket : buckets) {
|
||||||
|
requests.add(new EdgeDomainSearchSpecification(bucket, IndexBlock.Title, keyword,
|
||||||
|
1_000_000, 10, 25));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (requests.isEmpty()) {
|
||||||
return Collections.emptyList();
|
return Collections.emptyList();
|
||||||
|
}
|
||||||
|
|
||||||
List<EdgeId<EdgeUrl>> results = indexClient.queryDomains(ctx, requests)
|
Set<EdgeId<EdgeUrl>> results = new LinkedHashSet<>();
|
||||||
.stream().flatMap(rs -> rs.results.stream()).distinct().toList();
|
|
||||||
|
|
||||||
return edgeDataStoreDao.getBrowseResultFromUrlIds(results);
|
for (var result : indexClient.queryDomains(ctx, requests)) {
|
||||||
|
results.addAll(result.results);
|
||||||
|
}
|
||||||
|
|
||||||
|
return edgeDataStoreDao.getBrowseResultFromUrlIds(new ArrayList<>(results));
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getEvalResult(@Nullable Future<String> eval) {
|
private String getEvalResult(@Nullable Future<String> eval) {
|
||||||
|
Loading…
Reference in New Issue
Block a user