mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
Merge pull request 'Experimental domain-searching feature' (#68) from master into release
Reviewed-on: https://git.marginalia.nu/marginalia/marginalia.nu/pulls/68
This commit is contained in:
commit
044e8abac7
@ -99,7 +99,7 @@ public class EdgeSearchOperator {
|
|||||||
|
|
||||||
String evalResult = getEvalResult(eval);
|
String evalResult = getEvalResult(eval);
|
||||||
|
|
||||||
List<BrowseResult> domainResults = getDomainResults(ctx, processedQuery.specs);
|
List<BrowseResult> domainResults = getDomainResults(ctx, processedQuery.specs, queryResults);
|
||||||
|
|
||||||
return new DecoratedSearchResults(params,
|
return new DecoratedSearchResults(params,
|
||||||
getProblems(ctx, params.humanQuery(), evalResult, queryResults, processedQuery),
|
getProblems(ctx, params.humanQuery(), evalResult, queryResults, processedQuery),
|
||||||
@ -111,7 +111,9 @@ public class EdgeSearchOperator {
|
|||||||
getDomainId(processedQuery.domain));
|
getDomainId(processedQuery.domain));
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<BrowseResult> getDomainResults(Context ctx, EdgeSearchSpecification specs) {
|
private List<BrowseResult> getDomainResults(Context ctx, EdgeSearchSpecification specs, DecoratedSearchResultSet queryResults) {
|
||||||
|
|
||||||
|
Set<EdgeDomain> resultDomains = queryResults.resultSet.stream().map(rs -> rs.url.domain).collect(Collectors.toSet());
|
||||||
|
|
||||||
List<Integer> buckets = specs.buckets.stream().limit(specs.stagger ? 2 : 1).toList();
|
List<Integer> buckets = specs.buckets.stream().limit(specs.stagger ? 2 : 1).toList();
|
||||||
List<String> keywords = specs.subqueries.stream()
|
List<String> keywords = specs.subqueries.stream()
|
||||||
@ -125,7 +127,7 @@ public class EdgeSearchOperator {
|
|||||||
for (var keyword : keywords) {
|
for (var keyword : keywords) {
|
||||||
for (var bucket : buckets) {
|
for (var bucket : buckets) {
|
||||||
requests.add(new EdgeDomainSearchSpecification(bucket, IndexBlock.TitleKeywords, keyword,
|
requests.add(new EdgeDomainSearchSpecification(bucket, IndexBlock.TitleKeywords, keyword,
|
||||||
1_000_000, 25, 25));
|
1_000_000, 5, 25));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -135,23 +137,13 @@ public class EdgeSearchOperator {
|
|||||||
|
|
||||||
Set<EdgeId<EdgeUrl>> results = new LinkedHashSet<>();
|
Set<EdgeId<EdgeUrl>> results = new LinkedHashSet<>();
|
||||||
|
|
||||||
List<Iterator<EdgeId<EdgeUrl>>> iters = new ArrayList<>();
|
|
||||||
|
|
||||||
for (var result : indexClient.queryDomains(ctx, requests)) {
|
for (var result : indexClient.queryDomains(ctx, requests)) {
|
||||||
iters.add(result.results.iterator());
|
results.addAll(result.getResults());
|
||||||
}
|
}
|
||||||
|
|
||||||
while (!iters.isEmpty()) {
|
var ret = edgeDataStoreDao.getBrowseResultFromUrlIds(new ArrayList<>(results));
|
||||||
iters.removeIf(iter -> {
|
ret.removeIf(result -> !resultDomains.contains(result.url.domain));
|
||||||
if (!iter.hasNext()) return true;
|
return ret;
|
||||||
else {
|
|
||||||
results.add(iter.next());
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
return edgeDataStoreDao.getBrowseResultFromUrlIds(new ArrayList<>(results));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getEvalResult(@Nullable Future<String> eval) {
|
private String getEvalResult(@Nullable Future<String> eval) {
|
||||||
|
Loading…
Reference in New Issue
Block a user