mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
Added timeout to queries
This commit is contained in:
parent
18b1153136
commit
ec87c0689f
@ -41,7 +41,7 @@ import static spark.Spark.get;
|
||||
import static spark.Spark.halt;
|
||||
|
||||
public class EdgeIndexService extends Service {
|
||||
private static final int SEARCH_BUDGET_LIMIT = 1_000_000;
|
||||
private static final int SEARCH_BUDGET_TIMEOUT_MS = 100;
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
@ -243,7 +243,7 @@ public class EdgeIndexService extends Service {
|
||||
new DomainResultCountFilter(specsSet.limitByDomain)
|
||||
};
|
||||
|
||||
final IndexSearchBudget budget = new IndexSearchBudget(SEARCH_BUDGET_LIMIT);
|
||||
final IndexSearchBudget budget = new IndexSearchBudget(SEARCH_BUDGET_TIMEOUT_MS);
|
||||
final TIntIntHashMap limitsPerBucketRemaining = new TIntIntHashMap(6, 0.7f, 0, specsSet.limitByBucket);
|
||||
|
||||
for (int i = 0; i < specsSet.buckets.size(); i+=2) {
|
||||
@ -279,10 +279,6 @@ public class EdgeIndexService extends Service {
|
||||
}
|
||||
}
|
||||
|
||||
if (budget.used() > 0) {
|
||||
logger.debug("Query used ${}", budget.used());
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
@ -294,7 +290,7 @@ public class EdgeIndexService extends Service {
|
||||
|
||||
final DomainResultCountFilter domainCountFilter = new DomainResultCountFilter(specsSet.limitByDomain);
|
||||
|
||||
IndexSearchBudget budget = new IndexSearchBudget(SEARCH_BUDGET_LIMIT);
|
||||
IndexSearchBudget budget = new IndexSearchBudget(SEARCH_BUDGET_TIMEOUT_MS);
|
||||
for (var sq : specsSet.subqueries) {
|
||||
Optional<EdgeIndexSearchTerms> searchTerms = getSearchTerms(sq);
|
||||
|
||||
@ -316,10 +312,6 @@ public class EdgeIndexService extends Service {
|
||||
}
|
||||
}
|
||||
|
||||
if (budget.used() > 0) {
|
||||
logger.debug("Query used ${}", budget.used());
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
|
@ -74,7 +74,9 @@ public class SearchIndexReader implements AutoCloseable {
|
||||
IndexSearchBudget budget,
|
||||
LongPredicate filter,
|
||||
int wordId) {
|
||||
|
||||
var builder = underspecifiedQueryBuilders.get(block);
|
||||
|
||||
if (null != builder) {
|
||||
return builder.buildUnderspecified(budget, filter, wordId);
|
||||
}
|
||||
|
@ -32,7 +32,9 @@ public class IndexQueryBuilder {
|
||||
return new QueryForIndices(budget, filter, wordId);
|
||||
}
|
||||
|
||||
// Special treatment for queries with few terms, prefer hits that appear in multiple buckets
|
||||
public Query buildUnderspecified(IndexSearchBudget budget, LongPredicate filter, int wordId) {
|
||||
|
||||
if (requiredIndices.size() == 1) {
|
||||
return build(budget, filter, wordId);
|
||||
}
|
||||
@ -51,7 +53,7 @@ public class IndexQueryBuilder {
|
||||
|
||||
return new QueryForIndices(budget, () ->
|
||||
Streams.concat(IntStream.range(1, relevantIndices.length)
|
||||
.mapToObj(i -> underspecifiedPairStream(budget, (int) budget.limit()/(relevantIndices.length*2), relevantIndices[0], relevantIndices[i], wordId))
|
||||
.mapToObj(i -> underspecifiedPairStream(budget, 1000, relevantIndices[0], relevantIndices[i], wordId))
|
||||
.flatMapToLong(Function.identity()),
|
||||
fstRange.stream().takeWhile(budget::take))
|
||||
.filter(filter)
|
||||
@ -59,17 +61,20 @@ public class IndexQueryBuilder {
|
||||
}
|
||||
|
||||
private LongStream underspecifiedPairStream(IndexSearchBudget budget, int limit, int firstIdx, int otherIdx, int wordId) {
|
||||
SearchIndex first = requiredIndices.get(firstIdx),
|
||||
second = requiredIndices.get(otherIdx);
|
||||
SearchIndex firstTmp = requiredIndices.get(firstIdx),
|
||||
secondTmp = requiredIndices.get(otherIdx);
|
||||
|
||||
if (first.numUrls(wordId) > second.numUrls(wordId)) {
|
||||
SearchIndex tmp = first;
|
||||
first = second;
|
||||
second = tmp;
|
||||
final SearchIndex fst;
|
||||
final SearchIndex snd;
|
||||
|
||||
if (firstTmp.numUrls(wordId) > secondTmp.numUrls(wordId)) {
|
||||
fst = secondTmp;
|
||||
snd = firstTmp;
|
||||
}
|
||||
else {
|
||||
fst = firstTmp;
|
||||
snd = secondTmp;
|
||||
}
|
||||
|
||||
SearchIndex fst = first;
|
||||
SearchIndex snd = second;
|
||||
|
||||
var sndRange = snd.rangeForWord(wordId);
|
||||
|
||||
|
@ -1,18 +1,16 @@
|
||||
package nu.marginalia.wmsa.edge.index.service.query;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@RequiredArgsConstructor
|
||||
public class IndexSearchBudget {
|
||||
private final long limit;
|
||||
private long used = 0;
|
||||
private long timeout;
|
||||
|
||||
public IndexSearchBudget(long limitTime) {
|
||||
this.timeout = System.currentTimeMillis() + limitTime;
|
||||
}
|
||||
|
||||
// Used for short-circuiting Stream objects via takeWhile; the argument value itself is ignored
|
||||
public boolean take(long unused) {
|
||||
return used++ < limit;
|
||||
return System.currentTimeMillis() < timeout;
|
||||
}
|
||||
|
||||
public long used() {
|
||||
return used;
|
||||
}
|
||||
public long limit() { return limit; }
|
||||
}
|
||||
|
@ -59,7 +59,7 @@ class SearchIndexWriterTest {
|
||||
}
|
||||
|
||||
public long[] findWord(SearchIndexReader reader, String word, IndexBlock block) {
|
||||
IndexSearchBudget budget = new IndexSearchBudget(1_000_000);
|
||||
IndexSearchBudget budget = new IndexSearchBudget(100);
|
||||
return reader.findWord(block, budget, lv->true, dictionaryWriter.getReadOnly(word)).stream().toArray();
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user