Add time-based timeout to queries (#24)

Co-authored-by: vlofgren <vlofgren@gmail.com>
Reviewed-on: https://git.marginalia.nu/marginalia/marginalia.nu/pulls/24
This commit is contained in:
Viktor Lofgren 2022-05-31 13:38:26 +02:00
parent fcd2708fe3
commit 9474f39225
5 changed files with 28 additions and 31 deletions

View File

@ -41,7 +41,7 @@ import static spark.Spark.get;
import static spark.Spark.halt;
public class EdgeIndexService extends Service {
private static final int SEARCH_BUDGET_LIMIT = 1_000_000;
private static final int SEARCH_BUDGET_TIMEOUT_MS = 100;
private final Logger logger = LoggerFactory.getLogger(getClass());
@ -243,7 +243,7 @@ public class EdgeIndexService extends Service {
new DomainResultCountFilter(specsSet.limitByDomain)
};
final IndexSearchBudget budget = new IndexSearchBudget(SEARCH_BUDGET_LIMIT);
final IndexSearchBudget budget = new IndexSearchBudget(SEARCH_BUDGET_TIMEOUT_MS);
final TIntIntHashMap limitsPerBucketRemaining = new TIntIntHashMap(6, 0.7f, 0, specsSet.limitByBucket);
for (int i = 0; i < specsSet.buckets.size(); i+=2) {
@ -279,10 +279,6 @@ public class EdgeIndexService extends Service {
}
}
if (budget.used() > 0) {
logger.debug("Query used ${}", budget.used());
}
return results;
}
@ -294,7 +290,7 @@ public class EdgeIndexService extends Service {
final DomainResultCountFilter domainCountFilter = new DomainResultCountFilter(specsSet.limitByDomain);
IndexSearchBudget budget = new IndexSearchBudget(SEARCH_BUDGET_LIMIT);
IndexSearchBudget budget = new IndexSearchBudget(SEARCH_BUDGET_TIMEOUT_MS);
for (var sq : specsSet.subqueries) {
Optional<EdgeIndexSearchTerms> searchTerms = getSearchTerms(sq);
@ -316,10 +312,6 @@ public class EdgeIndexService extends Service {
}
}
if (budget.used() > 0) {
logger.debug("Query used ${}", budget.used());
}
return results;
}

View File

@ -74,7 +74,9 @@ public class SearchIndexReader implements AutoCloseable {
IndexSearchBudget budget,
LongPredicate filter,
int wordId) {
var builder = underspecifiedQueryBuilders.get(block);
if (null != builder) {
return builder.buildUnderspecified(budget, filter, wordId);
}

View File

@ -32,7 +32,9 @@ public class IndexQueryBuilder {
return new QueryForIndices(budget, filter, wordId);
}
// Special treatment for queries with few terms, prefer hits that appear in multiple buckets
public Query buildUnderspecified(IndexSearchBudget budget, LongPredicate filter, int wordId) {
if (requiredIndices.size() == 1) {
return build(budget, filter, wordId);
}
@ -51,7 +53,7 @@ public class IndexQueryBuilder {
return new QueryForIndices(budget, () ->
Streams.concat(IntStream.range(1, relevantIndices.length)
.mapToObj(i -> underspecifiedPairStream(budget, (int) budget.limit()/(relevantIndices.length*2), relevantIndices[0], relevantIndices[i], wordId))
.mapToObj(i -> underspecifiedPairStream(budget, 1000, relevantIndices[0], relevantIndices[i], wordId))
.flatMapToLong(Function.identity()),
fstRange.stream().takeWhile(budget::take))
.filter(filter)
@ -59,17 +61,20 @@ public class IndexQueryBuilder {
}
private LongStream underspecifiedPairStream(IndexSearchBudget budget, int limit, int firstIdx, int otherIdx, int wordId) {
SearchIndex first = requiredIndices.get(firstIdx),
second = requiredIndices.get(otherIdx);
SearchIndex firstTmp = requiredIndices.get(firstIdx),
secondTmp = requiredIndices.get(otherIdx);
if (first.numUrls(wordId) > second.numUrls(wordId)) {
SearchIndex tmp = first;
first = second;
second = tmp;
final SearchIndex fst;
final SearchIndex snd;
if (firstTmp.numUrls(wordId) > secondTmp.numUrls(wordId)) {
fst = secondTmp;
snd = firstTmp;
}
else {
fst = firstTmp;
snd = secondTmp;
}
SearchIndex fst = first;
SearchIndex snd = second;
var sndRange = snd.rangeForWord(wordId);

View File

@ -1,18 +1,16 @@
package nu.marginalia.wmsa.edge.index.service.query;
import lombok.RequiredArgsConstructor;
/**
 * Budget object handed to index queries so that a search can be cut short.
 *
 * NOTE(review): this listing shows replaced and replacement lines together, without
 * +/- markers. Presumably the counter-based members (limit, used, used(), limit())
 * are the replaced ones and the deadline-based members (timeout, the explicit
 * constructor, the clock comparison in take()) are their replacements -- confirm
 * against the repository before relying on either set.
 */
@RequiredArgsConstructor
public class IndexSearchBudget {
// Counter variant: take() returns true only while fewer than this many calls have been made.
private final long limit;
// Counter variant: number of take() calls made so far.
private long used = 0;
// Deadline variant: absolute wall-clock deadline, in epoch milliseconds.
private long timeout;
/**
 * Starts the clock immediately: the deadline is the current time plus limitTime.
 *
 * @param limitTime how long the budget lasts, in milliseconds
 */
public IndexSearchBudget(long limitTime) {
this.timeout = System.currentTimeMillis() + limitTime;
}
// Meant to be passed to a stream's takeWhile (as budget::take) to short-circuit it.
// The element value is ignored; only the state of the budget decides the result.
public boolean take(long unused) {
// Counter variant: true while the call count is below the limit; every call is counted.
return used++ < limit;
// Deadline variant: true until the wall-clock deadline has been reached.
return System.currentTimeMillis() < timeout;
}
// Returns the number of take() calls counted so far (counter variant).
public long used() {
return used;
}
// Returns the configured call limit (counter variant).
public long limit() { return limit; }
}

View File

@ -59,7 +59,7 @@ class SearchIndexWriterTest {
}
/**
 * Test helper: resolves word through dictionaryWriter.getReadOnly, queries the reader
 * for that word id in the given block, and returns the streamed results as an array.
 *
 * NOTE(review): two declarations of budget appear below because this listing shows the
 * replaced line next to its replacement. Presumably only the 100 variant remains after
 * the change -- confirm against the repository.
 */
public long[] findWord(SearchIndexReader reader, String word, IndexBlock block) {
// Earlier form: 1_000_000 was presumably a count-based limit -- confirm.
IndexSearchBudget budget = new IndexSearchBudget(1_000_000);
// Later form: the argument is a duration; the budget's deadline is now + 100 ms.
IndexSearchBudget budget = new IndexSearchBudget(100);
// The filter lv->true accepts every value, so only the budget can cut the result short.
return reader.findWord(block, budget, lv->true, dictionaryWriter.getReadOnly(word)).stream().toArray();
}