Added timeout to queries

This commit is contained in:
vlofgren 2022-05-31 13:37:20 +02:00
parent 18b1153136
commit ec87c0689f
5 changed files with 28 additions and 31 deletions

View File

@ -41,7 +41,7 @@ import static spark.Spark.get;
import static spark.Spark.halt; import static spark.Spark.halt;
public class EdgeIndexService extends Service { public class EdgeIndexService extends Service {
private static final int SEARCH_BUDGET_LIMIT = 1_000_000; private static final int SEARCH_BUDGET_TIMEOUT_MS = 100;
private final Logger logger = LoggerFactory.getLogger(getClass()); private final Logger logger = LoggerFactory.getLogger(getClass());
@ -243,7 +243,7 @@ public class EdgeIndexService extends Service {
new DomainResultCountFilter(specsSet.limitByDomain) new DomainResultCountFilter(specsSet.limitByDomain)
}; };
final IndexSearchBudget budget = new IndexSearchBudget(SEARCH_BUDGET_LIMIT); final IndexSearchBudget budget = new IndexSearchBudget(SEARCH_BUDGET_TIMEOUT_MS);
final TIntIntHashMap limitsPerBucketRemaining = new TIntIntHashMap(6, 0.7f, 0, specsSet.limitByBucket); final TIntIntHashMap limitsPerBucketRemaining = new TIntIntHashMap(6, 0.7f, 0, specsSet.limitByBucket);
for (int i = 0; i < specsSet.buckets.size(); i+=2) { for (int i = 0; i < specsSet.buckets.size(); i+=2) {
@ -279,10 +279,6 @@ public class EdgeIndexService extends Service {
} }
} }
if (budget.used() > 0) {
logger.debug("Query used ${}", budget.used());
}
return results; return results;
} }
@ -294,7 +290,7 @@ public class EdgeIndexService extends Service {
final DomainResultCountFilter domainCountFilter = new DomainResultCountFilter(specsSet.limitByDomain); final DomainResultCountFilter domainCountFilter = new DomainResultCountFilter(specsSet.limitByDomain);
IndexSearchBudget budget = new IndexSearchBudget(SEARCH_BUDGET_LIMIT); IndexSearchBudget budget = new IndexSearchBudget(SEARCH_BUDGET_TIMEOUT_MS);
for (var sq : specsSet.subqueries) { for (var sq : specsSet.subqueries) {
Optional<EdgeIndexSearchTerms> searchTerms = getSearchTerms(sq); Optional<EdgeIndexSearchTerms> searchTerms = getSearchTerms(sq);
@ -316,10 +312,6 @@ public class EdgeIndexService extends Service {
} }
} }
if (budget.used() > 0) {
logger.debug("Query used ${}", budget.used());
}
return results; return results;
} }

View File

@ -74,7 +74,9 @@ public class SearchIndexReader implements AutoCloseable {
IndexSearchBudget budget, IndexSearchBudget budget,
LongPredicate filter, LongPredicate filter,
int wordId) { int wordId) {
var builder = underspecifiedQueryBuilders.get(block); var builder = underspecifiedQueryBuilders.get(block);
if (null != builder) { if (null != builder) {
return builder.buildUnderspecified(budget, filter, wordId); return builder.buildUnderspecified(budget, filter, wordId);
} }

View File

@ -32,7 +32,9 @@ public class IndexQueryBuilder {
return new QueryForIndices(budget, filter, wordId); return new QueryForIndices(budget, filter, wordId);
} }
// Special treatment for queries with few terms: prefer hits that appear in multiple buckets
public Query buildUnderspecified(IndexSearchBudget budget, LongPredicate filter, int wordId) { public Query buildUnderspecified(IndexSearchBudget budget, LongPredicate filter, int wordId) {
if (requiredIndices.size() == 1) { if (requiredIndices.size() == 1) {
return build(budget, filter, wordId); return build(budget, filter, wordId);
} }
@ -51,7 +53,7 @@ public class IndexQueryBuilder {
return new QueryForIndices(budget, () -> return new QueryForIndices(budget, () ->
Streams.concat(IntStream.range(1, relevantIndices.length) Streams.concat(IntStream.range(1, relevantIndices.length)
.mapToObj(i -> underspecifiedPairStream(budget, (int) budget.limit()/(relevantIndices.length*2), relevantIndices[0], relevantIndices[i], wordId)) .mapToObj(i -> underspecifiedPairStream(budget, 1000, relevantIndices[0], relevantIndices[i], wordId))
.flatMapToLong(Function.identity()), .flatMapToLong(Function.identity()),
fstRange.stream().takeWhile(budget::take)) fstRange.stream().takeWhile(budget::take))
.filter(filter) .filter(filter)
@ -59,17 +61,20 @@ public class IndexQueryBuilder {
} }
private LongStream underspecifiedPairStream(IndexSearchBudget budget, int limit, int firstIdx, int otherIdx, int wordId) { private LongStream underspecifiedPairStream(IndexSearchBudget budget, int limit, int firstIdx, int otherIdx, int wordId) {
SearchIndex first = requiredIndices.get(firstIdx), SearchIndex firstTmp = requiredIndices.get(firstIdx),
second = requiredIndices.get(otherIdx); secondTmp = requiredIndices.get(otherIdx);
if (first.numUrls(wordId) > second.numUrls(wordId)) { final SearchIndex fst;
SearchIndex tmp = first; final SearchIndex snd;
first = second;
second = tmp; if (firstTmp.numUrls(wordId) > secondTmp.numUrls(wordId)) {
fst = secondTmp;
snd = firstTmp;
}
else {
fst = firstTmp;
snd = secondTmp;
} }
SearchIndex fst = first;
SearchIndex snd = second;
var sndRange = snd.rangeForWord(wordId); var sndRange = snd.rangeForWord(wordId);

View File

@ -1,18 +1,16 @@
package nu.marginalia.wmsa.edge.index.service.query; package nu.marginalia.wmsa.edge.index.service.query;
import lombok.RequiredArgsConstructor;
@RequiredArgsConstructor
public class IndexSearchBudget { public class IndexSearchBudget {
private final long limit; private long timeout;
private long used = 0;
public IndexSearchBudget(long limitTime) {
this.timeout = System.currentTimeMillis() + limitTime;
}
// Used for short-circuiting Stream-objects using takeWhile; the argument itself is ignored
public boolean take(long unused) { public boolean take(long unused) {
return used++ < limit; return System.currentTimeMillis() < timeout;
} }
public long used() {
return used;
}
public long limit() { return limit; }
} }

View File

@ -59,7 +59,7 @@ class SearchIndexWriterTest {
} }
public long[] findWord(SearchIndexReader reader, String word, IndexBlock block) { public long[] findWord(SearchIndexReader reader, String word, IndexBlock block) {
IndexSearchBudget budget = new IndexSearchBudget(1_000_000); IndexSearchBudget budget = new IndexSearchBudget(100);
return reader.findWord(block, budget, lv->true, dictionaryWriter.getReadOnly(word)).stream().toArray(); return reader.findWord(block, budget, lv->true, dictionaryWriter.getReadOnly(word)).stream().toArray();
} }