mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
Added timeout to queries
This commit is contained in:
parent
18b1153136
commit
ec87c0689f
@ -41,7 +41,7 @@ import static spark.Spark.get;
|
|||||||
import static spark.Spark.halt;
|
import static spark.Spark.halt;
|
||||||
|
|
||||||
public class EdgeIndexService extends Service {
|
public class EdgeIndexService extends Service {
|
||||||
private static final int SEARCH_BUDGET_LIMIT = 1_000_000;
|
private static final int SEARCH_BUDGET_TIMEOUT_MS = 100;
|
||||||
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
|
|
||||||
@ -243,7 +243,7 @@ public class EdgeIndexService extends Service {
|
|||||||
new DomainResultCountFilter(specsSet.limitByDomain)
|
new DomainResultCountFilter(specsSet.limitByDomain)
|
||||||
};
|
};
|
||||||
|
|
||||||
final IndexSearchBudget budget = new IndexSearchBudget(SEARCH_BUDGET_LIMIT);
|
final IndexSearchBudget budget = new IndexSearchBudget(SEARCH_BUDGET_TIMEOUT_MS);
|
||||||
final TIntIntHashMap limitsPerBucketRemaining = new TIntIntHashMap(6, 0.7f, 0, specsSet.limitByBucket);
|
final TIntIntHashMap limitsPerBucketRemaining = new TIntIntHashMap(6, 0.7f, 0, specsSet.limitByBucket);
|
||||||
|
|
||||||
for (int i = 0; i < specsSet.buckets.size(); i+=2) {
|
for (int i = 0; i < specsSet.buckets.size(); i+=2) {
|
||||||
@ -279,10 +279,6 @@ public class EdgeIndexService extends Service {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (budget.used() > 0) {
|
|
||||||
logger.debug("Query used ${}", budget.used());
|
|
||||||
}
|
|
||||||
|
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -294,7 +290,7 @@ public class EdgeIndexService extends Service {
|
|||||||
|
|
||||||
final DomainResultCountFilter domainCountFilter = new DomainResultCountFilter(specsSet.limitByDomain);
|
final DomainResultCountFilter domainCountFilter = new DomainResultCountFilter(specsSet.limitByDomain);
|
||||||
|
|
||||||
IndexSearchBudget budget = new IndexSearchBudget(SEARCH_BUDGET_LIMIT);
|
IndexSearchBudget budget = new IndexSearchBudget(SEARCH_BUDGET_TIMEOUT_MS);
|
||||||
for (var sq : specsSet.subqueries) {
|
for (var sq : specsSet.subqueries) {
|
||||||
Optional<EdgeIndexSearchTerms> searchTerms = getSearchTerms(sq);
|
Optional<EdgeIndexSearchTerms> searchTerms = getSearchTerms(sq);
|
||||||
|
|
||||||
@ -316,10 +312,6 @@ public class EdgeIndexService extends Service {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (budget.used() > 0) {
|
|
||||||
logger.debug("Query used ${}", budget.used());
|
|
||||||
}
|
|
||||||
|
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -74,7 +74,9 @@ public class SearchIndexReader implements AutoCloseable {
|
|||||||
IndexSearchBudget budget,
|
IndexSearchBudget budget,
|
||||||
LongPredicate filter,
|
LongPredicate filter,
|
||||||
int wordId) {
|
int wordId) {
|
||||||
|
|
||||||
var builder = underspecifiedQueryBuilders.get(block);
|
var builder = underspecifiedQueryBuilders.get(block);
|
||||||
|
|
||||||
if (null != builder) {
|
if (null != builder) {
|
||||||
return builder.buildUnderspecified(budget, filter, wordId);
|
return builder.buildUnderspecified(budget, filter, wordId);
|
||||||
}
|
}
|
||||||
|
@ -32,7 +32,9 @@ public class IndexQueryBuilder {
|
|||||||
return new QueryForIndices(budget, filter, wordId);
|
return new QueryForIndices(budget, filter, wordId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Special treatment for queries with few terms: prefer hits that appear in multiple buckets
|
||||||
public Query buildUnderspecified(IndexSearchBudget budget, LongPredicate filter, int wordId) {
|
public Query buildUnderspecified(IndexSearchBudget budget, LongPredicate filter, int wordId) {
|
||||||
|
|
||||||
if (requiredIndices.size() == 1) {
|
if (requiredIndices.size() == 1) {
|
||||||
return build(budget, filter, wordId);
|
return build(budget, filter, wordId);
|
||||||
}
|
}
|
||||||
@ -51,7 +53,7 @@ public class IndexQueryBuilder {
|
|||||||
|
|
||||||
return new QueryForIndices(budget, () ->
|
return new QueryForIndices(budget, () ->
|
||||||
Streams.concat(IntStream.range(1, relevantIndices.length)
|
Streams.concat(IntStream.range(1, relevantIndices.length)
|
||||||
.mapToObj(i -> underspecifiedPairStream(budget, (int) budget.limit()/(relevantIndices.length*2), relevantIndices[0], relevantIndices[i], wordId))
|
.mapToObj(i -> underspecifiedPairStream(budget, 1000, relevantIndices[0], relevantIndices[i], wordId))
|
||||||
.flatMapToLong(Function.identity()),
|
.flatMapToLong(Function.identity()),
|
||||||
fstRange.stream().takeWhile(budget::take))
|
fstRange.stream().takeWhile(budget::take))
|
||||||
.filter(filter)
|
.filter(filter)
|
||||||
@ -59,17 +61,20 @@ public class IndexQueryBuilder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private LongStream underspecifiedPairStream(IndexSearchBudget budget, int limit, int firstIdx, int otherIdx, int wordId) {
|
private LongStream underspecifiedPairStream(IndexSearchBudget budget, int limit, int firstIdx, int otherIdx, int wordId) {
|
||||||
SearchIndex first = requiredIndices.get(firstIdx),
|
SearchIndex firstTmp = requiredIndices.get(firstIdx),
|
||||||
second = requiredIndices.get(otherIdx);
|
secondTmp = requiredIndices.get(otherIdx);
|
||||||
|
|
||||||
if (first.numUrls(wordId) > second.numUrls(wordId)) {
|
final SearchIndex fst;
|
||||||
SearchIndex tmp = first;
|
final SearchIndex snd;
|
||||||
first = second;
|
|
||||||
second = tmp;
|
if (firstTmp.numUrls(wordId) > secondTmp.numUrls(wordId)) {
|
||||||
|
fst = secondTmp;
|
||||||
|
snd = firstTmp;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
fst = firstTmp;
|
||||||
|
snd = secondTmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
SearchIndex fst = first;
|
|
||||||
SearchIndex snd = second;
|
|
||||||
|
|
||||||
var sndRange = snd.rangeForWord(wordId);
|
var sndRange = snd.rangeForWord(wordId);
|
||||||
|
|
||||||
|
@ -1,18 +1,16 @@
|
|||||||
package nu.marginalia.wmsa.edge.index.service.query;
|
package nu.marginalia.wmsa.edge.index.service.query;
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
|
||||||
|
|
||||||
/**
 * Time budget for a single index query.
 *
 * <p>The budget starts counting when the object is constructed and is considered
 * exhausted once the requested number of milliseconds has elapsed. It is meant to be
 * plugged into stream pipelines via {@code takeWhile(budget::take)} so that long-running
 * queries are cut short.
 *
 * <p>Elapsed time is measured with {@link System#nanoTime()}, so wall-clock
 * adjustments do not lengthen or shorten the budget.
 */
public class IndexSearchBudget {
    /** {@link System#nanoTime()} reading taken at construction. */
    private final long startNanos;

    /** Length of the budget in nanoseconds; zero or negative means already exhausted. */
    private final long budgetNanos;

    /**
     * Creates a budget that starts running immediately.
     *
     * @param limitTime length of the budget in milliseconds; a value of zero or less
     *                  yields a budget that is exhausted from the start
     */
    public IndexSearchBudget(long limitTime) {
        this.startNanos = System.nanoTime();
        // toNanos saturates at Long.MAX_VALUE / Long.MIN_VALUE instead of overflowing,
        // so a very large limit cannot wrap around into an already-expired budget.
        this.budgetNanos = java.util.concurrent.TimeUnit.MILLISECONDS.toNanos(limitTime);
    }

    /**
     * Reports whether there is still time left in the budget.
     *
     * <p>The signature matches {@code LongPredicate} so the method can be used to
     * short-circuit streams with {@code takeWhile}.
     *
     * @param unused the stream element; its value is ignored
     * @return {@code true} while the budget has not yet elapsed
     */
    public boolean take(long unused) {
        // Compare the elapsed difference rather than absolute readings:
        // nanoTime values have an arbitrary origin and may be negative.
        return System.nanoTime() - startNanos < budgetNanos;
    }
}
|
||||||
|
@ -59,7 +59,7 @@ class SearchIndexWriterTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public long[] findWord(SearchIndexReader reader, String word, IndexBlock block) {
|
public long[] findWord(SearchIndexReader reader, String word, IndexBlock block) {
|
||||||
IndexSearchBudget budget = new IndexSearchBudget(1_000_000);
|
IndexSearchBudget budget = new IndexSearchBudget(100);
|
||||||
return reader.findWord(block, budget, lv->true, dictionaryWriter.getReadOnly(word)).stream().toArray();
|
return reader.findWord(block, budget, lv->true, dictionaryWriter.getReadOnly(word)).stream().toArray();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user