diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexService.java index 3fa3625b..81d57139 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexService.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexService.java @@ -41,7 +41,7 @@ import static spark.Spark.get; import static spark.Spark.halt; public class EdgeIndexService extends Service { - private static final int SEARCH_BUDGET_LIMIT = 1_000_000; + private static final int SEARCH_BUDGET_TIMEOUT_MS = 100; private final Logger logger = LoggerFactory.getLogger(getClass()); @@ -243,7 +243,7 @@ public class EdgeIndexService extends Service { new DomainResultCountFilter(specsSet.limitByDomain) }; - final IndexSearchBudget budget = new IndexSearchBudget(SEARCH_BUDGET_LIMIT); + final IndexSearchBudget budget = new IndexSearchBudget(SEARCH_BUDGET_TIMEOUT_MS); final TIntIntHashMap limitsPerBucketRemaining = new TIntIntHashMap(6, 0.7f, 0, specsSet.limitByBucket); for (int i = 0; i < specsSet.buckets.size(); i+=2) { @@ -279,10 +279,6 @@ public class EdgeIndexService extends Service { } } - if (budget.used() > 0) { - logger.debug("Query used ${}", budget.used()); - } - return results; } @@ -294,7 +290,7 @@ public class EdgeIndexService extends Service { final DomainResultCountFilter domainCountFilter = new DomainResultCountFilter(specsSet.limitByDomain); - IndexSearchBudget budget = new IndexSearchBudget(SEARCH_BUDGET_LIMIT); + IndexSearchBudget budget = new IndexSearchBudget(SEARCH_BUDGET_TIMEOUT_MS); for (var sq : specsSet.subqueries) { Optional searchTerms = getSearchTerms(sq); @@ -316,10 +312,6 @@ public class EdgeIndexService extends Service { } } - if (budget.used() > 0) { - logger.debug("Query used ${}", budget.used()); - } - return results; } diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndexReader.java 
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndexReader.java index df269034..7baeb8ae 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndexReader.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndexReader.java @@ -74,7 +74,9 @@ public class SearchIndexReader implements AutoCloseable { IndexSearchBudget budget, LongPredicate filter, int wordId) { + var builder = underspecifiedQueryBuilders.get(block); + if (null != builder) { return builder.buildUnderspecified(budget, filter, wordId); } diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/query/IndexQueryBuilder.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/query/IndexQueryBuilder.java index de3f1435..be217057 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/query/IndexQueryBuilder.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/query/IndexQueryBuilder.java @@ -32,7 +32,9 @@ public class IndexQueryBuilder { return new QueryForIndices(budget, filter, wordId); } + // Special treatment for queries with few terms, prefer hits that appear in multiple buckets public Query buildUnderspecified(IndexSearchBudget budget, LongPredicate filter, int wordId) { + if (requiredIndices.size() == 1) { return build(budget, filter, wordId); } @@ -51,7 +53,7 @@ public class IndexQueryBuilder { return new QueryForIndices(budget, () -> Streams.concat(IntStream.range(1, relevantIndices.length) - .mapToObj(i -> underspecifiedPairStream(budget, (int) budget.limit()/(relevantIndices.length*2), relevantIndices[0], relevantIndices[i], wordId)) + .mapToObj(i -> underspecifiedPairStream(budget, 1000, relevantIndices[0], relevantIndices[i], wordId)) .flatMapToLong(Function.identity()), fstRange.stream().takeWhile(budget::take)) .filter(filter) @@ -59,17 +61,20 @@ public class IndexQueryBuilder { } private LongStream 
underspecifiedPairStream(IndexSearchBudget budget, int limit, int firstIdx, int otherIdx, int wordId) { - SearchIndex first = requiredIndices.get(firstIdx), - second = requiredIndices.get(otherIdx); + SearchIndex firstTmp = requiredIndices.get(firstIdx), + secondTmp = requiredIndices.get(otherIdx); - if (first.numUrls(wordId) > second.numUrls(wordId)) { - SearchIndex tmp = first; - first = second; - second = tmp; + final SearchIndex fst; + final SearchIndex snd; + + if (firstTmp.numUrls(wordId) > secondTmp.numUrls(wordId)) { + fst = secondTmp; + snd = firstTmp; + } + else { + fst = firstTmp; + snd = secondTmp; } - - SearchIndex fst = first; - SearchIndex snd = second; var sndRange = snd.rangeForWord(wordId); diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/query/IndexSearchBudget.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/query/IndexSearchBudget.java index e84af8fe..2ec30e65 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/query/IndexSearchBudget.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/query/IndexSearchBudget.java @@ -1,18 +1,16 @@ package nu.marginalia.wmsa.edge.index.service.query; -import lombok.RequiredArgsConstructor; -@RequiredArgsConstructor public class IndexSearchBudget { - private final long limit; - private long used = 0; + private long timeout; + public IndexSearchBudget(long limitTime) { + this.timeout = System.currentTimeMillis() + limitTime; + } + + // Used for short-circuiting Stream objects via takeWhile; the argument value is ignored public boolean take(long unused) { - return used++ < limit; + return System.currentTimeMillis() < timeout; } - public long used() { - return used; - } - public long limit() { return limit; } } diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/SearchIndexWriterTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/SearchIndexWriterTest.java index 
f9cd8a6a..5f1d2a0f 100644 --- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/SearchIndexWriterTest.java +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/SearchIndexWriterTest.java @@ -59,7 +59,7 @@ class SearchIndexWriterTest { } public long[] findWord(SearchIndexReader reader, String word, IndexBlock block) { - IndexSearchBudget budget = new IndexSearchBudget(1_000_000); + IndexSearchBudget budget = new IndexSearchBudget(100); return reader.findWord(block, budget, lv->true, dictionaryWriter.getReadOnly(word)).stream().toArray(); }