From c201201c2da549d33c8d9f2c6635092f53bf6ad4 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Mon, 30 May 2022 21:02:53 +0200 Subject: [PATCH 1/2] Instrumentation for search + index madvise tweaks (#17) Co-authored-by: vlofgren Reviewed-on: https://git.marginalia.nu/marginalia/marginalia.nu/pulls/17 --- .../wmsa/edge/index/service/index/SearchIndex.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndex.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndex.java index c25100f4..222c332e 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndex.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndex.java @@ -3,15 +3,18 @@ package nu.marginalia.wmsa.edge.index.service.index; import com.google.inject.Inject; import com.google.inject.name.Named; import com.upserve.uppend.blobs.NativeIO; +import io.reactivex.rxjava3.schedulers.Schedulers; import nu.marginalia.wmsa.edge.index.service.index.wordstable.IndexWordsTable; import nu.marginalia.util.btree.BTreeReader; import nu.marginalia.util.multimap.MultimapFileLong; +import org.eclipse.jetty.util.thread.ThreadPool; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; +import java.util.concurrent.ForkJoinPool; import java.util.stream.LongStream; public class SearchIndex implements AutoCloseable { @@ -40,12 +43,12 @@ public class SearchIndex implements AutoCloseable { bTreeReader = new BTreeReader(urls, SearchIndexConverter.urlsBTreeContext); - madvise(urls, bTreeReader); + Schedulers.io().scheduleDirect(() -> madvise(urls, bTreeReader)); } private void madvise(MultimapFileLong urls, BTreeReader reader) { - urls.advice(NativeIO.Advice.Sequential); + urls.advice(NativeIO.Advice.Random); words.forEachWordsOffset(offset -> { 
var h = reader.getHeader(offset); int length = (int) (h.dataOffsetLongs() - h.indexOffsetLongs()); From 44bee371e67d92f5c9be22ff73ba52ae734dac9a Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Mon, 30 May 2022 21:12:15 +0200 Subject: [PATCH 2/2] Actually add the commit with the previously mentioned instrumentation (#18) Co-authored-by: vlofgren Reviewed-on: https://git.marginalia.nu/marginalia/marginalia.nu/pulls/18 --- .../wmsa/edge/search/EdgeSearchOperator.java | 58 ++++++++----------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchOperator.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchOperator.java index dd37c515..10675cc5 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchOperator.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchOperator.java @@ -2,6 +2,7 @@ package nu.marginalia.wmsa.edge.search; import com.google.inject.Inject; import com.google.inject.Singleton; +import io.prometheus.client.Summary; import io.reactivex.rxjava3.core.Observable; import io.reactivex.rxjava3.schedulers.Schedulers; import nu.marginalia.wmsa.configuration.server.Context; @@ -47,6 +48,9 @@ public class EdgeSearchOperator { private final SearchResultValuator valuator; private final Comparator resultListComparator; + private static final Summary wmsa_search_index_api_time = Summary.build().name("wmsa_search_index_api_time").help("-").register(); + private static final Summary wmsa_search_result_decoration_time = Summary.build().name("wmsa_search_result_decoration_time").help("-").register(); + @Inject public EdgeSearchOperator(AssistantClient assistantClient, EncyclopediaClient encyclopediaClient, @@ -141,27 +145,30 @@ public class EdgeSearchOperator { AccumulatedQueryResults queryResults = new AccumulatedQueryResults(); UrlDeduplicator deduplicator = new 
UrlDeduplicator(processedQuery.specs.limitByDomain); - - if (processedQuery.searchTermsHuman.size()<=4 && !asFastAsPossible) { - fetchResultsMulti(ctx, processedQuery, queryResults, deduplicator); - } - else { - fetchResultsSimple(ctx, processedQuery, queryResults, deduplicator); - } - List resultList = new ArrayList<>(queryResults.size()); - for (var details : queryResults.results) { - if (details.getUrlQuality() < -100) { - continue; + wmsa_search_index_api_time.time(() -> { + if (processedQuery.searchTermsHuman.size() <= 4 && !asFastAsPossible) { + fetchResultsMulti(ctx, processedQuery, queryResults, deduplicator); + } else { + fetchResultsSimple(ctx, processedQuery, queryResults, deduplicator); + } + }); + + wmsa_search_result_decoration_time.time(() -> { + for (var details : queryResults.results) { + if (details.getUrlQuality() < -100) { + continue; + } + var scoreAdjustment = adjustScoreBasedOnQuery(details, processedQuery.specs); + details = details.withUrlQualityAdjustment(scoreAdjustment); + + resultList.add(details); } - var scoreAdjustment = adjustScoreBasedOnQuery(details, processedQuery.specs); - details = details.withUrlQualityAdjustment(scoreAdjustment); - resultList.add(details); + resultList.sort(resultListComparator); } - - resultList.sort(resultListComparator); + ); return new DecoratedSearchResultSet(resultList); } @@ -254,31 +261,14 @@ public class EdgeSearchOperator { var blocksOrder = processedQuery.specs.subqueries.stream().map(sq -> sq.block).distinct().sorted(Comparator.comparing(block -> block.sortOrder)).toList(); + EdgeSearchSpecification[] specsArray = processedQuery.specs.subqueries.stream() .filter(sq -> sq.block == IndexBlock.TitleKeywords) .map(sq -> processedQuery.specs.withSubqueries(blocksOrder.stream().map(sq::withBlock).collect(Collectors.toList()))) - //.flatMap(specs -> processedQuery.specs.buckets.stream().map(bucket -> specs.withBuckets(List.of(bucket)))) .toArray(EdgeSearchSpecification[]::new); var resultSets = 
indexClient.multiQuery(ctx, specsArray); - if (debug) { - for (var s : specsArray) { - logger.info("{}", s); - } - for (IndexBlock block : indexBlockSearchOrder) { - resultSets.forEach(res -> { - res.resultsList.getOrDefault(block, Collections.emptyList()).forEach(b2 -> { - b2.results.forEach((idx,items) -> { - items.forEach(i -> - logger.info("{} {} - {}", block, idx, i) - ); - }); - }); - }); - } - } - Set> seenUrls = new HashSet<>(); for (IndexBlock block : indexBlockSearchOrder) { var resultsJoined = resultSets.stream().flatMap(rs -> rs.resultsList.getOrDefault(block, Collections.emptyList()).stream())