(prometheus) Add instrumentation to the search, qs and index services.

This commit is contained in:
Viktor Lofgren 2024-01-02 15:02:29 +01:00
parent 116595d218
commit 31232e49fb
5 changed files with 125 additions and 51 deletions

View File

@ -1,6 +1,8 @@
package nu.marginalia.search; package nu.marginalia.search;
import com.google.inject.Inject; import com.google.inject.Inject;
import io.prometheus.client.Counter;
import io.prometheus.client.Histogram;
import lombok.SneakyThrows; import lombok.SneakyThrows;
import nu.marginalia.WebsiteUrl; import nu.marginalia.WebsiteUrl;
import nu.marginalia.client.Context; import nu.marginalia.client.Context;
@ -11,6 +13,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import spark.Request; import spark.Request;
import spark.Response; import spark.Response;
import spark.Route;
import spark.Spark; import spark.Spark;
import java.net.URLEncoder; import java.net.URLEncoder;
@ -22,6 +25,17 @@ public class SearchService extends Service {
private final StaticResources staticResources; private final StaticResources staticResources;
private static final Logger logger = LoggerFactory.getLogger(SearchService.class); private static final Logger logger = LoggerFactory.getLogger(SearchService.class);
private static final Histogram wmsa_search_service_request_time = Histogram.build()
.name("wmsa_search_service_request_time")
.linearBuckets(0.005, 0.005, 15)
.labelNames("matchedPath", "method")
.help("Search service request time (seconds)")
.register();
private static final Counter wmsa_search_service_error_count = Counter.build()
.name("wmsa_search_service_error_count")
.labelNames("matchedPath", "method")
.help("Search service error count")
.register();
@SneakyThrows @SneakyThrows
@Inject @Inject
@ -42,30 +56,56 @@ public class SearchService extends Service {
Spark.staticFiles.expireTime(600); Spark.staticFiles.expireTime(600);
Spark.get("/search", searchQueryService::pathSearch); SearchServiceMetrics.get("/search", searchQueryService::pathSearch);
SearchServiceMetrics.get("/public/search", searchQueryService::pathSearch);
Spark.get("/public/search", searchQueryService::pathSearch); SearchServiceMetrics.get("/public/", frontPageService::render);
Spark.get("/public/", frontPageService::render); SearchServiceMetrics.get("/public/news.xml", frontPageService::renderNewsFeed);
Spark.get("/public/news.xml", frontPageService::renderNewsFeed); SearchServiceMetrics.get("/public/:resource", this::serveStatic);
Spark.get("/public/:resource", this::serveStatic);
Spark.post("/public/site/suggest/", addToCrawlQueueService::suggestCrawling); SearchServiceMetrics.post("/public/site/suggest/", addToCrawlQueueService::suggestCrawling);
Spark.get("/public/site-search/:site/*", this::siteSearchRedir); SearchServiceMetrics.get("/public/site-search/:site/*", this::siteSearchRedir);
Spark.get("/public/site/:site", siteInfoService::handle); SearchServiceMetrics.get("/public/site/:site", siteInfoService::handle);
Spark.post("/public/site/:site", siteInfoService::handlePost); SearchServiceMetrics.post("/public/site/:site", siteInfoService::handlePost);
Spark.get("/public/crosstalk/", crosstalkService::handle); SearchServiceMetrics.get("/public/crosstalk/", crosstalkService::handle);
Spark.exception(Exception.class, (e,p,q) -> { Spark.exception(Exception.class, (e,p,q) -> {
logger.error("Error during processing", e); logger.error("Error during processing", e);
wmsa_search_service_error_count.labels(p.pathInfo(), p.requestMethod()).inc();
errorPageService.serveError(Context.fromRequest(p), p, q); errorPageService.serveError(Context.fromRequest(p), p, q);
}); });
Spark.awaitInitialization(); Spark.awaitInitialization();
} }
/** Wraps a route with a timer and a counter */
private static class SearchServiceMetrics implements Route {
private final Route delegatedRoute;
static void get(String path, Route route) {
Spark.get(path, new SearchServiceMetrics(route));
}
static void post(String path, Route route) {
Spark.post(path, new SearchServiceMetrics(route));
}
private SearchServiceMetrics(Route delegatedRoute) {
this.delegatedRoute = delegatedRoute;
}
@Override
public Object handle(Request request, Response response) throws Exception {
return wmsa_search_service_request_time
.labels(request.pathInfo(), request.requestMethod())
.time(() -> delegatedRoute.handle(request, response));
}
}
private Object serveStatic(Request request, Response response) { private Object serveStatic(Request request, Response response) {
String resource = request.params("resource"); String resource = request.params("resource");
staticResources.serveStatic("search", resource, request, response); staticResources.serveStatic("search", resource, request, response);

View File

@ -1,6 +1,7 @@
package nu.marginalia.search.svc; package nu.marginalia.search.svc;
import com.google.inject.Inject; import com.google.inject.Inject;
import io.prometheus.client.Histogram;
import lombok.SneakyThrows; import lombok.SneakyThrows;
import nu.marginalia.WebsiteUrl; import nu.marginalia.WebsiteUrl;
import nu.marginalia.search.command.SearchAdtechParameter; import nu.marginalia.search.command.SearchAdtechParameter;

View File

@ -55,19 +55,19 @@ public class IndexQueryService extends IndexApiImplBase {
private final Marker queryMarker = MarkerFactory.getMarker("QUERY"); private final Marker queryMarker = MarkerFactory.getMarker("QUERY");
private static final Counter wmsa_query_timeouts = Counter.build() private static final Counter wmsa_query_timeouts = Counter.build()
.name("wmsa_query_timeouts") .name("wmsa_index_query_timeouts")
.help("Query timeout counter") .help("Query timeout counter")
.labelNames("node") .labelNames("node", "api")
.register(); .register();
private static final Gauge wmsa_query_cost = Gauge.build() private static final Gauge wmsa_query_cost = Gauge.build()
.name("wmsa_query_cost") .name("wmsa_index_query_cost")
.help("Computational cost of query") .help("Computational cost of query")
.labelNames("node") .labelNames("node", "api")
.register(); .register();
private static final Histogram wmsa_query_time = Histogram.build() private static final Histogram wmsa_query_time = Histogram.build()
.name("wmsa_query_time") .name("wmsa_index_query_time")
.linearBuckets(50., 50., 15) .linearBuckets(0.005, 0.005, 15)
.labelNames("node") .labelNames("node", "api")
.help("Index-side query time") .help("Index-side query time")
.register(); .register();
@ -138,7 +138,7 @@ public class IndexQueryService extends IndexApiImplBase {
try { try {
return wmsa_query_time return wmsa_query_time
.labels(nodeName) .labels(nodeName, "REST")
.time(() -> { .time(() -> {
var params = new SearchParameters(specsSet, getSearchSet(specsSet)); var params = new SearchParameters(specsSet, getSearchSet(specsSet));
@ -147,11 +147,12 @@ public class IndexQueryService extends IndexApiImplBase {
logger.info(queryMarker, "Index Result Count: {}", results.size()); logger.info(queryMarker, "Index Result Count: {}", results.size());
wmsa_query_cost wmsa_query_cost
.labels(nodeName) .labels(nodeName, "REST")
.set(params.getDataCost()); .set(params.getDataCost());
if (!params.hasTimeLeft()) { if (!params.hasTimeLeft()) {
wmsa_query_timeouts wmsa_query_timeouts
.labels(nodeName) .labels(nodeName, "REST")
.inc(); .inc();
} }
@ -179,7 +180,22 @@ public class IndexQueryService extends IndexApiImplBase {
try { try {
var params = new SearchParameters(request, getSearchSet(request)); var params = new SearchParameters(request, getSearchSet(request));
SearchResultSet results = executeSearch(params); final String nodeName = Integer.toString(nodeId);
SearchResultSet results = wmsa_query_time
.labels(nodeName, "GRPC")
.time(() -> executeSearch(params));
wmsa_query_cost
.labels(nodeName, "GRPC")
.set(params.getDataCost());
if (!params.hasTimeLeft()) {
wmsa_query_timeouts
.labels(nodeName, "GRPC")
.inc();
}
for (var result : results.results) { for (var result : results.results) {
var rawResult = result.rawIndexResult; var rawResult = result.rawIndexResult;

View File

@ -3,6 +3,8 @@ package nu.marginalia.query;
import com.google.inject.Inject; import com.google.inject.Inject;
import io.grpc.ManagedChannel; import io.grpc.ManagedChannel;
import io.grpc.ManagedChannelBuilder; import io.grpc.ManagedChannelBuilder;
import io.prometheus.client.Histogram;
import lombok.SneakyThrows;
import nu.marginalia.db.DomainBlacklist; import nu.marginalia.db.DomainBlacklist;
import nu.marginalia.index.api.*; import nu.marginalia.index.api.*;
import nu.marginalia.model.id.UrlIdCodec; import nu.marginalia.model.id.UrlIdCodec;
@ -19,6 +21,12 @@ public class QueryGRPCService extends QueryApiGrpc.QueryApiImplBase {
private final Logger logger = LoggerFactory.getLogger(QueryGRPCService.class); private final Logger logger = LoggerFactory.getLogger(QueryGRPCService.class);
private static final Histogram wmsa_qs_query_time_grpc = Histogram.build()
.name("wmsa_qs_query_time_grpc")
.linearBuckets(0.005, 0.005, 15)
.help("QS-side query time (GRPC endpoint)")
.register();
private final Map<ServiceAndNode, ManagedChannel> channels private final Map<ServiceAndNode, ManagedChannel> channels
= new ConcurrentHashMap<>(); = new ConcurrentHashMap<>();
private final Map<ServiceAndNode, IndexApiGrpc.IndexApiBlockingStub> actorRpcApis private final Map<ServiceAndNode, IndexApiGrpc.IndexApiBlockingStub> actorRpcApis
@ -61,23 +69,25 @@ public class QueryGRPCService extends QueryApiGrpc.QueryApiImplBase {
io.grpc.stub.StreamObserver<nu.marginalia.index.api.RpcQsResponse> responseObserver) io.grpc.stub.StreamObserver<nu.marginalia.index.api.RpcQsResponse> responseObserver)
{ {
try { try {
var params = QueryProtobufCodec.convertRequest(request); wmsa_qs_query_time_grpc.time(() -> {
var query = queryFactory.createQuery(params); var params = QueryProtobufCodec.convertRequest(request);
var query = queryFactory.createQuery(params);
RpcIndexQuery indexRequest = QueryProtobufCodec.convertQuery(request, query); RpcIndexQuery indexRequest = QueryProtobufCodec.convertQuery(request, query);
List<RpcDecoratedResultItem> bestItems = executeQueries(indexRequest, request.getQueryLimits().getResultsTotal()); List<RpcDecoratedResultItem> bestItems = executeQueries(indexRequest, request.getQueryLimits().getResultsTotal());
var responseBuilder = RpcQsResponse.newBuilder() var responseBuilder = RpcQsResponse.newBuilder()
.addAllResults(bestItems) .addAllResults(bestItems)
.setSpecs(indexRequest) .setSpecs(indexRequest)
.addAllSearchTermsHuman(query.searchTermsHuman); .addAllSearchTermsHuman(query.searchTermsHuman);
if (query.domain != null) if (query.domain != null)
responseBuilder.setDomain(query.domain); responseBuilder.setDomain(query.domain);
responseObserver.onNext(responseBuilder.build()); responseObserver.onNext(responseBuilder.build());
responseObserver.onCompleted(); responseObserver.onCompleted();
});
} catch (Exception e) { } catch (Exception e) {
logger.error("Exception", e); logger.error("Exception", e);
responseObserver.onError(e); responseObserver.onError(e);
@ -89,8 +99,8 @@ public class QueryGRPCService extends QueryApiGrpc.QueryApiImplBase {
private static final Comparator<RpcDecoratedResultItem> comparator = private static final Comparator<RpcDecoratedResultItem> comparator =
Comparator.comparing(RpcDecoratedResultItem::getRankingScore); Comparator.comparing(RpcDecoratedResultItem::getRankingScore);
private List<RpcDecoratedResultItem> executeQueries(RpcIndexQuery indexRequest, int totalSize) throws InterruptedException @SneakyThrows
{ private List<RpcDecoratedResultItem> executeQueries(RpcIndexQuery indexRequest, int totalSize) {
List<Callable<List<RpcDecoratedResultItem>>> tasks = createTasks(indexRequest); List<Callable<List<RpcDecoratedResultItem>>> tasks = createTasks(indexRequest);
return es.invokeAll(tasks).stream() return es.invokeAll(tasks).stream()

View File

@ -3,13 +3,13 @@ package nu.marginalia.query;
import com.google.gson.Gson; import com.google.gson.Gson;
import com.google.inject.Inject; import com.google.inject.Inject;
import io.grpc.ServerBuilder; import io.grpc.ServerBuilder;
import io.prometheus.client.Histogram;
import nu.marginalia.client.Context; import nu.marginalia.client.Context;
import nu.marginalia.db.DomainBlacklist; import nu.marginalia.db.DomainBlacklist;
import nu.marginalia.index.client.IndexClient; import nu.marginalia.index.client.IndexClient;
import nu.marginalia.index.client.model.query.SearchSpecification; import nu.marginalia.index.client.model.query.SearchSpecification;
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem; import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
import nu.marginalia.index.client.model.results.SearchResultSet; import nu.marginalia.index.client.model.results.SearchResultSet;
import nu.marginalia.nodecfg.NodeConfigurationService;
import nu.marginalia.query.model.QueryParams; import nu.marginalia.query.model.QueryParams;
import nu.marginalia.query.model.QueryResponse; import nu.marginalia.query.model.QueryResponse;
import nu.marginalia.query.svc.NodeConfigurationWatcher; import nu.marginalia.query.svc.NodeConfigurationWatcher;
@ -21,9 +21,7 @@ import spark.Response;
import spark.Spark; import spark.Spark;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit;
public class QueryService extends Service { public class QueryService extends Service {
@ -33,6 +31,13 @@ public class QueryService extends Service {
private final DomainBlacklist blacklist; private final DomainBlacklist blacklist;
private final QueryFactory queryFactory; private final QueryFactory queryFactory;
private static final Histogram wmsa_qs_query_time_rest = Histogram.build()
.name("wmsa_qs_query_time_rest")
.linearBuckets(0.005, 0.005, 15)
.help("QS-side query time (REST endpoint)")
.register();
@Inject @Inject
public QueryService(BaseServiceParams params, public QueryService(BaseServiceParams params,
IndexClient indexClient, IndexClient indexClient,
@ -58,23 +63,25 @@ public class QueryService extends Service {
} }
private Object search(Request request, Response response) { private Object search(Request request, Response response) {
String json = request.body(); return wmsa_qs_query_time_rest.time(() -> {
QueryParams params = gson.fromJson(json, QueryParams.class); String json = request.body();
QueryParams params = gson.fromJson(json, QueryParams.class);
var query = queryFactory.createQuery(params); var query = queryFactory.createQuery(params);
var rsp = executeQuery(Context.fromRequest(request), query.specs); var rsp = executeQuery(Context.fromRequest(request), query.specs);
rsp.results.removeIf(this::isBlacklisted); rsp.results.removeIf(this::isBlacklisted);
response.type("application/json"); response.type("application/json");
return new QueryResponse( return new QueryResponse(
query.specs, query.specs,
rsp.results, rsp.results,
query.searchTermsHuman, query.searchTermsHuman,
List.of(), // no problems List.of(), // no problems
query.domain query.domain
); );
});
} }
private SearchResultSet delegateToIndex(Request request, Response response) { private SearchResultSet delegateToIndex(Request request, Response response) {