diff --git a/code/services-application/api-service/src/main/java/nu/marginalia/api/ApiService.java b/code/services-application/api-service/src/main/java/nu/marginalia/api/ApiService.java index 8c1d303d..efb59a7a 100644 --- a/code/services-application/api-service/src/main/java/nu/marginalia/api/ApiService.java +++ b/code/services-application/api-service/src/main/java/nu/marginalia/api/ApiService.java @@ -2,6 +2,8 @@ package nu.marginalia.api; import com.google.gson.Gson; import com.google.inject.Inject; +import io.prometheus.client.Counter; +import io.prometheus.client.Histogram; import nu.marginalia.api.model.ApiLicense; import nu.marginalia.api.model.ApiSearchResults; import nu.marginalia.api.svc.LicenseService; @@ -34,6 +36,23 @@ public class ApiService extends Service { // Marker for filtering out sensitive content from the persistent logs private final Marker queryMarker = MarkerFactory.getMarker("QUERY"); + private final Counter wmsa_api_timeout_count = Counter.build() + .name("wmsa_api_timeout_count") + .labelNames("key") + .help("API timeout count") + .register(); + private final Counter wmsa_api_cache_hit_count = Counter.build() + .name("wmsa_api_cache_hit_count") + .labelNames("key") + .help("API cache hit count") + .register(); + + private static final Histogram wmsa_api_query_time = Histogram.build() + .name("wmsa_api_query_time") + .linearBuckets(0.005, 0.005, 15) + .help("API-side query time") + .register(); + @Inject public ApiService(BaseServiceParams params, QueryClient queryClient, @@ -83,6 +102,7 @@ public class ApiService extends Service { var cachedResponse = responseCache.getResults(license, args[0], request.queryString()); if (cachedResponse.isPresent()) { + wmsa_api_cache_hit_count.labels(license.key).inc(); return cachedResponse.get(); } @@ -98,6 +118,7 @@ public class ApiService extends Service { private ApiSearchResults doSearch(ApiLicense license, String query, Request request) { if (!rateLimiterService.isAllowed(license)) { + wmsa_api_timeout_count.labels(license.key).inc(); Spark.halt(503, "Slow down"); } @@ -106,9 +127,13 @@ public class ApiService extends Service { logger.info(queryMarker, "{} Search {}", license.key, query); - return searchOperator - .query(Context.fromRequest(request), query, count, index) - .withLicense(license.getLicense()); + return wmsa_api_query_time + .labels(license.key) + .time(() -> + searchOperator + .query(Context.fromRequest(request), query, count, index) + .withLicense(license.getLicense()) + ); } private int intParam(Request request, String name, int defaultValue) { diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchService.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchService.java index b2d54cb5..2c486b6e 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchService.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchService.java @@ -1,6 +1,8 @@ package nu.marginalia.search; import com.google.inject.Inject; +import io.prometheus.client.Counter; +import io.prometheus.client.Histogram; import lombok.SneakyThrows; import nu.marginalia.WebsiteUrl; import nu.marginalia.client.Context; @@ -11,6 +13,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import spark.Request; import spark.Response; +import spark.Route; import spark.Spark; import java.net.URLEncoder; @@ -22,6 +25,17 @@ public class SearchService extends Service { private final StaticResources staticResources; private static final Logger logger = LoggerFactory.getLogger(SearchService.class); + private static final Histogram wmsa_search_service_request_time = Histogram.build() + .name("wmsa_search_service_request_time") + .linearBuckets(0.005, 0.005, 15) + .labelNames("matchedPath", "method") + .help("Search service request time (seconds)") + .register(); + private static final Counter wmsa_search_service_error_count = Counter.build() + .name("wmsa_search_service_error_count") + .labelNames("matchedPath", "method") + .help("Search service error count") + .register(); @SneakyThrows @Inject @@ -42,30 +56,56 @@ public class SearchService extends Service { Spark.staticFiles.expireTime(600); - Spark.get("/search", searchQueryService::pathSearch); + SearchServiceMetrics.get("/search", searchQueryService::pathSearch); + SearchServiceMetrics.get("/public/search", searchQueryService::pathSearch); - Spark.get("/public/search", searchQueryService::pathSearch); - Spark.get("/public/", frontPageService::render); - Spark.get("/public/news.xml", frontPageService::renderNewsFeed); - Spark.get("/public/:resource", this::serveStatic); + SearchServiceMetrics.get("/public/", frontPageService::render); + SearchServiceMetrics.get("/public/news.xml", frontPageService::renderNewsFeed); + SearchServiceMetrics.get("/public/:resource", this::serveStatic); - Spark.post("/public/site/suggest/", addToCrawlQueueService::suggestCrawling); + SearchServiceMetrics.post("/public/site/suggest/", addToCrawlQueueService::suggestCrawling); - Spark.get("/public/site-search/:site/*", this::siteSearchRedir); + SearchServiceMetrics.get("/public/site-search/:site/*", this::siteSearchRedir); - Spark.get("/public/site/:site", siteInfoService::handle); - Spark.post("/public/site/:site", siteInfoService::handlePost); + SearchServiceMetrics.get("/public/site/:site", siteInfoService::handle); + SearchServiceMetrics.post("/public/site/:site", siteInfoService::handlePost); - Spark.get("/public/crosstalk/", crosstalkService::handle); + SearchServiceMetrics.get("/public/crosstalk/", crosstalkService::handle); Spark.exception(Exception.class, (e,p,q) -> { logger.error("Error during processing", e); + wmsa_search_service_error_count.labels(p.pathInfo(), p.requestMethod()).inc(); errorPageService.serveError(Context.fromRequest(p), p, q); }); Spark.awaitInitialization(); } + + + /** Wraps a route with a timer and a counter */ + private static class SearchServiceMetrics implements Route { + private final Route delegatedRoute; + + static void get(String path, Route route) { + Spark.get(path, new SearchServiceMetrics(route)); + } + static void post(String path, Route route) { + Spark.post(path, new SearchServiceMetrics(route)); + } + + private SearchServiceMetrics(Route delegatedRoute) { + this.delegatedRoute = delegatedRoute; + } + + @Override + public Object handle(Request request, Response response) throws Exception { + return wmsa_search_service_request_time + .labels(request.pathInfo(), request.requestMethod()) + .time(() -> delegatedRoute.handle(request, response)); + } + } + private Object serveStatic(Request request, Response response) { String resource = request.params("resource"); staticResources.serveStatic("search", resource, request, response); diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryService.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryService.java index 9836e6d2..9593623c 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryService.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryService.java @@ -1,6 +1,7 @@ package nu.marginalia.search.svc; import com.google.inject.Inject; +import io.prometheus.client.Histogram; import lombok.SneakyThrows; import nu.marginalia.WebsiteUrl; import nu.marginalia.search.command.SearchAdtechParameter; diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java index 476ea991..29d28a45 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java @@ -55,19 +55,19 @@ public class IndexQueryService extends IndexApiImplBase { private final Marker queryMarker = MarkerFactory.getMarker("QUERY"); private static final Counter wmsa_query_timeouts = Counter.build() - .name("wmsa_query_timeouts") + .name("wmsa_index_query_timeouts") .help("Query timeout counter") - .labelNames("node") + .labelNames("node", "api") .register(); private static final Gauge wmsa_query_cost = Gauge.build() - .name("wmsa_query_cost") + .name("wmsa_index_query_cost") .help("Computational cost of query") - .labelNames("node") + .labelNames("node", "api") .register(); private static final Histogram wmsa_query_time = Histogram.build() - .name("wmsa_query_time") - .linearBuckets(50., 50., 15) - .labelNames("node") + .name("wmsa_index_query_time") + .linearBuckets(0.005, 0.005, 15) + .labelNames("node", "api") .help("Index-side query time") .register(); @@ -138,7 +138,7 @@ public class IndexQueryService extends IndexApiImplBase { try { return wmsa_query_time - .labels(nodeName) + .labels(nodeName, "REST") .time(() -> { var params = new SearchParameters(specsSet, getSearchSet(specsSet)); @@ -147,11 +147,12 @@ public class IndexQueryService extends IndexApiImplBase { logger.info(queryMarker, "Index Result Count: {}", results.size()); wmsa_query_cost - .labels(nodeName) + .labels(nodeName, "REST") .set(params.getDataCost()); + if (!params.hasTimeLeft()) { wmsa_query_timeouts - .labels(nodeName) + .labels(nodeName, "REST") .inc(); } @@ -179,7 +180,22 @@ public class IndexQueryService extends IndexApiImplBase { try { var params = new SearchParameters(request, getSearchSet(request)); - SearchResultSet results = executeSearch(params); + final String nodeName = Integer.toString(nodeId); + + SearchResultSet results = wmsa_query_time + .labels(nodeName, "GRPC") + .time(() -> executeSearch(params)); + + wmsa_query_cost + .labels(nodeName, "GRPC") + .set(params.getDataCost()); + + if (!params.hasTimeLeft()) { + wmsa_query_timeouts + .labels(nodeName, "GRPC") + .inc(); + } + for (var result : results.results) { var rawResult = result.rawIndexResult; diff --git a/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryGRPCService.java b/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryGRPCService.java index aa9fdaf5..3ec7acb7 100644 --- a/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryGRPCService.java +++ b/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryGRPCService.java @@ -3,6 +3,8 @@ package nu.marginalia.query; import com.google.inject.Inject; import io.grpc.ManagedChannel; import io.grpc.ManagedChannelBuilder; +import io.prometheus.client.Histogram; +import lombok.SneakyThrows; import nu.marginalia.db.DomainBlacklist; import nu.marginalia.index.api.*; import nu.marginalia.model.id.UrlIdCodec; @@ -19,6 +21,12 @@ public class QueryGRPCService extends QueryApiGrpc.QueryApiImplBase { private final Logger logger = LoggerFactory.getLogger(QueryGRPCService.class); + private static final Histogram wmsa_qs_query_time_grpc = Histogram.build() + .name("wmsa_qs_query_time_grpc") + .linearBuckets(0.005, 0.005, 15) + .help("QS-side query time (GRPC endpoint)") + .register(); + private final Map channels = new ConcurrentHashMap<>(); private final Map actorRpcApis @@ -61,23 +69,25 @@ public class QueryGRPCService extends QueryApiGrpc.QueryApiImplBase { io.grpc.stub.StreamObserver responseObserver) { try { - var params = QueryProtobufCodec.convertRequest(request); - var query = queryFactory.createQuery(params); + wmsa_qs_query_time_grpc.time(() -> { + var params = QueryProtobufCodec.convertRequest(request); + var query = queryFactory.createQuery(params); - RpcIndexQuery indexRequest = QueryProtobufCodec.convertQuery(request, query); - List bestItems = executeQueries(indexRequest, request.getQueryLimits().getResultsTotal()); + RpcIndexQuery indexRequest = QueryProtobufCodec.convertQuery(request, query); + List bestItems = executeQueries(indexRequest, request.getQueryLimits().getResultsTotal()); - var responseBuilder = RpcQsResponse.newBuilder() - .addAllResults(bestItems) - .setSpecs(indexRequest) - .addAllSearchTermsHuman(query.searchTermsHuman); + var responseBuilder = RpcQsResponse.newBuilder() + .addAllResults(bestItems) + .setSpecs(indexRequest) + .addAllSearchTermsHuman(query.searchTermsHuman); - if (query.domain != null) - responseBuilder.setDomain(query.domain); + if (query.domain != null) + responseBuilder.setDomain(query.domain); - responseObserver.onNext(responseBuilder.build()); + responseObserver.onNext(responseBuilder.build()); - responseObserver.onCompleted(); + responseObserver.onCompleted(); + }); } catch (Exception e) { logger.error("Exception", e); responseObserver.onError(e); @@ -89,8 +99,8 @@ public class QueryGRPCService extends QueryApiGrpc.QueryApiImplBase { private static final Comparator comparator = Comparator.comparing(RpcDecoratedResultItem::getRankingScore); - private List executeQueries(RpcIndexQuery indexRequest, int totalSize) throws InterruptedException - { + @SneakyThrows + private List executeQueries(RpcIndexQuery indexRequest, int totalSize) { List>> tasks = createTasks(indexRequest); return es.invokeAll(tasks).stream() diff --git a/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryService.java b/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryService.java index d950a8d0..7f003df7 100644 --- a/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryService.java +++ b/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryService.java @@ -3,13 +3,13 @@ package nu.marginalia.query; import com.google.gson.Gson; import com.google.inject.Inject; import io.grpc.ServerBuilder; +import io.prometheus.client.Histogram; import nu.marginalia.client.Context; import nu.marginalia.db.DomainBlacklist; import nu.marginalia.index.client.IndexClient; import nu.marginalia.index.client.model.query.SearchSpecification; import nu.marginalia.index.client.model.results.DecoratedSearchResultItem; import nu.marginalia.index.client.model.results.SearchResultSet; -import nu.marginalia.nodecfg.NodeConfigurationService; import nu.marginalia.query.model.QueryParams; import nu.marginalia.query.model.QueryResponse; import nu.marginalia.query.svc.NodeConfigurationWatcher; @@ -21,9 +21,7 @@ import spark.Response; import spark.Spark; import java.io.IOException; -import java.util.ArrayList; import java.util.List; -import java.util.concurrent.TimeUnit; public class QueryService extends Service { @@ -33,6 +31,13 @@ public class QueryService extends Service { private final DomainBlacklist blacklist; private final QueryFactory queryFactory; + private static final Histogram wmsa_qs_query_time_rest = Histogram.build() + .name("wmsa_qs_query_time_rest") + .linearBuckets(0.005, 0.005, 15) + .help("QS-side query time (REST endpoint)") + .register(); + + @Inject public QueryService(BaseServiceParams params, IndexClient indexClient, @@ -58,23 +63,25 @@ public class QueryService extends Service { } private Object search(Request request, Response response) { - String json = request.body(); - QueryParams params = gson.fromJson(json, QueryParams.class); + return wmsa_qs_query_time_rest.time(() -> { + String json = request.body(); + QueryParams params = gson.fromJson(json, QueryParams.class); - var query = queryFactory.createQuery(params); - var rsp = executeQuery(Context.fromRequest(request), query.specs); + var query = queryFactory.createQuery(params); + var rsp = executeQuery(Context.fromRequest(request), query.specs); - rsp.results.removeIf(this::isBlacklisted); + rsp.results.removeIf(this::isBlacklisted); - response.type("application/json"); + response.type("application/json"); - return new QueryResponse( - query.specs, - rsp.results, - query.searchTermsHuman, - List.of(), // no problems - query.domain - ); + return new QueryResponse( + query.specs, + rsp.results, + query.searchTermsHuman, + List.of(), // no problems + query.domain + ); + }); } private SearchResultSet delegateToIndex(Request request, Response response) { diff --git a/docker-compose.yml b/docker-compose.yml index 6754a298..2fdbe6ec 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -8,10 +8,8 @@ x-svc: &service - logs:/var/log/wmsa networks: - wmsa - depends_on: - mariadb: - condition: service_healthy - + labels: + - "__meta_docker_port_private=7000" x-p1: &partition-1 env_file: - "run/env/service.env" @@ -201,6 +199,18 @@ services: - "/var/run/docker.sock:/var/run/docker.sock:ro" networks: - wmsa + prometheus: + image: "prom/prometheus" + container_name: "prometheus" + command: + - "--config.file=/etc/prometheus/prometheus.yml" + ports: + - "127.0.0.1:8091:9090" + volumes: + - "./run/prometheus.yml:/etc/prometheus/prometheus.yml" + - "/var/run/docker.sock:/var/run/docker.sock:ro" + networks: + - wmsa networks: wmsa: volumes: diff --git a/run/prometheus.yml b/run/prometheus.yml new file mode 100644 index 00000000..f4270e23 --- /dev/null +++ b/run/prometheus.yml @@ -0,0 +1,13 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: docker + docker_sd_configs: + - host: unix:///var/run/docker.sock + relabel_configs: + - source_labels: + - '__meta_docker_network_ip' + target_label: '__address__' + replacement: '$1:7000' \ No newline at end of file