From d0b17fb36c77300f87511e3fb55758952c357855 Mon Sep 17 00:00:00 2001 From: Adrien Guillo Date: Tue, 24 Sep 2024 16:24:43 -0600 Subject: [PATCH] Add some additional search metrics --- quickwit/quickwit-search/src/client.rs | 69 +++++++++++++++++-------- quickwit/quickwit-search/src/metrics.rs | 40 ++++++++++++-- 2 files changed, 85 insertions(+), 24 deletions(-) diff --git a/quickwit/quickwit-search/src/client.rs b/quickwit/quickwit-search/src/client.rs index 434b4a430e7..86a53caf392 100644 --- a/quickwit/quickwit-search/src/client.rs +++ b/quickwit/quickwit-search/src/client.rs @@ -20,7 +20,7 @@ use std::fmt; use std::net::SocketAddr; use std::sync::Arc; -use std::time::Duration; +use std::time::{Duration, Instant}; use bytesize::ByteSize; use futures::{StreamExt, TryStreamExt}; @@ -37,6 +37,7 @@ use tower::timeout::Timeout; use tracing::{info_span, warn, Instrument}; use crate::error::parse_grpc_error; +use crate::metrics::SEARCH_METRICS; use crate::SearchService; /// Impl is an enumeration that meant to manage Quickwit's search service client types. @@ -110,17 +111,30 @@ impl SearchServiceClient { &mut self, request: quickwit_proto::search::SearchRequest, ) -> crate::Result { - match &mut self.client_impl { - SearchServiceClientImpl::Grpc(grpc_client) => { - let tonic_request = Request::new(request); - let tonic_response = grpc_client - .root_search(tonic_request) - .await - .map_err(|tonic_error| parse_grpc_error(&tonic_error))?; - Ok(tonic_response.into_inner()) - } + let start = Instant::now(); + let response_result = match &mut self.client_impl { + SearchServiceClientImpl::Grpc(grpc_client) => grpc_client + .root_search(request) + .await + .map(|tonic_response| tonic_response.into_inner()) + .map_err(|tonic_error| parse_grpc_error(&tonic_error)), SearchServiceClientImpl::Local(service) => service.root_search(request).await, - } + }; + let elapsed = start.elapsed().as_secs_f64(); + let label_values = if response_result.is_ok() { + ["success"] + } else { + ["error"] + }; + SEARCH_METRICS + .root_search_requests_total + .with_label_values(label_values) + .inc(); + SEARCH_METRICS + .root_search_request_duration_seconds + .with_label_values(label_values) + .observe(elapsed); + response_result } /// Perform leaf search. @@ -128,17 +142,30 @@ impl SearchServiceClient { &mut self, request: quickwit_proto::search::LeafSearchRequest, ) -> crate::Result { - match &mut self.client_impl { - SearchServiceClientImpl::Grpc(grpc_client) => { - let tonic_request = Request::new(request); - let tonic_response = grpc_client - .leaf_search(tonic_request) - .await - .map_err(|tonic_error| parse_grpc_error(&tonic_error))?; - Ok(tonic_response.into_inner()) - } + let start = Instant::now(); + let response_result = match &mut self.client_impl { + SearchServiceClientImpl::Grpc(grpc_client) => grpc_client + .leaf_search(request) + .await + .map(|tonic_response| tonic_response.into_inner()) + .map_err(|tonic_error| parse_grpc_error(&tonic_error)), SearchServiceClientImpl::Local(service) => service.leaf_search(request).await, - } + }; + let elapsed = start.elapsed().as_secs_f64(); + let label_values = if response_result.is_ok() { + ["success"] + } else { + ["error"] + }; + SEARCH_METRICS + .leaf_search_requests_total + .with_label_values(label_values) + .inc(); + SEARCH_METRICS + .leaf_search_request_duration_seconds + .with_label_values(label_values) + .observe(elapsed); + response_result } /// Perform leaf search. diff --git a/quickwit/quickwit-search/src/metrics.rs b/quickwit/quickwit-search/src/metrics.rs index 449b747f162..23495f3bb6b 100644 --- a/quickwit/quickwit-search/src/metrics.rs +++ b/quickwit/quickwit-search/src/metrics.rs @@ -21,11 +21,15 @@ use once_cell::sync::Lazy; use quickwit_common::metrics::{ - exponential_buckets, new_counter, new_counter_vec, new_histogram, Histogram, IntCounter, - IntCounterVec, + exponential_buckets, new_counter, new_counter_vec, new_histogram, new_histogram_vec, Histogram, + HistogramVec, IntCounter, IntCounterVec, }; pub struct SearchMetrics { + pub root_search_requests_total: IntCounterVec<1>, + pub root_search_request_duration_seconds: HistogramVec<1>, + pub leaf_search_requests_total: IntCounterVec<1>, + pub leaf_search_request_duration_seconds: HistogramVec<1>, pub leaf_searches_splits_total: IntCounter, pub leaf_search_split_duration_secs: Histogram, pub job_assigned_total: IntCounterVec<1>, @@ -34,6 +38,36 @@ pub struct SearchMetrics { impl Default for SearchMetrics { fn default() -> Self { SearchMetrics { + root_search_requests_total: new_counter_vec( + "root_search_requests_total", + "Total number of root search gRPC requests processed.", + "search", + &[("kind", "server")], + ["status"], + ), + root_search_request_duration_seconds: new_histogram_vec( + "root_search_request_duration_seconds", + "Duration of request in seconds.", + "search", + &[("kind", "server")], + ["status"], + exponential_buckets(0.001, 2.0, 15).unwrap(), + ), + leaf_search_requests_total: new_counter_vec( + "leaf_search_requests_total", + "Total number of gRPC requests processed.", + "search", + &[("kind", "server")], + ["status"], + ), + leaf_search_request_duration_seconds: new_histogram_vec( + "leaf_search_request_duration_seconds", + "Duration of request in seconds.", + "search", + &[("kind", "server")], + ["status"], + exponential_buckets(0.001, 2.0, 15).unwrap(), + ), leaf_searches_splits_total: new_counter( "leaf_searches_splits_total", "Number of leaf searches (count of splits) started.", @@ -45,7 +79,7 @@ impl Default for SearchMetrics { "Number of seconds required to run a leaf search over a single split. The timer \ starts after the semaphore is obtained.", "search", - exponential_buckets(0.005, 2.0, 10).unwrap(), + exponential_buckets(0.001, 2.0, 15).unwrap(), ), job_assigned_total: new_counter_vec( "job_assigned_total",