From 1cf019f17c70d7923517922801f82eb16fcdb809 Mon Sep 17 00:00:00 2001 From: Adrien Guillo Date: Thu, 3 Oct 2024 11:58:23 -0400 Subject: [PATCH] Add number of splits per root/leaf search histograms (#5472) --- quickwit/quickwit-search/src/leaf.rs | 23 +++++++++++---- quickwit/quickwit-search/src/metrics.rs | 39 +++++++++++++++++++++---- quickwit/quickwit-search/src/root.rs | 21 ++++++++++--- 3 files changed, 68 insertions(+), 15 deletions(-) diff --git a/quickwit/quickwit-search/src/leaf.rs b/quickwit/quickwit-search/src/leaf.rs index 22e4b5551d1..7ee3617e102 100644 --- a/quickwit/quickwit-search/src/leaf.rs +++ b/quickwit/quickwit-search/src/leaf.rs @@ -48,6 +48,7 @@ use tokio::task::JoinError; use tracing::*; use crate::collector::{make_collector_for_split, make_merge_collector, IncrementalCollector}; +use crate::metrics::SEARCH_METRICS; use crate::root::is_metadata_count_request_with_ast; use crate::service::{deserialize_doc_mapper, SearcherContext}; use crate::{QuickwitAggregations, SearchError}; @@ -1304,13 +1305,23 @@ pub async fn leaf_search( }); } - let result = crate::search_thread_pool() - .run_cpu_intensive(|| incremental_merge_collector.finalize()) - .instrument(info_span!("incremental_merge_intermediate")) - .await - .context("failed to merge split search responses")??; + let leaf_search_response_reresult: Result, _> = + crate::search_thread_pool() + .run_cpu_intensive(|| incremental_merge_collector.finalize()) + .instrument(info_span!("incremental_merge_intermediate")) + .await + .context("failed to merge split search responses"); + + let label_values = match leaf_search_response_reresult { + Ok(Ok(_)) => ["success"], + _ => ["error"], + }; + SEARCH_METRICS + .leaf_search_targeted_splits + .with_label_values(label_values) + .observe(num_splits as f64); - Ok(result) + Ok(leaf_search_response_reresult??) } #[allow(clippy::too_many_arguments)] diff --git a/quickwit/quickwit-search/src/metrics.rs b/quickwit/quickwit-search/src/metrics.rs index 23495f3bb6b..ccdcc6a519c 100644 --- a/quickwit/quickwit-search/src/metrics.rs +++ b/quickwit/quickwit-search/src/metrics.rs @@ -21,15 +21,17 @@ use once_cell::sync::Lazy; use quickwit_common::metrics::{ - exponential_buckets, new_counter, new_counter_vec, new_histogram, new_histogram_vec, Histogram, - HistogramVec, IntCounter, IntCounterVec, + exponential_buckets, linear_buckets, new_counter, new_counter_vec, new_histogram, + new_histogram_vec, Histogram, HistogramVec, IntCounter, IntCounterVec, }; pub struct SearchMetrics { pub root_search_requests_total: IntCounterVec<1>, pub root_search_request_duration_seconds: HistogramVec<1>, + pub root_search_targeted_splits: HistogramVec<1>, pub leaf_search_requests_total: IntCounterVec<1>, pub leaf_search_request_duration_seconds: HistogramVec<1>, + pub leaf_search_targeted_splits: HistogramVec<1>, pub leaf_searches_splits_total: IntCounter, pub leaf_search_split_duration_secs: Histogram, pub job_assigned_total: IntCounterVec<1>, @@ -37,6 +39,17 @@ pub struct SearchMetrics { impl Default for SearchMetrics { fn default() -> Self { + let targeted_splits_buckets: Vec = [ + linear_buckets(0.0, 10.0, 10).unwrap(), + linear_buckets(100.0, 100.0, 9).unwrap(), + linear_buckets(1000.0, 1000.0, 9).unwrap(), + linear_buckets(10000.0, 10000.0, 10).unwrap(), + ] + .iter() + .flatten() + .copied() + .collect(); + SearchMetrics { root_search_requests_total: new_counter_vec( "root_search_requests_total", @@ -47,27 +60,43 @@ impl Default for SearchMetrics { ), root_search_request_duration_seconds: new_histogram_vec( "root_search_request_duration_seconds", - "Duration of request in seconds.", + "Duration of root search gRPC request in seconds.", "search", &[("kind", "server")], ["status"], exponential_buckets(0.001, 2.0, 15).unwrap(), ), + root_search_targeted_splits: new_histogram_vec( + "root_search_targeted_splits", + "Number of splits targeted per root search GRPC request.", + "search", + &[], + ["status"], + targeted_splits_buckets.clone(), + ), leaf_search_requests_total: new_counter_vec( "leaf_search_requests_total", - "Total number of gRPC requests processed.", + "Total number of leaf search gRPC requests processed.", "search", &[("kind", "server")], ["status"], ), leaf_search_request_duration_seconds: new_histogram_vec( "leaf_search_request_duration_seconds", - "Duration of request in seconds.", + "Duration of leaf search gRPC request in seconds.", "search", &[("kind", "server")], ["status"], exponential_buckets(0.001, 2.0, 15).unwrap(), ), + leaf_search_targeted_splits: new_histogram_vec( + "leaf_search_targeted_splits", + "Number of splits targeted per leaf search GRPC request.", + "search", + &[], + ["status"], + targeted_splits_buckets, + ), leaf_searches_splits_total: new_counter( "leaf_searches_splits_total", "Number of leaf searches (count of splits) started.", diff --git a/quickwit/quickwit-search/src/root.rs b/quickwit/quickwit-search/src/root.rs index 3565f9f68f8..766029b159e 100644 --- a/quickwit/quickwit-search/src/root.rs +++ b/quickwit/quickwit-search/src/root.rs @@ -54,6 +54,7 @@ use tracing::{debug, info, info_span, instrument}; use crate::cluster_client::ClusterClient; use crate::collector::{make_merge_collector, QuickwitAggregations}; use crate::find_trace_ids_collector::Span; +use crate::metrics::SEARCH_METRICS; use crate::scroll_context::{ScrollContext, ScrollKeyAndStartOffset}; use crate::search_job_placer::{group_by, group_jobs_by_index_id, Job}; use crate::search_response_rest::StorageRequestCount; @@ -1159,17 +1160,29 @@ pub async fn root_search( current_span.record("num_docs", num_docs); current_span.record("num_splits", num_splits); - let mut search_response = root_search_aux( + let mut search_response_result = root_search_aux( searcher_context, &request_metadata.indexes_meta_for_leaf_search, search_request, split_metadatas, cluster_client, ) - .await?; + .await; + + if let Ok(search_response) = &mut search_response_result { + search_response.elapsed_time_micros = start_instant.elapsed().as_micros() as u64; + } + let label_values = if search_response_result.is_ok() { + ["success"] + } else { + ["error"] + }; + SEARCH_METRICS + .root_search_targeted_splits + .with_label_values(label_values) + .observe(num_splits as f64); - search_response.elapsed_time_micros = start_instant.elapsed().as_micros() as u64; - Ok(search_response) + search_response_result } /// Returns details on how a query would be executed