Skip to content

Commit

Permalink
Adding logging and metrics related to shard throughputs.
Browse files Browse the repository at this point in the history
  • Loading branch information
fulmicoton committed Jul 16, 2024
1 parent fbd78bc commit 1e92dab
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 10 deletions.
6 changes: 3 additions & 3 deletions quickwit/quickwit-common/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ use std::sync::OnceLock;

use once_cell::sync::Lazy;
pub use prometheus::{
exponential_buckets, Histogram, HistogramTimer, HistogramVec as PrometheusHistogramVec,
IntCounter, IntCounterVec as PrometheusIntCounterVec, IntGauge,
IntGaugeVec as PrometheusIntGaugeVec,
exponential_buckets, linear_buckets, Histogram, HistogramTimer,
HistogramVec as PrometheusHistogramVec, IntCounter, IntCounterVec as PrometheusIntCounterVec,
IntGauge, IntGaugeVec as PrometheusIntGaugeVec,
};
use prometheus::{Gauge, HistogramOpts, Opts, TextEncoder};

Expand Down
6 changes: 5 additions & 1 deletion quickwit/quickwit-config/src/node_config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ use quickwit_common::uri::Uri;
use quickwit_proto::indexing::CpuCapacity;
use quickwit_proto::types::NodeId;
use serde::{Deserialize, Serialize};
use tracing::warn;
use tracing::{info, warn};

use crate::node_config::serialize::load_node_config_with_env;
use crate::service::QuickwitService;
Expand Down Expand Up @@ -325,6 +325,10 @@ impl IngestApiConfig {
self.max_queue_disk_usage,
self.max_queue_memory_usage
);
info!(
"ingestion shard throughput limit: {:?}",
self.shard_throughput_limit
);
ensure!(
self.shard_throughput_limit >= ByteSize::mib(1)
&& self.shard_throughput_limit <= ByteSize::mib(20),
Expand Down
16 changes: 12 additions & 4 deletions quickwit/quickwit-ingest/src/ingest_v2/broadcast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,12 +198,19 @@ impl ShardThroughputTimeSeriesMap {
let mut per_source_shard_infos: BTreeMap<SourceUid, ShardInfos> = BTreeMap::new();
for ((source_uid, shard_id), shard_time_series) in self.shard_time_series.iter() {
let shard_state = shard_time_series.shard_state;
let short_term_ingestion_rate_mib_per_sec_u64 =
shard_time_series.last().as_u64() / ONE_MIB.as_u64();
let short_term_ingestion_rate_mib_per_sec_u64: u64 =
shard_time_series.last().as_u64().div_ceil(ONE_MIB.as_u64());
let long_term_ingestion_rate_mib_per_sec_u64: u64 =
shard_time_series.average().as_u64().div_ceil(ONE_MIB.as_u64());
INGEST_V2_METRICS
.shard_st_throughput_mib
.observe(short_term_ingestion_rate_mib_per_sec_u64 as f64);
INGEST_V2_METRICS
.shard_lt_throughput_mib
.observe(long_term_ingestion_rate_mib_per_sec_u64 as f64);

let short_term_ingestion_rate =
RateMibPerSec(short_term_ingestion_rate_mib_per_sec_u64 as u16);
let long_term_ingestion_rate_mib_per_sec_u64 =
shard_time_series.average().as_u64() / ONE_MIB.as_u64();
let long_term_ingestion_rate =
RateMibPerSec(long_term_ingestion_rate_mib_per_sec_u64 as u16);
let shard_info = ShardInfo {
Expand All @@ -212,6 +219,7 @@ impl ShardThroughputTimeSeriesMap {
short_term_ingestion_rate,
long_term_ingestion_rate,
};

per_source_shard_infos
.entry(source_uid.clone())
.or_default()
Expand Down
18 changes: 16 additions & 2 deletions quickwit/quickwit-ingest/src/ingest_v2/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,16 @@
use mrecordlog::ResourceUsage;
use once_cell::sync::Lazy;
use quickwit_common::metrics::{
exponential_buckets, new_counter_vec, new_gauge, new_gauge_vec, new_histogram_vec,
HistogramVec, IntCounterVec, IntGauge, IntGaugeVec,
exponential_buckets, linear_buckets, new_counter_vec, new_gauge, new_gauge_vec, new_histogram,
new_histogram_vec, Histogram, HistogramVec, IntCounterVec, IntGauge, IntGaugeVec,
};

pub(super) struct IngestV2Metrics {
pub reset_shards_operations_total: IntCounterVec<1>,
pub open_shards: IntGauge,
pub closed_shards: IntGauge,
pub shard_lt_throughput_mib: Histogram,
pub shard_st_throughput_mib: Histogram,
pub wal_acquire_lock_requests_in_flight: IntGaugeVec<2>,
pub wal_acquire_lock_request_duration_secs: HistogramVec<2>,
pub wal_disk_used_bytes: IntGauge,
Expand Down Expand Up @@ -56,6 +58,18 @@ impl Default for IngestV2Metrics {
"ingest",
&[("state", "closed")],
),
shard_lt_throughput_mib: new_histogram(
"shard_lt_throughput_mib",
"Shard long term throughput as reported through chitchat",
"ingest",
linear_buckets(0.0f64, 1.0f64, 15).unwrap(),
),
shard_st_throughput_mib: new_histogram(
"shard_st_throughput_mib",
"Shard short term throughput as reported through chitchat",
"ingest",
linear_buckets(0.0f64, 1.0f64, 15).unwrap(),
),
wal_acquire_lock_requests_in_flight: new_gauge_vec(
"wal_acquire_lock_requests_in_flight",
"Number of acquire lock requests in-flight.",
Expand Down

0 comments on commit 1e92dab

Please sign in to comment.