-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
🌱 Add request latency, rate limiter latency and request retry metrics #2481
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,25 +18,47 @@ package metrics | |
|
||
import ( | ||
"context" | ||
"net/url" | ||
"time" | ||
|
||
"github.com/prometheus/client_golang/prometheus" | ||
clientmetrics "k8s.io/client-go/tools/metrics" | ||
) | ||
|
||
// this file contains setup logic to initialize the myriad of places | ||
// that client-go registers metrics. We copy the names and formats | ||
// from Kubernetes so that we match the core controllers. | ||
|
||
var ( | ||
// client metrics. | ||
|
||
requestResult = prometheus.NewCounterVec( | ||
prometheus.CounterOpts{ | ||
Name: "rest_client_requests_total", | ||
Help: "Number of HTTP requests, partitioned by status code, method, and host.", | ||
}, | ||
[]string{"code", "method", "host"}, | ||
) | ||
|
||
requestLatency = prometheus.NewHistogramVec( | ||
prometheus.HistogramOpts{ | ||
Name: "rest_client_request_duration_seconds", | ||
Help: "Request latency in seconds. Broken down by verb. Intentionally not by host to avoid high cardinality.", | ||
Buckets: []float64{0.005, 0.025, 0.1, 0.25, 0.5, 1.0, 2.0, 4.0, 8.0, 15.0, 30.0, 60.0}, | ||
}, | ||
[]string{"verb"}, | ||
) | ||
|
||
rateLimiterLatency = prometheus.NewHistogramVec( | ||
prometheus.HistogramOpts{ | ||
Name: "rest_client_rate_limiter_duration_seconds", | ||
Help: "Client side rate limiter latency in seconds. Broken down by verb. Intentionally not by host to avoid high cardinality.", | ||
Buckets: []float64{0.005, 0.025, 0.1, 0.25, 0.5, 1.0, 2.0, 4.0, 8.0, 15.0, 30.0, 60.0}, | ||
}, | ||
[]string{"verb"}, | ||
) | ||
|
||
requestRetry = prometheus.NewCounterVec( | ||
prometheus.CounterOpts{ | ||
Name: "rest_client_request_retries_total", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For my understanding, this will only ever be populated for watch requests by the informer, because in all other cases, the error gets bubbled up to the application and the application then has to decide if it wants to retry which entails the underlying machinery doesn't know its a retry or am I wrong? |
||
Help: "Number of request retries, partitioned by status code and verb. Intentionally not by host to avoid high cardinality.", | ||
}, | ||
[]string{"code", "verb"}, | ||
) | ||
) | ||
|
||
func init() { | ||
|
@@ -47,10 +69,16 @@ func init() { | |
func registerClientMetrics() { | ||
// register the metrics with our registry | ||
Registry.MustRegister(requestResult) | ||
Registry.MustRegister(requestLatency) | ||
Registry.MustRegister(rateLimiterLatency) | ||
Registry.MustRegister(requestRetry) | ||
|
||
// register the metrics with client-go | ||
clientmetrics.Register(clientmetrics.RegisterOpts{ | ||
RequestResult: &resultAdapter{metric: requestResult}, | ||
RequestResult: &resultAdapter{metric: requestResult}, | ||
RequestLatency: &latencyAdapter{metric: requestLatency}, | ||
RateLimiterLatency: &latencyAdapter{metric: rateLimiterLatency}, | ||
RequestRetry: &retryAdapter{requestRetry}, | ||
}) | ||
} | ||
|
||
|
@@ -69,3 +97,23 @@ type resultAdapter struct { | |
func (r *resultAdapter) Increment(_ context.Context, code, method, host string) { | ||
r.metric.WithLabelValues(code, method, host).Inc() | ||
} | ||
|
||
type latencyAdapter struct { | ||
metric *prometheus.HistogramVec | ||
} | ||
|
||
// Observe increments the request latency metric for the given verb. | ||
// URL is ignored to avoid high cardinality. | ||
func (l *latencyAdapter) Observe(_ context.Context, verb string, _ url.URL, latency time.Duration) { | ||
l.metric.WithLabelValues(verb).Observe(latency.Seconds()) | ||
} | ||
|
||
type retryAdapter struct { | ||
metric *prometheus.CounterVec | ||
} | ||
|
||
// IncrementRetry increments the retry metric for the given code and method. | ||
// host is ignored to avoid high cardinality. | ||
func (r *retryAdapter) IncrementRetry(_ context.Context, code, method, _ string) { | ||
r.metric.WithLabelValues(code, method).Inc() | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Per sig-apimachinery guidance the last time I asked, the recommendation was not to use client-side rate limiting but to rely on APF (API Priority and Fairness) instead — maybe skip this one?