diff --git a/pkg/controllers/metrics/pod/controller.go b/pkg/controllers/metrics/pod/controller.go
index 9547fc56b6..d6fab75116 100644
--- a/pkg/controllers/metrics/pod/controller.go
+++ b/pkg/controllers/metrics/pod/controller.go
@@ -71,6 +71,15 @@ var (
 			Objectives: metrics.SummaryObjectives(),
 		},
 	)
+	podBoundDurationSeconds = prometheus.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Namespace: "karpenter",
+			Subsystem: metrics.PodSubsystem,
+			Name:      "bound_duration_seconds",
+			Help:      "The time from pod creation until the pod is bound.",
+		},
+		labelNames(),
+	)
 )
 
 // Controller for the resource
@@ -82,7 +91,7 @@ type Controller struct {
 }
 
 func init() {
-	crmetrics.Registry.MustRegister(podState, podStartupDurationSeconds)
+	crmetrics.Registry.MustRegister(podState, podStartupDurationSeconds, podBoundDurationSeconds)
 }
 
 func labelNames() []string {
@@ -132,13 +141,19 @@ func (c *Controller) Reconcile(ctx context.Context, req reconcile.Request) (reco
 			Labels: labels,
 		},
 	})
-	c.recordPodStartupMetric(pod)
+	c.recordPodStartupMetric(pod, labels)
 	return reconcile.Result{}, nil
 }
 
-func (c *Controller) recordPodStartupMetric(pod *corev1.Pod) {
+func (c *Controller) recordPodStartupMetric(pod *corev1.Pod, labels prometheus.Labels) {
 	key := client.ObjectKeyFromObject(pod).String()
 	if pod.Status.Phase == phasePending {
+		cond, ok := lo.Find(pod.Status.Conditions, func(c corev1.PodCondition) bool {
+			return c.Type == corev1.PodScheduled
+		})
+		if ok && cond.Status == corev1.ConditionTrue {
+			podBoundDurationSeconds.With(labels).Observe(cond.LastTransitionTime.Sub(pod.CreationTimestamp.Time).Seconds())
+		}
 		c.pendingPods.Insert(key)
 		return
 	}
diff --git a/pkg/controllers/metrics/pod/suite_test.go b/pkg/controllers/metrics/pod/suite_test.go
index 940bb6f2df..5183f5b8bc 100644
--- a/pkg/controllers/metrics/pod/suite_test.go
+++ b/pkg/controllers/metrics/pod/suite_test.go
@@ -20,6 +20,8 @@ import (
 	"context"
 	"testing"
 
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 	corev1 "k8s.io/api/core/v1"
@@ -84,6 +86,18 @@ var _ = Describe("Pod Metrics", func() {
 		})
 		Expect(found).To(BeTrue())
 	})
+	It("should update the pod bound_duration_seconds metric", func() {
+		p := test.Pod()
+		p.Status.Phase = corev1.PodPending
+		p.Status.Conditions = []corev1.PodCondition{{Type: corev1.PodScheduled, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()}}
+		ExpectApplied(ctx, env.Client, p)
+		ExpectReconcileSucceeded(ctx, podController, client.ObjectKeyFromObject(p))
+		_, found := FindMetricWithLabelValues("karpenter_pods_bound_duration_seconds", map[string]string{
+			"name":      p.GetName(),
+			"namespace": p.GetNamespace(),
+		})
+		Expect(found).To(BeTrue())
+	})
 	It("should delete the pod state metric on pod delete", func() {
 		p := test.Pod()
 		ExpectApplied(ctx, env.Client, p)
diff --git a/pkg/controllers/node/termination/controller.go b/pkg/controllers/node/termination/controller.go
index b8e49a3931..bd2d5cf3ff 100644
--- a/pkg/controllers/node/termination/controller.go
+++ b/pkg/controllers/node/termination/controller.go
@@ -129,6 +129,9 @@ func (c *Controller) finalize(ctx context.Context, node *corev1.Node) (reconcile
 		return reconcile.Result{RequeueAfter: 1 * time.Second}, nil
 	}
 
+	NodesDrainedTotal.With(prometheus.Labels{
+		metrics.NodePoolLabel: node.Labels[v1.NodePoolLabelKey],
+	}).Inc()
 	// In order for Pods associated with PersistentVolumes to smoothly migrate from the terminating Node, we wait
 	// for VolumeAttachments of drain-able Pods to be cleaned up before terminating Node and removing its finalizer.
 	// However, if TerminationGracePeriod is configured for Node, and we are past that period, we will skip waiting.
diff --git a/pkg/controllers/node/termination/metrics.go b/pkg/controllers/node/termination/metrics.go
index b31c558788..13808c9e38 100644
--- a/pkg/controllers/node/termination/metrics.go
+++ b/pkg/controllers/node/termination/metrics.go
@@ -28,7 +28,8 @@ import (
 func init() {
 	crmetrics.Registry.MustRegister(
 		TerminationDurationSeconds,
-		NodeLifetimeDurationSeconds)
+		NodeLifetimeDurationSeconds,
+		NodesDrainedTotal)
 }
 
 const dayDuration = time.Hour * 24
@@ -44,6 +45,15 @@ var (
 		},
 		[]string{metrics.NodePoolLabel},
 	)
+	NodesDrainedTotal = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Namespace: metrics.Namespace,
+			Subsystem: metrics.NodeSubsystem,
+			Name:      "drained_total",
+			Help:      "The total number of nodes drained by Karpenter",
+		},
+		[]string{metrics.NodePoolLabel},
+	)
 	NodeLifetimeDurationSeconds = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
 			Namespace: metrics.Namespace,
diff --git a/pkg/controllers/node/termination/suite_test.go b/pkg/controllers/node/termination/suite_test.go
index 32151e311f..da6daf60ad 100644
--- a/pkg/controllers/node/termination/suite_test.go
+++ b/pkg/controllers/node/termination/suite_test.go
@@ -95,6 +95,7 @@ var _ = Describe("Termination", func() {
 		metrics.NodesTerminatedTotal.Reset()
 		termination.TerminationDurationSeconds.Reset()
 		termination.NodeLifetimeDurationSeconds.Reset()
+		termination.NodesDrainedTotal.Reset()
 	})
 
 	Context("Reconciliation", func() {
@@ -841,6 +842,7 @@ var _ = Describe("Termination", func() {
 			node = ExpectNodeExists(ctx, env.Client, node.Name)
 			// Reconcile twice, once to set the NodeClaim to terminating, another to check the instance termination status (and delete the node).
 			ExpectObjectReconciled(ctx, env.Client, terminationController, node)
+			ExpectMetricCounterValue(termination.NodesDrainedTotal, 1, map[string]string{"nodepool": node.Labels[v1.NodePoolLabelKey]})
 			ExpectObjectReconciled(ctx, env.Client, terminationController, node)
 
 			m, ok := FindMetricWithLabelValues("karpenter_nodes_terminated_total", map[string]string{"nodepool": node.Labels[v1.NodePoolLabelKey]})
diff --git a/pkg/controllers/provisioning/provisioner.go b/pkg/controllers/provisioning/provisioner.go
index aafd3bcf68..eb712f1d6f 100644
--- a/pkg/controllers/provisioning/provisioner.go
+++ b/pkg/controllers/provisioning/provisioner.go
@@ -162,6 +162,7 @@ func (p *Provisioner) GetPendingPods(ctx context.Context) ([]*corev1.Pod, error)
 	pods = lo.Reject(pods, func(po *corev1.Pod, _ int) bool {
 		if err := p.Validate(ctx, po); err != nil {
 			log.FromContext(ctx).WithValues("Pod", klog.KRef(po.Namespace, po.Name)).V(1).Info(fmt.Sprintf("ignoring pod, %s", err))
+			metrics.IgnoredPodTotal.Inc()
 			return true
 		}
 		return false
@@ -346,6 +347,7 @@ func (p *Provisioner) Schedule(ctx context.Context) (scheduler.Results, error) {
 	results := s.Solve(ctx, pods).TruncateInstanceTypes(scheduler.MaxInstanceTypes)
 	if len(results.NewNodeClaims) > 0 {
 		log.FromContext(ctx).WithValues("Pods", pretty.Slice(lo.Map(pods, func(p *corev1.Pod, _ int) string { return klog.KRef(p.Namespace, p.Name).String() }), 5), "duration", time.Since(start)).Info("found provisionable pod(s)")
+		scheduler.PodsNominatedTotal.With(prometheus.Labels{scheduler.ControllerLabel: injection.GetControllerName(ctx)}).Add(float64(len(pods)))
 	}
 	results.Record(ctx, p.recorder, p.cluster)
 	return results, nil
diff --git a/pkg/controllers/provisioning/scheduling/metrics.go b/pkg/controllers/provisioning/scheduling/metrics.go
index 05fdbdc4ab..f645ab5111 100644
--- a/pkg/controllers/provisioning/scheduling/metrics.go
+++ b/pkg/controllers/provisioning/scheduling/metrics.go
@@ -24,7 +24,7 @@ import (
 )
 
 func init() {
-	crmetrics.Registry.MustRegister(SchedulingDurationSeconds, QueueDepth)
+	crmetrics.Registry.MustRegister(SchedulingDurationSeconds, QueueDepth, PodsNominatedTotal)
 }
 
 const (
@@ -58,4 +58,15 @@ var (
 			schedulingIDLabel,
 		},
 	)
+	PodsNominatedTotal = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Namespace: metrics.Namespace,
+			Subsystem: schedulerSubsystem,
+			Name:      "pods_nominated_total",
+			Help:      "The number of pods that the scheduler has processed and made a decision for.",
+		},
+		[]string{
+			ControllerLabel,
+		},
+	)
 )
diff --git a/pkg/controllers/provisioning/suite_test.go b/pkg/controllers/provisioning/suite_test.go
index 80c7e3d81e..6682238a38 100644
--- a/pkg/controllers/provisioning/suite_test.go
+++ b/pkg/controllers/provisioning/suite_test.go
@@ -22,6 +22,8 @@ import (
 	"testing"
 	"time"
 
+	schedulingMetrics "sigs.k8s.io/karpenter/pkg/controllers/provisioning/scheduling"
+
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 	"github.com/samber/lo"
@@ -97,6 +99,7 @@ var _ = AfterEach(func() {
 	ExpectCleanedUp(ctx, env.Client)
 	cloudProvider.Reset()
 	cluster.Reset()
+	schedulingMetrics.PodsNominatedTotal.Reset()
 })
 
 var _ = Describe("Provisioning", func() {
@@ -104,6 +107,7 @@ var _ = Describe("Provisioning", func() {
 		ExpectApplied(ctx, env.Client, test.NodePool())
 		pod := test.UnschedulablePod()
 		ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod)
+		ExpectMetricCounterValue(schedulingMetrics.PodsNominatedTotal, 1, nil)
 		nodes := &corev1.NodeList{}
 		Expect(env.Client.List(ctx, nodes)).To(Succeed())
 		Expect(len(nodes.Items)).To(Equal(1))
diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go
index fd21137cba..e972311cf7 100644
--- a/pkg/metrics/metrics.go
+++ b/pkg/metrics/metrics.go
@@ -89,9 +89,16 @@ var (
 			NodePoolLabel,
 		},
 	)
+	IgnoredPodTotal = prometheus.NewGauge(
+		prometheus.GaugeOpts{
+			Namespace: Namespace,
+			Name:      "ignored_pod_total",
+			Help:      "Number of pods ignored during scheduling by Karpenter",
+		},
+	)
 )
 
 func init() {
 	crmetrics.Registry.MustRegister(NodeClaimsCreatedTotal, NodeClaimsTerminatedTotal, NodeClaimsDisruptedTotal,
-		NodesCreatedTotal, NodesTerminatedTotal)
+		NodesCreatedTotal, NodesTerminatedTotal, IgnoredPodTotal)
 }