Skip to content

Commit

Permalink
This is an automated cherry-pick of #4952 (#5052)
Browse files Browse the repository at this point in the history
Co-authored-by: Xuecheng Zhang <[email protected]>
Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com>
  • Loading branch information
3 people authored Jun 9, 2023
1 parent 5485d49 commit 71d8502
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 8 deletions.
2 changes: 0 additions & 2 deletions cmd/controller-manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,6 @@ func main() {
klog.V(1).Infof("FLAG: --%s=%q", flag.Name, flag.Value)
})

metrics.RegisterMetrics()

hostName, err := os.Hostname()
if err != nil {
klog.Fatalf("failed to get hostname: %v", err)
Expand Down
23 changes: 22 additions & 1 deletion pkg/controller/tidbcluster/tidb_cluster_control.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,15 +146,21 @@ func (c *defaultTidbClusterControl) defaulting(tc *v1alpha1.TidbCluster) {

func (c *defaultTidbClusterControl) updateTidbCluster(tc *v1alpha1.TidbCluster) error {
c.recordMetrics(tc)

ns := tc.GetNamespace()
tcName := tc.GetName()

// syncing all PVs managed by operator's reclaim policy to Retain
if err := c.reclaimPolicyManager.Sync(tc); err != nil {
metrics.ClusterUpdateErrors.WithLabelValues(ns, tcName, "pv_reclaim_policy").Inc()
return err
}

// cleaning all orphan pods (pd, tikv or tiflash pods which don't have a related PVC) managed by operator
// this could be useful when failover runs into an undesired situation as described in PD failover function
skipReasons, err := c.orphanPodsCleaner.Clean(tc)
if err != nil {
metrics.ClusterUpdateErrors.WithLabelValues(ns, tcName, "orphan_pods_cleaner").Inc()
return err
}
if klog.V(10).Enabled() {
Expand All @@ -165,6 +171,7 @@ func (c *defaultTidbClusterControl) updateTidbCluster(tc *v1alpha1.TidbCluster)

// reconcile TiDB discovery service
if err := c.discoveryManager.Reconcile(tc); err != nil {
metrics.ClusterUpdateErrors.WithLabelValues(ns, tcName, "discovery").Inc()
return err
}

Expand All @@ -180,6 +187,7 @@ func (c *defaultTidbClusterControl) updateTidbCluster(tc *v1alpha1.TidbCluster)
// - scale out/in the pd cluster
// - failover the pd cluster
if err := c.pdMemberManager.Sync(tc); err != nil {
metrics.ClusterUpdateErrors.WithLabelValues(ns, tcName, "pd").Inc()
return err
}

Expand All @@ -192,6 +200,7 @@ func (c *defaultTidbClusterControl) updateTidbCluster(tc *v1alpha1.TidbCluster)
// - scale out/in the tiproxy cluster
// - failover the tiproxy cluster
if err := c.tiproxyMemberManager.Sync(tc); err != nil {
metrics.ClusterUpdateErrors.WithLabelValues(ns, tcName, "tiproxy").Inc()
return err
}

Expand All @@ -205,6 +214,7 @@ func (c *defaultTidbClusterControl) updateTidbCluster(tc *v1alpha1.TidbCluster)
// - scale out/in the tiflash cluster
// - failover the tiflash cluster
if err := c.tiflashMemberManager.Sync(tc); err != nil {
metrics.ClusterUpdateErrors.WithLabelValues(ns, tcName, "tiflash").Inc()
return err
}

Expand All @@ -218,11 +228,13 @@ func (c *defaultTidbClusterControl) updateTidbCluster(tc *v1alpha1.TidbCluster)
// - scale out/in the tikv cluster
// - failover the tikv cluster
if err := c.tikvMemberManager.Sync(tc); err != nil {
metrics.ClusterUpdateErrors.WithLabelValues(ns, tcName, "tikv").Inc()
return err
}

// syncing the pump cluster
if err := c.pumpMemberManager.Sync(tc); err != nil {
metrics.ClusterUpdateErrors.WithLabelValues(ns, tcName, "pump").Inc()
return err
}

Expand All @@ -235,6 +247,7 @@ func (c *defaultTidbClusterControl) updateTidbCluster(tc *v1alpha1.TidbCluster)
// - scale out/in the tidb cluster
// - failover the tidb cluster
if err := c.tidbMemberManager.Sync(tc); err != nil {
metrics.ClusterUpdateErrors.WithLabelValues(ns, tcName, "tidb").Inc()
return err
}

Expand All @@ -244,6 +257,7 @@ func (c *defaultTidbClusterControl) updateTidbCluster(tc *v1alpha1.TidbCluster)
// - create or update ticdc deployment
// - sync ticdc cluster status from pd to TidbCluster object
if err := c.ticdcMemberManager.Sync(tc); err != nil {
metrics.ClusterUpdateErrors.WithLabelValues(ns, tcName, "ticdc").Inc()
return err
}

Expand All @@ -252,12 +266,14 @@ func (c *defaultTidbClusterControl) updateTidbCluster(tc *v1alpha1.TidbCluster)
// - label.MemberIDLabelKey
// - label.NamespaceLabelKey
if err := c.metaManager.Sync(tc); err != nil {
metrics.ClusterUpdateErrors.WithLabelValues(ns, tcName, "meta").Inc()
return err
}

// cleaning the pod scheduling annotation for pd and tikv
pvcSkipReasons, err := c.pvcCleaner.Clean(tc)
if err != nil {
metrics.ClusterUpdateErrors.WithLabelValues(ns, tcName, "pvc_cleaner").Inc()
return err
}
if klog.V(10).Enabled() {
Expand All @@ -269,13 +285,18 @@ func (c *defaultTidbClusterControl) updateTidbCluster(tc *v1alpha1.TidbCluster)
// modify volumes if necessary
if features.DefaultFeatureGate.Enabled(features.VolumeModifying) {
if err := c.pvcModifier.Sync(tc); err != nil {
metrics.ClusterUpdateErrors.WithLabelValues(ns, tcName, "pvc_modifier").Inc()
return err
}
}

// syncing some tidbcluster status attributes
// - sync tidbmonitor reference
return c.tidbClusterStatusManager.Sync(tc)
err = c.tidbClusterStatusManager.Sync(tc)
if err != nil {
metrics.ClusterUpdateErrors.WithLabelValues(ns, tcName, "cluster_status").Inc()
}
return err
}

func (c *defaultTidbClusterControl) recordMetrics(tc *v1alpha1.TidbCluster) {
Expand Down
8 changes: 3 additions & 5 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,6 @@ import (
"github.com/prometheus/client_golang/prometheus"
)

// RegisterMetrics registers the tidb-operator metrics that are not already
// registered elsewhere in this package; at present that is only
// ClusterSpecReplicas.
//
// Registration goes through the default Prometheus registerer and panics if
// the collector cannot be registered.
func RegisterMetrics() {
	prometheus.DefaultRegisterer.MustRegister(ClusterSpecReplicas)
}

// Label constants.
const (
LabelNamespace = "namespace"
Expand Down Expand Up @@ -77,5 +72,8 @@ func init() {
ReconcileTime,
WorkerCount,
ActiveWorkers,

ClusterSpecReplicas,
ClusterUpdateErrors,
)
}
8 changes: 8 additions & 0 deletions pkg/metrics/tidbcluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,12 @@ var (
Name: "spec_replicas",
Help: "Desired replicas of each component in TidbCluster",
}, []string{LabelNamespace, LabelName, LabelComponent})

ClusterUpdateErrors = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "tidb_operator",
Subsystem: "cluster",
Name: "update_errors",
Help: "Number of errors generated in each stage when updating TiDB Clusters",
}, []string{LabelNamespace, LabelName, LabelComponent})
)

0 comments on commit 71d8502

Please sign in to comment.