From 32166b6d960a0260ae7ff0ac881b74b1e23e334c Mon Sep 17 00:00:00 2001 From: Anish Shankar Date: Thu, 27 Jun 2024 20:03:52 -0700 Subject: [PATCH] Add support to customize spare replicas during VolumeReplace (#5666) Co-authored-by: Xuecheng Zhang --- docs/api-references/docs.md | 30 +++++++++++++++++++ manifests/crd.yaml | 8 +++++ .../crd/v1/pingcap.com_tidbclusters.yaml | 8 +++++ .../v1alpha1/defaulting/tidbcluster.go | 6 ++++ .../pingcap/v1alpha1/openapi_generated.go | 14 +++++++++ pkg/apis/pingcap/v1alpha1/tidbcluster.go | 4 +-- pkg/apis/pingcap/v1alpha1/types.go | 16 ++++++++++ .../pingcap/v1alpha1/zz_generated.deepcopy.go | 10 +++++++ .../member/tikv_member_manager_test.go | 4 ++- 9 files changed, 97 insertions(+), 3 deletions(-) diff --git a/docs/api-references/docs.md b/docs/api-references/docs.md index e862e55f33a..f1cad339202 100644 --- a/docs/api-references/docs.md +++ b/docs/api-references/docs.md @@ -12331,6 +12331,21 @@ string

Mode is the mode of PD cluster

+ + +spareVolReplaceReplicas
+ +int32 + + + +(Optional) +

The default number of spare replicas to scale up when using VolumeReplace feature. +In multi-az deployments with topology spread constraints you may need to set this to number of zones to avoid +zone skew after volume replace (total replicas always whole multiples of zones). +Optional: Defaults to 1

+ +

PDStatus

@@ -22653,6 +22668,21 @@ ScalePolicy

ScalePolicy is the scale configuration for TiKV

+ + +spareVolReplaceReplicas
+ +int32 + + + +(Optional) +

The default number of spare replicas to scale up when using VolumeReplace feature. +In multi-az deployments with topology spread constraints you may need to set this to number of zones to avoid +zone skew after volume replace (total replicas always whole multiples of zones). +Optional: Defaults to 1

+ +

TiKVStatus

diff --git a/manifests/crd.yaml b/manifests/crd.yaml index bd2bef1765f..4bad64b9564 100644 --- a/manifests/crd.yaml +++ b/manifests/crd.yaml @@ -24779,6 +24779,10 @@ spec: type: object serviceAccount: type: string + spareVolReplaceReplicas: + format: int32 + minimum: 0 + type: integer startTimeout: default: 30 type: integer @@ -41162,6 +41166,10 @@ spec: type: boolean serviceAccount: type: string + spareVolReplaceReplicas: + format: int32 + minimum: 0 + type: integer statefulSetUpdateStrategy: type: string storageClassName: diff --git a/manifests/crd/v1/pingcap.com_tidbclusters.yaml b/manifests/crd/v1/pingcap.com_tidbclusters.yaml index 29ae581defa..adf7a3551c2 100644 --- a/manifests/crd/v1/pingcap.com_tidbclusters.yaml +++ b/manifests/crd/v1/pingcap.com_tidbclusters.yaml @@ -5725,6 +5725,10 @@ spec: type: object serviceAccount: type: string + spareVolReplaceReplicas: + format: int32 + minimum: 0 + type: integer startTimeout: default: 30 type: integer @@ -22108,6 +22112,10 @@ spec: type: boolean serviceAccount: type: string + spareVolReplaceReplicas: + format: int32 + minimum: 0 + type: integer statefulSetUpdateStrategy: type: string storageClassName: diff --git a/pkg/apis/pingcap/v1alpha1/defaulting/tidbcluster.go b/pkg/apis/pingcap/v1alpha1/defaulting/tidbcluster.go index c1a8136b645..68fa913e708 100644 --- a/pkg/apis/pingcap/v1alpha1/defaulting/tidbcluster.go +++ b/pkg/apis/pingcap/v1alpha1/defaulting/tidbcluster.go @@ -115,6 +115,9 @@ func setTikvSpecDefault(tc *v1alpha1.TidbCluster) { if tc.Spec.TiKV.MaxFailoverCount == nil { tc.Spec.TiKV.MaxFailoverCount = pointer.Int32Ptr(3) } + if tc.Spec.TiKV.SpareVolReplaceReplicas == nil { + tc.Spec.TiKV.SpareVolReplaceReplicas = pointer.Int32Ptr(1) + } } func setPdSpecDefault(tc *v1alpha1.TidbCluster) { @@ -126,6 +129,9 @@ func setPdSpecDefault(tc *v1alpha1.TidbCluster) { if tc.Spec.PD.MaxFailoverCount == nil { tc.Spec.PD.MaxFailoverCount = pointer.Int32Ptr(3) } + if tc.Spec.PD.SpareVolReplaceReplicas == nil { + tc.Spec.PD.SpareVolReplaceReplicas = pointer.Int32Ptr(1) + } } func setPDMSSpecDefault(tc *v1alpha1.TidbCluster) { diff --git a/pkg/apis/pingcap/v1alpha1/openapi_generated.go b/pkg/apis/pingcap/v1alpha1/openapi_generated.go index 8bd61aa522b..a5c71acef10 100644 --- a/pkg/apis/pingcap/v1alpha1/openapi_generated.go +++ b/pkg/apis/pingcap/v1alpha1/openapi_generated.go @@ -6914,6 +6914,13 @@ func schema_pkg_apis_pingcap_v1alpha1_PDSpec(ref common.ReferenceCallback) commo Format: "", }, }, + "spareVolReplaceReplicas": { + SchemaProps: spec.SchemaProps{ + Description: "The default number of spare replicas to scale up when using VolumeReplace feature. In multi-az deployments with topology spread constraints you may need to set this to number of zones to avoid zone skew after volume replace (total replicas always whole multiples of zones). Optional: Defaults to 1", + Type: []string{"integer"}, + Format: "int32", + }, + }, }, Required: []string{"replicas"}, }, @@ -13401,6 +13408,13 @@ func schema_pkg_apis_pingcap_v1alpha1_TiKVSpec(ref common.ReferenceCallback) com Ref: ref("github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.ScalePolicy"), }, }, + "spareVolReplaceReplicas": { + SchemaProps: spec.SchemaProps{ + Description: "The default number of spare replicas to scale up when using VolumeReplace feature. In multi-az deployments with topology spread constraints you may need to set this to number of zones to avoid zone skew after volume replace (total replicas always whole multiples of zones). Optional: Defaults to 1", + Type: []string{"integer"}, + Format: "int32", + }, + }, }, Required: []string{"replicas"}, }, diff --git a/pkg/apis/pingcap/v1alpha1/tidbcluster.go b/pkg/apis/pingcap/v1alpha1/tidbcluster.go index 0e1fb52a21b..a40557de86d 100644 --- a/pkg/apis/pingcap/v1alpha1/tidbcluster.go +++ b/pkg/apis/pingcap/v1alpha1/tidbcluster.go @@ -631,7 +631,7 @@ func (tc *TidbCluster) PDStsDesiredReplicas() int32 { } var spareReplaceReplicas int32 = 0 if tc.Status.PD.VolReplaceInProgress { - spareReplaceReplicas = 1 + spareReplaceReplicas = *tc.Spec.PD.SpareVolReplaceReplicas } return tc.Spec.PD.Replicas + tc.GetPDDeletedFailureReplicas() + spareReplaceReplicas } @@ -709,7 +709,7 @@ func (tc *TidbCluster) TiKVStsDesiredReplicas() int32 { } var spareReplaceReplicas int32 = 0 if tc.Status.TiKV.VolReplaceInProgress { - spareReplaceReplicas = 1 + spareReplaceReplicas = *tc.Spec.TiKV.SpareVolReplaceReplicas } return tc.Spec.TiKV.Replicas + int32(len(tc.Status.TiKV.FailureStores)) + spareReplaceReplicas } diff --git a/pkg/apis/pingcap/v1alpha1/types.go b/pkg/apis/pingcap/v1alpha1/types.go index 2963ca3deb4..f4063a56015 100644 --- a/pkg/apis/pingcap/v1alpha1/types.go +++ b/pkg/apis/pingcap/v1alpha1/types.go @@ -576,6 +576,14 @@ type PDSpec struct { // +optional // +kubebuilder:validation:Enum:="";"ms" Mode string `json:"mode,omitempty"` + + // The default number of spare replicas to scale up when using VolumeReplace feature. + // In multi-az deployments with topology spread constraints you may need to set this to number of zones to avoid + // zone skew after volume replace (total replicas always whole multiples of zones). + // Optional: Defaults to 1 + // +kubebuilder:validation:Minimum=0 + // +optional + SpareVolReplaceReplicas *int32 `json:"spareVolReplaceReplicas,omitempty"` } // +k8s:openapi-gen=true @@ -758,6 +766,14 @@ type TiKVSpec struct { // ScalePolicy is the scale configuration for TiKV // +optional ScalePolicy ScalePolicy `json:"scalePolicy,omitempty"` + + // The default number of spare replicas to scale up when using VolumeReplace feature. + // In multi-az deployments with topology spread constraints you may need to set this to number of zones to avoid + // zone skew after volume replace (total replicas always whole multiples of zones). + // Optional: Defaults to 1 + // +kubebuilder:validation:Minimum=0 + // +optional + SpareVolReplaceReplicas *int32 `json:"spareVolReplaceReplicas,omitempty"` } // TiFlashSpec contains details of TiFlash members diff --git a/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go index a5b834e726a..b65b9dca1d3 100644 --- a/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go @@ -3976,6 +3976,11 @@ func (in *PDSpec) DeepCopyInto(out *PDSpec) { *out = new(bool) **out = **in } + if in.SpareVolReplaceReplicas != nil { + in, out := &in.SpareVolReplaceReplicas, &out.SpareVolReplaceReplicas + *out = new(int32) + **out = **in + } return } @@ -8346,6 +8351,11 @@ func (in *TiKVSpec) DeepCopyInto(out *TiKVSpec) { copy(*out, *in) } in.ScalePolicy.DeepCopyInto(&out.ScalePolicy) + if in.SpareVolReplaceReplicas != nil { + in, out := &in.SpareVolReplaceReplicas, &out.SpareVolReplaceReplicas + *out = new(int32) + **out = **in + } return } diff --git a/pkg/manager/member/tikv_member_manager_test.go b/pkg/manager/member/tikv_member_manager_test.go index b99e3de9b15..4100f357c9c 100644 --- a/pkg/manager/member/tikv_member_manager_test.go +++ b/pkg/manager/member/tikv_member_manager_test.go @@ -413,6 +413,7 @@ func TestTiKVMemberManagerSyncUpdate(t *testing.T) { modify: func(tc *v1alpha1.TidbCluster) { // Random test change to affect pod spec template. tc.Spec.TiKV.ServiceAccount = "test_new_account" + tc.Spec.TiKV.SpareVolReplaceReplicas = pointer.Int32(1) tc.Status.TiKV.VolReplaceInProgress = true }, pdStores: &pdapi.StoresInfo{Count: 0, Stores: []*pdapi.StoreInfo{}}, @@ -2149,7 +2150,8 @@ func TestGetNewTiKVSetForTidbCluster(t *testing.T) { }, Spec: v1alpha1.TidbClusterSpec{ TiKV: &v1alpha1.TiKVSpec{ - Replicas: 3, + Replicas: 3, + SpareVolReplaceReplicas: pointer.Int32(1), }, PD: &v1alpha1.PDSpec{}, TiDB: &v1alpha1.TiDBSpec{},