From 04adbc3d0a48bc2d815850998a94c711b5da6995 Mon Sep 17 00:00:00 2001 From: BornChanger Date: Mon, 14 Aug 2023 20:57:28 +0800 Subject: [PATCH] ebs br: check status globals for PD and TiKV nodes Signed-off-by: BornChanger --- pkg/apis/pingcap/v1alpha1/tidbcluster.go | 21 +++++++++++++++++++++ pkg/backup/restore/restore_manager.go | 4 ++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/pkg/apis/pingcap/v1alpha1/tidbcluster.go b/pkg/apis/pingcap/v1alpha1/tidbcluster.go index 0b4abbf54a..d1de4f8af3 100644 --- a/pkg/apis/pingcap/v1alpha1/tidbcluster.go +++ b/pkg/apis/pingcap/v1alpha1/tidbcluster.go @@ -531,6 +531,17 @@ func (tc *TidbCluster) PDAllMembersReady() bool { return true } +// PDAllPeerMembersReady returns whether all peer members of PD are ready. +func (tc *TidbCluster) PDAllPeerMembersReady() bool { + + for _, member := range tc.Status.PD.PeerMembers { + if !member.Health { + return false + } + } + return true +} + func (tc *TidbCluster) PDAutoFailovering() bool { if len(tc.Status.PD.FailureMembers) == 0 { return false @@ -908,6 +919,16 @@ func (tc *TidbCluster) AllTiKVsAreAvailable() bool { return true } +func (tc *TidbCluster) AllPeerTiKVsAreAvailable() bool { + for _, store := range tc.Status.TiKV.PeerStores { + if store.State != TiKVStateUp { + return false + } + } + + return true +} + func (tc *TidbCluster) PumpIsAvailable() bool { lowerLimit := 1 if len(tc.Status.Pump.Members) < lowerLimit { diff --git a/pkg/backup/restore/restore_manager.go b/pkg/backup/restore/restore_manager.go index 7d6fc9db6c..fec41c7c15 100644 --- a/pkg/backup/restore/restore_manager.go +++ b/pkg/backup/restore/restore_manager.go @@ -131,12 +131,12 @@ func (rm *restoreManager) syncRestoreJob(restore *v1alpha1.Restore) error { }, nil) return err } - if !tc.PDAllMembersReady() { + if !tc.PDAllMembersReady() || !tc.PDAllPeerMembersReady() { return controller.RequeueErrorf("restore %s/%s: waiting for all PD members are ready in tidbcluster %s/%s", ns, name, 
tc.Namespace, tc.Name) } if v1alpha1.IsRestoreVolumeComplete(restore) && !v1alpha1.IsRestoreTiKVComplete(restore) { - if !tc.AllTiKVsAreAvailable() { + if !tc.AllTiKVsAreAvailable() || !tc.AllPeerTiKVsAreAvailable() { return controller.RequeueErrorf("restore %s/%s: waiting for all TiKVs are available in tidbcluster %s/%s", ns, name, tc.Namespace, tc.Name) } else { sel, err := label.New().Instance(tc.Name).TiKV().Selector()