From 002b454d186925821cd20330ac654d6381e76c30 Mon Sep 17 00:00:00 2001
From: Ti Chi Robot
Date: Tue, 10 Sep 2024 15:55:14 +0800
Subject: [PATCH] schedule: fix the down peer cannot be repaired (#7996) (#8196)

close tikv/pd#7808

Signed-off-by: Ryan Leung
Co-authored-by: Ryan Leung
Co-authored-by: ShuNing
Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com>
---
 pkg/schedule/checker/replica_strategy.go  |  7 ++-
 pkg/schedule/checker/rule_checker_test.go | 60 +++++++++++++++++++++++
 2 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/pkg/schedule/checker/replica_strategy.go b/pkg/schedule/checker/replica_strategy.go
index 8b9b055f0a7..b8037612531 100644
--- a/pkg/schedule/checker/replica_strategy.go
+++ b/pkg/schedule/checker/replica_strategy.go
@@ -95,7 +95,12 @@ func (s *ReplicaStrategy) SelectStoreToAdd(coLocationStores []*core.StoreInfo, e
 func (s *ReplicaStrategy) SelectStoreToFix(coLocationStores []*core.StoreInfo, old uint64) (uint64, bool) {
 	// trick to avoid creating a slice with `old` removed.
 	s.swapStoreToFirst(coLocationStores, old)
-	return s.SelectStoreToAdd(coLocationStores[1:])
+	// If the coLocationStores only has one store, no need to remove.
+	// Otherwise, the other stores will be filtered.
+	if len(coLocationStores) > 1 {
+		coLocationStores = coLocationStores[1:]
+	}
+	return s.SelectStoreToAdd(coLocationStores)
 }
 
 // SelectStoreToImprove returns a store to replace oldStore. The location
diff --git a/pkg/schedule/checker/rule_checker_test.go b/pkg/schedule/checker/rule_checker_test.go
index d4d37de2c3c..f3ef8ed8d23 100644
--- a/pkg/schedule/checker/rule_checker_test.go
+++ b/pkg/schedule/checker/rule_checker_test.go
@@ -2061,3 +2061,63 @@ func (suite *ruleCheckerTestSuite) TestRemoveOrphanPeer() {
 	suite.NotNil(op)
 	suite.Equal("remove-orphan-peer", op.Desc())
 }
+
+func (suite *ruleCheckerTestSuite) TestIssue7808() {
+	re := suite.Require()
+	suite.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1", "disk_type": "mix"})
+	suite.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host2", "disk_type": "mix"})
+	suite.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host3", "disk_type": "ssd"})
+	suite.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4", "disk_type": "ssd"})
+	suite.cluster.AddLabelsStore(5, 1, map[string]string{"host": "host5", "disk_type": "ssd"})
+	suite.cluster.AddLeaderRegionWithRange(1, "", "", 3, 4, 1)
+	err := suite.ruleManager.SetRules([]*placement.Rule{
+		{
+			GroupID: "pd",
+			ID:      "1",
+			Role:    placement.Voter,
+			Count:   2,
+			LabelConstraints: []placement.LabelConstraint{
+				{
+					Key: "disk_type",
+					Values: []string{
+						"ssd",
+					},
+					Op: placement.In,
+				},
+			},
+			LocationLabels: []string{"host"},
+			IsolationLevel: "host",
+		},
+		{
+			GroupID: "pd",
+			ID:      "2",
+			Role:    placement.Follower,
+			Count:   1,
+			LabelConstraints: []placement.LabelConstraint{
+				{
+					Key: "disk_type",
+					Values: []string{
+						"mix",
+					},
+					Op: placement.In,
+				},
+			},
+			LocationLabels: []string{"host"},
+			IsolationLevel: "host",
+		},
+	})
+	re.NoError(err)
+	err = suite.ruleManager.DeleteRule("pd", "default")
+	re.NoError(err)
+	suite.cluster.SetStoreDown(1)
+	region := suite.cluster.GetRegion(1)
+	downPeer := []*pdpb.PeerStats{
+		{Peer: region.GetStorePeer(1), DownSeconds: 6000},
+	}
+	region = region.Clone(core.WithDownPeers(downPeer))
+	suite.cluster.PutRegion(region)
+	op := suite.rc.Check(suite.cluster.GetRegion(1))
+	re.NotNil(op)
+	re.Equal("fast-replace-rule-down-peer", op.Desc())
+	re.Contains(op.Brief(), "mv peer: store [1] to [2]")
+}
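
A note on the core change: slicing a one-element Go slice with [1:] yields an empty slice, so when a rule's only co-located store is the one hosting the down peer, the old code handed SelectStoreToAdd an empty store list; per the new comment in replica_strategy.go, that caused the remaining candidate stores to be filtered out and the down peer was never repaired. The sketch below (plain Go with hypothetical, simplified names, not PD's actual types or API) illustrates only the slicing guard the patch introduces:

package main

import "fmt"

// selectStoreToFix mimics the shape of ReplicaStrategy.SelectStoreToFix after the
// patch; the names are simplified placeholders, not PD's real API. The store
// being repaired is assumed to have been swapped to index 0 already.
func selectStoreToFix(stores []string) []string {
	// Before the fix, stores[1:] was passed unconditionally, so a one-element
	// slice became empty. The guard drops the first element only when other
	// stores remain.
	if len(stores) > 1 {
		stores = stores[1:]
	}
	return stores
}

func main() {
	// Several co-located stores: the store under repair (index 0) is dropped,
	// exactly as before the patch.
	fmt.Println(selectStoreToFix([]string{"down-store", "store-2", "store-3"}))

	// A single co-located store: the old code would have produced an empty
	// slice here; the guarded version keeps the one store.
	fmt.Println(selectStoreToFix([]string{"down-store"}))
}

Run as-is, this prints [store-2 store-3] and then [down-store], mirroring the multi-store and single-store cases the guard distinguishes.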