Fix race condition in leader election for OCP
In OCP we have two steps to prepare a node before we drain it:
first we take the drain leader election and annotate the node with
Draining, and then we pause the MCP and mark the node as MCP paused.

If the config-daemon gets restarted between the first step and the
second, it gets stuck: one node takes the leader election BUT does not
mark itself as Draining, because another node is already draining,
while the node that already carries the Draining annotation tries to
take the drain lock again even though the first node holds it.

With this change, a node that already has the Draining or MCP paused
annotation does not try to take the lock again and simply continues
after the restart.

Signed-off-by: Sebastian Sch <[email protected]>
SchSeba committed Jun 11, 2023
1 parent 109c2c2 commit e81d9ad
Showing 1 changed file with 24 additions and 18 deletions.
42 changes: 24 additions & 18 deletions pkg/daemon/daemon.go
@@ -507,18 +507,22 @@ func (dn *Daemon) nodeStateSyncHandler() error {
 				go dn.getDrainLock(ctx, done)
 				<-done
 			}
 		}
 
-		if dn.openshiftContext.IsOpenshiftCluster() && !dn.openshiftContext.IsHypershift() {
-			glog.Infof("nodeStateSyncHandler(): pause MCP")
-			if err := dn.pauseMCP(); err != nil {
-				return err
-			}
+		if dn.openshiftContext.IsOpenshiftCluster() && !dn.openshiftContext.IsHypershift() && !dn.isMcpPaused() {
+			glog.Infof("nodeStateSyncHandler(): pause MCP")
+			if err := dn.pauseMCP(); err != nil {
+				return err
+			}
 		}
 
-		glog.Info("nodeStateSyncHandler(): drain node")
-		if err := dn.drainNode(); err != nil {
-			return err
+		if dn.disableDrain {
+			glog.Info("nodeStateSyncHandler(): disable drain is true skipping drain")
+		} else {
+			glog.Info("nodeStateSyncHandler(): drain node")
+			if err := dn.drainNode(); err != nil {
+				return err
+			}
 		}
 	}

@@ -578,18 +582,25 @@ func (dn *Daemon) nodeHasAnnotation(annoKey string, value string) bool {
 	return false
 }
 
+// isNodeDraining: check if the node is draining;
+// both the Draining and MCP paused annotations return true
 func (dn *Daemon) isNodeDraining() bool {
 	anno, ok := dn.node.Annotations[annoKey]
 	if !ok {
 		return false
 	}
 
-	if dn.openshiftContext.IsOpenshiftCluster() && !dn.openshiftContext.IsHypershift() {
-		// for openshift cluster draining should be true only if the annotation has MCP paused
-		return anno == annoMcpPaused
-	}
+	return anno == annoDraining || anno == annoMcpPaused
+}
 
-	return anno == annoDraining || anno == annoMcpPaused
+// isMcpPaused: return true only if the node has the MCP paused annotation
+func (dn *Daemon) isMcpPaused() bool {
+	anno, ok := dn.node.Annotations[annoKey]
+	if !ok {
+		return false
+	}
+
+	return anno == annoMcpPaused
 }
 
 func (dn *Daemon) completeDrain() error {
@@ -599,7 +610,7 @@ func (dn *Daemon) completeDrain() error {
 		}
 	}
 
-	if dn.openshiftContext.IsOpenshiftCluster() && !dn.openshiftContext.IsHypershift() {
+	if dn.openshiftContext.IsOpenshiftCluster() && !dn.openshiftContext.IsHypershift() && dn.isMcpPaused() {
 		glog.Infof("completeDrain(): resume MCP %s", dn.mcpName)
 		pausePatch := []byte("{\"spec\":{\"paused\":false}}")
 		if _, err := dn.openshiftContext.McClient.MachineconfigurationV1().MachineConfigPools().Patch(context.Background(), dn.mcpName, types.MergePatchType, pausePatch, metav1.PatchOptions{}); err != nil {
@@ -895,11 +906,6 @@ func (dn *Daemon) pauseMCP() error {
 }
 
 func (dn *Daemon) drainNode() error {
-	if dn.disableDrain {
-		glog.Info("drainNode(): disable drain is true skipping drain")
-		return nil
-	}
-
 	glog.Info("drainNode(): Update prepared")
 	var err error

