
Commit

Fixing race condition during cluster upgrade with kernel upgrade (#1222)
The following scenario causes worker pods to get stuck during cluster
upgrade:
1) The kernel module is loaded on the node. The NMC contains both a Spec and
   a Status referencing the current kernel version.
2) The cluster upgrade starts. As part of the upgrade, the node becomes
   Unschedulable.
3) Module-NMC removes the Spec from the NMC, since the node is Unschedulable.
4) The node becomes Schedulable again.
5) The NMC controller tries to unload the kernel module using the NMC Status
   configuration, which references the old kernel.
6) The worker unload pods get stuck in Error, since the node is running the
   new kernel.
7) Module-NMC updates the Spec of the NMC, but since the worker pod exists,
   nothing is done.

Solution:
   When processing orphaned NMC statuses (a Status exists but the Spec does
not), the NMC controller should ignore modules whose Status was created
prior to the node's Ready timestamp; a sketch of the underlying check follows below.
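
The fix leans on the node-API helper NodeBecomeReadyAfter, which already gates loader-Pod creation in ProcessModuleSpec. As a rough sketch only (the real helper lives in KMM's internal node package and may be implemented differently), such a check could compare the Ready condition's last transition time against the module status timestamp:

package node

import (
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// nodeBecomeReadyAfter is an illustrative stand-in for the real helper: it
// reports whether the node's Ready condition last transitioned to True after
// the given timestamp, i.e. the node rebooted (possibly into a new kernel)
// after the module status was recorded.
func nodeBecomeReadyAfter(node *v1.Node, ts metav1.Time) bool {
	for _, cond := range node.Status.Conditions {
		if cond.Type == v1.NodeReady &&
			cond.Status == v1.ConditionTrue &&
			cond.LastTransitionTime.After(ts.Time) {
			return true
		}
	}
	return false
}

With a check like this in place, ProcessUnconfiguredModuleStatus can return early for statuses recorded before the node last became Ready, leaving the stale status to be reconciled once Module-NMC updates the Spec for the new kernel.
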
yevgeny-shnaidman authored Sep 25, 2024
1 parent aed4d51 commit 898c278
Showing 3 changed files with 93 additions and 85 deletions.
16 changes: 8 additions & 8 deletions internal/controllers/mock_nmc_reconciler.go

Some generated files are not rendered by default.

35 changes: 20 additions & 15 deletions internal/controllers/nmc_reconciler.go
@@ -125,14 +125,19 @@ func (r *NMCReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
statusMap[status.Namespace+"/"+status.Name] = &nmcObj.Status.Modules[i]
}

node := v1.Node{}
if err := r.client.Get(ctx, types.NamespacedName{Name: nmcObj.Name}, &node); err != nil {
return ctrl.Result{}, fmt.Errorf("could not get node %s: %v", nmcObj.Name, err)
}

errs := make([]error, 0, len(nmcObj.Spec.Modules)+len(nmcObj.Status.Modules))

for _, mod := range nmcObj.Spec.Modules {
moduleNameKey := mod.Namespace + "/" + mod.Name

logger := logger.WithValues("module", moduleNameKey)

if err := r.helper.ProcessModuleSpec(ctrl.LoggerInto(ctx, logger), &nmcObj, &mod, statusMap[moduleNameKey]); err != nil {
if err := r.helper.ProcessModuleSpec(ctrl.LoggerInto(ctx, logger), &nmcObj, &mod, statusMap[moduleNameKey], &node); err != nil {
errs = append(
errs,
fmt.Errorf("error processing Module %s: %v", moduleNameKey, err),
@@ -150,7 +155,7 @@ func (r *NMCReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
for statusNameKey, status := range statusMap {
logger := logger.WithValues("status", statusNameKey)

if err := r.helper.ProcessUnconfiguredModuleStatus(ctrl.LoggerInto(ctx, logger), &nmcObj, status); err != nil {
if err := r.helper.ProcessUnconfiguredModuleStatus(ctrl.LoggerInto(ctx, logger), &nmcObj, status, &node); err != nil {
errs = append(
errs,
fmt.Errorf("error processing orphan status for Module %s: %v", statusNameKey, err),
@@ -161,10 +166,6 @@ func (r *NMCReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
if err := r.helper.GarbageCollectInUseLabels(ctx, &nmcObj); err != nil {
errs = append(errs, fmt.Errorf("failed to GC in-use labels for NMC %s: %v", req.NamespacedName, err))
}
node := v1.Node{}
if err := r.client.Get(ctx, types.NamespacedName{Name: nmcObj.Name}, &node); err != nil {
return ctrl.Result{}, fmt.Errorf("could not get node %s: %v", nmcObj.Name, err)
}

if loaded, unloaded, err := r.helper.UpdateNodeLabels(ctx, &nmcObj, &node); err != nil {
errs = append(errs, fmt.Errorf("could not update node's labels for NMC %s: %v", req.NamespacedName, err))
@@ -223,8 +224,8 @@ func GetContainerStatus(statuses []v1.ContainerStatus, name string) v1.Container

type nmcReconcilerHelper interface {
GarbageCollectInUseLabels(ctx context.Context, nmc *kmmv1beta1.NodeModulesConfig) error
ProcessModuleSpec(ctx context.Context, nmc *kmmv1beta1.NodeModulesConfig, spec *kmmv1beta1.NodeModuleSpec, status *kmmv1beta1.NodeModuleStatus) error
ProcessUnconfiguredModuleStatus(ctx context.Context, nmc *kmmv1beta1.NodeModulesConfig, status *kmmv1beta1.NodeModuleStatus) error
ProcessModuleSpec(ctx context.Context, nmc *kmmv1beta1.NodeModulesConfig, spec *kmmv1beta1.NodeModuleSpec, status *kmmv1beta1.NodeModuleStatus, node *v1.Node) error
ProcessUnconfiguredModuleStatus(ctx context.Context, nmc *kmmv1beta1.NodeModulesConfig, status *kmmv1beta1.NodeModuleStatus, node *v1.Node) error
RemovePodFinalizers(ctx context.Context, nodeName string) error
SyncStatus(ctx context.Context, nmc *kmmv1beta1.NodeModulesConfig) error
UpdateNodeLabels(ctx context.Context, nmc *kmmv1beta1.NodeModulesConfig, node *v1.Node) ([]types.NamespacedName, []types.NamespacedName, error)
@@ -314,6 +315,7 @@ func (h *nmcReconcilerHelperImpl) ProcessModuleSpec(
nmcObj *kmmv1beta1.NodeModulesConfig,
spec *kmmv1beta1.NodeModuleSpec,
status *kmmv1beta1.NodeModuleStatus,
node *v1.Node,
) error {
podName := workerPodName(nmcObj.Name, spec.Name)

@@ -344,13 +346,7 @@ func (h *nmcReconcilerHelperImpl) ProcessModuleSpec(
return h.pm.CreateLoaderPod(ctx, nmcObj, spec)
}

node := v1.Node{}

if err = h.client.Get(ctx, types.NamespacedName{Name: nmcObj.Name}, &node); err != nil {
return fmt.Errorf("could not get node %s: %v", nmcObj.Name, err)
}

if h.nodeAPI.NodeBecomeReadyAfter(&node, status.LastTransitionTime) {
if h.nodeAPI.NodeBecomeReadyAfter(node, status.LastTransitionTime) {
logger.Info("node has been rebooted and become ready after kernel module was loaded; creating loader Pod")
return h.pm.CreateLoaderPod(ctx, nmcObj, spec)
}
@@ -392,11 +388,20 @@ func (h *nmcReconcilerHelperImpl) ProcessUnconfiguredModuleStatus(
ctx context.Context,
nmcObj *kmmv1beta1.NodeModulesConfig,
status *kmmv1beta1.NodeModuleStatus,
node *v1.Node,
) error {
podName := workerPodName(nmcObj.Name, status.Name)

logger := ctrl.LoggerFrom(ctx).WithValues("pod name", podName)

/* The node was rebooted and the spec is not set, so no kernel module is loaded
   and there is nothing to unload. This also covers the scenario where the node's
   kernel was upgraded, in which case the unload pod would fail anyway.
*/
if h.nodeAPI.NodeBecomeReadyAfter(node, status.LastTransitionTime) {
logger.Info("node was rebooted, no need to unload kernel module that is not present in kernel, will wait until NMC spec is updated")
return nil
}

pod, err := h.pm.GetWorkerPod(ctx, podName, status.Namespace)
if err != nil {
return fmt.Errorf("error while getting the worker Pod %s: %v", podName, err)