From c076bba01aaee102b4c06de1a26772451e36bd8d Mon Sep 17 00:00:00 2001 From: Yevgeny Shnaidman <60741237+yevgeny-shnaidman@users.noreply.github.com> Date: Sun, 22 Sep 2024 17:21:55 +0300 Subject: [PATCH] Fixing module loading during kernel upgrade (#1218) When the node upgrade includes kernel upgrade, NMC Spec will change to include new kernel version. In that case NMC controller will try first to create unloader worker pod. Since the unloader worker pod uses the old configuration (from status), which uses the old image, the modprobe in the worker pod will fail, since it won't find kernel module under the /opt/lib/modules/ path This PR fixes the issue by creating unloader pod in case of difference in spec and status of NMC only in case kernels are equal. Otherwise, it creates loader pod, since it means that node was rebooted, and the kernel module is not loaded yet, since the status contains old kernel --- internal/controllers/nmc_reconciler.go | 14 ++++++-- internal/controllers/nmc_reconciler_test.go | 39 +++++++++++++++++++-- 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/internal/controllers/nmc_reconciler.go b/internal/controllers/nmc_reconciler.go index f98e929e9..4cce8386d 100644 --- a/internal/controllers/nmc_reconciler.go +++ b/internal/controllers/nmc_reconciler.go @@ -325,14 +325,23 @@ func (h *nmcReconcilerHelperImpl) ProcessModuleSpec( } if pod == nil { + // new module is introduced, need to load it if status == nil { logger.Info("Missing status; creating loader Pod") return h.pm.CreateLoaderPod(ctx, nmcObj, spec) } + /* configuration changed for module: if spec status contain the same kernel, + unload the kernel module, otherwise - load kernel modules, since the pod + is not running, the module cannot be loaded using the old kernel configuration + */ if !reflect.DeepEqual(spec.Config, status.Config) { - logger.Info("Outdated config in status; creating unloader Pod") - return h.pm.CreateUnloaderPod(ctx, nmcObj, status) + if spec.Config.KernelVersion == status.Config.KernelVersion { + logger.Info("Outdated config in status; creating unloader Pod") + return h.pm.CreateUnloaderPod(ctx, nmcObj, status) + } + logger.Info("Outdated config in status and kernels differ, probably due to upgrade; creating loader Pod") + return h.pm.CreateLoaderPod(ctx, nmcObj, spec) } node := v1.Node{} @@ -346,6 +355,7 @@ func (h *nmcReconcilerHelperImpl) ProcessModuleSpec( return fmt.Errorf("node %s has no Ready condition", nmcObj.Name) } + // node has been rebooted, load the module using the spec if readyCondition.Status == v1.ConditionTrue && status.LastTransitionTime.Before(&readyCondition.LastTransitionTime) { logger.Info("Outdated last transition time status; creating loader Pod") diff --git a/internal/controllers/nmc_reconciler_test.go b/internal/controllers/nmc_reconciler_test.go index 5f760cfdb..e19a6e789 100644 --- a/internal/controllers/nmc_reconciler_test.go +++ b/internal/controllers/nmc_reconciler_test.go @@ -431,7 +431,7 @@ var _ = Describe("nmcReconcilerHelperImpl_ProcessModuleSpec", func() { ) }) - It("should create an unloader Pod if the spec is different from the status", func() { + It("should create an unloader Pod if the spec is different from the status and kernels are equal", func() { nmc := &kmmv1beta1.NodeModulesConfig{ ObjectMeta: metav1.ObjectMeta{Name: nmcName}, } @@ -441,7 +441,7 @@ var _ = Describe("nmcReconcilerHelperImpl_ProcessModuleSpec", func() { Name: name, Namespace: namespace, }, - Config: kmmv1beta1.ModuleConfig{ContainerImage: "old-container-image"}, + Config: kmmv1beta1.ModuleConfig{ContainerImage: "old-container-image", KernelVersion: "same kernel"}, } status := &kmmv1beta1.NodeModuleStatus{ @@ -449,7 +449,7 @@ var _ = Describe("nmcReconcilerHelperImpl_ProcessModuleSpec", func() { Name: name, Namespace: namespace, }, - Config: kmmv1beta1.ModuleConfig{ContainerImage: "new-container-image"}, + Config: kmmv1beta1.ModuleConfig{ContainerImage: "new-container-image", KernelVersion: "same kernel"}, } gomock.InOrder( @@ -464,6 +464,39 @@ var _ = Describe("nmcReconcilerHelperImpl_ProcessModuleSpec", func() { ) }) + It("should create an loader Pod if the spec is different from the status and kernels different equal", func() { + nmc := &kmmv1beta1.NodeModulesConfig{ + ObjectMeta: metav1.ObjectMeta{Name: nmcName}, + } + + spec := &kmmv1beta1.NodeModuleSpec{ + ModuleItem: kmmv1beta1.ModuleItem{ + Name: name, + Namespace: namespace, + }, + Config: kmmv1beta1.ModuleConfig{ContainerImage: "old-container-image", KernelVersion: "old kernel"}, + } + + status := &kmmv1beta1.NodeModuleStatus{ + ModuleItem: kmmv1beta1.ModuleItem{ + Name: name, + Namespace: namespace, + }, + Config: kmmv1beta1.ModuleConfig{ContainerImage: "new-container-image", KernelVersion: "new kernel"}, + } + + gomock.InOrder( + pm.EXPECT().GetWorkerPod(ctx, podName, namespace), + pm.EXPECT().CreateLoaderPod(ctx, nmc, spec), + ) + + Expect( + wh.ProcessModuleSpec(ctx, nmc, spec, status), + ).NotTo( + HaveOccurred(), + ) + }) + It("should return an error if we could not get the node", func() { nmc := &kmmv1beta1.NodeModulesConfig{ ObjectMeta: metav1.ObjectMeta{Name: nmcName},