Skip to content

Commit

Permalink
Fixing module loading during kernel upgrade (#1218)
Browse files Browse the repository at this point in the history
When a node upgrade includes a kernel upgrade, the NMC spec changes to
include the new kernel version. In that case the NMC controller first tries
to create an unloader worker pod. Because the unloader worker pod uses the
old configuration (from the status), which references the old image, modprobe
in the worker pod fails: it cannot find the kernel module under the
/opt/lib/modules/<new kernel> path.
This PR fixes the issue by creating an unloader pod, when the NMC spec and
status differ, only if their kernel versions are equal. Otherwise, it
creates a loader pod: differing kernels mean that the node was rebooted and
the kernel module is not loaded yet, since the status still contains the old kernel.
  • Loading branch information
yevgeny-shnaidman authored Sep 22, 2024
1 parent 402826e commit c076bba
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 5 deletions.
14 changes: 12 additions & 2 deletions internal/controllers/nmc_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -325,14 +325,23 @@ func (h *nmcReconcilerHelperImpl) ProcessModuleSpec(
}

if pod == nil {
// new module is introduced, need to load it
if status == nil {
logger.Info("Missing status; creating loader Pod")
return h.pm.CreateLoaderPod(ctx, nmcObj, spec)
}

/* configuration changed for the module: if spec and status contain the same
kernel version, unload the kernel module using the old (status) configuration;
otherwise load the kernel module using the spec, since the old configuration
cannot be used once the kernel has changed
*/
if !reflect.DeepEqual(spec.Config, status.Config) {
logger.Info("Outdated config in status; creating unloader Pod")
return h.pm.CreateUnloaderPod(ctx, nmcObj, status)
if spec.Config.KernelVersion == status.Config.KernelVersion {
logger.Info("Outdated config in status; creating unloader Pod")
return h.pm.CreateUnloaderPod(ctx, nmcObj, status)
}
logger.Info("Outdated config in status and kernels differ, probably due to upgrade; creating loader Pod")
return h.pm.CreateLoaderPod(ctx, nmcObj, spec)
}

node := v1.Node{}
Expand All @@ -346,6 +355,7 @@ func (h *nmcReconcilerHelperImpl) ProcessModuleSpec(
return fmt.Errorf("node %s has no Ready condition", nmcObj.Name)
}

// node has been rebooted, load the module using the spec
if readyCondition.Status == v1.ConditionTrue && status.LastTransitionTime.Before(&readyCondition.LastTransitionTime) {
logger.Info("Outdated last transition time status; creating loader Pod")

Expand Down
39 changes: 36 additions & 3 deletions internal/controllers/nmc_reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ var _ = Describe("nmcReconcilerHelperImpl_ProcessModuleSpec", func() {
)
})

It("should create an unloader Pod if the spec is different from the status", func() {
It("should create an unloader Pod if the spec is different from the status and kernels are equal", func() {
nmc := &kmmv1beta1.NodeModulesConfig{
ObjectMeta: metav1.ObjectMeta{Name: nmcName},
}
Expand All @@ -441,15 +441,15 @@ var _ = Describe("nmcReconcilerHelperImpl_ProcessModuleSpec", func() {
Name: name,
Namespace: namespace,
},
Config: kmmv1beta1.ModuleConfig{ContainerImage: "old-container-image"},
Config: kmmv1beta1.ModuleConfig{ContainerImage: "old-container-image", KernelVersion: "same kernel"},
}

status := &kmmv1beta1.NodeModuleStatus{
ModuleItem: kmmv1beta1.ModuleItem{
Name: name,
Namespace: namespace,
},
Config: kmmv1beta1.ModuleConfig{ContainerImage: "new-container-image"},
Config: kmmv1beta1.ModuleConfig{ContainerImage: "new-container-image", KernelVersion: "same kernel"},
}

gomock.InOrder(
Expand All @@ -464,6 +464,39 @@ var _ = Describe("nmcReconcilerHelperImpl_ProcessModuleSpec", func() {
)
})

// Kernel-upgrade scenario: there is no worker Pod, and the spec and status
// configs differ, including their kernel versions. ProcessModuleSpec must
// create a loader Pod from the spec rather than an unloader Pod from the
// status.
It("should create a loader Pod if the spec is different from the status and kernels are different", func() {
	nmc := &kmmv1beta1.NodeModulesConfig{
		ObjectMeta: metav1.ObjectMeta{Name: nmcName},
	}

	// The spec is the desired state, so it carries the new image and kernel.
	spec := &kmmv1beta1.NodeModuleSpec{
		ModuleItem: kmmv1beta1.ModuleItem{
			Name:      name,
			Namespace: namespace,
		},
		Config: kmmv1beta1.ModuleConfig{ContainerImage: "new-container-image", KernelVersion: "new kernel"},
	}

	// The status is the last applied state, so it still carries the old image and kernel.
	status := &kmmv1beta1.NodeModuleStatus{
		ModuleItem: kmmv1beta1.ModuleItem{
			Name:      name,
			Namespace: namespace,
		},
		Config: kmmv1beta1.ModuleConfig{ContainerImage: "old-container-image", KernelVersion: "old kernel"},
	}

	// GetWorkerPod is stubbed with no return values, so no worker Pod is
	// reported; the loader Pod must then be created with the spec.
	gomock.InOrder(
		pm.EXPECT().GetWorkerPod(ctx, podName, namespace),
		pm.EXPECT().CreateLoaderPod(ctx, nmc, spec),
	)

	Expect(
		wh.ProcessModuleSpec(ctx, nmc, spec, status),
	).NotTo(
		HaveOccurred(),
	)
})

It("should return an error if we could not get the node", func() {
nmc := &kmmv1beta1.NodeModulesConfig{
ObjectMeta: metav1.ObjectMeta{Name: nmcName},
Expand Down

0 comments on commit c076bba

Please sign in to comment.