diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 07eefb4f8..950287a16 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -313,77 +313,60 @@ func (dn *Daemon) nodeStateSyncHandler() error { latest := dn.desiredNodeState.GetGeneration() log.Log.V(0).Info("nodeStateSyncHandler(): new generation", "generation", latest) - if dn.currentNodeState.GetGeneration() == latest && !dn.isDrainCompleted() { - if vars.UsingSystemdMode { - serviceEnabled, err := dn.HostHelpers.IsServiceEnabled(systemd.SriovServicePath) - if err != nil { - log.Log.Error(err, "nodeStateSyncHandler(): failed to check if sriov-config service exist on host") - return err - } - postNetworkServiceEnabled, err := dn.HostHelpers.IsServiceEnabled(systemd.SriovPostNetworkServicePath) - if err != nil { - log.Log.Error(err, "nodeStateSyncHandler(): failed to check if sriov-config-post-network service exist on host") - return err - } - - // if the service doesn't exist we should continue to let the k8s plugin to create the service files - // this is only for k8s base environments, for openshift the sriov-operator creates a machine config to will apply - // the system service and reboot the node the config-daemon doesn't need to do anything. - if !(serviceEnabled && postNetworkServiceEnabled) { - sriovResult = &systemd.SriovResult{SyncStatus: consts.SyncStatusFailed, - LastSyncError: fmt.Sprintf("some sriov systemd services are not available on node: "+ - "sriov-config available:%t, sriov-config-post-network available:%t", serviceEnabled, postNetworkServiceEnabled)} - } else { - sriovResult, err = systemd.ReadSriovResult() - if err != nil { - log.Log.Error(err, "nodeStateSyncHandler(): failed to load sriov result file from host") - return err - } - } - if sriovResult.LastSyncError != "" || sriovResult.SyncStatus == consts.SyncStatusFailed { - log.Log.Info("nodeStateSyncHandler(): sync failed systemd service error", "last-sync-error", sriovResult.LastSyncError) - - // add the error but don't requeue - dn.refreshCh <- Message{ - syncStatus: consts.SyncStatusFailed, - lastSyncError: sriovResult.LastSyncError, - } - <-dn.syncCh - return nil - } - } - log.Log.V(0).Info("nodeStateSyncHandler(): Interface not changed") - if dn.desiredNodeState.Status.LastSyncError != "" || - dn.desiredNodeState.Status.SyncStatus != consts.SyncStatusSucceeded { - dn.refreshCh <- Message{ - syncStatus: consts.SyncStatusSucceeded, - lastSyncError: "", - } - // wait for writer to refresh the status - <-dn.syncCh + // load plugins if it has not loaded + if len(dn.loadedPlugins) == 0 { + dn.loadedPlugins, err = loadPlugins(dn.desiredNodeState, dn.HostHelpers, dn.disabledPlugins) + if err != nil { + log.Log.Error(err, "nodeStateSyncHandler(): failed to enable vendor plugins") + return err } - - return nil } - if dn.desiredNodeState.GetGeneration() == 1 && len(dn.desiredNodeState.Spec.Interfaces) == 0 { - err = dn.HostHelpers.ClearPCIAddressFolder() + if vars.UsingSystemdMode && dn.currentNodeState.GetGeneration() == latest && !dn.isDrainCompleted() { + serviceEnabled, err := dn.HostHelpers.IsServiceEnabled(systemd.SriovServicePath) if err != nil { - log.Log.Error(err, "failed to clear the PCI address configuration") + log.Log.Error(err, "nodeStateSyncHandler(): failed to check if sriov-config service exist on host") + return err + } + postNetworkServiceEnabled, err := dn.HostHelpers.IsServiceEnabled(systemd.SriovPostNetworkServicePath) + if err != nil { + log.Log.Error(err, "nodeStateSyncHandler(): failed to check if sriov-config-post-network service exist on host") return err } - log.Log.V(0).Info( - "nodeStateSyncHandler(): interface policy spec not yet set by controller for sriovNetworkNodeState", - "name", dn.desiredNodeState.Name) - if dn.desiredNodeState.Status.SyncStatus != "Succeeded" { + // if the service doesn't exist we should continue to let the k8s plugin to create the service files + // this is only for k8s base environments, for openshift the sriov-operator creates a machine config to will apply + // the system service and reboot the node the config-daemon doesn't need to do anything. + if !(serviceEnabled && postNetworkServiceEnabled) { + sriovResult = &systemd.SriovResult{SyncStatus: consts.SyncStatusFailed, + LastSyncError: fmt.Sprintf("some sriov systemd services are not available on node: "+ + "sriov-config available:%t, sriov-config-post-network available:%t", serviceEnabled, postNetworkServiceEnabled)} + } else { + sriovResult, err = systemd.ReadSriovResult() + if err != nil { + log.Log.Error(err, "nodeStateSyncHandler(): failed to load sriov result file from host") + return err + } + } + if sriovResult.LastSyncError != "" || sriovResult.SyncStatus == consts.SyncStatusFailed { + log.Log.Info("nodeStateSyncHandler(): sync failed systemd service error", "last-sync-error", sriovResult.LastSyncError) + + // add the error but don't requeue dn.refreshCh <- Message{ - syncStatus: "Succeeded", - lastSyncError: "", + syncStatus: consts.SyncStatusFailed, + lastSyncError: sriovResult.LastSyncError, } - // wait for writer to refresh status <-dn.syncCh + return nil } + } + + skip, err := dn.shouldSkipReconciliation(dn.desiredNodeState) + if err != nil { + return err + } + // Reconcile only when there are changes in the spec or status of managed VFs. + if skip { return nil } @@ -404,15 +387,6 @@ func (dn *Daemon) nodeStateSyncHandler() error { } dn.desiredNodeState.Status = updatedState.Status - // load plugins if it has not loaded - if len(dn.loadedPlugins) == 0 { - dn.loadedPlugins, err = loadPlugins(dn.desiredNodeState, dn.HostHelpers, dn.disabledPlugins) - if err != nil { - log.Log.Error(err, "nodeStateSyncHandler(): failed to enable vendor plugins") - return err - } - } - reqReboot := false reqDrain := false @@ -564,6 +538,61 @@ func (dn *Daemon) nodeStateSyncHandler() error { return nil } +func (dn *Daemon) shouldSkipReconciliation(latestState *sriovnetworkv1.SriovNetworkNodeState) (bool, error) { + var err error + + // Skip when SriovNetworkNodeState object has just been created. + if latestState.GetGeneration() == 1 && len(latestState.Spec.Interfaces) == 0 { + err = dn.HostHelpers.ClearPCIAddressFolder() + if err != nil { + log.Log.Error(err, "failed to clear the PCI address configuration") + return false, err + } + + log.Log.V(0).Info( + "shouldSkipReconciliation(): interface policy spec not yet set by controller for sriovNetworkNodeState", + "name", latestState.Name) + if latestState.Status.SyncStatus != consts.SyncStatusSucceeded { + dn.refreshCh <- Message{ + syncStatus: consts.SyncStatusSucceeded, + lastSyncError: "", + } + // wait for writer to refresh status + <-dn.syncCh + } + return true, nil + } + + // Verify changes in the spec of the SriovNetworkNodeState CR. + if dn.currentNodeState.GetGeneration() == latestState.GetGeneration() && !dn.isDrainCompleted() { + for _, p := range dn.loadedPlugins { + // Verify changes in the status of the SriovNetworkNodeState CR. + changed, err := p.CheckStatusChanges(latestState) + if err != nil { + return false, err + } + if changed { + return false, nil + } + } + + log.Log.V(0).Info("shouldSkipReconciliation(): Interface not changed") + if latestState.Status.LastSyncError != "" || + latestState.Status.SyncStatus != consts.SyncStatusSucceeded { + dn.refreshCh <- Message{ + syncStatus: consts.SyncStatusSucceeded, + lastSyncError: "", + } + // wait for writer to refresh the status + <-dn.syncCh + } + + return true, nil + } + + return false, nil +} + // handleDrain: adds the right annotation to the node and nodeState object // returns true if we need to finish the reconcile loop and wait for a new object func (dn *Daemon) handleDrain(reqReboot bool) (bool, error) { diff --git a/pkg/plugins/fake/fake_plugin.go b/pkg/plugins/fake/fake_plugin.go index f7fe6e56f..0aa218f28 100644 --- a/pkg/plugins/fake/fake_plugin.go +++ b/pkg/plugins/fake/fake_plugin.go @@ -21,6 +21,10 @@ func (f *FakePlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeState return false, false, nil } +func (f *FakePlugin) CheckStatusChanges(new *sriovnetworkv1.SriovNetworkNodeState) (bool, error) { + return false, nil +} + func (f *FakePlugin) Apply() error { return nil } diff --git a/pkg/plugins/generic/generic_plugin.go b/pkg/plugins/generic/generic_plugin.go index a7a71ca15..5e0827c69 100644 --- a/pkg/plugins/generic/generic_plugin.go +++ b/pkg/plugins/generic/generic_plugin.go @@ -4,7 +4,6 @@ import ( "bytes" "errors" "os/exec" - "reflect" "strconv" "strings" "syscall" @@ -52,7 +51,6 @@ type GenericPlugin struct { PluginName string SpecVersion string DesireState *sriovnetworkv1.SriovNetworkNodeState - LastState *sriovnetworkv1.SriovNetworkNodeState DriverStateMap DriverStateMapType DesiredKernelArgs map[string]bool helpers helper.HostHelpersInterface @@ -141,6 +139,37 @@ func (p *GenericPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeSt return } +// CheckStatusChanges verify whether SriovNetworkNodeState CR status present changes on configured VFs. +func (p *GenericPlugin) CheckStatusChanges(current *sriovnetworkv1.SriovNetworkNodeState) (bool, error) { + log.Log.Info("generic-plugin CheckStatusChanges()") + + for _, iface := range current.Spec.Interfaces { + found := false + for _, ifaceStatus := range current.Status.Interfaces { + // TODO: remove the check for ExternallyManaged - https://github.com/k8snetworkplumbingwg/sriov-network-operator/issues/632 + if iface.PciAddress == ifaceStatus.PciAddress && !iface.ExternallyManaged { + found = true + if sriovnetworkv1.NeedToUpdateSriov(&iface, &ifaceStatus) { + log.Log.Info("CheckStatusChanges(): status changed for interface", "address", iface.PciAddress) + return true, nil + } + break + } + } + if !found { + log.Log.Info("CheckStatusChanges(): no status found for interface", "address", iface.PciAddress) + } + } + + missingKernelArgs, err := p.getMissingKernelArgs() + if err != nil { + log.Log.Error(err, "generic-plugin CheckStatusChanges(): failed to verify missing kernel arguments") + return false, err + } + + return len(missingKernelArgs) != 0, nil +} + func (p *GenericPlugin) syncDriverState() error { for _, driverState := range p.DriverStateMap { if !driverState.DriverLoaded && driverState.NeedDriverFunc(p.DesireState, driverState) { @@ -159,14 +188,6 @@ func (p *GenericPlugin) syncDriverState() error { func (p *GenericPlugin) Apply() error { log.Log.Info("generic plugin Apply()", "desiredState", p.DesireState.Spec) - if p.LastState != nil { - log.Log.Info("generic plugin Apply()", "lastState", p.LastState.Spec) - if reflect.DeepEqual(p.LastState.Spec.Interfaces, p.DesireState.Spec.Interfaces) { - log.Log.Info("generic plugin Apply(): desired and latest state are the same, nothing to apply") - return nil - } - } - if err := p.syncDriverState(); err != nil { return err } @@ -188,8 +209,7 @@ func (p *GenericPlugin) Apply() error { } return err } - p.LastState = &sriovnetworkv1.SriovNetworkNodeState{} - *p.LastState = *p.DesireState + return nil } @@ -252,38 +272,49 @@ func (p *GenericPlugin) addToDesiredKernelArgs(karg string) { } } -// syncDesiredKernelArgs Should be called to set all the kernel arguments. Returns bool if node update is needed. -func (p *GenericPlugin) syncDesiredKernelArgs() (bool, error) { - needReboot := false +// getMissingKernelArgs gets Kernel arguments that have not been set. +func (p *GenericPlugin) getMissingKernelArgs() ([]string, error) { + missingArgs := make([]string, 0, len(p.DesiredKernelArgs)) if len(p.DesiredKernelArgs) == 0 { - return false, nil + return nil, nil } + kargs, err := p.helpers.GetCurrentKernelArgs() if err != nil { - return false, err + return nil, err } - for desiredKarg, attempted := range p.DesiredKernelArgs { - set := p.helpers.IsKernelArgsSet(kargs, desiredKarg) - if !set { - if attempted { - log.Log.V(2).Info("generic plugin syncDesiredKernelArgs(): previously attempted to set kernel arg", - "karg", desiredKarg) - } - // There is a case when we try to set the kernel argument here, the daemon could decide to not reboot because - // the daemon encountered a potentially one-time error. However we always want to make sure that the kernel - // argument is set once the daemon goes through node state sync again. - update, err := setKernelArg(desiredKarg) - if err != nil { - log.Log.Error(err, "generic plugin syncDesiredKernelArgs(): fail to set kernel arg", "karg", desiredKarg) - return false, err - } - if update { - needReboot = true - log.Log.V(2).Info("generic plugin syncDesiredKernelArgs(): need reboot for setting kernel arg", "karg", desiredKarg) - } - p.DesiredKernelArgs[desiredKarg] = true + + for desiredKarg := range p.DesiredKernelArgs { + if !p.helpers.IsKernelArgsSet(kargs, desiredKarg) { + missingArgs = append(missingArgs, desiredKarg) } } + return missingArgs, nil +} + +// syncDesiredKernelArgs should be called to set all the kernel arguments. Returns bool if node update is needed. +func (p *GenericPlugin) syncDesiredKernelArgs(kargs []string) (bool, error) { + needReboot := false + + for _, karg := range kargs { + if p.DesiredKernelArgs[karg] { + log.Log.V(2).Info("generic-plugin syncDesiredKernelArgs(): previously attempted to set kernel arg", + "karg", karg) + } + // There is a case when we try to set the kernel argument here, the daemon could decide to not reboot because + // the daemon encountered a potentially one-time error. However we always want to make sure that the kernel + // argument is set once the daemon goes through node state sync again. + update, err := setKernelArg(karg) + if err != nil { + log.Log.Error(err, "generic-plugin syncDesiredKernelArgs(): fail to set kernel arg", "karg", karg) + return false, err + } + if update { + needReboot = true + log.Log.V(2).Info("generic-plugin syncDesiredKernelArgs(): need reboot for setting kernel arg", "karg", karg) + } + p.DesiredKernelArgs[karg] = true + } return needReboot, nil } @@ -351,19 +382,28 @@ func (p *GenericPlugin) addVfioDesiredKernelArg(state *sriovnetworkv1.SriovNetwo } } -func (p *GenericPlugin) needRebootNode(state *sriovnetworkv1.SriovNetworkNodeState) (needReboot bool, err error) { - needReboot = false +func (p *GenericPlugin) needRebootNode(state *sriovnetworkv1.SriovNetworkNodeState) (bool, error) { + needReboot := false + p.addVfioDesiredKernelArg(state) - updateNode, err := p.syncDesiredKernelArgs() + missingKernelArgs, err := p.getMissingKernelArgs() if err != nil { - log.Log.Error(err, "generic plugin needRebootNode(): failed to set the desired kernel arguments") + log.Log.Error(err, "generic-plugin needRebootNode(): failed to verify missing kernel arguments") return false, err } - if updateNode { - log.Log.V(2).Info("generic plugin needRebootNode(): need reboot for updating kernel arguments") - needReboot = true + + if len(missingKernelArgs) != 0 { + needReboot, err = p.syncDesiredKernelArgs(missingKernelArgs) + if err != nil { + log.Log.Error(err, "generic-plugin needRebootNode(): failed to set the desired kernel arguments") + return false, err + } + if needReboot { + log.Log.V(2).Info("generic-plugin needRebootNode(): need reboot for updating kernel arguments") + } } + return needReboot, nil } diff --git a/pkg/plugins/generic/generic_plugin_test.go b/pkg/plugins/generic/generic_plugin_test.go index 9f9c1be83..d2e9c1bbe 100644 --- a/pkg/plugins/generic/generic_plugin_test.go +++ b/pkg/plugins/generic/generic_plugin_test.go @@ -8,6 +8,7 @@ import ( . "github.com/onsi/gomega" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" mock_helper "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper/mock" plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins" ) @@ -137,6 +138,164 @@ var _ = Describe("Generic plugin", func() { Expect(needDrain).To(BeTrue()) }) + It("should not detect changes on status", func() { + networkNodeState := &sriovnetworkv1.SriovNetworkNodeState{ + Spec: sriovnetworkv1.SriovNetworkNodeStateSpec{ + Interfaces: sriovnetworkv1.Interfaces{{ + PciAddress: "0000:00:00.0", + NumVfs: 1, + VfGroups: []sriovnetworkv1.VfGroup{{ + DeviceType: "netdevice", + PolicyName: "policy-1", + ResourceName: "resource-1", + VfRange: "0-0", + }}}}, + }, + Status: sriovnetworkv1.SriovNetworkNodeStateStatus{ + Interfaces: sriovnetworkv1.InterfaceExts{{ + PciAddress: "0000:00:00.0", + NumVfs: 1, + TotalVfs: 1, + DeviceID: "1015", + Vendor: "15b3", + Name: "sriovif1", + Mtu: 1500, + Mac: "0c:42:a1:55:ee:46", + Driver: "mlx5_core", + EswitchMode: "legacy", + LinkSpeed: "25000 Mb/s", + LinkType: "ETH", + VFs: []sriovnetworkv1.VirtualFunction{{ + PciAddress: "0000:00:00.1", + DeviceID: "1016", + Vendor: "15b3", + VfID: 0, + Name: "sriovif1v0", + Mtu: 1500, + Mac: "8e:d6:2c:62:87:1b", + Driver: "mlx5_core", + }}, + }}, + }, + } + + changed, err := genericPlugin.CheckStatusChanges(networkNodeState) + Expect(err).ToNot(HaveOccurred()) + Expect(changed).To(BeFalse()) + }) + + It("should detect changes on status due to spec mismatch", func() { + networkNodeState := &sriovnetworkv1.SriovNetworkNodeState{ + Spec: sriovnetworkv1.SriovNetworkNodeStateSpec{ + Interfaces: sriovnetworkv1.Interfaces{{ + PciAddress: "0000:00:00.0", + NumVfs: 2, + Mtu: 1500, + VfGroups: []sriovnetworkv1.VfGroup{{ + DeviceType: "netdevice", + PolicyName: "policy-1", + ResourceName: "resource-1", + VfRange: "0-1", + Mtu: 1500, + }}}}, + }, + Status: sriovnetworkv1.SriovNetworkNodeStateStatus{ + Interfaces: sriovnetworkv1.InterfaceExts{{ + PciAddress: "0000:00:00.0", + NumVfs: 2, + TotalVfs: 2, + DeviceID: "1015", + Vendor: "15b3", + Name: "sriovif1", + Mtu: 1500, + Mac: "0c:42:a1:55:ee:46", + Driver: "mlx5_core", + EswitchMode: "legacy", + LinkSpeed: "25000 Mb/s", + LinkType: "ETH", + VFs: []sriovnetworkv1.VirtualFunction{{ + PciAddress: "0000:00:00.1", + DeviceID: "1016", + Vendor: "15b3", + VfID: 0, + Driver: "mlx5_core", + Name: "sriovif1v0", + Mtu: 999, // Bad MTU value, changed by the user application + Mac: "8e:d6:2c:62:87:1b", + }, { + PciAddress: "0000:00:00.2", + DeviceID: "1016", + Vendor: "15b3", + VfID: 0, + Driver: "mlx5_core", + }}, + }}, + }, + } + + changed, err := genericPlugin.CheckStatusChanges(networkNodeState) + Expect(err).ToNot(HaveOccurred()) + Expect(changed).To(BeTrue()) + }) + + It("should detect changes on status due to missing kernel args", func() { + networkNodeState := &sriovnetworkv1.SriovNetworkNodeState{ + Spec: sriovnetworkv1.SriovNetworkNodeStateSpec{ + Interfaces: sriovnetworkv1.Interfaces{{ + PciAddress: "0000:00:00.0", + NumVfs: 2, + Mtu: 1500, + VfGroups: []sriovnetworkv1.VfGroup{{ + DeviceType: "vfio-pci", + PolicyName: "policy-1", + ResourceName: "resource-1", + VfRange: "0-1", + Mtu: 1500, + }}}}, + }, + Status: sriovnetworkv1.SriovNetworkNodeStateStatus{ + Interfaces: sriovnetworkv1.InterfaceExts{{ + PciAddress: "0000:00:00.0", + NumVfs: 2, + TotalVfs: 2, + DeviceID: "159b", + Vendor: "8086", + Name: "sriovif1", + Mtu: 1500, + Mac: "0c:42:a1:55:ee:46", + Driver: "ice", + EswitchMode: "legacy", + LinkSpeed: "25000 Mb/s", + LinkType: "ETH", + VFs: []sriovnetworkv1.VirtualFunction{{ + PciAddress: "0000:00:00.1", + DeviceID: "1889", + Vendor: "8086", + VfID: 0, + Driver: "vfio-pci", + }, { + PciAddress: "0000:00:00.2", + DeviceID: "1889", + Vendor: "8086", + VfID: 1, + Driver: "vfio-pci", + }}, + }}, + }, + } + + // Load required kernel args. + genericPlugin.(*GenericPlugin).addVfioDesiredKernelArg(networkNodeState) + + hostHelper.EXPECT().GetCurrentKernelArgs().Return("", nil) + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIntelIommu).Return(false) + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPt).Return(false) + + changed, err := genericPlugin.CheckStatusChanges(networkNodeState) + Expect(err).ToNot(HaveOccurred()) + Expect(changed).To(BeTrue()) + }) + It("should load vfio_pci driver", func() { networkNodeState := &sriovnetworkv1.SriovNetworkNodeState{ Spec: sriovnetworkv1.SriovNetworkNodeStateSpec{ diff --git a/pkg/plugins/intel/intel_plugin.go b/pkg/plugins/intel/intel_plugin.go index 467116ddb..e88a186c9 100644 --- a/pkg/plugins/intel/intel_plugin.go +++ b/pkg/plugins/intel/intel_plugin.go @@ -35,11 +35,16 @@ func (p *IntelPlugin) Spec() string { } // OnNodeStateChange Invoked when SriovNetworkNodeState CR is created or updated, return if need dain and/or reboot node -func (p *IntelPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeState) (needDrain bool, needReboot bool, err error) { - log.Log.Info("intel plugin OnNodeStateChange()") +func (p *IntelPlugin) OnNodeStateChange(*sriovnetworkv1.SriovNetworkNodeState) (bool, bool, error) { + log.Log.Info("intel-plugin OnNodeStateChange()") return false, false, nil } +// OnNodeStatusChange verify whether SriovNetworkNodeState CR status present changes on configured VFs. +func (p *IntelPlugin) CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) (bool, error) { + return false, nil +} + // Apply config change func (p *IntelPlugin) Apply() error { log.Log.Info("intel plugin Apply()") diff --git a/pkg/plugins/k8s/k8s_plugin.go b/pkg/plugins/k8s/k8s_plugin.go index 118073ee5..1a0336049 100644 --- a/pkg/plugins/k8s/k8s_plugin.go +++ b/pkg/plugins/k8s/k8s_plugin.go @@ -154,6 +154,12 @@ func (p *K8sPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeState) return } +// TODO: implement - https://github.com/k8snetworkplumbingwg/sriov-network-operator/issues/630 +// OnNodeStatusChange verify whether SriovNetworkNodeState CR status present changes on configured VFs. +func (p *K8sPlugin) CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) (bool, error) { + return false, nil +} + // Apply config change func (p *K8sPlugin) Apply() error { log.Log.Info("k8s plugin Apply()") diff --git a/pkg/plugins/mellanox/mellanox_plugin.go b/pkg/plugins/mellanox/mellanox_plugin.go index 6b1b943de..87363464b 100644 --- a/pkg/plugins/mellanox/mellanox_plugin.go +++ b/pkg/plugins/mellanox/mellanox_plugin.go @@ -171,6 +171,12 @@ func (p *MellanoxPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeS return } +// TODO: implement - https://github.com/k8snetworkplumbingwg/sriov-network-operator/issues/631 +// OnNodeStatusChange verify whether SriovNetworkNodeState CR status present changes on configured VFs. +func (p *MellanoxPlugin) CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) (bool, error) { + return false, nil +} + // Apply config change func (p *MellanoxPlugin) Apply() error { if p.helpers.IsKernelLockdownMode() { diff --git a/pkg/plugins/mock/mock_plugin.go b/pkg/plugins/mock/mock_plugin.go index b821139c5..a6ae38202 100644 --- a/pkg/plugins/mock/mock_plugin.go +++ b/pkg/plugins/mock/mock_plugin.go @@ -48,6 +48,21 @@ func (mr *MockVendorPluginMockRecorder) Apply() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Apply", reflect.TypeOf((*MockVendorPlugin)(nil).Apply)) } +// CheckStatusChanges mocks base method. +func (m *MockVendorPlugin) CheckStatusChanges(arg0 *v1.SriovNetworkNodeState) (bool, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "CheckStatusChanges", arg0) + ret0, _ := ret[0].(bool) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// CheckStatusChanges indicates an expected call of CheckStatusChanges. +func (mr *MockVendorPluginMockRecorder) CheckStatusChanges(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CheckStatusChanges", reflect.TypeOf((*MockVendorPlugin)(nil).CheckStatusChanges), arg0) +} + // Name mocks base method. func (m *MockVendorPlugin) Name() string { m.ctrl.T.Helper() @@ -63,9 +78,9 @@ func (mr *MockVendorPluginMockRecorder) Name() *gomock.Call { } // OnNodeStateChange mocks base method. -func (m *MockVendorPlugin) OnNodeStateChange(new *v1.SriovNetworkNodeState) (bool, bool, error) { +func (m *MockVendorPlugin) OnNodeStateChange(arg0 *v1.SriovNetworkNodeState) (bool, bool, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "OnNodeStateChange", new) + ret := m.ctrl.Call(m, "OnNodeStateChange", arg0) ret0, _ := ret[0].(bool) ret1, _ := ret[1].(bool) ret2, _ := ret[2].(error) @@ -73,9 +88,9 @@ func (m *MockVendorPlugin) OnNodeStateChange(new *v1.SriovNetworkNodeState) (boo } // OnNodeStateChange indicates an expected call of OnNodeStateChange. -func (mr *MockVendorPluginMockRecorder) OnNodeStateChange(new interface{}) *gomock.Call { +func (mr *MockVendorPluginMockRecorder) OnNodeStateChange(arg0 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "OnNodeStateChange", reflect.TypeOf((*MockVendorPlugin)(nil).OnNodeStateChange), new) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "OnNodeStateChange", reflect.TypeOf((*MockVendorPlugin)(nil).OnNodeStateChange), arg0) } // Spec mocks base method. diff --git a/pkg/plugins/plugin.go b/pkg/plugins/plugin.go index 276e37cf2..830f7dd68 100644 --- a/pkg/plugins/plugin.go +++ b/pkg/plugins/plugin.go @@ -6,12 +6,14 @@ import ( //go:generate ../../bin/mockgen -destination mock/mock_plugin.go -source plugin.go type VendorPlugin interface { - // Return the name of plugin + // Name returns the name of plugin Name() string - // Return the SpecVersion followed by plugin + // Spec returns the SpecVersion followed by plugin Spec() string - // Invoked when SriovNetworkNodeState CR is created or updated, return if need dain and/or reboot node - OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeState) (bool, bool, error) + // OnNodeStateChange is invoked when SriovNetworkNodeState CR is created or updated, return if need dain and/or reboot node + OnNodeStateChange(*sriovnetworkv1.SriovNetworkNodeState) (bool, bool, error) // Apply config change Apply() error + // CheckStatusChanges checks status changes on the SriovNetworkNodeState CR for configured VFs. + CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) (bool, error) } diff --git a/pkg/plugins/virtual/virtual_plugin.go b/pkg/plugins/virtual/virtual_plugin.go index a942aaa86..211cbee2c 100644 --- a/pkg/plugins/virtual/virtual_plugin.go +++ b/pkg/plugins/virtual/virtual_plugin.go @@ -67,6 +67,11 @@ func (p *VirtualPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeSt return } +// OnNodeStatusChange verify whether SriovNetworkNodeState CR status present changes on configured VFs. +func (p *VirtualPlugin) CheckStatusChanges(*sriovnetworkv1.SriovNetworkNodeState) (bool, error) { + return false, nil +} + // Apply config change func (p *VirtualPlugin) Apply() error { log.Log.Info("virtual plugin Apply()", "desired-state", p.DesireState.Spec) diff --git a/test/conformance/tests/test_sriov_operator.go b/test/conformance/tests/test_sriov_operator.go index 55711b78f..094a95cf0 100644 --- a/test/conformance/tests/test_sriov_operator.go +++ b/test/conformance/tests/test_sriov_operator.go @@ -307,8 +307,12 @@ var _ = Describe("[sriov] operator", func() { node = sriovInfos.Nodes[0] createVanillaNetworkPolicy(node, sriovInfos, numVfs, resourceName) WaitForSRIOVStable() - sriovDevice, err = sriovInfos.FindOneSriovDevice(node) + + // Update info + sriovInfos, err = cluster.DiscoverSriov(clients, operatorNamespace) Expect(err).ToNot(HaveOccurred()) + sriovDevice = findInterface(sriovInfos, node) + By("Using device " + sriovDevice.Name + " on node " + node) Eventually(func() int64 { @@ -1000,6 +1004,44 @@ var _ = Describe("[sriov] operator", func() { return runningPodB.Status.Phase }, 3*time.Minute, time.Second).Should(Equal(corev1.PodRunning)) }) + + It("should reconcile managed VF if status changes", func() { + originalMtu := sriovDevice.Mtu + newMtu := 1000 + + By("manually changing the MTU") + _, errOutput, err := runCommandOnConfigDaemon(node, "/bin/bash", "-c", fmt.Sprintf("echo %d > /sys/bus/pci/devices/%s/net/%s/mtu", newMtu, sriovDevice.PciAddress, sriovDevice.Name)) + Expect(err).ToNot(HaveOccurred()) + Expect(errOutput).To(Equal("")) + + By("waiting for the mtu to be updated in the status") + Eventually(func() sriovv1.InterfaceExts { + nodeState, err := clients.SriovNetworkNodeStates(operatorNamespace).Get(context.Background(), node, metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + return nodeState.Status.Interfaces + }, 3*time.Minute, 1*time.Second).Should(ContainElement(MatchFields( + IgnoreExtras, + Fields{ + "Name": Equal(sriovDevice.Name), + "Mtu": Equal(newMtu), + "PciAddress": Equal(sriovDevice.PciAddress), + "NumVfs": Equal(sriovDevice.NumVfs), + }))) + + By("waiting for the mtu to be restored") + Eventually(func() sriovv1.InterfaceExts { + nodeState, err := clients.SriovNetworkNodeStates(operatorNamespace).Get(context.Background(), node, metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + return nodeState.Status.Interfaces + }, 3*time.Minute, 1*time.Second).Should(ContainElement(MatchFields( + IgnoreExtras, + Fields{ + "Name": Equal(sriovDevice.Name), + "Mtu": Equal(originalMtu), + "PciAddress": Equal(sriovDevice.PciAddress), + "NumVfs": Equal(sriovDevice.NumVfs), + }))) + }) }) Context("CNI Logging level", func() { @@ -2486,8 +2528,7 @@ func WaitForSRIOVStable() { }, waitingTime, 1*time.Second).Should(BeTrue()) } -func createVanillaNetworkPolicy(node string, sriovInfos *cluster.EnabledNodes, numVfs int, resourceName string) { - // For the context of tests is better to use a Mellanox card +func findInterface(sriovInfos *cluster.EnabledNodes, node string) *sriovv1.InterfaceExt { // For the context of tests is better to use a Mellanox card // as they support all the virtual function flags // if we don't find a Mellanox card we fall back to any sriov // capability interface and skip the rate limit test. @@ -2497,6 +2538,12 @@ func createVanillaNetworkPolicy(node string, sriovInfos *cluster.EnabledNodes, n Expect(err).ToNot(HaveOccurred()) } + return intf +} + +func createVanillaNetworkPolicy(node string, sriovInfos *cluster.EnabledNodes, numVfs int, resourceName string) { + intf := findInterface(sriovInfos, node) + config := &sriovv1.SriovNetworkNodePolicy{ ObjectMeta: metav1.ObjectMeta{ GenerateName: "test-policy", @@ -2509,6 +2556,7 @@ func createVanillaNetworkPolicy(node string, sriovInfos *cluster.EnabledNodes, n }, NumVfs: numVfs, ResourceName: resourceName, + Mtu: 1500, Priority: 99, NicSelector: sriovv1.SriovNetworkNicSelector{ PfNames: []string{intf.Name},