diff --git a/bindata/scripts/udev-find-sriov-pf.sh b/bindata/scripts/udev-find-sriov-pf.sh new file mode 100755 index 0000000000..ee708f903b --- /dev/null +++ b/bindata/scripts/udev-find-sriov-pf.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +cat <<'EOF' > /host/etc/udev/disable-nm-sriov.sh +#!/bin/bash +if [ ! -d "/sys/class/net/$1/device/physfn" ]; then + exit 0 +fi + +pf_path=$(readlink /sys/class/net/$1/device/physfn -n) +pf_pci_address=${pf_path##*../} + +if [ "$2" == "$pf_pci_address" ]; then + echo "NM_UNMANAGED=1" +fi +EOF + +chmod +x /host/etc/udev/disable-nm-sriov.sh diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index ff0d8547ca..23308d3ee8 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -189,33 +189,6 @@ func New( } } -func (dn *Daemon) tryCreateUdevRuleWrapper() error { - ns, nodeStateErr := dn.client.SriovnetworkV1().SriovNetworkNodeStates(namespace).Get( - context.Background(), - dn.name, - metav1.GetOptions{}, - ) - if nodeStateErr != nil { - glog.Warningf("Could not fetch node state %s: %v, skip updating switchdev udev rules", dn.name, nodeStateErr) - } else { - err := tryCreateSwitchdevUdevRule(ns) - if err != nil { - glog.Warningf("Failed to create switchdev udev rules: %v", err) - } - } - - // update udev rule only if we are on a BM environment - // for virtual environments we don't disable the vfs as they may be used by the platform/host - if dn.platform != utils.VirtualOpenStack { - err := tryCreateNMUdevRule() - if err != nil { - return err - } - } - - return nil -} - // Run the config daemon func (dn *Daemon) Run(stopCh <-chan struct{}, exitCh <-chan error) error { if utils.ClusterType == utils.ClusterTypeOpenshift { @@ -249,8 +222,11 @@ func (dn *Daemon) Run(stopCh <-chan struct{}, exitCh <-chan error) error { } dn.storeManager = storeManager - if err := dn.tryCreateUdevRuleWrapper(); err != nil { - return err + if err := dn.prepareNMUdevRule(); err != nil { + glog.Warningf("failed to prepare udev files to disable network manager on requested VFs: %v", err) + } + if err := dn.tryCreateSwitchdevUdevRule(); err != nil { + glog.Warningf("failed to create udev files for switchdev") } var timeout int64 = 5 @@ -325,7 +301,7 @@ func (dn *Daemon) Run(stopCh <-chan struct{}, exitCh <-chan error) error { return err case <-time.After(30 * time.Second): glog.V(2).Info("Run(): period refresh") - if err := dn.tryCreateUdevRuleWrapper(); err != nil { + if err := dn.tryCreateSwitchdevUdevRule(); err != nil { glog.V(2).Info("Could not create udev rule: ", err) } } @@ -1062,8 +1038,18 @@ func (dn *Daemon) drainNode() error { return nil } -func tryCreateSwitchdevUdevRule(nodeState *sriovnetworkv1.SriovNetworkNodeState) error { +func (dn *Daemon) tryCreateSwitchdevUdevRule() error { glog.V(2).Infof("tryCreateSwitchdevUdevRule()") + nodeState, nodeStateErr := dn.client.SriovnetworkV1().SriovNetworkNodeStates(namespace).Get( + context.Background(), + dn.name, + metav1.GetOptions{}, + ) + if nodeStateErr != nil { + glog.Warningf("Could not fetch node state %s: %v, skip updating switchdev udev rules", dn.name, nodeStateErr) + return nil + } + var newContent string filePath := path.Join(utils.FilesystemRoot, "/host/etc/udev/rules.d/20-switchdev.rules") @@ -1120,11 +1106,7 @@ func tryCreateSwitchdevUdevRule(nodeState *sriovnetworkv1.SriovNetworkNodeState) return nil } -func tryCreateNMUdevRule() error { - glog.V(2).Infof("tryCreateNMUdevRule()") - dirPath := path.Join(utils.FilesystemRoot, "/host/etc/udev/rules.d") - filePath := path.Join(dirPath, "10-nm-unmanaged.rules") - +func (dn *Daemon) prepareNMUdevRule() error { // we need to remove the Red Hat Virtio network device from the udev rule configuration // if we don't remove it when running the config-daemon on a virtual node it will disconnect the node after a reboot // even that the operator should not be installed on virtual environments that are not openstack @@ -1136,34 +1118,6 @@ func tryCreateNMUdevRule() error { } supportedVfIds = append(supportedVfIds, vfID) } - newContent := fmt.Sprintf("ACTION==\"add|change|move\", ATTRS{device}==\"%s\", ENV{NM_UNMANAGED}=\"1\"\n", strings.Join(supportedVfIds, "|")) - - // add NM udev rules for renaming VF rep - newContent = newContent + "SUBSYSTEM==\"net\", ACTION==\"add|move\", ATTRS{phys_switch_id}!=\"\", ATTR{phys_port_name}==\"pf*vf*\", ENV{NM_UNMANAGED}=\"1\"\n" - - oldContent, err := os.ReadFile(filePath) - // if oldContent = newContent, don't do anything - if err == nil && newContent == string(oldContent) { - return nil - } - - glog.V(2).Infof("Old udev content '%v' and new content '%v' differ. Writing to file %v.", - strings.TrimSuffix(string(oldContent), "\n"), - strings.TrimSuffix(newContent, "\n"), - filePath) - - err = os.MkdirAll(dirPath, os.ModePerm) - if err != nil && !os.IsExist(err) { - glog.Errorf("tryCreateNMUdevRule(): failed to create dir %s: %v", dirPath, err) - return err - } - // if the file does not exist or if oldContent != newContent - // write to file and create it if it doesn't exist - err = os.WriteFile(filePath, []byte(newContent), 0666) - if err != nil { - glog.Errorf("tryCreateNMUdevRule(): fail to write file: %v", err) - return err - } - return nil + return utils.PrepareNMUdevRule(supportedVfIds) } diff --git a/pkg/daemon/daemon_test.go b/pkg/daemon/daemon_test.go index cdd1fe11bb..973882c4ab 100644 --- a/pkg/daemon/daemon_test.go +++ b/pkg/daemon/daemon_test.go @@ -3,8 +3,6 @@ package daemon import ( "context" "flag" - "os" - "path" "testing" . "github.com/onsi/ginkgo/v2" @@ -234,17 +232,6 @@ var _ = Describe("Config Daemon", func() { Expect(sut.nodeState.GetGeneration()).To(BeNumerically("==", 777)) }) - - It("configure udev rules on host", func() { - - networkManagerUdevRulePath := path.Join(utils.FilesystemRoot, "host/etc/udev/rules.d/10-nm-unmanaged.rules") - - expectedContents := `ACTION=="add|change|move", ATTRS{device}=="0x1014|0x154c", ENV{NM_UNMANAGED}="1" -SUBSYSTEM=="net", ACTION=="add|move", ATTRS{phys_switch_id}!="", ATTR{phys_port_name}=="pf*vf*", ENV{NM_UNMANAGED}="1" -` - // No need to trigger any action on config-daemon, as it checks the file in the main loop - assertFileContents(networkManagerUdevRulePath, expectedContents) - }) }) Context("isNodeDraining", func() { @@ -321,10 +308,3 @@ func updateSriovNetworkNodeState(c snclientset.Interface, nodeState *sriovnetwor Update(context.Background(), nodeState, metav1.UpdateOptions{}) return err } - -func assertFileContents(path, contents string) { - Eventually(func() (string, error) { - ret, err := os.ReadFile(path) - return string(ret), err - }, "10s").WithOffset(1).Should(Equal(contents)) -} diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index ab1a0e847e..0050725d91 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -8,6 +8,7 @@ import ( "net" "os" "os/exec" + "path" "path/filepath" "regexp" "strconv" @@ -40,6 +41,10 @@ const ( VendorMellanox = "15b3" DeviceBF2 = "a2d6" DeviceBF3 = "a2dc" + + udevFolder = "/etc/udev" + udevRulesFolder = udevFolder + "/rules.d" + udevDisableNM = "/bindata/scripts/udev-find-sriov-pf.sh" ) var InitialState sriovnetworkv1.SriovNetworkNodeState @@ -50,6 +55,8 @@ var pfPhysPortNameRe = regexp.MustCompile(`p\d+`) // FilesystemRoot used by test to mock interactions with filesystem var FilesystemRoot = "" +var SupportedVfIds []string + func init() { ClusterType = os.Getenv("CLUSTER_TYPE") } @@ -242,6 +249,11 @@ func ConfigSriovInterfaces(interfaces []sriovnetworkv1.Interface, ifaceStatuses ifaceStatus.Name, ifaceStatus.PciAddress) continue + } else { + err = RemoveUdevRule(ifaceStatus.PciAddress) + if err != nil { + return err + } } if err = resetSriovDevice(ifaceStatus); err != nil { @@ -362,15 +374,27 @@ func configSriovDevice(iface *sriovnetworkv1.Interface, ifaceStatus *sriovnetwor // set numVFs if iface.NumVfs != ifaceStatus.NumVfs { if iface.ExternallyManaged { - errMsg := fmt.Sprintf("configSriovDevice(): number of request virtual functions %d is not equal to configured virtual functions %d but the policy is configured as ExternallyManaged for device %s", iface.NumVfs, ifaceStatus.NumVfs, iface.PciAddress) - glog.Error(errMsg) - return fmt.Errorf(errMsg) - } + if iface.NumVfs > ifaceStatus.NumVfs { + errMsg := fmt.Sprintf("configSriovDevice(): number of request virtual functions %d is not equal to configured virtual functions %d but the policy is configured as ExternallyManaged for device %s", iface.NumVfs, ifaceStatus.NumVfs, iface.PciAddress) + glog.Error(errMsg) + return fmt.Errorf(errMsg) + } + } else { + // create the udev rule to disable all the vfs from network manager as this vfs are managed by the operator + err = AddUdevRule(iface.PciAddress) + if err != nil { + return err + } - err = setSriovNumVfs(iface.PciAddress, iface.NumVfs) - if err != nil { - glog.Errorf("configSriovDevice(): fail to set NumVfs for device %s", iface.PciAddress) - return err + err = setSriovNumVfs(iface.PciAddress, iface.NumVfs) + if err != nil { + err = RemoveUdevRule(iface.PciAddress) + if err != nil { + return err + } + glog.Errorf("configSriovDevice(): fail to set NumVfs for device %s", iface.PciAddress) + return err + } } } // set PF mtu @@ -886,3 +910,63 @@ func RebindVfToDefaultDriver(vfAddr string) error { glog.Warningf("RebindVfToDefaultDriver(): workaround implemented for VF %s", vfAddr) return nil } + +func PrepareNMUdevRule(supportedVfIds []string) error { + glog.V(2).Infof("PrepareNMUdevRule()") + dirPath := path.Join(FilesystemRoot, "/host/etc/udev/rules.d") + filePath := path.Join(dirPath, "10-nm-unmanaged.rules") + + // remove the old unmanaged rules file + if _, err := os.Stat(filePath); err == nil { + err = os.Remove(filePath) + if err != nil { + glog.Warningf("failed to remove the network manager global unmanaged rule on path %s: %v", filePath, err) + } + } + + // create the pf finder script for udev rules + var stdout, stderr bytes.Buffer + cmd := exec.Command("/bin/bash", path.Join(FilesystemRoot, udevDisableNM)) + cmd.Stdout = &stdout + cmd.Stderr = &stderr + if err := cmd.Run(); err != nil { + return err + } + glog.V(2).Infof("PrepareNMUdevRule(): %v", cmd.Stdout) + + //save the device list to use for udev rules + SupportedVfIds = supportedVfIds + return nil +} + +func AddUdevRule(pfPciAddress string) error { + glog.V(2).Infof("AddUdevRule(): %s", pfPciAddress) + pathFile := udevRulesFolder + udevRuleContent := fmt.Sprintf("SUBSYSTEM==\"net\", ACTION==\"add|change|move\", ATTRS{device}==\"%s\", IMPORT{program}=\"/etc/udev/disable-nm-sriov.sh $env{INTERFACE} %s\"", strings.Join(SupportedVfIds, "|"), pfPciAddress) + + err := os.MkdirAll(pathFile, os.ModePerm) + if err != nil && !os.IsExist(err) { + glog.Errorf("AddUdevRule(): failed to create dir %s: %v", pathFile, err) + return err + } + + filePath := path.Join(pathFile, fmt.Sprintf("10-nm-disable-%s.rules", pfPciAddress)) + // if the file does not exist or if oldContent != newContent + // write to file and create it if it doesn't exist + err = os.WriteFile(filePath, []byte(udevRuleContent), 0666) + if err != nil { + glog.Errorf("AddUdevRule(): fail to write file: %v", err) + return err + } + return nil +} + +func RemoveUdevRule(pfPciAddress string) error { + pathFile := udevRulesFolder + filePath := path.Join(pathFile, fmt.Sprintf("10-nm-disable-%s.rules", pfPciAddress)) + err := os.Remove(filePath) + if err != nil && !os.IsNotExist(err) { + return err + } + return nil +}