diff --git a/tests/ocp/sriov/internal/sriovenv/sriovenv.go b/tests/ocp/sriov/internal/sriovenv/sriovenv.go index e75270ac4..07f4492cd 100644 --- a/tests/ocp/sriov/internal/sriovenv/sriovenv.go +++ b/tests/ocp/sriov/internal/sriovenv/sriovenv.go @@ -248,6 +248,8 @@ func initVFWithDevType(name, deviceID, interfaceName, vendor, devType string, vf policy.Definition.Spec.NicSelector.DeviceID = deviceID } + policy.Definition.Spec.EswitchMode = "legacy" + if _, err := policy.Create(); err != nil { klog.V(90).Infof("Failed to create policy on node %q: %v", nodeName, err) @@ -632,7 +634,7 @@ func VerifyLinkStateConfiguration(networkName, namespace, description string, // ============================================================================ func verifySpoofCheck(clientPod *pod.Builder, interfaceName, expectedState string) error { - // Get node name and MAC + // Get node name refreshedPod, err := pod.Pull(APIClient, clientPod.Definition.Name, clientPod.Definition.Namespace) if err != nil { return fmt.Errorf("failed to refresh pod: %w", err) @@ -643,11 +645,6 @@ func verifySpoofCheck(clientPod *pod.Builder, interfaceName, expectedState strin return fmt.Errorf("pod node name is empty") } - mac, err := ExtractPodInterfaceMAC(clientPod, "net1") - if err != nil { - return fmt.Errorf("failed to extract MAC: %w", err) - } - // Execute on node via cluster helper outputMap, err := cluster.ExecCmdWithStdout(APIClient, fmt.Sprintf("ip link show %s", interfaceName), metav1.ListOptions{LabelSelector: fmt.Sprintf("kubernetes.io/hostname=%s", nodeName)}) @@ -671,19 +668,33 @@ func verifySpoofCheck(clientPod *pod.Builder, interfaceName, expectedState strin } } - // Find line with MAC and check spoof state + // Search VF lines for the expected spoof check state. + // Note: In switchdev mode (e.g., Mellanox CX6-DX), `ip link show <interface>` reports VF MACs as + // 00:00:00:00:00:00 regardless of the MAC assigned inside the pod, so matching by MAC address + // is unreliable.
Instead, we check any VF line for the expected state, since the SR-IOV policy + // applies a uniform spoof checking configuration to all VFs it creates. + vfLinesFound := 0 + for _, line := range strings.Split(output, "\n") { - if strings.Contains(line, mac) { - if strings.Contains(line, fmt.Sprintf("spoof checking %s", expectedState)) || - strings.Contains(line, fmt.Sprintf("spoofchk %s", expectedState)) { - klog.V(90).Infof("Spoof check verified: %s for MAC %s", expectedState, mac) + if !strings.Contains(line, "vf ") { + continue + } - return nil - } + vfLinesFound++ + + if strings.Contains(line, fmt.Sprintf("spoof checking %s", expectedState)) || + strings.Contains(line, fmt.Sprintf("spoofchk %s", expectedState)) { + klog.V(90).Infof("Spoof check verified: %s on interface %s", expectedState, interfaceName) + + return nil } } - return fmt.Errorf("spoof check %s not found for MAC %s", expectedState, mac) + if vfLinesFound == 0 { + return fmt.Errorf("no VF information found in ip link show output for interface %s", interfaceName) + } + + return fmt.Errorf("spoof check %s not found for interface %s", expectedState, interfaceName) } // ============================================================================ diff --git a/tests/ocp/sriov/tests/exposemtu.go b/tests/ocp/sriov/tests/exposemtu.go index 935adcdbb..fb1301b02 100644 --- a/tests/ocp/sriov/tests/exposemtu.go +++ b/tests/ocp/sriov/tests/exposemtu.go @@ -24,6 +24,7 @@ var _ = Describe("SRIOV: Expose MTU:", Ordered, Label(tsparams.LabelExposeMTUTes workerNodeList []*nodes.Builder err error sriovInterfacesUnderTest []string + vfNum int ) BeforeAll(func() { @@ -38,6 +39,11 @@ var _ = Describe("SRIOV: Expose MTU:", Ordered, Label(tsparams.LabelExposeMTUTes sriovInterfacesUnderTest, err = SriovOcpConfig.GetSriovInterfaces(1) Expect(err).ToNot(HaveOccurred(), "Failed to retrieve SR-IOV interfaces for testing") + By("Loading VF configuration") + + vfNum, err = SriovOcpConfig.GetVFNum() + 
Expect(err).ToNot(HaveOccurred(), "Failed to get VF number") + By("Verifying if expose MTU tests can be executed on given cluster") err = sriovocpenv.DoesClusterHaveEnoughNodes(1, 1) @@ -65,19 +71,19 @@ var _ = Describe("SRIOV: Expose MTU:", Ordered, Label(tsparams.LabelExposeMTUTes }) It("netdev 1500", reportxml.ID("73786"), func() { - testExposeMTU(1500, sriovInterfacesUnderTest, "netdevice") + testExposeMTU(1500, sriovInterfacesUnderTest, "netdevice", vfNum) }) It("netdev 9000", reportxml.ID("73787"), func() { - testExposeMTU(9000, sriovInterfacesUnderTest, "netdevice") + testExposeMTU(9000, sriovInterfacesUnderTest, "netdevice", vfNum) }) It("vfio 1500", reportxml.ID("73789"), func() { - testExposeMTU(1500, sriovInterfacesUnderTest, "vfio-pci") + testExposeMTU(1500, sriovInterfacesUnderTest, "vfio-pci", vfNum) }) It("vfio 9000", reportxml.ID("73790"), func() { - testExposeMTU(9000, sriovInterfacesUnderTest, "vfio-pci") + testExposeMTU(9000, sriovInterfacesUnderTest, "vfio-pci", vfNum) }) It("netdev 2 Policies with different MTU", reportxml.ID("73788"), func() { @@ -93,7 +99,7 @@ var _ = Describe("SRIOV: Expose MTU:", Ordered, Label(tsparams.LabelExposeMTUTes sriovAndResourceName5000, SriovOcpConfig.SriovOperatorNamespace, sriovAndResourceName5000, - 5, + vfNum, []string{fmt.Sprintf("%s#0-1", sriovInterfacesUnderTest[0])}, SriovOcpConfig.WorkerLabelMap). WithDevType("netdevice").WithMTU(5000).Create() Expect(err).ToNot(HaveOccurred(), "Failed to configure SR-IOV policy with mtu 5000") @@ -103,7 +109,7 @@ var _ = Describe("SRIOV: Expose MTU:", Ordered, Label(tsparams.LabelExposeMTUTes sriovAndResourceName9000, SriovOcpConfig.SriovOperatorNamespace, sriovAndResourceName9000, - 5, + vfNum, []string{fmt.Sprintf("%s#2-3", sriovInterfacesUnderTest[0])}, SriovOcpConfig.WorkerLabelMap). 
WithDevType("netdevice").WithMTU(9000).Create() Expect(err).ToNot(HaveOccurred(), "Failed to configure SR-IOV policy with mtu 9000") @@ -166,7 +172,7 @@ var _ = Describe("SRIOV: Expose MTU:", Ordered, Label(tsparams.LabelExposeMTUTes }) }) -func testExposeMTU(mtu int, interfacesUnderTest []string, devType string) { +func testExposeMTU(mtu int, interfacesUnderTest []string, devType string, vfsNumber int) { By("Creating SR-IOV policy") const sriovAndResourceNameExposeMTU = "exposemtu" @@ -176,7 +182,7 @@ func testExposeMTU(mtu int, interfacesUnderTest []string, devType string) { sriovAndResourceNameExposeMTU, SriovOcpConfig.SriovOperatorNamespace, sriovAndResourceNameExposeMTU, - 5, + vfsNumber, interfacesUnderTest, SriovOcpConfig.WorkerLabelMap).WithDevType(devType).WithMTU(mtu) err := sriovoperator.CreateSriovPolicyAndWaitUntilItsApplied( diff --git a/tests/ocp/sriov/tests/metricsExporter.go b/tests/ocp/sriov/tests/metricsExporter.go index 4bcfbf241..f0d0ec681 100644 --- a/tests/ocp/sriov/tests/metricsExporter.go +++ b/tests/ocp/sriov/tests/metricsExporter.go @@ -304,9 +304,22 @@ func runMetricsNettoVfioTests(clientPf, serverPf, clientWorker, serverWorker, de "Failed to add server mac address in client pod mac table. Output: %s", outputbuf.String())) By("ICMP check between client and server pods") - Eventually(func() error { - return sriovocpenv.ICMPConnectivityCheck(cPod, []string{tsparams.ServerIPv4IPAddress}, "net1") - }, 1*time.Minute, 2*time.Second).Should(HaveOccurred(), "ICMP fail scenario could not be executed") + + // Derive the expected ICMP outcome from the device type that defineMetricsPolicy() actually + // configured on the server policy, rather than re-checking the vendor ID here. + // With true vfio-pci (Intel), the VF is exclusively owned by DPDK and ICMP fails. + // With netdevice+RDMA (Mellanox "vfiopci" mode), the kernel network stack remains active + // on the VF and ICMP succeeds. 
+ if serverResources.policy.Definition.Spec.DeviceType == "vfio-pci" { + Eventually(func() error { + return sriovocpenv.ICMPConnectivityCheck(cPod, []string{tsparams.ServerIPv4IPAddress}, "net1") + }, 1*time.Minute, 2*time.Second).Should(HaveOccurred(), "ICMP fail scenario could not be executed") + } else { + Eventually(func() error { + return sriovocpenv.ICMPConnectivityCheck(cPod, []string{tsparams.ServerIPv4IPAddress}, "net1") + }, 1*time.Minute, 2*time.Second).ShouldNot(HaveOccurred(), + "ICMP connectivity check failed for netdevice+RDMA server") + } checkMetricsWithPromQL() } @@ -472,16 +485,22 @@ func defineMetricsDPDKPod(role, devType, worker string) *pod.Builder { } func createMetricsTestResources(cRes, sRes metricsTestResource) *pod.Builder { + // Create all policies first so the SR-IOV daemon sees them in the same reconcile generation. + // Creating policies sequentially with network/NAD waits in between introduces a gap that + // causes the daemon to process them in separate generations, leading to premature + // WaitForSriovStable returns before all device plugin resources are registered. for _, res := range []metricsTestResource{cRes, sRes} { - By("Create SriovNetworkNodePolicy") + By(fmt.Sprintf("Create SriovNetworkNodePolicy %s", res.policy.Definition.Name)) _, err := res.policy.Create() Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Failed to Create SriovNetworkNodePolicy %s", res.policy.Definition.Name)) + } - By("Create SriovNetwork") + for _, res := range []metricsTestResource{cRes, sRes} { + By(fmt.Sprintf("Create SriovNetwork %s", res.network.Definition.Name)) - _, err = res.network.Create() + _, err := res.network.Create() Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Failed to create SriovNetwork %s", res.network.Definition.Name))