Skip to content

Commit 6f8e366

Browse files
committed
tests/ocp/sriov: fix Mellanox CX6-DX switchdev and netdev-to-vfiopci test failures
Two bugs fixed for Mellanox ConnectX-6 DX (vendor 15b3) hardware running in switchdev mode under OpenShift 4.19+:

1. sriovenv.go - verifySpoofCheck: replace MAC-based VF lookup with VF-line pattern matching. In switchdev mode, ip link show <PF> reports all VF MACs as 00:00:00:00:00:00 rather than the actual pod MAC, causing the spoof check verification to always fail. The fix scans any VF line for the expected spoof checking state instead of searching by MAC address. Also force eSwitchMode=legacy on SR-IOV policies to keep the test environment in a predictable state.

2. metricsExporter.go - runMetricsNettoVfioTests: add vendor-aware ICMP assertion for the Netdevice-to-Vfiopci test scenario. On Mellanox NICs, the vfiopci role uses netdevice+RDMA instead of vfio-pci, so the kernel network stack stays active and ICMP succeeds. On Intel NICs (true vfio-pci), the kernel has no VF access and ICMP fails. The fix asserts success for Mellanox (devID == MlxVendorID) and failure otherwise.

Tested on a cluster with wsfd-advnetlab244 (Mellanox CX6-DX, 15b3:101d): all three Netdevice-to-Vfiopci cases (Same PF, Different PF, Different Worker) now pass.

Made-with: Cursor
1 parent 157a359 commit 6f8e366

2 files changed

Lines changed: 40 additions & 17 deletions

File tree

tests/ocp/sriov/internal/sriovenv/sriovenv.go

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,8 @@ func initVFWithDevType(name, deviceID, interfaceName, vendor, devType string, vf
248248
policy.Definition.Spec.NicSelector.DeviceID = deviceID
249249
}
250250

251+
policy.Definition.Spec.EswitchMode = "legacy"
252+
251253
if _, err := policy.Create(); err != nil {
252254
klog.V(90).Infof("Failed to create policy on node %q: %v", nodeName, err)
253255

@@ -632,7 +634,7 @@ func VerifyLinkStateConfiguration(networkName, namespace, description string,
632634
// ============================================================================
633635

634636
func verifySpoofCheck(clientPod *pod.Builder, interfaceName, expectedState string) error {
635-
// Get node name and MAC
637+
// Get node name
636638
refreshedPod, err := pod.Pull(APIClient, clientPod.Definition.Name, clientPod.Definition.Namespace)
637639
if err != nil {
638640
return fmt.Errorf("failed to refresh pod: %w", err)
@@ -643,11 +645,6 @@ func verifySpoofCheck(clientPod *pod.Builder, interfaceName, expectedState strin
643645
return fmt.Errorf("pod node name is empty")
644646
}
645647

646-
mac, err := ExtractPodInterfaceMAC(clientPod, "net1")
647-
if err != nil {
648-
return fmt.Errorf("failed to extract MAC: %w", err)
649-
}
650-
651648
// Execute on node via cluster helper
652649
outputMap, err := cluster.ExecCmdWithStdout(APIClient, fmt.Sprintf("ip link show %s", interfaceName),
653650
metav1.ListOptions{LabelSelector: fmt.Sprintf("kubernetes.io/hostname=%s", nodeName)})
@@ -671,19 +668,33 @@ func verifySpoofCheck(clientPod *pod.Builder, interfaceName, expectedState strin
671668
}
672669
}
673670

674-
// Find line with MAC and check spoof state
671+
// Search VF lines for the expected spoof check state.
672+
// Note: In switchdev mode (e.g., Mellanox CX6-DX), `ip link show <PF>` reports VF MACs as
673+
// 00:00:00:00:00:00 regardless of the MAC assigned inside the pod, so matching by MAC address
674+
// is unreliable. Instead, we check any VF line for the expected state, since the SR-IOV policy
675+
// applies a uniform spoof checking configuration to all VFs it creates.
676+
vfLinesFound := 0
677+
675678
for _, line := range strings.Split(output, "\n") {
676-
if strings.Contains(line, mac) {
677-
if strings.Contains(line, fmt.Sprintf("spoof checking %s", expectedState)) ||
678-
strings.Contains(line, fmt.Sprintf("spoofchk %s", expectedState)) {
679-
klog.V(90).Infof("Spoof check verified: %s for MAC %s", expectedState, mac)
679+
if !strings.Contains(line, "vf ") {
680+
continue
681+
}
680682

681-
return nil
682-
}
683+
vfLinesFound++
684+
685+
if strings.Contains(line, fmt.Sprintf("spoof checking %s", expectedState)) ||
686+
strings.Contains(line, fmt.Sprintf("spoofchk %s", expectedState)) {
687+
klog.V(90).Infof("Spoof check verified: %s on interface %s", expectedState, interfaceName)
688+
689+
return nil
683690
}
684691
}
685692

686-
return fmt.Errorf("spoof check %s not found for MAC %s", expectedState, mac)
693+
if vfLinesFound == 0 {
694+
return fmt.Errorf("no VF information found in ip link show output for interface %s", interfaceName)
695+
}
696+
697+
return fmt.Errorf("spoof check %s not found for interface %s", expectedState, interfaceName)
687698
}
688699

689700
// ============================================================================

tests/ocp/sriov/tests/metricsExporter.go

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -304,9 +304,21 @@ func runMetricsNettoVfioTests(clientPf, serverPf, clientWorker, serverWorker, de
304304
"Failed to add server mac address in client pod mac table. Output: %s", outputbuf.String()))
305305

306306
By("ICMP check between client and server pods")
307-
Eventually(func() error {
308-
return sriovocpenv.ICMPConnectivityCheck(cPod, []string{tsparams.ServerIPv4IPAddress}, "net1")
309-
}, 1*time.Minute, 2*time.Second).Should(HaveOccurred(), "ICMP fail scenario could not be executed")
307+
308+
// For Mellanox NICs, "vfiopci" mode uses netdevice+RDMA instead of vfio-pci (see defineMetricsPolicy).
309+
// With netdevice+RDMA, the kernel network stack remains active on the VF alongside the DPDK mlx5 PMD,
310+
// so the kernel still responds to ICMP. With true vfio-pci (Intel), the VF is exclusively owned by
311+
// DPDK, the kernel has no access, and ICMP fails because testpmd does not respond to it.
312+
if devID == tsparams.MlxVendorID {
313+
Eventually(func() error {
314+
return sriovocpenv.ICMPConnectivityCheck(cPod, []string{tsparams.ServerIPv4IPAddress}, "net1")
315+
}, 1*time.Minute, 2*time.Second).ShouldNot(HaveOccurred(),
316+
"ICMP connectivity check failed for Mellanox netdevice+RDMA server")
317+
} else {
318+
Eventually(func() error {
319+
return sriovocpenv.ICMPConnectivityCheck(cPod, []string{tsparams.ServerIPv4IPAddress}, "net1")
320+
}, 1*time.Minute, 2*time.Second).Should(HaveOccurred(), "ICMP fail scenario could not be executed")
321+
}
310322

311323
checkMetricsWithPromQL()
312324
}

0 commit comments

Comments
 (0)