Skip to content

Commit 95362dc

Browse files
authored
Merge pull request #1019 from ashokpariya0/fix-vf-wait-race
Fix VF creation race by waiting for VF symlinks before switchdev
2 parents 5f7022b + e9aeeb1 commit 95362dc

File tree

2 files changed

+115
-23
lines changed

2 files changed

+115
-23
lines changed

pkg/host/internal/sriov/sriov.go

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1154,6 +1154,71 @@ func (s *sriov) setEswitchModeAndNumVFs(pciAddr string, desiredEswitchMode strin
11541154
return fn(pciAddr, desiredEswitchMode, numVFs)
11551155
}
11561156

1157+
func (s *sriov) waitForVFLinks(pciAddr string, expectedNum int, maxTimeout time.Duration) error {
1158+
// VF destruction is asynchronous and safe to ignore
1159+
if expectedNum == 0 {
1160+
log.Log.V(2).Info("waitForVFLinks(): skipping wait for cleanup (expected 0 VFs) - async destroy", "device", pciAddr)
1161+
return nil
1162+
}
1163+
1164+
log.Log.V(2).Info("waitForVFLinks(): waiting for VF symlinks",
1165+
"device", pciAddr,
1166+
"expected", expectedNum,
1167+
"maxTimeout", maxTimeout)
1168+
1169+
err := wait.PollUntilContextTimeout(
1170+
context.Background(),
1171+
time.Second, // poll interval
1172+
maxTimeout, // overall timeout
1173+
true, // run immediately
1174+
func(ctx context.Context) (bool, error) {
1175+
vfAddrs, err := s.dputilsLib.GetVFList(pciAddr)
1176+
if err != nil {
1177+
log.Log.V(2).Info("waitForVFLinks(): GetVFList failed, retrying", "err", err)
1178+
return false, nil
1179+
}
1180+
1181+
if len(vfAddrs) < expectedNum {
1182+
return false, nil
1183+
}
1184+
1185+
// Check all have physfn symlink
1186+
for _, vfAddr := range vfAddrs {
1187+
linkPath := filepath.Join(
1188+
vars.FilesystemRoot,
1189+
consts.SysBusPciDevices,
1190+
vfAddr,
1191+
"physfn",
1192+
)
1193+
1194+
if _, err := os.Lstat(linkPath); err != nil {
1195+
log.Log.V(2).Info(
1196+
"waitForVFLinks(): physfn symlink missing",
1197+
"vf", vfAddr,
1198+
"err", err,
1199+
)
1200+
return false, nil
1201+
}
1202+
}
1203+
1204+
log.Log.V(2).Info("waitForVFLinks(): all expected VF symlinks ready")
1205+
return true, nil
1206+
},
1207+
)
1208+
1209+
if err != nil {
1210+
return fmt.Errorf(
1211+
"timeout waiting for %d VF symlinks on %s (max %v): %w",
1212+
expectedNum,
1213+
pciAddr,
1214+
maxTimeout,
1215+
err,
1216+
)
1217+
}
1218+
1219+
return nil
1220+
}
1221+
11571222
// setEswitchModeAndNumVFsMlx configures PF eSwitch and sriov_numvfs in the following order:
11581223
// a. set eSwitchMode to legacy
11591224
// b. set the desired number of Virtual Functions
@@ -1182,6 +1247,9 @@ func (s *sriov) setEswitchModeAndNumVFsMlx(pciAddr string, desiredEswitchMode st
11821247
if err := s.SetSriovNumVfs(pciAddr, numVFs); err != nil {
11831248
return err
11841249
}
1250+
if err := s.waitForVFLinks(pciAddr, numVFs, 120*time.Second); err != nil {
1251+
return err
1252+
}
11851253

11861254
if desiredEswitchMode == sriovnetworkv1.ESwithModeSwitchDev {
11871255
if err := s.unbindAllVFsOnPF(pciAddr); err != nil {

pkg/host/internal/sriov/sriov_test.go

Lines changed: 47 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ var _ = Describe("SRIOV", func() {
9696
netlinkLibMock.EXPECT().DevLinkGetDeviceByName("pci", "0000:d8:00.0").Return(
9797
&netlink.DevlinkDevice{Attrs: netlink.DevlinkDevAttrs{Eswitch: netlink.DevlinkDevEswitchAttr{Mode: "switchdev"}}}, nil)
9898
dputilsLibMock.EXPECT().SriovConfigured("0000:d8:00.0").Return(true)
99-
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2"}, nil)
99+
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2"}, nil).AnyTimes()
100100
dputilsLibMock.EXPECT().GetDriverName("0000:d8:00.2").Return("mlx5_core", nil)
101101
dputilsLibMock.EXPECT().GetVFID("0000:d8:00.2").Return(0, nil)
102102
hostMock.EXPECT().DiscoverVDPAType("0000:d8:00.2").Return("")
@@ -204,8 +204,13 @@ var _ = Describe("SRIOV", func() {
204204
Context("ConfigSriovInterfaces", func() {
205205
It("should configure", func() {
206206
helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{
207-
Dirs: []string{"/sys/bus/pci/devices/0000:d8:00.0"},
207+
Dirs: []string{"/sys/bus/pci/devices/0000:d8:00.0",
208+
"/sys/bus/pci/devices/0000:d8:00.2",
209+
"/sys/bus/pci/devices/0000:d8:00.3"},
208210
Files: map[string][]byte{"/sys/bus/pci/devices/0000:d8:00.0/sriov_numvfs": {}},
211+
Symlinks: map[string]string{
212+
"/sys/bus/pci/devices/0000:d8:00.2/physfn": "../../0000:d8:00.0",
213+
"/sys/bus/pci/devices/0000:d8:00.3/physfn": "../../0000:d8:00.0"},
209214
})
210215

211216
dputilsLibMock.EXPECT().GetSriovVFcapacity("0000:d8:00.0").Return(2)
@@ -217,7 +222,7 @@ var _ = Describe("SRIOV", func() {
217222
hostMock.EXPECT().RemovePersistPFNameUdevRule("0000:d8:00.0").Return(nil)
218223
hostMock.EXPECT().RemoveVfRepresentorUdevRule("0000:d8:00.0").Return(nil)
219224
hostMock.EXPECT().AddDisableNMUdevRule("0000:d8:00.0").Return(nil)
220-
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2", "0000:d8:00.3"}, nil)
225+
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2", "0000:d8:00.3"}, nil).AnyTimes()
221226
pfLinkMock := netlinkMockPkg.NewMockLink(testCtrl)
222227
netlinkLibMock.EXPECT().LinkByName("enp216s0f0np0").Return(pfLinkMock, nil).Times(3)
223228
pfLinkMock.EXPECT().Attrs().Return(&netlink.LinkAttrs{Flags: 0, EncapType: "ether"})
@@ -283,9 +288,20 @@ var _ = Describe("SRIOV", func() {
283288
vars.ParallelNicConfig = false
284289
}()
285290
helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{
286-
Dirs: []string{"/sys/bus/pci/devices/0000:d8:00.0", "/sys/bus/pci/devices/0000:d8:00.1"},
291+
Dirs: []string{"/sys/bus/pci/devices/0000:d8:00.0",
292+
"/sys/bus/pci/devices/0000:d8:00.1",
293+
"/sys/bus/pci/devices/0000:d8:00.2", // VF0 of PF0
294+
"/sys/bus/pci/devices/0000:d8:00.3", // VF1 of PF0
295+
"/sys/bus/pci/devices/0000:d8:00.4", // VF0 of PF1
296+
"/sys/bus/pci/devices/0000:d8:00.5"},
287297
Files: map[string][]byte{"/sys/bus/pci/devices/0000:d8:00.0/sriov_numvfs": {},
288298
"/sys/bus/pci/devices/0000:d8:00.1/sriov_numvfs": {}},
299+
Symlinks: map[string]string{
300+
"/sys/bus/pci/devices/0000:d8:00.2/physfn": "../../0000:d8:00.0",
301+
"/sys/bus/pci/devices/0000:d8:00.3/physfn": "../../0000:d8:00.0",
302+
"/sys/bus/pci/devices/0000:d8:00.4/physfn": "../../0000:d8:00.1",
303+
"/sys/bus/pci/devices/0000:d8:00.5/physfn": "../../0000:d8:00.1",
304+
},
289305
})
290306

291307
dputilsLibMock.EXPECT().GetSriovVFcapacity("0000:d8:00.0").Return(2)
@@ -297,7 +313,7 @@ var _ = Describe("SRIOV", func() {
297313
hostMock.EXPECT().RemovePersistPFNameUdevRule("0000:d8:00.0").Return(nil)
298314
hostMock.EXPECT().RemoveVfRepresentorUdevRule("0000:d8:00.0").Return(nil)
299315
hostMock.EXPECT().AddDisableNMUdevRule("0000:d8:00.0").Return(nil)
300-
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2", "0000:d8:00.3"}, nil)
316+
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2", "0000:d8:00.3"}, nil).AnyTimes()
301317
pfLinkMock := netlinkMockPkg.NewMockLink(testCtrl)
302318
netlinkLibMock.EXPECT().LinkByName("enp216s0f0np0").Return(pfLinkMock, nil).Times(3)
303319
pfLinkMock.EXPECT().Attrs().Return(&netlink.LinkAttrs{Flags: 0, EncapType: "ether"})
@@ -332,7 +348,7 @@ var _ = Describe("SRIOV", func() {
332348
hostMock.EXPECT().RemovePersistPFNameUdevRule("0000:d8:00.1").Return(nil)
333349
hostMock.EXPECT().RemoveVfRepresentorUdevRule("0000:d8:00.1").Return(nil)
334350
hostMock.EXPECT().AddDisableNMUdevRule("0000:d8:00.1").Return(nil)
335-
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.1").Return([]string{"0000:d8:00.4", "0000:d8:00.5"}, nil)
351+
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.1").Return([]string{"0000:d8:00.4", "0000:d8:00.5"}, nil).AnyTimes()
336352
pf1LinkMock := netlinkMockPkg.NewMockLink(testCtrl)
337353
netlinkLibMock.EXPECT().LinkByName("enp216s0f0np1").Return(pf1LinkMock, nil).Times(3)
338354
pf1LinkMock.EXPECT().Attrs().Return(&netlink.LinkAttrs{Flags: 0, EncapType: "ether"})
@@ -412,8 +428,9 @@ var _ = Describe("SRIOV", func() {
412428

413429
It("should configure IB", func() {
414430
helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{
415-
Dirs: []string{"/sys/bus/pci/devices/0000:d8:00.0"},
416-
Files: map[string][]byte{"/sys/bus/pci/devices/0000:d8:00.0/sriov_numvfs": {}},
431+
Dirs: []string{"/sys/bus/pci/devices/0000:d8:00.0", "/sys/bus/pci/devices/0000:d8:00.2"},
432+
Files: map[string][]byte{"/sys/bus/pci/devices/0000:d8:00.0/sriov_numvfs": {}},
433+
Symlinks: map[string]string{"/sys/bus/pci/devices/0000:d8:00.2/physfn": "../../0000:d8:00.0"},
417434
})
418435

419436
dputilsLibMock.EXPECT().GetSriovVFcapacity("0000:d8:00.0").Return(1)
@@ -425,7 +442,7 @@ var _ = Describe("SRIOV", func() {
425442
hostMock.EXPECT().RemovePersistPFNameUdevRule("0000:d8:00.0").Return(nil)
426443
hostMock.EXPECT().RemoveVfRepresentorUdevRule("0000:d8:00.0").Return(nil)
427444
hostMock.EXPECT().AddDisableNMUdevRule("0000:d8:00.0").Return(nil)
428-
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2"}, nil)
445+
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2"}, nil).AnyTimes()
429446
pfLinkMock := netlinkMockPkg.NewMockLink(testCtrl)
430447
netlinkLibMock.EXPECT().LinkByName("enp216s0f0np0").Return(pfLinkMock, nil).Times(2)
431448
netlinkLibMock.EXPECT().IsLinkAdminStateUp(pfLinkMock).Return(false)
@@ -464,8 +481,9 @@ var _ = Describe("SRIOV", func() {
464481

465482
It("should configure switchdev", func() {
466483
helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{
467-
Dirs: []string{"/sys/bus/pci/devices/0000:d8:00.0"},
468-
Files: map[string][]byte{"/sys/bus/pci/devices/0000:d8:00.0/sriov_numvfs": {}},
484+
Dirs: []string{"/sys/bus/pci/devices/0000:d8:00.0", "/sys/bus/pci/devices/0000:d8:00.2"},
485+
Files: map[string][]byte{"/sys/bus/pci/devices/0000:d8:00.0/sriov_numvfs": {}},
486+
Symlinks: map[string]string{"/sys/bus/pci/devices/0000:d8:00.2/physfn": "../../0000:d8:00.0"},
469487
})
470488

471489
dputilsLibMock.EXPECT().GetSriovVFcapacity("0000:d8:00.0").Return(1)
@@ -478,7 +496,7 @@ var _ = Describe("SRIOV", func() {
478496
hostMock.EXPECT().AddPersistPFNameUdevRule("0000:d8:00.0", "enp216s0f0np0").Return(nil)
479497
hostMock.EXPECT().EnableHwTcOffload("enp216s0f0np0").Return(nil)
480498
hostMock.EXPECT().GetDevlinkDeviceParam("0000:d8:00.0", "flow_steering_mode").Return("", syscall.EINVAL)
481-
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2"}, nil).Times(2)
499+
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2"}, nil).AnyTimes()
482500
pfLinkMock := netlinkMockPkg.NewMockLink(testCtrl)
483501
netlinkLibMock.EXPECT().LinkByName("enp216s0f0np0").Return(pfLinkMock, nil).Times(2)
484502
netlinkLibMock.EXPECT().IsLinkAdminStateUp(pfLinkMock).Return(false)
@@ -533,8 +551,9 @@ var _ = Describe("SRIOV", func() {
533551

534552
It("should configure switchdev even if steering mode is not detected", func() {
535553
helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{
536-
Dirs: []string{"/sys/bus/pci/devices/0000:d8:00.0"},
537-
Files: map[string][]byte{"/sys/bus/pci/devices/0000:d8:00.0/sriov_numvfs": {}},
554+
Dirs: []string{"/sys/bus/pci/devices/0000:d8:00.0", "/sys/bus/pci/devices/0000:d8:00.2"},
555+
Files: map[string][]byte{"/sys/bus/pci/devices/0000:d8:00.0/sriov_numvfs": {}},
556+
Symlinks: map[string]string{"/sys/bus/pci/devices/0000:d8:00.2/physfn": "../../0000:d8:00.0"},
538557
})
539558

540559
dputilsLibMock.EXPECT().GetSriovVFcapacity("0000:d8:00.0").Return(1)
@@ -547,7 +566,7 @@ var _ = Describe("SRIOV", func() {
547566
hostMock.EXPECT().AddPersistPFNameUdevRule("0000:d8:00.0", "enp216s0f0np0").Return(nil)
548567
hostMock.EXPECT().EnableHwTcOffload("enp216s0f0np0").Return(nil)
549568
hostMock.EXPECT().GetDevlinkDeviceParam("0000:d8:00.0", "flow_steering_mode").Return("", nil)
550-
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2"}, nil).Times(2)
569+
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2"}, nil).AnyTimes()
551570
pfLinkMock := netlinkMockPkg.NewMockLink(testCtrl)
552571
netlinkLibMock.EXPECT().LinkByName("enp216s0f0np0").Return(pfLinkMock, nil).Times(2)
553572
netlinkLibMock.EXPECT().IsLinkAdminStateUp(pfLinkMock).Return(false)
@@ -602,8 +621,9 @@ var _ = Describe("SRIOV", func() {
602621

603622
It("should configure switchdev even if steering mode is already smfs", func() {
604623
helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{
605-
Dirs: []string{"/sys/bus/pci/devices/0000:d8:00.0"},
606-
Files: map[string][]byte{"/sys/bus/pci/devices/0000:d8:00.0/sriov_numvfs": {}},
624+
Dirs: []string{"/sys/bus/pci/devices/0000:d8:00.0", "/sys/bus/pci/devices/0000:d8:00.2"},
625+
Files: map[string][]byte{"/sys/bus/pci/devices/0000:d8:00.0/sriov_numvfs": {}},
626+
Symlinks: map[string]string{"/sys/bus/pci/devices/0000:d8:00.2/physfn": "../../0000:d8:00.0"},
607627
})
608628

609629
dputilsLibMock.EXPECT().GetSriovVFcapacity("0000:d8:00.0").Return(1)
@@ -616,7 +636,7 @@ var _ = Describe("SRIOV", func() {
616636
hostMock.EXPECT().AddPersistPFNameUdevRule("0000:d8:00.0", "enp216s0f0np0").Return(nil)
617637
hostMock.EXPECT().EnableHwTcOffload("enp216s0f0np0").Return(nil)
618638
hostMock.EXPECT().GetDevlinkDeviceParam("0000:d8:00.0", "flow_steering_mode").Return("smfs", nil)
619-
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2"}, nil).Times(2)
639+
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2"}, nil).AnyTimes()
620640
pfLinkMock := netlinkMockPkg.NewMockLink(testCtrl)
621641
netlinkLibMock.EXPECT().LinkByName("enp216s0f0np0").Return(pfLinkMock, nil).Times(2)
622642
netlinkLibMock.EXPECT().IsLinkAdminStateUp(pfLinkMock).Return(false)
@@ -671,8 +691,9 @@ var _ = Describe("SRIOV", func() {
671691

672692
It("should configure configure swtichdev by switching back to legacy mode and configure smfs", func() {
673693
helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{
674-
Dirs: []string{"/sys/bus/pci/devices/0000:d8:00.0"},
675-
Files: map[string][]byte{"/sys/bus/pci/devices/0000:d8:00.0/sriov_numvfs": {}},
694+
Dirs: []string{"/sys/bus/pci/devices/0000:d8:00.0", "/sys/bus/pci/devices/0000:d8:00.2"},
695+
Files: map[string][]byte{"/sys/bus/pci/devices/0000:d8:00.0/sriov_numvfs": {}},
696+
Symlinks: map[string]string{"/sys/bus/pci/devices/0000:d8:00.2/physfn": "../../0000:d8:00.0"},
676697
})
677698

678699
dputilsLibMock.EXPECT().GetSriovVFcapacity("0000:d8:00.0").Return(1)
@@ -685,7 +706,7 @@ var _ = Describe("SRIOV", func() {
685706
hostMock.EXPECT().AddPersistPFNameUdevRule("0000:d8:00.0", "enp216s0f0np0").Return(nil)
686707
hostMock.EXPECT().EnableHwTcOffload("enp216s0f0np0").Return(nil)
687708
hostMock.EXPECT().GetDevlinkDeviceParam("0000:d8:00.0", "flow_steering_mode").Return("test", nil)
688-
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2"}, nil).Times(4)
709+
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2"}, nil).AnyTimes()
689710
pfLinkMock := netlinkMockPkg.NewMockLink(testCtrl)
690711
netlinkLibMock.EXPECT().LinkByName("enp216s0f0np0").Return(pfLinkMock, nil).Times(2)
691712
netlinkLibMock.EXPECT().IsLinkAdminStateUp(pfLinkMock).Return(false)
@@ -972,8 +993,11 @@ var _ = Describe("SRIOV", func() {
972993

973994
It("should configure - skipVFConfiguration is true", func() {
974995
helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{
975-
Dirs: []string{"/sys/bus/pci/devices/0000:d8:00.0"},
996+
Dirs: []string{"/sys/bus/pci/devices/0000:d8:00.0",
997+
"/sys/bus/pci/devices/0000:d8:00.2", "/sys/bus/pci/devices/0000:d8:00.3"},
976998
Files: map[string][]byte{"/sys/bus/pci/devices/0000:d8:00.0/sriov_numvfs": {}},
999+
Symlinks: map[string]string{"/sys/bus/pci/devices/0000:d8:00.2/physfn": "../../0000:d8:00.0",
1000+
"/sys/bus/pci/devices/0000:d8:00.3/physfn": "../../0000:d8:00.0"},
9771001
})
9781002

9791003
dputilsLibMock.EXPECT().GetSriovVFcapacity("0000:d8:00.0").Return(2)
@@ -986,7 +1010,7 @@ var _ = Describe("SRIOV", func() {
9861010
hostMock.EXPECT().RemovePersistPFNameUdevRule("0000:d8:00.0").Return(nil)
9871011
hostMock.EXPECT().RemoveVfRepresentorUdevRule("0000:d8:00.0").Return(nil)
9881012
hostMock.EXPECT().AddDisableNMUdevRule("0000:d8:00.0").Return(nil)
989-
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2", "0000:d8:00.3"}, nil)
1013+
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2", "0000:d8:00.3"}, nil).AnyTimes()
9901014
hostMock.EXPECT().Unbind("0000:d8:00.2").Return(nil)
9911015
hostMock.EXPECT().Unbind("0000:d8:00.3").Return(nil)
9921016

0 commit comments

Comments
 (0)