Skip to content

Commit e7dce27

Browse files
committed
bug: rdma exclusive handling
In case an RDMA device in exclusive mode is in use by a Pod, the DP was not reporting it as a resource after DP restart. The following changes are introduced in RdmaSpec: - IsRdma: in case of no RDMA resources, check if the netlink "enable_rdma" param is available. - GetRdmaDeviceSpec: the device specs are retrieved dynamically and not at the discovery stage as before. Computing the RDMA specs dynamically, rather than at discovery, solves the following scenario for exclusive mode: - Discover RDMA device - Allocate to Pod (resources are hidden on host) - Restart DP pod - Deallocate - Reallocate Fixes #565 Signed-off-by: Fred Rolland <[email protected]>
1 parent 39a434e commit e7dce27

File tree

7 files changed

+100
-38
lines changed

7 files changed

+100
-38
lines changed

pkg/devices/rdma.go

Lines changed: 39 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,55 @@
1818
package devices
1919

2020
import (
21+
"github.com/golang/glog"
2122
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
2223

2324
"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/types"
2425
"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/utils"
2526
)
2627

2728
type rdmaSpec struct {
28-
isSupportRdma bool
29-
deviceSpec []*pluginapi.DeviceSpec
29+
deviceID string
30+
deviceType types.DeviceType
3031
}
3132

32-
func newRdmaSpec(rdmaResources []string) types.RdmaSpec {
33+
// NewRdmaSpec returns the RdmaSpec
34+
func NewRdmaSpec(dt types.DeviceType, id string) types.RdmaSpec {
35+
if dt == types.AcceleratorType {
36+
return nil
37+
}
38+
return &rdmaSpec{deviceID: id, deviceType: dt}
39+
}
40+
41+
func (r *rdmaSpec) IsRdma() bool {
42+
if len(r.getRdmaResources()) > 0 {
43+
return true
44+
}
45+
// Checking for netlink param for exclusive RDMA use case
46+
rdma, err := utils.HasRdmaParam(r.deviceID)
47+
if err != nil {
48+
glog.Infof("HasRdmaParam(): unable to get Netlink RDMA param for device %s : %q", r.deviceID, err)
49+
return false
50+
}
51+
return rdma
52+
}
53+
54+
func (r *rdmaSpec) getRdmaResources() []string {
55+
//nolint: exhaustive
56+
switch r.deviceType {
57+
case types.NetDeviceType:
58+
return utils.GetRdmaProvider().GetRdmaDevicesForPcidev(r.deviceID)
59+
case types.AuxNetDeviceType:
60+
return utils.GetRdmaProvider().GetRdmaDevicesForAuxdev(r.deviceID)
61+
default:
62+
return make([]string, 0)
63+
}
64+
}
65+
66+
func (r *rdmaSpec) GetRdmaDeviceSpec() []*pluginapi.DeviceSpec {
67+
rdmaResources := r.getRdmaResources()
3368
deviceSpec := make([]*pluginapi.DeviceSpec, 0)
34-
isSupportRdma := false
3569
if len(rdmaResources) > 0 {
36-
isSupportRdma = true
3770
for _, res := range rdmaResources {
3871
resRdmaDevices := utils.GetRdmaProvider().GetRdmaCharDevices(res)
3972
for _, rdmaDevice := range resRdmaDevices {
@@ -45,26 +78,5 @@ func newRdmaSpec(rdmaResources []string) types.RdmaSpec {
4578
}
4679
}
4780
}
48-
49-
return &rdmaSpec{isSupportRdma: isSupportRdma, deviceSpec: deviceSpec}
50-
}
51-
52-
// NewRdmaSpec returns the RdmaSpec for PCI address
53-
func NewRdmaSpec(pciAddr string) types.RdmaSpec {
54-
rdmaResources := utils.GetRdmaProvider().GetRdmaDevicesForPcidev(pciAddr)
55-
return newRdmaSpec(rdmaResources)
56-
}
57-
58-
// NewAuxRdmaSpec returns the RdmaSpec for auxiliary device ID
59-
func NewAuxRdmaSpec(deviceID string) types.RdmaSpec {
60-
rdmaResources := utils.GetRdmaProvider().GetRdmaDevicesForAuxdev(deviceID)
61-
return newRdmaSpec(rdmaResources)
62-
}
63-
64-
func (r *rdmaSpec) IsRdma() bool {
65-
return r.isSupportRdma
66-
}
67-
68-
func (r *rdmaSpec) GetRdmaDeviceSpec() []*pluginapi.DeviceSpec {
69-
return r.deviceSpec
81+
return deviceSpec
7082
}

pkg/devices/rdma_test.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
2424

2525
"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/devices"
26+
"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/types"
2627
"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/utils"
2728
"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/utils/mocks"
2829
)
@@ -35,7 +36,7 @@ var _ = Describe("RdmaSpec", func() {
3536
fakeRdmaProvider := mocks.RdmaProvider{}
3637
fakeRdmaProvider.On("GetRdmaDevicesForPcidev", "0000:00:00.0").Return([]string{})
3738
utils.SetRdmaProviderInst(&fakeRdmaProvider)
38-
spec := devices.NewRdmaSpec("0000:00:00.0")
39+
spec := devices.NewRdmaSpec(types.NetDeviceType, "0000:00:00.0")
3940

4041
Expect(spec.IsRdma()).To(BeFalse())
4142
Expect(spec.GetRdmaDeviceSpec()).To(HaveLen(0))
@@ -50,7 +51,7 @@ var _ = Describe("RdmaSpec", func() {
5051
"/dev/infiniband/uverbs0", "/dev/infiniband/rdma_cm",
5152
}).On("GetRdmaCharDevices", "fake_1").Return([]string{"/dev/infiniband/rdma_cm"})
5253
utils.SetRdmaProviderInst(&fakeRdmaProvider)
53-
spec := devices.NewRdmaSpec("0000:00:00.0")
54+
spec := devices.NewRdmaSpec(types.NetDeviceType, "0000:00:00.0")
5455

5556
Expect(spec.IsRdma()).To(BeTrue())
5657
Expect(spec.GetRdmaDeviceSpec()).To(Equal([]*pluginapi.DeviceSpec{

pkg/factory/factory.go

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -163,15 +163,7 @@ func (rf *resourceFactory) GetResourcePool(rc *types.ResourceConfig, filteredDev
163163
}
164164

165165
func (rf *resourceFactory) GetRdmaSpec(dt types.DeviceType, deviceID string) types.RdmaSpec {
166-
//nolint: exhaustive
167-
switch dt {
168-
case types.NetDeviceType:
169-
return devices.NewRdmaSpec(deviceID)
170-
case types.AuxNetDeviceType:
171-
return devices.NewAuxRdmaSpec(deviceID)
172-
default:
173-
return nil
174-
}
166+
return devices.NewRdmaSpec(dt, deviceID)
175167
}
176168

177169
func (rf *resourceFactory) GetVdpaDevice(pciAddr string) types.VdpaDevice {

pkg/factory/factory_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,12 @@ import (
2525
"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/types"
2626
"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/types/mocks"
2727
"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/utils"
28+
utilmocks "github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/utils/mocks"
2829

2930
. "github.com/onsi/ginkgo"
3031
. "github.com/onsi/ginkgo/extensions/table"
3132
. "github.com/onsi/gomega"
33+
"github.com/stretchr/testify/mock"
3234
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
3335
)
3436

@@ -606,6 +608,9 @@ var _ = Describe("Factory", func() {
606608
)
607609
Describe("getting rdma spec", func() {
608610
Context("check c rdma spec", func() {
611+
mockProvider := &utilmocks.NetlinkProvider{}
612+
mockProvider.On("HasRdmaParam", mock.AnythingOfType("string")).Return(false, nil)
613+
utils.SetNetlinkProviderInst(mockProvider)
609614
f := factory.NewResourceFactory("fake", "fake", true, false)
610615
rs1 := f.GetRdmaSpec(types.NetDeviceType, "0000:00:00.1")
611616
rs2 := f.GetRdmaSpec(types.AcceleratorType, "0000:00:00.2")

pkg/utils/mocks/NetlinkProvider.go

Lines changed: 28 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/utils/netlink_provider.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ type NetlinkProvider interface {
3131
GetIPv4RouteList(ifName string) ([]nl.Route, error)
3232
// DevlinkGetDeviceInfoByNameAsMap returns devlink info for selected device as a map
3333
GetDevlinkGetDeviceInfoByNameAsMap(bus, device string) (map[string]string, error)
34+
// HasRdmaParam returns true if PCI device has "enable_rdma" param
35+
HasRdmaParam(pciAddr string) (bool, error)
3436
}
3537

3638
type defaultNetlinkProvider struct {
@@ -48,6 +50,19 @@ func GetNetlinkProvider() NetlinkProvider {
4850
return netlinkProvider
4951
}
5052

53+
// HasRdmaParam returns true if PCI device has "enable_rdma" param
54+
// equivalent to "devlink dev param show pci/0000:d8:01.1 name enable_rdma"
55+
func (defaultNetlinkProvider) HasRdmaParam(pciAddr string) (bool, error) {
56+
param, err := nl.DevlinkGetDeviceParamByName("pci", pciAddr, "enable_rdma")
57+
if err != nil {
58+
return false, fmt.Errorf("error getting enable_rdma attribute for pci device %s %v", pciAddr, err)
59+
}
60+
if len(param.Values) == 0 || param.Values[0].Data == nil {
61+
return false, nil
62+
}
63+
return true, nil
64+
}
65+
5166
// GetLinkAttrs returns a net device's link attributes.
5267
func (defaultNetlinkProvider) GetLinkAttrs(ifName string) (*nl.LinkAttrs, error) {
5368
link, err := nl.LinkByName(ifName)

pkg/utils/utils.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,15 @@ func GetPfEswitchMode(pciAddr string) (string, error) {
474474
return devLinkDeviceAttrs.Mode, nil
475475
}
476476

477+
// HasRdmaParam returns true if PCI device has "enable_rdma" param
478+
func HasRdmaParam(pciAddr string) (bool, error) {
479+
rdma, err := GetNetlinkProvider().HasRdmaParam(pciAddr)
480+
if err != nil {
481+
return false, err
482+
}
483+
return rdma, nil
484+
}
485+
477486
// HasDefaultRoute returns true if PCI network device is default route interface
478487
func HasDefaultRoute(pciAddr string) (bool, error) {
479488
// Get net interface name

0 commit comments

Comments
 (0)