Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/driver/dra_hooks.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ func (np *NetworkDriver) prepareResourceClaim(ctx context.Context, claim *resour
}

// Get RDMA configuration: link and char devices
if rdmaDev, _ := rdmamap.GetRdmaDeviceForNetdevice(ifName); rdmaDev != "" {
if rdmaDev, err := getRdmaDeviceFromNetdev(ifName); err == nil && rdmaDev != "" {
klog.V(2).Infof("RunPodSandbox processing RDMA device: %s", rdmaDev)
podCfg.RDMADevice.LinkDev = rdmaDev
// Obtain the char devices associated to the rdma device
Expand Down
30 changes: 30 additions & 0 deletions pkg/driver/rdmadevice.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@
import (
"fmt"
"os"
"path/filepath"
"syscall"

"github.com/Mellanox/rdmamap"
"github.com/google/dranet/internal/nlwrap"
"github.com/vishvananda/netlink"
"github.com/vishvananda/netns"
Expand Down Expand Up @@ -79,6 +81,34 @@

}

// getRdmaDeviceFromNetdev returns the RDMA device name for a given network interface
func getRdmaDeviceFromNetdev(ifName string) (string, error) {
var rdmaDev string
if rdmaDev, _ := rdmamap.GetRdmaDeviceForNetdevice(ifName); rdmaDev == "" {

//Fallback to sysfs check if rdmamap fails. This is particularly related to a known

Check failure on line 89 in pkg/driver/rdmadevice.go

View workflow job for this annotation

GitHub Actions / test (1.25.x)

commentFormatting: put a space between `//` and comment text (gocritic)
// issue to detect RDMA devices for certain Mellanox NICs
// https://github.com/Mellanox/rdmamap/issues/15

rdmaDir := filepath.Join("/sys/class/net", ifName, "device/infiniband")

entries, err := os.ReadDir(rdmaDir)
if err != nil {
return "", fmt.Errorf("no RDMA device for %s: %w", ifName, err)
}

for _, entry := range entries {
if entry.IsDir() {
return entry.Name(), nil // Return first RDMA device found (e.g., "mlx5_0")
}
}

return "", fmt.Errorf("no RDMA device found for %s", ifName)
}

return rdmaDev, nil
Comment on lines +88 to +109
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we consider creating this as function in pkg/inventory/sysfs.go as all sysnet operations added in that file

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1

Extending this further, can we please refactor the existing function

func hasRDMADeviceInSysfs(ifName string) bool {
into two functions, similar to the upstream implementation of IsRDmaDeviceForNetdevice and GetRdmaDeviceForNetdevice, where IsRDmaDeviceForNetdevice is a simple wrapper over GetRdmaDeviceForNetdevice (Ref. https://github.com/Mellanox/rdmamap/blob/37bd11cc4c57da931b7b117f829fb663d46ce480/rdma_map.go#L348-L368

}

// GetDeviceInfo retrieves device type, major, and minor numbers for a given path.
// It returns an error if the path does not exist or if it's not a device file.
func GetDeviceInfo(path string) (LinuxDevice, error) {
Expand Down
186 changes: 186 additions & 0 deletions pkg/driver/rdmadevice_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
/*
Copyright The Kubernetes Authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package driver

import (
"fmt"
"os"
"path/filepath"
"strings"
"testing"
)

// TestGetRdmaDeviceFromNetdevSysfs tests the sysfs fallback logic
// of getRdmaDeviceFromNetdev using a mock sysfs structure.
//
// Each case builds a directory tree under a fresh temporary directory that
// stands in for /sys/class/net, then calls getRdmaDeviceFromSysfs with that
// directory as the base path and checks the returned device name or the
// error text.
func TestGetRdmaDeviceFromNetdevSysfs(t *testing.T) {
	testCases := []struct {
		name        string
		ifName      string
		setupFunc   func(t *testing.T, baseDir string)
		want        string
		wantErr     bool
		errContains string
	}{
		{
			name:   "valid RDMA device found",
			ifName: "eth0",
			setupFunc: func(t *testing.T, baseDir string) {
				// Create mock sysfs structure: /sys/class/net/eth0/device/infiniband/mlx5_0
				rdmaDir := filepath.Join(baseDir, "eth0", "device", "infiniband", "mlx5_0")
				if err := os.MkdirAll(rdmaDir, 0755); err != nil {
					t.Fatalf("failed to create mock sysfs dir: %v", err)
				}
			},
			want:    "mlx5_0",
			wantErr: false,
		},
		{
			name:   "multiple RDMA devices returns first",
			ifName: "eth1",
			setupFunc: func(t *testing.T, baseDir string) {
				// Create mock sysfs structure with multiple RDMA devices
				for _, rdmaDev := range []string{"mlx5_0", "mlx5_1"} {
					rdmaDir := filepath.Join(baseDir, "eth1", "device", "infiniband", rdmaDev)
					if err := os.MkdirAll(rdmaDir, 0755); err != nil {
						t.Fatalf("failed to create mock sysfs dir: %v", err)
					}
				}
			},
			// os.ReadDir returns entries sorted by filename, so the first
			// directory is deterministically the lexically smallest name.
			want:    "mlx5_0",
			wantErr: false,
		},
		{
			name:   "no RDMA device - infiniband dir missing",
			ifName: "eth2",
			setupFunc: func(t *testing.T, baseDir string) {
				// Create mock sysfs structure without infiniband dir
				deviceDir := filepath.Join(baseDir, "eth2", "device")
				if err := os.MkdirAll(deviceDir, 0755); err != nil {
					t.Fatalf("failed to create mock sysfs dir: %v", err)
				}
			},
			want:        "",
			wantErr:     true,
			errContains: "no RDMA device for eth2",
		},
		{
			name:   "no RDMA device - empty infiniband dir",
			ifName: "eth3",
			setupFunc: func(t *testing.T, baseDir string) {
				// Create mock sysfs structure with empty infiniband dir
				rdmaDir := filepath.Join(baseDir, "eth3", "device", "infiniband")
				if err := os.MkdirAll(rdmaDir, 0755); err != nil {
					t.Fatalf("failed to create mock sysfs dir: %v", err)
				}
			},
			want:        "",
			wantErr:     true,
			errContains: "no RDMA device found for eth3",
		},
		{
			name:   "interface does not exist",
			ifName: "nonexistent",
			setupFunc: func(t *testing.T, baseDir string) {
				// Don't create anything
			},
			want:        "",
			wantErr:     true,
			errContains: "no RDMA device for nonexistent",
		},
		{
			name:   "only files in infiniband dir, no directories",
			ifName: "eth4",
			setupFunc: func(t *testing.T, baseDir string) {
				// Create mock sysfs structure with only files (no directories)
				rdmaDir := filepath.Join(baseDir, "eth4", "device", "infiniband")
				if err := os.MkdirAll(rdmaDir, 0755); err != nil {
					t.Fatalf("failed to create mock sysfs dir: %v", err)
				}
				// Create a file instead of directory
				filePath := filepath.Join(rdmaDir, "somefile")
				if err := os.WriteFile(filePath, []byte("test"), 0644); err != nil {
					t.Fatalf("failed to create mock file: %v", err)
				}
			},
			want:        "",
			wantErr:     true,
			errContains: "no RDMA device found for eth4",
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			// Create temporary directory to mock /sys/class/net
			tmpDir := t.TempDir()

			// Setup mock sysfs structure
			tc.setupFunc(t, tmpDir)

			// Call the sysfs fallback helper with the temp dir
			got, err := getRdmaDeviceFromSysfs(tmpDir, tc.ifName)

			// Check error conditions
			if tc.wantErr {
				if err == nil {
					t.Fatalf("getRdmaDeviceFromSysfs() expected error, got nil")
				}
				if tc.errContains != "" && !strings.Contains(err.Error(), tc.errContains) {
					t.Errorf("getRdmaDeviceFromSysfs() error = %v, want error containing %q", err, tc.errContains)
				}
				return
			}

			if err != nil {
				t.Fatalf("getRdmaDeviceFromSysfs() unexpected error: %v", err)
			}

			if got != tc.want {
				t.Errorf("getRdmaDeviceFromSysfs() = %v, want %v", got, tc.want)
			}
		})
	}
}

// getRdmaDeviceFromSysfs looks up the RDMA device for ifName by listing
// <basePath>/<ifName>/device/infiniband and returning the name of the first
// directory entry it contains (e.g. "mlx5_0").
//
// It takes the sysfs root as a parameter so tests can point it at a temporary
// directory instead of /sys/class/net; the logic is a copy of the fallback in
// getRdmaDeviceFromNetdev.
//
// An error is returned when the infiniband directory cannot be read (the
// underlying error is wrapped) or when it holds no subdirectory.
func getRdmaDeviceFromSysfs(basePath, ifName string) (string, error) {
	infinibandPath := filepath.Join(basePath, ifName, "device/infiniband")

	children, readErr := os.ReadDir(infinibandPath)
	if readErr != nil {
		return "", fmt.Errorf("no RDMA device for %s: %w", ifName, readErr)
	}

	// Regular files are skipped; only a subdirectory counts as a device.
	for idx := range children {
		if child := children[idx]; child.IsDir() {
			return child.Name(), nil
		}
	}

	return "", fmt.Errorf("no RDMA device found for %s", ifName)
}
Loading