Skip to content

Commit dd6edf8

Browse files
authored
Merge pull request #1339 from elezar/bump-nvidia-container-toolkit-v1.18.0-rc.2
Add support for GDRCOPY
2 parents 944ff2e + 7bb7845 commit dd6edf8

File tree

149 files changed

+3786
-2394
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

149 files changed

+3786
-2394
lines changed

api/config/v1/flags.go

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,7 @@ type CommandLineFlags struct {
6060
MpsRoot *string `json:"mpsRoot,omitempty" yaml:"mpsRoot,omitempty"`
6161
NvidiaDriverRoot *string `json:"nvidiaDriverRoot,omitempty" yaml:"nvidiaDriverRoot,omitempty"`
6262
NvidiaDevRoot *string `json:"nvidiaDevRoot,omitempty" yaml:"nvidiaDevRoot,omitempty"`
63+
GDRCopyEnabled *bool `json:"gdrcopyEnabled" yaml:"gdrcopyEnabled"`
6364
GDSEnabled *bool `json:"gdsEnabled" yaml:"gdsEnabled"`
6465
MOFEDEnabled *bool `json:"mofedEnabled" yaml:"mofedEnabled"`
6566
UseNodeFeatureAPI *bool `json:"useNodeFeatureAPI" yaml:"useNodeFeatureAPI"`
@@ -126,6 +127,8 @@ func (f *Flags) UpdateFromCLIFlags(c *cli.Context, flags []cli.Flag) {
126127
updateFromCLIFlag(&f.NvidiaDriverRoot, c, n)
127128
case "dev-root", "nvidia-dev-root":
128129
updateFromCLIFlag(&f.NvidiaDevRoot, c, n)
130+
case "gdrcopy-enabled":
131+
updateFromCLIFlag(&f.GDRCopyEnabled, c, n)
129132
case "gds-enabled":
130133
updateFromCLIFlag(&f.GDSEnabled, c, n)
131134
case "mofed-enabled":

api/config/v1/flags_test.go

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -160,6 +160,7 @@ func TestMarshalFlags(t *testing.T) {
160160
output: `{
161161
"migStrategy": null,
162162
"failOnInitError": null,
163+
"gdrcopyEnabled": null,
163164
"gdsEnabled": null,
164165
"mofedEnabled": null,
165166
"useNodeFeatureAPI": null,
@@ -177,6 +178,7 @@ func TestMarshalFlags(t *testing.T) {
177178
output: `{
178179
"migStrategy": null,
179180
"failOnInitError": null,
181+
"gdrcopyEnabled": null,
180182
"gdsEnabled": null,
181183
"mofedEnabled": null,
182184
"useNodeFeatureAPI": null,
@@ -201,6 +203,7 @@ func TestMarshalFlags(t *testing.T) {
201203
output: `{
202204
"migStrategy": null,
203205
"failOnInitError": null,
206+
"gdrcopyEnabled": null,
204207
"gdsEnabled": null,
205208
"mofedEnabled": null,
206209
"useNodeFeatureAPI": null,

cmd/nvidia-device-plugin/main.go

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -100,14 +100,19 @@ func main() {
100100
Usage: "the desired strategy for passing device IDs to the underlying runtime:\n\t\t[uuid | index]",
101101
EnvVars: []string{"DEVICE_ID_STRATEGY"},
102102
},
103+
&cli.BoolFlag{
104+
Name: "gdrcopy-enabled",
105+
Usage: "ensure that containers that request NVIDIA GPU resources are started with GDRCopy support",
106+
EnvVars: []string{"GDRCOPY_ENABLED"},
107+
},
103108
&cli.BoolFlag{
104109
Name: "gds-enabled",
105-
Usage: "ensure that containers are started with NVIDIA_GDS=enabled",
110+
Usage: "ensure that containers that request NVIDIA GPU resources are started with GPUDirect Storage support",
106111
EnvVars: []string{"GDS_ENABLED"},
107112
},
108113
&cli.BoolFlag{
109114
Name: "mofed-enabled",
110-
Usage: "ensure that containers are started with NVIDIA_MOFED=enabled",
115+
Usage: "ensure that containers that request NVIDIA GPU resources are started with MOFED support",
111116
EnvVars: []string{"MOFED_ENABLED"},
112117
},
113118
&cli.StringFlag{

cmd/nvidia-device-plugin/plugin-manager.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,7 @@ func GetPlugins(ctx context.Context, infolib info.Interface, nvmllib nvml.Interf
5454
cdi.WithNvidiaCTKPath(*config.Flags.Plugin.NvidiaCTKPath),
5555
cdi.WithDeviceIDStrategy(*config.Flags.Plugin.DeviceIDStrategy),
5656
cdi.WithVendor("k8s.device-plugin.nvidia.com"),
57+
cdi.WithGdrcopyEnabled(*config.Flags.GDRCopyEnabled),
5758
cdi.WithGdsEnabled(*config.Flags.GDSEnabled),
5859
cdi.WithMofedEnabled(*config.Flags.MOFEDEnabled),
5960
cdi.WithImexChannels(imexChannels),

deployments/helm/nvidia-device-plugin/templates/daemonset-device-plugin.yml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -169,6 +169,10 @@ spec:
169169
- name: NVIDIA_CDI_HOOK_PATH
170170
value: {{ .Values.cdi.nvidiaHookPath }}
171171
{{- end }}
172+
{{- if typeIs "bool" .Values.gdrcopyEnabled }}
173+
- name: GDRCOPY_ENABLED
174+
value: {{ .Values.gdrcopyEnabled | quote }}
175+
{{- end }}
172176
{{- if typeIs "bool" .Values.gdsEnabled }}
173177
- name: GDS_ENABLED
174178
value: {{ .Values.gdsEnabled | quote }}

deployments/helm/nvidia-device-plugin/values.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ failOnInitError: null
3333
deviceListStrategy: null
3434
deviceIDStrategy: null
3535
nvidiaDriverRoot: null
36+
gdrcopyEnabled: null
3637
gdsEnabled: null
3738
mofedEnabled: null
3839
deviceDiscoveryStrategy: null

go.mod

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2,13 +2,13 @@ module github.com/NVIDIA/k8s-device-plugin
22

33
go 1.24.0
44

5-
toolchain go1.24.1
5+
toolchain go1.24.5
66

77
require (
88
github.com/NVIDIA/go-gpuallocator v0.6.0
9-
github.com/NVIDIA/go-nvlib v0.8.0
9+
github.com/NVIDIA/go-nvlib v0.8.1
1010
github.com/NVIDIA/go-nvml v0.13.0-1
11-
github.com/NVIDIA/nvidia-container-toolkit v1.17.8
11+
github.com/NVIDIA/nvidia-container-toolkit v1.18.0-rc.5
1212
github.com/fsnotify/fsnotify v1.9.0
1313
github.com/google/renameio v1.0.1
1414
github.com/google/uuid v1.6.0
@@ -28,8 +28,8 @@ require (
2828
sigs.k8s.io/node-feature-discovery v0.17.3
2929
sigs.k8s.io/node-feature-discovery/api/nfd v0.17.3
3030
sigs.k8s.io/yaml v1.4.0
31-
tags.cncf.io/container-device-interface v0.8.1
32-
tags.cncf.io/container-device-interface/specs-go v0.8.0
31+
tags.cncf.io/container-device-interface v1.0.1
32+
tags.cncf.io/container-device-interface/specs-go v1.0.0
3333
)
3434

3535
require (
@@ -65,7 +65,7 @@ require (
6565
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
6666
golang.org/x/net v0.38.0 // indirect
6767
golang.org/x/oauth2 v0.27.0 // indirect
68-
golang.org/x/sys v0.33.0 // indirect
68+
golang.org/x/sys v0.36.0 // indirect
6969
golang.org/x/term v0.30.0 // indirect
7070
golang.org/x/text v0.23.0 // indirect
7171
golang.org/x/time v0.8.0 // indirect

go.sum

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
11
github.com/NVIDIA/go-gpuallocator v0.6.0 h1:2PA2swx59gJYREPkZNTGtyCP6Pnz3WEgnYsXlRkyvkk=
22
github.com/NVIDIA/go-gpuallocator v0.6.0/go.mod h1:c+Yspg+/QxWOmoSQeuI48Z/7nS+mMPtxyj1NYUTwewY=
3-
github.com/NVIDIA/go-nvlib v0.8.0 h1:vorMvnsJYvZaxiluSXFd+fIFeQFPWSiSjNPiJyvDs0c=
4-
github.com/NVIDIA/go-nvlib v0.8.0/go.mod h1:bV+OEgjJCbFXf5T8c082mVPFuiF+gKwf9CMT7DWGUBI=
3+
github.com/NVIDIA/go-nvlib v0.8.1 h1:OPEHVvn3zcV5OXB68A7WRpeCnYMRSPl7LdeJH/d3gZI=
4+
github.com/NVIDIA/go-nvlib v0.8.1/go.mod h1:7mzx9FSdO9fXWP9NKuZmWkCwhkEcSWQFe2tmFwtLb9c=
55
github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw=
66
github.com/NVIDIA/go-nvml v0.13.0-1/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4=
7-
github.com/NVIDIA/nvidia-container-toolkit v1.17.8 h1:ndE23TKvQBicsZT88mzZudygn6JNOe6+UsIgqk6gGvw=
8-
github.com/NVIDIA/nvidia-container-toolkit v1.17.8/go.mod h1:khOgMW80+g8eX/1zPlO4demLShHht9I0YEm8ngcPgwk=
7+
github.com/NVIDIA/nvidia-container-toolkit v1.18.0-rc.5 h1:ft4S4nyT2jI1tV6CAFRMMZdrtd8HAfmuE9X9ieoDz+Y=
8+
github.com/NVIDIA/nvidia-container-toolkit v1.18.0-rc.5/go.mod h1:t/awbHrDkz8ec0vecKo82Cn/11YkuD2ngE5RT9wuAgU=
99
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
1010
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
1111
github.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo=
@@ -169,8 +169,8 @@ golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7w
169169
golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
170170
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
171171
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
172-
golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
173-
golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
172+
golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k=
173+
golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
174174
golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y=
175175
golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g=
176176
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
@@ -232,7 +232,7 @@ sigs.k8s.io/structured-merge-diff/v4 v4.4.2 h1:MdmvkGuXi/8io6ixD5wud3vOLwc1rj0aN
232232
sigs.k8s.io/structured-merge-diff/v4 v4.4.2/go.mod h1:N8f93tFZh9U6vpxwRArLiikrE5/2tiu1w1AGfACIGE4=
233233
sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
234234
sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=
235-
tags.cncf.io/container-device-interface v0.8.1 h1:c0jN4Mt6781jD67NdPajmZlD1qrqQyov/Xfoab37lj0=
236-
tags.cncf.io/container-device-interface v0.8.1/go.mod h1:Apb7N4VdILW0EVdEMRYXIDVRZfNJZ+kmEUss2kRRQ6Y=
237-
tags.cncf.io/container-device-interface/specs-go v0.8.0 h1:QYGFzGxvYK/ZLMrjhvY0RjpUavIn4KcmRmVP/JjdBTA=
238-
tags.cncf.io/container-device-interface/specs-go v0.8.0/go.mod h1:BhJIkjjPh4qpys+qm4DAYtUyryaTDg9zris+AczXyws=
235+
tags.cncf.io/container-device-interface v1.0.1 h1:KqQDr4vIlxwfYh0Ed/uJGVgX+CHAkahrgabg6Q8GYxc=
236+
tags.cncf.io/container-device-interface v1.0.1/go.mod h1:JojJIOeW3hNbcnOH2q0NrWNha/JuHoDZcmYxAZwb2i0=
237+
tags.cncf.io/container-device-interface/specs-go v1.0.0 h1:8gLw29hH1ZQP9K1YtAzpvkHCjjyIxHZYzBAvlQ+0vD8=
238+
tags.cncf.io/container-device-interface/specs-go v1.0.0/go.mod h1:u86hoFWqnh3hWz3esofRFKbI261bUlvUfLKGrDhJkgQ=

internal/cdi/api.go

Lines changed: 2 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -16,16 +16,11 @@
1616

1717
package cdi
1818

19-
import "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
20-
2119
// Interface provides the API to the 'cdi' package
2220
//
23-
//go:generate moq -stub -out api_mock.go . Interface
21+
//go:generate moq -rm -fmt=goimports -stub -out api_mock.go . Interface
2422
type Interface interface {
2523
CreateSpecFile() error
2624
QualifiedName(string, string) string
27-
}
28-
29-
type cdiSpecGenerator interface {
30-
GetSpec() (spec.Interface, error)
25+
AdditionalDevices() []string
3126
}

internal/cdi/api_mock.go

Lines changed: 42 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)