Skip to content

Commit ac2214e

Browse files
committed
Add config option for CompatContainerRoot
This change allows the container compat root for nvgpu (e.g. Orin) systems to be specified either as the nvidia-container-runtime.modes.csv.compat-container-root option in the config.toml file, or with the --csv.compat-container-root (NVIDIA_CTK_CDI_GENERATE_CSV_COMPAT_CONTAINER_ROOT) option when generating CDI specifications. A WithCSVCompatContainerRoot option is also exposed in the nvcdi API. Note that this option is only relevant when nvgpu devices are detected. Signed-off-by: Evan Lezar <elezar@nvidia.com>
1 parent f7004ac commit ac2214e

File tree

6 files changed

+112
-35
lines changed

6 files changed

+112
-35
lines changed

cmd/nvidia-ctk/cdi/generate/generate.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,9 @@ type options struct {
7070
featureFlags []string
7171

7272
csv struct {
73-
files []string
74-
ignorePatterns []string
73+
files []string
74+
ignorePatterns []string
75+
CompatContainerRoot string
7576
}
7677

7778
noAllDevice bool
@@ -212,6 +213,12 @@ func (m command) build() *cli.Command {
212213
Destination: &opts.csv.ignorePatterns,
213214
Sources: cli.EnvVars("NVIDIA_CTK_CDI_GENERATE_CSV_IGNORE_PATTERNS"),
214215
},
216+
&cli.StringFlag{
217+
Name: "csv.compat-container-root",
218+
Usage: "specify the container folder to use for CUDA Forward Compatibility in non-standard containers",
219+
Destination: &opts.csv.CompatContainerRoot,
220+
Sources: cli.EnvVars("NVIDIA_CTK_CDI_GENERATE_CSV_COMPAT_CONTAINER_ROOT"),
221+
},
215222
&cli.StringSliceFlag{
216223
Name: "disable-hook",
217224
Aliases: []string{"disable-hooks"},
@@ -384,6 +391,7 @@ func (m command) generateSpecs(opts *options) ([]generatedSpecs, error) {
384391
nvcdi.WithLibrarySearchPaths(opts.librarySearchPaths),
385392
nvcdi.WithCSVFiles(opts.csv.files),
386393
nvcdi.WithCSVIgnorePatterns(opts.csv.ignorePatterns),
394+
nvcdi.WithCSVCompatContainerRoot(opts.csv.CompatContainerRoot),
387395
nvcdi.WithDisabledHooks(opts.disabledHooks...),
388396
nvcdi.WithEnabledHooks(opts.enabledHooks...),
389397
nvcdi.WithFeatureFlags(opts.featureFlags...),

internal/config/runtime.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ type jitCDIModeConfig struct {
5353

5454
type csvModeConfig struct {
5555
MountSpecPath string `toml:"mount-spec-path"`
56+
// CompatContainerRoot specifies the compat root used when the standard
57+
// CUDA compat libraries should not be used.
58+
CompatContainerRoot string `toml:"compat-container-root"`
5659
}
5760

5861
type legacyModeConfig struct {

internal/modifier/csv.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ func NewCSVModifier(logger logger.Interface, cfg *config.Config, container image
5959
nvcdi.WithNVIDIACDIHookPath(cfg.NVIDIACTKConfig.Path),
6060
nvcdi.WithMode(nvcdi.ModeCSV),
6161
nvcdi.WithCSVFiles(csvFiles),
62+
nvcdi.WithCSVCompatContainerRoot(cfg.NVIDIAContainerRuntimeConfig.Modes.CSV.CompatContainerRoot),
6263
)
6364
if err != nil {
6465
return nil, fmt.Errorf("failed to construct CDI library: %v", err)

pkg/nvcdi/lib-csv.go

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,14 @@ import (
3636
"github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/tegra/csv"
3737
)
3838

39+
const (
40+
defaultOrinCompatContainerRoot = "/usr/local/cuda/compat-orin"
41+
)
42+
3943
type csvOptions struct {
40-
Files []string
41-
IgnorePatterns []string
44+
Files []string
45+
IgnorePatterns []string
46+
CompatContainerRoot string
4247
}
4348

4449
type csvlib nvcdilib
@@ -52,6 +57,9 @@ func (l *nvcdilib) asCSVLib() *csvlib {
5257
if len(l.csv.Files) == 0 {
5358
l.csv.Files = csv.DefaultFileList()
5459
}
60+
if l.csv.CompatContainerRoot == "" {
61+
l.csv.CompatContainerRoot = defaultOrinCompatContainerRoot
62+
}
5563
return (*csvlib)(l)
5664
}
5765

@@ -481,7 +489,7 @@ func (l *csvlib) cudaCompatDiscoverer() discover.Discover {
481489
// TODO: Should this be overridable through a feature flag / config option?
482490
if strings.Contains(name, "Orin (nvgpu)") {
483491
// TODO: This should probably be a constant or configurable.
484-
cudaCompatContainerRoot = "/usr/local/cuda/compat-orin"
492+
cudaCompatContainerRoot = l.csv.CompatContainerRoot
485493
break
486494
}
487495
}

pkg/nvcdi/lib-csv_test.go

Lines changed: 79 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -63,39 +63,50 @@ func TestDeviceSpecGenerators(t *testing.T) {
6363
infolib: &infoInterfaceMock{
6464
HasNvmlFunc: func() (bool, string) { return true, "forced" },
6565
},
66-
// TODO: Replace this with a system-specific implementation once available.
67-
nvmllib: &mock.Interface{
68-
InitFunc: func() nvml.Return {
69-
return nvml.SUCCESS
70-
},
71-
ShutdownFunc: func() nvml.Return {
72-
return nvml.SUCCESS
73-
},
74-
SystemGetDriverVersionFunc: func() (string, nvml.Return) {
75-
return "540.3.0", nvml.SUCCESS
76-
},
77-
DeviceGetCountFunc: func() (int, nvml.Return) {
78-
return 1, nvml.SUCCESS
66+
nvmllib: mockOrinServer(),
67+
},
68+
expectedDeviceSpecs: []specs.Device{
69+
{
70+
Name: "0",
71+
ContainerEdits: specs.ContainerEdits{
72+
DeviceNodes: []*specs.DeviceNode{
73+
{Path: "/dev/nvidia0", HostPath: "/dev/nvidia0"},
74+
},
7975
},
80-
DeviceGetHandleByIndexFunc: func(n int) (nvml.Device, nvml.Return) {
81-
if n != 0 {
82-
return nil, nvml.ERROR_INVALID_ARGUMENT
83-
}
84-
device := &mock.Device{
85-
GetUUIDFunc: func() (string, nvml.Return) {
86-
return "GPU-orin", nvml.SUCCESS
87-
},
88-
GetNameFunc: func() (string, nvml.Return) {
89-
return "Orin (nvgpu)", nvml.SUCCESS
90-
},
91-
GetPciInfoFunc: func() (nvml.PciInfo, nvml.Return) {
92-
return nvml.PciInfo{}, nvml.ERROR_NOT_SUPPORTED
93-
},
94-
}
95-
return device, nvml.SUCCESS
76+
},
77+
},
78+
expectedCommonEdits: &cdi.ContainerEdits{
79+
ContainerEdits: &specs.ContainerEdits{
80+
Hooks: []*specs.Hook{
81+
{
82+
HookName: "createContainer",
83+
Path: "/usr/bin/nvidia-cdi-hook",
84+
Args: []string{"nvidia-cdi-hook", "enable-cuda-compat", "--host-driver-version=540.3.0", "--cuda-compat-container-root=/usr/local/cuda/compat-orin"},
85+
Env: []string{"NVIDIA_CTK_DEBUG=false"},
86+
},
87+
{
88+
HookName: "createContainer",
89+
Path: "/usr/bin/nvidia-cdi-hook",
90+
Args: []string{"nvidia-cdi-hook", "update-ldcache"},
91+
Env: []string{"NVIDIA_CTK_DEBUG=false"},
92+
},
9693
},
9794
},
9895
},
96+
},
97+
{
98+
description: "single orin CSV device; custom container compat root",
99+
rootfsFolder: "rootfs-orin",
100+
lib: &csvlib{
101+
// test-case specific
102+
infolib: &infoInterfaceMock{
103+
HasNvmlFunc: func() (bool, string) { return true, "forced" },
104+
},
105+
nvmllib: mockOrinServer(),
106+
csv: csvOptions{
107+
CompatContainerRoot: "/another/compat/root",
108+
},
109+
},
99110
expectedDeviceSpecs: []specs.Device{
100111
{
101112
Name: "0",
@@ -112,7 +123,7 @@ func TestDeviceSpecGenerators(t *testing.T) {
112123
{
113124
HookName: "createContainer",
114125
Path: "/usr/bin/nvidia-cdi-hook",
115-
Args: []string{"nvidia-cdi-hook", "enable-cuda-compat", "--host-driver-version=540.3.0", "--cuda-compat-container-root=/usr/local/cuda/compat-orin"},
126+
Args: []string{"nvidia-cdi-hook", "enable-cuda-compat", "--host-driver-version=540.3.0", "--cuda-compat-container-root=/another/compat/root"},
116127
Env: []string{"NVIDIA_CTK_DEBUG=false"},
117128
},
118129
{
@@ -192,6 +203,9 @@ func TestDeviceSpecGenerators(t *testing.T) {
192203
filepath.Join(driverRoot, "/etc/nvidia-container-runtime/host-files-for-container.d/devices.csv"),
193204
filepath.Join(driverRoot, "/etc/nvidia-container-runtime/host-files-for-container.d/drivers.csv"),
194205
}
206+
if tc.lib.csv.CompatContainerRoot == "" {
207+
tc.lib.csv.CompatContainerRoot = defaultOrinCompatContainerRoot
208+
}
195209

196210
t.Run(tc.description, func(t *testing.T) {
197211
generator, err := tc.lib.DeviceSpecGenerators("all")
@@ -230,6 +244,41 @@ func stripRoot[T any](root string, v T) T {
230244
return modified
231245
}
232246

247+
// TODO: We should move this mock to go-nvml/mock
248+
func mockOrinServer() nvml.Interface {
249+
return &mock.Interface{
250+
InitFunc: func() nvml.Return {
251+
return nvml.SUCCESS
252+
},
253+
ShutdownFunc: func() nvml.Return {
254+
return nvml.SUCCESS
255+
},
256+
SystemGetDriverVersionFunc: func() (string, nvml.Return) {
257+
return "540.3.0", nvml.SUCCESS
258+
},
259+
DeviceGetCountFunc: func() (int, nvml.Return) {
260+
return 1, nvml.SUCCESS
261+
},
262+
DeviceGetHandleByIndexFunc: func(n int) (nvml.Device, nvml.Return) {
263+
if n != 0 {
264+
return nil, nvml.ERROR_INVALID_ARGUMENT
265+
}
266+
device := &mock.Device{
267+
GetUUIDFunc: func() (string, nvml.Return) {
268+
return "GPU-orin", nvml.SUCCESS
269+
},
270+
GetNameFunc: func() (string, nvml.Return) {
271+
return "Orin (nvgpu)", nvml.SUCCESS
272+
},
273+
GetPciInfoFunc: func() (nvml.PciInfo, nvml.Return) {
274+
return nvml.PciInfo{}, nvml.ERROR_NOT_SUPPORTED
275+
},
276+
}
277+
return device, nvml.SUCCESS
278+
},
279+
}
280+
}
281+
233282
// TODO: We should move this mock to go-nvml/mock
234283
func mockIGXServer() nvml.Interface {
235284
thor := &mock.Device{

pkg/nvcdi/options.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,14 @@ func WithCSVIgnorePatterns(csvIgnorePatterns []string) Option {
142142
}
143143
}
144144

145+
// WithCSVCompatContainerRoot sets the compat root to use for the container in
146+
// the case of nvgpu-only devices.
147+
func WithCSVCompatContainerRoot(csvCompatContainerRoot string) Option {
148+
return func(o *nvcdilib) {
149+
o.csv.CompatContainerRoot = csvCompatContainerRoot
150+
}
151+
}
152+
145153
// WithConfigSearchPaths sets the search paths for config files.
146154
func WithConfigSearchPaths(paths []string) Option {
147155
return func(o *nvcdilib) {

0 commit comments

Comments
 (0)