Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions cmd/nvidia-ctk/cdi/generate/generate.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,9 @@ type options struct {
featureFlags []string

csv struct {
files []string
ignorePatterns []string
files []string
ignorePatterns []string
CompatContainerRoot string
}

noAllDevice bool
Expand Down Expand Up @@ -212,6 +213,12 @@ func (m command) build() *cli.Command {
Destination: &opts.csv.ignorePatterns,
Sources: cli.EnvVars("NVIDIA_CTK_CDI_GENERATE_CSV_IGNORE_PATTERNS"),
},
&cli.StringFlag{
Name: "csv.compat-container-root",
Usage: "specify the container folder to use for CUDA Forward Compatibility in non-standard containers",
Destination: &opts.csv.CompatContainerRoot,
Sources: cli.EnvVars("NVIDIA_CTK_CDI_GENERATE_CSV_CONTAINER_COMPAT_ROOT"),
},
&cli.StringSliceFlag{
Name: "disable-hook",
Aliases: []string{"disable-hooks"},
Expand Down Expand Up @@ -384,6 +391,7 @@ func (m command) generateSpecs(opts *options) ([]generatedSpecs, error) {
nvcdi.WithLibrarySearchPaths(opts.librarySearchPaths),
nvcdi.WithCSVFiles(opts.csv.files),
nvcdi.WithCSVIgnorePatterns(opts.csv.ignorePatterns),
nvcdi.WithCSVCompatContainerRoot(opts.csv.CompatContainerRoot),
nvcdi.WithDisabledHooks(opts.disabledHooks...),
nvcdi.WithEnabledHooks(opts.enabledHooks...),
nvcdi.WithFeatureFlags(opts.featureFlags...),
Expand Down
3 changes: 3 additions & 0 deletions internal/config/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ type jitCDIModeConfig struct {

// csvModeConfig holds the CSV-mode settings of the runtime configuration.
type csvModeConfig struct {
// MountSpecPath is read from the "mount-spec-path" TOML key.
MountSpecPath string `toml:"mount-spec-path"`
// CompatContainerRoot specifies the compat root used when the standard
// CUDA compat libraries should not be used. It is read from the
// "compat-container-root" TOML key and omitted on output when empty.
CompatContainerRoot string `toml:"compat-container-root,omitempty"`
}

type legacyModeConfig struct {
Expand Down
1 change: 1 addition & 0 deletions internal/modifier/csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ func NewCSVModifier(logger logger.Interface, cfg *config.Config, container image
nvcdi.WithNVIDIACDIHookPath(cfg.NVIDIACTKConfig.Path),
nvcdi.WithMode(nvcdi.ModeCSV),
nvcdi.WithCSVFiles(csvFiles),
nvcdi.WithCSVCompatContainerRoot(cfg.NVIDIAContainerRuntimeConfig.Modes.CSV.CompatContainerRoot),
)
if err != nil {
return nil, fmt.Errorf("failed to construct CDI library: %v", err)
Expand Down
32 changes: 27 additions & 5 deletions pkg/nvcdi/lib-csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,36 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/tegra"
"github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/tegra/csv"
)

type csvlib nvcdilib
const (
// defaultOrinCompatContainerRoot is the container path that asCSVLib
// applies when no CSV compat container root has been configured.
defaultOrinCompatContainerRoot = "/usr/local/cuda/compat-orin"
)

// csvOptions groups the CSV-mode specific settings of an nvcdilib.
type csvOptions struct {
// Files lists the CSV files from which mount specs are loaded.
Files []string
// IgnorePatterns lists the patterns used to drop symlink mount specs from
// the driver mount specs.
IgnorePatterns []string
// CompatContainerRoot is the CUDA compat root passed on for devices whose
// name contains "Orin (nvgpu)".
CompatContainerRoot string
}

// csvlib is an nvcdilib used as the device spec generator factory for CSV
// mode.
type csvlib nvcdilib
type mixedcsvlib nvcdilib

// Compile-time check that *csvlib implements deviceSpecGeneratorFactory.
var _ deviceSpecGeneratorFactory = (*csvlib)(nil)

// asCSVLib fills in the CSV-specific defaults that have not been set
// explicitly and returns the same nvcdilib instance viewed as a *csvlib.
//
// An empty file list is replaced by csv.DefaultFileList(), and an empty compat
// container root is replaced by defaultOrinCompatContainerRoot. The receiver is
// modified in place.
func (l *nvcdilib) asCSVLib() *csvlib {
	opts := &l.csv

	if len(opts.Files) == 0 {
		opts.Files = csv.DefaultFileList()
	}
	if opts.CompatContainerRoot == "" {
		opts.CompatContainerRoot = defaultOrinCompatContainerRoot
	}

	return (*csvlib)(l)
}

// DeviceSpecGenerators creates a set of generators for the specified set of
// devices.
// If NVML is not available or the disable-multiple-csv-devices feature flag is
Expand Down Expand Up @@ -171,7 +193,7 @@ func (l *csvDeviceGenerator) deviceNodeDiscoverer() (discover.Discover, error) {

func (l *csvDeviceGenerator) deviceNodeMountSpecs() tegra.MountSpecPathsByTyper {
mountSpecs := tegra.Transform(
tegra.MountSpecsFromCSVFiles(l.logger, l.csvFiles...),
tegra.MountSpecsFromCSVFiles(l.logger, l.csv.Files...),
// We remove non-device nodes.
tegra.OnlyDeviceNodes(),
)
Expand Down Expand Up @@ -388,10 +410,10 @@ func isIntegratedGPU(d nvml.Device) (bool, error) {
func (l *csvlib) driverDiscoverer() (discover.Discover, error) {
mountSpecs := tegra.Transform(
tegra.Transform(
tegra.MountSpecsFromCSVFiles(l.logger, l.csvFiles...),
tegra.MountSpecsFromCSVFiles(l.logger, l.csv.Files...),
tegra.WithoutDeviceNodes(),
),
tegra.IgnoreSymlinkMountSpecsByPattern(l.csvIgnorePatterns...),
tegra.IgnoreSymlinkMountSpecsByPattern(l.csv.IgnorePatterns...),
)
driverDiscoverer, err := tegra.New(
tegra.WithLogger(l.logger),
Expand Down Expand Up @@ -467,7 +489,7 @@ func (l *csvlib) cudaCompatDiscoverer() discover.Discover {
// TODO: Should this be overridable through a feature flag / config option?
if strings.Contains(name, "Orin (nvgpu)") {
// TODO: This should probably be a constant or configurable.
cudaCompatContainerRoot = "/usr/local/cuda/compat-orin"
cudaCompatContainerRoot = l.csv.CompatContainerRoot
break
}
}
Expand Down
111 changes: 80 additions & 31 deletions pkg/nvcdi/lib-csv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,39 +63,50 @@ func TestDeviceSpecGenerators(t *testing.T) {
infolib: &infoInterfaceMock{
HasNvmlFunc: func() (bool, string) { return true, "forced" },
},
// TODO: Replace this with a system-specific implementation once available.
nvmllib: &mock.Interface{
InitFunc: func() nvml.Return {
return nvml.SUCCESS
},
ShutdownFunc: func() nvml.Return {
return nvml.SUCCESS
},
SystemGetDriverVersionFunc: func() (string, nvml.Return) {
return "540.3.0", nvml.SUCCESS
},
DeviceGetCountFunc: func() (int, nvml.Return) {
return 1, nvml.SUCCESS
nvmllib: mockOrinServer(),
},
expectedDeviceSpecs: []specs.Device{
{
Name: "0",
ContainerEdits: specs.ContainerEdits{
DeviceNodes: []*specs.DeviceNode{
{Path: "/dev/nvidia0", HostPath: "/dev/nvidia0"},
},
},
DeviceGetHandleByIndexFunc: func(n int) (nvml.Device, nvml.Return) {
if n != 0 {
return nil, nvml.ERROR_INVALID_ARGUMENT
}
device := &mock.Device{
GetUUIDFunc: func() (string, nvml.Return) {
return "GPU-orin", nvml.SUCCESS
},
GetNameFunc: func() (string, nvml.Return) {
return "Orin (nvgpu)", nvml.SUCCESS
},
GetPciInfoFunc: func() (nvml.PciInfo, nvml.Return) {
return nvml.PciInfo{}, nvml.ERROR_NOT_SUPPORTED
},
}
return device, nvml.SUCCESS
},
},
expectedCommonEdits: &cdi.ContainerEdits{
ContainerEdits: &specs.ContainerEdits{
Hooks: []*specs.Hook{
{
HookName: "createContainer",
Path: "/usr/bin/nvidia-cdi-hook",
Args: []string{"nvidia-cdi-hook", "enable-cuda-compat", "--host-driver-version=540.3.0", "--cuda-compat-container-root=/usr/local/cuda/compat-orin"},
Env: []string{"NVIDIA_CTK_DEBUG=false"},
},
{
HookName: "createContainer",
Path: "/usr/bin/nvidia-cdi-hook",
Args: []string{"nvidia-cdi-hook", "update-ldcache"},
Env: []string{"NVIDIA_CTK_DEBUG=false"},
},
},
},
},
},
{
description: "single orin CSV device; custom container compat root",
rootfsFolder: "rootfs-orin",
lib: &csvlib{
// test-case specific
infolib: &infoInterfaceMock{
HasNvmlFunc: func() (bool, string) { return true, "forced" },
},
nvmllib: mockOrinServer(),
csv: csvOptions{
CompatContainerRoot: "/another/compat/root",
},
},
expectedDeviceSpecs: []specs.Device{
{
Name: "0",
Expand All @@ -112,7 +123,7 @@ func TestDeviceSpecGenerators(t *testing.T) {
{
HookName: "createContainer",
Path: "/usr/bin/nvidia-cdi-hook",
Args: []string{"nvidia-cdi-hook", "enable-cuda-compat", "--host-driver-version=540.3.0", "--cuda-compat-container-root=/usr/local/cuda/compat-orin"},
Args: []string{"nvidia-cdi-hook", "enable-cuda-compat", "--host-driver-version=540.3.0", "--cuda-compat-container-root=/another/compat/root"},
Env: []string{"NVIDIA_CTK_DEBUG=false"},
},
{
Expand Down Expand Up @@ -188,10 +199,13 @@ func TestDeviceSpecGenerators(t *testing.T) {

tc.lib.driverRoot = driverRoot
tc.lib.devRoot = driverRoot
tc.lib.csvFiles = []string{
tc.lib.csv.Files = []string{
filepath.Join(driverRoot, "/etc/nvidia-container-runtime/host-files-for-container.d/devices.csv"),
filepath.Join(driverRoot, "/etc/nvidia-container-runtime/host-files-for-container.d/drivers.csv"),
}
if tc.lib.csv.CompatContainerRoot == "" {
tc.lib.csv.CompatContainerRoot = defaultOrinCompatContainerRoot
}

t.Run(tc.description, func(t *testing.T) {
generator, err := tc.lib.DeviceSpecGenerators("all")
Expand Down Expand Up @@ -230,6 +244,41 @@ func stripRoot[T any](root string, v T) T {
return modified
}

// mockOrinServer returns an NVML mock for a system that reports driver version
// "540.3.0" and exactly one device. The device is named "Orin (nvgpu)", has the
// UUID "GPU-orin", and answers PCI info queries with ERROR_NOT_SUPPORTED.
// Requesting any index other than 0 yields ERROR_INVALID_ARGUMENT.
//
// TODO: We should move this mock to go-nvml/mock
func mockOrinServer() nvml.Interface {
	// newOrin builds a fresh device mock for every handle lookup.
	newOrin := func() nvml.Device {
		return &mock.Device{
			GetUUIDFunc: func() (string, nvml.Return) {
				return "GPU-orin", nvml.SUCCESS
			},
			GetNameFunc: func() (string, nvml.Return) {
				return "Orin (nvgpu)", nvml.SUCCESS
			},
			GetPciInfoFunc: func() (nvml.PciInfo, nvml.Return) {
				return nvml.PciInfo{}, nvml.ERROR_NOT_SUPPORTED
			},
		}
	}

	server := &mock.Interface{
		InitFunc: func() nvml.Return {
			return nvml.SUCCESS
		},
		ShutdownFunc: func() nvml.Return {
			return nvml.SUCCESS
		},
		SystemGetDriverVersionFunc: func() (string, nvml.Return) {
			return "540.3.0", nvml.SUCCESS
		},
		DeviceGetCountFunc: func() (int, nvml.Return) {
			return 1, nvml.SUCCESS
		},
		DeviceGetHandleByIndexFunc: func(index int) (nvml.Device, nvml.Return) {
			if index == 0 {
				return newOrin(), nvml.SUCCESS
			}
			return nil, nvml.ERROR_INVALID_ARGUMENT
		},
	}
	return server
}

// TODO: We should move this mock to go-nvml/mock
func mockIGXServer() nvml.Interface {
thor := &mock.Device{
Expand Down
9 changes: 2 additions & 7 deletions pkg/nvcdi/lib.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvsandboxutils"
"github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/tegra/csv"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
)

Expand All @@ -45,8 +44,7 @@ type nvcdilib struct {
configSearchPaths []string
librarySearchPaths []string

csvFiles []string
csvIgnorePatterns []string
csv csvOptions

vendor string
class string
Expand Down Expand Up @@ -115,10 +113,7 @@ func New(opts ...Option) (Interface, error) {
var factory deviceSpecGeneratorFactory
switch l.resolveMode() {
case ModeCSV:
if len(l.csvFiles) == 0 {
l.csvFiles = csv.DefaultFileList()
}
factory = (*csvlib)(l)
factory = l.asCSVLib()
case ModeManagement:
if l.vendor == "" {
l.vendor = "management.nvidia.com"
Expand Down
12 changes: 10 additions & 2 deletions pkg/nvcdi/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,14 +131,22 @@ func WithMergedDeviceOptions(opts ...transform.MergedDeviceOption) Option {
// WithCSVFiles returns an Option that sets the CSV files for the library.
func WithCSVFiles(csvFiles []string) Option {
return func(o *nvcdilib) {
o.csvFiles = csvFiles
o.csv.Files = csvFiles
}
}

// WithCSVIgnorePatterns returns an Option that sets the ignore patterns for
// entries in the CSV files.
func WithCSVIgnorePatterns(csvIgnorePatterns []string) Option {
return func(o *nvcdilib) {
o.csvIgnorePatterns = csvIgnorePatterns
o.csv.IgnorePatterns = csvIgnorePatterns
}
}

// WithCSVCompatContainerRoot returns an Option that stores the given path as
// the CSV compat container root of the library, i.e. the compat root to use
// for the container in the case of nvgpu-only devices.
func WithCSVCompatContainerRoot(csvCompatContainerRoot string) Option {
	setRoot := func(l *nvcdilib) {
		l.csv.CompatContainerRoot = csvCompatContainerRoot
	}
	return setRoot
}

Expand Down
Loading