Skip to content

Commit aad82d3

Browse files
elezarcdesiniotis
andcommitted
Add support for drop-in configs
This change adds explicit support for drop-in configs as supported by containerd and cri-o. Signed-off-by: Evan Lezar <elezar@nvidia.com> Co-authored-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
1 parent c88ce54 commit aad82d3

File tree

2 files changed

+215
-36
lines changed

2 files changed

+215
-36
lines changed

controllers/object_controls.go

Lines changed: 91 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -57,16 +57,18 @@ import (
5757
const (
5858
// DefaultContainerdConfigFile indicates default config file path for containerd
5959
DefaultContainerdConfigFile = "/etc/containerd/config.toml"
60+
// DefaultContainerdDropInConfigFile indicates default drop-in config file path for containerd
61+
DefaultContainerdDropInConfigFile = "/run/nvidia/toolkit/config/99-nvidia.toml"
6062
// DefaultContainerdSocketFile indicates default containerd socket file
6163
DefaultContainerdSocketFile = "/run/containerd/containerd.sock"
6264
// DefaultDockerConfigFile indicates default config file path for docker
6365
DefaultDockerConfigFile = "/etc/docker/daemon.json"
6466
// DefaultDockerSocketFile indicates default docker socket file
6567
DefaultDockerSocketFile = "/var/run/docker.sock"
66-
// DefaultCRIOConfigFile indicates default config file path for cri-o.
67-
// Note, config files in the drop-in directory, /etc/crio/crio.conf.d,
68-
// have a higher priority than the default /etc/crio/crio.conf file.
69-
DefaultCRIOConfigFile = "/etc/crio/crio.conf.d/99-nvidia.conf"
68+
// DefaultCRIOConfigFile indicates default config file path for cri-o.
69+
DefaultCRIOConfigFile = "/etc/crio/config.toml"
70+
// DefaultCRIODropInConfigFile indicates the default path to the drop-in config file for cri-o
71+
DefaultCRIODropInConfigFile = "/etc/crio/crio.conf.d/99-nvidia.conf"
7072
// TrustedCAConfigMapName indicates configmap with custom user CA injected
7173
TrustedCAConfigMapName = "gpu-operator-trusted-ca"
7274
// TrustedCABundleFileName indicates custom user ca certificate filename
@@ -95,6 +97,8 @@ const (
9597
DefaultRuntimeSocketTargetDir = "/runtime/sock-dir/"
9698
// DefaultRuntimeConfigTargetDir represents target directory where runtime config directory will be mounted
9799
DefaultRuntimeConfigTargetDir = "/runtime/config-dir/"
100+
// DefaultRuntimeDropInConfigTargetDir represents target directory where drop-in config directory will be mounted
101+
DefaultRuntimeDropInConfigTargetDir = "/runtime/config-dir.d/"
98102
// ValidatorImageEnvName indicates env name for validator image passed
99103
ValidatorImageEnvName = "VALIDATOR_IMAGE"
100104
// ValidatorImagePullPolicyEnvName indicates env name for validator image pull policy passed
@@ -1327,32 +1331,56 @@ func transformForRuntime(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec,
13271331
setContainerEnv(mainContainer, "CONTAINERD_RUNTIME_CLASS", getRuntimeClass(config))
13281332
}
13291333

1334+
// For runtime config files we have top-level configs and drop-in files.
1335+
// These are supported as follows:
1336+
// * Docker only supports top-level config files.
1337+
// * Containerd supports drop-in files, but requires modification to the top-level config
1338+
// * Crio supports drop-in files at a predefined location. The top-level config may be read
1339+
// but should not be updated.
1340+
13301341
// setup mounts for runtime config file
1331-
runtimeConfigFile, err := getRuntimeConfigFile(mainContainer, runtime)
1342+
runtimeConfigFiles, err := getRuntimeConfigFiles(mainContainer, runtime)
13321343
if err != nil {
13331344
return fmt.Errorf("error getting path to runtime config file: %v", err)
13341345
}
1335-
sourceConfigFileName := path.Base(runtimeConfigFile)
13361346

1337-
var configEnvvarName string
1338-
switch runtime {
1339-
case gpuv1.Containerd.String():
1340-
configEnvvarName = "CONTAINERD_CONFIG"
1341-
case gpuv1.Docker.String():
1342-
configEnvvarName = "DOCKER_CONFIG"
1343-
case gpuv1.CRIO.String():
1344-
configEnvvarName = "CRIO_CONFIG"
1347+
// Handle the top-level configs
1348+
if runtimeConfigFiles.topLevelConfigFile != "" {
1349+
sourceConfigFileName := path.Base(runtimeConfigFiles.topLevelConfigFile)
1350+
sourceConfigDir := path.Dir(runtimeConfigFiles.topLevelConfigFile)
1351+
containerConfigDir := DefaultRuntimeConfigTargetDir
1352+
setContainerEnv(mainContainer, "RUNTIME_CONFIG", containerConfigDir+sourceConfigFileName)
1353+
setContainerEnv(mainContainer, runtimeConfigFiles.envvarName, containerConfigDir+sourceConfigFileName)
1354+
1355+
volMountConfigName := fmt.Sprintf("%s-config", runtime)
1356+
volMountConfig := corev1.VolumeMount{Name: volMountConfigName, MountPath: containerConfigDir}
1357+
mainContainer.VolumeMounts = append(mainContainer.VolumeMounts, volMountConfig)
1358+
1359+
configVol := corev1.Volume{Name: volMountConfigName, VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: sourceConfigDir, Type: newHostPathType(corev1.HostPathDirectoryOrCreate)}}}
1360+
obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, configVol)
13451361
}
13461362

1347-
setContainerEnv(mainContainer, "RUNTIME_CONFIG", DefaultRuntimeConfigTargetDir+sourceConfigFileName)
1348-
setContainerEnv(mainContainer, configEnvvarName, DefaultRuntimeConfigTargetDir+sourceConfigFileName)
1363+
// Handle the drop-in configs
1364+
// TODO: It's a bit of a hack to skip the `nvidia-kata-manager` container here.
1365+
// Ideally if the two projects are using the SAME API then this should be
1366+
// captured more rigorously.
1367+
// Note that we probably want to implement drop-in file support in the
1368+
// kata manager in any case -- in which case it will be good to use a
1369+
// similar implementation.
1370+
if runtimeConfigFiles.dropInConfigFile != "" && containerName != "nvidia-kata-manager" {
1371+
sourceConfigFileName := path.Base(runtimeConfigFiles.dropInConfigFile)
1372+
sourceConfigDir := path.Dir(runtimeConfigFiles.dropInConfigFile)
1373+
containerConfigDir := DefaultRuntimeDropInConfigTargetDir
1374+
setContainerEnv(mainContainer, "RUNTIME_DROP_IN_CONFIG", containerConfigDir+sourceConfigFileName)
1375+
setContainerEnv(mainContainer, "RUNTIME_DROP_IN_CONFIG_HOST_PATH", runtimeConfigFiles.dropInConfigFile)
13491376

1350-
volMountConfigName := fmt.Sprintf("%s-config", runtime)
1351-
volMountConfig := corev1.VolumeMount{Name: volMountConfigName, MountPath: DefaultRuntimeConfigTargetDir}
1352-
mainContainer.VolumeMounts = append(mainContainer.VolumeMounts, volMountConfig)
1377+
volMountConfigName := fmt.Sprintf("%s-drop-in-config", runtime)
1378+
volMountConfig := corev1.VolumeMount{Name: volMountConfigName, MountPath: containerConfigDir}
1379+
mainContainer.VolumeMounts = append(mainContainer.VolumeMounts, volMountConfig)
13531380

1354-
configVol := corev1.Volume{Name: volMountConfigName, VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: path.Dir(runtimeConfigFile), Type: newHostPathType(corev1.HostPathDirectoryOrCreate)}}}
1355-
obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, configVol)
1381+
configVol := corev1.Volume{Name: volMountConfigName, VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: sourceConfigDir, Type: newHostPathType(corev1.HostPathDirectoryOrCreate)}}}
1382+
obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, configVol)
1383+
}
13561384

13571385
// setup mounts for runtime socket file
13581386
runtimeSocketFile, err := getRuntimeSocketFile(mainContainer, runtime)
@@ -2357,30 +2385,60 @@ func TransformNodeStatusExporter(obj *appsv1.DaemonSet, config *gpuv1.ClusterPol
23572385
return nil
23582386
}
23592387

2388+
type runtimeConfigFiles struct {
2389+
envvarName string
2390+
topLevelConfigFile string
2391+
dropInConfigFile string
2392+
}
2393+
23602394
// get runtime (docker, containerd, cri-o) top-level and drop-in config file paths based on toolkit container env or defaults
2361-
func getRuntimeConfigFile(c *corev1.Container, runtime string) (string, error) {
2362-
var runtimeConfigFile string
2395+
func getRuntimeConfigFiles(c *corev1.Container, runtime string) (runtimeConfigFiles, error) {
23632396
switch runtime {
23642397
case gpuv1.Docker.String():
2365-
runtimeConfigFile = DefaultDockerConfigFile
2398+
topLevelConfigFile := DefaultDockerConfigFile
23662399
if value := getContainerEnv(c, "DOCKER_CONFIG"); value != "" {
2367-
runtimeConfigFile = value
2368-
}
2400+
topLevelConfigFile = value
2401+
}
2402+
return runtimeConfigFiles{
2403+
topLevelConfigFile: topLevelConfigFile,
2404+
// Docker does not support drop-in files.
2405+
dropInConfigFile: "",
2406+
envvarName: "DOCKER_CONFIG",
2407+
}, nil
23692408
case gpuv1.Containerd.String():
2370-
runtimeConfigFile = DefaultContainerdConfigFile
2409+
topLevelConfigFile := DefaultContainerdConfigFile
2410+
// TODO: We should also read RUNTIME_CONFIG here
23712411
if value := getContainerEnv(c, "CONTAINERD_CONFIG"); value != "" {
2372-
runtimeConfigFile = value
2412+
topLevelConfigFile = value
2413+
}
2414+
dropInConfigFile := DefaultContainerdDropInConfigFile
2415+
if value := getContainerEnv(c, "RUNTIME_DROP_IN_CONFIG"); value != "" {
2416+
dropInConfigFile = value
23732417
}
2418+
return runtimeConfigFiles{
2419+
topLevelConfigFile: topLevelConfigFile,
2420+
dropInConfigFile: dropInConfigFile,
2421+
envvarName: "CONTAINERD_CONFIG",
2422+
}, nil
23742423
case gpuv1.CRIO.String():
2375-
runtimeConfigFile = DefaultCRIOConfigFile
2424+
// TODO: We should still allow the top-level config to be specified
2425+
// TODO: We should also read RUNTIME_CONFIG here
2426+
topLevelConfigFile := DefaultCRIOConfigFile
23762427
if value := getContainerEnv(c, "CRIO_CONFIG"); value != "" {
2377-
runtimeConfigFile = value
2428+
topLevelConfigFile = value
23782429
}
2430+
dropInConfigFile := DefaultCRIODropInConfigFile
2431+
if value := getContainerEnv(c, "RUNTIME_DROP_IN_CONFIG"); value != "" {
2432+
dropInConfigFile = value
2433+
}
2434+
return runtimeConfigFiles{
2435+
topLevelConfigFile: topLevelConfigFile,
2436+
dropInConfigFile: dropInConfigFile,
2437+
envvarName: "CRIO_CONFIG",
2438+
}, nil
23792439
default:
2380-
return "", fmt.Errorf("invalid runtime: %s", runtime)
2440+
return runtimeConfigFiles{}, fmt.Errorf("invalid runtime: %s", runtime)
23812441
}
2382-
2383-
return runtimeConfigFile, nil
23842442
}
23852443

23862444
// get runtime(docker, containerd) socket file path based on toolkit container env or default

controllers/transforms_test.go

Lines changed: 124 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,7 @@ func TestTransformForRuntime(t *testing.T) {
326326
WithContainer(corev1.Container{Name: "test-ctr"}),
327327
expectedOutput: NewDaemonset().
328328
WithHostPathVolume("containerd-config", filepath.Dir(DefaultContainerdConfigFile), newHostPathType(corev1.HostPathDirectoryOrCreate)).
329+
WithHostPathVolume("containerd-drop-in-config", "/run/nvidia/toolkit/config", newHostPathType(corev1.HostPathDirectoryOrCreate)).
329330
WithHostPathVolume("containerd-socket", filepath.Dir(DefaultContainerdSocketFile), nil).
330331
WithContainer(corev1.Container{
331332
Name: "test-ctr",
@@ -334,11 +335,14 @@ func TestTransformForRuntime(t *testing.T) {
334335
{Name: "CONTAINERD_RUNTIME_CLASS", Value: DefaultRuntimeClass},
335336
{Name: "RUNTIME_CONFIG", Value: filepath.Join(DefaultRuntimeConfigTargetDir, filepath.Base(DefaultContainerdConfigFile))},
336337
{Name: "CONTAINERD_CONFIG", Value: filepath.Join(DefaultRuntimeConfigTargetDir, filepath.Base(DefaultContainerdConfigFile))},
338+
{Name: "RUNTIME_DROP_IN_CONFIG", Value: "/runtime/config-dir.d/99-nvidia.toml"},
339+
{Name: "RUNTIME_DROP_IN_CONFIG_HOST_PATH", Value: "/run/nvidia/toolkit/config/99-nvidia.toml"},
337340
{Name: "RUNTIME_SOCKET", Value: filepath.Join(DefaultRuntimeSocketTargetDir, filepath.Base(DefaultContainerdSocketFile))},
338341
{Name: "CONTAINERD_SOCKET", Value: filepath.Join(DefaultRuntimeSocketTargetDir, filepath.Base(DefaultContainerdSocketFile))},
339342
},
340343
VolumeMounts: []corev1.VolumeMount{
341344
{Name: "containerd-config", MountPath: DefaultRuntimeConfigTargetDir},
345+
{Name: "containerd-drop-in-config", MountPath: "/runtime/config-dir.d/"},
342346
{Name: "containerd-socket", MountPath: DefaultRuntimeSocketTargetDir},
343347
},
344348
}),
@@ -348,16 +352,20 @@ func TestTransformForRuntime(t *testing.T) {
348352
runtime: gpuv1.CRIO,
349353
input: NewDaemonset().WithContainer(corev1.Container{Name: "test-ctr"}),
350354
expectedOutput: NewDaemonset().
351-
WithHostPathVolume("crio-config", filepath.Dir(DefaultCRIOConfigFile), newHostPathType(corev1.HostPathDirectoryOrCreate)).
355+
WithHostPathVolume("crio-config", "/etc/crio", newHostPathType(corev1.HostPathDirectoryOrCreate)).
356+
WithHostPathVolume("crio-drop-in-config", "/etc/crio/crio.conf.d", newHostPathType(corev1.HostPathDirectoryOrCreate)).
352357
WithContainer(corev1.Container{
353358
Name: "test-ctr",
354359
Env: []corev1.EnvVar{
355360
{Name: "RUNTIME", Value: gpuv1.CRIO.String()},
356-
{Name: "RUNTIME_CONFIG", Value: filepath.Join(DefaultRuntimeConfigTargetDir, filepath.Base(DefaultCRIOConfigFile))},
357-
{Name: "CRIO_CONFIG", Value: filepath.Join(DefaultRuntimeConfigTargetDir, filepath.Base(DefaultCRIOConfigFile))},
361+
{Name: "RUNTIME_CONFIG", Value: "/runtime/config-dir/config.toml"},
362+
{Name: "CRIO_CONFIG", Value: "/runtime/config-dir/config.toml"},
363+
{Name: "RUNTIME_DROP_IN_CONFIG", Value: "/runtime/config-dir.d/99-nvidia.conf"},
364+
{Name: "RUNTIME_DROP_IN_CONFIG_HOST_PATH", Value: "/etc/crio/crio.conf.d/99-nvidia.conf"},
358365
},
359366
VolumeMounts: []corev1.VolumeMount{
360367
{Name: "crio-config", MountPath: DefaultRuntimeConfigTargetDir},
368+
{Name: "crio-drop-in-config", MountPath: "/runtime/config-dir.d/"},
361369
},
362370
}),
363371
},
@@ -647,15 +655,19 @@ func TestTransformToolkit(t *testing.T) {
647655
{Name: "CONTAINERD_RUNTIME_CLASS", Value: "nvidia"},
648656
{Name: "RUNTIME_CONFIG", Value: "/runtime/config-dir/config.toml"},
649657
{Name: "CONTAINERD_CONFIG", Value: "/runtime/config-dir/config.toml"},
658+
{Name: "RUNTIME_DROP_IN_CONFIG", Value: "/runtime/config-dir.d/99-nvidia.toml"},
659+
{Name: "RUNTIME_DROP_IN_CONFIG_HOST_PATH", Value: "/run/nvidia/toolkit/config/99-nvidia.toml"},
650660
{Name: "RUNTIME_SOCKET", Value: "/runtime/sock-dir/containerd.sock"},
651661
{Name: "CONTAINERD_SOCKET", Value: "/runtime/sock-dir/containerd.sock"},
652662
},
653663
VolumeMounts: []corev1.VolumeMount{
654664
{Name: "containerd-config", MountPath: "/runtime/config-dir/"},
665+
{Name: "containerd-drop-in-config", MountPath: "/runtime/config-dir.d/"},
655666
{Name: "containerd-socket", MountPath: "/runtime/sock-dir/"},
656667
},
657668
}).
658669
WithHostPathVolume("containerd-config", "/etc/containerd", newHostPathType(corev1.HostPathDirectoryOrCreate)).
670+
WithHostPathVolume("containerd-drop-in-config", "/run/nvidia/toolkit/config", newHostPathType(corev1.HostPathDirectoryOrCreate)).
659671
WithHostPathVolume("containerd-socket", "/run/containerd", nil).
660672
WithPullSecret("pull-secret"),
661673
},
@@ -718,14 +730,18 @@ func TestTransformToolkit(t *testing.T) {
718730
{Name: "CONTAINERD_SET_AS_DEFAULT", Value: "true"},
719731
{Name: "RUNTIME", Value: "containerd"},
720732
{Name: "RUNTIME_CONFIG", Value: "/runtime/config-dir/config.toml"},
733+
{Name: "RUNTIME_DROP_IN_CONFIG", Value: "/runtime/config-dir.d/99-nvidia.toml"},
734+
{Name: "RUNTIME_DROP_IN_CONFIG_HOST_PATH", Value: "/run/nvidia/toolkit/config/99-nvidia.toml"},
721735
{Name: "RUNTIME_SOCKET", Value: "/runtime/sock-dir/containerd.sock"},
722736
},
723737
VolumeMounts: []corev1.VolumeMount{
724738
{Name: "containerd-config", MountPath: "/runtime/config-dir/"},
739+
{Name: "containerd-drop-in-config", MountPath: "/runtime/config-dir.d/"},
725740
{Name: "containerd-socket", MountPath: "/runtime/sock-dir/"},
726741
},
727742
}).
728743
WithHostPathVolume("containerd-config", "/var/lib/rancher/k3s/agent/etc/containerd", newHostPathType(corev1.HostPathDirectoryOrCreate)).
744+
WithHostPathVolume("containerd-drop-in-config", "/run/nvidia/toolkit/config", newHostPathType(corev1.HostPathDirectoryOrCreate)).
729745
WithHostPathVolume("containerd-socket", "/run/k3s/containerd", nil).
730746
WithPullSecret("pull-secret"),
731747
},
@@ -1821,3 +1837,108 @@ func TestTransformDriver(t *testing.T) {
18211837
})
18221838
}
18231839
}
1840+
1841+
func TestGetRuntimeConfigFiles(t *testing.T) {
1842+
testCases := []struct {
1843+
description string
1844+
container corev1.Container
1845+
runtime string
1846+
expectedRuntimeConfigFiles runtimeConfigFiles
1847+
errorExpected bool
1848+
}{
1849+
{
1850+
description: "invalid runtime",
1851+
container: corev1.Container{},
1852+
runtime: "foo",
1853+
expectedRuntimeConfigFiles: runtimeConfigFiles{},
1854+
errorExpected: true,
1855+
},
1856+
{
1857+
description: "docker",
1858+
container: corev1.Container{},
1859+
runtime: gpuv1.Docker.String(),
1860+
expectedRuntimeConfigFiles: runtimeConfigFiles{
1861+
topLevelConfigFile: DefaultDockerConfigFile,
1862+
dropInConfigFile: "",
1863+
envvarName: "DOCKER_CONFIG",
1864+
},
1865+
},
1866+
{
1867+
description: "docker, config path overridden",
1868+
container: corev1.Container{
1869+
Env: []corev1.EnvVar{
1870+
{Name: "DOCKER_CONFIG", Value: "/path/to/docker/daemon.json"},
1871+
},
1872+
},
1873+
runtime: gpuv1.Docker.String(),
1874+
expectedRuntimeConfigFiles: runtimeConfigFiles{
1875+
topLevelConfigFile: "/path/to/docker/daemon.json",
1876+
dropInConfigFile: "",
1877+
envvarName: "DOCKER_CONFIG",
1878+
},
1879+
},
1880+
{
1881+
description: "containerd",
1882+
container: corev1.Container{},
1883+
runtime: gpuv1.Containerd.String(),
1884+
expectedRuntimeConfigFiles: runtimeConfigFiles{
1885+
topLevelConfigFile: DefaultContainerdConfigFile,
1886+
dropInConfigFile: DefaultContainerdDropInConfigFile,
1887+
envvarName: "CONTAINERD_CONFIG",
1888+
},
1889+
},
1890+
{
1891+
description: "containerd, config path overridden",
1892+
container: corev1.Container{
1893+
Env: []corev1.EnvVar{
1894+
{Name: "CONTAINERD_CONFIG", Value: "/path/to/containerd/config.toml"},
1895+
{Name: "RUNTIME_DROP_IN_CONFIG", Value: "/path/to/containerd/drop-in/config.toml"},
1896+
},
1897+
},
1898+
runtime: gpuv1.Containerd.String(),
1899+
expectedRuntimeConfigFiles: runtimeConfigFiles{
1900+
topLevelConfigFile: "/path/to/containerd/config.toml",
1901+
dropInConfigFile: "/path/to/containerd/drop-in/config.toml",
1902+
envvarName: "CONTAINERD_CONFIG",
1903+
},
1904+
},
1905+
{
1906+
description: "crio",
1907+
container: corev1.Container{},
1908+
runtime: gpuv1.CRIO.String(),
1909+
expectedRuntimeConfigFiles: runtimeConfigFiles{
1910+
topLevelConfigFile: DefaultCRIOConfigFile,
1911+
dropInConfigFile: DefaultCRIODropInConfigFile,
1912+
envvarName: "CRIO_CONFIG",
1913+
},
1914+
},
1915+
{
1916+
description: "crio, config path overridden",
1917+
container: corev1.Container{
1918+
Env: []corev1.EnvVar{
1919+
{Name: "CRIO_CONFIG", Value: "/path/to/crio/config.toml"},
1920+
{Name: "RUNTIME_DROP_IN_CONFIG", Value: "/path/to/crio/drop-in/config.toml"},
1921+
},
1922+
},
1923+
runtime: gpuv1.CRIO.String(),
1924+
expectedRuntimeConfigFiles: runtimeConfigFiles{
1925+
topLevelConfigFile: "/path/to/crio/config.toml",
1926+
dropInConfigFile: "/path/to/crio/drop-in/config.toml",
1927+
envvarName: "CRIO_CONFIG",
1928+
},
1929+
},
1930+
}
1931+
1932+
for _, tc := range testCases {
1933+
t.Run(tc.description, func(t *testing.T) {
1934+
runtimeConfigFiles, err := getRuntimeConfigFiles(&tc.container, tc.runtime)
1935+
if tc.errorExpected {
1936+
require.Error(t, err)
1937+
return
1938+
}
1939+
require.NoError(t, err)
1940+
require.EqualValues(t, tc.expectedRuntimeConfigFiles, runtimeConfigFiles)
1941+
})
1942+
}
1943+
1944+
}

0 commit comments

Comments
 (0)