Skip to content

Commit 54f487f

Browse files
Adding env var to confirm CDI is enabled
Signed-off-by: Vishesh Tanksale <vtanksale@nvidia.com>
1 parent aded8ca commit 54f487f

File tree

3 files changed

+19
-0
lines changed

3 files changed

+19
-0
lines changed

cmd/compute-domain-daemon/main.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,13 @@ func newApp() *cli.App {
204204

205205
// Run invokes the IMEX daemon and manages its lifecycle.
206206
func run(ctx context.Context, cancel context.CancelFunc, flags *Flags) error {
207+
// Verify that CDI container edits were applied by the container runtime.
208+
// If the env var is not set to "true", CDI is likely disabled and the daemon
209+
// cannot function correctly (e.g. the /imexd mount will be missing).
210+
if os.Getenv("NVIDIA_CDI_EDITS_APPLIED") != "true" {
211+
return fmt.Errorf("CDI container edits did not apply -- is CDI enabled in your container runtime?")
212+
}
213+
207214
common.StartDebugSignalHandlers()
208215

209216
// Validate feature gate dependencies

cmd/compute-domain-kubelet-plugin/computedomain.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,9 @@ func (s *ComputeDomainDaemonSettings) GetCDIContainerEditsCommon(ctx context.Con
162162
edits := &cdiapi.ContainerEdits{
163163
ContainerEdits: &cdispec.ContainerEdits{
164164
Env: []string{
165+
// This is a value that the CD daemon checks at startup to verify that CDI edits were applied by the container runtime.
166+
// If the value is not present, CDI is likely disabled and the daemon cannot function correctly (e.g. the /imexd mount will be missing).
167+
"NVIDIA_CDI_EDITS_APPLIED=true",
165168
fmt.Sprintf("CLIQUE_ID=%s", s.manager.cliqueID),
166169
fmt.Sprintf("COMPUTE_DOMAIN_UUID=%s", cd.UID),
167170
fmt.Sprintf("COMPUTE_DOMAIN_NAME=%s", cd.Name),

internal/common/util.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,15 @@ import (
2929

3030
const dumpPath = "/tmp/goroutine-stacks.dump"
3131

32+
// CDIEditsAppliedEnvKey and CDIEditsAppliedEnvValue are a key/value
33+
// pair injected via CDI container edits into the CD daemon container. The CD
34+
// daemon checks for this key/value pair at startup to verify that CDI edits were
35+
// applied by the container runtime. If not present, CDI is likely disabled.
36+
const (
37+
CDIEditsAppliedEnvKey = "NVIDIA_CDI_EDITS_APPLIED"
38+
CDIEditsAppliedEnvValue = "true"
39+
)
40+
3241
// Set up SIGUSR2 handler: if triggered, acquire stack traces for all goroutines
3342
// in this process. Dump to file, and fall back to emitting to stderr if file
3443
// output didn't work.

0 commit comments

Comments
 (0)