@@ -23,11 +23,16 @@ import (
2323const (
2424 CCModeON CCMode = "ON"
2525 CCModeOFF CCMode = "OFF"
26+ CCModeDevTools CCMode = "DEVTOOLS"
2627 installerContainerID = "tee-gpu-driver-installer-container"
2728 installerSnapshotID = "tee-gpu-driver-installer-snapshot"
2829)
2930
3031var supportedCGPUTypes = []deviceinfo.GPUType {
32+ deviceinfo .L4 ,
33+ deviceinfo .T4 ,
34+ deviceinfo .A100_40GB ,
35+ deviceinfo .A100_80GB ,
3136 deviceinfo .H100 ,
3237}
3338
@@ -36,7 +41,7 @@ type CCMode string
3641
3742func (ccm CCMode ) isValid () error {
3843 switch ccm {
39- case CCModeOFF , CCModeON :
44+ case CCModeOFF , CCModeON , CCModeDevTools :
4045 return nil
4146 }
4247 return fmt .Errorf ("invalid gpu cc mode: %s" , ccm )
@@ -74,11 +79,11 @@ func (di *DriverInstaller) InstallGPUDrivers(ctx context.Context) error {
7479 }
7580
7681 if ! gpuType .OpenSupported () {
77- return fmt .Errorf ("unsupported GPU type %s, please retry with one of the supported confidential GPU types: %v" , gpuType .String (), supportedCGPUTypes )
82+ return fmt .Errorf ("unsupported open sourced kernel modules for GPU type %s, please retry with one of the supported GPU types: %v" , gpuType .String (), supportedCGPUTypes )
7883 }
7984
8085 ctx = namespaces .WithNamespace (ctx , namespaces .Default )
81- installerImageRef , err := getInstallerImageReference ()
86+ installerImageRef , err := getInstallerImageReference (InstallerImageRefFile )
8287 if err != nil {
8388 di .logger .Error (fmt .Sprintf ("failed to get the installer container image reference: %v" , err ))
8489 return err
@@ -90,8 +95,7 @@ func (di *DriverInstaller) InstallGPUDrivers(ctx context.Context) error {
9095 return fmt .Errorf ("failed to pull installer image: %v" , err )
9196 }
9297
93- installerDigest := image .Target ().Digest .String ()
94- if err := verifyInstallerImageDigest (installerDigest ); err != nil {
98+ if err := verifyInstallerImageDigest (image , InstallerImageDigestFile ); err != nil {
9599 return err
96100 }
97101
@@ -178,25 +182,27 @@ func (di *DriverInstaller) InstallGPUDrivers(ctx context.Context) error {
178182 if err = setGPUStateToReady (); err != nil {
179183 return fmt .Errorf ("failed to set the GPU state to ready: %v" , err )
180184 }
181- } else {
182- return fmt .Errorf ("confidential compute is not enabled for the gpu type %s" , gpuType )
183185 }
184186
185187 di .logger .Info ("GPU driver installation completed successfully" )
186188 return nil
187189}
188190
// getInstallerImageReference reads the cos-gpu-installer image reference from
// installerImageRefFile and returns it with surrounding whitespace trimmed.
//
// It returns an error when the file cannot be read or when the file contains
// nothing but whitespace. The read error is wrapped with %w, so callers can
// still inspect the cause with errors.Is / errors.As (for example to detect a
// missing file).
func getInstallerImageReference(installerImageRefFile string) (string, error) {
	imageRefBytes, err := os.ReadFile(installerImageRefFile)
	if err != nil {
		return "", fmt.Errorf("failed to get the cos-gpu-installer version: %w", err)
	}

	installerImageRef := strings.TrimSpace(string(imageRefBytes))
	if installerImageRef == "" {
		// An empty reference would otherwise surface later as an image pull failure.
		return "", fmt.Errorf("empty value of cos-gpu-installer image reference")
	}
	return installerImageRef, nil
}
197202
198- func verifyInstallerImageDigest (installerDigest string ) error {
199- imageDigestBytes , err := os .ReadFile (InstallerImageDigestFile )
203+ func verifyInstallerImageDigest (image containerd.Image , referenceDigestFile string ) error {
204+ installerDigest := image .Target ().Digest .String ()
205+ imageDigestBytes , err := os .ReadFile (referenceDigestFile )
200206 if err != nil {
201207 return fmt .Errorf ("failed to get the cos-gpu-installer image digest: %v" , err )
202208 }
@@ -239,16 +245,16 @@ func setGPUStateToReady() error {
239245}
240246
241247func isGPUCCModeEnabled () (bool , error ) {
242- ccMode , err := GetGPUCCMode ()
248+ ccMode , err := QueryCCMode ()
243249 if err != nil {
244250 return false , err
245251 }
246252 return ccMode == CCModeON , nil
247253}
248254
249- // GetGPUCCMode executes nvidia-smi to determine the current Confidential Computing (CC) mode status of the GPU.
250- // It returns the CC mode ("ON" or "OFF") and an error if the command fails or if the output cannot be parsed .
251- func GetGPUCCMode () (CCMode , error ) {
255+ // QueryCCMode executes nvidia-smi to determine the current Confidential Computing (CC) mode status of the GPU.
256+ // If DEVTOOLS mode is enabled, it would override CC mode as DEVTOOLS. DEVTOOLS mode would be enabled only when CC mode is ON .
257+ func QueryCCMode () (CCMode , error ) {
252258 // Run nvidia-smi conf-compute command to get the confidential computing mode status.
253259 nvidiaSmiCmd := fmt .Sprintf ("%s/bin/nvidia-smi" , InstallationHostDir )
254260 ccModeOutput , err := exec .Command (nvidiaSmiCmd , "conf-compute" , "-f" ).Output ()
@@ -259,7 +265,17 @@ func GetGPUCCMode() (CCMode, error) {
259265 if err != nil {
260266 return "" , err
261267 }
262- return CCMode (ccMode ), nil
268+
269+ devToolsEnabled , err := isDevToolsModeEnabled ()
270+ if err != nil {
271+ return "" , err
272+ }
273+
274+ if devToolsEnabled {
275+ ccMode = CCModeDevTools
276+ }
277+
278+ return ccMode , nil
263279}
264280
265281func parseCCStatus (output string ) (CCMode , error ) {
@@ -276,6 +292,21 @@ func parseCCStatus(output string) (CCMode, error) {
276292 return ccMode , nil
277293}
278294
295+ func isDevToolsModeEnabled () (bool , error ) {
296+ nvidiaSmiCmd := fmt .Sprintf ("%s/bin/nvidia-smi" , InstallationHostDir )
297+ output , err := exec .Command (nvidiaSmiCmd , "conf-compute" , "-d" ).Output ()
298+ if err != nil {
299+ return false , err
300+ }
301+ re := regexp .MustCompile (`DevTools Mode:\s*(ON|OFF)` )
302+ match := re .FindStringSubmatch (string (output ))
303+
304+ if len (match ) < 2 {
305+ return false , fmt .Errorf ("DevTools mode not found in output: %s" , output )
306+ }
307+ return match [1 ] == "ON" , nil
308+ }
309+
279310func launchNvidiaPersistencedProcess (logger logging.Logger ) error {
280311 nvidiaPersistencedCmd := fmt .Sprintf ("%s/bin/nvidia-persistenced" , InstallationHostDir )
281312 logger .Info ("Starting nvidia-persistenced process" )
0 commit comments