Skip to content

Commit 7a5c0e3

Browse files
committed
Add cos_gpu_installer digest verification
1 parent e9e8a4f commit 7a5c0e3

File tree

7 files changed

+225
-27
lines changed

7 files changed

+225
-27
lines changed

launcher/container_runner.go

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ func NewRunner(ctx context.Context, cdClient *containerd.Client, token oauth2.To
169169
specOpts = append(specOpts, oci.WithDevShmSize(launchSpec.DevShmSize))
170170
}
171171

172-
if launchSpec.Experiments.EnableGpuDriverInstallation && launchSpec.InstallGpuDriver {
172+
if launchSpec.Experiments.EnableConfidentialGPUSupport && launchSpec.InstallGpuDriver {
173173
gpuMounts := []specs.Mount{
174174
{
175175
Type: "volume",
@@ -341,6 +341,12 @@ func (r *ContainerRunner) measureCELEvents(ctx context.Context) error {
341341
return fmt.Errorf("failed to measure memory monitoring state: %v", err)
342342
}
343343

344+
if r.launchSpec.Experiments.EnableConfidentialGPUSupport && r.launchSpec.InstallGpuDriver {
345+
if err := r.measureGPUCCMode(); err != nil {
346+
return fmt.Errorf("failed to measure gpu cc mode : %v", err)
347+
}
348+
}
349+
344350
separator := cel.CosTlv{
345351
EventType: cel.LaunchSeparatorType,
346352
EventContent: nil, // Success
@@ -418,6 +424,17 @@ func (r *ContainerRunner) measureMemoryMonitor() error {
418424
return nil
419425
}
420426

427+
func (r *ContainerRunner) measureGPUCCMode() error {
428+
ccMode, err := gpu.GetGPUCCMode()
429+
if err != nil {
430+
return err
431+
}
432+
if err := r.attestAgent.MeasureEvent(cel.CosTlv{EventType: cel.GpuCCModeType, EventContent: []byte(ccMode)}); err != nil {
433+
return err
434+
}
435+
return nil
436+
}
437+
421438
// Retrieves the default OIDC token from the attestation service, and returns how long
422439
// to wait before attempting to refresh it.
423440
// The token file will be written to a tmp file and then renamed.

launcher/image/preload.sh

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
readonly OEM_PATH='/usr/share/oem'
44
readonly CS_PATH="${OEM_PATH}/confidential_space"
55
readonly EXPERIMENTS_BINARY="confidential_space_experiments"
6+
readonly GPU_REF_VALUES_PATH="${CS_PATH}/gpu"
7+
readonly COS_GPU_INSTALLER_IMAGE_REF="${GPU_REF_VALUES_PATH}/cos_gpu_installer_image_ref"
8+
readonly COS_GPU_INSTALLER_IMAGE_DIGEST="${GPU_REF_VALUES_PATH}/cos_gpu_installer_image_digest"
69

710
copy_launcher() {
811
cp launcher "${CS_PATH}/cs_container_launcher"
@@ -100,6 +103,41 @@ configure_systemd_units_for_hardened() {
100103
disable_unit "var-lib-toolbox.mount"
101104
}
102105

106+
# Resolves the manifest digest of a container image from its registry.
#
# Arguments:
#   $1 - image reference in the form <registry>/<repository>:<tag>
# Outputs:
#   Writes the value of the registry's Docker-Content-Digest response header
#   (for example "sha256:...") to stdout, without any trailing carriage return.
# Returns:
#   0 on success; 1 when the reference is malformed, the registry request
#   fails, or the response carries no digest header.
get_cos_gpu_installer_image_digest() {
  local image_ref="${1}"
  local registry
  local repo_with_image_name
  local tag
  local manifest_url
  local headers
  local image_digest

  if [[ "${image_ref}" =~ ^([^/]+)/([^:]+):([^:]+)$ ]]; then
    registry="${BASH_REMATCH[1]}"
    repo_with_image_name="${BASH_REMATCH[2]}"
    tag="${BASH_REMATCH[3]}"
  else
    echo "Error: Invalid image reference format: $image_ref" >&2
    return 1
  fi

  manifest_url="https://${registry}/v2/${repo_with_image_name}/manifests/${tag}"

  # NOTE(review): no Accept header is sent; confirm the registry answers with
  # the digest of the same manifest type that the launcher later pulls.
  if ! headers="$(curl -s --head "${manifest_url}")"; then
    echo "Error: Failed to fetch manifest headers from ${manifest_url}" >&2
    return 1
  fi

  # Header names are case-insensitive and header lines end in CRLF, so match
  # the name in lower case and strip the carriage return from the value.
  image_digest="$(awk 'tolower($1) == "docker-content-digest:" { print $2; exit }' <<< "${headers}" | tr -d '\r')"
  if [[ -z "${image_digest}" ]]; then
    echo "Error: No Docker-Content-Digest header returned for ${image_ref}" >&2
    return 1
  fi

  echo "${image_digest}"
}
127+
128+
129+
# Records the cos_gpu_installer image reference and its digest as reference
# values for the launcher.
#
# The reference comes from `cos-extensions list -- --gpu-installer` and the
# digest from get_cos_gpu_installer_image_digest. Both values are written
# without a trailing newline because the launcher compares the digest file's
# contents byte-for-byte against the digest of the pulled image.
#
# Globals read:
#   GPU_REF_VALUES_PATH, COS_GPU_INSTALLER_IMAGE_REF,
#   COS_GPU_INSTALLER_IMAGE_DIGEST
# Returns:
#   0 on success; 1 when the image reference or its digest cannot be obtained.
set_gpu_driver_ref_values() {
  local cos_gpu_installer_image_ref
  local cos_gpu_installer_image_digest

  mkdir -p "${GPU_REF_VALUES_PATH}"

  if ! cos_gpu_installer_image_ref="$(cos-extensions list -- --gpu-installer)"; then
    echo "Error: Failed to query the cos_gpu_installer image reference" >&2
    return 1
  fi

  if ! cos_gpu_installer_image_digest="$(get_cos_gpu_installer_image_digest "${cos_gpu_installer_image_ref}")"; then
    echo "Error: Failed to resolve the digest of ${cos_gpu_installer_image_ref}" >&2
    return 1
  fi

  if [[ -z "${cos_gpu_installer_image_digest}" ]]; then
    echo "Error: Empty digest resolved for ${cos_gpu_installer_image_ref}" >&2
    return 1
  fi

  # Overwrite rather than append so that a re-run never leaves stale values.
  printf '%s' "${cos_gpu_installer_image_ref}" > "${COS_GPU_INSTALLER_IMAGE_REF}"
  printf '%s' "${cos_gpu_installer_image_digest}" > "${COS_GPU_INSTALLER_IMAGE_DIGEST}"
}
140+
103141
main() {
104142
mount -o remount,rw ${OEM_PATH}
105143
mkdir ${CS_PATH}
@@ -110,6 +148,7 @@ main() {
110148
copy_experiment_client
111149
# Install container launcher.
112150
copy_launcher
151+
set_gpu_driver_ref_values
113152
setup_launcher_systemd_unit
114153
# Minimum required COS version for 'e': cos-dev-105-17222-0-0.
115154
# Minimum required COS version for 'm': cos-dev-113-18203-0-0.

launcher/internal/experiments/experiments.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@ import (
1111
// Failure to unmarshal the experiment JSON data will result in an empty object being returned
1212
// to treat experiment flags as their default value. The error should still be checked.
1313
type Experiments struct {
	// EnableTestFeatureForImage is an experiment flag; false by default.
	EnableTestFeatureForImage bool
	// EnableTempFSMount is an experiment flag; false by default.
	EnableTempFSMount bool
	// EnableGpuDriverInstallation is an experiment flag; false by default.
	EnableGpuDriverInstallation bool
	// EnableConfidentialGPUSupport is an experiment flag; false by default.
	// The launcher requires it when GPU driver installation is requested.
	EnableConfidentialGPUSupport bool
}
1819

1920
// New takes a filepath, opens the file, and calls ReadJsonInput with the contents

launcher/internal/gpu/config.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,8 @@ const (
55
InstallationHostDir = "/var/lib/nvidia"
66
// InstallationContainerDir is the directory where gpu drivers will be available on the workload container.
77
InstallationContainerDir = "/usr/local/nvidia"
8+
// InstallerImageRefFile is a filename which has the container image reference of cos_gpu_installer.
9+
InstallerImageRefFile = "/usr/share/oem/confidential_space/gpu/cos_gpu_installer_image_ref"
10+
// InstallerImageDigestFile is a filename which has the container image digest of cos_gpu_installer.
11+
InstallerImageDigestFile = "/usr/share/oem/confidential_space/gpu/cos_gpu_installer_image_digest"
812
)

launcher/internal/gpu/driverinstaller.go

Lines changed: 63 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"fmt"
77
"os"
88
"os/exec"
9+
"regexp"
910
"strings"
1011

1112
"cos.googlesource.com/cos/tools.git/src/cmd/cos_gpu_installer/deviceinfo"
@@ -18,19 +19,29 @@ import (
1819
"github.com/opencontainers/runtime-spec/specs-go"
1920
)
2021

22+
// CCMode enums
2123
const (
22-
installerContainerID = "tee-gpu-driver-installer-container"
23-
installerSnapshotID = "tee-gpu-driver-installer-snapshot"
24+
CCModeON CCMode = "ON"
25+
CCModeOFF CCMode = "OFF"
26+
installerContainerID = "tee-gpu-driver-installer-container"
27+
installerSnapshotID = "tee-gpu-driver-installer-snapshot"
2428
)
2529

26-
var supportedGpuTypes = []deviceinfo.GPUType{
27-
deviceinfo.L4,
28-
deviceinfo.T4,
29-
deviceinfo.A100_40GB,
30-
deviceinfo.A100_80GB,
30+
var supportedCGPUTypes = []deviceinfo.GPUType{
3131
deviceinfo.H100,
3232
}
3333

34+
// CCMode represents the status confidential computing mode of the GPU.
35+
type CCMode string
36+
37+
func (ccm CCMode) isValid() error {
38+
switch ccm {
39+
case CCModeOFF, CCModeON:
40+
return nil
41+
}
42+
return fmt.Errorf("invalid gpu cc mode: %s", ccm)
43+
}
44+
3445
// DriverInstaller contains information about the GPU driver installer settings
3546
type DriverInstaller struct {
3647
cdClient *containerd.Client
@@ -63,7 +74,7 @@ func (di *DriverInstaller) InstallGPUDrivers(ctx context.Context) error {
6374
}
6475

6576
if !gpuType.OpenSupported() {
66-
return fmt.Errorf("unsupported GPU type %s, please retry with one of the supported GPU types: %v", gpuType.String(), supportedGpuTypes)
77+
return fmt.Errorf("unsupported GPU type %s, please retry with one of the supported confidential GPU types: %v", gpuType.String(), supportedCGPUTypes)
6778
}
6879

6980
ctx = namespaces.WithNamespace(ctx, namespaces.Default)
@@ -79,6 +90,16 @@ func (di *DriverInstaller) InstallGPUDrivers(ctx context.Context) error {
7990
return fmt.Errorf("failed to pull installer image: %v", err)
8091
}
8192

93+
installerDigest := image.Target().Digest.String()
94+
expectedInstallerDigest, err := os.ReadFile(InstallerImageDigestFile)
95+
if err != nil {
96+
return fmt.Errorf("failed to read reference image digest from file %s : %v", InstallerImageDigestFile, err)
97+
}
98+
99+
if installerDigest != string(expectedInstallerDigest) {
100+
return fmt.Errorf("cos_gpu_installer image digest verification failed - expected : %s, actual : %s", expectedInstallerDigest, installerDigest)
101+
}
102+
82103
mounts := []specs.Mount{
83104
{
84105
Type: "volume",
@@ -153,7 +174,7 @@ func (di *DriverInstaller) InstallGPUDrivers(ctx context.Context) error {
153174
return fmt.Errorf("failed to verify GPU driver installation: %v", err)
154175
}
155176

156-
ccEnabled, err := isGPUCCModeEnabled(di.logger, gpuType)
177+
ccEnabled, err := isGPUCCModeEnabled()
157178
if err != nil {
158179
return fmt.Errorf("failed to check confidential compute mode status: %v", err)
159180
}
@@ -169,11 +190,11 @@ func (di *DriverInstaller) InstallGPUDrivers(ctx context.Context) error {
169190
}
170191

171192
func getInstallerImageReference() (string, error) {
172-
installerImageRefBytes, err := exec.Command("cos-extensions", "list", "--", "--gpu-installer").Output()
193+
imageRefBytes, err := os.ReadFile(InstallerImageRefFile)
173194
if err != nil {
174195
return "", fmt.Errorf("failed to get the cos-gpu-installer version: %v", err)
175196
}
176-
installerImageRef := strings.TrimSpace(string(installerImageRefBytes))
197+
installerImageRef := strings.TrimSpace(string(imageRefBytes))
177198
return installerImageRef, nil
178199
}
179200

@@ -208,20 +229,42 @@ func setGPUStateToReady() error {
208229
return nil
209230
}
210231

211-
func isGPUCCModeEnabled(logger logging.Logger, gpuType deviceinfo.GPUType) (bool, error) {
212-
// Run nvidia-smi conf-compute command to check if confidential compute mode is ON.
232+
func isGPUCCModeEnabled() (bool, error) {
233+
ccMode, err := GetGPUCCMode()
234+
if err != nil {
235+
return false, err
236+
}
237+
return ccMode == CCModeON, nil
238+
}
239+
240+
// GetGPUCCMode executes nvidia-smi to determine the current Confidential Computing (CC) mode status of the GPU.
241+
// It returns the CC mode ("ON" or "OFF") and an error if the command fails or if the output cannot be parsed.
242+
func GetGPUCCMode() (CCMode, error) {
243+
// Run nvidia-smi conf-compute command to get the confidential computing mode status.
213244
nvidiaSmiCmd := fmt.Sprintf("%s/bin/nvidia-smi", InstallationHostDir)
214245
ccModeOutput, err := exec.Command(nvidiaSmiCmd, "conf-compute", "-f").Output()
215-
// The nvidia-smi conf-compute command fails for GPU which doesn't support confidential computing.
216-
// This check would bypass nvidia-smi conf-compute command for GPU not having confidential compute support.
217-
if strings.Contains(string(ccModeOutput), "No CC capable devices found") {
218-
logger.Info(fmt.Sprintf("Confidential Computing is not supported for GPU type : %s", gpuType.String()))
219-
return false, nil
246+
if err != nil {
247+
return "", err
220248
}
249+
ccMode, err := parseCCStatus(string(ccModeOutput))
221250
if err != nil {
222-
return false, err
251+
return "", err
252+
}
253+
return CCMode(ccMode), nil
254+
}
255+
256+
func parseCCStatus(output string) (CCMode, error) {
257+
re := regexp.MustCompile(`CC status:\s*(ON|OFF)`)
258+
match := re.FindStringSubmatch(output)
259+
260+
if len(match) < 2 {
261+
return "", fmt.Errorf("CC status not found in output: %s", output)
262+
}
263+
ccMode := CCMode(match[1])
264+
if err := ccMode.isValid(); err != nil {
265+
return "", err
223266
}
224-
return strings.Contains(string(ccModeOutput), "CC status: ON"), nil
267+
return ccMode, nil
225268
}
226269

227270
func launchNvidiaPersistencedProcess(logger logging.Logger) error {
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
package gpu
2+
3+
import (
4+
"testing"
5+
)
6+
7+
func TestParseCCStatus(t *testing.T) {
8+
tests := []struct {
9+
name string
10+
output string
11+
want CCMode
12+
wantErr bool
13+
}{
14+
{
15+
name: "CC ON",
16+
output: "some text CC status: ON more text",
17+
want: CCModeON,
18+
wantErr: false,
19+
},
20+
{
21+
name: "CC OFF",
22+
output: "another line CC status: OFF at the end",
23+
want: CCModeOFF,
24+
wantErr: false,
25+
},
26+
{
27+
name: "CC ON at the beginning",
28+
output: "CC status: ON some other info",
29+
want: CCModeON,
30+
wantErr: false,
31+
},
32+
{
33+
name: "CC OFF only",
34+
output: "CC status: OFF",
35+
want: CCModeOFF,
36+
wantErr: false,
37+
},
38+
{
39+
name: "CC status not found",
40+
output: "No CC information here",
41+
want: "",
42+
wantErr: true,
43+
},
44+
{
45+
name: "CC status misspelled",
46+
output: "CC state: ON",
47+
want: "",
48+
wantErr: true,
49+
},
50+
{
51+
name: "CC value missing",
52+
output: "CC status:",
53+
want: "",
54+
wantErr: true,
55+
},
56+
{
57+
name: "Invalid CC value",
58+
output: "CC status: ENABLED",
59+
want: "",
60+
wantErr: true,
61+
},
62+
{
63+
name: "Multiple CC status lines - picks the first",
64+
output: "CC status: ON\nSome other info\nCC status: OFF",
65+
want: CCModeON,
66+
wantErr: false,
67+
},
68+
{
69+
name: "Case insensitive match",
70+
output: "CC status: on",
71+
want: "",
72+
wantErr: true,
73+
},
74+
{
75+
name: "Whitespace around CC value",
76+
output: "CC status: ON ",
77+
want: CCModeON,
78+
wantErr: false,
79+
},
80+
}
81+
82+
for _, tt := range tests {
83+
t.Run(tt.name, func(t *testing.T) {
84+
got, err := parseCCStatus(tt.output)
85+
if (err != nil) != tt.wantErr {
86+
t.Errorf("parseCCStatus() error = %v, wantErr %v", err, tt.wantErr)
87+
return
88+
}
89+
if got != tt.want {
90+
t.Errorf("parseCCStatus() got = %v, want %v", got, tt.want)
91+
}
92+
})
93+
}
94+
}

launcher/launcher/main.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -191,15 +191,15 @@ func startLauncher(launchSpec spec.LaunchSpec, serialConsole *os.File) error {
191191

192192
ctx := namespaces.WithNamespace(context.Background(), namespaces.Default)
193193
if launchSpec.InstallGpuDriver {
194-
if launchSpec.Experiments.EnableGpuDriverInstallation {
194+
if launchSpec.Experiments.EnableConfidentialGPUSupport {
195195
installer := gpu.NewDriverInstaller(containerdClient, launchSpec, logger)
196196
err = installer.InstallGPUDrivers(ctx)
197197
if err != nil {
198198
return fmt.Errorf("failed to install gpu drivers: %v", err)
199199
}
200200
} else {
201-
logger.Info("GPU installation experiment flag is not enabled for this project. Ensure that it is enabled when tee-install-gpu-driver is set to true")
202-
return fmt.Errorf("gpu installation experiment flag is not enabled")
201+
logger.Info("Confidential GPU support experiment flag is not enabled for this project. Ensure that it is enabled when tee-install-gpu-driver is set to true")
202+
return fmt.Errorf("confidential gpu support experiment flag is not enabled")
203203
}
204204
}
205205

0 commit comments

Comments
 (0)