@@ -30,9 +30,12 @@ import (
3030
3131const (
3232 installPrerequisitesScript = `
33- export DEBIAN_FRONTEND=noninteractive
34- apt-get update && apt-get install -y curl gnupg2
35- `
33+ set -e
34+ export DEBIAN_FRONTEND=noninteractive
35+ # Install prerequisites
36+ apt-get update
37+ apt-get install -y curl gnupg2
38+ `
3639)
3740
3841type localRunner struct {}
@@ -96,7 +99,7 @@ func NewRunner(opts ...runnerOption) Runner {
9699// NewNestedContainerRunner creates a new nested container runner.
97100// A nested container runs a container inside another container based on a
98101// given runner (remote or local).
99- func NewNestedContainerRunner (runner Runner , baseImage string , installCTK bool , containerName string , cacheDir string ) (Runner , error ) {
102+ func NewNestedContainerRunner (runner Runner , baseImage string , mountToolkitFromHost bool , containerName string , cacheDir string , requiresGPUs bool ) (Runner , error ) {
100103 // If a container with the same name exists from a previous test run, remove it first.
101104 // Ignore errors as container might not exist
102105 _ , _ , err := runner .Run (fmt .Sprintf ("docker rm -f %s 2>/dev/null || true" , containerName ))
@@ -106,13 +109,24 @@ func NewNestedContainerRunner(runner Runner, baseImage string, installCTK bool,
106109
107110 var additionalContainerArguments []string
108111
112+ if requiresGPUs {
113+ // If the container requires access to GPUs we explicitly add the nvidia
114+ // runtime and set `NVIDIA_VISIBLE_DEVICES` to trigger jit-cdi spec
115+ // generation.
116+ additionalContainerArguments = append (additionalContainerArguments ,
117+ "--runtime=nvidia" ,
118+ "-e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all" ,
119+ )
120+ }
121+
109122 if cacheDir != "" {
110123 additionalContainerArguments = append (additionalContainerArguments ,
111124 "-v " + cacheDir + ":" + cacheDir + ":ro" ,
112125 )
113126 }
114127
115- if ! installCTK {
128+ if mountToolkitFromHost {
128+ // TODO: This toolkit-mounting path is only needed by the CLI tests; consider moving it there.
116130 // If mountToolkitFromHost is true, we use the preinstalled toolkit.
117131 // This means we need to add toolkit libraries and binaries from the "host"
118132
@@ -179,6 +193,7 @@ func NewNestedContainerRunner(runner Runner, baseImage string, installCTK bool,
179193 if err != nil {
180194 return nil , err
181195 }
196+
182197 _ , _ , err = runner .Run (script )
183198 if err != nil {
184199 return nil , fmt .Errorf ("failed to run start container script: %w" , err )
@@ -191,7 +206,7 @@ func NewNestedContainerRunner(runner Runner, baseImage string, installCTK bool,
191206
192207 _ , _ , err = inContainer .Run (installPrerequisitesScript )
193208 if err != nil {
194- return nil , fmt .Errorf ("failed to install docker : %w" , err )
209+ return nil , fmt .Errorf ("failed to install prerequisites : %w" , err )
195210 }
196211
197212 return inContainer , nil
@@ -296,20 +311,14 @@ func connectOrDie(sshKey, sshUser, host, port string) (*ssh.Client, error) {
296311
297312// outerContainerTemplate represents a template to start a container with
298313// a name specified.
299- // The container is given access to all NVIDIA gpus by explicitly using the
300- // nvidia runtime and the `runtime.nvidia.com/gpu=all` device to trigger JIT
301- // CDI spec generation.
302- // The template also allows for additional arguments to be specified.
303314type outerContainer struct {
304315 Name string
305316 BaseImage string
306317 AdditionalArguments []string
307318}
308319
309320func (o * outerContainer ) Render () (string , error ) {
310- tmpl , err := template .New ("startContainer" ).Parse (`docker run -d --name {{.Name}} --privileged --runtime=nvidia \
311- -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all \
312- -e NVIDIA_DRIVER_CAPABILITIES=all \
321+ tmpl , err := template .New ("startContainer" ).Parse (`docker run -d --name {{.Name}} --privileged \
313322{{ range $i, $a := .AdditionalArguments -}}
314323{{ $a }} \
315324{{ end -}}
0 commit comments