Commit 463f8f0

feat(launcher): expose GPUs to eval container via NVIDIA_VISIBLE_DEVICES
Benchmarks like compute-eval need to compile and execute CUDA code inside the eval container. Without GPU access, nvcc can't detect the target architecture and compiled binaries fail with cudaErrorInsufficientDriver.

Export NVIDIA_VISIBLE_DEVICES=all before the eval srun and pass it through to the container. This makes pyxis/enroot expose the parent job's GPUs to the eval container.

Validated with compute-eval on HSG: pass@1 went from 0% (no GPU) to 51.25% (with GPU access).

Signed-off-by: Wojciech Prazuch <wprazuch@nvidia.com>
1 parent 4d51bee commit 463f8f0

File tree

1 file changed: +8 −1 lines changed
  • packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/executors/slurm

packages/nemo-evaluator-launcher/src/nemo_evaluator_launcher/executors/slurm/executor.py

Lines changed: 8 additions & 1 deletion
@@ -1014,11 +1014,18 @@ def _create_slurm_sbatch_script(
         aux_extra_env_names.extend(endpoint_vars)

         s += "# evaluation client\n"
+        s += "export NVIDIA_VISIBLE_DEVICES=all\n"
         s += "srun --mpi pmix --overlap "
         s += '--nodelist "${PRIMARY_NODE}" --nodes 1 --ntasks 1 '
         s += "--container-image {} ".format(eval_image)
         # Combine eval env vars with auxiliary endpoint env vars
-        all_eval_env_names = sorted(set(list(eval_env_vars.keys()) + aux_extra_env_names))
+        all_eval_env_names = sorted(
+            set(
+                list(eval_env_vars.keys())
+                + aux_extra_env_names
+                + ["NVIDIA_VISIBLE_DEVICES"]
+            )
+        )
         if all_eval_env_names:
             s += "--container-env {} ".format(",".join(all_eval_env_names))
         if not cfg.execution.get("mounts", {}).get("mount_home", True):
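The effect of the change can be sketched in isolation: the GPU variable is exported in the generated sbatch script, and its name is added to the deduplicated, sorted list passed to pyxis via --container-env so the container inherits it. The dict and list contents below are hypothetical placeholders, not the launcher's real configuration.

```python
# Hypothetical inputs standing in for the launcher's config-derived values.
eval_env_vars = {"HF_TOKEN": "xxx"}          # per-task eval env vars (placeholder)
aux_extra_env_names = ["API_ENDPOINT"]       # auxiliary endpoint var names (placeholder)

# Mirror of the patched logic: always include NVIDIA_VISIBLE_DEVICES,
# dedupe with set(), and sort for a stable --container-env list.
all_eval_env_names = sorted(
    set(
        list(eval_env_vars.keys())
        + aux_extra_env_names
        + ["NVIDIA_VISIBLE_DEVICES"]
    )
)

# Sketch of the relevant generated sbatch fragment.
script = "export NVIDIA_VISIBLE_DEVICES=all\n"
script += "srun --container-env {} ...".format(",".join(all_eval_env_names))
print(script)
```

Because the variable is both exported in the parent job's shell and named in --container-env, pyxis/enroot forwards it into the eval container, where the NVIDIA container runtime interprets `all` as "expose every GPU of the parent allocation".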
