Skip to content

Commit 7872e32

Browse files
committed
fix(gpu): prefer single CDI devices for local runtimes
Prefer a single CDI-qualified device when Docker or Podman resolves the default GPU request to one GPU. Allow nvidia.com/gpu=all only as a WSL2 all-only compatibility fallback, using Docker daemon info and Podman's /dev/dxg probe to identify that case. Update driver docs, architecture notes, and GPU e2e coverage for the default selection behavior. Signed-off-by: Evan Lezar <elezar@nvidia.com>
1 parent fb83d1a commit 7872e32

10 files changed

Lines changed: 1156 additions & 87 deletions

File tree

architecture/compute-runtimes.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,12 @@ through the driver configuration. The Helm chart defaults sandbox agents to
5555
`Unconfined` so runtime/default AppArmor profiles do not block supervisor
5656
network namespace setup on AppArmor-enabled nodes.
5757

58+
GPU requests enter the driver layer through `SandboxSpec.gpu` and
59+
`SandboxSpec.gpu_device`. Docker and Podman map default GPU requests to one
60+
concrete NVIDIA CDI device when individual CDI devices are available, use
61+
`nvidia.com/gpu=all` only for WSL2/all-only compatibility, and pass explicit
62+
driver-native device IDs through.
63+
5864
VM runtime state paths are derived only from driver-validated sandbox IDs
5965
matching `[A-Za-z0-9._-]{1,128}`. The gateway-owned VM driver socket uses a
6066
private `run/` directory plus Unix peer UID/PID checks. Standalone

crates/openshell-core/src/gpu.rs

Lines changed: 311 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,184 @@
33

44
//! Shared GPU request helpers.
55
6+
use std::fmt;
7+
use std::sync::atomic::{AtomicUsize, Ordering};
8+
69
use crate::config::CDI_GPU_DEVICE_ALL;
710

8-
/// Resolve a GPU request into CDI device identifiers.
11+
/// Prefix shared by every NVIDIA GPU CDI device ID; the device name follows the `=`.
const CDI_NVIDIA_GPU_PREFIX: &str = "nvidia.com/gpu=";
12+
/// Device-name suffix that is excluded from the named-device family during
/// default selection, because `nvidia.com/gpu=all` requests all GPUs rather
/// than one device.
const CDI_NVIDIA_GPU_ALL_SUFFIX: &str = "all";
13+
14+
/// Normalized CDI GPU inventory used by local container drivers.
15+
#[derive(Debug, Clone, Default, PartialEq, Eq)]
16+
pub struct CdiGpuInventory {
17+
device_ids: Vec<String>,
18+
}
19+
20+
impl CdiGpuInventory {
21+
/// Build a normalized inventory from runtime-reported CDI device IDs.
22+
#[must_use]
23+
pub fn new(device_ids: impl IntoIterator<Item = impl AsRef<str>>) -> Self {
24+
let mut device_ids = device_ids
25+
.into_iter()
26+
.filter_map(|id| {
27+
let id = id.as_ref().trim();
28+
id.starts_with(CDI_NVIDIA_GPU_PREFIX)
29+
.then(|| id.to_string())
30+
})
31+
.collect::<Vec<_>>();
32+
device_ids.sort();
33+
device_ids.dedup();
34+
Self { device_ids }
35+
}
36+
37+
#[must_use]
38+
pub fn as_slice(&self) -> &[String] {
39+
&self.device_ids
40+
}
41+
42+
#[must_use]
43+
pub fn is_empty(&self) -> bool {
44+
self.device_ids.is_empty()
45+
}
46+
47+
fn default_device_family(
48+
&self,
49+
allow_all_devices: bool,
50+
) -> Result<Vec<String>, CdiGpuSelectionError> {
51+
let mut indexed = self
52+
.device_ids
53+
.iter()
54+
.filter_map(|id| {
55+
let suffix = cdi_nvidia_gpu_suffix(id)?;
56+
let index = suffix.parse::<u64>().ok()?;
57+
Some((index, id.clone()))
58+
})
59+
.collect::<Vec<_>>();
60+
if !indexed.is_empty() {
61+
indexed.sort_by(|left, right| left.0.cmp(&right.0).then_with(|| left.1.cmp(&right.1)));
62+
return Ok(indexed.into_iter().map(|(_, id)| id).collect());
63+
}
64+
65+
let mut named = self
66+
.device_ids
67+
.iter()
68+
.filter_map(|id| {
69+
let suffix = cdi_nvidia_gpu_suffix(id)?;
70+
(suffix != CDI_NVIDIA_GPU_ALL_SUFFIX).then(|| id.clone())
71+
})
72+
.collect::<Vec<_>>();
73+
if !named.is_empty() {
74+
named.sort();
75+
return Ok(named);
76+
}
77+
78+
if self.device_ids.iter().any(|id| id == CDI_GPU_DEVICE_ALL) {
79+
if !allow_all_devices {
80+
return Err(CdiGpuSelectionError::AllDevicesDefaultUnsupported);
81+
}
82+
return Ok(vec![CDI_GPU_DEVICE_ALL.to_string()]);
83+
}
84+
85+
Err(CdiGpuSelectionError::NoAvailableDevices)
86+
}
87+
}
88+
89+
/// Concurrency-safe round-robin cursor for default CDI GPU selection.
90+
#[derive(Debug, Default)]
91+
pub struct CdiGpuRoundRobin {
92+
next: AtomicUsize,
93+
}
94+
95+
impl CdiGpuRoundRobin {
96+
#[must_use]
97+
pub const fn new() -> Self {
98+
Self {
99+
next: AtomicUsize::new(0),
100+
}
101+
}
102+
103+
/// Return the next default device ID and advance the cursor.
104+
pub fn next_default_device_id(
105+
&self,
106+
inventory: &CdiGpuInventory,
107+
allow_all_devices: bool,
108+
) -> Result<String, CdiGpuSelectionError> {
109+
self.selected_default_device_id(inventory, true, allow_all_devices)
110+
}
111+
112+
/// Return the current default device ID without advancing the cursor.
113+
pub fn peek_default_device_id(
114+
&self,
115+
inventory: &CdiGpuInventory,
116+
allow_all_devices: bool,
117+
) -> Result<String, CdiGpuSelectionError> {
118+
self.selected_default_device_id(inventory, false, allow_all_devices)
119+
}
120+
121+
fn selected_default_device_id(
122+
&self,
123+
inventory: &CdiGpuInventory,
124+
consume: bool,
125+
allow_all_devices: bool,
126+
) -> Result<String, CdiGpuSelectionError> {
127+
let devices = inventory.default_device_family(allow_all_devices)?;
128+
let base = if consume {
129+
self.next.fetch_add(1, Ordering::Relaxed)
130+
} else {
131+
self.next.load(Ordering::Relaxed)
132+
};
133+
Ok(devices[base % devices.len()].clone())
134+
}
135+
}
136+
137+
/// Reasons a CDI GPU selection can fail.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CdiGpuSelectionError {
    /// No NVIDIA CDI GPU device is available to choose from.
    NoAvailableDevices,
    /// A default GPU request was made without a selected default device.
    MissingDefaultDevice,
    /// Only the all-GPUs device is available and using it by default is not
    /// allowed.
    AllDevicesDefaultUnsupported,
}

impl fmt::Display for CdiGpuSelectionError {
    /// Write the human-readable description of this failure.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match *self {
            Self::NoAvailableDevices => "no NVIDIA CDI GPU devices were discovered",
            Self::MissingDefaultDevice => {
                "GPU request requires a selected default CDI GPU device"
            }
            Self::AllDevicesDefaultUnsupported => {
                "default GPU request resolved only to nvidia.com/gpu=all, which is not allowed on this platform; set driver_config.cdi_devices to [\"nvidia.com/gpu=all\"] explicitly to request all GPUs"
            }
        };
        f.write_str(message)
    }
}

impl std::error::Error for CdiGpuSelectionError {}
160+
161+
/// Resolve a local runtime GPU request into CDI device identifiers.
9162
///
10-
/// `None` means no GPU was requested. A GPU request with no explicit CDI
11-
/// devices uses the CDI all-GPU request; otherwise the driver-configured CDI
12-
/// devices pass through unchanged.
163+
/// `None` means no GPU was requested. Explicit driver-configured CDI devices
164+
/// pass through unchanged. A default GPU request uses the driver-selected
165+
/// default CDI ID.
166+
///
/// # Errors
///
/// Returns [`CdiGpuSelectionError::MissingDefaultDevice`] when a GPU is
/// requested, `cdi_devices` is empty, and `selected_default_device` is `None`.
pub fn cdi_gpu_device_ids(
    gpu: bool,
    cdi_devices: &[String],
    selected_default_device: Option<&str>,
) -> Result<Option<Vec<String>>, CdiGpuSelectionError> {
    match (gpu, cdi_devices, selected_default_device) {
        // No GPU requested: nothing to resolve, whatever else was supplied.
        (false, _, _) => Ok(None),
        // Default request: fall back to the single pre-selected device.
        (true, [], Some(fallback)) => Ok(Some(vec![fallback.to_string()])),
        (true, [], None) => Err(CdiGpuSelectionError::MissingDefaultDevice),
        // Explicit devices always win over the selected default.
        (true, explicit, _) => Ok(Some(explicit.to_vec())),
    }
}
180+
181+
/// Resolve a GPU request with the legacy all-GPU default.
13182
#[must_use]
14-
pub fn cdi_gpu_device_ids(gpu: bool, cdi_devices: &[String]) -> Option<Vec<String>> {
183+
pub fn cdi_gpu_device_ids_or_all(gpu: bool, cdi_devices: &[String]) -> Option<Vec<String>> {
15184
gpu.then(|| {
16185
if cdi_devices.is_empty() {
17186
vec![CDI_GPU_DEVICE_ALL.to_string()]
@@ -21,20 +190,32 @@ pub fn cdi_gpu_device_ids(gpu: bool, cdi_devices: &[String]) -> Option<Vec<Strin
21190
})
22191
}
23192

193+
fn cdi_nvidia_gpu_suffix(id: &str) -> Option<&str> {
194+
id.strip_prefix(CDI_NVIDIA_GPU_PREFIX)
195+
}
196+
24197
#[cfg(test)]
25198
mod tests {
26199
use super::*;
27200

28201
#[test]
29202
fn cdi_gpu_device_ids_returns_none_when_absent() {
30-
assert_eq!(cdi_gpu_device_ids(false, &[]), None);
203+
assert_eq!(cdi_gpu_device_ids(false, &[], None), Ok(None));
31204
}
32205

33206
#[test]
34-
fn cdi_gpu_device_ids_defaults_empty_request_to_all_gpus() {
207+
fn cdi_gpu_device_ids_uses_selected_default_device() {
35208
assert_eq!(
36-
cdi_gpu_device_ids(true, &[]),
37-
Some(vec![CDI_GPU_DEVICE_ALL.to_string()])
209+
cdi_gpu_device_ids(true, &[], Some("nvidia.com/gpu=0")),
210+
Ok(Some(vec!["nvidia.com/gpu=0".to_string()]))
211+
);
212+
}
213+
214+
#[test]
215+
fn cdi_gpu_device_ids_rejects_missing_default_device() {
216+
assert_eq!(
217+
cdi_gpu_device_ids(true, &[], None),
218+
Err(CdiGpuSelectionError::MissingDefaultDevice)
38219
);
39220
}
40221

@@ -46,12 +227,130 @@ mod tests {
46227
&[
47228
"nvidia.com/gpu=0".to_string(),
48229
"nvidia.com/gpu=1".to_string()
49-
]
230+
],
231+
None
50232
),
51-
Some(vec![
233+
Ok(Some(vec![
234+
"nvidia.com/gpu=0".to_string(),
235+
"nvidia.com/gpu=1".to_string()
236+
]))
237+
);
238+
}
239+
240+
#[test]
241+
fn cdi_gpu_device_ids_or_all_uses_all_when_no_devices_are_configured() {
242+
assert_eq!(
243+
cdi_gpu_device_ids_or_all(true, &[]),
244+
Some(vec![CDI_GPU_DEVICE_ALL.to_string()])
245+
);
246+
}
247+
248+
#[test]
249+
fn inventory_filters_and_deduplicates_nvidia_gpu_ids() {
250+
let inventory = CdiGpuInventory::new([
251+
"nvidia.com/gpu=1",
252+
"vendor.example/device=0",
253+
"nvidia.com/gpu=1",
254+
" nvidia.com/gpu=0 ",
255+
]);
256+
257+
assert_eq!(
258+
inventory.as_slice(),
259+
&vec![
52260
"nvidia.com/gpu=0".to_string(),
53261
"nvidia.com/gpu=1".to_string()
54-
])
262+
]
263+
);
264+
}
265+
266+
#[test]
267+
fn round_robin_prefers_indexed_family_and_sorts_numerically() {
268+
let inventory = CdiGpuInventory::new([
269+
"nvidia.com/gpu=10",
270+
"nvidia.com/gpu=UUID-b",
271+
"nvidia.com/gpu=2",
272+
"nvidia.com/gpu=all",
273+
]);
274+
let selector = CdiGpuRoundRobin::new();
275+
276+
assert_eq!(
277+
selector.next_default_device_id(&inventory, false),
278+
Ok("nvidia.com/gpu=2".to_string())
279+
);
280+
assert_eq!(
281+
selector.next_default_device_id(&inventory, false),
282+
Ok("nvidia.com/gpu=10".to_string())
283+
);
284+
assert_eq!(
285+
selector.next_default_device_id(&inventory, false),
286+
Ok("nvidia.com/gpu=2".to_string())
287+
);
288+
}
289+
290+
#[test]
291+
fn round_robin_uses_named_family_when_no_indexed_ids_exist() {
292+
let inventory = CdiGpuInventory::new(["nvidia.com/gpu=UUID-b", "nvidia.com/gpu=UUID-a"]);
293+
let selector = CdiGpuRoundRobin::new();
294+
295+
assert_eq!(
296+
selector.next_default_device_id(&inventory, false),
297+
Ok("nvidia.com/gpu=UUID-a".to_string())
298+
);
299+
}
300+
301+
#[test]
302+
fn round_robin_uses_all_only_inventory_when_allowed() {
303+
let inventory = CdiGpuInventory::new([CDI_GPU_DEVICE_ALL]);
304+
let selector = CdiGpuRoundRobin::new();
305+
306+
assert_eq!(
307+
selector.next_default_device_id(&inventory, true),
308+
Ok(CDI_GPU_DEVICE_ALL.to_string())
309+
);
310+
}
311+
312+
#[test]
313+
fn round_robin_rejects_all_only_inventory_when_not_allowed() {
314+
let inventory = CdiGpuInventory::new([CDI_GPU_DEVICE_ALL]);
315+
let selector = CdiGpuRoundRobin::new();
316+
317+
assert_eq!(
318+
selector.next_default_device_id(&inventory, false),
319+
Err(CdiGpuSelectionError::AllDevicesDefaultUnsupported)
320+
);
321+
}
322+
323+
#[test]
324+
fn round_robin_rejects_empty_inventory() {
325+
let inventory = CdiGpuInventory::new(["vendor.example/device=0"]);
326+
let selector = CdiGpuRoundRobin::new();
327+
328+
assert_eq!(
329+
selector.next_default_device_id(&inventory, false),
330+
Err(CdiGpuSelectionError::NoAvailableDevices)
331+
);
332+
}
333+
334+
#[test]
335+
fn peek_does_not_advance_round_robin_cursor() {
336+
let inventory = CdiGpuInventory::new(["nvidia.com/gpu=0", "nvidia.com/gpu=1"]);
337+
let selector = CdiGpuRoundRobin::new();
338+
339+
assert_eq!(
340+
selector.peek_default_device_id(&inventory, false),
341+
Ok("nvidia.com/gpu=0".to_string())
342+
);
343+
assert_eq!(
344+
selector.peek_default_device_id(&inventory, false),
345+
Ok("nvidia.com/gpu=0".to_string())
346+
);
347+
assert_eq!(
348+
selector.next_default_device_id(&inventory, false),
349+
Ok("nvidia.com/gpu=0".to_string())
350+
);
351+
assert_eq!(
352+
selector.next_default_device_id(&inventory, false),
353+
Ok("nvidia.com/gpu=1".to_string())
55354
);
56355
}
57356
}

crates/openshell-driver-docker/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ contract:
3232
| `apparmor=unconfined` | Avoids Docker's default profile blocking required mount operations. |
3333
| `restart_policy = unless-stopped` | Keeps managed sandboxes resumable across daemon or gateway restarts. |
3434
| `PidsLimit` | Enforces the sandbox PID budget at the Docker cgroup layer. Set `[openshell.drivers.docker].sandbox_pids_limit = 0` to inherit the Docker/runtime default. |
35-
| CDI GPU request | Uses `driver_config.cdi_devices` when set; otherwise requests all NVIDIA GPUs when the sandbox spec asks for GPU support and daemon CDI support is detected. |
35+
| CDI GPU request | Uses `driver_config.cdi_devices` when set; otherwise selects one concrete NVIDIA CDI GPU when the sandbox spec asks for GPU support and daemon CDI support is detected. `nvidia.com/gpu=all` is used by default only as a WSL2 all-only compatibility fallback, which the driver identifies from Docker daemon `/info`. |
3636

3737
The agent child process does not retain these supervisor privileges.
3838

0 commit comments

Comments
 (0)