Skip to content

Commit f855c3d

Browse files
authored
feat(cli): add sandbox resource flags (#1376)
Signed-off-by: Drew Newberry <anewberry@nvidia.com>
1 parent 0471c6d commit f855c3d

12 files changed

Lines changed: 449 additions & 14 deletions

File tree

.agents/skills/openshell-cli/SKILL.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ openshell sandbox create \
141141
Key flags:
142142
- `--provider`: Attach one or more providers (repeatable)
143143
- `--policy`: Custom policy YAML (otherwise uses built-in default or `OPENSHELL_SANDBOX_POLICY` env var)
144+
- `--cpu`, `--memory`: Set per-sandbox compute sizing. Docker/Podman apply limits; Kubernetes applies matching requests and limits.
144145
- `--upload <PATH>[:<DEST>]`: Upload local files into the sandbox (default dest: `/sandbox`)
145146
- `--no-keep`: Delete the sandbox after the initial command or shell exits
146147
- `--forward <PORT>`: Forward a local port and keep the sandbox alive

.agents/skills/openshell-cli/cli-reference.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,8 @@ Create a sandbox through the active gateway, wait for readiness, then connect or
143143
| `--no-keep` | Delete sandbox after the initial command or shell exits |
144144
| `--provider <NAME>` | Provider to attach (repeatable) |
145145
| `--policy <PATH>` | Path to custom policy YAML |
146+
| `--cpu <QUANTITY>` | CPU amount for the sandbox (for example: `500m`, `1`, `2.5`) |
147+
| `--memory <QUANTITY>` | Memory amount for the sandbox (for example: `512Mi`, `4Gi`, `8G`) |
146148
| `--forward <PORT>` | Forward local port to sandbox (keeps the sandbox alive) |
147149
| `--tty` | Force pseudo-terminal allocation |
148150
| `--no-tty` | Disable pseudo-terminal allocation |

architecture/compute-runtimes.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@ Each runtime receives a sandbox spec from the gateway and is responsible for:
2525
| Kubernetes | Cluster deployment through Helm. | Pod plus nested sandbox namespace. | Uses Kubernetes API objects, service accounts, secrets, PVC-backed workspace storage, and GPU resources. |
2626
| VM | Experimental microVM isolation. | Per-sandbox libkrun VM. | Gateway spawns `openshell-driver-vm` as a subprocess over a private, state-local Unix socket. |
2727

28+
Per-sandbox CPU and memory values currently enter the driver layer through
29+
template resource limits. Docker and Podman apply them as runtime limits.
30+
Kubernetes mirrors each limit into the matching request. VM accepts the fields
31+
but currently ignores them.
32+
2833
VM runtime state paths are derived only from driver-validated sandbox IDs
2934
matching `[A-Za-z0-9._-]{1,128}`. The gateway-owned VM driver socket uses a
3035
private `run/` directory plus Unix peer UID/PID checks. Standalone

crates/openshell-cli/src/main.rs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,6 +1090,14 @@ enum SandboxCommands {
10901090
#[arg(long, requires = "gpu")]
10911091
gpu_device: Option<String>,
10921092

1093+
/// CPU limit for the sandbox (for example: 500m, 1, 2.5).
1094+
#[arg(long)]
1095+
cpu: Option<String>,
1096+
1097+
/// Memory limit for the sandbox (for example: 512Mi, 4Gi, 8G).
1098+
#[arg(long)]
1099+
memory: Option<String>,
1100+
10931101
/// Provider names to attach to this sandbox.
10941102
#[arg(long = "provider")]
10951103
providers: Vec<String>,
@@ -2365,6 +2373,8 @@ async fn main() -> Result<()> {
23652373
editor,
23662374
gpu,
23672375
gpu_device,
2376+
cpu,
2377+
memory,
23682378
providers,
23692379
policy,
23702380
forward,
@@ -2431,6 +2441,8 @@ async fn main() -> Result<()> {
24312441
keep,
24322442
gpu,
24332443
gpu_device.as_deref(),
2444+
cpu.as_deref(),
2445+
memory.as_deref(),
24342446
editor,
24352447
&providers,
24362448
policy.as_deref(),
@@ -3637,6 +3649,40 @@ mod tests {
36373649
}
36383650
}
36393651

3652+
#[test]
3653+
fn sandbox_create_resource_flags_parse() {
3654+
let cli = Cli::try_parse_from([
3655+
"openshell",
3656+
"sandbox",
3657+
"create",
3658+
"--cpu",
3659+
"500m",
3660+
"--memory",
3661+
"2Gi",
3662+
"--",
3663+
"claude",
3664+
])
3665+
.expect("sandbox create resource flags should parse");
3666+
3667+
match cli.command {
3668+
Some(Commands::Sandbox {
3669+
command:
3670+
Some(SandboxCommands::Create {
3671+
cpu,
3672+
memory,
3673+
command,
3674+
..
3675+
}),
3676+
..
3677+
}) => {
3678+
assert_eq!(cpu.as_deref(), Some("500m"));
3679+
assert_eq!(memory.as_deref(), Some("2Gi"));
3680+
assert_eq!(command, vec!["claude".to_string()]);
3681+
}
3682+
other => panic!("expected SandboxCommands::Create, got: {other:?}"),
3683+
}
3684+
}
3685+
36403686
#[test]
36413687
fn service_expose_accepts_positional_target_port_and_service() {
36423688
let cli = Cli::try_parse_from([

crates/openshell-cli/src/run.rs

Lines changed: 158 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1434,6 +1434,101 @@ fn sandbox_should_persist(
14341434
keep || forward.is_some()
14351435
}
14361436

1437+
fn build_sandbox_resource_limits(
1438+
cpu: Option<&str>,
1439+
memory: Option<&str>,
1440+
) -> Result<Option<prost_types::Struct>> {
1441+
use prost_types::{Struct, Value, value::Kind};
1442+
1443+
fn string_value(value: String) -> Value {
1444+
Value {
1445+
kind: Some(Kind::StringValue(value)),
1446+
}
1447+
}
1448+
1449+
let mut limits = std::collections::BTreeMap::new();
1450+
if let Some(cpu) = cpu {
1451+
limits.insert("cpu".to_string(), string_value(validate_cpu_quantity(cpu)?));
1452+
}
1453+
if let Some(memory) = memory {
1454+
limits.insert(
1455+
"memory".to_string(),
1456+
string_value(validate_memory_quantity(memory)?),
1457+
);
1458+
}
1459+
1460+
if limits.is_empty() {
1461+
return Ok(None);
1462+
}
1463+
1464+
let mut fields = std::collections::BTreeMap::new();
1465+
fields.insert(
1466+
"limits".to_string(),
1467+
Value {
1468+
kind: Some(Kind::StructValue(Struct { fields: limits })),
1469+
},
1470+
);
1471+
Ok(Some(Struct { fields }))
1472+
}
1473+
1474+
fn validate_cpu_quantity(value: &str) -> Result<String> {
1475+
let value = value.trim();
1476+
if value.is_empty() {
1477+
return Err(miette!("--cpu must not be empty"));
1478+
}
1479+
1480+
if let Some(millicores) = value.strip_suffix('m') {
1481+
if millicores.is_empty() || !millicores.bytes().all(|b| b.is_ascii_digit()) {
1482+
return Err(miette!(
1483+
"invalid --cpu value '{value}': expected positive cores or millicores, for example 2, 0.5, or 500m"
1484+
));
1485+
}
1486+
let millicores = millicores.parse::<u64>().into_diagnostic()?;
1487+
if millicores == 0 {
1488+
return Err(miette!("--cpu must be greater than zero"));
1489+
}
1490+
return Ok(value.to_string());
1491+
}
1492+
1493+
let cores = value.parse::<f64>().map_err(|_| {
1494+
miette!(
1495+
"invalid --cpu value '{value}': expected positive cores or millicores, for example 2, 0.5, or 500m"
1496+
)
1497+
})?;
1498+
if !cores.is_finite() || cores <= 0.0 {
1499+
return Err(miette!("--cpu must be greater than zero"));
1500+
}
1501+
Ok(value.to_string())
1502+
}
1503+
1504+
fn validate_memory_quantity(value: &str) -> Result<String> {
1505+
let value = value.trim();
1506+
if value.is_empty() {
1507+
return Err(miette!("--memory must not be empty"));
1508+
}
1509+
1510+
let number_end = value
1511+
.find(|ch: char| !ch.is_ascii_digit())
1512+
.unwrap_or(value.len());
1513+
let (number, suffix) = value.split_at(number_end);
1514+
if number.is_empty()
1515+
|| !matches!(
1516+
suffix,
1517+
"" | "Ki" | "Mi" | "Gi" | "Ti" | "Pi" | "Ei" | "K" | "M" | "G" | "T" | "P" | "E"
1518+
)
1519+
{
1520+
return Err(miette!(
1521+
"invalid --memory value '{value}': expected positive bytes or a quantity such as 512Mi, 4Gi, or 8G"
1522+
));
1523+
}
1524+
1525+
let amount = number.parse::<u128>().into_diagnostic()?;
1526+
if amount == 0 {
1527+
return Err(miette!("--memory must be greater than zero"));
1528+
}
1529+
Ok(value.to_string())
1530+
}
1531+
14371532
async fn finalize_sandbox_create_session(
14381533
server: &str,
14391534
sandbox_name: &str,
@@ -1468,6 +1563,8 @@ pub async fn sandbox_create(
14681563
keep: bool,
14691564
gpu: bool,
14701565
gpu_device: Option<&str>,
1566+
cpu: Option<&str>,
1567+
memory: Option<&str>,
14711568
editor: Option<Editor>,
14721569
providers: &[String],
14731570
policy: Option<&str>,
@@ -1530,11 +1627,17 @@ pub async fn sandbox_create(
15301627
.await?;
15311628

15321629
let policy = load_sandbox_policy(policy)?;
1630+
let resource_limits = build_sandbox_resource_limits(cpu, memory)?;
15331631

1534-
let template = image.map(|img| SandboxTemplate {
1535-
image: img,
1536-
..SandboxTemplate::default()
1537-
});
1632+
let template = if image.is_some() || resource_limits.is_some() {
1633+
Some(SandboxTemplate {
1634+
image: image.unwrap_or_default(),
1635+
resources: resource_limits,
1636+
..SandboxTemplate::default()
1637+
})
1638+
} else {
1639+
None
1640+
};
15381641

15391642
let request = CreateSandboxRequest {
15401643
spec: Some(SandboxSpec {
@@ -6007,8 +6110,8 @@ fn format_timestamp_ms(ms: i64) -> String {
60076110
#[cfg(test)]
60086111
mod tests {
60096112
use super::{
6010-
TlsOptions, dockerfile_sources_supported_for_gateway, format_endpoint,
6011-
format_gateway_select_header, format_gateway_select_items,
6113+
TlsOptions, build_sandbox_resource_limits, dockerfile_sources_supported_for_gateway,
6114+
format_endpoint, format_gateway_select_header, format_gateway_select_items,
60126115
format_provider_attachment_table, gateway_add, gateway_auth_label,
60136116
gateway_env_override_warning, gateway_select_with, gateway_type_label, git_sync_files,
60146117
http_health_check, image_requests_gpu, import_local_package_mtls_bundle,
@@ -6211,6 +6314,55 @@ mod tests {
62116314
assert!(err.to_string().contains("unknown setting key"));
62126315
}
62136316

6317+
#[test]
6318+
fn build_sandbox_resource_limits_sets_limits_only() {
6319+
let resources = build_sandbox_resource_limits(Some("500m"), Some("2Gi"))
6320+
.expect("resource limits should parse")
6321+
.expect("resource limits should be present");
6322+
6323+
let limits = resources
6324+
.fields
6325+
.get("limits")
6326+
.and_then(|value| value.kind.as_ref())
6327+
.and_then(|kind| match kind {
6328+
prost_types::value::Kind::StructValue(inner) => Some(inner),
6329+
_ => None,
6330+
})
6331+
.expect("limits should be a struct");
6332+
6333+
assert_eq!(
6334+
limits
6335+
.fields
6336+
.get("cpu")
6337+
.and_then(|value| value.kind.as_ref())
6338+
.and_then(|kind| match kind {
6339+
prost_types::value::Kind::StringValue(value) => Some(value.as_str()),
6340+
_ => None,
6341+
}),
6342+
Some("500m")
6343+
);
6344+
assert_eq!(
6345+
limits
6346+
.fields
6347+
.get("memory")
6348+
.and_then(|value| value.kind.as_ref())
6349+
.and_then(|kind| match kind {
6350+
prost_types::value::Kind::StringValue(value) => Some(value.as_str()),
6351+
_ => None,
6352+
}),
6353+
Some("2Gi")
6354+
);
6355+
assert!(!resources.fields.contains_key("requests"));
6356+
}
6357+
6358+
#[test]
6359+
fn build_sandbox_resource_limits_rejects_invalid_quantities() {
6360+
assert!(build_sandbox_resource_limits(Some("0"), None).is_err());
6361+
assert!(build_sandbox_resource_limits(Some("half"), None).is_err());
6362+
assert!(build_sandbox_resource_limits(None, Some("0Gi")).is_err());
6363+
assert!(build_sandbox_resource_limits(None, Some("1.5Gi")).is_err());
6364+
}
6365+
62146366
#[test]
62156367
fn inferred_provider_type_returns_type_for_known_command() {
62166368
let result = inferred_provider_type(&["claude".to_string(), "--help".to_string()]);

0 commit comments

Comments
 (0)