Skip to content

Commit 5e83bbe

Browse files
feat: wire sovereign.init as JSON-RPC method — first diesel engine RPC
sovereign.init: opens BAR0 via sysfs, runs staged sovereign_init pipeline (bar0_probe → pmc_enable → memory_training → falcon_boot → gr_init → verify), returns per-stage results with timing. Uses StubGspBridge for now. MappedBar::from_sysfs_rw(): new constructor enabling BAR0 access without full VFIO device open (iommufd FDs not needed for BAR0-only stages). Hardware validated (Exp 197): - Titan V (warm): stages 1-3 OK in 88ms, warm detection working - K80 x2 (cold VFIO): stages 1-2 OK, PRAMIN dead (DEVINIT replay needed) Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 5a4fa56 commit 5e83bbe

4 files changed

Lines changed: 149 additions & 0 deletions

File tree

crates/core/cylinder/src/vfio/device/mapped_bar.rs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,54 @@ impl RegisterAccess for MappedBar {
181181
}
182182
}
183183

184+
impl MappedBar {
185+
/// Create a `MappedBar` from a sysfs PCI BAR0 resource file (read-write).
186+
///
187+
/// Opens `/sys/bus/pci/devices/{bdf}/resource0` with `O_RDWR` and mmaps it.
188+
/// The file descriptor is leaked intentionally — the mapping lives for the
189+
/// duration of the `MappedBar` lifetime and the kernel reclaims on drop
190+
/// via `MmioRegion`'s unmap.
191+
pub fn from_sysfs_rw(bdf: &str, size: usize) -> Result<Self, DriverError> {
192+
let path = crate::linux_paths::sysfs_pci_device_file(bdf, "resource0");
193+
let file = std::fs::OpenOptions::new()
194+
.read(true)
195+
.write(true)
196+
.open(&path)
197+
.map_err(|e| {
198+
DriverError::MmapFailed(Cow::Owned(format!(
199+
"sysfs BAR0 open failed for {bdf}: {e}"
200+
)))
201+
})?;
202+
203+
let raw = unsafe {
204+
rustix::mm::mmap(
205+
std::ptr::null_mut(),
206+
size,
207+
rustix::mm::ProtFlags::READ | rustix::mm::ProtFlags::WRITE,
208+
rustix::mm::MapFlags::SHARED,
209+
&file,
210+
0,
211+
)
212+
}
213+
.map_err(|e| {
214+
DriverError::MmapFailed(Cow::Owned(format!(
215+
"sysfs BAR0 mmap failed for {bdf}: {e}"
216+
)))
217+
})?;
218+
219+
if raw.is_null() {
220+
return Err(DriverError::MmapFailed(Cow::Borrowed(
221+
"sysfs BAR0 mmap returned null",
222+
)));
223+
}
224+
225+
// Leak the file descriptor — the mmap keeps the mapping alive.
226+
std::mem::forget(file);
227+
let region = unsafe { MmioRegion::new(raw.cast::<u8>(), size) };
228+
Ok(Self { region })
229+
}
230+
}
231+
184232
/// Test-only constructor backed by heap memory.
185233
#[cfg(test)]
186234
impl MappedBar {

crates/server/src/pure_jsonrpc/handler/core/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ pub const DIRECT_JSONRPC_METHODS: &[&str] = &[
9898
"device.vfio.roundtrip",
9999
"device.gr.init",
100100
"compute.context.init",
101+
"sovereign.init",
101102
"mmio.read32",
102103
"mmio.write32",
103104
"mmio.batch",

crates/server/src/pure_jsonrpc/handler/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ pub mod method_gate;
1515
mod mmio;
1616
mod resources;
1717
mod silicon;
18+
mod sovereign;
1819
mod transport;
1920
mod workload;
2021

@@ -378,6 +379,8 @@ impl JsonRpcHandler {
378379
return self.dispatch.device_gr_init(params).await;
379380
}
380381

382+
"sovereign.init" => return sovereign::sovereign_init(params),
383+
381384
"mmio.read32" => return mmio::mmio_read32(params),
382385
"mmio.write32" => return mmio::mmio_write32(params),
383386
"mmio.batch" => return mmio::mmio_batch(params),
@@ -472,6 +475,7 @@ impl JsonRpcHandler {
472475
"device_vfio_open" => self.dispatch.device_vfio_open(params).await,
473476
"device_vfio_roundtrip" => self.dispatch.device_vfio_roundtrip(params).await,
474477
"device_gr_init" => self.dispatch.device_gr_init(params).await,
478+
"sovereign_init" => sovereign::sovereign_init(params),
475479
"mmio_read32" => mmio::mmio_read32(params),
476480
"mmio_write32" => mmio::mmio_write32(params),
477481
"mmio_batch" => mmio::mmio_batch(params),
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
// SPDX-License-Identifier: AGPL-3.0-or-later
2+
//! Sovereign GPU initialization JSON-RPC handler.
3+
//!
4+
//! Exposes `sovereign.init` — the staged diesel-engine pipeline that brings a
5+
//! VFIO-bound GPU from cold/warm state to compute-ready.
6+
7+
use crate::pure_jsonrpc::types::JsonRpcError;
8+
use serde_json::Value;
9+
use tracing::info;
10+
11+
const DEFAULT_BAR0_SIZE: usize = 16 * 1024 * 1024;
12+
13+
/// `sovereign.init` — Run the full sovereign init pipeline on a GPU.
14+
///
15+
/// Opens BAR0 via sysfs, runs `cylinder::sovereign_init` stages, returns
16+
/// per-stage results. The GPU must be VFIO-bound (or unbound) with BAR0
17+
/// accessible via `/sys/bus/pci/devices/{bdf}/resource0`.
18+
///
19+
/// Params:
20+
/// - `bdf` (required): PCI BDF address (e.g. `"0000:4b:00.0"`)
21+
/// - `halt_before` (optional): Stop before a stage (`"pmc_enable"`, `"hbm2_training"`,
22+
/// `"kepler_pgraph_ungate"`, `"falcon_boot"`, `"gr_init"`, `"verify"`)
23+
/// - `skip_gr_init` (optional, default false): Skip GR init stage
24+
/// - `golden_state_path` (optional): Path to golden-state JSON for HBM2 replay
25+
/// - `vbios_rom_path` (optional): Path to raw VBIOS ROM dump
26+
/// - `sm_version` (optional): SM version override (auto-detected if omitted)
27+
/// - `fbpa_count` (optional): FBPA partition count override (auto-detected)
28+
pub fn sovereign_init(params: Option<&Value>) -> Result<Value, JsonRpcError> {
29+
let bdf = params
30+
.and_then(|p| p.get("bdf"))
31+
.and_then(Value::as_str)
32+
.ok_or_else(|| JsonRpcError::invalid_params("Missing 'bdf' string parameter"))?;
33+
34+
info!(bdf = %bdf, "sovereign.init: opening BAR0");
35+
36+
let bar0 = toadstool_cylinder::vfio::device::MappedBar::from_sysfs_rw(bdf, DEFAULT_BAR0_SIZE)
37+
.map_err(|e| {
38+
JsonRpcError::internal_error(format!(
39+
"BAR0 open failed for {bdf}: {e}. Ensure vfio-pci is bound and resource0 is accessible."
40+
))
41+
})?;
42+
43+
let mut opts: toadstool_cylinder::vfio::sovereign_init::SovereignInitOptions =
44+
if let Some(p) = params {
45+
serde_json::from_value(p.clone()).unwrap_or_default()
46+
} else {
47+
Default::default()
48+
};
49+
50+
if let Some(path) = opts.golden_state_path.as_ref() {
51+
match std::fs::read_to_string(path) {
52+
Ok(json_str) => {
53+
if let Ok(pairs) =
54+
serde_json::from_str::<Vec<(usize, u32)>>(&json_str)
55+
{
56+
opts.golden_state = Some(pairs);
57+
}
58+
}
59+
Err(e) => {
60+
info!(path = %path, error = %e, "golden_state_path read failed, continuing without");
61+
}
62+
}
63+
}
64+
65+
if let Some(path) = opts.vbios_rom_path.as_ref() {
66+
match std::fs::read(path) {
67+
Ok(rom) => {
68+
opts.vbios_rom = Some(rom);
69+
}
70+
Err(e) => {
71+
info!(path = %path, error = %e, "vbios_rom_path read failed, continuing without");
72+
}
73+
}
74+
}
75+
76+
let bridge = toadstool_cylinder::nv::gsp_bridge::StubGspBridge;
77+
78+
info!(bdf = %bdf, halt_before = ?opts.halt_before, "sovereign.init: starting pipeline");
79+
80+
let result = toadstool_cylinder::vfio::sovereign_init::sovereign_init(
81+
&bar0, bdf, &opts, &bridge,
82+
);
83+
84+
info!(
85+
bdf = %bdf,
86+
all_ok = result.all_ok,
87+
compute_ready = result.compute_ready,
88+
total_ms = result.total_ms,
89+
stages = result.stages.len(),
90+
warm_detected = result.warm_detected,
91+
"sovereign.init: pipeline complete"
92+
);
93+
94+
serde_json::to_value(&result)
95+
.map_err(|e| JsonRpcError::internal_error(format!("serialization failed: {e}")))
96+
}

0 commit comments

Comments
 (0)