Skip to content

Commit a0f79bc

Browse files
feat: CE engine validation, sovereignty tier model, PTOP parser fix
- ce_validate: end-to-end CE DMA validation pipeline — discovers CE runlist from PTOP topology, creates channel on non-GR runlist, submits CE DMA copy pushbuffer, polls GP_GET for consumption.
- sovereign_tiers: SovereignTier enum (Cold/WarmInfra/WarmCompute/FullSovereign), classify_tier(), TierCapabilities, TierEvidence. Integrated into sovereign.warm_status API response.
- pfifo: fixed PTOP_DEVICE_INFO_V2 parser for GV100 — runlist in kind==2 entries at bits [17:14], engine type in kind==1 at bits [2:7]. discover_ce_runlist() and find_pbdma_for_runlist() now correctly identify CE runlist 10 → PBDMA 9.
- pushbuf: CE method definitions (VOLTA_DMA_COPY_A 0xC3B5) and pushbuffer builders (ce_init, ce_dma_copy, ce_semaphore_release).
- server: sovereign.ce_validate RPC route + dispatch handler.

Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 5cd57cd commit a0f79bc

8 files changed

Lines changed: 1068 additions & 113 deletions

File tree

crates/core/cylinder/src/nv/pushbuf.rs

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,51 @@ pub mod class {
6363
pub const AMPERE_COMPUTE_A: u32 = 0xC6C0;
6464
}
6565

66+
/// Copy Engine (CE / DMA_COPY) class identifiers and method offsets.
///
/// Offsets and bit positions follow the published `VOLTA_DMA_COPY_A`
/// (class 0xC3B5) class header.
pub mod ce {
    /// Volta DMA copy class (VOLTA_DMA_COPY_A).
    pub const VOLTA_DMA_COPY_A: u32 = 0xC3B5;

    /// Method offsets for the DMA copy engine class.
    pub mod method {
        /// Bind an engine class to a subchannel.
        pub const SET_OBJECT: u32 = 0x0000;

        /// Semaphore address, upper bits (address bits 32 and above; 17-bit field).
        pub const SET_SEMAPHORE_A: u32 = 0x0240;
        /// Semaphore address, lower 32 bits.
        pub const SET_SEMAPHORE_B: u32 = 0x0244;
        /// Semaphore payload.
        pub const SET_SEMAPHORE_PAYLOAD: u32 = 0x0248;
        /// Semaphore control: bit [0] = release after copy.
        ///
        /// NOTE(review): the published VOLTA_DMA_COPY_A header defines no
        /// method at this offset; on that class a semaphore release is
        /// requested through the `LAUNCH_DMA` semaphore_type field
        /// ([`LAUNCH_SEMAPHORE_RELEASE_ONE_WORD`]). The constant is kept so
        /// existing references continue to compile — confirm before relying
        /// on it.
        pub const SEMAPHORE_CTRL: u32 = 0x024C;

        /// Launch DMA transfer.
        /// Bits: [1:0] = data_transfer_type (0=NONE, 1=PIPELINED, 2=NON_PIPELINED)
        ///       [2]   = flush_enable
        ///       [4:3] = semaphore_type (0=NONE, 1=RELEASE_ONE_WORD, 2=RELEASE_FOUR_WORD)
        ///       [7]   = src_memory_layout (0=BLOCKLINEAR, 1=PITCH)
        ///       [8]   = dst_memory_layout (0=BLOCKLINEAR, 1=PITCH)
        ///       [12]  = src_type (0=VIRTUAL, 1=PHYSICAL)
        ///       [13]  = dst_type (0=VIRTUAL, 1=PHYSICAL)
        pub const LAUNCH_DMA: u32 = 0x0300;
        /// `LAUNCH_DMA` field: data_transfer_type = PIPELINED.
        pub const LAUNCH_TRANSFER_PIPELINED: u32 = 1 << 0;
        /// `LAUNCH_DMA` field: flush_enable.
        pub const LAUNCH_FLUSH_ENABLE: u32 = 1 << 2;
        /// `LAUNCH_DMA` field: semaphore_type = RELEASE_ONE_WORD.
        pub const LAUNCH_SEMAPHORE_RELEASE_ONE_WORD: u32 = 1 << 3;
        /// `LAUNCH_DMA` field: src_memory_layout = PITCH.
        pub const LAUNCH_SRC_LAYOUT_PITCH: u32 = 1 << 7;
        /// `LAUNCH_DMA` field: dst_memory_layout = PITCH.
        pub const LAUNCH_DST_LAYOUT_PITCH: u32 = 1 << 8;
        /// `LAUNCH_DMA` field: src_type = PHYSICAL.
        pub const LAUNCH_SRC_TYPE_PHYSICAL: u32 = 1 << 12;
        /// `LAUNCH_DMA` field: dst_type = PHYSICAL.
        pub const LAUNCH_DST_TYPE_PHYSICAL: u32 = 1 << 13;
        /// LAUNCH_DMA value: pipelined, pitch src+dst, virtual addressing.
        ///
        /// Evaluates to `0x0000_0181`. The earlier literal `0x0000_1101`
        /// set bits 0, 8 and 12, i.e. it left the source layout blocklinear
        /// and marked the source address as physical.
        pub const LAUNCH_PIPELINED_PITCH: u32 =
            LAUNCH_TRANSFER_PIPELINED | LAUNCH_SRC_LAYOUT_PITCH | LAUNCH_DST_LAYOUT_PITCH;

        /// Source address, upper bits (address bits 32 and above; 17-bit field).
        pub const OFFSET_IN_UPPER: u32 = 0x0400;
        /// Source address, lower 32 bits.
        pub const OFFSET_IN_LOWER: u32 = 0x0404;
        /// Destination address, upper bits (address bits 32 and above; 17-bit field).
        pub const OFFSET_OUT_UPPER: u32 = 0x0408;
        /// Destination address, lower 32 bits.
        pub const OFFSET_OUT_LOWER: u32 = 0x040C;
        /// Pitch-in (bytes per row, source).
        pub const PITCH_IN: u32 = 0x0410;
        /// Pitch-out (bytes per row, dest).
        pub const PITCH_OUT: u32 = 0x0414;
        /// Line length in bytes.
        pub const LINE_LENGTH_IN: u32 = 0x0418;
        /// Line count.
        pub const LINE_COUNT: u32 = 0x041C;
    }
}
110+
66111
/// Method offsets and constants for NVIDIA compute class push buffers.
67112
pub mod method {
68113
/// Bind an engine class to a subchannel.
@@ -364,6 +409,50 @@ impl PushBuf {
364409

365410
pb
366411
}
412+
413+
/// Build a CE (Copy Engine) init pushbuffer — binds the CE class on subchannel 0.
414+
#[must_use]
415+
pub fn ce_init(ce_class: u32) -> Self {
416+
let mut pb = Self::new();
417+
pb.push_1(0, ce::method::SET_OBJECT, ce_class);
418+
pb
419+
}
420+
421+
/// Build a CE DMA copy pushbuffer.
422+
///
423+
/// Copies `byte_count` bytes from `src_iova` to `dst_iova` using the CE
424+
/// LAUNCH_DMA method. Both addresses must be GPU-virtual (IOVA) from the
425+
/// same DMA domain.
426+
#[must_use]
427+
pub fn ce_dma_copy(src_iova: u64, dst_iova: u64, byte_count: u32) -> Self {
428+
let mut pb = Self::new();
429+
let sc = 0_u32;
430+
431+
pb.push_1(sc, ce::method::OFFSET_IN_UPPER, (src_iova >> 32) as u32);
432+
pb.push_1(sc, ce::method::OFFSET_IN_LOWER, src_iova as u32);
433+
pb.push_1(sc, ce::method::OFFSET_OUT_UPPER, (dst_iova >> 32) as u32);
434+
pb.push_1(sc, ce::method::OFFSET_OUT_LOWER, dst_iova as u32);
435+
pb.push_1(sc, ce::method::PITCH_IN, byte_count);
436+
pb.push_1(sc, ce::method::PITCH_OUT, byte_count);
437+
pb.push_1(sc, ce::method::LINE_LENGTH_IN, byte_count);
438+
pb.push_1(sc, ce::method::LINE_COUNT, 1);
439+
pb.push_1(sc, ce::method::LAUNCH_DMA, ce::method::LAUNCH_PIPELINED_PITCH);
440+
pb
441+
}
442+
443+
/// Build a CE semaphore release pushbuffer.
444+
///
445+
/// After the preceding copy completes, writes `payload` to `sem_iova`.
446+
#[must_use]
447+
pub fn ce_semaphore_release(sem_iova: u64, payload: u32) -> Self {
448+
let mut pb = Self::new();
449+
let sc = 0_u32;
450+
pb.push_1(sc, ce::method::SET_SEMAPHORE_A, (sem_iova >> 32) as u32);
451+
pb.push_1(sc, ce::method::SET_SEMAPHORE_B, sem_iova as u32);
452+
pb.push_1(sc, ce::method::SET_SEMAPHORE_PAYLOAD, payload);
453+
pb.push_1(sc, ce::method::SEMAPHORE_CTRL, 0x1);
454+
pb
455+
}
367456
}
368457

369458
impl Default for PushBuf {

0 commit comments

Comments
 (0)