@@ -63,6 +63,51 @@ pub mod class {
6363 pub const AMPERE_COMPUTE_A : u32 = 0xC6C0 ;
6464}
6565
/// Copy Engine (CE / DMA_COPY) class identifiers and method offsets.
///
/// Offsets and bit positions follow NVIDIA's published class header for
/// `VOLTA_DMA_COPY_A` (class `0xC3B5`).
pub mod ce {
    /// Volta DMA copy class (VOLTA_DMA_COPY_A).
    pub const VOLTA_DMA_COPY_A: u32 = 0xC3B5;

    /// Method offsets for the DMA copy engine class.
    pub mod method {
        /// Bind an engine class to a subchannel.
        pub const SET_OBJECT: u32 = 0x0000;

        /// Source address, high word (address bits 32 and above).
        pub const OFFSET_IN_UPPER: u32 = 0x0400;
        /// Source address, low 32 bits.
        pub const OFFSET_IN_LOWER: u32 = 0x0404;
        /// Destination address, high word (address bits 32 and above).
        pub const OFFSET_OUT_UPPER: u32 = 0x0408;
        /// Destination address, low 32 bits.
        pub const OFFSET_OUT_LOWER: u32 = 0x040C;
        /// Pitch-in (bytes per row, source).
        pub const PITCH_IN: u32 = 0x0410;
        /// Pitch-out (bytes per row, dest).
        pub const PITCH_OUT: u32 = 0x0414;
        /// Line length in bytes.
        pub const LINE_LENGTH_IN: u32 = 0x0418;
        /// Line count.
        pub const LINE_COUNT: u32 = 0x041C;

        /// Launch DMA transfer.
        ///
        /// Bit fields used by this crate (per the class header):
        ///
        /// | bits    | field                                                    |
        /// |---------|----------------------------------------------------------|
        /// | `[1:0]` | data_transfer_type (0=NONE, 1=PIPELINED, 2=NON_PIPELINED) |
        /// | `[2]`   | flush_enable                                             |
        /// | `[4:3]` | semaphore_type (0=NONE, 1=RELEASE_ONE_WORD, 2=RELEASE_FOUR_WORD) |
        /// | `[7]`   | src_memory_layout (0=BLOCKLINEAR, 1=PITCH)               |
        /// | `[8]`   | dst_memory_layout (0=BLOCKLINEAR, 1=PITCH)               |
        /// | `[9]`   | multi_line_enable                                        |
        /// | `[12]`  | src_type (0=VIRTUAL, 1=PHYSICAL)                         |
        /// | `[13]`  | dst_type (0=VIRTUAL, 1=PHYSICAL)                         |
        pub const LAUNCH_DMA: u32 = 0x0300;

        /// `LAUNCH_DMA` field: data_transfer_type = PIPELINED (bits `[1:0]` = 1).
        pub const LAUNCH_DMA_DATA_TRANSFER_PIPELINED: u32 = 0x1;
        /// `LAUNCH_DMA` field: flush_enable (bit 2).
        pub const LAUNCH_DMA_FLUSH_ENABLE: u32 = 1 << 2;
        /// `LAUNCH_DMA` field: semaphore_type = RELEASE_ONE_WORD (bits `[4:3]` = 1).
        pub const LAUNCH_DMA_SEMAPHORE_RELEASE_ONE_WORD: u32 = 1 << 3;
        /// `LAUNCH_DMA` field: src_memory_layout = PITCH (bit 7).
        pub const LAUNCH_DMA_SRC_LAYOUT_PITCH: u32 = 1 << 7;
        /// `LAUNCH_DMA` field: dst_memory_layout = PITCH (bit 8).
        pub const LAUNCH_DMA_DST_LAYOUT_PITCH: u32 = 1 << 8;

        /// LAUNCH_DMA value: pipelined, pitch src+dst, virtual addressing.
        ///
        /// Evaluates to `0x0000_0181`. The previous value `0x0000_1101` set
        /// bits 8 and 12, which the class decodes as "dst pitch, src
        /// *physical*" while leaving the source layout as block-linear.
        /// src_type / dst_type (bits 12/13) stay 0 here, i.e. VIRTUAL.
        pub const LAUNCH_PIPELINED_PITCH: u32 = LAUNCH_DMA_DATA_TRANSFER_PIPELINED
            | LAUNCH_DMA_SRC_LAYOUT_PITCH
            | LAUNCH_DMA_DST_LAYOUT_PITCH;

        /// Semaphore address upper (CE semaphore, not compute).
        pub const SET_SEMAPHORE_A: u32 = 0x0240;
        /// Semaphore address lower.
        pub const SET_SEMAPHORE_B: u32 = 0x0244;
        /// Semaphore payload.
        pub const SET_SEMAPHORE_PAYLOAD: u32 = 0x0248;
        /// Legacy "semaphore control" offset.
        ///
        /// NOTE(review): `0x024C` is not a method listed in the
        /// VOLTA_DMA_COPY_A class header; on this class a semaphore release is
        /// requested through the semaphore_type field of [`LAUNCH_DMA`].
        /// The constant is kept only so existing references keep compiling —
        /// confirm before sending it to hardware.
        pub const SEMAPHORE_CTRL: u32 = 0x024C;
    }
}
110+
66111/// Method offsets and constants for NVIDIA compute class push buffers.
67112pub mod method {
68113 /// Bind an engine class to a subchannel.
@@ -364,6 +409,50 @@ impl PushBuf {
364409
365410 pb
366411 }
412+
413+ /// Build a CE (Copy Engine) init pushbuffer — binds the CE class on subchannel 0.
414+ #[ must_use]
415+ pub fn ce_init ( ce_class : u32 ) -> Self {
416+ let mut pb = Self :: new ( ) ;
417+ pb. push_1 ( 0 , ce:: method:: SET_OBJECT , ce_class) ;
418+ pb
419+ }
420+
421+ /// Build a CE DMA copy pushbuffer.
422+ ///
423+ /// Copies `byte_count` bytes from `src_iova` to `dst_iova` using the CE
424+ /// LAUNCH_DMA method. Both addresses must be GPU-virtual (IOVA) from the
425+ /// same DMA domain.
426+ #[ must_use]
427+ pub fn ce_dma_copy ( src_iova : u64 , dst_iova : u64 , byte_count : u32 ) -> Self {
428+ let mut pb = Self :: new ( ) ;
429+ let sc = 0_u32 ;
430+
431+ pb. push_1 ( sc, ce:: method:: OFFSET_IN_UPPER , ( src_iova >> 32 ) as u32 ) ;
432+ pb. push_1 ( sc, ce:: method:: OFFSET_IN_LOWER , src_iova as u32 ) ;
433+ pb. push_1 ( sc, ce:: method:: OFFSET_OUT_UPPER , ( dst_iova >> 32 ) as u32 ) ;
434+ pb. push_1 ( sc, ce:: method:: OFFSET_OUT_LOWER , dst_iova as u32 ) ;
435+ pb. push_1 ( sc, ce:: method:: PITCH_IN , byte_count) ;
436+ pb. push_1 ( sc, ce:: method:: PITCH_OUT , byte_count) ;
437+ pb. push_1 ( sc, ce:: method:: LINE_LENGTH_IN , byte_count) ;
438+ pb. push_1 ( sc, ce:: method:: LINE_COUNT , 1 ) ;
439+ pb. push_1 ( sc, ce:: method:: LAUNCH_DMA , ce:: method:: LAUNCH_PIPELINED_PITCH ) ;
440+ pb
441+ }
442+
443+ /// Build a CE semaphore release pushbuffer.
444+ ///
445+ /// After the preceding copy completes, writes `payload` to `sem_iova`.
446+ #[ must_use]
447+ pub fn ce_semaphore_release ( sem_iova : u64 , payload : u32 ) -> Self {
448+ let mut pb = Self :: new ( ) ;
449+ let sc = 0_u32 ;
450+ pb. push_1 ( sc, ce:: method:: SET_SEMAPHORE_A , ( sem_iova >> 32 ) as u32 ) ;
451+ pb. push_1 ( sc, ce:: method:: SET_SEMAPHORE_B , sem_iova as u32 ) ;
452+ pb. push_1 ( sc, ce:: method:: SET_SEMAPHORE_PAYLOAD , payload) ;
453+ pb. push_1 ( sc, ce:: method:: SEMAPHORE_CTRL , 0x1 ) ;
454+ pb
455+ }
367456}
368457
369458impl Default for PushBuf {
0 commit comments