|
20 | 20 | #NVMMA_SHARED_32 = #ttg.nvmma_shared<{swizzlingByteWidth = 32, transposed = false, elementBitWidth = 16}> |
21 | 21 | #NVMMA_SHARED_64 = #ttg.nvmma_shared<{swizzlingByteWidth = 64, transposed = false, elementBitWidth = 16}> |
22 | 22 | #NVMMA_SHARED_128 = #ttg.nvmma_shared<{swizzlingByteWidth = 128, transposed = false, elementBitWidth = 16}> |
| 23 | +#NVMMA_SHARED_FP4PADDED = #ttg.nvmma_shared<{swizzlingByteWidth = 128, transposed = false, elementBitWidth = 8, fp4Padded = true}> |
| 24 | + |
23 | 25 | #smem = #ttg.shared_memory |
24 | 26 |
|
25 | 27 | module attributes {"ttg.num-warps" = 4 : i32, "ttg.num-ctas" = 1 : i32} { |
@@ -917,6 +919,8 @@ tt.func @tightly_packed_captures(%arg0: i8, %arg1: i64) { |
917 | 919 | // expected-remark @below {{nvmma_alignment}} |
918 | 920 | // expected-remark @below {{size = 1088}} |
919 | 921 | tt.func @nvmma_alignment(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) { |
| 922 | + // expected-remark @below {{offset = 0, size = 128}} |
| 923 | + %fp4 = ttg.local_alloc : () -> !ttg.memdesc<8x8xi8, #NVMMA_SHARED_FP4PADDED, #ttg.shared_memory, mutable> |
920 | 924 | // expected-remark @below {{offset = 0, size = 64}} |
921 | 925 | %a = ttg.local_alloc : () -> !ttg.memdesc<32xf16, #A_SHARED, #ttg.shared_memory, mutable> |
922 | 926 | // expected-remark @below {{offset = 128, size = 64}} |
|
0 commit comments