Skip to content

Commit 15f9161

Browse files
authored
[Hopper] Move dummy moves before wgmma.fence (#9530)
0a0a57c introduced these moves before `wgmma.mma_async`, but producer and consumer of register inputs need to be separated by a `wgmma.fence`. See also: #9433
1 parent 434aecb commit 15f9161

2 files changed

Lines changed: 3 additions & 0 deletions

File tree

test/Conversion/tritongpu_to_llvm_hopper.mlir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32} {
140140
// CHECK: %[[A_MOV1:.*]] = llvm.inline_asm has_side_effects asm_dialect = att operand_attrs = [] "mov.b32 $0, $1;", "=r,r" %{{.*}} : (i32) -> i32
141141
// CHECK: %[[A_MOV2:.*]] = llvm.inline_asm has_side_effects asm_dialect = att operand_attrs = [] "mov.b32 $0, $1;", "=r,r" %{{.*}} : (i32) -> i32
142142
// CHECK: %[[A_MOV3:.*]] = llvm.inline_asm has_side_effects asm_dialect = att operand_attrs = [] "mov.b32 $0, $1;", "=r,r" %{{.*}} : (i32) -> i32
143+
// CHECK: nvvm.wgmma.fence.aligned
143144
// CHECK: %[[A_PACK0:.*]] = llvm.insertvalue %[[A_MOV0]], %{{.*}}[0] : !llvm.struct<(i32, i32, i32, i32)>
144145
// CHECK: %[[A_PACK1:.*]] = llvm.insertvalue %[[A_MOV1]], %[[A_PACK0]][1] : !llvm.struct<(i32, i32, i32, i32)>
145146
// CHECK: %[[A_PACK2:.*]] = llvm.insertvalue %[[A_MOV2]], %[[A_PACK1]][2] : !llvm.struct<(i32, i32, i32, i32)>

third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/WGMMA.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,8 @@ LogicalResult convertDot(const LLVMTypeConverter *typeConverter,
302302
// WGMMA, we can safely overlap transformations on the A operand with
303303
// the previous-iteration WGMMA still in flight.
304304
for (Value &regAVal : regA) {
305+
OpBuilder::InsertionGuard guard(rewriter);
306+
rewriter.setInsertionPoint(startSequence);
305307
Type regTy = regAVal.getType();
306308
if (!regTy.isIntOrFloat() || regTy.getIntOrFloatBitWidth() != 32) {
307309
return mlir::emitError(loc, "unsupported WGMMA A register type ")

0 commit comments

Comments
 (0)