Skip to content

Commit 34bdc40

Browse files
authored
[Backend] Skip global scratch offset computation when scratch size is zero (#9473)
`getGlobalScratchPtr()` was generating `GetNumProgramsOp` (lowered to `__ockl_get_num_groups` on AMD) even when the global scratch size is 0. This breaks `test_link_extern_libs` on AMD after the `GlobalScratchAllocation` pass was added to the pipeline.
1 parent ed6f1b7 commit 34bdc40

2 files changed

Lines changed: 17 additions & 1 deletion

File tree

lib/Conversion/TritonGPUToLLVM/Utility.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1271,7 +1271,7 @@ Value getGlobalScratchPtr(Location loc, RewriterBase &rewriter,
12711271
ModuleOp mod = funcOp.getOperation()->getParentOfType<ModuleOp>();
12721272
auto allocSizeAttr = mod.getOperation()->getAttrOfType<mlir::IntegerAttr>(
12731273
"ttg.global_scratch_memory_size");
1274-
if (!allocSizeAttr) {
1274+
if (!allocSizeAttr || allocSizeAttr.getValue().isZero()) {
12751275
return gmemBase;
12761276
}
12771277

test/Conversion/amd/tritongpu_to_llvm.mlir

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -732,3 +732,19 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 1 : i32, "ttg.thr
732732
tt.return
733733
}
734734
}
735+
736+
// -----
737+
738+
// Make sure no rocdl.grid.dim.* op is generated when ttg.global_scratch_memory_size is 0.
739+
module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, "ttg.threads-per-warp" = 64 : i32, ttg.global_scratch_memory_size = 0 : i32, ttg.global_scratch_memory_alignment = 1 : i32} {
// The module attributes above set ttg.global_scratch_memory_size to 0,
// which is the configuration this test exercises.
740+
// Expected output: after the function label, the call to the callee appears
// as an llvm.call, and no grid-dimension query shows up before that call.
// CHECK-LABEL: @test_call_zero_scratch_no_grid_ops
741+
// CHECK-NOT: rocdl.grid.dim
742+
// CHECK: llvm.call @callee_zero_scratch
743+
// Caller: besides the return, its only operation is a tt.call to
// @callee_zero_scratch.
tt.func public @test_call_zero_scratch_no_grid_ops() attributes {noinline = false} {
744+
tt.call @callee_zero_scratch() : () -> ()
745+
tt.return
746+
}
747+
// Callee: empty body, marked noinline = true (presumably so the call is kept
// as a real call during lowering rather than being inlined away; confirm).
tt.func private @callee_zero_scratch() attributes {noinline = true} {
748+
tt.return
749+
}
750+
}

0 commit comments

Comments
 (0)