Skip to content

Commit 281187d

Browse files
[triton][beta] Backport '[Triton] Fix LLVMDILocalVariable pass crash with LLVM_EXTRACT_DI_LOCAL_VARIABLES (#10042)' (#1390)
Summary: This is a backport of an upstream PR: triton-lang/triton#10042 Upstream commit message: ``` > [Triton] Fix LLVMDILocalVariable pass crash with LLVM_EXTRACT_DI_LOCAL_VARIABLES (#10042) > Summary: > Fix multiple bugs in the LLVMDILocalVariable pass that caused crashes > when running with LLVM_EXTRACT_DI_LOCAL_VARIABLES=1. > Root cause: The MLIR DIRecursiveTypeAttrInterface uses isRecSelf=true as > a placeholder and isRecSelf=false as its resolver. During MLIR-to-LLVM > IR translation, the resolver recursiveNodeMap entry is temporary (popped > after translation). Any isRecSelf=true references encountered outside > the resolver translation scope trigger an assertion. > The add_di_scope pass creates a DISubprogramAttr with isRecSelf=true and > embeds it in function locations and DILexicalBlockFileAttr scopes in > operation locations. The LLVMDILocalVariable pass was supposed to > resolve these but had several issues. > Fixes: > 1. Added null/scope guard for diSubprogramAttr to skip module-level ops > that appear before or between functions in the walk. > 2. Fixed getDISubprogramAttr(Operation op) to take Operation *op. > 3. Restructured fuseFuncArgVariables to create two sets of > DILocalVariableAttr: one with isRecSelf=true scope for retainedNodes > (correct recursive pattern), and separate ones with the resolved > isRecSelf=false scope for DbgValueOps (avoids unresolvable refs). > 4. Added fixLexicalBlockScopes to walk operations and replace > DILexicalBlockFileAttr scopes that reference the old isRecSelf=true > subprogram with the resolved version. > 5. Value-initialized diSubprogramAttr to avoid undefined behavior. > Test Plan: > TRITON_ALWAYS_COMPILE=1 LLVM_EXTRACT_DI_LOCAL_VARIABLES=1 \ > python3 python/tutorials/02-fused-softmax.py > <!--- > The core Triton is a small number of people, and we receive many PRs > (thank > you!). To help us review your code more quickly, **if you are a new > contributor (less than 3 PRs merged) we ask that you complete the > following > tasks and include the filled-out checklist in your PR description.** > Complete the following tasks before sending your PR, and replace `[ ]` > with > `[x]` to indicate you have done them. > --> > # New contributor declaration > - [x] I am not making a trivial change, such as fixing a typo in a > comment. > - [x] I have written a PR description following these > [rules](https://cbea.ms/git-commit/#why-not-how). > - [x] I have run `pre-commit run --from-ref origin/main --to-ref HEAD`. > - Select one of the following. > - [X] I have added tests. > - `/test` for `lit` tests > - `/unittest` for C++ tests > - `/python/test` for end-to-end tests > - [ ] This PR does not need a test because `FILL THIS IN`. > - Select one of the following. > - [ ] I have not added any `lit` tests. > - [x] The `lit` tests I have added follow these [best > practices](https://mlir.llvm.org/getting_started/TestingGuide/#filecheck-best-practices), > including the "tests should be minimal" section. (Usually running Python > code > and using the instructions it generates is not minimal.) > Co-authored-by: Bill Yoshimi <byoshimi@fb.com> > Co-authored-by: Corbin Robeck <robeck@meta.com> ``` ***Do not remove the following line from this commit*** Reactor Backport Revision: 5b3965b27eb331094854963b751709bcb148c3d4 --- This diff was generated by running: ``` buck run fbcode//triton/tools/reactor:reactor -- backport --pr 10042 ``` Differential Revision: D102859687
1 parent aac961f commit 281187d

2 files changed

Lines changed: 165 additions & 20 deletions

File tree

lib/Target/LLVMIR/LLVMDILocalVariable.cpp

Lines changed: 124 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ struct LLVMDILocalVariablePass
3434
return;
3535
}
3636

37+
// Skip ops outside of a function (e.g. module-level globals) where
38+
// diSubprogramAttr may not be valid for this op's scope.
39+
if (!diSubprogramAttr || !op->getParentOfType<LLVM::LLVMFuncOp>()) {
40+
return;
41+
}
42+
3743
MLIRContext *context = op->getContext();
3844
OpBuilder builder(context);
3945
Location loc = op->getLoc();
@@ -201,7 +207,7 @@ struct LLVMDILocalVariablePass
201207

202208
// construct a subprogram of an operation by using its parent function's
203209
// DISubprogramAttr construction
204-
LLVM::DISubprogramAttr getDISubprogramAttr(Operation op) {
210+
LLVM::DISubprogramAttr getDISubprogramAttr(Operation &op) {
205211
auto funcOp = op.getParentOfType<LLVM::LLVMFuncOp>();
206212
return getDISubprogramAttr(funcOp);
207213
}
@@ -213,20 +219,31 @@ struct LLVMDILocalVariablePass
213219
MLIRContext *context = &getContext();
214220
OpBuilder builder(context);
215221
builder.setInsertionPointToStart(&funcOp.getBody().front());
216-
llvm::SmallVector<mlir::LLVM::DINodeAttr> retainedNodes;
217222

218223
LLVM::DIFileAttr fileAttr = subprogramAttr.getFile();
219224
LLVM::DISubroutineTypeAttr subroutineTypeAttr = subprogramAttr.getType();
220225
int64_t line = subprogramAttr.getLine();
221-
auto localScopeAttr = dyn_cast<LLVM::DILocalScopeAttr>(subprogramAttr);
226+
// The input subprogramAttr has isRecSelf=true. Use it as the scope for
227+
// retainedNodes variables (proper recursive self-reference pattern).
228+
auto selfRefScopeAttr = dyn_cast<LLVM::DILocalScopeAttr>(subprogramAttr);
222229
auto diFlag = LLVM::DIFlags::Zero;
223230

224-
// Extract function arguments and add them to retainedNodes:
225-
// 0. Extract function argument types from subroutineTypeAttr
226-
// 1. Create DILocalVariable and DebugValueOp for each arg
227-
// 2. Add each arg as DILocalVariableAttr to retainedNodes
231+
// Collect argument info and create retainedNodes variables scoped to the
232+
// isRecSelf=true placeholder (these are nested inside the definition and
233+
// resolved during translation).
228234
auto argTypeAttrs = subroutineTypeAttr.getTypes();
229235
unsigned resNum = funcOp.getNumResults() ? funcOp.getNumResults() : 1;
236+
237+
struct ArgInfo {
238+
unsigned argIdx;
239+
BlockArgument arg;
240+
Location childLoc;
241+
StringAttr nameAttr;
242+
LLVM::DITypeAttr typeAttr;
243+
};
244+
llvm::SmallVector<ArgInfo> argInfos;
245+
llvm::SmallVector<mlir::LLVM::DINodeAttr> retainedNodes;
246+
230247
for (unsigned idx = resNum; idx < argTypeAttrs.size(); idx++) {
231248
LLVM::DITypeAttr argTypeAttr = argTypeAttrs[idx];
232249
unsigned argIdx = idx - resNum;
@@ -236,20 +253,18 @@ struct LLVMDILocalVariablePass
236253
auto nameLoc = dyn_cast<NameLoc>(argLoc);
237254
if (!nameLoc)
238255
continue;
239-
Location childLoc = nameLoc.getChildLoc();
240-
StringAttr nameAttr = nameLoc.getName();
241256

257+
// Create variable with isRecSelf=true scope for retainedNodes
242258
auto argVarAttr = LLVM::DILocalVariableAttr::get(
243-
context, localScopeAttr, nameAttr, fileAttr, line, argIdx + 1, 0,
244-
argTypeAttr, diFlag);
245-
246-
auto exprAttr = LLVM::DIExpressionAttr::get(context);
247-
(void)LLVM::DbgValueOp::create(builder, childLoc, arg, argVarAttr,
248-
exprAttr);
249-
259+
context, selfRefScopeAttr, nameLoc.getName(), fileAttr, line,
260+
argIdx + 1, 0, argTypeAttr, diFlag);
250261
retainedNodes.push_back(argVarAttr);
262+
263+
argInfos.push_back(
264+
{argIdx, arg, nameLoc.getChildLoc(), nameLoc.getName(), argTypeAttr});
251265
}
252266

267+
// Create the resolved subprogram (isRecSelf=false) with retainedNodes.
253268
mlir::DistinctAttr recId = subprogramAttr.getRecId();
254269
mlir::DistinctAttr id =
255270
mlir::DistinctAttr::create(mlir::UnitAttr::get(context));
@@ -262,23 +277,112 @@ struct LLVMDILocalVariablePass
262277
funcNameAttr, funcNameAttr, fileAttr, line, line, subprogramFlags,
263278
subroutineTypeAttr, retainedNodes, /*annotations=*/{});
264279

280+
// Now create DbgValueOps with variables scoped to the RESOLVED subprogram.
281+
// The isRecSelf=true scope must NOT appear in DbgValueOps because the
282+
// translator's recursiveNodeMap entry is only valid during translation of
283+
// the isRecSelf=false definition and is popped afterwards.
284+
auto resolvedScopeAttr = dyn_cast<LLVM::DILocalScopeAttr>(subprogramAttr);
285+
for (auto &info : argInfos) {
286+
auto argVarAttr = LLVM::DILocalVariableAttr::get(
287+
context, resolvedScopeAttr, info.nameAttr, fileAttr, line,
288+
info.argIdx + 1, 0, info.typeAttr, diFlag);
289+
auto exprAttr = LLVM::DIExpressionAttr::get(context);
290+
(void)LLVM::DbgValueOp::create(builder, info.childLoc, info.arg,
291+
argVarAttr, exprAttr);
292+
}
293+
265294
Location loc = funcOp.getLoc();
266-
// Reset the subprogramAttr with retainedNodes to the funcOp
295+
// Unwrap the old FusedLoc to avoid nesting a stale isRecSelf=true
296+
// subprogram in the location chain.
297+
if (auto fusedLoc = dyn_cast<FusedLoc>(loc)) {
298+
if (fusedLoc.getMetadata() &&
299+
isa<LLVM::DISubprogramAttr>(fusedLoc.getMetadata())) {
300+
loc = FusedLoc::get(context, fusedLoc.getLocations(), Attribute());
301+
}
302+
}
267303
funcOp->setLoc(mlir::FusedLoc::get(context, {loc}, subprogramAttr));
268304
return subprogramAttr;
269305
}
270306

307+
// After creating the resolved subprogram, fix any DILexicalBlockFileAttr
308+
// locations in the function that still reference the old isRecSelf=true
309+
// subprogram. These were created by add_di_scope before this pass ran.
310+
void fixLexicalBlockScopes(LLVM::LLVMFuncOp funcOp,
311+
LLVM::DISubprogramAttr oldSubprogram,
312+
LLVM::DISubprogramAttr newSubprogram) {
313+
MLIRContext *context = &getContext();
314+
funcOp.walk([&](Operation *op) {
315+
Location loc = op->getLoc();
316+
Location newLoc =
317+
replaceLexicalBlockScope(context, loc, oldSubprogram, newSubprogram);
318+
if (newLoc != loc)
319+
op->setLoc(newLoc);
320+
});
321+
}
322+
323+
Location replaceLexicalBlockScope(MLIRContext *context, Location loc,
324+
LLVM::DISubprogramAttr oldSP,
325+
LLVM::DISubprogramAttr newSP) {
326+
if (auto fusedLoc = dyn_cast<FusedLoc>(loc)) {
327+
// Recursively fix inner locations
328+
SmallVector<Location> newLocs;
329+
bool changed = false;
330+
for (Location inner : fusedLoc.getLocations()) {
331+
Location fixed = replaceLexicalBlockScope(context, inner, oldSP, newSP);
332+
newLocs.push_back(fixed);
333+
if (fixed != inner)
334+
changed = true;
335+
}
336+
337+
auto metadata = fusedLoc.getMetadata();
338+
if (auto lexBlock =
339+
dyn_cast_or_null<LLVM::DILexicalBlockFileAttr>(metadata)) {
340+
if (auto scope =
341+
dyn_cast_or_null<LLVM::DISubprogramAttr>(lexBlock.getScope())) {
342+
if (scope == oldSP) {
343+
auto newBlock = LLVM::DILexicalBlockFileAttr::get(
344+
context, newSP, lexBlock.getFile(),
345+
lexBlock.getDiscriminator());
346+
return FusedLoc::get(context, newLocs, newBlock);
347+
}
348+
}
349+
}
350+
351+
if (changed)
352+
return FusedLoc::get(context, newLocs, metadata);
353+
} else if (auto callSiteLoc = dyn_cast<CallSiteLoc>(loc)) {
354+
Location newCallee = replaceLexicalBlockScope(
355+
context, callSiteLoc.getCallee(), oldSP, newSP);
356+
Location newCaller = replaceLexicalBlockScope(
357+
context, callSiteLoc.getCaller(), oldSP, newSP);
358+
if (newCallee != callSiteLoc.getCallee() ||
359+
newCaller != callSiteLoc.getCaller())
360+
return CallSiteLoc::get(newCallee, newCaller);
361+
}
362+
return loc;
363+
}
364+
271365
// set it while traversing into a function
272-
LLVM::DISubprogramAttr diSubprogramAttr;
366+
LLVM::DISubprogramAttr diSubprogramAttr = {};
273367

274368
void runOnOperation() override {
275369
Operation *op = getOperation();
276370

277371
getOperation()->walk<WalkOrder::PreOrder>([&](Operation *op) -> void {
278372
if (isa<LLVM::LLVMFuncOp>(op)) {
279373
auto funcOp = cast<LLVM::LLVMFuncOp>(op);
280-
diSubprogramAttr = getDISubprogramAttr(funcOp);
281-
diSubprogramAttr = fuseFuncArgVariables(funcOp, diSubprogramAttr);
374+
auto oldSubprogram = getDISubprogramAttr(funcOp);
375+
// External declarations (e.g., runtime builtins like vprintf) have no
376+
// body, so we cannot insert debug value intrinsics into them.
377+
if (!funcOp.isExternal()) {
378+
diSubprogramAttr = fuseFuncArgVariables(funcOp, oldSubprogram);
379+
// Fix DILexicalBlockFileAttr locations that still reference the old
380+
// isRecSelf=true subprogram from add_di_scope.
381+
if (oldSubprogram.getIsRecSelf())
382+
fixLexicalBlockScopes(funcOp, oldSubprogram, diSubprogramAttr);
383+
} else {
384+
diSubprogramAttr = oldSubprogram;
385+
}
282386
} else {
283387
fuseDILocalVariable(op);
284388
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// RUN: LLVM_EXTRACT_DI_LOCAL_VARIABLES=1 triton-opt %s -o - --mlir-print-debuginfo --mlir-use-nameloc-as-prefix --enable-line-info --extract-variable-info | \
2+
// RUN: mlir-translate --mlir-to-llvmir | FileCheck %s
3+
4+
// Regression test for a crash when CallSiteLoc operations coexist with
5+
// NameLoc operations in the same function. When enable-line-info runs with
6+
// LLVM_EXTRACT_DI_LOCAL_VARIABLES=1, it creates a DISubprogramAttr with
7+
// isRecSelf=true and wraps CallSiteLoc ops in DILexicalBlockFileAttr that
8+
// references the isRecSelf=true subprogram. The extract-variable-info pass
9+
// must fix these lexical block scopes to reference the resolved
10+
// (isRecSelf=false) subprogram via fixLexicalBlockScopes, otherwise
11+
// mlir-translate crashes with an assertion in DebugTranslation.
12+
13+
module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32} {
14+
llvm.func @kernel_with_callsite(%arg0: !llvm.ptr<1> loc(#loc_arg0), %arg1: i32 loc(#loc_arg1), %arg2: !llvm.ptr<1>) {
15+
// CHECK-DAG: distinct !DISubprogram({{.*}}, retainedNodes:
16+
// CHECK-DAG: !DILocalVariable(name: "input_ptr", arg: 1, scope:
17+
// CHECK-DAG: !DILocalVariable(name: "n_elements", arg: 2, scope:
18+
19+
%c = llvm.mlir.constant(42 : i32) : i32
20+
21+
// This op has a CallSiteLoc, triggering DILexicalBlockFileAttr creation
22+
// in add_di_scope. Without fixLexicalBlockScopes, the stale isRecSelf=true
23+
// reference would cause an assertion failure in mlir-translate.
24+
%sum = llvm.add %arg1, %c : i32 loc(#loc_callsite1)
25+
26+
// This op has a NameLoc, triggering DILocalVariable creation in
27+
// fuseDILocalVariable.
28+
// CHECK-DAG: !DILocalVariable(name: "result", scope:
29+
%result = llvm.mul %sum, %c : i32 loc(#loc_result)
30+
31+
llvm.return
32+
}
33+
}
34+
#loc_base = loc("kernel.py":10:0)
35+
#loc_arg0 = loc("input_ptr"(#loc_base))
36+
#loc_arg1 = loc("n_elements"(#loc_base))
37+
#loc_callee = loc("helper.py":5:3)
38+
#loc_caller = loc("kernel.py":20:5)
39+
#loc_callsite1 = loc(callsite(#loc_callee at #loc_caller))
40+
#loc_result_base = loc("kernel.py":25:10)
41+
#loc_result = loc("result"(#loc_result_base))

0 commit comments

Comments
 (0)