Skip to content

Commit b939bc3

Browse files
committed
[AIE] ReservedRegsLICM: fix livein detection for multi-block loops
The flat sequential backward walk in collectLoopReservedLiveins was incorrect for loops with conditional paths. When L.getBlocks() returns blocks in RPO order (header first, latch last), a def in one branch could remove a register from the live set after a sibling branch's use had already added it back. This caused the register to be incorrectly classified as not a loop livein, allowing processForExitSink to sink its definition to the exit block even though the register carried a pre-loop value on some paths. For example, in a loop where bb.2 uses $srssign0 and bb.3 defines it (both successors of the header), the old code would process bb.2's use first (adding $srssign0 to the live set) and then bb.3's def (removing it), concluding $srssign0 is not a livein. But $srssign0 IS a livein because bb.2 can execute on the first iteration without going through bb.3. Fix this by replacing the flat walk with a backward dataflow worklist algorithm that correctly propagates liveness through the CFG until a fixed point is reached. The algorithm initialises live-in sets to empty and grows them monotonically, re-queuing predecessors whenever a block's live-in set changes. Add a test case (livein_bug_no_sink) to the aie2ps reserved-reg-licm test that reproduces the bug and verifies the MOVX is not sunk.
1 parent a6e4dbd commit b939bc3

2 files changed

Lines changed: 141 additions & 11 deletions

File tree

llvm/lib/Target/AIE/ReservedRegsLICM.cpp

Lines changed: 59 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#include "AIE.h"
1919
#include "llvm/ADT/BitVector.h"
20+
#include "llvm/ADT/SmallPtrSet.h"
2021
#include "llvm/CodeGen/LivePhysRegs.h"
2122
#include "llvm/CodeGen/MachineFunctionPass.h"
2223
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -234,28 +235,75 @@ bool ReservedRegsLICM::runOnMachineFunction(MachineFunction &MF) {
234235
}
235236

236237
BitVector ReservedRegsLICM::collectLoopReservedLiveins(const MachineLoop &L) {
237-
LivePhysRegs LiveRegs(*TRI);
238-
239-
// Conservatively assume reserved regs are all liveouts
238+
// Compute the live-in set at the loop header using a backward dataflow
239+
// worklist algorithm. A flat sequential backward walk over all loop blocks
240+
// is incorrect for multi-block loops with conditional paths: a def in one
241+
// branch can incorrectly kill a register that is used (without a preceding
242+
// def) in a sibling branch, making the register appear to not be a livein.
243+
//
244+
// The worklist algorithm propagates liveness backward through the CFG until
245+
// a fixed point is reached, correctly handling all control-flow shapes.
246+
247+
// Conservative liveout assumption: all candidate reserved regs are live at
248+
// the exit of the loop (they may be used by code after the loop).
249+
BitVector InitLive(TRI->getNumRegs());
240250
for (MCPhysReg PhysReg : MRI->getReservedRegs().set_bits()) {
241251
if (MRI->canSimplifyPhysReg(PhysReg)) {
242-
LiveRegs.addReg(PhysReg);
252+
InitLive.set(PhysReg);
243253
}
244254
}
245255

246-
// Traverse all blocks in the loop to remove defs. stepBackward handles
247-
// regmask operands (calls) correctly.
256+
// live_in[MBB] = set of reserved registers live at the entry of MBB.
257+
// Initialised to empty; the worklist grows these sets monotonically.
258+
DenseMap<MachineBasicBlock *, BitVector> LiveIn;
248259
for (MachineBasicBlock *MBB : L.getBlocks())
260+
LiveIn[MBB] = BitVector(TRI->getNumRegs());
261+
262+
// Seed the worklist with every block in the loop.
263+
SmallPtrSet<MachineBasicBlock *, 8> InWorklist;
264+
SmallVector<MachineBasicBlock *, 8> Worklist;
265+
for (MachineBasicBlock *MBB : L.getBlocks()) {
266+
Worklist.push_back(MBB);
267+
InWorklist.insert(MBB);
268+
}
269+
270+
while (!Worklist.empty()) {
271+
MachineBasicBlock *MBB = Worklist.pop_back_val();
272+
InWorklist.erase(MBB);
273+
274+
// live-out(MBB) = union of live-in of successors.
275+
// For successors outside the loop use InitLive conservatively.
276+
LivePhysRegs LiveRegs(*TRI);
277+
for (MachineBasicBlock *Succ : MBB->successors()) {
278+
const BitVector &SuccLive = L.contains(Succ) ? LiveIn[Succ] : InitLive;
279+
for (unsigned Reg : SuccLive.set_bits())
280+
LiveRegs.addReg(Reg);
281+
}
282+
283+
// Walk backward through MBB to compute live-in. stepBackward handles
284+
// regmask operands (calls) correctly.
249285
for (const MachineInstr &MI : reverse(*MBB))
250286
LiveRegs.stepBackward(MI);
251287

252-
BitVector ReservedLiveins(TRI->getNumRegs());
253-
for (MCRegister Reg : LiveRegs) {
254-
if (MRI->isReserved(Reg)) {
255-
ReservedLiveins.set(Reg);
288+
// Convert to a BitVector, keeping only reserved registers.
289+
BitVector NewLiveIn(TRI->getNumRegs());
290+
for (MCRegister Reg : LiveRegs)
291+
if (MRI->isReserved(Reg))
292+
NewLiveIn.set(Reg);
293+
294+
// If the live-in set grew, re-process all in-loop predecessors.
295+
if (NewLiveIn != LiveIn[MBB]) {
296+
LiveIn[MBB] = NewLiveIn;
297+
for (MachineBasicBlock *Pred : MBB->predecessors()) {
298+
if (L.contains(Pred) && !InWorklist.count(Pred)) {
299+
Worklist.push_back(Pred);
300+
InWorklist.insert(Pred);
301+
}
302+
}
256303
}
257304
}
258-
return ReservedLiveins;
305+
306+
return LiveIn[L.getHeader()];
259307
}
260308

261309
/// Walk the specified region of the CFG and hoist loop invariants out to the

llvm/test/CodeGen/AIE/aie2ps/reserved-reg-licm.mir

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,88 @@ body: |
436436
PseudoJ_jump_imm %bb.1
437437
...
438438

439+
# $srssign0 is used in one conditional branch (bb.2) but defined in the other
440+
# (bb.3). The latch (bb.4) defines $srssign0 = MOVX 0 without using it first,
441+
# so LatchEntryLive does NOT flag it. However, $srssign0 IS a true loop livein
442+
# because bb.2 can execute on the first iteration without going through bb.3.
443+
# collectLoopReservedLiveins must detect this via backward dataflow and prevent
444+
# the MOVX from being sunk to the exit block.
445+
---
446+
name: livein_bug_no_sink
447+
tracksRegLiveness: true
448+
body: |
449+
; CHECK-LABEL: name: livein_bug_no_sink
450+
; CHECK: bb.0:
451+
; CHECK-NEXT: successors: %bb.1(0x80000000)
452+
; CHECK-NEXT: liveins: $p0, $r0, $r1, $dm0, $s0
453+
; CHECK-NEXT: {{ $}}
454+
; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0
455+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0
456+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r1
457+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:edm = COPY $dm0
458+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:es = COPY $s0
459+
; CHECK-NEXT: PseudoJ_jump_imm %bb.1
460+
; CHECK-NEXT: {{ $}}
461+
; CHECK-NEXT: bb.1:
462+
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
463+
; CHECK-NEXT: {{ $}}
464+
; CHECK-NEXT: PseudoJNZ [[COPY]], %bb.3
465+
; CHECK-NEXT: {{ $}}
466+
; CHECK-NEXT: bb.2:
467+
; CHECK-NEXT: successors: %bb.4(0x80000000)
468+
; CHECK-NEXT: {{ $}}
469+
; CHECK-NEXT: [[VSRS_4x_mv_x_srs_dm_srsSign0_:%[0-9]+]]:mxs = VSRS_4x_mv_x_srs_dm_srsSign0 [[COPY3]], [[COPY4]], implicit-def $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign0
470+
; CHECK-NEXT: PseudoJ_jump_imm %bb.4
471+
; CHECK-NEXT: {{ $}}
472+
; CHECK-NEXT: bb.3:
473+
; CHECK-NEXT: successors: %bb.4(0x80000000)
474+
; CHECK-NEXT: {{ $}}
475+
; CHECK-NEXT: $srssign0 = COPY [[COPY2]]
476+
; CHECK-NEXT: PseudoJ_jump_imm %bb.4
477+
; CHECK-NEXT: {{ $}}
478+
; CHECK-NEXT: bb.4:
479+
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.5(0x04000000)
480+
; CHECK-NEXT: {{ $}}
481+
; CHECK-NEXT: $srssign0 = MOVX_mvx_cr_imm 0
482+
; CHECK-NEXT: PseudoJNZ [[COPY]], %bb.1
483+
; CHECK-NEXT: PseudoJ_jump_imm %bb.5
484+
; CHECK-NEXT: {{ $}}
485+
; CHECK-NEXT: bb.5:
486+
; CHECK-NEXT: PseudoRET implicit $lr
487+
bb.0:
488+
successors: %bb.1(0x80000000)
489+
liveins: $p0, $r0, $r1, $dm0, $s0
490+
%0:er = COPY $r0
491+
%1:ep = COPY $p0
492+
%2:er = COPY $r1
493+
%3:edm = COPY $dm0
494+
%4:es = COPY $s0
495+
PseudoJ_jump_imm %bb.1
496+
497+
bb.1:
498+
successors: %bb.2(0x40000000), %bb.3(0x40000000)
499+
PseudoJNZ %0, %bb.3
500+
501+
bb.2:
502+
successors: %bb.4(0x80000000)
503+
%5:mxs = VSRS_4x_mv_x_srs_dm_srsSign0 %3, %4, implicit-def $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign0
504+
PseudoJ_jump_imm %bb.4
505+
506+
bb.3:
507+
successors: %bb.4(0x80000000)
508+
$srssign0 = COPY %2
509+
PseudoJ_jump_imm %bb.4
510+
511+
bb.4:
512+
successors: %bb.1(0x7c000000), %bb.5(0x04000000)
513+
$srssign0 = MOVX_mvx_cr_imm 0
514+
PseudoJNZ %0, %bb.1
515+
PseudoJ_jump_imm %bb.5
516+
517+
bb.5:
518+
PseudoRET implicit $lr
519+
...
520+
439521
# $srssign0 is defined in one conditional branch (bb.2) but not the other
440522
# (bb.3). The latch (bb.4) uses $srssign0 before defining it, so $srssign0 is
441523
# live at the latch entry on the path through bb.3. Sinking MOVX_mvx_cr_imm 0

0 commit comments

Comments
 (0)