Skip to content

Commit 447ee79

Browse files
committed
[AIE] ReservedRegsLICM: hoist/sink save/restore brackets
Some loops protect a reserved register with a save/restore bracket:

    vreg = COPY $reserved_reg      ; save pre-loop value
    $reserved_reg = MOVX imm       ; set loop-invariant value
    ... uses of $reserved_reg ...
    $reserved_reg = COPY vreg      ; restore pre-loop value

The existing hoisting and sinking passes cannot handle this pattern: the register is a loop livein (the save reads it before the set defines it), and it has two defs (set + restore), so both getUniqueDef and the livein guard block any transformation.

Add findSaveRestoreBracket / processSaveRestoreBracket to detect and transform the pattern as a unit. The detection checks that:
- the save is a vreg = COPY $reserved_reg with exactly one use (the restore),
- no use of the register occurs between the save and the set,
- the set is loop-invariant,
- the restore is the last def of the register in the latch and copies exactly the vreg from the save,
- the register is not used in any non-latch block of the loop (which would observe the wrong value after the set is hoisted).

When the pattern matches, the save and set are moved to the preheader (in order, so the save captures the pre-loop value before the set overwrites it) and the restore is sunk to the exit block. The loop body then always sees the loop-invariant value with no per-iteration save/restore overhead.

Add a test (crsat_save_restore_bracket) that verifies the transformation for $crsat.
1 parent dbdf947 commit 447ee79

2 files changed

Lines changed: 280 additions & 3 deletions

File tree

llvm/lib/Target/AIE/ReservedRegsLICM.cpp

Lines changed: 174 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,19 @@ struct CandidateInfo {
8888
MachineInstr *HoistCandidate = nullptr;
8989
};
9090

91+
/// Describes a matched save/restore bracket for a reserved register:
92+
/// save: vreg = COPY $reserved_reg (first use in latch, before set)
93+
/// set: $reserved_reg = <imm> (loop-invariant, immediately after)
94+
/// restore: $reserved_reg = COPY vreg (last def in latch)
95+
/// The transformation hoists save+set to the preheader and sinks restore to
96+
/// the exit block, so the loop body always sees the loop-invariant value.
97+
struct SaveRestoreBracket {
98+
MachineInstr *Save = nullptr;
99+
MachineInstr *Set = nullptr;
100+
MachineInstr *Restore = nullptr;
101+
MCPhysReg Reg = MCRegister::NoRegister;
102+
};
103+
91104
/// Used to collect candidates for hoisting/sinking
92105
class Candidates {
93106
DenseMap<MCPhysReg, CandidateInfo> Candidates;
@@ -163,6 +176,19 @@ class ReservedRegsLICM : public MachineFunctionPass {
163176
/// Hoist \p Cand to \p L's preheader if it is safe to do so.
164177
bool tryHoistToPreHeader(const CandidateInfo &Cand, MachineLoop &L);
165178

179+
/// Detect and transform a save/restore bracket in \p L's latch block.
180+
/// A bracket has the form:
181+
/// vreg = COPY $reserved_reg (save)
182+
/// $reserved_reg = <imm> (set, loop-invariant)
183+
/// ...uses of $reserved_reg...
184+
/// $reserved_reg = COPY vreg (restore)
185+
/// Returns the matched bracket, or std::nullopt if not found.
186+
std::optional<SaveRestoreBracket> findSaveRestoreBracket(MachineLoop &L);
187+
188+
/// Apply the save/restore bracket transformation: hoist save+set to the
189+
/// preheader and sink restore to the exit block.
190+
bool processSaveRestoreBracket(MachineLoop &L);
191+
166192
/// Verify if \p Cand is loop invariant and can be safely hoisted.
167193
/// \pre Cand->DefinedReg has a unique live value within the loop. This is
168194
/// verified by processForExitSink() or processForPreheaderHoist().
@@ -257,8 +283,14 @@ void ReservedRegsLICM::runOnLoop(MachineLoop &L) {
257283
BitVector ReservedLiveins = collectLoopReservedLiveins(L);
258284
processForExitSink(L, ReservedLiveins);
259285
processForPreheaderHoist(L, ReservedLiveins);
286+
Changed |= processSaveRestoreBracket(L);
260287
}
261288

289+
// Forward declaration — defined after processForPreheaderHoist.
290+
static void moveInstruction(const CandidateInfo &Cand,
291+
MachineBasicBlock::iterator InsertBefore,
292+
MachineBasicBlock &InsertMBB);
293+
262294
void ReservedRegsLICM::processForExitSink(MachineLoop &L,
263295
const BitVector &ReservedLiveins) {
264296
// Pre-compute what's live at the entry of the latch block by walking it
@@ -305,6 +337,145 @@ void ReservedRegsLICM::processForExitSink(MachineLoop &L,
305337
}
306338
}
307339

340+
std::optional<SaveRestoreBracket>
341+
ReservedRegsLICM::findSaveRestoreBracket(MachineLoop &L) {
342+
MachineBasicBlock *Latch = L.getLoopLatch();
343+
assert(Latch);
344+
345+
for (MachineInstr &SaveMI : *Latch) {
346+
// Look for: vreg = COPY $reserved_reg
347+
if (!SaveMI.isCopy())
348+
continue;
349+
Register SaveDst = SaveMI.getOperand(0).getReg();
350+
Register SaveSrc = SaveMI.getOperand(1).getReg();
351+
if (!SaveDst.isVirtual() || !SaveSrc.isPhysical())
352+
continue;
353+
MCPhysReg PhysReg = SaveSrc.asMCReg();
354+
if (!TRI->isSimplifiableReservedReg(PhysReg))
355+
continue;
356+
357+
// The saved vreg must have exactly one non-debug use (the restore).
358+
if (!MRI->hasOneNonDBGUse(SaveDst))
359+
continue;
360+
361+
// Helper: does MI use PhysReg?
362+
auto UsesPhysReg = [&](const MachineInstr &MI) {
363+
return any_of(MI.operands(), [&](const MachineOperand &MO) {
364+
return MO.isReg() && MO.isUse() && MO.getReg() == PhysReg;
365+
});
366+
};
367+
368+
// No uses of PhysReg before the save. Instructions before the save
369+
// rely on the original value of PhysReg; hoisting the set would make
370+
// them see the loop-invariant value instead.
371+
auto BeforeSave =
372+
make_range(Latch->begin(), MachineBasicBlock::iterator(SaveMI));
373+
if (any_of(BeforeSave, UsesPhysReg))
374+
continue;
375+
376+
// Find the set: the next def of PhysReg after the save.
377+
// No use of PhysReg is allowed between the save and the set.
378+
MachineInstr *SetMI = nullptr;
379+
bool PhysRegUsedBeforeSet = false;
380+
for (MachineInstr *Next = SaveMI.getNextNode(); Next;
381+
Next = Next->getNextNode()) {
382+
if (getSinglePhysRegDef(*Next) == PhysReg) {
383+
SetMI = Next;
384+
break;
385+
}
386+
if (UsesPhysReg(*Next)) {
387+
PhysRegUsedBeforeSet = true;
388+
break;
389+
}
390+
}
391+
if (!SetMI || PhysRegUsedBeforeSet)
392+
continue;
393+
394+
// The set must be loop-invariant (e.g. MOVX imm).
395+
CandidateInfo SetCand(PhysReg);
396+
SetCand.HoistCandidate = SetMI;
397+
if (!isLoopInvariantInst(SetCand, L))
398+
continue;
399+
400+
// Find the restore: the last def of PhysReg in the latch.
401+
// It must be: $reserved_reg = COPY SaveDst.
402+
MachineInstr *RestoreMI = nullptr;
403+
for (MachineInstr &MI2 : reverse(*Latch)) {
404+
if (getSinglePhysRegDef(MI2) == PhysReg) {
405+
if (MI2.isCopy() && MI2.getOperand(1).getReg() == SaveDst)
406+
RestoreMI = &MI2;
407+
break;
408+
}
409+
}
410+
if (!RestoreMI)
411+
continue;
412+
413+
// No uses of PhysReg after the restore. Once the restore is sinked to
414+
// the exit block, instructions after the restore position in the loop
415+
// body would see the loop-invariant value (from the set) instead of
416+
// the restored value.
417+
auto AfterRestore = make_range(
418+
std::next(MachineBasicBlock::iterator(RestoreMI)), Latch->end());
419+
if (any_of(AfterRestore, UsesPhysReg))
420+
continue;
421+
422+
// PhysReg must not be used in any non-latch block of the loop.
423+
// If it were, those uses might see the wrong value after we hoist
424+
// the set to the preheader.
425+
if (any_of(L.getBlocks(), [&](MachineBasicBlock *MBB) {
426+
return MBB != Latch && any_of(*MBB, UsesPhysReg);
427+
}))
428+
continue;
429+
430+
return SaveRestoreBracket{&SaveMI, SetMI, RestoreMI, PhysReg};
431+
}
432+
return std::nullopt;
433+
}
434+
435+
/// Transform every save/restore bracket that findSaveRestoreBracket() reports
/// for \p L: the save and the set are moved (in that order) to the end of the
/// preheader, and the restore is moved to the top of a dedicated exit block.
///
/// The search is repeated after each transformation so that a latch
/// bracketing several reserved registers is fully simplified. Each iteration
/// moves one save out of the latch, so the loop terminates.
///
/// \returns true if at least one bracket was transformed.
bool ReservedRegsLICM::processSaveRestoreBracket(MachineLoop &L) {
  MachineBasicBlock *Preheader = nullptr;
  MachineBasicBlock *ExitMBB = nullptr;
  bool Transformed = false;

  while (std::optional<SaveRestoreBracket> BracketOpt =
             findSaveRestoreBracket(L)) {
    const SaveRestoreBracket &Bracket = *BracketOpt;

    // Resolve the insertion blocks lazily, on the first match only, so loops
    // without a bracket are left completely untouched.
    if (!ExitMBB) {
      Preheader = L.getLoopPreheader();
      ExitMBB = L.getExitBlock();
      assert(Preheader && ExitMBB &&
             "expected a loop with a preheader and a unique exit block");

      // Ensure the exit block is a dedicated exit (single predecessor).
      // runOnLoop is expected to have verified that the critical edge can be
      // split; still bail out instead of dereferencing null if it cannot,
      // since nothing has been moved yet at this point.
      if (!ExitMBB->getSinglePredecessor()) {
        MachineBasicBlock *ExitingBlock = L.getExitingBlock();
        assert(ExitingBlock && "unique exit block without an exiting block");
        MachineBasicBlock *DedicatedExit =
            ExitingBlock->SplitCriticalEdge(ExitMBB, *this);
        if (!DedicatedExit) {
          LLVM_DEBUG(dbgs() << "Could not create a dedicated exit for "
                            << printMBBReference(*ExitMBB) << "\n");
          return false;
        }
        ExitMBB = DedicatedExit;
        LLVM_DEBUG(dbgs() << "Created dedicated exit: "
                          << printMBBReference(*ExitMBB) << "\n");
      }
    }

    LLVM_DEBUG(dbgs() << "Save/restore bracket for "
                      << TRI->getName(Bracket.Reg) << ":\n"
                      << "  Save:    " << *Bracket.Save << "  Set:     "
                      << *Bracket.Set << "  Restore: " << *Bracket.Restore);

    // Move save + set to the preheader (save first so it captures the
    // pre-loop value before the set overwrites it). Both are inserted before
    // the same terminator iterator, which preserves their relative order.
    auto InsertPt = Preheader->getFirstTerminator();
    CandidateInfo SaveCand(Bracket.Reg);
    SaveCand.HoistCandidate = Bracket.Save;
    moveInstruction(SaveCand, InsertPt, *Preheader);

    CandidateInfo SetCand(Bracket.Reg);
    SetCand.HoistCandidate = Bracket.Set;
    moveInstruction(SetCand, InsertPt, *Preheader);

    // Sink restore to the exit block.
    CandidateInfo RestoreCand(Bracket.Reg);
    RestoreCand.HoistCandidate = Bracket.Restore;
    moveInstruction(RestoreCand, ExitMBB->getFirstNonPHI(), *ExitMBB);

    Transformed = true;
  }

  return Transformed;
}
478+
308479
void ReservedRegsLICM::processForPreheaderHoist(
309480
MachineLoop &L, const BitVector &ReservedLiveins) {
310481
Candidates HoistCandidates;
@@ -333,9 +504,9 @@ void ReservedRegsLICM::processForPreheaderHoist(
333504
/// When an instruction is found to only use loop invariant operands that is
334505
/// safe to hoist/sink, this function is called to actually move the MI out of
335506
/// the loop.
336-
void moveInstruction(const CandidateInfo &Cand,
337-
MachineBasicBlock::iterator InsertBefore,
338-
MachineBasicBlock &InsertMBB) {
507+
static void moveInstruction(const CandidateInfo &Cand,
508+
MachineBasicBlock::iterator InsertBefore,
509+
MachineBasicBlock &InsertMBB) {
339510
MachineInstr &MI = *Cand.HoistCandidate;
340511
LLVM_DEBUG(dbgs() << "Moving to " << printMBBReference(InsertMBB) << " from "
341512
<< printMBBReference(*MI.getParent()) << ": " << MI);

llvm/test/CodeGen/AIE/aie2ps/reserved-reg-licm.mir

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,112 @@ body: |
330330
PseudoJ_jump_imm %bb.1
331331
...
332332

333+
# $crsat save/restore bracket: the loop saves $crsat, sets it to 1, uses it,
334+
# then restores it. The save+set should be hoisted to the preheader and the
335+
# restore should be sunk to the exit block.
336+
---
337+
name: crsat_save_restore_bracket
338+
tracksRegLiveness: true
339+
body: |
340+
; CHECK-LABEL: name: crsat_save_restore_bracket
341+
; CHECK: bb.0:
342+
; CHECK-NEXT: successors: %bb.2(0x80000000)
343+
; CHECK-NEXT: liveins: $r0, $r1, $s0
344+
; CHECK-NEXT: {{ $}}
345+
; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0
346+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc1024 = COPY $r1
347+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:es = COPY $s0
348+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:er = COPY $crsat
349+
; CHECK-NEXT: $crsat = MOVX_mvx_cr_imm 1
350+
; CHECK-NEXT: PseudoJ_jump_imm %bb.2
351+
; CHECK-NEXT: {{ $}}
352+
; CHECK-NEXT: bb.1:
353+
; CHECK-NEXT: $crsat = COPY [[COPY3]]
354+
; CHECK-NEXT: PseudoRET implicit $lr
355+
; CHECK-NEXT: {{ $}}
356+
; CHECK-NEXT: bb.2:
357+
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.1(0x04000000)
358+
; CHECK-NEXT: {{ $}}
359+
; CHECK-NEXT: [[VSRS_4x_mv_w_srs_cm_srsSign1_:%[0-9]+]]:vec256 = VSRS_4x_mv_w_srs_cm_srsSign1 [[COPY1]], [[COPY2]], implicit-def dead $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign1
360+
; CHECK-NEXT: PseudoJNZ [[COPY]], %bb.2
361+
; CHECK-NEXT: PseudoJ_jump_imm %bb.1
362+
bb.0:
363+
successors: %bb.2(0x80000000)
364+
liveins: $r0, $r1, $s0
365+
366+
%0:er = COPY $r0
367+
%1:acc1024 = COPY $r1
368+
%2:es = COPY $s0
369+
PseudoJ_jump_imm %bb.2
370+
371+
bb.1:
372+
PseudoRET implicit $lr
373+
374+
bb.2:
375+
successors: %bb.2(0x7c000000), %bb.1(0x04000000)
376+
%3:er = COPY $crsat
377+
$crsat = MOVX_mvx_cr_imm 1
378+
%4:vec256 = VSRS_4x_mv_w_srs_cm_srsSign1 %1, %2, implicit-def dead $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign1
379+
$crsat = COPY %3
380+
PseudoJNZ %0, %bb.2
381+
PseudoJ_jump_imm %bb.1
382+
...
383+
384+
# $crrnd save/restore bracket with uses before the save and after the restore.
385+
# The bracket must NOT be transformed because $crrnd is used outside the
386+
# [save, restore] window: once before the save and once after the restore.
387+
---
388+
name: crrnd_bracket_uses_outside_window
389+
tracksRegLiveness: true
390+
body: |
391+
; CHECK-LABEL: name: crrnd_bracket_uses_outside_window
392+
; CHECK: bb.0:
393+
; CHECK-NEXT: successors: %bb.2(0x80000000)
394+
; CHECK-NEXT: liveins: $r0, $r1, $s0
395+
; CHECK-NEXT: {{ $}}
396+
; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0
397+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc1024 = COPY $r1
398+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:es = COPY $s0
399+
; CHECK-NEXT: PseudoJ_jump_imm %bb.2
400+
; CHECK-NEXT: {{ $}}
401+
; CHECK-NEXT: bb.1:
402+
; CHECK-NEXT: PseudoRET implicit $lr
403+
; CHECK-NEXT: {{ $}}
404+
; CHECK-NEXT: bb.2:
405+
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.1(0x04000000)
406+
; CHECK-NEXT: {{ $}}
407+
; CHECK-NEXT: [[VSRS_4x_mv_w_srs_cm_srsSign1_:%[0-9]+]]:vec256 = VSRS_4x_mv_w_srs_cm_srsSign1 [[COPY1]], [[COPY2]], implicit-def dead $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign1
408+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:er = COPY $crrnd
409+
; CHECK-NEXT: $crrnd = MOVX_mvx_cr_imm 0
410+
; CHECK-NEXT: [[VSRS_4x_mv_w_srs_cm_srsSign1_1:%[0-9]+]]:vec256 = VSRS_4x_mv_w_srs_cm_srsSign1 [[COPY1]], [[COPY2]], implicit-def dead $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign1
411+
; CHECK-NEXT: $crrnd = COPY [[COPY3]]
412+
; CHECK-NEXT: [[VSRS_4x_mv_w_srs_cm_srsSign1_2:%[0-9]+]]:vec256 = VSRS_4x_mv_w_srs_cm_srsSign1 [[COPY1]], [[COPY2]], implicit-def dead $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign1
413+
; CHECK-NEXT: PseudoJNZ [[COPY]], %bb.2
414+
; CHECK-NEXT: PseudoJ_jump_imm %bb.1
415+
bb.0:
416+
successors: %bb.2(0x80000000)
417+
liveins: $r0, $r1, $s0
418+
419+
%0:er = COPY $r0
420+
%1:acc1024 = COPY $r1
421+
%2:es = COPY $s0
422+
PseudoJ_jump_imm %bb.2
423+
424+
bb.1:
425+
PseudoRET implicit $lr
426+
427+
bb.2:
428+
successors: %bb.2(0x7c000000), %bb.1(0x04000000)
429+
%5:vec256 = VSRS_4x_mv_w_srs_cm_srsSign1 %1, %2, implicit-def dead $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign1
430+
%3:er = COPY $crrnd
431+
$crrnd = MOVX_mvx_cr_imm 0
432+
%4:vec256 = VSRS_4x_mv_w_srs_cm_srsSign1 %1, %2, implicit-def dead $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign1
433+
$crrnd = COPY %3
434+
%6:vec256 = VSRS_4x_mv_w_srs_cm_srsSign1 %1, %2, implicit-def dead $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign1
435+
PseudoJNZ %0, %bb.2
436+
PseudoJ_jump_imm %bb.1
437+
...
438+
333439
# $srssign0 is defined in one conditional branch (bb.2) but not the other
334440
# (bb.3). The latch (bb.4) uses $srssign0 before defining it, so $srssign0 is
335441
# live at the latch entry on the path through bb.3. Sinking MOVX_mvx_cr_imm 0

0 commit comments

Comments
 (0)