From 527cc5a9efb408a9605cfe33705eb0bf1e26321e Mon Sep 17 00:00:00 2001 From: Andreu Carminati Date: Fri, 24 Apr 2026 01:25:29 -0600 Subject: [PATCH 1/5] [AIE] ReservedRegsLICM: hoist/sink control registers outside outerloop. --- llvm/lib/Target/AIE/ReservedRegsLICM.cpp | 35 ++++++++++--------- .../GlobalISel/post-select-licm-reserved.mir | 6 ++-- .../CodeGen/AIE/aie2p/reserved-reg-licm.mir | 4 +-- .../CodeGen/AIE/aie2ps/reserved-reg-licm.mir | 4 +-- 4 files changed, 26 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Target/AIE/ReservedRegsLICM.cpp b/llvm/lib/Target/AIE/ReservedRegsLICM.cpp index ae8b9bdbff9f..39b0279ba4d4 100644 --- a/llvm/lib/Target/AIE/ReservedRegsLICM.cpp +++ b/llvm/lib/Target/AIE/ReservedRegsLICM.cpp @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates +// (c) Copyright 2024-2026 Advanced Micro Devices, Inc. or its affiliates // //===----------------------------------------------------------------------===// // @@ -208,19 +208,20 @@ bool ReservedRegsLICM::runOnMachineFunction(MachineFunction &MF) { } BitVector ReservedRegsLICM::collectLoopReservedLiveins(const MachineLoop &L) { - const MachineBasicBlock &Header = *L.getHeader(); LivePhysRegs LiveRegs(*TRI); - // Conservativaly assume reserved reserved regs are all liveouts + // Conservatively assume reserved regs are all liveouts for (MCPhysReg PhysReg : MRI->getReservedRegs().set_bits()) { if (MRI->canSimplifyPhysReg(PhysReg)) { LiveRegs.addReg(PhysReg); } } - // Traverse instructions to remove defs - for (const MachineInstr &MI : reverse(Header)) - LiveRegs.stepBackward(MI); + // Traverse all blocks in the loop to remove defs. stepBackward handles + // regmask operands (calls) correctly. + for (MachineBasicBlock *MBB : L.getBlocks()) + for (const MachineInstr &MI : reverse(*MBB)) + LiveRegs.stepBackward(MI); BitVector ReservedLiveins(TRI->getNumRegs()); for (MCRegister Reg : LiveRegs) { @@ -241,9 +242,8 @@ void ReservedRegsLICM::runOnLoop(MachineLoop &L) { return; } - // TODO: Handle simple multi-BB loops. - if (L.getNumBlocks() != 1) { - LLVM_DEBUG(dbgs() << " Loop has multiple blocks.\n"); + if (!L.getLoopLatch()) { + LLVM_DEBUG(dbgs() << " Loop has no single latch.\n"); return; } const MachineBasicBlock *LoopBlock = L.getExitingBlock(); @@ -265,9 +265,9 @@ void ReservedRegsLICM::processForExitSink(MachineLoop &L, LivePhysRegs LiveRegs(*TRI); Candidates SinkCandidates; - // Walk the entire region, track defs for each register, and + // Walk the latch block, track defs for each register, and // collect potential LICM candidates. - assert(L.getNumBlocks() == 1 && L.getLoopLatch()); + assert(L.getLoopLatch()); for (MachineInstr &MI : reverse(*L.getLoopLatch())) { CandidateInfo *CandInfo = SinkCandidates.getInfo(MI); @@ -295,12 +295,15 @@ void ReservedRegsLICM::processForPreheaderHoist( MachineLoop &L, const BitVector &ReservedLiveins) { Candidates HoistCandidates; - // Walk the entire loop to find unique defs and LICM candidates - assert(L.getNumBlocks() == 1 && L.getHeader()); + // Walk all blocks in the loop to find unique defs and LICM candidates. + // RegDefMap::addChangedRegs handles regmask (calls clear UniqueDefs). + assert(L.getHeader()); RegDefMap PhysRegChanged(*TRI); - for (MachineInstr &MI : *L.getHeader()) { - PhysRegChanged.addChangedRegs(MI); - HoistCandidates.getInfo(MI); + for (MachineBasicBlock *MBB : L.getBlocks()) { + for (MachineInstr &MI : *MBB) { + PhysRegChanged.addChangedRegs(MI); + HoistCandidates.getInfo(MI); + } } for (auto &[Reg, CandInfo] : HoistCandidates) { diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/post-select-licm-reserved.mir b/llvm/test/CodeGen/AIE/aie2/GlobalISel/post-select-licm-reserved.mir index cbd1f54314c4..658838d6f964 100644 --- a/llvm/test/CodeGen/AIE/aie2/GlobalISel/post-select-licm-reserved.mir +++ b/llvm/test/CodeGen/AIE/aie2/GlobalISel/post-select-licm-reserved.mir @@ -4,7 +4,7 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # -# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates +# (c) Copyright 2024-2026 Advanced Micro Devices, Inc. or its affiliates # RUN: llc -mtriple aie2 -run-pass=reserved-reg-licm %s -verify-machineinstrs -o - \ # RUN: | FileCheck %s @@ -130,12 +130,12 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: $crpacksign = COPY [[COPY2]] ; CHECK-NEXT: PseudoJ_jump_imm %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $crpacksign = COPY [[COPY2]] ; CHECK-NEXT: PseudoJ_jump_imm %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: @@ -148,11 +148,11 @@ body: | ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $crpacksign = MOV_scalar_imm10_pseudo 0 ; CHECK-NEXT: PseudoJNZ [[COPY]], %bb.1 ; CHECK-NEXT: PseudoJ_jump_imm %bb.4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: $crpacksign = MOV_scalar_imm10_pseudo 0 ; CHECK-NEXT: PseudoRET implicit $lr bb.0: liveins: $p0, $r0, $r1, $x0 diff --git a/llvm/test/CodeGen/AIE/aie2p/reserved-reg-licm.mir b/llvm/test/CodeGen/AIE/aie2p/reserved-reg-licm.mir index 3209d9cf89ff..90fd09cca425 100644 --- a/llvm/test/CodeGen/AIE/aie2p/reserved-reg-licm.mir +++ b/llvm/test/CodeGen/AIE/aie2p/reserved-reg-licm.mir @@ -130,12 +130,12 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:mxs = COPY $x0 + ; CHECK-NEXT: $packsign0 = COPY [[COPY2]] ; CHECK-NEXT: PseudoJ_jump_imm %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $packsign0 = COPY [[COPY2]] ; CHECK-NEXT: PseudoJ_jump_imm %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: @@ -148,11 +148,11 @@ body: | ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0 ; CHECK-NEXT: PseudoJNZ [[COPY]], %bb.1 ; CHECK-NEXT: PseudoJ_jump_imm %bb.4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0 ; CHECK-NEXT: PseudoRET implicit $lr bb.0: liveins: $p0, $r0, $r1, $x0 diff --git a/llvm/test/CodeGen/AIE/aie2ps/reserved-reg-licm.mir b/llvm/test/CodeGen/AIE/aie2ps/reserved-reg-licm.mir index 789f6b70878e..0a6f02291bc2 100644 --- a/llvm/test/CodeGen/AIE/aie2ps/reserved-reg-licm.mir +++ b/llvm/test/CodeGen/AIE/aie2ps/reserved-reg-licm.mir @@ -134,12 +134,12 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:edm = COPY $dm0 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:es = COPY $s0 + ; CHECK-NEXT: $srssign0 = COPY [[COPY2]] ; CHECK-NEXT: PseudoJ_jump_imm %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $srssign0 = COPY [[COPY2]] ; CHECK-NEXT: PseudoJ_jump_imm %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: @@ -152,11 +152,11 @@ body: | ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $srssign0 = MOVX_mvx_cr_imm 0 ; CHECK-NEXT: PseudoJNZ [[COPY]], %bb.1 ; CHECK-NEXT: PseudoJ_jump_imm %bb.4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: $srssign0 = MOVX_mvx_cr_imm 0 ; CHECK-NEXT: PseudoRET implicit $lr bb.0: liveins: $p0, $r0, $r1, $dm0, $s0 From a31406198486372cbfce4da41c91332fc40f9084 Mon Sep 17 00:00:00 2001 From: Andreu Carminati Date: Tue, 28 Apr 2026 08:29:54 -0600 Subject: [PATCH 2/5] [AIE] ReservedRegsLICM: don't sink if reg is live at latch entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In a multi-block loop with conditional branches, a candidate register may be defined on some paths to the latch but not others. On the paths that skip the definition the register is live at the latch entry, carrying a value from a previous iteration. Sinking the last def from the latch to the exit block would expose that stale value to the instructions that use the register before the def in the latch. Fix this by pre-computing LatchEntryLive — the set of registers live at the entry of the latch block — and rejecting any sink candidate whose register appears in that set. Add a test (srs_dyn_sign_conditional_no_sink) where $srssign0 is defined in one conditional branch but not the other, and the latch uses it before redefining it. The pass must leave both the COPY and the MOVX_mvx_cr_imm in place. --- llvm/lib/Target/AIE/ReservedRegsLICM.cpp | 22 ++++- .../CodeGen/AIE/aie2ps/reserved-reg-licm.mir | 81 +++++++++++++++++++ 2 files changed, 99 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AIE/ReservedRegsLICM.cpp b/llvm/lib/Target/AIE/ReservedRegsLICM.cpp index 39b0279ba4d4..16b9156c9b47 100644 --- a/llvm/lib/Target/AIE/ReservedRegsLICM.cpp +++ b/llvm/lib/Target/AIE/ReservedRegsLICM.cpp @@ -261,22 +261,36 @@ void ReservedRegsLICM::runOnLoop(MachineLoop &L) { void ReservedRegsLICM::processForExitSink(MachineLoop &L, const BitVector &ReservedLiveins) { + // Pre-compute what's live at the entry of the latch block by walking it + // fully backward. For multi-block loops with conditional paths, a candidate + // register may be defined in some branches but not others, making it live at + // the latch entry on those paths. If the register is live at the latch entry + // (i.e., used before its first def in the latch), sinking its last def to + // the exit block would expose the wrong value on those paths. + assert(L.getLoopLatch()); + LivePhysRegs LatchEntryLive(*TRI); + for (const MachineInstr &MI : reverse(*L.getLoopLatch())) + LatchEntryLive.stepBackward(MI); + RegDefMap PhysRegChanged(*TRI); LivePhysRegs LiveRegs(*TRI); Candidates SinkCandidates; // Walk the latch block, track defs for each register, and // collect potential LICM candidates. - assert(L.getLoopLatch()); for (MachineInstr &MI : reverse(*L.getLoopLatch())) { CandidateInfo *CandInfo = SinkCandidates.getInfo(MI); // First time we meet a reserved reg definition while iterating upwards. - // If that def is not a loop livein and it isn't used in this block either, - // then one can move the instruction to the exit BB of the loop. + // If that def is not a loop livein, it isn't used in this block after the + // def, and it isn't live at the latch entry (which would indicate it is + // used before its def in the latch, possibly carrying a value from a + // conditional path that skips the def), then one can move the instruction + // to the exit BB of the loop. if (CandInfo && !PhysRegChanged.hasChanged(CandInfo->DefinedReg) && !ReservedLiveins.test(CandInfo->DefinedReg) && - !LiveRegs.contains(CandInfo->DefinedReg)) { + !LiveRegs.contains(CandInfo->DefinedReg) && + !LatchEntryLive.contains(CandInfo->DefinedReg)) { assert(!CandInfo->HoistCandidate); CandInfo->HoistCandidate = &MI; } diff --git a/llvm/test/CodeGen/AIE/aie2ps/reserved-reg-licm.mir b/llvm/test/CodeGen/AIE/aie2ps/reserved-reg-licm.mir index 0a6f02291bc2..5bc87c01a241 100644 --- a/llvm/test/CodeGen/AIE/aie2ps/reserved-reg-licm.mir +++ b/llvm/test/CodeGen/AIE/aie2ps/reserved-reg-licm.mir @@ -329,3 +329,84 @@ body: | PseudoJNZ %0, %bb.2 PseudoJ_jump_imm %bb.1 ... + +# $srssign0 is defined in one conditional branch (bb.2) but not the other +# (bb.3). The latch (bb.4) uses $srssign0 before defining it, so $srssign0 is +# live at the latch entry on the path through bb.3. Sinking MOVX_mvx_cr_imm 0 +# from the latch to the exit would expose the wrong value on that path. +# Neither the COPY nor the MOVX should be moved. +--- +name: srs_dyn_sign_conditional_no_sink +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: srs_dyn_sign_conditional_no_sink + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $r0, $r1, $dm0, $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:edm = COPY $dm0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:es = COPY $s0 + ; CHECK-NEXT: PseudoJ_jump_imm %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: PseudoJNZ [[COPY]], %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $srssign0 = COPY [[COPY2]] + ; CHECK-NEXT: PseudoJ_jump_imm %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: PseudoJ_jump_imm %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.5(0x04000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[VSRS_4x_mv_x_srs_dm_srsSign0_:%[0-9]+]]:mxs = VSRS_4x_mv_x_srs_dm_srsSign0 [[COPY3]], [[COPY4]], implicit-def $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign0 + ; CHECK-NEXT: $srssign0 = MOVX_mvx_cr_imm 0 + ; CHECK-NEXT: PseudoJNZ [[COPY]], %bb.1 + ; CHECK-NEXT: PseudoJ_jump_imm %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: PseudoRET implicit $lr + bb.0: + successors: %bb.1(0x80000000) + liveins: $p0, $r0, $r1, $dm0, $s0 + %0:er = COPY $r0 + %1:ep = COPY $p0 + %2:er = COPY $r1 + %3:edm = COPY $dm0 + %4:es = COPY $s0 + PseudoJ_jump_imm %bb.1 + + bb.1: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + PseudoJNZ %0, %bb.3 + + bb.2: + successors: %bb.4(0x80000000) + $srssign0 = COPY %2 + PseudoJ_jump_imm %bb.4 + + bb.3: + successors: %bb.4(0x80000000) + PseudoJ_jump_imm %bb.4 + + bb.4: + successors: %bb.1(0x7c000000), %bb.5(0x04000000) + %5:mxs = VSRS_4x_mv_x_srs_dm_srsSign0 %3, %4, implicit-def $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign0 + $srssign0 = MOVX_mvx_cr_imm 0 + PseudoJNZ %0, %bb.1 + PseudoJ_jump_imm %bb.5 + + bb.5: + PseudoRET implicit $lr +... From a6e4dbdad14293241ac6991cb6d1463e5be496c2 Mon Sep 17 00:00:00 2001 From: Andreu Carminati Date: Tue, 28 Apr 2026 08:59:09 -0600 Subject: [PATCH 3/5] [AIE] ReservedRegsLICM: hoist/sink save/restore brackets Some loops protect a reserved register with a save/restore bracket: vreg = COPY $reserved_reg ; save pre-loop value $reserved_reg = MOVX imm ; set loop-invariant value ... uses of $reserved_reg ... $reserved_reg = COPY vreg ; restore pre-loop value The existing hoisting and sinking passes cannot handle this pattern: the register is a loop livein (the save reads it before the set defines it), and it has two defs (set + restore), so both getUniqueDef and the livein guard block any transformation. Add findSaveRestoreBracket / processSaveRestoreBracket to detect and transform the pattern as a unit. The detection checks that: - the save is a vreg = COPY $reserved_reg with exactly one use (the restore), - no use of the register occurs between the save and the set, - the set is loop-invariant, - the restore is the last def of the register in the latch and copies exactly the vreg from the save, - the register is not used in any non-latch block of the loop (which would observe the wrong value after the set is hoisted). When the pattern matches, the save and set are moved to the preheader (in order, so the save captures the pre-loop value before the set overwrites it) and the restore is sinked to the exit block. The loop body then always sees the loop-invariant value with no per-iteration save/restore overhead. Add a test (crsat_save_restore_bracket) that verifies the transformation for $crsat. --- llvm/lib/Target/AIE/ReservedRegsLICM.cpp | 177 +++++++++++++++++- .../CodeGen/AIE/aie2ps/reserved-reg-licm.mir | 106 +++++++++++ 2 files changed, 280 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AIE/ReservedRegsLICM.cpp b/llvm/lib/Target/AIE/ReservedRegsLICM.cpp index 16b9156c9b47..5726222772f6 100644 --- a/llvm/lib/Target/AIE/ReservedRegsLICM.cpp +++ b/llvm/lib/Target/AIE/ReservedRegsLICM.cpp @@ -88,6 +88,19 @@ struct CandidateInfo { MachineInstr *HoistCandidate = nullptr; }; +/// Describes a matched save/restore bracket for a reserved register: +/// save: vreg = COPY $reserved_reg (first use in latch, before set) +/// set: $reserved_reg = (loop-invariant, immediately after) +/// restore: $reserved_reg = COPY vreg (last def in latch) +/// The transformation hoists save+set to the preheader and sinks restore to +/// the exit block, so the loop body always sees the loop-invariant value. +struct SaveRestoreBracket { + MachineInstr *Save = nullptr; + MachineInstr *Set = nullptr; + MachineInstr *Restore = nullptr; + MCPhysReg Reg = MCRegister::NoRegister; +}; + /// Used to collect candidates for hoisting/sinking class Candidates { DenseMap Candidates; @@ -163,6 +176,19 @@ class ReservedRegsLICM : public MachineFunctionPass { /// Hoist \p Cand to \p L's preheader if it is safe to do so. bool tryHoistToPreHeader(const CandidateInfo &Cand, MachineLoop &L); + /// Detect and transform a save/restore bracket in \p L's latch block. + /// A bracket has the form: + /// vreg = COPY $reserved_reg (save) + /// $reserved_reg = (set, loop-invariant) + /// ...uses of $reserved_reg... + /// $reserved_reg = COPY vreg (restore) + /// Returns the matched bracket, or std::nullopt if not found. + std::optional findSaveRestoreBracket(MachineLoop &L); + + /// Apply the save/restore bracket transformation: hoist save+set to the + /// preheader and sink restore to the exit block. + bool processSaveRestoreBracket(MachineLoop &L); + /// Verify if \p Cand is loop invariant and can be safely hoisted. /// \pre Cand->DefinedReg has a unique live value within the loop. This is /// verified by processForExitSink() or processForPreheaderHoist(). @@ -257,8 +283,14 @@ void ReservedRegsLICM::runOnLoop(MachineLoop &L) { BitVector ReservedLiveins = collectLoopReservedLiveins(L); processForExitSink(L, ReservedLiveins); processForPreheaderHoist(L, ReservedLiveins); + Changed |= processSaveRestoreBracket(L); } +// Forward declaration — defined after processForPreheaderHoist. +static void moveInstruction(const CandidateInfo &Cand, + MachineBasicBlock::iterator InsertBefore, + MachineBasicBlock &InsertMBB); + void ReservedRegsLICM::processForExitSink(MachineLoop &L, const BitVector &ReservedLiveins) { // Pre-compute what's live at the entry of the latch block by walking it @@ -305,6 +337,145 @@ void ReservedRegsLICM::processForExitSink(MachineLoop &L, } } +std::optional +ReservedRegsLICM::findSaveRestoreBracket(MachineLoop &L) { + MachineBasicBlock *Latch = L.getLoopLatch(); + assert(Latch); + + for (MachineInstr &SaveMI : *Latch) { + // Look for: vreg = COPY $reserved_reg + if (!SaveMI.isCopy()) + continue; + Register SaveDst = SaveMI.getOperand(0).getReg(); + Register SaveSrc = SaveMI.getOperand(1).getReg(); + if (!SaveDst.isVirtual() || !SaveSrc.isPhysical()) + continue; + MCPhysReg PhysReg = SaveSrc.asMCReg(); + if (!TRI->isSimplifiableReservedReg(PhysReg)) + continue; + + // The saved vreg must have exactly one non-debug use (the restore). + if (!MRI->hasOneNonDBGUse(SaveDst)) + continue; + + // Helper: does MI use PhysReg? + auto UsesPhysReg = [&](const MachineInstr &MI) { + return any_of(MI.operands(), [&](const MachineOperand &MO) { + return MO.isReg() && MO.isUse() && MO.getReg() == PhysReg; + }); + }; + + // No uses of PhysReg before the save. Instructions before the save + // rely on the original value of PhysReg; hoisting the set would make + // them see the loop-invariant value instead. + auto BeforeSave = + make_range(Latch->begin(), MachineBasicBlock::iterator(SaveMI)); + if (any_of(BeforeSave, UsesPhysReg)) + continue; + + // Find the set: the next def of PhysReg after the save. + // No use of PhysReg is allowed between the save and the set. + MachineInstr *SetMI = nullptr; + bool PhysRegUsedBeforeSet = false; + for (MachineInstr *Next = SaveMI.getNextNode(); Next; + Next = Next->getNextNode()) { + if (getSinglePhysRegDef(*Next) == PhysReg) { + SetMI = Next; + break; + } + if (UsesPhysReg(*Next)) { + PhysRegUsedBeforeSet = true; + break; + } + } + if (!SetMI || PhysRegUsedBeforeSet) + continue; + + // The set must be loop-invariant (e.g. MOVX imm). + CandidateInfo SetCand(PhysReg); + SetCand.HoistCandidate = SetMI; + if (!isLoopInvariantInst(SetCand, L)) + continue; + + // Find the restore: the last def of PhysReg in the latch. + // It must be: $reserved_reg = COPY SaveDst. + MachineInstr *RestoreMI = nullptr; + for (MachineInstr &MI2 : reverse(*Latch)) { + if (getSinglePhysRegDef(MI2) == PhysReg) { + if (MI2.isCopy() && MI2.getOperand(1).getReg() == SaveDst) + RestoreMI = &MI2; + break; + } + } + if (!RestoreMI) + continue; + + // No uses of PhysReg after the restore. Once the restore is sinked to + // the exit block, instructions after the restore position in the loop + // body would see the loop-invariant value (from the set) instead of + // the restored value. + auto AfterRestore = make_range( + std::next(MachineBasicBlock::iterator(RestoreMI)), Latch->end()); + if (any_of(AfterRestore, UsesPhysReg)) + continue; + + // PhysReg must not be used in any non-latch block of the loop. + // If it were, those uses might see the wrong value after we hoist + // the set to the preheader. + if (any_of(L.getBlocks(), [&](MachineBasicBlock *MBB) { + return MBB != Latch && any_of(*MBB, UsesPhysReg); + })) + continue; + + return SaveRestoreBracket{&SaveMI, SetMI, RestoreMI, PhysReg}; + } + return std::nullopt; +} + +bool ReservedRegsLICM::processSaveRestoreBracket(MachineLoop &L) { + std::optional BracketOpt = findSaveRestoreBracket(L); + if (!BracketOpt) + return false; + + const SaveRestoreBracket &Bracket = *BracketOpt; + MachineBasicBlock *Preheader = L.getLoopPreheader(); + MachineBasicBlock *ExitMBB = L.getExitBlock(); + assert(Preheader && ExitMBB); + + // Ensure the exit block is a dedicated exit (single predecessor). + // runOnLoop already verified that the critical edge can be split if needed. + if (!ExitMBB->getSinglePredecessor()) { + MachineBasicBlock *ExitingBlock = L.getExitingBlock(); + ExitMBB = ExitingBlock->SplitCriticalEdge(ExitMBB, *this); + assert(ExitMBB); + LLVM_DEBUG(dbgs() << "Created dedicated exit: " + << printMBBReference(*ExitMBB) << "\n"); + } + + LLVM_DEBUG(dbgs() << "Save/restore bracket for " << TRI->getName(Bracket.Reg) + << ":\n" + << " Save: " << *Bracket.Save << " Set: " + << *Bracket.Set << " Restore: " << *Bracket.Restore); + + // Move save + set to the preheader (save first so it captures the + // pre-loop value before the set overwrites it). + auto InsertPt = Preheader->getFirstTerminator(); + CandidateInfo SaveCand(Bracket.Reg); + SaveCand.HoistCandidate = Bracket.Save; + moveInstruction(SaveCand, InsertPt, *Preheader); + + CandidateInfo SetCand(Bracket.Reg); + SetCand.HoistCandidate = Bracket.Set; + moveInstruction(SetCand, InsertPt, *Preheader); + + // Sink restore to the exit block. + CandidateInfo RestoreCand(Bracket.Reg); + RestoreCand.HoistCandidate = Bracket.Restore; + moveInstruction(RestoreCand, ExitMBB->getFirstNonPHI(), *ExitMBB); + + return true; +} + void ReservedRegsLICM::processForPreheaderHoist( MachineLoop &L, const BitVector &ReservedLiveins) { Candidates HoistCandidates; @@ -333,9 +504,9 @@ void ReservedRegsLICM::processForPreheaderHoist( /// When an instruction is found to only use loop invariant operands that is /// safe to hoist/sink, this function is called to actually move the MI out of /// the loop. -void moveInstruction(const CandidateInfo &Cand, - MachineBasicBlock::iterator InsertBefore, - MachineBasicBlock &InsertMBB) { +static void moveInstruction(const CandidateInfo &Cand, + MachineBasicBlock::iterator InsertBefore, + MachineBasicBlock &InsertMBB) { MachineInstr &MI = *Cand.HoistCandidate; LLVM_DEBUG(dbgs() << "Moving to " << printMBBReference(InsertMBB) << " from " << printMBBReference(*MI.getParent()) << ": " << MI); diff --git a/llvm/test/CodeGen/AIE/aie2ps/reserved-reg-licm.mir b/llvm/test/CodeGen/AIE/aie2ps/reserved-reg-licm.mir index 5bc87c01a241..23b1b8c98d37 100644 --- a/llvm/test/CodeGen/AIE/aie2ps/reserved-reg-licm.mir +++ b/llvm/test/CodeGen/AIE/aie2ps/reserved-reg-licm.mir @@ -330,6 +330,112 @@ body: | PseudoJ_jump_imm %bb.1 ... +# $crsat save/restore bracket: the loop saves $crsat, sets it to 1, uses it, +# then restores it. The save+set should be hoisted to the preheader and the +# restore should be sinked to the exit block. +--- +name: crsat_save_restore_bracket +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: crsat_save_restore_bracket + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $r0, $r1, $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc1024 = COPY $r1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:es = COPY $s0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:er = COPY $crsat + ; CHECK-NEXT: $crsat = MOVX_mvx_cr_imm 1 + ; CHECK-NEXT: PseudoJ_jump_imm %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: $crsat = COPY [[COPY3]] + ; CHECK-NEXT: PseudoRET implicit $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.1(0x04000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[VSRS_4x_mv_w_srs_cm_srsSign1_:%[0-9]+]]:vec256 = VSRS_4x_mv_w_srs_cm_srsSign1 [[COPY1]], [[COPY2]], implicit-def dead $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign1 + ; CHECK-NEXT: PseudoJNZ [[COPY]], %bb.2 + ; CHECK-NEXT: PseudoJ_jump_imm %bb.1 + bb.0: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $s0 + + %0:er = COPY $r0 + %1:acc1024 = COPY $r1 + %2:es = COPY $s0 + PseudoJ_jump_imm %bb.2 + + bb.1: + PseudoRET implicit $lr + + bb.2: + successors: %bb.2(0x7c000000), %bb.1(0x04000000) + %3:er = COPY $crsat + $crsat = MOVX_mvx_cr_imm 1 + %4:vec256 = VSRS_4x_mv_w_srs_cm_srsSign1 %1, %2, implicit-def dead $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign1 + $crsat = COPY %3 + PseudoJNZ %0, %bb.2 + PseudoJ_jump_imm %bb.1 +... + +# $crrnd save/restore bracket with uses before the save and after the restore. +# The bracket must NOT be transformed because $crrnd is used outside the +# [save, restore] window: once before the save and once after the restore. +--- +name: crrnd_bracket_uses_outside_window +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: crrnd_bracket_uses_outside_window + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $r0, $r1, $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc1024 = COPY $r1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:es = COPY $s0 + ; CHECK-NEXT: PseudoJ_jump_imm %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: PseudoRET implicit $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.1(0x04000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[VSRS_4x_mv_w_srs_cm_srsSign1_:%[0-9]+]]:vec256 = VSRS_4x_mv_w_srs_cm_srsSign1 [[COPY1]], [[COPY2]], implicit-def dead $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:er = COPY $crrnd + ; CHECK-NEXT: $crrnd = MOVX_mvx_cr_imm 0 + ; CHECK-NEXT: [[VSRS_4x_mv_w_srs_cm_srsSign1_1:%[0-9]+]]:vec256 = VSRS_4x_mv_w_srs_cm_srsSign1 [[COPY1]], [[COPY2]], implicit-def dead $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign1 + ; CHECK-NEXT: $crrnd = COPY [[COPY3]] + ; CHECK-NEXT: [[VSRS_4x_mv_w_srs_cm_srsSign1_2:%[0-9]+]]:vec256 = VSRS_4x_mv_w_srs_cm_srsSign1 [[COPY1]], [[COPY2]], implicit-def dead $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign1 + ; CHECK-NEXT: PseudoJNZ [[COPY]], %bb.2 + ; CHECK-NEXT: PseudoJ_jump_imm %bb.1 + bb.0: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $s0 + + %0:er = COPY $r0 + %1:acc1024 = COPY $r1 + %2:es = COPY $s0 + PseudoJ_jump_imm %bb.2 + + bb.1: + PseudoRET implicit $lr + + bb.2: + successors: %bb.2(0x7c000000), %bb.1(0x04000000) + %5:vec256 = VSRS_4x_mv_w_srs_cm_srsSign1 %1, %2, implicit-def dead $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign1 + %3:er = COPY $crrnd + $crrnd = MOVX_mvx_cr_imm 0 + %4:vec256 = VSRS_4x_mv_w_srs_cm_srsSign1 %1, %2, implicit-def dead $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign1 + $crrnd = COPY %3 + %6:vec256 = VSRS_4x_mv_w_srs_cm_srsSign1 %1, %2, implicit-def dead $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign1 + PseudoJNZ %0, %bb.2 + PseudoJ_jump_imm %bb.1 +... + # $srssign0 is defined in one conditional branch (bb.2) but not the other # (bb.3). The latch (bb.4) uses $srssign0 before defining it, so $srssign0 is # live at the latch entry on the path through bb.3. Sinking MOVX_mvx_cr_imm 0 From b939bc393ff31704e3e3dcb341a582c95e408058 Mon Sep 17 00:00:00 2001 From: Andreu Carminati Date: Mon, 4 May 2026 14:39:01 -0600 Subject: [PATCH 4/5] [AIE] ReservedRegsLICM: fix livein detection for multi-block loops The flat sequential backward walk in collectLoopReservedLiveins was incorrect for loops with conditional paths. When L.getBlocks() returns blocks in RPO order (header first, latch last), a def in one branch could remove a register from the live set after a sibling branch's use had already added it back. This caused the register to be incorrectly classified as not a loop livein, allowing processForExitSink to sink its definition to the exit block even though the register carried a pre-loop value on some paths. For example, in a loop where bb.2 uses $srssign0 and bb.3 defines it (both successors of the header), the old code would process bb.2's use first (adding $srssign0 to the live set) and then bb.3's def (removing it), concluding $srssign0 is not a livein. But $srssign0 IS a livein because bb.2 can execute on the first iteration without going through bb.3. Fix this by replacing the flat walk with a backward dataflow worklist algorithm that correctly propagates liveness through the CFG until a fixed point is reached. The algorithm initialises live-in sets to empty and grows them monotonically, re-queuing predecessors whenever a block's live-in set changes. Add a test case (livein_bug_no_sink) to the aie2ps reserved-reg-licm test that reproduces the bug and verifies the MOVX is not sunk. --- llvm/lib/Target/AIE/ReservedRegsLICM.cpp | 70 +++++++++++++--- .../CodeGen/AIE/aie2ps/reserved-reg-licm.mir | 82 +++++++++++++++++++ 2 files changed, 141 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/AIE/ReservedRegsLICM.cpp b/llvm/lib/Target/AIE/ReservedRegsLICM.cpp index 5726222772f6..e7bd1abc0790 100644 --- a/llvm/lib/Target/AIE/ReservedRegsLICM.cpp +++ b/llvm/lib/Target/AIE/ReservedRegsLICM.cpp @@ -17,6 +17,7 @@ #include "AIE.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -234,28 +235,75 @@ bool ReservedRegsLICM::runOnMachineFunction(MachineFunction &MF) { } BitVector ReservedRegsLICM::collectLoopReservedLiveins(const MachineLoop &L) { - LivePhysRegs LiveRegs(*TRI); - - // Conservatively assume reserved regs are all liveouts + // Compute the live-in set at the loop header using a backward dataflow + // worklist algorithm. A flat sequential backward walk over all loop blocks + // is incorrect for multi-block loops with conditional paths: a def in one + // branch can incorrectly kill a register that is used (without a preceding + // def) in a sibling branch, making the register appear to not be a livein. + // + // The worklist algorithm propagates liveness backward through the CFG until + // a fixed point is reached, correctly handling all control-flow shapes. + + // Conservative liveout assumption: all candidate reserved regs are live at + // the exit of the loop (they may be used by code after the loop). + BitVector InitLive(TRI->getNumRegs()); for (MCPhysReg PhysReg : MRI->getReservedRegs().set_bits()) { if (MRI->canSimplifyPhysReg(PhysReg)) { - LiveRegs.addReg(PhysReg); + InitLive.set(PhysReg); } } - // Traverse all blocks in the loop to remove defs. stepBackward handles - // regmask operands (calls) correctly. + // live_in[MBB] = set of reserved registers live at the entry of MBB. + // Initialised to empty; the worklist grows these sets monotonically. + DenseMap LiveIn; for (MachineBasicBlock *MBB : L.getBlocks()) + LiveIn[MBB] = BitVector(TRI->getNumRegs()); + + // Seed the worklist with every block in the loop. + SmallPtrSet InWorklist; + SmallVector Worklist; + for (MachineBasicBlock *MBB : L.getBlocks()) { + Worklist.push_back(MBB); + InWorklist.insert(MBB); + } + + while (!Worklist.empty()) { + MachineBasicBlock *MBB = Worklist.pop_back_val(); + InWorklist.erase(MBB); + + // live-out(MBB) = union of live-in of successors. + // For successors outside the loop use InitLive conservatively. + LivePhysRegs LiveRegs(*TRI); + for (MachineBasicBlock *Succ : MBB->successors()) { + const BitVector &SuccLive = L.contains(Succ) ? LiveIn[Succ] : InitLive; + for (unsigned Reg : SuccLive.set_bits()) + LiveRegs.addReg(Reg); + } + + // Walk backward through MBB to compute live-in. stepBackward handles + // regmask operands (calls) correctly. for (const MachineInstr &MI : reverse(*MBB)) LiveRegs.stepBackward(MI); - BitVector ReservedLiveins(TRI->getNumRegs()); - for (MCRegister Reg : LiveRegs) { - if (MRI->isReserved(Reg)) { - ReservedLiveins.set(Reg); + // Convert to a BitVector, keeping only reserved registers. + BitVector NewLiveIn(TRI->getNumRegs()); + for (MCRegister Reg : LiveRegs) + if (MRI->isReserved(Reg)) + NewLiveIn.set(Reg); + + // If the live-in set grew, re-process all in-loop predecessors. + if (NewLiveIn != LiveIn[MBB]) { + LiveIn[MBB] = NewLiveIn; + for (MachineBasicBlock *Pred : MBB->predecessors()) { + if (L.contains(Pred) && !InWorklist.count(Pred)) { + Worklist.push_back(Pred); + InWorklist.insert(Pred); + } + } } } - return ReservedLiveins; + + return LiveIn[L.getHeader()]; } /// Walk the specified region of the CFG and hoist loop invariants out to the diff --git a/llvm/test/CodeGen/AIE/aie2ps/reserved-reg-licm.mir b/llvm/test/CodeGen/AIE/aie2ps/reserved-reg-licm.mir index 23b1b8c98d37..fa2dddd0d559 100644 --- a/llvm/test/CodeGen/AIE/aie2ps/reserved-reg-licm.mir +++ b/llvm/test/CodeGen/AIE/aie2ps/reserved-reg-licm.mir @@ -436,6 +436,88 @@ body: | PseudoJ_jump_imm %bb.1 ... +# $srssign0 is used in one conditional branch (bb.2) but defined in the other +# (bb.3). The latch (bb.4) defines $srssign0 = MOVX 0 without using it first, +# so LatchEntryLive does NOT flag it. However, $srssign0 IS a true loop livein +# because bb.2 can execute on the first iteration without going through bb.3. +# collectLoopReservedLiveins must detect this via backward dataflow and prevent +# the MOVX from being sunk to the exit block. +--- +name: livein_bug_no_sink +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: livein_bug_no_sink + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $r0, $r1, $dm0, $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:edm = COPY $dm0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:es = COPY $s0 + ; CHECK-NEXT: PseudoJ_jump_imm %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: PseudoJNZ [[COPY]], %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[VSRS_4x_mv_x_srs_dm_srsSign0_:%[0-9]+]]:mxs = VSRS_4x_mv_x_srs_dm_srsSign0 [[COPY3]], [[COPY4]], implicit-def $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign0 + ; CHECK-NEXT: PseudoJ_jump_imm %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $srssign0 = COPY [[COPY2]] + ; CHECK-NEXT: PseudoJ_jump_imm %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.5(0x04000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $srssign0 = MOVX_mvx_cr_imm 0 + ; CHECK-NEXT: PseudoJNZ [[COPY]], %bb.1 + ; CHECK-NEXT: PseudoJ_jump_imm %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: PseudoRET implicit $lr + bb.0: + successors: %bb.1(0x80000000) + liveins: $p0, $r0, $r1, $dm0, $s0 + %0:er = COPY $r0 + %1:ep = COPY $p0 + %2:er = COPY $r1 + %3:edm = COPY $dm0 + %4:es = COPY $s0 + PseudoJ_jump_imm %bb.1 + + bb.1: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + PseudoJNZ %0, %bb.3 + + bb.2: + successors: %bb.4(0x80000000) + %5:mxs = VSRS_4x_mv_x_srs_dm_srsSign0 %3, %4, implicit-def $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign0 + PseudoJ_jump_imm %bb.4 + + bb.3: + successors: %bb.4(0x80000000) + $srssign0 = COPY %2 + PseudoJ_jump_imm %bb.4 + + bb.4: + successors: %bb.1(0x7c000000), %bb.5(0x04000000) + $srssign0 = MOVX_mvx_cr_imm 0 + PseudoJNZ %0, %bb.1 + PseudoJ_jump_imm %bb.5 + + bb.5: + PseudoRET implicit $lr +... + # $srssign0 is defined in one conditional branch (bb.2) but not the other # (bb.3). The latch (bb.4) uses $srssign0 before defining it, so $srssign0 is # live at the latch entry on the path through bb.3. Sinking MOVX_mvx_cr_imm 0 From 40e5a8ce914f20f8a93d067dfc886bc99d8be8cc Mon Sep 17 00:00:00 2001 From: Andreu Carminati Date: Mon, 4 May 2026 14:45:10 -0600 Subject: [PATCH 5/5] [AIE] ReservedRegsLICM: fix exit-sink to scan the exiting block, not the latch processForExitSink was scanning L.getLoopLatch() for sink candidates. When the latch and the exiting block differ, instructions in the latch are not on the exit path: sinking them to the exit block inserts a def that was never executed on that path, corrupting the register's value at the exit. Fix this by scanning L.getExitingBlock() instead. The guard that checks whether a register is live at the entry of the scanned block is also updated to use the exiting block (renamed LatchEntryLive -> ExitingBlockEntryLive). Add a test case (latch_vs_exiting_no_sink) that reproduces the bug: a loop where the latch (bb.3) defines $srssign0 = MOVX 0 but the exiting block (bb.2) exits directly to bb.4 without going through bb.3. The MOVX must remain in bb.3. --- llvm/lib/Target/AIE/ReservedRegsLICM.cpp | 38 ++++++---- .../CodeGen/AIE/aie2ps/reserved-reg-licm.mir | 71 +++++++++++++++++++ 2 files changed, 94 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/AIE/ReservedRegsLICM.cpp b/llvm/lib/Target/AIE/ReservedRegsLICM.cpp index e7bd1abc0790..3a0d839c4ec7 100644 --- a/llvm/lib/Target/AIE/ReservedRegsLICM.cpp +++ b/llvm/lib/Target/AIE/ReservedRegsLICM.cpp @@ -341,36 +341,44 @@ static void moveInstruction(const CandidateInfo &Cand, void ReservedRegsLICM::processForExitSink(MachineLoop &L, const BitVector &ReservedLiveins) { - // Pre-compute what's live at the entry of the latch block by walking it + // Sink candidates must come from the exiting block (the block that has the + // edge to the exit block), not the latch. When the latch and the exiting + // block differ, instructions in the latch are not on the exit path: sinking + // them to the exit block would insert a def that was never executed on that + // path, corrupting the register's exit value. + MachineBasicBlock *ExitingBlock = L.getExitingBlock(); + assert(ExitingBlock); + + // Pre-compute what's live at the entry of the exiting block by walking it // fully backward. For multi-block loops with conditional paths, a candidate // register may be defined in some branches but not others, making it live at - // the latch entry on those paths. If the register is live at the latch entry - // (i.e., used before its first def in the latch), sinking its last def to - // the exit block would expose the wrong value on those paths. - assert(L.getLoopLatch()); - LivePhysRegs LatchEntryLive(*TRI); - for (const MachineInstr &MI : reverse(*L.getLoopLatch())) - LatchEntryLive.stepBackward(MI); + // the exiting block entry on those paths. If the register is live at the + // exiting block entry (i.e., used before its first def in the exiting block, + // possibly carrying a value from a conditional path that skips the def), + // sinking its last def to the exit block would expose the wrong value. + LivePhysRegs ExitingBlockEntryLive(*TRI); + for (const MachineInstr &MI : reverse(*ExitingBlock)) + ExitingBlockEntryLive.stepBackward(MI); RegDefMap PhysRegChanged(*TRI); LivePhysRegs LiveRegs(*TRI); Candidates SinkCandidates; - // Walk the latch block, track defs for each register, and + // Walk the exiting block, track defs for each register, and // collect potential LICM candidates. - for (MachineInstr &MI : reverse(*L.getLoopLatch())) { + for (MachineInstr &MI : reverse(*ExitingBlock)) { CandidateInfo *CandInfo = SinkCandidates.getInfo(MI); // First time we meet a reserved reg definition while iterating upwards. // If that def is not a loop livein, it isn't used in this block after the - // def, and it isn't live at the latch entry (which would indicate it is - // used before its def in the latch, possibly carrying a value from a - // conditional path that skips the def), then one can move the instruction - // to the exit BB of the loop. + // def, and it isn't live at the exiting block entry (which would indicate + // it is used before its def in the exiting block, possibly carrying a + // value from a conditional path that skips the def), then one can move + // the instruction to the exit BB of the loop. if (CandInfo && !PhysRegChanged.hasChanged(CandInfo->DefinedReg) && !ReservedLiveins.test(CandInfo->DefinedReg) && !LiveRegs.contains(CandInfo->DefinedReg) && - !LatchEntryLive.contains(CandInfo->DefinedReg)) { + !ExitingBlockEntryLive.contains(CandInfo->DefinedReg)) { assert(!CandInfo->HoistCandidate); CandInfo->HoistCandidate = &MI; } diff --git a/llvm/test/CodeGen/AIE/aie2ps/reserved-reg-licm.mir b/llvm/test/CodeGen/AIE/aie2ps/reserved-reg-licm.mir index fa2dddd0d559..180e89027600 100644 --- a/llvm/test/CodeGen/AIE/aie2ps/reserved-reg-licm.mir +++ b/llvm/test/CodeGen/AIE/aie2ps/reserved-reg-licm.mir @@ -436,6 +436,77 @@ body: | PseudoJ_jump_imm %bb.1 ... +# The latch (bb.3) defines $srssign0 = MOVX 0 but is NOT the exiting block. +# The exiting block (bb.2) exits to bb.4 without going through bb.3, so the +# MOVX is not on the exit path. processForExitSink must scan the exiting block +# (bb.2), not the latch (bb.3), to avoid sinking the MOVX to bb.4. +--- +name: latch_vs_exiting_no_sink +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: latch_vs_exiting_no_sink + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $r0, $r1, $dm0, $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:edm = COPY $dm0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:es = COPY $s0 + ; CHECK-NEXT: PseudoJ_jump_imm %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $srssign0 = COPY [[COPY2]] + ; CHECK-NEXT: PseudoJ_jump_imm %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x7c000000), %bb.4(0x04000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[VSRS_4x_mv_x_srs_dm_srsSign0_:%[0-9]+]]:mxs = VSRS_4x_mv_x_srs_dm_srsSign0 [[COPY3]], [[COPY4]], implicit-def $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign0 + ; CHECK-NEXT: PseudoJNZ [[COPY]], %bb.3 + ; CHECK-NEXT: PseudoJ_jump_imm %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $srssign0 = MOVX_mvx_cr_imm 0 + ; CHECK-NEXT: PseudoJ_jump_imm %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: PseudoRET implicit $lr + bb.0: + successors: %bb.1(0x80000000) + liveins: $p0, $r0, $r1, $dm0, $s0 + %0:er = COPY $r0 + %1:ep = COPY $p0 + %2:er = COPY $r1 + %3:edm = COPY $dm0 + %4:es = COPY $s0 + PseudoJ_jump_imm %bb.1 + + bb.1: + successors: %bb.2(0x80000000) + $srssign0 = COPY %2 + PseudoJ_jump_imm %bb.2 + + bb.2: + successors: %bb.3(0x7c000000), %bb.4(0x04000000) + %5:mxs = VSRS_4x_mv_x_srs_dm_srsSign0 %3, %4, implicit-def $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign0 + PseudoJNZ %0, %bb.3 + PseudoJ_jump_imm %bb.4 + + bb.3: + successors: %bb.1(0x80000000) + $srssign0 = MOVX_mvx_cr_imm 0 + PseudoJ_jump_imm %bb.1 + + bb.4: + PseudoRET implicit $lr +... + # $srssign0 is used in one conditional branch (bb.2) but defined in the other # (bb.3). The latch (bb.4) defines $srssign0 = MOVX 0 without using it first, # so LatchEntryLive does NOT flag it. However, $srssign0 IS a true loop livein