Skip to content

Commit 05a0166

Browse files
committed
[AIEX] Refactor determineCalleeSaves for L register spill optimization
1 parent dd39b9d commit 05a0166

3 files changed

Lines changed: 91 additions & 71 deletions

File tree

llvm/lib/Target/AIE/AIEBaseFrameLowering.cpp

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,3 +446,83 @@ void AIEBaseFrameLowering::orderFrameObjects(
446446
dbgs() << "\n";
447447
});
448448
}
449+
450+
// Rewrites the callee-save marks in SavedRegs for every L register found in
// the function's callee-saved register list: for each such register, the
// marks of the L register and of its even/odd sub-registers are cleared and
// then exactly one form (the L register, or one/both sub-registers) is set
// again, depending on which halves were marked and on MFI.hasCalls().
void AIEBaseFrameLowering::optimizeLRegCalleeSaves(
451+
MachineFunction &MF, BitVector &SavedRegs,
452+
const TargetRegisterClass &LRegClass, unsigned SubRegIdxEven,
453+
unsigned SubRegIdxOdd) const {
454+
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
455+
const MachineFrameInfo &MFI = MF.getFrameInfo();
456+
457+
// When both L registers and their sub-GPRs are in the CSR list, we need to
458+
// decide whether to save as L register or individual GPRs.
459+
//
460+
// Strategy:
461+
// - If only one GPR of the pair is used: save just that GPR
462+
// - If both GPRs are used AND function has calls: use L register save
463+
// (stack spill is required, 1 L spill is more efficient than 2 GPR spills)
464+
// - If both GPRs are used AND no calls: use individual GPR saves
465+
// (allows GPR-to-GPR spilling via scratch registers)
466+
467+
// Build the list of callee-saved L registers from the callee-saved regs
468+
// provided by the CSR list. The list is a zero-terminated array, so the
469+
// loop stops at the first null entry.
SmallVector<MCPhysReg, 4> CalleeSavedLRegs;
470+
const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
471+
for (unsigned I = 0; CSRegs[I]; ++I) {
472+
MCPhysReg Reg = CSRegs[I];
473+
if (LRegClass.contains(Reg))
474+
CalleeSavedLRegs.push_back(Reg);
475+
}
476+
477+
for (MCPhysReg LReg : CalleeSavedLRegs) {
478+
// Get the two GPR subregisters of this L register.
479+
MCPhysReg EvenGPR = TRI->getSubReg(LReg, SubRegIdxEven);
480+
MCPhysReg OddGPR = TRI->getSubReg(LReg, SubRegIdxOdd);
481+
482+
// Snapshot what the caller-provided SavedRegs currently marks for saving
483+
// (presumably filled in by the base determineCalleeSaves - verify against
// the caller). The snapshot is needed because the bits are cleared below.
484+
const bool LRegMarked = SavedRegs.test(LReg);
485+
const bool EvenMarked = SavedRegs.test(EvenGPR);
486+
const bool OddMarked = SavedRegs.test(OddGPR);
487+
488+
// Nothing marked for this register: leave SavedRegs untouched.
if (!LRegMarked && !EvenMarked && !OddMarked)
489+
continue;
490+
491+
// Clear all three marks; only the form chosen below is set again.
SavedRegs.reset(EvenGPR);
492+
SavedRegs.reset(OddGPR);
493+
SavedRegs.reset(LReg);
494+
495+
// Sanity check (asserts-enabled builds only), evaluated on the snapshot
// taken above: a marked sub-register implies that the L register is
// marked as well.
assert((!(EvenMarked || OddMarked) || LRegMarked) &&
496+
"sub-reg mark without L pair mark violates invariant");
497+
498+
// Determine if both subregisters actually need saving. This is the case
499+
// when both GPRs are marked, or when only the L register itself is marked
// and neither GPR is. An L register mark accompanied by exactly one GPR
// mark means only that GPR needs saving.
const bool BothNeeded =
501+
(EvenMarked && OddMarked) || (LRegMarked && !EvenMarked && !OddMarked);
502+
503+
// When there are calls, we mark the L register so that we get a single
504+
// spill instead of 2. When there are no calls, we prefer marking the
505+
// subregisters since they can be copied to non-CSR registers instead of
506+
// spilled to memory (there is no move instruction between L registers).
507+
// For the call case we have no choice but to spill anyway since we don't
508+
// know which registers the callee is going to use.
509+
if (BothNeeded) {
510+
// Both subregisters need saving.
511+
if (MFI.hasCalls()) {
512+
// Use L register save. Stack spill is required (scratch regs
513+
// clobbered by calls), so 1 L spill is more efficient than 2 GPR
514+
// spills.
515+
SavedRegs.set(LReg);
516+
} else {
517+
// No calls: use individual GPRs for GPR-to-GPR copy.
518+
SavedRegs.set(EvenGPR);
519+
SavedRegs.set(OddGPR);
520+
}
521+
} else if (EvenMarked) {
// Only the even half was marked: save just that GPR.
522+
SavedRegs.set(EvenGPR);
523+
} else {
// Only remaining possibility: exactly the odd half was marked.
524+
assert(OddMarked);
525+
SavedRegs.set(OddGPR);
526+
}
527+
}
528+
}

llvm/lib/Target/AIE/AIEBaseFrameLowering.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,13 @@ class AIEBaseFrameLowering : public TargetFrameLowering {
7474

7575
void determineFrameLayout(MachineFunction &MF) const;
7676

77+
/// Optimize callee-saved L registers by deciding whether to save as L
78+
/// register pair or individual GPRs based on usage and call patterns.
///
/// For every register of \p LRegClass that appears in the function's
/// callee-saved register list and has a mark in \p SavedRegs (on itself or
/// on one of its two sub-registers), the three marks are cleared and then:
///  - both halves needed and the function has calls: the L register is set;
///  - both halves needed and no calls: both sub-registers are set;
///  - only one half marked: only that sub-register is set.
///
/// \param MF             function whose callee-saved list and frame info
///                       (hasCalls) are consulted.
/// \param SavedRegs      in/out bit vector of registers to save; modified in
///                       place.
/// \param LRegClass      register class used to pick the L registers out of
///                       the callee-saved register list.
/// \param SubRegIdxEven  sub-register index used to obtain the even half of
///                       an L register.
/// \param SubRegIdxOdd   sub-register index used to obtain the odd half of
///                       an L register.
79+
void optimizeLRegCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
80+
const TargetRegisterClass &LRegClass,
81+
unsigned SubRegIdxEven,
82+
unsigned SubRegIdxOdd) const;
83+
7784
private:
7885
virtual void adjustSPReg(MachineBasicBlock &MBB,
7986
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,

llvm/lib/Target/AIE/aie2ps/AIE2PSFrameLowering.cpp

Lines changed: 4 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -85,84 +85,17 @@ void AIE2PSFrameLowering::determineCalleeSaves(MachineFunction &MF,
8585
BitVector &SavedRegs,
8686
RegScavenger *RS) const {
8787
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
88-
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
89-
const MachineFrameInfo &MFI = MF.getFrameInfo();
90-
91-
// When both L registers and their sub-GPRs are in the CSR list, we need to
92-
// decide whether to save as L register or individual GPRs.
93-
//
94-
// Strategy:
95-
// - If only one GPR of the pair is used: save just that GPR
96-
// - If both GPRs are used AND function has calls: use L register save
97-
// (stack spill is required, 1 L spill is more efficient than 2 GPR spills)
98-
// - If both GPRs are used AND no calls: use individual GPR saves
99-
// (allows GPR-to-GPR spilling via scratch registers)
100-
// Build the list of callee-saved L registers from the callee-saved regs
101-
// provided by CSR list.
102-
SmallVector<MCPhysReg, 4> CalleeSavedLRegs;
103-
const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
104-
for (unsigned I = 0; CSRegs[I]; ++I) {
105-
MCPhysReg Reg = CSRegs[I];
106-
if (AIE2PS::eLRegClass.contains(Reg))
107-
CalleeSavedLRegs.push_back(Reg);
108-
}
109-
110-
for (MCPhysReg LReg : CalleeSavedLRegs) {
111-
// Get the two GPR subregisters of this L register
112-
MCPhysReg EvenGPR = TRI->getSubReg(LReg, AIE2PS::sub_l_even);
113-
MCPhysReg OddGPR = TRI->getSubReg(LReg, AIE2PS::sub_l_odd);
114-
115-
// Check what's marked for saving by the base determineCalleeSaves.
116-
// This already reflects which registers are actually clobbered.
117-
const bool LRegMarked = SavedRegs.test(LReg);
118-
const bool EvenMarked = SavedRegs.test(EvenGPR);
119-
const bool OddMarked = SavedRegs.test(OddGPR);
12088

121-
if (!LRegMarked && !EvenMarked && !OddMarked)
122-
continue;
89+
// Optimize L register callee-saves using shared implementation.
90+
optimizeLRegCalleeSaves(MF, SavedRegs, AIE2PS::eLRegClass, AIE2PS::sub_l_even,
91+
AIE2PS::sub_l_odd);
12392

124-
SavedRegs.reset(EvenGPR);
125-
SavedRegs.reset(OddGPR);
126-
SavedRegs.reset(LReg);
127-
128-
assert((!(EvenMarked || OddMarked) || LRegMarked) &&
129-
"sub-reg mark without L pair mark violates invariant");
130-
131-
// Determine if both subregisters actually need saving.
132-
// LRegMarked alone doesn't mean both - check individual GPR marks.
133-
const bool BothNeeded =
134-
(EvenMarked && OddMarked) || (LRegMarked && !EvenMarked && !OddMarked);
135-
136-
// When there is calls we mark the L register so that we get a single
137-
// spill instead of 2. When there are no calls, we prefer marking the
138-
// subregisters since they can be copied to non CSR registers instead of
139-
// spilled to memory (There is no move instruction between L registers).
140-
// For the call case we have no choice but to spill anyway since we don't
141-
// know which registers the callee is going to use.
142-
if (BothNeeded) {
143-
// Both subregisters need saving.
144-
if (MFI.hasCalls()) {
145-
// Use L register save. Stack spill is required (scratch regs
146-
// clobbered by calls), so 1 L spill is more efficient than 2 GPR
147-
// spills.
148-
SavedRegs.set(LReg);
149-
} else {
150-
// No calls: use individual GPRs for GPR-to-GPR copy.
151-
SavedRegs.set(EvenGPR);
152-
SavedRegs.set(OddGPR);
153-
}
154-
} else if (EvenMarked) {
155-
SavedRegs.set(EvenGPR);
156-
} else {
157-
assert(OddMarked);
158-
SavedRegs.set(OddGPR);
159-
}
160-
}
16193
// If there is a frame pointer (dynamic stack allocation), p7 will be used
16294
// as a frame pointer. The register allocator will not be able to see the
16395
// redefinition of p7 as the prologue and the epilogue are emitted after the
16496
// register allocation. Thus, we make sure to spill p7 at the beginning of
16597
// the function body and restore it at the end by adding it in SavedRegs.
98+
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
16699
const Register FPReg = TRI->getFrameRegister(MF);
167100
if (hasFP(MF))
168101
SavedRegs.set(FPReg);

0 commit comments

Comments
 (0)