Skip to content

Commit 4318ee0

Browse files
committed
[AIE2PS] Spill 512-bit accumulators into vector registers
Introduce a combined 512-bit spill register class that unions the vector (mXm), accumulator (mBMm), and FIFO (lfh0/lfh1/lfl0/lfl1/sfl/sfh/lfe) physical registers. Exposing this class to the register coalescer via getLargestLegalSuperClass lets a 512-bit value stored in an ACC512 vreg be allocated to a free X register instead of spilling to the stack when the accumulator bank is under pressure. This mirrors the existing AIE2P optimization. The widening is opt-in only for ACC512 and VEC512 (compared by pointer equality, not hasSubClassEq) to limit ripple effects on operand-restricted sub-classes that would otherwise alter coalescing and pre-RA scheduling. Spill/reload of a composite-class vreg goes through two new pseudos, VST_512_COMPOSED_REG_SPILL and VLDA_512_COMPOSED_REG_SPILL. eliminateFrameIndex resolves the frame index to an SP-relative immediate, and expandPostRAPseudo swaps the descriptor to the native opcode that matches the actual physical register chosen by the allocator: VST_dmx_sts_x_spill / VLDA_dmx_lda_x_spill for VEC512, and VST_dmx_sts_bm_spill / VLDA_dmx_lda_bm_spill for ACC512. AIE2PS has no native FIFO spill opcode, so the FIFO branch falls through to report_fatal_error; in practice the allocator should not assign a FIFO physreg to a composite-class vreg. A new test exercises both branches of the post-RA descriptor swap end-to-end through prologepilog and postrapseudos.
1 parent 0581878 commit 4318ee0

6 files changed

Lines changed: 103 additions & 3 deletions

File tree

llvm/lib/Target/AIE/aie2ps/AIE2PSInstrInfo.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -966,6 +966,9 @@ void AIE2PSInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
966966
regClassMatches(AIE2PS::spill_eS_to_eRRegClass, RC, SrcReg)) {
967967
// Can't spill these directly. Need to bounce through a GPR.
968968
return bounceViaRegClass(&AIE2PS::eRRegClass);
969+
} else if (regClassMatches(AIE2PS::spill_vec512_to_compositeRegClass, RC,
970+
SrcReg)) {
971+
Opcode = AIE2PS::VST_512_COMPOSED_REG_SPILL;
969972
} else {
970973
LLVM_DEBUG(I->dump());
971974
llvm_unreachable("Can't store this register to stack slot: is it virtual?");
@@ -1080,6 +1083,9 @@ void AIE2PSInstrInfo::loadRegFromStackSlot(
10801083
regClassMatches(AIE2PS::spill_eS_to_eRRegClass, RC, DstReg)) {
10811084
// Can't spill these directly. Need to bounce through a GPR.
10821085
return bounceViaRegClass(&AIE2PS::eRRegClass);
1086+
} else if (regClassMatches(AIE2PS::spill_vec512_to_compositeRegClass, RC,
1087+
DstReg)) {
1088+
Opcode = AIE2PS::VLDA_512_COMPOSED_REG_SPILL;
10831089
} else {
10841090
LLVM_DEBUG(I->dump());
10851091
llvm_unreachable(
@@ -1545,6 +1551,38 @@ bool AIE2PSInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
15451551
MI.eraseFromParent();
15461552
return true;
15471553
}
1554+
case AIE2PS::VLDA_512_COMPOSED_REG_SPILL: {
1555+
unsigned int Opcode;
1556+
const Register Dst = MI.getOperand(0).getReg();
1557+
if (AIE2PS::VEC512RegClass.contains(Dst)) {
1558+
Opcode = AIE2PS::VLDA_dmx_lda_x_spill;
1559+
} else if (AIE2PS::ACC512RegClass.contains(Dst)) {
1560+
Opcode = AIE2PS::VLDA_dmx_lda_bm_spill;
1561+
} else {
1562+
// FIFO512 is part of the composite RC for parity with AIE2P, but
1563+
// AIE2PS has no native FIFO spill opcode. The allocator should not
1564+
// assign a FIFO physreg to a composite-class vreg in practice; if
1565+
// it does, fail loudly so the assumption can be revisited.
1566+
report_fatal_error("VLDA_512_COMPOSED_REG_SPILL: no native AIE2PS "
1567+
"spill opcode for non-VEC/non-ACC physreg");
1568+
}
1569+
MI.setDesc(get(Opcode));
1570+
return false;
1571+
}
1572+
case AIE2PS::VST_512_COMPOSED_REG_SPILL: {
1573+
unsigned int Opcode;
1574+
const Register Src = MI.getOperand(0).getReg();
1575+
if (AIE2PS::VEC512RegClass.contains(Src)) {
1576+
Opcode = AIE2PS::VST_dmx_sts_x_spill;
1577+
} else if (AIE2PS::ACC512RegClass.contains(Src)) {
1578+
Opcode = AIE2PS::VST_dmx_sts_bm_spill;
1579+
} else {
1580+
report_fatal_error("VST_512_COMPOSED_REG_SPILL: no native AIE2PS "
1581+
"spill opcode for non-VEC/non-ACC physreg");
1582+
}
1583+
MI.setDesc(get(Opcode));
1584+
return false;
1585+
}
15481586
}
15491587
return false;
15501588
}

llvm/lib/Target/AIE/aie2ps/AIE2PSInstrInfo.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ def VST_Y_SPILL : Pseudo<(outs ), (ins VEC1024:$src, c17n_step128:$imm), "vst_y_
160160
def VST_BM_SPILL : Pseudo<(outs ), (ins ACC512:$src, c16n_step64:$imm), "vst_bm_spill", "$src, [sp, $imm]">;
161161
def VST_CM_SPILL : Pseudo<(outs ), (ins ACC1024:$src, c17n_step128:$imm), "vst_cm_spill", "$src, [sp, $imm]">;
162162
def VST_DM_SPILL : Pseudo<(outs ), (ins ACC2048:$src, c17n_step128:$imm), "vst_dm_spill", "$src, [sp, $imm]">;
163+
// Store-side spill pseudo for the composite 512-bit class (spill_vec512_to_composite).
// storeRegToStackSlot selects it for that class; expandPostRAPseudo then swaps the
// descriptor to VST_dmx_sts_x_spill (VEC512 source) or VST_dmx_sts_bm_spill (ACC512
// source), and reports a fatal error for any other physical register.
def VST_512_COMPOSED_REG_SPILL : Pseudo<(outs ), (ins spill_vec512_to_composite:$src, c16n_step64:$imm), "vst_512_composed_reg_spill", "${src}, [sp, $imm]">;
163164

164165
def VST_E_SPILL : Pseudo<(outs ), (ins mEs:$src, c12n_step4:$imm), "vst_e_spill", "$src, [sp, $imm]">;
165166
def VST_EE_SPILL : Pseudo<(outs ), (ins mEEs:$src, c13n_step8:$imm), "vst_ee_spill", "$src, [sp, $imm]">;
@@ -193,6 +194,7 @@ def VLDA_Y_SPILL : Pseudo<(outs VEC1024:$dst), (ins c17n_step128:$imm), "vlda_y_
193194
def VLDA_BM_SPILL : Pseudo<(outs ACC512:$dst), (ins c16n_step64:$imm), "vlda_bm_spill", "${dst}, [sp, $imm]">;
194195
def VLDA_CM_SPILL : Pseudo<(outs ACC1024:$dst), (ins c17n_step128:$imm), "vlda_cm_spill", "${dst}, [sp, $imm]">;
195196
def VLDA_DM_SPILL : Pseudo<(outs ACC2048:$dst), (ins c17n_step128:$imm), "vlda_dm_spill", "${dst}, [sp, $imm]">;
197+
// Reload-side spill pseudo for the composite 512-bit class (spill_vec512_to_composite).
// loadRegFromStackSlot selects it for that class; expandPostRAPseudo then swaps the
// descriptor to VLDA_dmx_lda_x_spill (VEC512 destination) or VLDA_dmx_lda_bm_spill
// (ACC512 destination), and reports a fatal error for any other physical register.
def VLDA_512_COMPOSED_REG_SPILL : Pseudo<(outs spill_vec512_to_composite:$dst), (ins c16n_step64:$imm), "vlda_512_composed_reg_spill", "${dst}, [sp, $imm]">;
196198

197199
def VLDA_E_SPILL : Pseudo<(outs mEs:$dst), (ins c12n_step4:$imm), "vlda_e_spill", "$dst, [sp, $imm]">;
198200
def VLDA_EE_SPILL : Pseudo<(outs mEEs:$dst), (ins c13n_step8:$imm), "vlda_ee_spill", "$dst, [sp, $imm]">;

llvm/lib/Target/AIE/aie2ps/AIE2PSRegisterInfo.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,8 @@ bool AIE2PSRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
189189
case AIE2PS::VST_dmw_sts_w_spill:
190190
case AIE2PS::VST_dmx_sts_bm_spill:
191191
case AIE2PS::VST_dmx_sts_x_spill:
192+
case AIE2PS::VLDA_512_COMPOSED_REG_SPILL:
193+
case AIE2PS::VST_512_COMPOSED_REG_SPILL:
192194
MI.getOperand(FIOperandNum).ChangeToImmediate(Offset);
193195
return false;
194196
case AIE2PS::LDA_R_SPILL:
@@ -395,6 +397,10 @@ AIE2PSRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
395397
return &AIE2PS::spill_eDC_to_eRRegClass;
396398
if (AIE2PS::eSRegClass.hasSubClassEq(RC))
397399
return &AIE2PS::spill_eS_to_eRRegClass;
400+
if (RC == &AIE2PS::ACC512RegClass || RC == &AIE2PS::VEC512RegClass)
401+
// using hasSubClassEq leads to register coalescer changes (spill_vec512
402+
// will be used more frequently) and thus change machine scheduling
403+
return &AIE2PS::spill_vec512_to_compositeRegClass;
398404

399405
return RC;
400406
}

llvm/lib/Target/AIE/aie2ps/AIE2PSRegisterInfo.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1207,6 +1207,10 @@ def spill_eDN_to_eR : AIE2PSScalarRegisterClass<(add eDN, eR)>;
12071207
def spill_eDJ_to_eR : AIE2PSScalarRegisterClass<(add eDJ, eR, eDN)>;
12081208
def spill_eDC_to_eR : AIE2PSScalarRegisterClass<(add eDC, eR)>;
12091209

1210+
def spill_vec512_to_composite : AIE2PSVector512RegisterClass<(add mXm, mBMm, lfh0, lfh1, lfl0, lfl1, sfl, sfh, lfe)> {
1211+
let ConsiderInPreRAScheduling = false;
1212+
}
1213+
12101214
class AIE2PVector1076FifoRegisterClass<dag reglist> :
12111215
AIE2PSRegisterClass<1088, 512, [i32], reglist>;
12121216
def sub_fifo : SubRegIndex<1024, 0>;
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#
2+
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
3+
# See https://llvm.org/LICENSE.txt for license information.
4+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
#
6+
# (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates
7+
8+
# RUN: llc -mtriple=aie2ps -verify-machineinstrs \
9+
# RUN: -run-pass=prologepilog -run-pass=postrapseudos %s -o - \
10+
# RUN: | FileCheck %s
11+
12+
# The combined spill_vec512_to_composite register class lets the
13+
# allocator place a 512-bit value in either an X (VEC512) or a BM
14+
# (ACC512) physreg, sharing one stack slot for either bank. After
15+
# allocation, eliminateFrameIndex resolves the FI to an SP-relative
16+
# immediate and expandPostRAPseudo swaps the composite pseudo for the
17+
# native opcode that matches the actual physical register chosen by
18+
# the allocator. This test exercises both branches of that swap.
19+
20+
---
21+
name: composite_spill_lowers_to_x_native
22+
tracksRegLiveness: true
23+
stack:
24+
- { id: 0, type: spill-slot, size: 64, alignment: 64 }
25+
body: |
26+
bb.0:
27+
liveins: $x0
28+
; CHECK-LABEL: name: composite_spill_lowers_to_x_native
29+
; CHECK: VST_dmx_sts_x_spill renamable $x0,
30+
; CHECK: renamable $x0 = VLDA_dmx_lda_x_spill
31+
VST_512_COMPOSED_REG_SPILL renamable $x0, %stack.0, implicit $sp :: (store (s512) into %stack.0)
32+
renamable $x0 = VLDA_512_COMPOSED_REG_SPILL %stack.0, implicit $sp :: (load (s512) from %stack.0)
33+
PseudoRET implicit $lr, implicit killed $x0
34+
...
35+
36+
---
37+
name: composite_spill_lowers_to_bm_native
38+
tracksRegLiveness: true
39+
stack:
40+
- { id: 0, type: spill-slot, size: 64, alignment: 64 }
41+
body: |
42+
bb.0:
43+
liveins: $bmll0
44+
; CHECK-LABEL: name: composite_spill_lowers_to_bm_native
45+
; CHECK: VST_dmx_sts_bm_spill renamable $bmll0,
46+
; CHECK: renamable $bmll0 = VLDA_dmx_lda_bm_spill
47+
VST_512_COMPOSED_REG_SPILL renamable $bmll0, %stack.0, implicit $sp :: (store (s512) into %stack.0)
48+
renamable $bmll0 = VLDA_512_COMPOSED_REG_SPILL %stack.0, implicit $sp :: (load (s512) from %stack.0)
49+
PseudoRET implicit $lr, implicit killed $bmll0
50+
...

llvm/test/CodeGen/AIE/aie2ps/ra/spill-vec-acc.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ body: |
3131
; CHECK-NEXT: renamable $x5 = VBCST_32 renamable $r16
3232
; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg -19312
3333
; CHECK-NEXT: renamable $x0 = VBCST_16 killed renamable $r0
34-
; CHECK-NEXT: VST_X_SPILL killed renamable $x0, %stack.0, implicit $sp :: (store (s512) into %stack.0)
34+
; CHECK-NEXT: renamable $bmll2 = COPY killed renamable $x0
3535
; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg -19360
3636
; CHECK-NEXT: renamable $x2 = VBCST_16 killed renamable $r0
3737
; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg -19424
@@ -56,10 +56,10 @@ body: |
5656
; CHECK-NEXT: {{ $}}
5757
; CHECK-NEXT: bb.1:
5858
; CHECK-NEXT: successors: %bb.1(0x80000000)
59-
; CHECK-NEXT: liveins: $cml0:0x000000000000000C, $r16, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10
59+
; CHECK-NEXT: liveins: $bmll2, $cml0:0x000000000000000C, $r16, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10
6060
; CHECK-NEXT: {{ $}}
6161
; CHECK-NEXT: renamable $x11 = VCONV_bf16_fp32_mv_conv_mv_x_srs_bf16 renamable $cml0, implicit-def dead $srf2fflags, implicit $crf2fmask, implicit $crfpconvsat, implicit $crrnd
62-
; CHECK-NEXT: renamable $x0 = VLDA_X_SPILL %stack.0, implicit $sp :: (load (s512) from %stack.0)
62+
; CHECK-NEXT: renamable $x0 = COPY renamable $bmll2
6363
; CHECK-NEXT: renamable $r18 = VGE_bf16 renamable $x11, killed renamable $x0, implicit $crbf8conf, implicit $crfp8conf
6464
; CHECK-NEXT: renamable $x0 = VSEL_16 renamable $x7, renamable $x10, killed renamable $r18, implicit $crbf8conf, implicit $crfp8conf
6565
; CHECK-NEXT: renamable $r18 = VGE_bf16 renamable $x11, renamable $x2, implicit $crbf8conf, implicit $crfp8conf

0 commit comments

Comments
 (0)