Skip to content

Commit e9815c3

Browse files
committed
[AIEX] Implement foldImmediate Peephole optimization
Fold move-immediate + COPY pairs into a single move-immediate at the consumer site. Before: `%c = MOVA 42; %a = COPY %c`. After: `%a = MOVA 42`. The fold is guarded by hasOneNonDBGUse to avoid materializing a constant multiple times when it has several consumers, which would inflate register pressure and perturb allocation.
1 parent c8ec7bb commit e9815c3

8 files changed

Lines changed: 149 additions & 84 deletions

File tree

llvm/lib/Target/AIE/AIEBaseInstrInfo.cpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "llvm/ADT/STLExtras.h"
2525
#include "llvm/ADT/SmallSet.h"
2626
#include "llvm/ADT/SmallVector.h"
27+
#include "llvm/ADT/Statistic.h"
2728
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
2829
#include "llvm/CodeGen/LivePhysRegs.h"
2930
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -32,6 +33,7 @@
3233
#include "llvm/CodeGen/RegisterScavenging.h"
3334
#include "llvm/MC/MCInstrItineraries.h"
3435
#include "llvm/MC/TargetRegistry.h"
36+
#include "llvm/Support/CommandLine.h"
3537
#include "llvm/Support/Debug.h"
3638
#include "llvm/Support/ErrorHandling.h"
3739
#include <limits>
@@ -40,6 +42,20 @@
4042

4143
using namespace llvm;
4244

45+
// Counters for AIEBaseInstrInfo::foldImmediate.
// NumFoldImmAttempts is bumped once the cheap structural checks on the
// COPY / move-immediate pair have passed.
STATISTIC(NumFoldImmAttempts, "Number of foldImmediate attempts (AIE)");
46+
// Bumped when the single-use requirement below rejects a candidate.
STATISTIC(NumFoldImmBlockedMultiUse,
47+
"foldImmediate calls blocked by hasOneNonDBGUse mitigation");
48+
// Bumped right before foldImmediate reports a successful fold.
STATISTIC(NumFoldImmSuccesses, "Number of foldImmediate successes (AIE)");
49+
// Hidden flag, on by default: foldImmediate refuses to fold unless the
// constant register has exactly one non-debug use.
static cl::opt<bool> AIEFoldImmRequireOneUse(
50+
"aie-fold-imm-require-one-use", cl::init(true), cl::Hidden,
51+
cl::desc("Only fold immediate into COPY when the constant has a single "
52+
"non-debug use (so the def can be DCE'd)."));
53+
54+
// Hidden flag, off by default: when set, foldImmediate returns false
// immediately without inspecting its arguments.
static cl::opt<bool> AIEDisableFoldImm(
55+
"aie-disable-fold-imm", cl::init(false), cl::Hidden,
56+
cl::desc("Completely disable the AIE foldImmediate override (fall back to "
57+
"default no-op behaviour)."));
58+
4359
static cl::opt<bool>
4460
NoCheapInstHoisting("aie-no-cheap-inst-hoising",
4561
cl::desc("Disable hoisting of cheap instructions"),
@@ -539,6 +555,64 @@ unsigned AIEBaseInstrInfo::getAIEMachineBundleSize(
539555
return 0;
540556
}
541557

558+
// TODO: implement folding for opcodes other than COPY
559+
bool AIEBaseInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
560+
Register Reg,
561+
MachineRegisterInfo *MRI) const {
562+
563+
if (AIEDisableFoldImm)
564+
return false;
565+
566+
// Only handle COPY instructions as the use
567+
if (!UseMI.isCopy())
568+
return false;
569+
570+
// Check if DefMI is a move-immediate instruction
571+
if (!isIConst(DefMI.getOpcode()))
572+
return false;
573+
574+
// Bail out if operand 1 is not an immediate (e.g., a GlobalAddress
575+
// relocation)
576+
if (!DefMI.getOperand(1).isImm())
577+
return false;
578+
579+
int64_t ImmVal = DefMI.getOperand(1).getImm();
580+
581+
// Get the destination register of the COPY
582+
Register DstReg = UseMI.getOperand(0).getReg();
583+
584+
// Only handle virtual registers - physical registers are more complex
585+
if (!DstReg.isVirtual())
586+
return false;
587+
588+
++NumFoldImmAttempts;
589+
590+
// Only fold when the constant has a single non-debug use.
591+
// The TargetInstrInfo::foldImmediate contract lets the caller (PeepholeOpt)
592+
// erase DefMI when hasOneNonDBGUse(Reg) holds; without this guard we leave
593+
// DefMI alive for other consumers and end up materializing the constant
594+
// twice, inflating register pressure.
595+
if (AIEFoldImmRequireOneUse && !MRI->hasOneNonDBGUse(Reg)) {
596+
++NumFoldImmBlockedMultiUse;
597+
return false;
598+
}
599+
600+
// Get the appropriate move-immediate opcode for the destination register
601+
APInt ImmAPInt(32, ImmVal, /*isSigned=*/true);
602+
unsigned NewOpc = getConstantMovOpcode(*MRI, DstReg, ImmAPInt);
603+
604+
// Build the new move-immediate instruction
605+
MachineBasicBlock &MBB = *UseMI.getParent();
606+
const DebugLoc &DL = UseMI.getDebugLoc();
607+
BuildMI(MBB, UseMI, DL, get(NewOpc), DstReg).addImm(ImmAPInt.getSExtValue());
608+
609+
// Remove the old COPY
610+
UseMI.eraseFromParent();
611+
612+
++NumFoldImmSuccesses;
613+
return true;
614+
}
615+
542616
unsigned
543617
AIEBaseInstrInfo::getMBBSizeInBytes(const MachineBasicBlock &MBB) const {
544618
unsigned Size = 0;

llvm/lib/Target/AIE/AIEBaseInstrInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -666,6 +666,11 @@ struct AIEBaseInstrInfo : public TargetInstrInfo {
666666
const MachineInstr &UseMI,
667667
unsigned UseIdx) const override;
668668

669+
/// Try to fold an immediate from DefMI into UseMI.
670+
/// When DefMI is a move-immediate and UseMI is a COPY, replace the COPY
671+
/// with a move-immediate to the destination register directly.
/// Only COPYs whose destination is a virtual register are handled.
/// \returns true if the fold was performed, false if nothing was changed.
672+
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
673+
MachineRegisterInfo *MRI) const override;
669674
// Check if the MII points to a BUNDLE which contains a call instruction
670675
bool isCallBundle(MachineBasicBlock::iterator MII) const;
671676
// Check if the MII points to a BUNDLE which contains an instruction

llvm/test/CodeGen/AIE/aie2/live-reserved-regs-call.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
; See https://llvm.org/LICENSE.txt for license information.
44
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
55
;
6-
; (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
6+
; (c) Copyright 2024-2026 Advanced Micro Devices, Inc. or its affiliates
77
;
88
; RUN: llc -mtriple=aie2 -O2 --issue-limit=1 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
99

@@ -29,9 +29,8 @@ entry:
2929
define void @callee1() {
3030
; CHECK-LABEL: callee1:
3131
; CHECK: // %bb.0: // %entry
32-
; CHECK-NEXT: mova r0, #1; nopb ; nopxm ; nops
33-
; CHECK-NEXT: ret lr
34-
; CHECK-NEXT: mov s0, r0 // Delay Slot 5
32+
; CHECK-NEXT: nopb ; nopa ; nops ; ret lr ; nopm ; nopv
33+
; CHECK-NEXT: nopa ; mov s0, #1 // Delay Slot 5
3534
; CHECK-NEXT: vsrs.d8.s32 wh0, cm0, s0 // Delay Slot 4
3635
; CHECK-NEXT: nop // Delay Slot 3
3736
; CHECK-NEXT: nop // Delay Slot 2

llvm/test/CodeGen/AIE/aie2/vlda_ups.ll

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,16 @@
1111
define dso_local noundef <8 x i64> @_Z5test0Dv16_s(<16 x i16> noundef %arg0) local_unnamed_addr #0 {
1212
; CHECK-LABEL: _Z5test0Dv16_s:
1313
; CHECK: // %bb.0: // %entry
14-
; CHECK-NEXT: nopa ; paddb [sp], #128; nopx
14+
; CHECK-NEXT: paddb [sp], #128; nopxm
1515
; CHECK-NEXT: mov p0, sp
1616
; CHECK-NEXT: vst wl0, [sp, #-64]
1717
; CHECK-NEXT: paddb [p0], #-64
1818
; CHECK-NEXT: vlda.ups.s32.s16 bmh0, s0, [p0, #0]
1919
; CHECK-NEXT: nop
2020
; CHECK-NEXT: nop
2121
; CHECK-NEXT: nop
22-
; CHECK-NEXT: mova r0, #4
23-
; CHECK-NEXT: mov s0, r0
22+
; CHECK-NEXT: nop
23+
; CHECK-NEXT: mov s0, #4
2424
; CHECK-NEXT: nop
2525
; CHECK-NEXT: mov p0, sp
2626
; CHECK-NEXT: paddb [p0], #-128
@@ -59,16 +59,16 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
5959
define dso_local noundef <8 x i64> @_Z5test1Dv8_i(<8 x i32> noundef %arg0) local_unnamed_addr #0 {
6060
; CHECK-LABEL: _Z5test1Dv8_i:
6161
; CHECK: // %bb.0: // %entry
62-
; CHECK-NEXT: nopa ; paddb [sp], #128; nopx
62+
; CHECK-NEXT: paddb [sp], #128; nopxm
6363
; CHECK-NEXT: mov p0, sp
6464
; CHECK-NEXT: vst wl0, [sp, #-64]
6565
; CHECK-NEXT: paddb [p0], #-64
6666
; CHECK-NEXT: vlda.ups.s64.d32 bmh0, s0, [p0, #0]
6767
; CHECK-NEXT: nop
6868
; CHECK-NEXT: nop
6969
; CHECK-NEXT: nop
70-
; CHECK-NEXT: mova r0, #4
71-
; CHECK-NEXT: mov s0, r0
70+
; CHECK-NEXT: nop
71+
; CHECK-NEXT: mov s0, #4
7272
; CHECK-NEXT: nop
7373
; CHECK-NEXT: mov p0, sp
7474
; CHECK-NEXT: paddb [p0], #-128
@@ -101,16 +101,16 @@ entry:
101101
define dso_local noundef <16 x i64> @_Z5test2Dv32_a(<32 x i8> noundef %arg0) local_unnamed_addr #0 {
102102
; CHECK-LABEL: _Z5test2Dv32_a:
103103
; CHECK: // %bb.0: // %entry
104-
; CHECK-NEXT: nopa ; paddb [sp], #256; nopx
104+
; CHECK-NEXT: paddb [sp], #256; nopxm
105105
; CHECK-NEXT: mov p0, sp
106106
; CHECK-NEXT: vst wl0, [sp, #-128]
107107
; CHECK-NEXT: paddb [p0], #-128
108108
; CHECK-NEXT: vlda.ups.s32.s8 cm0, s0, [p0, #0]
109109
; CHECK-NEXT: nop
110110
; CHECK-NEXT: nop
111111
; CHECK-NEXT: nop
112-
; CHECK-NEXT: mova r0, #4
113-
; CHECK-NEXT: mov s0, r0
112+
; CHECK-NEXT: nop
113+
; CHECK-NEXT: mov s0, #4
114114
; CHECK-NEXT: nop
115115
; CHECK-NEXT: mov p0, sp
116116
; CHECK-NEXT: paddb [p0], #-256
@@ -147,16 +147,16 @@ entry:
147147
define dso_local noundef <8 x i64> @_Z5test3Dv16_s(<16 x i16> noundef %arg0) local_unnamed_addr #0 {
148148
; CHECK-LABEL: _Z5test3Dv16_s:
149149
; CHECK: // %bb.0: // %entry
150-
; CHECK-NEXT: nopa ; paddb [sp], #128; nopx
150+
; CHECK-NEXT: paddb [sp], #128; nopxm
151151
; CHECK-NEXT: mov p0, sp
152152
; CHECK-NEXT: vst wl0, [sp, #-64]
153153
; CHECK-NEXT: paddb [p0], #-64
154154
; CHECK-NEXT: vlda.ups.s32.s16 bmh0, s0, [p0, #0]
155155
; CHECK-NEXT: nop
156156
; CHECK-NEXT: nop
157157
; CHECK-NEXT: nop
158-
; CHECK-NEXT: mova r0, #4
159-
; CHECK-NEXT: mov s0, r0
158+
; CHECK-NEXT: nop
159+
; CHECK-NEXT: mov s0, #4
160160
; CHECK-NEXT: nop
161161
; CHECK-NEXT: mov p0, sp
162162
; CHECK-NEXT: paddb [p0], #-128
@@ -189,16 +189,16 @@ entry:
189189
define dso_local noundef <16 x i64> @_Z5test4Dv16_s(<16 x i16> noundef %arg0) local_unnamed_addr #0 {
190190
; CHECK-LABEL: _Z5test4Dv16_s:
191191
; CHECK: // %bb.0: // %entry
192-
; CHECK-NEXT: nopa ; paddb [sp], #256; nopx
192+
; CHECK-NEXT: paddb [sp], #256; nopxm
193193
; CHECK-NEXT: mov p0, sp
194194
; CHECK-NEXT: vst wl0, [sp, #-128]
195195
; CHECK-NEXT: paddb [p0], #-128
196196
; CHECK-NEXT: vlda.ups.s64.s16 cm0, s0, [p0, #0]
197197
; CHECK-NEXT: nop
198198
; CHECK-NEXT: nop
199199
; CHECK-NEXT: nop
200-
; CHECK-NEXT: mova r0, #4
201-
; CHECK-NEXT: mov s0, r0
200+
; CHECK-NEXT: nop
201+
; CHECK-NEXT: mov s0, #4
202202
; CHECK-NEXT: nop
203203
; CHECK-NEXT: mov p0, sp
204204
; CHECK-NEXT: paddb [p0], #-256
@@ -235,16 +235,16 @@ entry:
235235
define dso_local noundef <16 x i64> @_Z5test5Dv16_s(<16 x i16> noundef %arg0) local_unnamed_addr #0 {
236236
; CHECK-LABEL: _Z5test5Dv16_s:
237237
; CHECK: // %bb.0: // %entry
238-
; CHECK-NEXT: nopa ; paddb [sp], #256; nopx
238+
; CHECK-NEXT: paddb [sp], #256; nopxm
239239
; CHECK-NEXT: mov p0, sp
240240
; CHECK-NEXT: vst wl0, [sp, #-128]
241241
; CHECK-NEXT: paddb [p0], #-128
242242
; CHECK-NEXT: vlda.ups.s64.s16 cm0, s0, [p0, #0]
243243
; CHECK-NEXT: nop
244244
; CHECK-NEXT: nop
245245
; CHECK-NEXT: nop
246-
; CHECK-NEXT: mova r0, #4
247-
; CHECK-NEXT: mov s0, r0
246+
; CHECK-NEXT: nop
247+
; CHECK-NEXT: mov s0, #4
248248
; CHECK-NEXT: nop
249249
; CHECK-NEXT: mov p0, sp
250250
; CHECK-NEXT: paddb [p0], #-256
@@ -281,16 +281,16 @@ entry:
281281
define dso_local noundef <16 x i64> @_Z5test6Dv16_s(<16 x i16> noundef %arg0) local_unnamed_addr #0 {
282282
; CHECK-LABEL: _Z5test6Dv16_s:
283283
; CHECK: // %bb.0: // %entry
284-
; CHECK-NEXT: nopa ; paddb [sp], #256; nopx
284+
; CHECK-NEXT: paddb [sp], #256; nopxm
285285
; CHECK-NEXT: mov p0, sp
286286
; CHECK-NEXT: vst wl0, [sp, #-128]
287287
; CHECK-NEXT: paddb [p0], #-128
288288
; CHECK-NEXT: vlda.ups.s64.s16 cm0, s0, [p0, #0]
289289
; CHECK-NEXT: nop
290290
; CHECK-NEXT: nop
291291
; CHECK-NEXT: nop
292-
; CHECK-NEXT: mova r0, #4
293-
; CHECK-NEXT: mov s0, r0
292+
; CHECK-NEXT: nop
293+
; CHECK-NEXT: mov s0, #4
294294
; CHECK-NEXT: nop
295295
; CHECK-NEXT: mov p0, sp
296296
; CHECK-NEXT: paddb [p0], #-256
@@ -327,16 +327,16 @@ entry:
327327
define dso_local noundef <8 x i64> @_Z5test7Dv16_s(<16 x i16> noundef %arg0) local_unnamed_addr #0 {
328328
; CHECK-LABEL: _Z5test7Dv16_s:
329329
; CHECK: // %bb.0: // %entry
330-
; CHECK-NEXT: nopa ; paddb [sp], #128; nopx
330+
; CHECK-NEXT: paddb [sp], #128; nopxm
331331
; CHECK-NEXT: mov p0, sp
332332
; CHECK-NEXT: vst wl0, [sp, #-64]
333333
; CHECK-NEXT: paddb [p0], #-64
334334
; CHECK-NEXT: vlda.ups.s32.s16 bmh0, s0, [p0, #0]
335335
; CHECK-NEXT: nop
336336
; CHECK-NEXT: nop
337337
; CHECK-NEXT: nop
338-
; CHECK-NEXT: mova r0, #4
339-
; CHECK-NEXT: mov s0, r0
338+
; CHECK-NEXT: nop
339+
; CHECK-NEXT: mov s0, #4
340340
; CHECK-NEXT: nop
341341
; CHECK-NEXT: mov p0, sp
342342
; CHECK-NEXT: paddb [p0], #-128
@@ -369,16 +369,16 @@ entry:
369369
define dso_local noundef <8 x i64> @_Z5test8Dv16_t(<16 x i16> noundef %arg0) local_unnamed_addr #0 {
370370
; CHECK-LABEL: _Z5test8Dv16_t:
371371
; CHECK: // %bb.0: // %entry
372-
; CHECK-NEXT: nopa ; paddb [sp], #128; nopx
372+
; CHECK-NEXT: paddb [sp], #128; nopxm
373373
; CHECK-NEXT: mov p0, sp
374374
; CHECK-NEXT: vst wl0, [sp, #-64]
375375
; CHECK-NEXT: paddb [p0], #-64
376376
; CHECK-NEXT: vlda.ups.s32.d16 bmh0, s0, [p0, #0]
377377
; CHECK-NEXT: nop
378378
; CHECK-NEXT: nop
379379
; CHECK-NEXT: nop
380-
; CHECK-NEXT: mova r0, #4
381-
; CHECK-NEXT: mov s0, r0
380+
; CHECK-NEXT: nop
381+
; CHECK-NEXT: mov s0, #4
382382
; CHECK-NEXT: nop
383383
; CHECK-NEXT: mov p0, sp
384384
; CHECK-NEXT: paddb [p0], #-128

0 commit comments

Comments
 (0)