From 580ebc9087f56054650b726c7a3d2c4cc1e690ba Mon Sep 17 00:00:00 2001 From: Hamza Khallouki Date: Tue, 26 May 2026 06:26:44 -0600 Subject: [PATCH] [AIEX] Implement foldImmediate Peephole optimization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fold move-immediate + COPY pairs into a single move-immediate at the consumer site: %c = MOVA 42 → %a = MOVA 42 %a = COPY %c Guarded by hasOneNonDBGUse to avoid materializing constants multiple times when they have several consumers, which would inflate register pressure and perturb allocation. --- llvm/lib/Target/AIE/AIEBaseInstrInfo.cpp | 75 +++++++++++++++++++ llvm/lib/Target/AIE/AIEBaseInstrInfo.h | 5 ++ .../CodeGen/AIE/aie2/GlobalISel/bfloat.ll | 2 +- llvm/test/CodeGen/AIE/aie2/fadd.ll | 2 +- .../CodeGen/AIE/aie2/float_to_bfloat16.ll | 2 +- llvm/test/CodeGen/AIE/aie2/fsub.ll | 2 +- .../AIE/aie2/live-reserved-regs-call.ll | 7 +- llvm/test/CodeGen/AIE/aie2/vlda_ups.ll | 54 ++++++------- llvm/test/CodeGen/AIE/aie2/vst_srs.ll | 65 +++++++--------- llvm/test/CodeGen/AIE/aie2p/cnvf2f.ll | 14 ++-- 10 files changed, 147 insertions(+), 81 deletions(-) diff --git a/llvm/lib/Target/AIE/AIEBaseInstrInfo.cpp b/llvm/lib/Target/AIE/AIEBaseInstrInfo.cpp index 484a513b30d2..b1514d1359ae 100644 --- a/llvm/lib/Target/AIE/AIEBaseInstrInfo.cpp +++ b/llvm/lib/Target/AIE/AIEBaseInstrInfo.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -32,6 +33,7 @@ #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include @@ -40,6 +42,20 @@ using namespace llvm; 
+STATISTIC(NumFoldImmAttempts, "Number of foldImmediate attempts (AIE)");
+STATISTIC(NumFoldImmBlockedMultiUse,
+          "foldImmediate calls blocked by hasOneNonDBGUse mitigation");
+STATISTIC(NumFoldImmSuccesses, "Number of foldImmediate successes (AIE)");
+static cl::opt<bool> AIEFoldImmRequireOneUse(
+    "aie-fold-imm-require-one-use", cl::init(true), cl::Hidden,
+    cl::desc("Only fold immediate into COPY when the constant has a single "
+             "non-debug use (so the def can be DCE'd)."));
+
+static cl::opt<bool> AIEDisableFoldImm(
+    "aie-disable-fold-imm", cl::init(false), cl::Hidden,
+    cl::desc("Completely disable the AIE foldImmediate override (fall back to "
+             "default no-op behaviour)."));
+
 static cl::opt<bool>
     NoCheapInstHoisting("aie-no-cheap-inst-hoising",
                         cl::desc("Disable hoisting of cheap instructions"),
@@ -539,6 +555,81 @@ unsigned AIEBaseInstrInfo::getAIEMachineBundleSize(
   return 0;
 }
 
+// TODO: implement folding for opcodes other than COPY
+/// Fold the immediate materialized by \p DefMI into \p UseMI.
+///
+/// Only the "mov-imm feeding a full-register COPY into a virtual register"
+/// pattern is handled; the COPY is rewritten in place into the mov-imm opcode
+/// that getConstantMovOpcode() selects for the destination register.
+///
+/// \p UseMI is never erased: the caller (PeepholeOptimizer) keeps using it
+/// after this hook returns. Per the TargetInstrInfo::foldImmediate contract,
+/// \p DefMI is erased here when the fold succeeds and \p UseMI was the only
+/// non-debug use of \p Reg.
+///
+/// \returns true if \p UseMI was rewritten.
+bool AIEBaseInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
+                                     Register Reg,
+                                     MachineRegisterInfo *MRI) const {
+  if (AIEDisableFoldImm)
+    return false;
+
+  // Only handle COPY instructions as the use.
+  if (!UseMI.isCopy())
+    return false;
+
+  // A sub-register on either side means only part of a register is copied,
+  // which a full-register mov-imm cannot express. Extra (implicit) operands
+  // would survive the in-place rewrite, so leave such COPYs alone as well.
+  const MachineOperand &DstOp = UseMI.getOperand(0);
+  const MachineOperand &SrcOp = UseMI.getOperand(1);
+  if (UseMI.getNumOperands() != 2 || DstOp.getSubReg() || SrcOp.getSubReg())
+    return false;
+
+  // Check if DefMI is a move-immediate instruction.
+  if (!isIConst(DefMI.getOpcode()))
+    return false;
+
+  // Move-immediate instructions have the format: Dst = MOV Imm, so the
+  // immediate is expected in operand 1.
+  if (DefMI.getNumOperands() < 2 || !DefMI.getOperand(1).isImm())
+    return false;
+  const int64_t ImmVal = DefMI.getOperand(1).getImm();
+
+  // Only handle virtual registers - physical registers are more complex.
+  const Register DstReg = DstOp.getReg();
+  if (!DstReg.isVirtual())
+    return false;
+
+  ++NumFoldImmAttempts;
+
+  // By default, only fold when the constant has a single non-debug use.
+  // Otherwise DefMI stays alive for its other consumers and the constant ends
+  // up materialized twice, inflating register pressure.
+  const bool IsOnlyUse = MRI->hasOneNonDBGUse(Reg);
+  if (AIEFoldImmRequireOneUse && !IsOnlyUse) {
+    ++NumFoldImmBlockedMultiUse;
+    return false;
+  }
+
+  // Get the appropriate move-immediate opcode for the destination register.
+  const APInt ImmAPInt(32, ImmVal, /*isSigned=*/true);
+  const unsigned NewOpc = getConstantMovOpcode(*MRI, DstReg, ImmAPInt);
+
+  // Turn the COPY into the move-immediate in place. UseMI must stay alive:
+  // erasing it would leave the caller with a dangling reference.
+  UseMI.setDesc(get(NewOpc));
+  UseMI.getOperand(1).ChangeToImmediate(ImmAPInt.getSExtValue());
+  UseMI.addImplicitDefUseOperands(*UseMI.getMF());
+
+  // The caller assumes DefMI is gone once its only non-debug use was folded.
+  if (IsOnlyUse)
+    DefMI.eraseFromParent();
+
+  ++NumFoldImmSuccesses;
+  return true;
+}
+
 unsigned
 AIEBaseInstrInfo::getMBBSizeInBytes(const MachineBasicBlock &MBB) const {
   unsigned Size = 0;
diff --git a/llvm/lib/Target/AIE/AIEBaseInstrInfo.h b/llvm/lib/Target/AIE/AIEBaseInstrInfo.h
index bd88e7759525..304331bf4850 100644
--- a/llvm/lib/Target/AIE/AIEBaseInstrInfo.h
+++ b/llvm/lib/Target/AIE/AIEBaseInstrInfo.h
@@ -666,6 +666,11 @@ struct AIEBaseInstrInfo : public TargetInstrInfo {
                            const MachineInstr &UseMI,
                            unsigned UseIdx) const override;
 
+  /// Try to fold an immediate from DefMI into UseMI.
+  /// When DefMI is a move-immediate and UseMI is a COPY, replace the COPY
+  /// with a move-immediate to the destination register directly.
+ bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, + MachineRegisterInfo *MRI) const override; // Check if the MII points to a BUNDLE which contains a call instruction bool isCallBundle(MachineBasicBlock::iterator MII) const; // Check if the MII points to a BUNDLE which contains an instruction diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/bfloat.ll b/llvm/test/CodeGen/AIE/aie2/GlobalISel/bfloat.ll index 9d92b7b58960..95a748900a65 100644 --- a/llvm/test/CodeGen/AIE/aie2/GlobalISel/bfloat.ll +++ b/llvm/test/CodeGen/AIE/aie2/GlobalISel/bfloat.ll @@ -4,7 +4,7 @@ ; See https://llvm.org/LICENSE.txt for license information. ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ; -; (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates +; (c) Copyright 2023-2026 Advanced Micro Devices, Inc. or its affiliates ; RUN: llc -O2 -mtriple=aie2 --issue-limit=1 %s -o - | FileCheck %s %class.bfloat16 = type { bfloat } diff --git a/llvm/test/CodeGen/AIE/aie2/fadd.ll b/llvm/test/CodeGen/AIE/aie2/fadd.ll index 0222056008c5..c0d9c719b0f7 100644 --- a/llvm/test/CodeGen/AIE/aie2/fadd.ll +++ b/llvm/test/CodeGen/AIE/aie2/fadd.ll @@ -4,7 +4,7 @@ ; See https://llvm.org/LICENSE.txt for license information. ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ; -; (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates +; (c) Copyright 2023-2026 Advanced Micro Devices, Inc. or its affiliates ; ; RUN: llc -O2 -mtriple=aie2 %s -o - | FileCheck %s diff --git a/llvm/test/CodeGen/AIE/aie2/float_to_bfloat16.ll b/llvm/test/CodeGen/AIE/aie2/float_to_bfloat16.ll index 1486ab9cdb00..c035cabdae79 100644 --- a/llvm/test/CodeGen/AIE/aie2/float_to_bfloat16.ll +++ b/llvm/test/CodeGen/AIE/aie2/float_to_bfloat16.ll @@ -4,7 +4,7 @@ ; See https://llvm.org/LICENSE.txt for license information. ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ; -; (c) Copyright 2023-2024 Advanced Micro Devices, Inc. 
or its affiliates +; (c) Copyright 2023-2026 Advanced Micro Devices, Inc. or its affiliates ; RUN: llc -O2 -mtriple=aie2 %s -o - | FileCheck %s define bfloat @float_to_bf16_test(float %v) { diff --git a/llvm/test/CodeGen/AIE/aie2/fsub.ll b/llvm/test/CodeGen/AIE/aie2/fsub.ll index d6a9a53e64e9..fbf5945ebead 100644 --- a/llvm/test/CodeGen/AIE/aie2/fsub.ll +++ b/llvm/test/CodeGen/AIE/aie2/fsub.ll @@ -4,7 +4,7 @@ ; See https://llvm.org/LICENSE.txt for license information. ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ; -; (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates +; (c) Copyright 2023-2026 Advanced Micro Devices, Inc. or its affiliates ; ; RUN: llc -O2 -mtriple=aie2 %s -o - | FileCheck %s diff --git a/llvm/test/CodeGen/AIE/aie2/live-reserved-regs-call.ll b/llvm/test/CodeGen/AIE/aie2/live-reserved-regs-call.ll index f0ac73f6d653..8d48a7ed222f 100644 --- a/llvm/test/CodeGen/AIE/aie2/live-reserved-regs-call.ll +++ b/llvm/test/CodeGen/AIE/aie2/live-reserved-regs-call.ll @@ -3,7 +3,7 @@ ; See https://llvm.org/LICENSE.txt for license information. ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ; -; (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates +; (c) Copyright 2024-2026 Advanced Micro Devices, Inc. 
or its affiliates ; ; RUN: llc -mtriple=aie2 -O2 --issue-limit=1 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s @@ -29,9 +29,8 @@ entry: define void @callee1() { ; CHECK-LABEL: callee1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mova r0, #1; nopb ; nopxm ; nops -; CHECK-NEXT: ret lr -; CHECK-NEXT: mov s0, r0 // Delay Slot 5 +; CHECK-NEXT: nopb ; nopa ; nops ; ret lr ; nopm ; nopv +; CHECK-NEXT: nopa ; mov s0, #1 // Delay Slot 5 ; CHECK-NEXT: vsrs.d8.s32 wh0, cm0, s0 // Delay Slot 4 ; CHECK-NEXT: nop // Delay Slot 3 ; CHECK-NEXT: nop // Delay Slot 2 diff --git a/llvm/test/CodeGen/AIE/aie2/vlda_ups.ll b/llvm/test/CodeGen/AIE/aie2/vlda_ups.ll index 81261ef824d8..eafcfda7df27 100644 --- a/llvm/test/CodeGen/AIE/aie2/vlda_ups.ll +++ b/llvm/test/CodeGen/AIE/aie2/vlda_ups.ll @@ -11,7 +11,7 @@ define dso_local noundef <8 x i64> @_Z5test0Dv16_s(<16 x i16> noundef %arg0) local_unnamed_addr #0 { ; CHECK-LABEL: _Z5test0Dv16_s: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: nopa ; paddb [sp], #128; nopx +; CHECK-NEXT: paddb [sp], #128; nopxm ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: vst wl0, [sp, #-64] ; CHECK-NEXT: paddb [p0], #-64 @@ -19,8 +19,8 @@ define dso_local noundef <8 x i64> @_Z5test0Dv16_s(<16 x i16> noundef %arg0) loc ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: mova r0, #4 -; CHECK-NEXT: mov s0, r0 +; CHECK-NEXT: nop +; CHECK-NEXT: mov s0, #4 ; CHECK-NEXT: nop ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: paddb [p0], #-128 @@ -59,7 +59,7 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 define dso_local noundef <8 x i64> @_Z5test1Dv8_i(<8 x i32> noundef %arg0) local_unnamed_addr #0 { ; CHECK-LABEL: _Z5test1Dv8_i: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: nopa ; paddb [sp], #128; nopx +; CHECK-NEXT: paddb [sp], #128; nopxm ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: vst wl0, [sp, #-64] ; CHECK-NEXT: paddb [p0], #-64 @@ -67,8 +67,8 @@ define dso_local noundef <8 x i64> @_Z5test1Dv8_i(<8 x i32> noundef %arg0) local ; CHECK-NEXT: nop ; 
CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: mova r0, #4 -; CHECK-NEXT: mov s0, r0 +; CHECK-NEXT: nop +; CHECK-NEXT: mov s0, #4 ; CHECK-NEXT: nop ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: paddb [p0], #-128 @@ -101,7 +101,7 @@ entry: define dso_local noundef <16 x i64> @_Z5test2Dv32_a(<32 x i8> noundef %arg0) local_unnamed_addr #0 { ; CHECK-LABEL: _Z5test2Dv32_a: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: nopa ; paddb [sp], #256; nopx +; CHECK-NEXT: paddb [sp], #256; nopxm ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: vst wl0, [sp, #-128] ; CHECK-NEXT: paddb [p0], #-128 @@ -109,8 +109,8 @@ define dso_local noundef <16 x i64> @_Z5test2Dv32_a(<32 x i8> noundef %arg0) loc ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: mova r0, #4 -; CHECK-NEXT: mov s0, r0 +; CHECK-NEXT: nop +; CHECK-NEXT: mov s0, #4 ; CHECK-NEXT: nop ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: paddb [p0], #-256 @@ -147,7 +147,7 @@ entry: define dso_local noundef <8 x i64> @_Z5test3Dv16_s(<16 x i16> noundef %arg0) local_unnamed_addr #0 { ; CHECK-LABEL: _Z5test3Dv16_s: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: nopa ; paddb [sp], #128; nopx +; CHECK-NEXT: paddb [sp], #128; nopxm ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: vst wl0, [sp, #-64] ; CHECK-NEXT: paddb [p0], #-64 @@ -155,8 +155,8 @@ define dso_local noundef <8 x i64> @_Z5test3Dv16_s(<16 x i16> noundef %arg0) loc ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: mova r0, #4 -; CHECK-NEXT: mov s0, r0 +; CHECK-NEXT: nop +; CHECK-NEXT: mov s0, #4 ; CHECK-NEXT: nop ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: paddb [p0], #-128 @@ -189,7 +189,7 @@ entry: define dso_local noundef <16 x i64> @_Z5test4Dv16_s(<16 x i16> noundef %arg0) local_unnamed_addr #0 { ; CHECK-LABEL: _Z5test4Dv16_s: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: nopa ; paddb [sp], #256; nopx +; CHECK-NEXT: paddb [sp], #256; nopxm ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: vst wl0, [sp, #-128] ; CHECK-NEXT: paddb [p0], #-128 @@ -197,8 +197,8 @@ define dso_local noundef 
<16 x i64> @_Z5test4Dv16_s(<16 x i16> noundef %arg0) lo ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: mova r0, #4 -; CHECK-NEXT: mov s0, r0 +; CHECK-NEXT: nop +; CHECK-NEXT: mov s0, #4 ; CHECK-NEXT: nop ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: paddb [p0], #-256 @@ -235,7 +235,7 @@ entry: define dso_local noundef <16 x i64> @_Z5test5Dv16_s(<16 x i16> noundef %arg0) local_unnamed_addr #0 { ; CHECK-LABEL: _Z5test5Dv16_s: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: nopa ; paddb [sp], #256; nopx +; CHECK-NEXT: paddb [sp], #256; nopxm ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: vst wl0, [sp, #-128] ; CHECK-NEXT: paddb [p0], #-128 @@ -243,8 +243,8 @@ define dso_local noundef <16 x i64> @_Z5test5Dv16_s(<16 x i16> noundef %arg0) lo ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: mova r0, #4 -; CHECK-NEXT: mov s0, r0 +; CHECK-NEXT: nop +; CHECK-NEXT: mov s0, #4 ; CHECK-NEXT: nop ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: paddb [p0], #-256 @@ -281,7 +281,7 @@ entry: define dso_local noundef <16 x i64> @_Z5test6Dv16_s(<16 x i16> noundef %arg0) local_unnamed_addr #0 { ; CHECK-LABEL: _Z5test6Dv16_s: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: nopa ; paddb [sp], #256; nopx +; CHECK-NEXT: paddb [sp], #256; nopxm ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: vst wl0, [sp, #-128] ; CHECK-NEXT: paddb [p0], #-128 @@ -289,8 +289,8 @@ define dso_local noundef <16 x i64> @_Z5test6Dv16_s(<16 x i16> noundef %arg0) lo ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: mova r0, #4 -; CHECK-NEXT: mov s0, r0 +; CHECK-NEXT: nop +; CHECK-NEXT: mov s0, #4 ; CHECK-NEXT: nop ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: paddb [p0], #-256 @@ -327,7 +327,7 @@ entry: define dso_local noundef <8 x i64> @_Z5test7Dv16_s(<16 x i16> noundef %arg0) local_unnamed_addr #0 { ; CHECK-LABEL: _Z5test7Dv16_s: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: nopa ; paddb [sp], #128; nopx +; CHECK-NEXT: paddb [sp], #128; nopxm ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: vst wl0, [sp, #-64] 
; CHECK-NEXT: paddb [p0], #-64 @@ -335,8 +335,8 @@ define dso_local noundef <8 x i64> @_Z5test7Dv16_s(<16 x i16> noundef %arg0) loc ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: mova r0, #4 -; CHECK-NEXT: mov s0, r0 +; CHECK-NEXT: nop +; CHECK-NEXT: mov s0, #4 ; CHECK-NEXT: nop ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: paddb [p0], #-128 @@ -369,7 +369,7 @@ entry: define dso_local noundef <8 x i64> @_Z5test8Dv16_t(<16 x i16> noundef %arg0) local_unnamed_addr #0 { ; CHECK-LABEL: _Z5test8Dv16_t: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: nopa ; paddb [sp], #128; nopx +; CHECK-NEXT: paddb [sp], #128; nopxm ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: vst wl0, [sp, #-64] ; CHECK-NEXT: paddb [p0], #-64 @@ -377,8 +377,8 @@ define dso_local noundef <8 x i64> @_Z5test8Dv16_t(<16 x i16> noundef %arg0) loc ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: mova r0, #4 -; CHECK-NEXT: mov s0, r0 +; CHECK-NEXT: nop +; CHECK-NEXT: mov s0, #4 ; CHECK-NEXT: nop ; CHECK-NEXT: mov p0, sp ; CHECK-NEXT: paddb [p0], #-128 diff --git a/llvm/test/CodeGen/AIE/aie2/vst_srs.ll b/llvm/test/CodeGen/AIE/aie2/vst_srs.ll index 5e686843b51b..b9de44d4b3d1 100644 --- a/llvm/test/CodeGen/AIE/aie2/vst_srs.ll +++ b/llvm/test/CodeGen/AIE/aie2/vst_srs.ll @@ -4,7 +4,7 @@ ; See https://llvm.org/LICENSE.txt for license information. ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ; -; (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates +; (c) Copyright 2023-2026 Advanced Micro Devices, Inc. 
or its affiliates ; RUN: llc < %s --issue-limit=1 -verify-machineinstrs -mtriple=aie2 \ ; RUN: | FileCheck %s @@ -12,10 +12,9 @@ define dso_local noundef <16 x i16> @_Z5test0Dv16_u7__acc32(<8 x i64> noundef %acc) local_unnamed_addr #0 { ; CHECK-LABEL: _Z5test0Dv16_u7__acc32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #2 +; CHECK-NEXT: nopa ; paddb [sp], #32; nopx +; CHECK-NEXT: mov s0, #2 ; CHECK-NEXT: mov p0, sp -; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb [p0], #-32 ; CHECK-NEXT: vst.srs.d16.s32 bml0, s0, [p0, #0] ; CHECK-NEXT: nop @@ -47,10 +46,9 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 define dso_local noundef <8 x i32> @_Z5test1Dv8_u7__acc64(<8 x i64> noundef %acc) local_unnamed_addr #0 { ; CHECK-LABEL: _Z5test1Dv8_u7__acc64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #2 +; CHECK-NEXT: nopa ; paddb [sp], #32; nopx +; CHECK-NEXT: mov s0, #2 ; CHECK-NEXT: mov p0, sp -; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb [p0], #-32 ; CHECK-NEXT: vst.srs.d32.s64 bml0, s0, [p0, #0] ; CHECK-NEXT: nop @@ -76,10 +74,9 @@ entry: define dso_local noundef <16 x i16> @_Z5test2Dv16_u7__acc32(<8 x i64> noundef %acc) local_unnamed_addr #0 { ; CHECK-LABEL: _Z5test2Dv16_u7__acc32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #2 +; CHECK-NEXT: nopa ; paddb [sp], #32; nopx +; CHECK-NEXT: mov s0, #2 ; CHECK-NEXT: mov p0, sp -; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb [p0], #-32 ; CHECK-NEXT: vst.srs.s16.s32 bml0, s0, [p0, #0] ; CHECK-NEXT: nop @@ -105,10 +102,9 @@ entry: define dso_local noundef <8 x i32> @_Z5test3Dv8_u7__acc64(<8 x i64> noundef %acc) local_unnamed_addr #0 { ; CHECK-LABEL: _Z5test3Dv8_u7__acc64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #2 +; CHECK-NEXT: nopa ; paddb [sp], #32; nopx +; CHECK-NEXT: mov s0, #2 ; CHECK-NEXT: mov p0, sp -; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb 
[p0], #-32 ; CHECK-NEXT: vst.srs.s32.s64 bml0, s0, [p0, #0] ; CHECK-NEXT: nop @@ -134,10 +130,9 @@ entry: define dso_local noundef <16 x i16> @_Z5test4Dv16_u7__acc64(<16 x i64> noundef %acc) local_unnamed_addr #0 { ; CHECK-LABEL: _Z5test4Dv16_u7__acc64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #2 +; CHECK-NEXT: nopa ; paddb [sp], #32; nopx +; CHECK-NEXT: mov s0, #2 ; CHECK-NEXT: mov p0, sp -; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb [p0], #-32 ; CHECK-NEXT: vst.srs.s16.s64 cm0, s0, [p0, #0] ; CHECK-NEXT: nop @@ -163,10 +158,9 @@ entry: define dso_local noundef <32 x i8> @_Z5test5Dv32_u7__acc32(<16 x i64> noundef %acc) local_unnamed_addr #0 { ; CHECK-LABEL: _Z5test5Dv32_u7__acc32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #2 +; CHECK-NEXT: nopa ; paddb [sp], #32; nopx +; CHECK-NEXT: mov s0, #2 ; CHECK-NEXT: mov p0, sp -; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb [p0], #-32 ; CHECK-NEXT: vst.srs.s8.s32 cm0, s0, [p0, #0] ; CHECK-NEXT: nop @@ -192,10 +186,9 @@ entry: define dso_local noundef <16 x i16> @_Z5test6Dv16_u7__acc64(<16 x i64> noundef %acc) local_unnamed_addr #0 { ; CHECK-LABEL: _Z5test6Dv16_u7__acc64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #2 +; CHECK-NEXT: nopa ; paddb [sp], #32; nopx +; CHECK-NEXT: mov s0, #2 ; CHECK-NEXT: mov p0, sp -; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb [p0], #-32 ; CHECK-NEXT: vst.srs.d16.s64 cm0, s0, [p0, #0] ; CHECK-NEXT: nop @@ -221,10 +214,9 @@ entry: define dso_local noundef <32 x i8> @_Z5test7Dv32_u7__acc32(<16 x i64> noundef %acc) local_unnamed_addr #0 { ; CHECK-LABEL: _Z5test7Dv32_u7__acc32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #2 +; CHECK-NEXT: nopa ; paddb [sp], #32; nopx +; CHECK-NEXT: mov s0, #2 ; CHECK-NEXT: mov p0, sp -; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb [p0], #-32 ; CHECK-NEXT: vst.srs.d8.s32 cm0, s0, [p0, #0] ; CHECK-NEXT: nop 
@@ -250,10 +242,9 @@ entry: define dso_local noundef <16 x i16> @_Z5test8Dv16_u7__acc64(<16 x i64> noundef %acc) local_unnamed_addr #0 { ; CHECK-LABEL: _Z5test8Dv16_u7__acc64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #2 +; CHECK-NEXT: nopa ; paddb [sp], #32; nopx +; CHECK-NEXT: mov s0, #2 ; CHECK-NEXT: mov p0, sp -; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb [p0], #-32 ; CHECK-NEXT: vst.srs.s16.s64 cm0, s0, [p0, #0] ; CHECK-NEXT: nop @@ -279,10 +270,9 @@ entry: define dso_local noundef <32 x i8> @_Z5test9Dv32_u7__acc32(<16 x i64> noundef %acc) local_unnamed_addr #0 { ; CHECK-LABEL: _Z5test9Dv32_u7__acc32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #2 +; CHECK-NEXT: nopa ; paddb [sp], #32; nopx +; CHECK-NEXT: mov s0, #2 ; CHECK-NEXT: mov p0, sp -; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb [p0], #-32 ; CHECK-NEXT: vst.srs.s8.s32 cm0, s0, [p0, #0] ; CHECK-NEXT: nop @@ -308,10 +298,9 @@ entry: define dso_local noundef <16 x i16> @_Z6test10Dv16_u7__acc32(<8 x i64> noundef %acc) local_unnamed_addr #0 { ; CHECK-LABEL: _Z6test10Dv16_u7__acc32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #4 +; CHECK-NEXT: nopa ; paddb [sp], #32; nopx +; CHECK-NEXT: mov s0, #4 ; CHECK-NEXT: mov p0, sp -; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb [p0], #-32 ; CHECK-NEXT: vst.srs.d16.s32 bml0, s0, [p0, #0] ; CHECK-NEXT: nop @@ -337,10 +326,9 @@ entry: define dso_local noundef <8 x i32> @_Z6test11Dv8_u7__acc64(<8 x i64> noundef %acc) local_unnamed_addr #0 { ; CHECK-LABEL: _Z6test11Dv8_u7__acc64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: paddb [sp], #32 -; CHECK-NEXT: mova r0, #4 +; CHECK-NEXT: nopa ; paddb [sp], #32; nopx +; CHECK-NEXT: mov s0, #4 ; CHECK-NEXT: mov p0, sp -; CHECK-NEXT: mov s0, r0 ; CHECK-NEXT: paddb [p0], #-32 ; CHECK-NEXT: vst.srs.d32.s64 bml0, s0, [p0, #0] ; CHECK-NEXT: nop @@ -366,8 +354,7 @@ entry: define dso_local noundef <16 x i16> 
@test_postincrement(ptr %array, <8 x i64> noundef %acc) local_unnamed_addr #0 { ; CHECK-LABEL: test_postincrement: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: nopb ; mova r0, #2; nops ; nopxm ; nopv -; CHECK-NEXT: nopa ; mov s0, r0 +; CHECK-NEXT: nopa ; mov s0, #2 ; CHECK-NEXT: vst.srs.d16.s32 bml0, s0, [p0, #0] ; CHECK-NEXT: nop ; CHECK-NEXT: nop diff --git a/llvm/test/CodeGen/AIE/aie2p/cnvf2f.ll b/llvm/test/CodeGen/AIE/aie2p/cnvf2f.ll index 56f4907749fa..da1c6a34a757 100644 --- a/llvm/test/CodeGen/AIE/aie2p/cnvf2f.ll +++ b/llvm/test/CodeGen/AIE/aie2p/cnvf2f.ll @@ -4,16 +4,16 @@ ; See https://llvm.org/LICENSE.txt for license information. ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ; -; (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates +; (c) Copyright 2024-2026 Advanced Micro Devices, Inc. or its affiliates ; RUN: llc -mtriple=aie2p -verify-machineinstrs -o - < %s | FileCheck %s define dso_local noundef float @test_fix2float(i32 noundef %n) { ; CHECK-LABEL: test_fix2float: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv -; CHECK-NEXT: nop // Delay Slot 5 -; CHECK-NEXT: mova r0, #0 // Delay Slot 4 -; CHECK-NEXT: mov s2, r0 // Delay Slot 3 +; CHECK-NEXT: nopx // Delay Slot 5 +; CHECK-NEXT: nop // Delay Slot 4 +; CHECK-NEXT: mov s2, #0 // Delay Slot 3 ; CHECK-NEXT: mov r0, r1.fx2flt, s2 // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: @@ -25,9 +25,9 @@ define dso_local noundef i32 @test_float2fix(float noundef %n) { ; CHECK-LABEL: test_float2fix: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv -; CHECK-NEXT: nop // Delay Slot 5 -; CHECK-NEXT: mova r0, #0 // Delay Slot 4 -; CHECK-NEXT: mov s3, r0 // Delay Slot 3 +; CHECK-NEXT: nopx // Delay Slot 5 +; CHECK-NEXT: nop // Delay Slot 4 +; CHECK-NEXT: mov s3, #0 // Delay Slot 3 ; CHECK-NEXT: mov r0, r1.flt2fx, s3 // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: