Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 75 additions & 0 deletions llvm/lib/Target/AIE/AIEBaseInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
Expand All @@ -32,6 +33,7 @@
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include <limits>
Expand All @@ -40,6 +42,20 @@

using namespace llvm;

// Counters updated by AIEBaseInstrInfo::foldImmediate: one per eligible
// candidate, one per candidate rejected by the single-use guard, and one per
// COPY that was actually turned into a move-immediate.
STATISTIC(NumFoldImmAttempts, "Number of foldImmediate attempts (AIE)");
STATISTIC(NumFoldImmBlockedMultiUse,
          "foldImmediate calls blocked by hasOneNonDBGUse mitigation");
STATISTIC(NumFoldImmSuccesses, "Number of foldImmediate successes (AIE)");

// When set (the default), foldImmediate refuses to fold a constant that has
// more than one non-debug use.
static cl::opt<bool> AIEFoldImmRequireOneUse(
    "aie-fold-imm-require-one-use",
    cl::desc("Only fold immediate into COPY when the constant has a single "
             "non-debug use (so the def can be DCE'd)."),
    cl::Hidden, cl::init(true));

// Kill switch: when set, foldImmediate returns false before looking at its
// arguments.
static cl::opt<bool> AIEDisableFoldImm(
    "aie-disable-fold-imm",
    cl::desc("Completely disable the AIE foldImmediate override (fall back to "
             "default no-op behaviour)."),
    cl::Hidden, cl::init(false));


static cl::opt<bool>
NoCheapInstHoisting("aie-no-cheap-inst-hoising",
cl::desc("Disable hoisting of cheap instructions"),
Expand Down Expand Up @@ -539,6 +555,65 @@ unsigned AIEBaseInstrInfo::getAIEMachineBundleSize(
return 0;
}

/// Fold the constant materialized by \p DefMI into \p UseMI.
///
/// Only the pattern `%dst = COPY %Reg` is handled, where \p DefMI is a
/// move-immediate (isIConst) carrying its immediate in operand 1 and %dst is
/// a virtual register. On success the COPY is rewritten *in place* into the
/// move-immediate opcode that getConstantMovOpcode() selects for %dst.
/// \p UseMI is never erased: callers such as the peephole optimizer keep
/// inspecting it after this hook returns.
///
/// \param UseMI Candidate user of \p Reg; mutated on success.
/// \param DefMI Instruction defining \p Reg; erased on success when \p UseMI
///              was the only non-debug user of \p Reg, as the
///              TargetInstrInfo::foldImmediate contract requires.
/// \param Reg   Register defined by \p DefMI and read by \p UseMI.
/// \param MRI   Register info of the enclosing function (must not be null).
/// \returns true if \p UseMI was rewritten, false if nothing was changed.
///
/// TODO: implement folding for opcodes other than COPY
bool AIEBaseInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
                                     Register Reg,
                                     MachineRegisterInfo *MRI) const {
  if (AIEDisableFoldImm)
    return false;

  // Only a plain two-operand COPY is rewritten. Additional (implicit)
  // operands would have to be carried over to the new instruction, so such
  // copies are left alone instead of silently dropping them.
  if (!UseMI.isCopy() || UseMI.getNumOperands() != 2)
    return false;

  const MachineOperand &CopyDst = UseMI.getOperand(0);
  const MachineOperand &CopySrc = UseMI.getOperand(1);

  // The COPY must read exactly the register DefMI defines, and neither side
  // may name a sub-register: `%d = COPY %r.sub` only reads part of the
  // constant and `%d.sub = COPY %r` only writes part of %d, so replacing
  // either with a full-width move-immediate would change the value.
  if (!CopySrc.isReg() || CopySrc.getReg() != Reg || CopySrc.getSubReg() ||
      CopyDst.getSubReg())
    return false;

  // Check if DefMI is a move-immediate instruction.
  if (!isIConst(DefMI.getOpcode()))
    return false;

  // Move-immediate instructions have the format: Dst = MOV Imm, i.e. the
  // immediate is expected in operand 1.
  if (DefMI.getNumOperands() < 2 || !DefMI.getOperand(1).isImm())
    return false;
  const int64_t ImmVal = DefMI.getOperand(1).getImm();

  // The constant is re-materialized as a 32-bit value below. Accept anything
  // representable in 32 bits (signed or unsigned spelling) and refuse wider
  // values rather than truncating them.
  if (ImmVal < std::numeric_limits<int32_t>::min() ||
      ImmVal > static_cast<int64_t>(std::numeric_limits<uint32_t>::max()))
    return false;

  // Only handle virtual registers - physical registers are more complex.
  const Register DstReg = CopyDst.getReg();
  if (!DstReg.isVirtual())
    return false;

  ++NumFoldImmAttempts;

  // Sample this before touching UseMI: rewriting the COPY removes its use of
  // Reg from the use list.
  const bool IsOnlyUse = MRI->hasOneNonDBGUse(Reg);

  // Folding a multi-use constant keeps DefMI alive for the other consumers,
  // so the constant ends up being materialized twice, inflating register
  // pressure. By default such folds are rejected.
  if (AIEFoldImmRequireOneUse && !IsOnlyUse) {
    ++NumFoldImmBlockedMultiUse;
    return false;
  }

  // Build the 32-bit immediate by truncating a 64-bit APInt; constructing a
  // 32-bit APInt directly from a value that does not fit would either
  // truncate implicitly or assert, depending on the LLVM version.
  APInt Imm32 =
      APInt(64, static_cast<uint64_t>(ImmVal), /*isSigned=*/true).trunc(32);

  // Get the appropriate move-immediate opcode for the destination register.
  const unsigned NewOpc = getConstantMovOpcode(*MRI, DstReg, Imm32);

  // Morph the COPY into `DstReg = <NewOpc> Imm`. The instruction keeps its
  // position and debug location; implicit operands required by the new
  // opcode are appended explicitly because setDesc() does not add them.
  UseMI.setDesc(get(NewOpc));
  UseMI.getOperand(1).ChangeToImmediate(Imm32.getSExtValue());
  UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());

  // Contract of the hook: when Reg had a single non-debug use and the fold
  // succeeded, the now dead DefMI is erased by the callee.
  if (IsOnlyUse)
    DefMI.eraseFromParent();

  ++NumFoldImmSuccesses;
  return true;
}

unsigned
AIEBaseInstrInfo::getMBBSizeInBytes(const MachineBasicBlock &MBB) const {
unsigned Size = 0;
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AIE/AIEBaseInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,11 @@ struct AIEBaseInstrInfo : public TargetInstrInfo {
const MachineInstr &UseMI,
unsigned UseIdx) const override;

/// Try to fold an immediate from DefMI into UseMI.
/// When DefMI is a move-immediate and UseMI is a COPY, replace the COPY
/// with a move-immediate to the destination register directly.
/// Only COPYs whose destination is a virtual register are folded. The hook
/// can be switched off with -aie-disable-fold-imm, and by default
/// (-aie-fold-imm-require-one-use) it only folds constants that have a
/// single non-debug use.
/// \param UseMI Instruction using \p Reg; only COPY is handled.
/// \param DefMI Instruction defining \p Reg.
/// \param Reg Register holding the constant.
/// \param MRI Register info used to query the uses of \p Reg.
/// \returns true if the fold was performed, false otherwise.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
MachineRegisterInfo *MRI) const override;
// Check if the MII points to a BUNDLE which contains a call instruction
bool isCallBundle(MachineBasicBlock::iterator MII) const;
// Check if the MII points to a BUNDLE which contains an instruction
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AIE/aie2/GlobalISel/bfloat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
; See https://llvm.org/LICENSE.txt for license information.
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
;
; (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
; (c) Copyright 2023-2026 Advanced Micro Devices, Inc. or its affiliates
; RUN: llc -O2 -mtriple=aie2 --issue-limit=1 %s -o - | FileCheck %s
%class.bfloat16 = type { bfloat }

Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AIE/aie2/fadd.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
; See https://llvm.org/LICENSE.txt for license information.
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
;
; (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates
; (c) Copyright 2023-2026 Advanced Micro Devices, Inc. or its affiliates
;
; RUN: llc -O2 -mtriple=aie2 %s -o - | FileCheck %s

Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AIE/aie2/float_to_bfloat16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
; See https://llvm.org/LICENSE.txt for license information.
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
;
; (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
; (c) Copyright 2023-2026 Advanced Micro Devices, Inc. or its affiliates
; RUN: llc -O2 -mtriple=aie2 %s -o - | FileCheck %s

define bfloat @float_to_bf16_test(float %v) {
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AIE/aie2/fsub.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
; See https://llvm.org/LICENSE.txt for license information.
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
;
; (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates
; (c) Copyright 2023-2026 Advanced Micro Devices, Inc. or its affiliates
;
; RUN: llc -O2 -mtriple=aie2 %s -o - | FileCheck %s

Expand Down
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/AIE/aie2/live-reserved-regs-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
; See https://llvm.org/LICENSE.txt for license information.
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
;
; (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
; (c) Copyright 2024-2026 Advanced Micro Devices, Inc. or its affiliates
;
; RUN: llc -mtriple=aie2 -O2 --issue-limit=1 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s

Expand All @@ -29,9 +29,8 @@ entry:
define void @callee1() {
; CHECK-LABEL: callee1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mova r0, #1; nopb ; nopxm ; nops
; CHECK-NEXT: ret lr
; CHECK-NEXT: mov s0, r0 // Delay Slot 5
; CHECK-NEXT: nopb ; nopa ; nops ; ret lr ; nopm ; nopv
; CHECK-NEXT: nopa ; mov s0, #1 // Delay Slot 5
; CHECK-NEXT: vsrs.d8.s32 wh0, cm0, s0 // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 2
Expand Down
54 changes: 27 additions & 27 deletions llvm/test/CodeGen/AIE/aie2/vlda_ups.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@
define dso_local noundef <8 x i64> @_Z5test0Dv16_s(<16 x i16> noundef %arg0) local_unnamed_addr #0 {
; CHECK-LABEL: _Z5test0Dv16_s:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: nopa ; paddb [sp], #128; nopx
; CHECK-NEXT: paddb [sp], #128; nopxm
; CHECK-NEXT: mov p0, sp
; CHECK-NEXT: vst wl0, [sp, #-64]
; CHECK-NEXT: paddb [p0], #-64
; CHECK-NEXT: vlda.ups.s32.s16 bmh0, s0, [p0, #0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: mova r0, #4
; CHECK-NEXT: mov s0, r0
; CHECK-NEXT: nop
; CHECK-NEXT: mov s0, #4
; CHECK-NEXT: nop
; CHECK-NEXT: mov p0, sp
; CHECK-NEXT: paddb [p0], #-128
Expand Down Expand Up @@ -59,16 +59,16 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
define dso_local noundef <8 x i64> @_Z5test1Dv8_i(<8 x i32> noundef %arg0) local_unnamed_addr #0 {
; CHECK-LABEL: _Z5test1Dv8_i:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: nopa ; paddb [sp], #128; nopx
; CHECK-NEXT: paddb [sp], #128; nopxm
; CHECK-NEXT: mov p0, sp
; CHECK-NEXT: vst wl0, [sp, #-64]
; CHECK-NEXT: paddb [p0], #-64
; CHECK-NEXT: vlda.ups.s64.d32 bmh0, s0, [p0, #0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: mova r0, #4
; CHECK-NEXT: mov s0, r0
; CHECK-NEXT: nop
; CHECK-NEXT: mov s0, #4
; CHECK-NEXT: nop
; CHECK-NEXT: mov p0, sp
; CHECK-NEXT: paddb [p0], #-128
Expand Down Expand Up @@ -101,16 +101,16 @@ entry:
define dso_local noundef <16 x i64> @_Z5test2Dv32_a(<32 x i8> noundef %arg0) local_unnamed_addr #0 {
; CHECK-LABEL: _Z5test2Dv32_a:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: nopa ; paddb [sp], #256; nopx
; CHECK-NEXT: paddb [sp], #256; nopxm
; CHECK-NEXT: mov p0, sp
; CHECK-NEXT: vst wl0, [sp, #-128]
; CHECK-NEXT: paddb [p0], #-128
; CHECK-NEXT: vlda.ups.s32.s8 cm0, s0, [p0, #0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: mova r0, #4
; CHECK-NEXT: mov s0, r0
; CHECK-NEXT: nop
; CHECK-NEXT: mov s0, #4
; CHECK-NEXT: nop
; CHECK-NEXT: mov p0, sp
; CHECK-NEXT: paddb [p0], #-256
Expand Down Expand Up @@ -147,16 +147,16 @@ entry:
define dso_local noundef <8 x i64> @_Z5test3Dv16_s(<16 x i16> noundef %arg0) local_unnamed_addr #0 {
; CHECK-LABEL: _Z5test3Dv16_s:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: nopa ; paddb [sp], #128; nopx
; CHECK-NEXT: paddb [sp], #128; nopxm
; CHECK-NEXT: mov p0, sp
; CHECK-NEXT: vst wl0, [sp, #-64]
; CHECK-NEXT: paddb [p0], #-64
; CHECK-NEXT: vlda.ups.s32.s16 bmh0, s0, [p0, #0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: mova r0, #4
; CHECK-NEXT: mov s0, r0
; CHECK-NEXT: nop
; CHECK-NEXT: mov s0, #4
; CHECK-NEXT: nop
; CHECK-NEXT: mov p0, sp
; CHECK-NEXT: paddb [p0], #-128
Expand Down Expand Up @@ -189,16 +189,16 @@ entry:
define dso_local noundef <16 x i64> @_Z5test4Dv16_s(<16 x i16> noundef %arg0) local_unnamed_addr #0 {
; CHECK-LABEL: _Z5test4Dv16_s:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: nopa ; paddb [sp], #256; nopx
; CHECK-NEXT: paddb [sp], #256; nopxm
; CHECK-NEXT: mov p0, sp
; CHECK-NEXT: vst wl0, [sp, #-128]
; CHECK-NEXT: paddb [p0], #-128
; CHECK-NEXT: vlda.ups.s64.s16 cm0, s0, [p0, #0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: mova r0, #4
; CHECK-NEXT: mov s0, r0
; CHECK-NEXT: nop
; CHECK-NEXT: mov s0, #4
; CHECK-NEXT: nop
; CHECK-NEXT: mov p0, sp
; CHECK-NEXT: paddb [p0], #-256
Expand Down Expand Up @@ -235,16 +235,16 @@ entry:
define dso_local noundef <16 x i64> @_Z5test5Dv16_s(<16 x i16> noundef %arg0) local_unnamed_addr #0 {
; CHECK-LABEL: _Z5test5Dv16_s:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: nopa ; paddb [sp], #256; nopx
; CHECK-NEXT: paddb [sp], #256; nopxm
; CHECK-NEXT: mov p0, sp
; CHECK-NEXT: vst wl0, [sp, #-128]
; CHECK-NEXT: paddb [p0], #-128
; CHECK-NEXT: vlda.ups.s64.s16 cm0, s0, [p0, #0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: mova r0, #4
; CHECK-NEXT: mov s0, r0
; CHECK-NEXT: nop
; CHECK-NEXT: mov s0, #4
; CHECK-NEXT: nop
; CHECK-NEXT: mov p0, sp
; CHECK-NEXT: paddb [p0], #-256
Expand Down Expand Up @@ -281,16 +281,16 @@ entry:
define dso_local noundef <16 x i64> @_Z5test6Dv16_s(<16 x i16> noundef %arg0) local_unnamed_addr #0 {
; CHECK-LABEL: _Z5test6Dv16_s:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: nopa ; paddb [sp], #256; nopx
; CHECK-NEXT: paddb [sp], #256; nopxm
; CHECK-NEXT: mov p0, sp
; CHECK-NEXT: vst wl0, [sp, #-128]
; CHECK-NEXT: paddb [p0], #-128
; CHECK-NEXT: vlda.ups.s64.s16 cm0, s0, [p0, #0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: mova r0, #4
; CHECK-NEXT: mov s0, r0
; CHECK-NEXT: nop
; CHECK-NEXT: mov s0, #4
; CHECK-NEXT: nop
; CHECK-NEXT: mov p0, sp
; CHECK-NEXT: paddb [p0], #-256
Expand Down Expand Up @@ -327,16 +327,16 @@ entry:
define dso_local noundef <8 x i64> @_Z5test7Dv16_s(<16 x i16> noundef %arg0) local_unnamed_addr #0 {
; CHECK-LABEL: _Z5test7Dv16_s:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: nopa ; paddb [sp], #128; nopx
; CHECK-NEXT: paddb [sp], #128; nopxm
; CHECK-NEXT: mov p0, sp
; CHECK-NEXT: vst wl0, [sp, #-64]
; CHECK-NEXT: paddb [p0], #-64
; CHECK-NEXT: vlda.ups.s32.s16 bmh0, s0, [p0, #0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: mova r0, #4
; CHECK-NEXT: mov s0, r0
; CHECK-NEXT: nop
; CHECK-NEXT: mov s0, #4
; CHECK-NEXT: nop
; CHECK-NEXT: mov p0, sp
; CHECK-NEXT: paddb [p0], #-128
Expand Down Expand Up @@ -369,16 +369,16 @@ entry:
define dso_local noundef <8 x i64> @_Z5test8Dv16_t(<16 x i16> noundef %arg0) local_unnamed_addr #0 {
; CHECK-LABEL: _Z5test8Dv16_t:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: nopa ; paddb [sp], #128; nopx
; CHECK-NEXT: paddb [sp], #128; nopxm
; CHECK-NEXT: mov p0, sp
; CHECK-NEXT: vst wl0, [sp, #-64]
; CHECK-NEXT: paddb [p0], #-64
; CHECK-NEXT: vlda.ups.s32.d16 bmh0, s0, [p0, #0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: mova r0, #4
; CHECK-NEXT: mov s0, r0
; CHECK-NEXT: nop
; CHECK-NEXT: mov s0, #4
; CHECK-NEXT: nop
; CHECK-NEXT: mov p0, sp
; CHECK-NEXT: paddb [p0], #-128
Expand Down
Loading
Loading