Skip to content

Commit 7507012

Browse files
authored
[AMDGPU][GISEL] Adding new reg bank select rules for G_DYN_STACKALLOC (llvm#200369)
Add register bank selection and legalization support for G_DYN_STACKALLOC in the new RegBankLegalize framework.
1 parent f3d5fda commit 7507012

8 files changed

Lines changed: 133 additions & 97 deletions

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "AMDGPURegisterBankInfo.h"
2020
#include "GCNSubtarget.h"
2121
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22+
#include "SIMachineFunctionInfo.h"
2223
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
2324
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
2425
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -1304,6 +1305,61 @@ bool RegBankLegalizeHelper::lower(MachineInstr &MI,
13041305
}
13051306
return true;
13061307
}
1308+
case DynStackAlloc: {
1309+
const auto &TFI = *ST.getFrameLowering();
1310+
// Guard in case the stack growth direction ever changes with scratch
1311+
// instructions.
1312+
assert(TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp &&
1313+
"Stack grows upwards for AMDGPU");
1314+
1315+
Register Dst = MI.getOperand(0).getReg();
1316+
Register AllocSize = MI.getOperand(1).getReg();
1317+
Align Alignment = assumeAligned(MI.getOperand(2).getImm());
1318+
1319+
// Erase MI before building the new instructions to avoid hitting the
1320+
// multiple-Dst assert with CSE.
1321+
B.setInsertPt(*MI.getParent(), std::next(MI.getIterator()));
1322+
MI.eraseFromParent();
1323+
1324+
if (MRI.getRegBank(AllocSize) != SgprRB) {
1325+
auto WaveReduction =
1326+
B.buildIntrinsic(Intrinsic::amdgcn_wave_reduce_umax, {SgprRB_S32})
1327+
.addUse(AllocSize)
1328+
.addImm(0);
1329+
AllocSize = WaveReduction.getReg(0);
1330+
}
1331+
1332+
LLT PtrTy = MRI.getType(Dst);
1333+
assert(PtrTy.getSizeInBits() == 32 &&
1334+
"Expected 32-bit pointer for stack allocation");
1335+
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1336+
Register SPReg = Info->getStackPtrOffsetReg();
1337+
1338+
// When using flat-scratch, the stack offset is unscaled.
1339+
const bool HasFlatScratch = ST.hasFlatScratchEnabled();
1340+
const unsigned WavefrontSizeLog2 = ST.getWavefrontSizeLog2();
1341+
1342+
Register AdjustedSize = AllocSize;
1343+
if (!HasFlatScratch) {
1344+
auto WaveSize = B.buildConstant(SgprRB_S32, WavefrontSizeLog2);
1345+
AdjustedSize = B.buildShl(SgprRB_S32, AllocSize, WaveSize).getReg(0);
1346+
}
1347+
if (Alignment > TFI.getStackAlign()) {
1348+
const uint64_t EffectiveAlignment =
1349+
Alignment.value() << (HasFlatScratch ? 0 : WavefrontSizeLog2);
1350+
auto OldSP = B.buildCopy({SgprRB, PtrTy}, SPReg);
1351+
auto Tmp1 =
1352+
B.buildPtrAdd({SgprRB, PtrTy}, OldSP,
1353+
B.buildConstant(SgprRB_S32, EffectiveAlignment - 1));
1354+
uint64_t Mask = maskTrailingZeros<uint64_t>(Log2_64(EffectiveAlignment));
1355+
B.buildPtrMask(Dst, Tmp1, B.buildConstant(SgprRB_S32, Mask));
1356+
} else {
1357+
B.buildCopy(Dst, SPReg);
1358+
}
1359+
auto PtrAdd = B.buildPtrAdd({SgprRB, PtrTy}, Dst, AdjustedSize);
1360+
B.buildCopy(SPReg, PtrAdd);
1361+
return true;
1362+
}
13071363
case WidenLoad: {
13081364
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
13091365
if (DstTy == S96)

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1333,6 +1333,10 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
13331333
.Any({{UniP3}, {{SgprP3}, {SgprP3, Sgpr32}}})
13341334
.Any({{DivP3}, {{VgprP3}, {VgprP3, Vgpr32}}});
13351335

1336+
addRulesForGOpcs({G_DYN_STACKALLOC})
1337+
.Any({{UniP5, UniS32}, {{SgprP5}, {Sgpr32}, DynStackAlloc}})
1338+
.Any({{UniP5, DivS32}, {{SgprP5}, {Vgpr32}, DynStackAlloc}});
1339+
13361340
addRulesForGOpcs({G_ABS}, Standard)
13371341
.Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}})
13381342
.Div(S16, {{Vgpr16}, {Vgpr16}, AbsToNegMax})

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,8 @@ enum LoweringMethodID {
339339
InsVecEltToSel,
340340
InsVecEltTo32,
341341
AbsToNegMax,
342-
AbsToS32
342+
AbsToS32,
343+
DynStackAlloc
343344
};
344345

345346
enum FastRulesTypes {

llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
3-
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s
4-
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefix=GFX11 %s
2+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
3+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s
4+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefix=GFX11 %s
55

66
@gv = external addrspace(4) constant i32
77

llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -global-isel -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,DEFAULTSIZE %s
3-
; RUN: llc -global-isel -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-assume-dynamic-stack-object-size=1024 < %s | FileCheck -check-prefixes=GCN,ASSUME1024 %s
2+
; RUN: llc -global-isel -new-reg-bank-select -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,DEFAULTSIZE %s
3+
; RUN: llc -global-isel -new-reg-bank-select -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-assume-dynamic-stack-object-size=1024 < %s | FileCheck -check-prefixes=GCN,ASSUME1024 %s
44

55
; FIXME: Generated test checks do not check metadata at the end of the
66
; function, so this also includes manually added checks.

0 commit comments

Comments
 (0)