Skip to content

Commit 6bef697

Browse files
committed
[AIEX] Split Constant and Load narrowing combiner
Now we have a dedicated Load narrowing combiner that also handles loads with multiple users, as long as every user is a profitable truncation to s20.
1 parent e0aac4e commit 6bef697

4 files changed

Lines changed: 76 additions & 45 deletions

File tree

llvm/lib/Target/AIE/AIECombine.td

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -167,10 +167,10 @@ def combine_narrow_phi_node_s20 : GICombineRule<
167167
[{ return matchNarrowPhi(*${root}, MRI, Helper, Observer, ${matchinfo}); }]),
168168
(apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
169169

170-
def combine_narrow_trunc_s20 : GICombineRule<
170+
def combine_narrow_trunc_s20_const : GICombineRule<
171171
(defs root:$root, build_fn_matchinfo:$matchinfo),
172172
(match (wip_match_opcode G_TRUNC): $root,
173-
[{ return matchNarrowTrunc(*${root}, MRI, Observer, ${matchinfo}); }]),
173+
[{ return matchNarrowTruncConstant(*${root}, MRI, Observer, ${matchinfo}); }]),
174174
(apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
175175

176176
def combine_narrow_zext_s20 : GICombineRule<
@@ -240,6 +240,12 @@ def combine_pack_stores_into_memset : GICombineRule<
240240
[{ return matchSequentialStores(cast<GStore>(*${root}), MRI, Observer, ${matchinfo}); }]),
241241
(apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
242242

243+
def combine_trunc_load : GICombineRule<
244+
(defs root:$root, build_fn_matchinfo:$matchinfo),
245+
(match (wip_match_opcode G_LOAD): $root,
246+
[{ return matchNarrowTruncLoad(*${root}, MRI, Helper, Observer, ${matchinfo}); }]),
247+
(apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
248+
243249
// AIE-specific combines (currently shared by AIE2 and AIE2P).
244250
def aie_additional_combines : GICombineGroup<[
245251
combine_unpad_vector,
@@ -253,15 +259,16 @@ def aie_additional_combines : GICombineGroup<[
253259
combine_symmetric_build_vector,
254260
combine_broadcast_shl_to_add,
255261
combine_vector_shuffle_extract_subvec,
256-
combine_narrow_trunc_s20,
262+
combine_narrow_trunc_s20_const,
257263
combine_narrow_zext_s20,
258264
combine_concat_unmerge_phis,
259265
combine_trunc_ext,
260266
combine_load_const,
261267
combine_phi_undef,
262268
combine_align_memset,
263269
combine_peel_memset,
264-
combine_pack_stores_into_memset
270+
combine_pack_stores_into_memset,
271+
combine_trunc_load
265272
]>;
266273

267274
// AIE2P-specific combines.

llvm/lib/Target/AIE/AIECombinerHelper.cpp

Lines changed: 53 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3417,10 +3417,10 @@ static void changeLoadStoreDataRegister(MachineInstr &MI, Register DataReg,
34173417
}
34183418

34193419
/// Narrow operations that are feeding truncations to s20.
3420-
/// Covers G_LOAD and G_CONSTANT.
3421-
bool llvm::matchNarrowTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
3422-
GISelChangeObserver &Observer,
3423-
BuildFnTy &MatchInfo) {
3420+
/// Covers G_CONSTANT.
3421+
bool llvm::matchNarrowTruncConstant(MachineInstr &MI, MachineRegisterInfo &MRI,
3422+
GISelChangeObserver &Observer,
3423+
BuildFnTy &MatchInfo) {
34243424

34253425
assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
34263426

@@ -3431,38 +3431,59 @@ bool llvm::matchNarrowTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
34313431

34323432
MachineInstr &SrcMI = *MRI.getVRegDef(SrcReg);
34333433

3434-
if (SrcMI.getOpcode() == TargetOpcode::G_CONSTANT) {
3435-
MatchInfo = [=, &MI, &SrcMI, &MRI, &Observer](MachineIRBuilder &B) {
3436-
auto NewConstant = B.buildConstant(
3437-
LLT::scalar(20),
3438-
*getIConstantVRegSExtVal(SrcMI.getOperand(0).getReg(), MRI));
3439-
Register FromReg = MI.getOperand(0).getReg();
3440-
Observer.changingAllUsesOfReg(MRI, FromReg);
3441-
MRI.replaceRegWith(FromReg, NewConstant->getOperand(0).getReg());
3442-
Observer.finishedChangingAllUsesOfReg();
3443-
MI.eraseFromParent();
3444-
};
3445-
return true;
3446-
}
3434+
if (SrcMI.getOpcode() != TargetOpcode::G_CONSTANT)
3435+
return false;
3436+
3437+
MatchInfo = [=, &MI, &SrcMI, &MRI, &Observer](MachineIRBuilder &B) {
3438+
auto NewConstant = B.buildConstant(
3439+
LLT::scalar(20),
3440+
*getIConstantVRegSExtVal(SrcMI.getOperand(0).getReg(), MRI));
3441+
Register FromReg = MI.getOperand(0).getReg();
3442+
Observer.changingAllUsesOfReg(MRI, FromReg);
3443+
MRI.replaceRegWith(FromReg, NewConstant->getOperand(0).getReg());
3444+
Observer.finishedChangingAllUsesOfReg();
3445+
MI.eraseFromParent();
3446+
};
3447+
3448+
return true;
3449+
}
3450+
3451+
/// Narrow operations that are feeding truncations to s20.
3452+
/// Covers G_LOAD.
3453+
bool llvm::matchNarrowTruncLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
3454+
CombinerHelper &Helper,
3455+
GISelChangeObserver &Observer,
3456+
BuildFnTy &MatchInfo) {
34473457

3448-
// Ideally, we could allow more users, provided that they are all TRUNCs.
3449-
// However, if we have more users, the live range of this register could
3450-
// spread through more blocks, and this could lead to more register pressure
3451-
// on s20 registers.
3452-
if (!MRI.hasOneNonDBGUse(SrcReg) || !isUsedByLikelyLegalS20User(MRI, MI))
3458+
assert(MI.getOpcode() == TargetOpcode::G_LOAD);
3459+
3460+
const LLT S20 = LLT::scalar(20);
3461+
auto IsProfitableTruncToS20 = [&](const MachineInstr &MaybeTruncMI) {
3462+
if (MaybeTruncMI.getOpcode() != TargetOpcode::G_TRUNC)
3463+
return false;
3464+
const Register DstReg = MaybeTruncMI.getOperand(0).getReg();
3465+
if (MRI.getType(DstReg) != S20)
3466+
return false;
3467+
return isUsedByLikelyLegalS20User(MRI, MaybeTruncMI);
3468+
};
3469+
3470+
// Every user of the loaded value must be a profitable truncation to s20.
3471+
const Register DstReg = MI.getOperand(0).getReg();
3472+
if (!all_of(MRI.use_instructions(DstReg), IsProfitableTruncToS20))
34533473
return false;
34543474

3455-
if (SrcMI.getOpcode() == TargetOpcode::G_LOAD) {
3456-
MatchInfo = [=, &MI, &SrcMI, &MRI, &Observer](MachineIRBuilder &B) {
3457-
Observer.changingInstr(SrcMI);
3458-
changeLoadStoreDataRegister(SrcMI, MI.getOperand(0).getReg(), MRI);
3459-
Observer.changedInstr(SrcMI);
3460-
MI.eraseFromParent();
3461-
};
3462-
return true;
3463-
}
3475+
MatchInfo = [=, &MI, &MRI, &Observer](MachineIRBuilder &B) {
3476+
const Register NewDstReg = MRI.createGenericVirtualRegister(S20);
3477+
Observer.changingInstr(MI);
3478+
changeLoadStoreDataRegister(MI, NewDstReg, MRI);
3479+
Observer.changedInstr(MI);
3480+
// Build Zext after the load, not before.
3481+
MachineBasicBlock &MBB = *MI.getParent();
3482+
B.setInsertPt(MBB, MI.getNextNode() ? MI.getNextNode() : MBB.end());
3483+
Observer.createdInstr(*B.buildZExt(DstReg, NewDstReg));
3484+
};
34643485

3465-
return false;
3486+
return true;
34663487
}
34673488

34683489
/// Narrow operations that are fed by zext from s20.

llvm/lib/Target/AIE/AIECombinerHelper.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -265,8 +265,9 @@ bool matchNarrowPhi(MachineInstr &Phi, MachineRegisterInfo &MRI,
265265
CombinerHelper &Helper, GISelChangeObserver &Observer,
266266
BuildFnTy &MatchInfo);
267267

268-
bool matchNarrowTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
269-
GISelChangeObserver &Observer, BuildFnTy &MatchInfo);
268+
bool matchNarrowTruncConstant(MachineInstr &MI, MachineRegisterInfo &MRI,
269+
GISelChangeObserver &Observer,
270+
BuildFnTy &MatchInfo);
270271

271272
bool matchNarrowZext(MachineInstr &MI, MachineRegisterInfo &MRI,
272273
GISelChangeObserver &Observer, BuildFnTy &MatchInfo);
@@ -298,6 +299,10 @@ bool matchPeelMemset(MachineInstr &MI, MachineRegisterInfo &MRI,
298299

299300
bool matchSequentialStores(GStore &MI, MachineRegisterInfo &MRI,
300301
GISelChangeObserver &Observer, BuildFnTy &MatchInfo);
302+
303+
bool matchNarrowTruncLoad(MachineInstr &Phi, MachineRegisterInfo &MRI,
304+
CombinerHelper &Helper, GISelChangeObserver &Observer,
305+
BuildFnTy &MatchInfo);
301306
} // namespace llvm
302307

303308
#endif

llvm/test/CodeGen/AIE/GlobalISel/combine-trunc-to-s20.mir

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -110,10 +110,10 @@ body: |
110110
; CHECK-NEXT: bb.2:
111111
; CHECK-NEXT: successors: %bb.3(0x80000000)
112112
; CHECK-NEXT: {{ $}}
113-
; CHECK-NEXT: %phi3:_(s20) = G_LOAD [[COPY1]](p0) :: (load (s20), align 4)
113+
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s20) = G_LOAD [[COPY1]](p0) :: (load (s20), align 4)
114114
; CHECK-NEXT: {{ $}}
115115
; CHECK-NEXT: bb.3:
116-
; CHECK-NEXT: %phi:_(s20) = G_PHI %phi2(s20), %bb.1, %phi3(s20), %bb.2
116+
; CHECK-NEXT: %phi:_(s20) = G_PHI %phi2(s20), %bb.1, [[LOAD]](s20), %bb.2
117117
; CHECK-NEXT: PseudoRET implicit $lr, implicit %phi(s20)
118118
bb.1.entry:
119119
liveins: $r0, $r1, $p0
@@ -184,7 +184,7 @@ body: |
184184
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
185185
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF
186186
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 0
187-
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[C]](p0) :: (load (s32) from `ptr null`, align 32)
187+
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s20) = G_LOAD [[C]](p0) :: (load (s20) from `ptr null`, align 32)
188188
; CHECK-NEXT: G_BRCOND [[TRUNC]](s1), %bb.3
189189
; CHECK-NEXT: G_BR %bb.2
190190
; CHECK-NEXT: {{ $}}
@@ -194,15 +194,13 @@ body: |
194194
; CHECK-NEXT: bb.2:
195195
; CHECK-NEXT: successors: %bb.1(0x80000000)
196196
; CHECK-NEXT: {{ $}}
197-
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s20) = G_TRUNC [[LOAD]](s32)
198-
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p0), [[INT1:%[0-9]+]]:_(<32 x s32>), [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s20), [[INT4:%[0-9]+]]:_(s20), [[INT5:%[0-9]+]]:_(<64 x s8>), [[INT6:%[0-9]+]]:_(<8 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.fifo.ld.pop.576.3d.bfp16), [[C1]](p0), [[DEF]](<32 x s32>), [[C2]](s32), [[C3]](s20), [[C3]](s20), [[TRUNC1]](s20), [[C3]](s20), [[C3]](s20), [[C3]](s20), [[C3]](s20) :: (load unknown-size from `ptr addrspace(5) null`, align 1, addrspace 5)
197+
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p0), [[INT1:%[0-9]+]]:_(<32 x s32>), [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s20), [[INT4:%[0-9]+]]:_(s20), [[INT5:%[0-9]+]]:_(<64 x s8>), [[INT6:%[0-9]+]]:_(<8 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.fifo.ld.pop.576.3d.bfp16), [[C1]](p0), [[DEF]](<32 x s32>), [[C2]](s32), [[C3]](s20), [[C3]](s20), [[LOAD]](s20), [[C3]](s20), [[C3]](s20), [[C3]](s20), [[C3]](s20) :: (load unknown-size from `ptr addrspace(5) null`, align 1, addrspace 5)
199198
; CHECK-NEXT: G_BR %bb.1
200199
; CHECK-NEXT: {{ $}}
201200
; CHECK-NEXT: bb.3:
202201
; CHECK-NEXT: successors: %bb.1(0x80000000)
203202
; CHECK-NEXT: {{ $}}
204-
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s20) = G_TRUNC [[LOAD]](s32)
205-
; CHECK-NEXT: [[INT7:%[0-9]+]]:_(p0), [[INT8:%[0-9]+]]:_(<32 x s32>), [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s20), [[INT11:%[0-9]+]]:_(s20), [[INT12:%[0-9]+]]:_(<64 x s8>), [[INT13:%[0-9]+]]:_(<8 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.fifo.ld.pop.576.3d.bfp16), [[C1]](p0), [[DEF]](<32 x s32>), [[C2]](s32), [[C3]](s20), [[C3]](s20), [[TRUNC2]](s20), [[C3]](s20), [[C3]](s20), [[C3]](s20), [[C3]](s20) :: (load unknown-size from `ptr addrspace(5) null`, align 1, addrspace 5)
203+
; CHECK-NEXT: [[INT7:%[0-9]+]]:_(p0), [[INT8:%[0-9]+]]:_(<32 x s32>), [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s20), [[INT11:%[0-9]+]]:_(s20), [[INT12:%[0-9]+]]:_(<64 x s8>), [[INT13:%[0-9]+]]:_(<8 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.fifo.ld.pop.576.3d.bfp16), [[C1]](p0), [[DEF]](<32 x s32>), [[C2]](s32), [[C3]](s20), [[C3]](s20), [[LOAD]](s20), [[C3]](s20), [[C3]](s20), [[C3]](s20), [[C3]](s20) :: (load unknown-size from `ptr addrspace(5) null`, align 1, addrspace 5)
206204
; CHECK-NEXT: G_BR %bb.1
207205
bb.1:
208206
successors: %bb.4(0x40000000), %bb.3(0x40000000)

0 commit comments

Comments
 (0)