Skip to content

Commit c9161b7

Browse files
Combine vextract and vbroadcast into VEXTBCST
1 parent 86c577a commit c9161b7

File tree

2 files changed

+157
-0
lines changed

2 files changed

+157
-0
lines changed

llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp

+56
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ class AIE2PreLegalizerCombinerImpl : public Combiner {
7575

7676
bool tryToCombineVectorInserts(MachineInstr &MI, unsigned SclSrcBits) const;
7777

78+
bool tryToCombineExtBcst(MachineInstr &MI) const;
79+
7880
bool tryToCombineIntrinsic(MachineInstr &MI) const;
7981

8082
private:
@@ -243,6 +245,54 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineVectorInserts(
243245
return true;
244246
}
245247

248+
// Combines vextract and vbroadcast into vextract_broadcast
249+
bool AIE2PreLegalizerCombinerImpl::tryToCombineExtBcst(MachineInstr &MI) const {
250+
// Returns the combined intrinsicID for matching broadcast and extract ops
251+
auto getExtBcstIntrinsicID = [](unsigned BcastID,
252+
unsigned ExtID) -> std::optional<unsigned> {
253+
switch (BcastID) {
254+
case Intrinsic::aie2_vbroadcast8_I512:
255+
if (ExtID == Intrinsic::aie2_vextract_elem8_I512)
256+
return Intrinsic::aie2_vextract_broadcast8_I512;
257+
break;
258+
case Intrinsic::aie2_vbroadcast16_I512:
259+
if (ExtID == Intrinsic::aie2_vextract_elem16_I512)
260+
return Intrinsic::aie2_vextract_broadcast16_I512;
261+
break;
262+
case Intrinsic::aie2_vbroadcast32_I512:
263+
if (ExtID == Intrinsic::aie2_vextract_elem32_I512)
264+
return Intrinsic::aie2_vextract_broadcast32_I512;
265+
break;
266+
}
267+
return std::nullopt;
268+
};
269+
assert(isa<GIntrinsic>(MI) && "this combine only supports instrinsics");
270+
const Register DstReg = MI.getOperand(0).getReg();
271+
MachineInstr *ExtMI = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
272+
if (!isa<GIntrinsic>(*ExtMI))
273+
return false;
274+
// Checks for single use of extracted element
275+
if (!MRI.hasOneNonDBGUse(ExtMI->getOperand(0).getReg()))
276+
return false;
277+
278+
const unsigned BcstID = cast<GIntrinsic>(MI).getIntrinsicID();
279+
const unsigned ExtID = cast<GIntrinsic>(*ExtMI).getIntrinsicID();
280+
const std::optional<unsigned> ExtBcstIntrinsicID =
281+
getExtBcstIntrinsicID(BcstID, ExtID);
282+
if (!ExtBcstIntrinsicID)
283+
return false;
284+
285+
const Register SrcReg = ExtMI->getOperand(2).getReg();
286+
const Register IdxReg = ExtMI->getOperand(3).getReg();
287+
MachineIRBuilder MIRBuilder(MI);
288+
MIRBuilder.buildIntrinsic(*ExtBcstIntrinsicID, DstReg, false, false)
289+
.addUse(SrcReg)
290+
.addUse(IdxReg);
291+
MI.eraseFromParent();
292+
293+
return true;
294+
}
295+
246296
bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic(
247297
MachineInstr &MI) const {
248298
const unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
@@ -262,6 +312,12 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic(
262312
case Intrinsic::aie2_vinsert32_I512: {
263313
return tryToCombineVectorInserts(MI, getVInsertScalarSize(IntrinsicID));
264314
}
315+
case Intrinsic::aie2_vbroadcast8_I512:
316+
case Intrinsic::aie2_vbroadcast16_I512:
317+
case Intrinsic::aie2_vbroadcast32_I512:
318+
case Intrinsic::aie2_vbroadcast64_I512: {
319+
return tryToCombineExtBcst(MI);
320+
}
265321
default:
266322
break;
267323
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
#
3+
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
8+
# RUN: llc -mtriple aie2 -run-pass=aie2-prelegalizer-combiner %s -verify-machineinstrs -o - | FileCheck %s
9+
10+
# Positive test case: vextract.elem8 feeding vbroadcast8 is expected to be replaced by a single vextract.broadcast8 (see CHECK lines).
---
11+
name: vextract_broadcast8
12+
body: |
13+
bb.1.entry:
14+
; CHECK-LABEL: name: vextract_broadcast8
15+
; CHECK: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x2
16+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0
17+
; CHECK-NEXT: [[INT:%[0-9]+]]:_(<64 x s8>) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.broadcast8.I512), [[COPY]](<64 x s8>), [[COPY1]](s32)
18+
; CHECK-NEXT: $x0 = COPY [[INT]](<64 x s8>)
19+
%1:_(<64 x s8>) = COPY $x2
20+
%2:_(s32) = COPY $r0
21+
%4:_(s32) = G_CONSTANT i32 1
22+
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %1(<64 x s8>), %2(s32), %4(s32)
23+
%0:_(<64 x s8>) = G_INTRINSIC intrinsic(@llvm.aie2.vbroadcast8.I512), %3(s32)
24+
$x0 = COPY %0(<64 x s8>)
25+
...
26+
27+
# Positive test case: vextract.elem16 feeding vbroadcast16 is expected to be replaced by a single vextract.broadcast16 (see CHECK lines).
---
28+
name: vextract_broadcast16
29+
body: |
30+
bb.1.entry:
31+
; CHECK-LABEL: name: vextract_broadcast16
32+
; CHECK: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x2
33+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0
34+
; CHECK-NEXT: [[INT:%[0-9]+]]:_(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.broadcast16.I512), [[COPY]](<32 x s16>), [[COPY1]](s32)
35+
; CHECK-NEXT: $x0 = COPY [[INT]](<32 x s16>)
36+
%1:_(<32 x s16>) = COPY $x2
37+
%2:_(s32) = COPY $r0
38+
%4:_(s32) = G_CONSTANT i32 1
39+
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %1(<32 x s16>), %2(s32), %4(s32)
40+
%0:_(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.vbroadcast16.I512), %3(s32)
41+
$x0 = COPY %0(<32 x s16>)
42+
...
43+
44+
# Positive test case: vextract.elem32 feeding vbroadcast32 is expected to be replaced by a single vextract.broadcast32 (see CHECK lines).
---
45+
name: vextract_broadcast32
46+
body: |
47+
bb.1.entry:
48+
; CHECK-LABEL: name: vextract_broadcast32
49+
; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x2
50+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0
51+
; CHECK-NEXT: [[INT:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.broadcast32.I512), [[COPY]](<16 x s32>), [[COPY1]](s32)
52+
; CHECK-NEXT: $x0 = COPY [[INT]](<16 x s32>)
53+
%1:_(<16 x s32>) = COPY $x2
54+
%2:_(s32) = COPY $r0
55+
%4:_(s32) = G_CONSTANT i32 1
56+
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), %1(<16 x s32>), %2(s32), %4(s32)
57+
%0:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vbroadcast32.I512), %3(s32)
58+
$x0 = COPY %0(<16 x s32>)
59+
...
60+
61+
# Negative test case: must not be combined into vextract_broadcast because the extract (vextract.elem16) and the broadcast (vbroadcast32) operate on different element widths.
62+
---
63+
name: vextract_broadcast_type_mismatch
64+
body: |
65+
bb.1.entry:
66+
; CHECK-LABEL: name: vextract_broadcast_type_mismatch
67+
; CHECK: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x2
68+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0
69+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
70+
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), [[COPY]](<32 x s16>), [[COPY1]](s32), [[C]](s32)
71+
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vbroadcast32.I512), [[INT]](s32)
72+
; CHECK-NEXT: $x0 = COPY [[INT1]](<16 x s32>)
73+
%1:_(<32 x s16>) = COPY $x2
74+
%2:_(s32) = COPY $r0
75+
%4:_(s32) = G_CONSTANT i32 1
76+
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %1(<32 x s16>), %2(s32), %4(s32)
77+
%0:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2.vbroadcast32.I512), %3(s32)
78+
$x0 = COPY %0(<16 x s32>)
79+
...
80+
81+
# Negative test case: must not be combined into vextract_broadcast because the extracted element (%3) is also used by an instruction other than the broadcast (PseudoRET).
82+
---
83+
name: vextract_broadcast_multiuse
84+
body: |
85+
bb.1.entry:
86+
; CHECK-LABEL: name: vextract_broadcast_multiuse
87+
; CHECK: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x2
88+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0
89+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
90+
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), [[COPY]](<32 x s16>), [[COPY1]](s32), [[C]](s32)
91+
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.vbroadcast16.I512), [[INT]](s32)
92+
; CHECK-NEXT: $x0 = COPY [[INT1]](<32 x s16>)
93+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[INT]](s32)
94+
%1:_(<32 x s16>) = COPY $x2
95+
%2:_(s32) = COPY $r0
96+
%4:_(s32) = G_CONSTANT i32 1
97+
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %1(<32 x s16>), %2(s32), %4(s32)
98+
%0:_(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.vbroadcast16.I512), %3(s32)
99+
$x0 = COPY %0(<32 x s16>)
100+
PseudoRET implicit $lr, implicit %3
101+
...

0 commit comments

Comments
 (0)