Skip to content

Commit 17b26e2

Browse files
[AIE2][AIE2P] Support 128-bit G_BUILD_VECTOR combines
1 parent 97a83fc commit 17b26e2

File tree

3 files changed

+159
-18
lines changed

3 files changed

+159
-18
lines changed

llvm/lib/Target/AIE/AIECombinerHelper.cpp

+20 -18
Original file line number | Diff line number | Diff line change
@@ -1172,6 +1172,7 @@ bool llvm::matchSplatVector(MachineInstr &MI, MachineRegisterInfo &MRI,
11721172
const unsigned DstVecSize = DstVecTy.getSizeInBits();
11731173

11741174
switch (DstVecSize) {
1175+
case 128:
11751176
case 256:
11761177
case 512:
11771178
case 1024:
@@ -1213,6 +1214,20 @@ bool llvm::matchSplatVector(MachineInstr &MI, MachineRegisterInfo &MRI,
12131214
return true;
12141215
}
12151216

1217+
static void buildUnmergeVector(MachineIRBuilder &B, MachineRegisterInfo &MRI,
1218+
Register DstReg, Register SrcReg,
1219+
unsigned NumSubVectors, unsigned SubIdx) {
1220+
const LLT DstTy = MRI.getType(DstReg);
1221+
SmallVector<Register, 4> SubVecs;
1222+
for (unsigned I = 0; I < NumSubVectors; I++) {
1223+
if (I == SubIdx)
1224+
SubVecs.push_back(DstReg);
1225+
else
1226+
SubVecs.push_back(MRI.createGenericVirtualRegister(DstTy));
1227+
}
1228+
B.buildUnmerge(SubVecs, SrcReg);
1229+
}
1230+
12161231
static void buildBroadcastVector(MachineIRBuilder &B, MachineRegisterInfo &MRI,
12171232
Register SrcReg, Register DstVecReg) {
12181233
const AIEBaseInstrInfo &AIETII = (const AIEBaseInstrInfo &)B.getTII();
@@ -1257,10 +1272,10 @@ static void buildBroadcastVector(MachineIRBuilder &B, MachineRegisterInfo &MRI,
12571272
// Build the G_AIE_BROADCAST_VECTOR instruction for the 512-bit vector.
12581273
B.buildInstr(AIETII.getGenericBroadcastVectorOpcode(), {DstVec512BitReg},
12591274
{SrcReg});
1260-
if (DstVecSize == 256) {
1261-
const Register UnusedSubReg = MRI.createGenericVirtualRegister(DstVecTy);
1262-
// Unmerge the 512-bit vector into the 256-bit destination vector.
1263-
B.buildUnmerge({DstVecReg, UnusedSubReg}, DstVec512BitReg);
1275+
if (DstVecSize == 128 || DstVecSize == 256) {
1276+
const unsigned NumSubVectors = 512 / DstVecSize;
1277+
// Unmerge the 512-bit vector into the 128/256-bit destination vector.
1278+
buildUnmergeVector(B, MRI, DstVecReg, DstVec512BitReg, NumSubVectors, 0);
12641279
} else if (DstVecSize == 1024) {
12651280
// Concatenate two 512-bit vectors to form a 1024-bit destination vector.
12661281
B.buildConcatVectors({DstVecReg}, {DstVec512BitReg, DstVec512BitReg});
@@ -1299,6 +1314,7 @@ bool llvm::matchSingleDiffLaneBuildVector(
12991314
const unsigned DstVecSize = DstVecTy.getSizeInBits();
13001315

13011316
switch (DstVecSize) {
1317+
case 128:
13021318
case 256:
13031319
case 512:
13041320
case 1024:
@@ -1961,20 +1977,6 @@ bool llvm::matchBroadcastElement(MachineInstr &MI, MachineRegisterInfo &MRI,
19611977
return true;
19621978
}
19631979

1964-
static void buildUnmergeVector(MachineIRBuilder &B, MachineRegisterInfo &MRI,
1965-
Register DstReg, Register SrcReg,
1966-
unsigned NumSubVectors, unsigned SubIdx) {
1967-
const LLT DstTy = MRI.getType(DstReg);
1968-
SmallVector<Register, 4> SubVecs;
1969-
for (unsigned I = 0; I < NumSubVectors; I++) {
1970-
if (I == SubIdx)
1971-
SubVecs.push_back(DstReg);
1972-
else
1973-
SubVecs.push_back(MRI.createGenericVirtualRegister(DstTy));
1974-
}
1975-
B.buildUnmerge(SubVecs, SrcReg);
1976-
}
1977-
19781980
/// \returns true if it is possible to combine the shuffle vector to VSEL.
19791981
/// E.g.:
19801982
/// From : %0:_(<16 x s32>) = COPY $x0

llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-single-diff-build-vec.mir

+60
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,66 @@
88
# RUN: llc -mtriple aie2 -run-pass=aie2-prelegalizer-combiner %s -verify-machineinstrs -o - | FileCheck %s
99
# RUN: llc -mtriple aie2p -run-pass=aie2p-prelegalizer-combiner %s -verify-machineinstrs -o - | FileCheck %s
1010

11+
---
12+
name: test_build_vector_128_8bit_scl
13+
body: |
14+
bb.1.entry:
15+
; CHECK-LABEL: name: test_build_vector_128_8bit_scl
16+
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $r0
17+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r1
18+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
19+
; CHECK-NEXT: [[AIE_BROADCAST_VECTOR:%[0-9]+]]:_(<64 x s8>) = G_AIE_BROADCAST_VECTOR [[COPY]](s32)
20+
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_AIE_UNPAD_VECTOR [[AIE_BROADCAST_VECTOR]](<64 x s8>)
21+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
22+
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<16 x s8>) = G_INSERT_VECTOR_ELT [[AIE_UNPAD_VECTOR]], [[TRUNC]](s8), [[C]](s32)
23+
; CHECK-NEXT: $q0 = COPY [[IVEC]](<16 x s8>)
24+
%0:_(s32) = COPY $r0
25+
%1:_(s8) = G_TRUNC %0:_(s32)
26+
%2:_(s32) = COPY $r1
27+
%3:_(s8) = G_TRUNC %2:_(s32)
28+
%4:_(<16 x s8>) = G_BUILD_VECTOR %1:_(s8), %1:_(s8), %3:_(s8), %1:_(s8), %1:_(s8), %1:_(s8), %1:_(s8), %1:_(s8), %1:_(s8), %1:_(s8), %1:_(s8), %1:_(s8), %1:_(s8), %1:_(s8), %1:_(s8), %1:_(s8)
29+
$q0 = COPY %4:_(<16 x s8>)
30+
...
31+
32+
---
33+
name: test_build_vector_128_16bit_scl
34+
body: |
35+
bb.1.entry:
36+
; CHECK-LABEL: name: test_build_vector_128_16bit_scl
37+
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $r0
38+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r1
39+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
40+
; CHECK-NEXT: [[AIE_BROADCAST_VECTOR:%[0-9]+]]:_(<32 x s16>) = G_AIE_BROADCAST_VECTOR [[COPY]](s32)
41+
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_AIE_UNPAD_VECTOR [[AIE_BROADCAST_VECTOR]](<32 x s16>)
42+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
43+
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<8 x s16>) = G_INSERT_VECTOR_ELT [[AIE_UNPAD_VECTOR]], [[TRUNC]](s16), [[C]](s32)
44+
; CHECK-NEXT: $q0 = COPY [[IVEC]](<8 x s16>)
45+
%0:_(s32) = COPY $r0
46+
%1:_(s16) = G_TRUNC %0:_(s32)
47+
%2:_(s32) = COPY $r1
48+
%3:_(s16) = G_TRUNC %2:_(s32)
49+
%4:_(<8 x s16>) = G_BUILD_VECTOR %3:_(s16), %1:_(s16), %1:_(s16), %1:_(s16), %1:_(s16), %1:_(s16), %1:_(s16), %1:_(s16)
50+
$q0 = COPY %4:_(<8 x s16>)
51+
...
52+
53+
---
54+
name: test_build_vector_128_32bit_scl
55+
body: |
56+
bb.1.entry:
57+
; CHECK-LABEL: name: test_build_vector_128_32bit_scl
58+
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $r0
59+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r1
60+
; CHECK-NEXT: [[AIE_BROADCAST_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_AIE_BROADCAST_VECTOR [[COPY]](s32)
61+
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[AIE_BROADCAST_VECTOR]](<16 x s32>)
62+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
63+
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[AIE_UNPAD_VECTOR]], [[COPY1]](s32), [[C]](s32)
64+
; CHECK-NEXT: $q0 = COPY [[IVEC]](<4 x s32>)
65+
%1:_(s32) = COPY $r0
66+
%2:_(s32) = COPY $r1
67+
%3:_(<4 x s32>) = G_BUILD_VECTOR %1:_(s32), %2:_(s32), %1:_(s32), %1:_(s32)
68+
$q0 = COPY %3:_(<4 x s32>)
69+
...
70+
1171
---
1272
name: test_build_vector_256_8bit_scl
1373
body: |

llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-splat-vector.mir

+79
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,85 @@
88
# RUN: llc -mtriple aie2 -run-pass=aie2-prelegalizer-combiner %s -verify-machineinstrs -o - | FileCheck %s
99
# RUN: llc -mtriple aie2p -run-pass=aie2p-prelegalizer-combiner %s -verify-machineinstrs -o - | FileCheck %s
1010

11+
---
12+
name: test_build_vector_128_8bit_scl
13+
body: |
14+
bb.1.entry:
15+
; CHECK-LABEL: name: test_build_vector_128_8bit_scl
16+
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $r0
17+
; CHECK-NEXT: [[AIE_BROADCAST_VECTOR:%[0-9]+]]:_(<64 x s8>) = G_AIE_BROADCAST_VECTOR [[COPY]](s32)
18+
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_AIE_UNPAD_VECTOR [[AIE_BROADCAST_VECTOR]](<64 x s8>)
19+
; CHECK-NEXT: $q0 = COPY [[AIE_UNPAD_VECTOR]](<16 x s8>)
20+
%0:_(s32) = COPY $r0
21+
%1:_(s8) = G_TRUNC %0:_(s32)
22+
%2:_(<16 x s8>) = G_BUILD_VECTOR %1:_(s8), %1:_(s8), %1:_(s8), %1:_(s8), %1:_(s8), %1:_(s8), %1:_(s8), %1:_(s8), %1:_(s8), %1:_(s8), %1:_(s8), %1:_(s8), %1:_(s8), %1:_(s8), %1:_(s8), %1:_(s8)
23+
$q0 = COPY %2:_(<16 x s8>)
24+
...
25+
26+
---
27+
name: test_build_vector_128_16bit_scl
28+
body: |
29+
bb.1.entry:
30+
; CHECK-LABEL: name: test_build_vector_128_16bit_scl
31+
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $r0
32+
; CHECK-NEXT: [[AIE_BROADCAST_VECTOR:%[0-9]+]]:_(<32 x s16>) = G_AIE_BROADCAST_VECTOR [[COPY]](s32)
33+
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_AIE_UNPAD_VECTOR [[AIE_BROADCAST_VECTOR]](<32 x s16>)
34+
; CHECK-NEXT: $q0 = COPY [[AIE_UNPAD_VECTOR]](<8 x s16>)
35+
%0:_(s32) = COPY $r0
36+
%1:_(s16) = G_TRUNC %0:_(s32)
37+
%2:_(<8 x s16>) = G_BUILD_VECTOR %1:_(s16), %1:_(s16), %1:_(s16), %1:_(s16), %1:_(s16), %1:_(s16), %1:_(s16), %1:_(s16)
38+
$q0 = COPY %2:_(<8 x s16>)
39+
...
40+
41+
---
42+
name: test_build_vector_128_32bit_scl
43+
body: |
44+
bb.1.entry:
45+
; CHECK-LABEL: name: test_build_vector_128_32bit_scl
46+
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $r0
47+
; CHECK-NEXT: [[AIE_BROADCAST_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_AIE_BROADCAST_VECTOR [[COPY]](s32)
48+
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[AIE_BROADCAST_VECTOR]](<16 x s32>)
49+
; CHECK-NEXT: $q0 = COPY [[AIE_UNPAD_VECTOR]](<4 x s32>)
50+
%1:_(s32) = COPY $r0
51+
%2:_(<4 x s32>) = G_BUILD_VECTOR %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32)
52+
$q0 = COPY %2:_(<4 x s32>)
53+
...
54+
55+
---
56+
name: test_build_vector_128_64bit_scl
57+
body: |
58+
bb.1.entry:
59+
; CHECK-LABEL: name: test_build_vector_128_64bit_scl
60+
; CHECK: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF
61+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $l0
62+
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](s64)
63+
; CHECK-NEXT: [[AIE_BROADCAST_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_AIE_BROADCAST_VECTOR [[BITCAST]](<2 x s32>)
64+
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_AIE_UNPAD_VECTOR [[AIE_BROADCAST_VECTOR]](<8 x s64>)
65+
; CHECK-NEXT: G_STORE [[AIE_UNPAD_VECTOR]](<2 x s64>), [[DEF]](p0) :: (store (<2 x s64>))
66+
%0:_(p0) = G_IMPLICIT_DEF
67+
%1:_(s64) = COPY $l0
68+
%2:_(<2 x s64>) = G_BUILD_VECTOR %1:_(s64), %1:_(s64)
69+
G_STORE %2(<2 x s64>), %0(p0) :: (store (<2 x s64>))
70+
...
71+
72+
# Invalid Vector for broadcast.
73+
# As two of the elements in G_BUILD_VECTOR are different from others,
74+
# ideally this should not be converted into G_AIE_BROADCAST_VECTOR
75+
---
76+
name: test_build_vector_128_32bit_scl_invalid
77+
body: |
78+
bb.1.entry:
79+
; CHECK-LABEL: name: test_build_vector_128_32bit_scl_invalid
80+
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $r0
81+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r1
82+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32)
83+
; CHECK-NEXT: $q0 = COPY [[BUILD_VECTOR]](<4 x s32>)
84+
%1:_(s32) = COPY $r0
85+
%2:_(s32) = COPY $r1
86+
%3:_(<4 x s32>) = G_BUILD_VECTOR %2:_(s32), %1:_(s32), %1:_(s32), %2:_(s32)
87+
$q0 = COPY %3:_(<4 x s32>)
88+
...
89+
1190
---
1291
name: test_build_vector_256_8bit_scl
1392
body: |

0 commit comments

Comments
 (0)