Commit 9e257cd

[AIE2][AIE2P] Allow single splat in symmetric build vector combine
1 parent 9dc50cc commit 9e257cd

6 files changed: +179 −87 lines
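
In short, the symmetric build vector combine previously required both halves of the G_BUILD_VECTOR to be uniform; with this change it may split as soon as a single half is a splat, so that half can later become a broadcast. A rough before/after sketch of the newly handled case, based on the updated pad-vector tests below (virtual register names are illustrative):

    ; Before the combine: the upper half is a splat of %def, the lower half is not.
    %v:_(<8 x s32>) = G_BUILD_VECTOR %a(s32), %b(s32), %c(s32), %d(s32), %def(s32), %def(s32), %def(s32), %def(s32)

    ; After splitting, combine_splat_vector turns the splat half into a broadcast:
    %lo:_(<4 x s32>) = G_BUILD_VECTOR %a(s32), %b(s32), %c(s32), %d(s32)
    %bcst:_(<16 x s32>) = G_AIE_BROADCAST_VECTOR %def(s32)
    %hi:_(<4 x s32>) = G_AIE_UNPAD_VECTOR %bcst(<16 x s32>)
    %v:_(<8 x s32>) = G_CONCAT_VECTORS %lo(<4 x s32>), %hi(<4 x s32>)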

Diff for: llvm/lib/Target/AIE/AIECombinerHelper.cpp

+50 −50
@@ -1228,6 +1228,40 @@ void llvm::applyExtractVecEltAndExt(
   MatchMI->eraseFromParent();
 }
 
+static std::optional<Register>
+getSplatVectorSrcReg(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+                     std::pair<unsigned, unsigned> Range) {
+  auto IsUndef = [&](const MachineOperand &Op) {
+    const MachineInstr *Undef = MRI.getVRegDef(Op.getReg());
+    return Undef && Undef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
+  };
+  const unsigned Start = Range.first;
+  const unsigned End = Range.second;
+  // First non-undef operand.
+  Register SrcReg = 0;
+  bool FoundSrc = false;
+  bool AllUndef = true;
+
+  // Find the first non-undef operand as the reference.
+  for (unsigned I = Start; I < End; I++) {
+    const MachineOperand &Op = MI.getOperand(I);
+    if (!IsUndef(Op)) {
+      if (!FoundSrc) {
+        SrcReg = Op.getReg();
+        FoundSrc = true;
+      } else if (Op.getReg() != SrcReg) {
+        return std::nullopt;
+      }
+      AllUndef = false;
+    }
+  }
+
+  if (AllUndef)
+    SrcReg = MI.getOperand(1).getReg();
+
+  return SrcReg;
+}
+
 // Match something like:
 // %0:_(<32 x s16>) = G_BUILD_VECTOR %1:_(s16), ... x32
 //
@@ -1255,34 +1289,12 @@ bool llvm::matchSplatVector(MachineInstr &MI, MachineRegisterInfo &MRI,
     return false;
   }
 
-  auto IsUndef = [&](const MachineOperand &Op) {
-    const MachineInstr *Undef = MRI.getVRegDef(Op.getReg());
-    return Undef && Undef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
-  };
   const unsigned NumOps = MI.getNumOperands();
-  // First non-undef operand.
-  unsigned SrcReg = 0;
-  bool FoundSrc = false;
-  bool AllUndef = true;
-
-  // Find the first non-undef operand as the reference.
-  for (unsigned I = 1; I < NumOps; I++) {
-    const MachineOperand &Op = MI.getOperand(I);
-    if (!IsUndef(Op)) {
-      if (!FoundSrc) {
-        SrcReg = Op.getReg();
-        FoundSrc = true;
-      } else if (Op.getReg() != SrcReg) {
-        return false;
-      }
-      AllUndef = false;
-    }
-  }
-
-  if (AllUndef)
-    SrcReg = MI.getOperand(1).getReg();
+  auto SrcReg = getSplatVectorSrcReg(MI, MRI, std::make_pair(1, NumOps));
+  if (!SrcReg)
+    return false;
 
-  MatchInfo = {DstVecReg, SrcReg};
+  MatchInfo = {DstVecReg, *SrcReg};
   return true;
 }
 
@@ -1466,6 +1478,7 @@ bool llvm::applySingleDiffLaneBuildVector(
 // %0:_(<32 x s16>) = G_CONCAT_VECTORS %3:_(<16 x s16>), %4:_(<16 x s16>)
 // These sub-G_BUILD_VECTOR instructions may later be combined into broadcast
 // instructions by combine_splat_vector.
+// TODO: Remove the original splat vector match and implement the same here.
 bool llvm::matchSymmetricBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI,
                                      GISelChangeObserver &Observer,
                                      BuildFnTy &MatchInfo) {
@@ -1487,30 +1500,16 @@ bool llvm::matchSymmetricBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI,
     return false;
   }
 
-  auto getSrcOperand = [&](unsigned I) { return MI.getOperand(I + 1); };
-
-  const unsigned NumElts = MI.getNumOperands() - 1;
-  const unsigned HalfNumElts = NumElts / 2;
-  const MachineOperand FirstOp = getSrcOperand(0);
-  const MachineOperand SecondOp = getSrcOperand(HalfNumElts);
-
-  // Ensures that each operand in the first half matches FirstOp, and each
-  // operand in the second half matches SecondOp.
-  for (unsigned i = 0; i < HalfNumElts; i++) {
-    if (!getSrcOperand(i).isIdenticalTo(FirstOp)) {
-      return false;
-    }
-    if (!getSrcOperand(HalfNumElts + i).isIdenticalTo(SecondOp)) {
-      return false;
-    }
-  }
-
-  // If both halves are the same register, it's effectively a splat, and the
-  // splat vector combine already handles that case.
-  if (FirstOp.isIdenticalTo(SecondOp))
-    return false;
+  // TODO: Split the G_BUILD_VECTOR either into 3/4 and 1/4 parts,
+  // or 1/4 and 3/4 parts, and then check if any part qualifies as a splat.
+  const unsigned NumOps = MI.getNumOperands();
+  const unsigned HalfNumElts = NumOps / 2 + 1;
+  auto FirstHalfSrcReg =
+      getSplatVectorSrcReg(MI, MRI, std::make_pair(1, HalfNumElts));
+  auto SecondHalfSrcReg =
+      getSplatVectorSrcReg(MI, MRI, std::make_pair(HalfNumElts, NumOps));
 
-  MatchInfo = [&MI, &Observer, &MRI, DstVecTy](MachineIRBuilder &B) {
+  MatchInfo = [&MI, &Observer, DstVecTy](MachineIRBuilder &B) {
     B.setInstrAndDebugLoc(MI);
     LegalizerHelper Helper(B.getMF(), Observer, B);
     // Splits the G_BUILD_VECTOR into two half-sized G_BUILD_VECTOR operations
@@ -1520,7 +1519,8 @@ bool llvm::matchSymmetricBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI,
         DstVecTy.changeElementCount(
            DstVecTy.getElementCount().divideCoefficientBy(2)));
   };
-  return true;
+
+  return (FirstHalfSrcReg.has_value() || SecondHalfSrcReg.has_value());
 }
 
 // Match something like:
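
For concreteness, a worked example of the new half-split indexing (derived from the matcher above, not stated in the commit): an <8 x s32> G_BUILD_VECTOR has NumOps = 1 destination + 8 sources = 9, so HalfNumElts = 9 / 2 + 1 = 5, and getSplatVectorSrcReg is queried over the operand ranges [1, 5) and [5, 9), i.e. the first four and the last four source elements.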

Diff for: llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-pad-vector.mir

+20 −8
@@ -50,8 +50,11 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
     ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV2]](s32), [[UV1]](s32), [[UV3]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
-    ; CHECK-NEXT: $wl0 = COPY [[BUILD_VECTOR]](<8 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV2]](s32), [[UV1]](s32), [[UV3]](s32)
+    ; CHECK-NEXT: [[AIE_BROADCAST_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_AIE_BROADCAST_VECTOR [[DEF]](s32)
+    ; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[AIE_BROADCAST_VECTOR]](<16 x s32>)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s32>), [[AIE_UNPAD_VECTOR]](<4 x s32>)
+    ; CHECK-NEXT: $wl0 = COPY [[CONCAT_VECTORS]](<8 x s32>)
     %10:_(<4 x s32>) = COPY $q0
     %0:_(s32), %1:_(s32), %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %10
     %4:_(s32) = G_IMPLICIT_DEF
@@ -68,8 +71,11 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
     ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
-    ; CHECK-NEXT: $wl0 = COPY [[BUILD_VECTOR]](<8 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[AIE_BROADCAST_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_AIE_BROADCAST_VECTOR [[DEF]](s32)
+    ; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[AIE_BROADCAST_VECTOR]](<16 x s32>)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s32>), [[AIE_UNPAD_VECTOR]](<4 x s32>)
+    ; CHECK-NEXT: $wl0 = COPY [[CONCAT_VECTORS]](<8 x s32>)
     %10:_(<4 x s32>) = COPY $q0
     %0:_(s32), %1:_(s32), %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %10
     %4:_(s32) = G_IMPLICIT_DEF
@@ -86,8 +92,11 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
     ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV2]](s32), [[UV1]](s32), [[UV3]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
-    ; CHECK-NEXT: $wl0 = COPY [[BUILD_VECTOR]](<8 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV2]](s32), [[UV1]](s32), [[UV3]](s32)
+    ; CHECK-NEXT: [[AIE_BROADCAST_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_AIE_BROADCAST_VECTOR [[C]](s32)
+    ; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[AIE_BROADCAST_VECTOR]](<16 x s32>)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s32>), [[AIE_UNPAD_VECTOR]](<4 x s32>)
+    ; CHECK-NEXT: $wl0 = COPY [[CONCAT_VECTORS]](<8 x s32>)
     %10:_(<4 x s32>) = COPY $q0
     %0:_(s32), %1:_(s32), %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %10
     %4:_(s32) = G_CONSTANT i32 1
@@ -104,8 +113,11 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wh0
     ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>)
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV2]](s32), [[UV1]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
-    ; CHECK-NEXT: $x0 = COPY [[BUILD_VECTOR]](<16 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV2]](s32), [[UV1]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+    ; CHECK-NEXT: [[AIE_BROADCAST_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_AIE_BROADCAST_VECTOR [[DEF]](s32)
+    ; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_AIE_UNPAD_VECTOR [[AIE_BROADCAST_VECTOR]](<16 x s32>)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<8 x s32>), [[AIE_UNPAD_VECTOR]](<8 x s32>)
+    ; CHECK-NEXT: $x0 = COPY [[CONCAT_VECTORS]](<16 x s32>)
     %10:_(<8 x s32>) = COPY $wh0
     %0:_(s32), %1:_(s32), %2:_(s32), %3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32), %7:_(s32) = G_UNMERGE_VALUES %10
     %8:_(s32) = G_IMPLICIT_DEF

Diff for: llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-splat-vector.mir

+4 −4
@@ -156,11 +156,11 @@ body: |
     ; CHECK-LABEL: name: test_build_vector_256_32bit_scl_invalid
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $r0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r1
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32)
     ; CHECK-NEXT: $wl0 = COPY [[BUILD_VECTOR]](<8 x s32>)
     %1:_(s32) = COPY $r0
     %2:_(s32) = COPY $r1
-    %3:_(<8 x s32>) = G_BUILD_VECTOR %2:_(s32), %1:_(s32), %1:_(s32), %2:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32)
+    %3:_(<8 x s32>) = G_BUILD_VECTOR %2:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %2:_(s32), %1:_(s32)
     $wl0 = COPY %3:_(<8 x s32>)
 ...
 
@@ -229,11 +229,11 @@ body: |
     ; CHECK-LABEL: name: test_build_vector_512_32bit_scl_invalid
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $r0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r1
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32), [[COPY]](s32)
     ; CHECK-NEXT: $x0 = COPY [[BUILD_VECTOR]](<16 x s32>)
     %1:_(s32) = COPY $r0
     %2:_(s32) = COPY $r1
-    %3:_(<16 x s32>) = G_BUILD_VECTOR %1:_(s32), %2:_(s32), %1:_(s32), %2:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32)
+    %3:_(<16 x s32>) = G_BUILD_VECTOR %1:_(s32), %1:_(s32), %1:_(s32), %2:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %1:_(s32), %2:_(s32), %1:_(s32), %1:_(s32)
     $x0 = COPY %3:_(<16 x s32>)
 ...
 