Skip to content

Commit cc0f633

Browse files
[Generic][AIE2] Combiner for shufflevectors that use build vector
Transforms a shufflevector that uses a build vector or undefined into just a build vector. This can be done because a shuffle vector lowering is an unmerge followed by a merge. Since a build vector is itself a merge, the merge and unmerge cancel each other out and we can just build the vector directly. Example: ``` %1:_(s32) = COPY $r0 %3:_(<8 x s32>) = G_IMPLICIT_DEF %5:_(s32) = G_IMPLICIT_DEF %2:_(<8 x s32>) = G_BUILD_VECTOR %1(s32), %5(s32), %5(s32), %5(s32), %5(s32), %5(s32), %5(s32), %5(s32) %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %2(<8 x s32>), %3, shufflemask(0, 0, 0, 0, 0, 0, 0, 0) ===> %2:_(<8 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32), %1(s32), %1(s32), %1(s32), %1(s32) ```
1 parent 5654047 commit cc0f633

File tree

5 files changed

+415
-2
lines changed

5 files changed

+415
-2
lines changed

Diff for: llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

+9
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,15 @@ class CombinerHelper {
352352
applyCombineUnmergeMergeToPlainValues(MachineInstr &MI,
353353
SmallVectorImpl<Register> &Operands);
354354

355+
/// Transform G_SHUFFLE_VECTOR(A, B, mask), where A and B are each a G_BUILD_VECTOR or a G_IMPLICIT_DEF, into a single G_BUILD_VECTOR of the elements selected by mask.
356+
bool
357+
matchCombineShuffleVectorBuildVector(MachineInstr &MI,
358+
SmallVectorImpl<Register> &Operands);
359+
360+
void
361+
applyCombineShuffleVectorBuildVector(MachineInstr &MI,
362+
SmallVectorImpl<Register> &Operands);
363+
355364
/// Transform G_UNMERGE Constant -> Constant1, Constant2, ...
356365
bool matchCombineUnmergeConstant(MachineInstr &MI,
357366
SmallVectorImpl<APInt> &Csts);

Diff for: llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h

+19
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
// See https://llvm.org/LICENSE.txt for license information.
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
7+
// Modifications (c) Copyright 2024 Advanced Micro Devices, Inc. or its
8+
// affiliates
9+
//
710
//===----------------------------------------------------------------------===//
811
/// \file
912
/// Declares convenience wrapper classes for interpreting MachineInstr instances
@@ -225,6 +228,22 @@ class GUnmerge : public GenericMachineInstr {
225228
}
226229
};
227230

231+
/// Represents a G_SHUFFLE_VECTOR.
232+
class GShuffleVector : public GenericMachineInstr {
233+
public:
234+
/// Returns the number of source registers.
235+
unsigned getNumSources() const { return getNumOperands() - 2; }
236+
/// Returns the I'th source register.
237+
Register getSourceReg(unsigned I) const {
238+
assert(I + 1 <= getNumSources());
239+
return getReg(I + 1);
240+
}
241+
242+
static bool classof(const MachineInstr *MI) {
243+
return MI->getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR;
244+
}
245+
};
246+
228247
/// Represents G_BUILD_VECTOR, G_CONCAT_VECTORS or G_MERGE_VALUES.
229248
/// All these have the common property of generating a single value from
230249
/// multiple sources.

Diff for: llvm/include/llvm/Target/GlobalISel/Combine.td

+12-2
Original file line numberDiff line numberDiff line change
@@ -749,7 +749,7 @@ def fneg_fneg_fold: GICombineRule <
749749
(apply (GIReplaceReg $dst, $src))
750750
>;
751751

752-
// Fold (unmerge(merge x, y, z)) -> z, y, z.
752+
// Fold (unmerge(merge x, y, z)) -> x, y, z.
753753
def unmerge_merge_matchinfo : GIDefMatchData<"SmallVector<Register, 8>">;
754754
def unmerge_merge : GICombineRule<
755755
(defs root:$d, unmerge_merge_matchinfo:$info),
@@ -758,6 +758,16 @@ def unmerge_merge : GICombineRule<
758758
(apply [{ Helper.applyCombineUnmergeMergeToPlainValues(*${d}, ${info}); }])
759759
>;
760760

761+
// Fold shuffle_vector(build_vector|undef, build_vector|undef, mask) -> build_vector.
762+
def shufflevector_merge_matchinfo : GIDefMatchData<"SmallVector<Register, 8>">;
763+
def shufflevector_merge : GICombineRule<
764+
(defs root:$d, shufflevector_merge_matchinfo:$info),
765+
(match (wip_match_opcode G_SHUFFLE_VECTOR): $d,
766+
[{ return Helper.matchCombineShuffleVectorBuildVector(*${d}, ${info}); }]),
767+
(apply [{ Helper.applyCombineShuffleVectorBuildVector(*${d}, ${info}); }])
768+
>;
769+
770+
761771
// Fold merge(unmerge).
762772
def merge_unmerge : GICombineRule<
763773
(defs root:$d, register_matchinfo:$matchinfo),
@@ -1324,7 +1334,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
13241334
intdiv_combines, mulh_combines, redundant_neg_operands,
13251335
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
13261336
sub_add_reg, select_to_minmax, redundant_binop_in_equality,
1327-
fsub_to_fneg, commute_constant_to_rhs]>;
1337+
fsub_to_fneg, commute_constant_to_rhs, shufflevector_merge]>;
13281338

13291339
// A combine group used to for prelegalizer combiners at -O0. The combines in
13301340
// this group have been selected based on experiments to balance code size and

Diff for: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

+76
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "llvm/ADT/STLExtras.h"
1414
#include "llvm/ADT/SetVector.h"
1515
#include "llvm/ADT/SmallBitVector.h"
16+
#include "llvm/ADT/SmallVector.h"
1617
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
1718
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
1819
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
@@ -26,6 +27,7 @@
2627
#include "llvm/CodeGen/MachineDominators.h"
2728
#include "llvm/CodeGen/MachineFunction.h"
2829
#include "llvm/CodeGen/MachineInstr.h"
30+
#include "llvm/CodeGen/MachineInstrBuilder.h"
2931
#include "llvm/CodeGen/MachineMemOperand.h"
3032
#include "llvm/CodeGen/MachineRegisterInfo.h"
3133
#include "llvm/CodeGen/RegisterBankInfo.h"
@@ -2147,6 +2149,80 @@ static Register peekThroughBitcast(Register Reg,
21472149
return Reg;
21482150
}
21492151

2152+
/// Match a G_SHUFFLE_VECTOR whose two sources are each either a
/// G_BUILD_VECTOR or a G_IMPLICIT_DEF.
///
/// On success \p Operands holds the scalar elements of the first source
/// followed by those of the second, so that a (non-negative) shuffle-mask
/// index can be used directly as an index into it. Every lane of an undefined
/// source maps to one shared scalar G_IMPLICIT_DEF. \p Operands is left
/// untouched when both sources are undefined.
///
/// \returns true if the shuffle can be rewritten as a single G_BUILD_VECTOR
/// (or as a G_IMPLICIT_DEF when both sources are undefined).
bool CombinerHelper::matchCombineShuffleVectorBuildVector(
    MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
         "Expected a shuffle vector");
  auto &ShuffleVector = cast<GShuffleVector>(MI);
  const Register ShuffleSrc1 = ShuffleVector.getSourceReg(0);
  const Register ShuffleSrc2 = ShuffleVector.getSourceReg(1);
  const Register SrcReg1 = peekThroughBitcast(ShuffleSrc1, MRI);
  const Register SrcReg2 = peekThroughBitcast(ShuffleSrc2, MRI);

  // Classify each source as a build vector, an implicit definition, or
  // neither.
  auto *SrcInstr1 = getOpcodeDef<GBuildVector>(SrcReg1, MRI);
  auto *SrcInstr2 = getOpcodeDef<GBuildVector>(SrcReg2, MRI);
  const bool IsUndef1 =
      getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, SrcReg1, MRI) != nullptr;
  const bool IsUndef2 =
      getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, SrcReg2, MRI) != nullptr;

  // Our inputs need to be either build vectors or undefined; plain register
  // inputs break this optimization. You could maybe do something clever where
  // you concatenate vectors to save half a build vector.
  if ((!SrcInstr1 && !IsUndef1) || (!SrcInstr2 && !IsUndef2))
    return false;

  // Undefined in, undefined out: nothing to collect.
  if (IsUndef1 && IsUndef2)
    return true;

  // The rewrite emits a G_BUILD_VECTOR, which has to define a vector. A
  // shuffle whose mask selects a single lane may define a scalar instead.
  if (!MRI.getType(ShuffleVector.getReg(0)).isVector())
    return false;

  // A build vector reached through a bitcast may have a different element
  // count or element type than the value the shuffle actually reads, in which
  // case the mask indices would not line up with its operands. Looking
  // through bitcasts is only harmless for undefined values.
  if (SrcInstr1 && MRI.getType(SrcReg1) != MRI.getType(ShuffleSrc1))
    return false;
  if (SrcInstr2 && MRI.getType(SrcReg2) != MRI.getType(ShuffleSrc2))
    return false;

  // At this point at least one source is a build vector. Both shuffle inputs
  // have the same type, so its element count and element type describe both.
  const GBuildVector *Defined = SrcInstr1 ? SrcInstr1 : SrcInstr2;
  const unsigned NumElements = Defined->getNumSources();

  // Lanes of an undefined source are represented by a single scalar undef.
  // It must have the element type (not the vector type) and an actual
  // definition, otherwise the resulting G_BUILD_VECTOR would be malformed.
  Register UndefReg;
  if (!SrcInstr1 || !SrcInstr2) {
    const LLT EltTy = MRI.getType(Defined->getReg(0)).getElementType();
    UndefReg = Builder.buildUndef(EltTy).getReg(0);
  }

  auto AppendLanes = [&](const GBuildVector *Src) {
    for (unsigned Idx = 0; Idx < NumElements; ++Idx)
      Operands.push_back(Src ? Src->getSourceReg(Idx) : UndefReg);
  };
  AppendLanes(SrcInstr1);
  AppendLanes(SrcInstr2);

  return true;
}
2197+
2198+
/// Replace a matched G_SHUFFLE_VECTOR with a G_BUILD_VECTOR of the elements
/// its mask selects, or with a G_IMPLICIT_DEF when both sources are
/// undefined.
///
/// \p Operands is the flattened element list produced by the matcher: the
/// elements of the first source followed by those of the second, indexed by
/// shuffle-mask value.
void CombinerHelper::applyCombineShuffleVectorBuildVector(
    MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
         "Expected a shuffle vector");
  auto &ShuffleVector = cast<GShuffleVector>(MI);
  const Register DstReg = ShuffleVector.getReg(0);
  const Register SrcReg1 =
      peekThroughBitcast(ShuffleVector.getSourceReg(0), MRI);
  const Register SrcReg2 =
      peekThroughBitcast(ShuffleVector.getSourceReg(1), MRI);

  // If they're both undefined, we will just return an undefined as well.
  const bool BothUndef =
      getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, SrcReg1, MRI) &&
      getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, SrcReg2, MRI);
  if (BothUndef) {
    Builder.buildUndef(DstReg);
    MI.eraseFromParent();
    return;
  }

  // Scalar undef shared by every undefined lane; only built if needed.
  const LLT EltTy = MRI.getType(DstReg).getScalarType();
  Register UndefElt;
  auto GetUndefElt = [&]() {
    if (!UndefElt.isValid())
      UndefElt = Builder.buildUndef(EltTy).getReg(0);
    return UndefElt;
  };

  const ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  SmallVector<Register, 8> Arguments;
  Arguments.reserve(ShuffleMask.size());
  for (int Index : ShuffleMask) {
    // A negative mask entry denotes an undefined lane; it must not be used
    // to index Operands.
    if (Index < 0) {
      Arguments.push_back(GetUndefElt());
      continue;
    }

    assert(static_cast<size_t>(Index) < Operands.size() &&
           "Shuffle mask index out of range");
    Register Elt = Operands[Index];
    // A placeholder register without a defining instruction stands for a
    // lane of an undefined source; give it a real, correctly typed undef.
    if (!MRI.getVRegDef(Elt))
      Elt = GetUndefElt();
    Arguments.push_back(Elt);
  }

  Builder.buildBuildVector(DstReg, Arguments);
  MI.eraseFromParent();
}
2225+
21502226
bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
21512227
MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
21522228
assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&

0 commit comments

Comments
 (0)