Skip to content

Commit aec1600

Browse files
[AIE2] Implement vshuffle selection
1 parent d1bd94a commit aec1600

File tree

4 files changed

+186
-32
lines changed

4 files changed

+186
-32
lines changed

Diff for: llvm/lib/Target/AIE/AIE2InstrPatterns.td

+12
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,18 @@ def : Pat<(int_aie2_vshuffle VEC512:$s1, VEC512:$s2, eR:$mod),
597597
def : Pat<(int_aie2_vshuffle_bf16 VEC512:$s1, VEC512:$s2, eR:$mod),
598598
(VSHUFFLE VEC512:$s1, VEC512:$s2, eR:$mod)>;
599599

600+
// Selection patterns for the generic G_AIE_VSHUFFLE opcode.
// vshuffle_node is declared with one result and three operands; its type
// profile constrains operands 1 and 2 to vectors and operand 3 to an integer.
// GINodeEquiv pairs the node with G_AIE_VSHUFFLE, and each Pat below maps one
// element layout (v16i32, v32i16, v64i8) onto the same VSHUFFLE instruction.
// NOTE(review): the result patterns name the mode operand as i32:$mode while
// the source patterns use eR:$mode - confirm the mix is intended.
601+
def vshuffle_node : SDNode<"AIE2::G_AIE_VSHUFFLE",
602+
SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>]>>;
603+
def : GINodeEquiv<G_AIE_VSHUFFLE, vshuffle_node>;
604+
605+
def : Pat<(v16i32 (vshuffle_node (v16i32 VEC512:$v0), (v16i32 VEC512:$v1), (i32 eR:$mode))),
606+
(VSHUFFLE VEC512:$v0, VEC512:$v1, i32:$mode)>;
607+
def : Pat<(v32i16 (vshuffle_node (v32i16 VEC512:$v0), (v32i16 VEC512:$v1), (i32 eR:$mode))),
608+
(VSHUFFLE VEC512:$v0, VEC512:$v1, i32:$mode)>;
609+
def : Pat<(v64i8 (vshuffle_node (v64i8 VEC512:$v0), (v64i8 VEC512:$v1), (i32 eR:$mode))),
610+
(VSHUFFLE VEC512:$v0, VEC512:$v1, i32:$mode)>;
611+
600612
// VSHIFT Intrinsic (shift/shiftx/shift_bytes)
601613
def : Pat<(int_aie2_vshift_I512_I512 VEC512:$s1, VEC512:$s2, 0x0, eR:$shift),
602614
(VSHIFT VEC512:$s1, VEC512:$s2, eR:$shift)>;

Diff for: llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp

+42-28
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,39 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic(
152152
return false;
153153
}
154154

155+
// Rewrites MI into a G_AIE_VSHUFFLE whose mode operand is the constant Mode.
//
// MI:       instruction to replace; operand 0 is read as the destination and
//           operands 1 and 2 as the two sources.
// TargetTy: vector type the destination must have for the rewrite to apply.
// Mode:     value materialized as the s32 shuffle-mode constant.
//
// Returns false, before any register or instruction is created, when the
// destination type differs from TargetTy. Otherwise builds the shuffle,
// erases MI and returns true.
bool createVShuffle(MachineInstr &MI, const LLT TargetTy, const uint8_t Mode) {
156+
MachineIRBuilder MIB(MI);
157+
MachineRegisterInfo &MRI = *MIB.getMRI();
158+
const Register DstReg = MI.getOperand(0).getReg();
159+
const LLT DstTy = MRI.getType(DstReg);
160+
161+
if (DstTy != TargetTy)
162+
return false;
163+
164+
const Register Src1 = MI.getOperand(1).getReg();
165+
const Register Src2 = MI.getOperand(2).getReg();
166+
const Register ShuffleModeReg =
167+
MRI.createGenericVirtualRegister(LLT::scalar(32));
168+
169+
// This combiner only cares about the lower bits, so we can pad the
170+
// vector to cover the case where two separate vectors are shuffled
171+
// together.
172+
MIB.buildConstant(ShuffleModeReg, Mode);
173+
if (MRI.getType(Src1) == TargetTy) {
174+
MIB.buildInstr(AIE2::G_AIE_VSHUFFLE, {DstReg},
175+
{Src1, Src2, ShuffleModeReg});
176+
} else {
177+
// The sources are concatenated into a single TargetTy register, and that
// register is passed as both shuffle inputs since we ignore the high part
// of the vector.
// NOTE(review): this path does not check the source types itself;
// presumably callers only match sources whose concatenation has type
// TargetTy - confirm.
178+
const Register TmpRegister = MRI.createGenericVirtualRegister(TargetTy);
179+
MIB.buildConcatVectors(TmpRegister, {Src1, Src2});
180+
MIB.buildInstr(AIE2::G_AIE_VSHUFFLE, {DstReg},
181+
{TmpRegister, TmpRegister, ShuffleModeReg});
182+
}
183+
184+
MI.eraseFromParent();
185+
return true;
186+
}
187+
155188
CombinerHelper::GeneratorType sectionGenerator(const int32_t From,
156189
const int32_t To,
157190
const int32_t Partitions,
@@ -192,34 +225,15 @@ bool AIE2PreLegalizerCombinerImpl::tryCombineShuffleVector(
192225
const LLT V64S8 = LLT::fixed_vector(64, 8);
193226
CombinerHelper::GeneratorType FourPartitions =
194227
sectionGenerator(0, DstNumElts, 4, 1);
195-
if (Helper.matchCombineShuffleVector(MI, FourPartitions, DstNumElts)) {
196-
if (DstTy != V64S8)
197-
return false;
198-
199-
const Register Src1 = MI.getOperand(1).getReg();
200-
const Register Src2 = MI.getOperand(2).getReg();
201-
const Register ShuffleModeReg =
202-
MRI.createGenericVirtualRegister(LLT::scalar(32));
203-
204-
// This combiner only cares about the lower bits, so we can pad the
205-
// vector to cover the case where two separate vectors are shuffled.
206-
// together
207-
MIB.buildConstant(ShuffleModeReg, 35);
208-
209-
if (SrcTy == V64S8) {
210-
MIB.buildInstr(AIE2::G_AIE_VSHUFFLE, {DstReg},
211-
{Src1, Src2, ShuffleModeReg});
212-
} else {
213-
// We reuse the same register since we ignore the high part of the vector
214-
const Register TmpRegister = MRI.createGenericVirtualRegister(V64S8);
215-
MIB.buildConcatVectors(TmpRegister, {Src1, Src2});
216-
MIB.buildInstr(AIE2::G_AIE_VSHUFFLE, {DstReg},
217-
{TmpRegister, TmpRegister, ShuffleModeReg});
218-
}
219-
220-
MI.eraseFromParent();
221-
return true;
222-
}
228+
if (Helper.matchCombineShuffleVector(MI, FourPartitions, DstNumElts))
229+
return createVShuffle(MI, V64S8, 35);
230+
231+
const LLT V32S16 = LLT::fixed_vector(32, 16);
232+
CombinerHelper::GeneratorType FourPartitionByTwo =
233+
sectionGenerator(0, DstNumElts, 4, 2);
234+
if (Helper.matchCombineShuffleVector(MI, FourPartitionByTwo, DstNumElts))
235+
return createVShuffle(MI, V32S16, 29);
236+
223237
return false;
224238
}
225239

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
#
3+
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
8+
#
9+
# RUN: llc -mtriple aie2 -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s
10+
11+
---
# Instruction selection of G_AIE_VSHUFFLE on <16 x s32>. The CHECK lines expect
# the constant mode to be materialized with MOV_RLC_imm10_pseudo and passed to
# VSHUFFLE, with the same source register used for both vector inputs.
# NOTE(review): the name ends in "m35" but the mode constant in the body is 29
# - confirm which of the two is intended.
12+
name: vshuffle_32_m35
13+
legalized: true
14+
regBankSelected: true
15+
tracksRegLiveness: true
16+
stack:
17+
- { id: 0, name: "", size: 128, alignment: 32 }
18+
body: |
19+
bb.0.entry:
20+
liveins: $x2
21+
; CHECK-LABEL: name: vshuffle_32_m35
22+
; CHECK: liveins: $x2
23+
; CHECK-NEXT: {{ $}}
24+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x2
25+
; CHECK-NEXT: [[MOV_RLC_imm10_pseudo:%[0-9]+]]:er = MOV_RLC_imm10_pseudo 29
26+
; CHECK-NEXT: [[VSHUFFLE:%[0-9]+]]:vec512 = VSHUFFLE [[COPY]], [[COPY]], [[MOV_RLC_imm10_pseudo]]
27+
; CHECK-NEXT: $x0 = COPY [[VSHUFFLE]]
28+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
29+
%1:vregbank(<16 x s32>) = COPY $x2
30+
%2:gprregbank(s32) = G_CONSTANT i32 29
31+
%0:vregbank(<16 x s32>) = G_AIE_VSHUFFLE %1:vregbank, %1:vregbank, %2:gprregbank(s32)
32+
$x0 = COPY %0:vregbank(<16 x s32>)
33+
PseudoRET implicit $lr, implicit $x0
34+
...
35+
36+
---
# Instruction selection of G_AIE_VSHUFFLE on <32 x s16>. The CHECK lines expect
# the constant mode to be materialized with MOV_RLC_imm10_pseudo and passed to
# VSHUFFLE, with the same source register used for both vector inputs.
# NOTE(review): the name ends in "m35" but the mode constant in the body is 29
# - confirm which of the two is intended.
37+
name: vshuffle_16_m35
38+
legalized: true
39+
regBankSelected: true
40+
tracksRegLiveness: true
41+
stack:
42+
- { id: 0, name: "", size: 128, alignment: 32 }
43+
body: |
44+
bb.0.entry:
45+
liveins: $x2
46+
; CHECK-LABEL: name: vshuffle_16_m35
47+
; CHECK: liveins: $x2
48+
; CHECK-NEXT: {{ $}}
49+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x2
50+
; CHECK-NEXT: [[MOV_RLC_imm10_pseudo:%[0-9]+]]:er = MOV_RLC_imm10_pseudo 29
51+
; CHECK-NEXT: [[VSHUFFLE:%[0-9]+]]:vec512 = VSHUFFLE [[COPY]], [[COPY]], [[MOV_RLC_imm10_pseudo]]
52+
; CHECK-NEXT: $x0 = COPY [[VSHUFFLE]]
53+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
54+
%1:vregbank(<32 x s16>) = COPY $x2
55+
%2:gprregbank(s32) = G_CONSTANT i32 29
56+
%0:vregbank(<32 x s16>) = G_AIE_VSHUFFLE %1:vregbank, %1:vregbank, %2:gprregbank(s32)
57+
$x0 = COPY %0:vregbank(<32 x s16>)
58+
PseudoRET implicit $lr, implicit $x0
59+
...
60+
61+
---
# Instruction selection of G_AIE_VSHUFFLE on <64 x s8>. The CHECK lines expect
# the constant mode to be materialized with MOV_RLC_imm10_pseudo and passed to
# VSHUFFLE, with the same source register used for both vector inputs.
# NOTE(review): the name ends in "m35" but the mode constant in the body is 29
# - confirm which of the two is intended.
62+
name: vshuffle_8_m35
63+
legalized: true
64+
regBankSelected: true
65+
tracksRegLiveness: true
66+
stack:
67+
- { id: 0, name: "", size: 128, alignment: 32 }
68+
body: |
69+
bb.0.entry:
70+
liveins: $x2
71+
; CHECK-LABEL: name: vshuffle_8_m35
72+
; CHECK: liveins: $x2
73+
; CHECK-NEXT: {{ $}}
74+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x2
75+
; CHECK-NEXT: [[MOV_RLC_imm10_pseudo:%[0-9]+]]:er = MOV_RLC_imm10_pseudo 29
76+
; CHECK-NEXT: [[VSHUFFLE:%[0-9]+]]:vec512 = VSHUFFLE [[COPY]], [[COPY]], [[MOV_RLC_imm10_pseudo]]
77+
; CHECK-NEXT: $x0 = COPY [[VSHUFFLE]]
78+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
79+
%1:vregbank(<64 x s8>) = COPY $x2
80+
%2:gprregbank(s32) = G_CONSTANT i32 29
81+
%0:vregbank(<64 x s8>) = G_AIE_VSHUFFLE %1:vregbank, %1:vregbank, %2:gprregbank(s32)
82+
$x0 = COPY %0:vregbank(<64 x s8>)
83+
PseudoRET implicit $lr, implicit $x0

Diff for: llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-shufflevector.mir

+49-4
Original file line numberDiff line numberDiff line change
@@ -853,12 +853,12 @@ body: |
853853
...
854854

855855
---
856-
name: shuffle_vector_32
856+
name: shuffle_vector_32_4x4
857857
legalized: false
858858
body: |
859859
bb.1.entry:
860860
liveins: $x0, $x1
861-
; CHECK-LABEL: name: shuffle_vector_32
861+
; CHECK-LABEL: name: shuffle_vector_32_4x4
862862
; CHECK: liveins: $x0, $x1
863863
; CHECK-NEXT: {{ $}}
864864
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0
@@ -874,12 +874,12 @@ body: |
874874
...
875875

876876
---
877-
name: shuffle_vector_16
877+
name: shuffle_vector_16_4x4
878878
legalized: false
879879
body: |
880880
bb.1.entry:
881881
liveins: $x0, $x1
882-
; CHECK-LABEL: name: shuffle_vector_16
882+
; CHECK-LABEL: name: shuffle_vector_16_4x4
883883
; CHECK: liveins: $x0, $x1
884884
; CHECK-NEXT: {{ $}}
885885
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0
@@ -939,3 +939,48 @@ body: |
939939
$x2 = COPY %0:_(<64 x s8>)
940940
PseudoRET implicit $lr, implicit $x2
941941
...
942+
943+
---
# G_SHUFFLE_VECTOR of two <32 x s16> sources into a <32 x s16> result. The
# CHECK lines expect it to be replaced by G_AIE_VSHUFFLE taking both sources
# directly, with the mode given by a G_CONSTANT of 29.
944+
name: shuffle_vector_1024_4x8
945+
legalized: false
946+
body: |
947+
bb.1.entry:
948+
liveins: $x0, $x1
949+
; CHECK-LABEL: name: shuffle_vector_1024_4x8
950+
; CHECK: liveins: $x0, $x1
951+
; CHECK-NEXT: {{ $}}
952+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0
953+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s16>) = COPY $x1
954+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
955+
; CHECK-NEXT: [[AIE_VSHUFFLE:%[0-9]+]]:_(<32 x s16>) = G_AIE_VSHUFFLE [[COPY]], [[COPY1]], [[C]](s32)
956+
; CHECK-NEXT: $x2 = COPY [[AIE_VSHUFFLE]](<32 x s16>)
957+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x2
958+
%1:_(<32 x s16>) = COPY $x0
959+
%2:_(<32 x s16>) = COPY $x1
960+
%0:_(<32 x s16>) = G_SHUFFLE_VECTOR %1:_(<32 x s16>), %2:_, shufflemask(0, 1, 8, 9, 16, 17, 24, 25, 2, 3, 10, 11, 18, 19, 26, 27, 4, 5, 12, 13, 20, 21, 28, 29, 6, 7, 14, 15, 22, 23, 30, 31)
961+
$x2 = COPY %0:_(<32 x s16>)
962+
PseudoRET implicit $lr, implicit $x2
963+
...
964+
965+
---
# G_SHUFFLE_VECTOR of two <16 x s16> sources into a <32 x s16> result. The
# CHECK lines expect the sources to be joined by G_CONCAT_VECTORS and the
# concatenated register to be used for both inputs of G_AIE_VSHUFFLE, with the
# mode given by a G_CONSTANT of 29.
966+
name: shuffle_vector_512_4x8
967+
legalized: false
968+
body: |
969+
bb.1.entry:
970+
liveins: $wl0, $wl1
971+
; CHECK-LABEL: name: shuffle_vector_512_4x8
972+
; CHECK: liveins: $wl0, $wl1
973+
; CHECK-NEXT: {{ $}}
974+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s16>) = COPY $wl0
975+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s16>) = COPY $wl1
976+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
977+
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s16>) = G_CONCAT_VECTORS [[COPY]](<16 x s16>), [[COPY1]](<16 x s16>)
978+
; CHECK-NEXT: [[AIE_VSHUFFLE:%[0-9]+]]:_(<32 x s16>) = G_AIE_VSHUFFLE [[CONCAT_VECTORS]], [[CONCAT_VECTORS]], [[C]](s32)
979+
; CHECK-NEXT: $x2 = COPY [[AIE_VSHUFFLE]](<32 x s16>)
980+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x2
981+
%1:_(<16 x s16>) = COPY $wl0
982+
%2:_(<16 x s16>) = COPY $wl1
983+
%0:_(<32 x s16>) = G_SHUFFLE_VECTOR %1:_(<16 x s16>), %2:_, shufflemask(0, 1, 8, 9, 16, 17, 24, 25, 2, 3, 10, 11, 18, 19, 26, 27, 4, 5, 12, 13, 20, 21, 28, 29, 6, 7, 14, 15, 22, 23, 30, 31)
984+
$x2 = COPY %0:_(<32 x s16>)
985+
PseudoRET implicit $lr, implicit $x2
986+
...

0 commit comments

Comments
 (0)