Skip to content

Commit ebe6489

Browse files
[GISel][CombinerHelper] Add a combiner to concatenate the first halfs of two vectors together
1 parent 38729a7 commit ebe6489

File tree

6 files changed

+353
-142
lines changed

6 files changed

+353
-142
lines changed

Diff for: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

+38
Original file line numberDiff line numberDiff line change
@@ -556,6 +556,44 @@ bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
556556
return true;
557557
}
558558

559+
// After this point, it is assumed our shufflevectors work on vectors that can
560+
// be split into two
561+
if ((DstNumElts % 2) != 0)
562+
return false;
563+
564+
// {0, 1, ..., n/2-1, n, n+1, ..., 3n/2-1} (n = DstNumElts) -> G_UNMERGE_VALUES
565+
// Take the first halves of the two vectors and concatenate them into one
566+
// vector.
567+
GeneratorType FirstEightA = adderGenerator(0, (DstNumElts / 2) - 1, 1);
568+
GeneratorType FirstEightB =
569+
adderGenerator(DstNumElts, DstNumElts + (DstNumElts / 2) - 1, 1);
570+
571+
auto UnmergeMatcher = SmallVector<GeneratorType>{FirstEightA, FirstEightB};
572+
GeneratorType FirstAndThird = concatGenerators(UnmergeMatcher);
573+
if (matchCombineShuffleVector(MI, FirstAndThird, (DstNumElts / 2) - 1)) {
574+
if (DstNumElts <= 2)
575+
return false;
576+
const Register DstReg = MI.getOperand(0).getReg();
577+
const LLT HalfSrcTy =
578+
LLT::fixed_vector(SrcNumElts / 2, SrcTy.getScalarType());
579+
const Register HalfOfA = createUnmergeValue(
580+
MI, MI.getOperand(1).getReg(),
581+
MRI.createGenericVirtualRegister(HalfSrcTy), 0, 0, SrcNumElts);
582+
const Register HalfOfB = createUnmergeValue(
583+
MI, MI.getOperand(2).getReg(),
584+
MRI.createGenericVirtualRegister(HalfSrcTy), 0, 0, SrcNumElts);
585+
586+
const ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
587+
if (Mask[0] <= 0) {
588+
Builder.buildMergeLikeInstr(DstReg, {HalfOfA, HalfOfB});
589+
} else {
590+
Builder.buildMergeLikeInstr(DstReg, {HalfOfB, HalfOfA});
591+
}
592+
593+
MI.eraseFromParent();
594+
return true;
595+
}
596+
559597
return false;
560598
}
561599

Diff for: llvm/test/CodeGen/AArch64/GlobalISel/combine-shufflevector.mir

+7-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
22
# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
3+
# Modifications (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
34

45
---
56
name: shuffle_concat_1
@@ -101,7 +102,9 @@ body: |
101102
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
102103
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS %a(<4 x s8>), %b(<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>)
103104
; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS %c(<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>)
104-
; CHECK-NEXT: %z:_(<16 x s8>) = G_SHUFFLE_VECTOR [[CONCAT_VECTORS]](<16 x s8>), [[CONCAT_VECTORS1]], shufflemask(0, undef, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, undef, undef, undef, undef)
105+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s8>), [[UV1:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s8>)
106+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s8>), [[UV3:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<16 x s8>)
107+
; CHECK-NEXT: %z:_(<16 x s8>) = G_CONCAT_VECTORS [[UV]](<8 x s8>), [[UV2]](<8 x s8>)
105108
; CHECK-NEXT: $q0 = COPY %z(<16 x s8>)
106109
; CHECK-NEXT: RET_ReallyLR implicit $q0
107110
%p1:_(p0) = COPY $x0
@@ -179,7 +182,9 @@ body: |
179182
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
180183
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS %a(<4 x s8>), %b(<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>)
181184
; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS %c(<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>)
182-
; CHECK-NEXT: %z:_(<16 x s8>) = G_SHUFFLE_VECTOR [[CONCAT_VECTORS]](<16 x s8>), [[CONCAT_VECTORS1]], shufflemask(undef, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, undef, undef, undef, undef)
185+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s8>), [[UV1:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s8>)
186+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s8>), [[UV3:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<16 x s8>)
187+
; CHECK-NEXT: %z:_(<16 x s8>) = G_CONCAT_VECTORS [[UV]](<8 x s8>), [[UV2]](<8 x s8>)
183188
; CHECK-NEXT: $q0 = COPY %z(<16 x s8>)
184189
; CHECK-NEXT: RET_ReallyLR implicit $q0
185190
%p1:_(p0) = COPY $x0

Diff for: llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir

+5-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -mtriple aarch64-apple-ios -run-pass=aarch64-prelegalizer-combiner %s -o - | FileCheck %s
3+
# Modifications (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
34

45
# Check that we canonicalize shuffle_vector(Src1, Src2, mask(0,1,2,3))
56
# into concat_vector(Src1, Src2).
@@ -270,8 +271,10 @@ body: |
270271
; CHECK-NEXT: {{ $}}
271272
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
272273
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
273-
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(4, 5, 0, 1)
274-
; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
274+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
275+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
276+
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV2]](<2 x s32>), [[UV]](<2 x s32>)
277+
; CHECK-NEXT: RET_ReallyLR implicit [[CONCAT_VECTORS]](<4 x s32>)
275278
%0:_(<4 x s32>) = COPY $q0
276279
%1:_(<4 x s32>) = COPY $q1
277280
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1(<4 x s32>), shufflemask(4,5,0,1)

Diff for: llvm/test/CodeGen/AArch64/arm64-neon-copy.ll

+19-54
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SD
33
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4+
; Modifications (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
45

56
; CHECK-GI: warning: Instruction selection used fallback path for test_bitcastv2f32tov1f64
67
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_bitcastv1f64tov2f32
@@ -1776,19 +1777,10 @@ entry:
17761777
}
17771778

17781779
define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
1779-
; CHECK-SD-LABEL: test_concat_v16i8_v16i8_v16i8:
1780-
; CHECK-SD: // %bb.0: // %entry
1781-
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
1782-
; CHECK-SD-NEXT: ret
1783-
;
1784-
; CHECK-GI-LABEL: test_concat_v16i8_v16i8_v16i8:
1785-
; CHECK-GI: // %bb.0: // %entry
1786-
; CHECK-GI-NEXT: adrp x8, .LCPI126_0
1787-
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
1788-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI126_0]
1789-
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
1790-
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
1791-
; CHECK-GI-NEXT: ret
1780+
; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
1781+
; CHECK: // %bb.0: // %entry
1782+
; CHECK-NEXT: mov v0.d[1], v1.d[0]
1783+
; CHECK-NEXT: ret
17921784
entry:
17931785
%vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
17941786
ret <16 x i8> %vecinit30
@@ -1803,9 +1795,7 @@ define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
18031795
;
18041796
; CHECK-GI-LABEL: test_concat_v16i8_v8i8_v16i8:
18051797
; CHECK-GI: // %bb.0: // %entry
1806-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
1807-
; CHECK-GI-NEXT: adrp x8, .LCPI127_0
1808-
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
1798+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
18091799
; CHECK-GI-NEXT: mov b2, v0.b[1]
18101800
; CHECK-GI-NEXT: mov b3, v0.b[2]
18111801
; CHECK-GI-NEXT: mov b4, v0.b[3]
@@ -1814,14 +1804,13 @@ define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
18141804
; CHECK-GI-NEXT: mov b7, v0.b[6]
18151805
; CHECK-GI-NEXT: mov b16, v0.b[7]
18161806
; CHECK-GI-NEXT: mov v0.b[1], v2.b[0]
1817-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI127_0]
18181807
; CHECK-GI-NEXT: mov v0.b[2], v3.b[0]
18191808
; CHECK-GI-NEXT: mov v0.b[3], v4.b[0]
18201809
; CHECK-GI-NEXT: mov v0.b[4], v5.b[0]
18211810
; CHECK-GI-NEXT: mov v0.b[5], v6.b[0]
18221811
; CHECK-GI-NEXT: mov v0.b[6], v7.b[0]
18231812
; CHECK-GI-NEXT: mov v0.b[7], v16.b[0]
1824-
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
1813+
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
18251814
; CHECK-GI-NEXT: ret
18261815
entry:
18271816
%vecext = extractelement <8 x i8> %x, i32 0
@@ -1999,19 +1988,10 @@ entry:
19991988
}
20001989

20011990
define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
2002-
; CHECK-SD-LABEL: test_concat_v8i16_v8i16_v8i16:
2003-
; CHECK-SD: // %bb.0: // %entry
2004-
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
2005-
; CHECK-SD-NEXT: ret
2006-
;
2007-
; CHECK-GI-LABEL: test_concat_v8i16_v8i16_v8i16:
2008-
; CHECK-GI: // %bb.0: // %entry
2009-
; CHECK-GI-NEXT: adrp x8, .LCPI130_0
2010-
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
2011-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI130_0]
2012-
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
2013-
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
2014-
; CHECK-GI-NEXT: ret
1991+
; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
1992+
; CHECK: // %bb.0: // %entry
1993+
; CHECK-NEXT: mov v0.d[1], v1.d[0]
1994+
; CHECK-NEXT: ret
20151995
entry:
20161996
%vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
20171997
ret <8 x i16> %vecinit14
@@ -2026,17 +2006,14 @@ define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
20262006
;
20272007
; CHECK-GI-LABEL: test_concat_v8i16_v4i16_v8i16:
20282008
; CHECK-GI: // %bb.0: // %entry
2029-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
2030-
; CHECK-GI-NEXT: adrp x8, .LCPI131_0
2031-
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
2009+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
20322010
; CHECK-GI-NEXT: mov h2, v0.h[1]
20332011
; CHECK-GI-NEXT: mov h3, v0.h[2]
20342012
; CHECK-GI-NEXT: mov h4, v0.h[3]
20352013
; CHECK-GI-NEXT: mov v0.h[1], v2.h[0]
2036-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI131_0]
20372014
; CHECK-GI-NEXT: mov v0.h[2], v3.h[0]
20382015
; CHECK-GI-NEXT: mov v0.h[3], v4.h[0]
2039-
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
2016+
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
20402017
; CHECK-GI-NEXT: ret
20412018
entry:
20422019
%vecext = extractelement <4 x i16> %x, i32 0
@@ -2142,19 +2119,10 @@ entry:
21422119
}
21432120

21442121
define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
2145-
; CHECK-SD-LABEL: test_concat_v4i32_v4i32_v4i32:
2146-
; CHECK-SD: // %bb.0: // %entry
2147-
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
2148-
; CHECK-SD-NEXT: ret
2149-
;
2150-
; CHECK-GI-LABEL: test_concat_v4i32_v4i32_v4i32:
2151-
; CHECK-GI: // %bb.0: // %entry
2152-
; CHECK-GI-NEXT: adrp x8, .LCPI134_0
2153-
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
2154-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI134_0]
2155-
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
2156-
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
2157-
; CHECK-GI-NEXT: ret
2122+
; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
2123+
; CHECK: // %bb.0: // %entry
2124+
; CHECK-NEXT: mov v0.d[1], v1.d[0]
2125+
; CHECK-NEXT: ret
21582126
entry:
21592127
%vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
21602128
ret <4 x i32> %vecinit6
@@ -2169,13 +2137,10 @@ define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
21692137
;
21702138
; CHECK-GI-LABEL: test_concat_v4i32_v2i32_v4i32:
21712139
; CHECK-GI: // %bb.0: // %entry
2172-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
2173-
; CHECK-GI-NEXT: adrp x8, .LCPI135_0
2174-
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
2140+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
21752141
; CHECK-GI-NEXT: mov s2, v0.s[1]
21762142
; CHECK-GI-NEXT: mov v0.s[1], v2.s[0]
2177-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI135_0]
2178-
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
2143+
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
21792144
; CHECK-GI-NEXT: ret
21802145
entry:
21812146
%vecext = extractelement <2 x i32> %x, i32 0

0 commit comments

Comments
 (0)