Skip to content

Commit 5488ad8

Browse files
authored
[X86] combineConcatVectorOps - add concatenation handling for X86ISD::VPERMILPV nodes (llvm#132355)
Concatenate the VPERMILPV nodes into a single wider node when at least one of the two operands can be merged for free.
1 parent 2e3fa4b commit 5488ad8

File tree

3 files changed

+26
-20
lines changed

3 files changed

+26
-20
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+11
Original file line number | Diff line number | Diff line change
@@ -58164,6 +58164,17 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5816458164
DAG.getTargetConstant(Idx, DL, MVT::i8));
5816558165
}
5816658166
break;
58167+
case X86ISD::VPERMILPV:
58168+
if (!IsSplat && (VT.is256BitVector() ||
58169+
(VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
58170+
SDValue Concat0 = CombineSubOperand(VT, Ops, 0);
58171+
SDValue Concat1 = CombineSubOperand(VT, Ops, 1);
58172+
if (Concat0 || Concat1)
58173+
return DAG.getNode(Opcode, DL, VT,
58174+
Concat0 ? Concat0 : ConcatSubOperand(VT, Ops, 0),
58175+
Concat1 ? Concat1 : ConcatSubOperand(VT, Ops, 1));
58176+
}
58177+
break;
5816758178
case X86ISD::PSHUFB:
5816858179
case X86ISD::PSADBW:
5816958180
case X86ISD::VPMADDUBSW:

llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll

+2 -3
Original file line number | Diff line number | Diff line change
@@ -674,10 +674,9 @@ define <8 x i32> @concat_self_v8i32(<4 x i32> %x) {
674674
define <4 x double> @concat_vpermilvar_v4f64_v2f64(<2 x double> %a0, <2 x double> %a1, <4 x i64> %m) {
675675
; CHECK-LABEL: concat_vpermilvar_v4f64_v2f64:
676676
; CHECK: # %bb.0:
677-
; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3
678-
; CHECK-NEXT: vpermilpd %xmm2, %xmm0, %xmm0
679-
; CHECK-NEXT: vpermilpd %xmm3, %xmm1, %xmm1
677+
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
680678
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
679+
; CHECK-NEXT: vpermilpd %ymm2, %ymm0, %ymm0
681680
; CHECK-NEXT: ret{{[l|q]}}
682681
%m0 = shufflevector <4 x i64> %m, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
683682
%m1 = shufflevector <4 x i64> %m, <4 x i64> poison, <2 x i32> <i32 2, i32 3>

llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll

+13 -17
Original file line number | Diff line number | Diff line change
@@ -985,29 +985,26 @@ define <8 x double> @concat_vpermilvar_v8f64_v2f64(<2 x double> %a0, <2 x double
985985
; X86-NEXT: movl %esp, %ebp
986986
; X86-NEXT: andl $-64, %esp
987987
; X86-NEXT: subl $64, %esp
988-
; X86-NEXT: vmovapd 8(%ebp), %xmm3
989-
; X86-NEXT: vpermilpd 72(%ebp), %xmm0, %xmm0
990-
; X86-NEXT: vpermilpd 88(%ebp), %xmm1, %xmm1
991-
; X86-NEXT: vpermilpd 104(%ebp), %xmm2, %xmm2
992-
; X86-NEXT: vpermilpd 120(%ebp), %xmm3, %xmm3
993-
; X86-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
988+
; X86-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
989+
; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
994990
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
995-
; X86-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
991+
; X86-NEXT: vinsertf128 $1, 8(%ebp), %ymm2, %ymm1
992+
; X86-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
993+
; X86-NEXT: vpermilpd 72(%ebp), %zmm0, %zmm0
996994
; X86-NEXT: movl %ebp, %esp
997995
; X86-NEXT: popl %ebp
998996
; X86-NEXT: retl
999997
;
1000998
; X64-LABEL: concat_vpermilvar_v8f64_v2f64:
1001999
; X64: # %bb.0:
1002-
; X64-NEXT: vextractf128 $1, %ymm4, %xmm5
1003-
; X64-NEXT: vextractf32x4 $2, %zmm4, %xmm6
1004-
; X64-NEXT: vextractf32x4 $3, %zmm4, %xmm7
1005-
; X64-NEXT: vpermilpd %xmm4, %xmm0, %xmm0
1006-
; X64-NEXT: vpermilpd %xmm5, %xmm1, %xmm1
1007-
; X64-NEXT: vpermilpd %xmm6, %xmm2, %xmm2
1008-
; X64-NEXT: vpermilpd %xmm7, %xmm3, %xmm3
1000+
; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1001+
; X64-NEXT: vextractf32x4 $2, %zmm4, %xmm5
1002+
; X64-NEXT: vextractf32x4 $3, %zmm4, %xmm6
1003+
; X64-NEXT: vpermilpd %xmm5, %xmm2, %xmm2
1004+
; X64-NEXT: vpermilpd %xmm6, %xmm3, %xmm3
10091005
; X64-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
10101006
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1007+
; X64-NEXT: vpermilpd %ymm4, %ymm0, %ymm0
10111008
; X64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
10121009
; X64-NEXT: retq
10131010
%m0 = shufflevector <8 x i64> %m, <8 x i64> poison, <2 x i32> <i32 0, i32 1>
@@ -1027,10 +1024,9 @@ define <8 x double> @concat_vpermilvar_v8f64_v2f64(<2 x double> %a0, <2 x double
10271024
define <8 x double> @concat_vpermilvar_v8f64_v4f64(<4 x double> %a0, <4 x double> %a1, <8 x i64> %m) nounwind {
10281025
; CHECK-LABEL: concat_vpermilvar_v8f64_v4f64:
10291026
; CHECK: # %bb.0:
1030-
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
1031-
; CHECK-NEXT: vpermilpd %ymm2, %ymm0, %ymm0
1032-
; CHECK-NEXT: vpermilpd %ymm3, %ymm1, %ymm1
1027+
; CHECK-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
10331028
; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1029+
; CHECK-NEXT: vpermilpd %zmm2, %zmm0, %zmm0
10341030
; CHECK-NEXT: ret{{[l|q]}}
10351031
%m0 = shufflevector <8 x i64> %m, <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
10361032
%m1 = shufflevector <8 x i64> %m, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>

0 commit comments

Comments
 (0)