@@ -2846,12 +2846,12 @@ define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mask0(<8 x float> %vec
2846
2846
define <4 x float > @test_masked_8xfloat_to_4xfloat_perm_mask1 (<8 x float > %vec , <4 x float > %vec2 , <4 x float > %mask ) {
2847
2847
; CHECK-LABEL: test_masked_8xfloat_to_4xfloat_perm_mask1:
2848
2848
; CHECK: # %bb.0:
2849
- ; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
2850
- ; CHECK-NEXT: vpmovsxbd {{.*#+}} xmm3 = [1,3,5,0]
2851
- ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
2852
- ; CHECK-NEXT: vcmpeqps %xmm4, %xmm2, %k1
2853
- ; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1}
2854
- ; CHECK-NEXT: vmovaps %xmm1, %xmm0
2849
+ ; CHECK-NEXT: vpmovsxbd {{.*#+}} xmm3 = [1,3,5,3]
2850
+ ; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm3
2851
+ ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
2852
+ ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
2853
+ ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
2854
+ ; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
2855
2855
; CHECK-NEXT: vzeroupper
2856
2856
; CHECK-NEXT: retq
2857
2857
%shuf = shufflevector <8 x float > %vec , <8 x float > undef , <4 x i32 > <i32 1 , i32 3 , i32 5 , i32 0 >
@@ -2863,11 +2863,12 @@ define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mask1(<8 x float> %vec,
2863
2863
define <4 x float > @test_masked_z_8xfloat_to_4xfloat_perm_mask1 (<8 x float > %vec , <4 x float > %mask ) {
2864
2864
; CHECK-LABEL: test_masked_z_8xfloat_to_4xfloat_perm_mask1:
2865
2865
; CHECK: # %bb.0:
2866
- ; CHECK-NEXT: vpmovsxbd {{.*#+}} xmm2 = [1,3,5,0]
2867
- ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
2868
- ; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1
2869
- ; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z}
2870
- ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2866
+ ; CHECK-NEXT: vpmovsxbd {{.*#+}} xmm2 = [1,3,5,3]
2867
+ ; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm2
2868
+ ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
2869
+ ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
2870
+ ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
2871
+ ; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
2871
2872
; CHECK-NEXT: vzeroupper
2872
2873
; CHECK-NEXT: retq
2873
2874
%shuf = shufflevector <8 x float > %vec , <8 x float > undef , <4 x i32 > <i32 1 , i32 3 , i32 5 , i32 0 >
@@ -2878,12 +2879,12 @@ define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mask1(<8 x float> %vec
2878
2879
define <4 x float > @test_masked_8xfloat_to_4xfloat_perm_mask2 (<8 x float > %vec , <4 x float > %vec2 , <4 x float > %mask ) {
2879
2880
; CHECK-LABEL: test_masked_8xfloat_to_4xfloat_perm_mask2:
2880
2881
; CHECK: # %bb.0:
2881
- ; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
2882
- ; CHECK-NEXT: vpmovsxbd {{.*#+}} xmm3 = [3,2,7,0]
2883
- ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
2884
- ; CHECK-NEXT: vcmpeqps %xmm4, %xmm2, %k1
2885
- ; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1}
2886
- ; CHECK-NEXT: vmovaps %xmm1, %xmm0
2882
+ ; CHECK-NEXT: vpmovsxbd {{.*#+}} xmm3 = [3,2,7,3]
2883
+ ; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm3
2884
+ ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
2885
+ ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
2886
+ ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
2887
+ ; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
2887
2888
; CHECK-NEXT: vzeroupper
2888
2889
; CHECK-NEXT: retq
2889
2890
%shuf = shufflevector <8 x float > %vec , <8 x float > undef , <4 x i32 > <i32 3 , i32 2 , i32 7 , i32 0 >
@@ -2895,11 +2896,12 @@ define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mask2(<8 x float> %vec,
2895
2896
define <4 x float > @test_masked_z_8xfloat_to_4xfloat_perm_mask2 (<8 x float > %vec , <4 x float > %mask ) {
2896
2897
; CHECK-LABEL: test_masked_z_8xfloat_to_4xfloat_perm_mask2:
2897
2898
; CHECK: # %bb.0:
2898
- ; CHECK-NEXT: vpmovsxbd {{.*#+}} xmm2 = [3,2,7,0]
2899
- ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
2900
- ; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1
2901
- ; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z}
2902
- ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2899
+ ; CHECK-NEXT: vpmovsxbd {{.*#+}} xmm2 = [3,2,7,3]
2900
+ ; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm2
2901
+ ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
2902
+ ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
2903
+ ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
2904
+ ; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
2903
2905
; CHECK-NEXT: vzeroupper
2904
2906
; CHECK-NEXT: retq
2905
2907
%shuf = shufflevector <8 x float > %vec , <8 x float > undef , <4 x i32 > <i32 3 , i32 2 , i32 7 , i32 0 >
@@ -3885,10 +3887,12 @@ define <2 x double> @test_masked_z_4xdouble_to_2xdouble_perm_mem_mask0(ptr %vp,
3885
3887
define <2 x double > @test_masked_4xdouble_to_2xdouble_perm_mem_mask1 (ptr %vp , <2 x double > %vec2 , <2 x double > %mask ) {
3886
3888
; CHECK-LABEL: test_masked_4xdouble_to_2xdouble_perm_mem_mask1:
3887
3889
; CHECK: # %bb.0:
3888
- ; CHECK-NEXT: vmovapd 16(%rdi), %xmm2
3889
- ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
3890
- ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm1, %k1
3891
- ; CHECK-NEXT: vunpcklpd (%rdi), %xmm2, %xmm0 {%k1} # xmm0 {%k1} = xmm2[0],mem[0]
3890
+ ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
3891
+ ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
3892
+ ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
3893
+ ; CHECK-NEXT: vpermpd $226, (%rdi), %ymm0 {%k1} # ymm0 {%k1} = mem[2,0,2,3]
3894
+ ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
3895
+ ; CHECK-NEXT: vzeroupper
3892
3896
; CHECK-NEXT: retq
3893
3897
%vec = load <4 x double >, ptr %vp
3894
3898
%shuf = shufflevector <4 x double > %vec , <4 x double > undef , <2 x i32 > <i32 2 , i32 0 >
@@ -3900,10 +3904,11 @@ define <2 x double> @test_masked_4xdouble_to_2xdouble_perm_mem_mask1(ptr %vp, <2
3900
3904
define <2 x double > @test_masked_z_4xdouble_to_2xdouble_perm_mem_mask1 (ptr %vp , <2 x double > %mask ) {
3901
3905
; CHECK-LABEL: test_masked_z_4xdouble_to_2xdouble_perm_mem_mask1:
3902
3906
; CHECK: # %bb.0:
3903
- ; CHECK-NEXT: vmovapd 16(%rdi), %xmm1
3904
- ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
3905
- ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm0, %k1
3906
- ; CHECK-NEXT: vunpcklpd (%rdi), %xmm1, %xmm0 {%k1} {z} # xmm0 {%k1} {z} = xmm1[0],mem[0]
3907
+ ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
3908
+ ; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k1
3909
+ ; CHECK-NEXT: vpermpd $226, (%rdi), %ymm0 {%k1} {z} # ymm0 {%k1} {z} = mem[2,0,2,3]
3910
+ ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
3911
+ ; CHECK-NEXT: vzeroupper
3907
3912
; CHECK-NEXT: retq
3908
3913
%vec = load <4 x double >, ptr %vp
3909
3914
%shuf = shufflevector <4 x double > %vec , <4 x double > undef , <2 x i32 > <i32 2 , i32 0 >
@@ -4130,38 +4135,42 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask5(<8 x double>
4130
4135
define <4 x double > @test_8xdouble_to_4xdouble_perm_mask6 (<8 x double > %vec ) {
4131
4136
; CHECK-FAST-LABEL: test_8xdouble_to_4xdouble_perm_mask6:
4132
4137
; CHECK-FAST: # %bb.0:
4133
- ; CHECK-FAST-NEXT: vpmovsxbq {{.*#+}} ymm1 = [5,0,7,0]
4134
- ; CHECK-FAST-NEXT: vpermpd %zmm0, %zmm1, %zmm0
4135
- ; CHECK-FAST-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
4138
+ ; CHECK-FAST-NEXT: vpmovsxbq {{.*#+}} xmm1 = [5,0]
4139
+ ; CHECK-FAST-NEXT: vpermpd %zmm0, %zmm1, %zmm1
4140
+ ; CHECK-FAST-NEXT: vpmovsxbq {{.*#+}} xmm2 = [7,0]
4141
+ ; CHECK-FAST-NEXT: vpermpd %zmm0, %zmm2, %zmm0
4142
+ ; CHECK-FAST-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4136
4143
; CHECK-FAST-NEXT: retq
4137
4144
;
4138
4145
; CHECK-FAST-PERLANE-LABEL: test_8xdouble_to_4xdouble_perm_mask6:
4139
4146
; CHECK-FAST-PERLANE: # %bb.0:
4140
- ; CHECK-FAST-PERLANE-NEXT: vextractf64x4 $1, %zmm0, %ymm1
4141
- ; CHECK-FAST-PERLANE-NEXT: vbroadcastsd %xmm0, %ymm0
4142
- ; CHECK-FAST-PERLANE-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
4147
+ ; CHECK-FAST-PERLANE-NEXT: vpmovsxbq {{.*#+}} ymm1 = [5,0,7,0]
4148
+ ; CHECK-FAST-PERLANE-NEXT: vpermpd %zmm0, %zmm1, %zmm0
4149
+ ; CHECK-FAST-PERLANE-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
4143
4150
; CHECK-FAST-PERLANE-NEXT: retq
4144
4151
%res = shufflevector <8 x double > %vec , <8 x double > undef , <4 x i32 > <i32 5 , i32 0 , i32 7 , i32 0 >
4145
4152
ret <4 x double > %res
4146
4153
}
4147
4154
define <4 x double > @test_masked_8xdouble_to_4xdouble_perm_mask6 (<8 x double > %vec , <4 x double > %vec2 , <4 x double > %mask ) {
4148
4155
; CHECK-FAST-LABEL: test_masked_8xdouble_to_4xdouble_perm_mask6:
4149
4156
; CHECK-FAST: # %bb.0:
4150
- ; CHECK-FAST-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
4151
- ; CHECK-FAST-NEXT: vpmovsxbq {{.*#+}} ymm3 = [5,0,7,0]
4152
- ; CHECK-FAST-NEXT: vxorpd %xmm4, %xmm4, %xmm4
4153
- ; CHECK-FAST-NEXT: vcmpeqpd %ymm4, %ymm2, %k1
4154
- ; CHECK-FAST-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1}
4155
- ; CHECK-FAST-NEXT: vmovapd %ymm1, %ymm0
4157
+ ; CHECK-FAST-NEXT: vpmovsxbq {{.*#+}} xmm3 = [5,0]
4158
+ ; CHECK-FAST-NEXT: vpermpd %zmm0, %zmm3, %zmm3
4159
+ ; CHECK-FAST-NEXT: vpmovsxbq {{.*#+}} xmm4 = [7,0]
4160
+ ; CHECK-FAST-NEXT: vpermpd %zmm0, %zmm4, %zmm0
4161
+ ; CHECK-FAST-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
4162
+ ; CHECK-FAST-NEXT: vxorpd %xmm3, %xmm3, %xmm3
4163
+ ; CHECK-FAST-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
4164
+ ; CHECK-FAST-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
4156
4165
; CHECK-FAST-NEXT: retq
4157
4166
;
4158
4167
; CHECK-FAST-PERLANE-LABEL: test_masked_8xdouble_to_4xdouble_perm_mask6:
4159
4168
; CHECK-FAST-PERLANE: # %bb.0:
4160
- ; CHECK-FAST-PERLANE-NEXT: vextractf64x4 $1, %zmm0, %ymm3
4161
- ; CHECK-FAST-PERLANE-NEXT: vbroadcastsd %xmm0, %ymm0
4169
+ ; CHECK-FAST-PERLANE-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
4170
+ ; CHECK-FAST-PERLANE-NEXT: vpmovsxbq {{.*#+}} ymm3 = [5,0,7,0]
4162
4171
; CHECK-FAST-PERLANE-NEXT: vxorpd %xmm4, %xmm4, %xmm4
4163
4172
; CHECK-FAST-PERLANE-NEXT: vcmpeqpd %ymm4, %ymm2, %k1
4164
- ; CHECK-FAST-PERLANE-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm3[1],ymm0[1],ymm3[3],ymm0[3]
4173
+ ; CHECK-FAST-PERLANE-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1}
4165
4174
; CHECK-FAST-PERLANE-NEXT: vmovapd %ymm1, %ymm0
4166
4175
; CHECK-FAST-PERLANE-NEXT: retq
4167
4176
%shuf = shufflevector <8 x double > %vec , <8 x double > undef , <4 x i32 > <i32 5 , i32 0 , i32 7 , i32 0 >
@@ -4173,20 +4182,23 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask6(<8 x double> %v
4173
4182
define <4 x double > @test_masked_z_8xdouble_to_4xdouble_perm_mask6 (<8 x double > %vec , <4 x double > %mask ) {
4174
4183
; CHECK-FAST-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mask6:
4175
4184
; CHECK-FAST: # %bb.0:
4176
- ; CHECK-FAST-NEXT: vpmovsxbq {{.*#+}} ymm2 = [5,0,7,0]
4177
- ; CHECK-FAST-NEXT: vxorpd %xmm3, %xmm3, %xmm3
4178
- ; CHECK-FAST-NEXT: vcmpeqpd %ymm3, %ymm1, %k1
4179
- ; CHECK-FAST-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z}
4180
- ; CHECK-FAST-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
4185
+ ; CHECK-FAST-NEXT: vpmovsxbq {{.*#+}} xmm2 = [5,0]
4186
+ ; CHECK-FAST-NEXT: vpermpd %zmm0, %zmm2, %zmm2
4187
+ ; CHECK-FAST-NEXT: vpmovsxbq {{.*#+}} xmm3 = [7,0]
4188
+ ; CHECK-FAST-NEXT: vpermpd %zmm0, %zmm3, %zmm0
4189
+ ; CHECK-FAST-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
4190
+ ; CHECK-FAST-NEXT: vxorpd %xmm2, %xmm2, %xmm2
4191
+ ; CHECK-FAST-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
4192
+ ; CHECK-FAST-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z}
4181
4193
; CHECK-FAST-NEXT: retq
4182
4194
;
4183
4195
; CHECK-FAST-PERLANE-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mask6:
4184
4196
; CHECK-FAST-PERLANE: # %bb.0:
4185
- ; CHECK-FAST-PERLANE-NEXT: vextractf64x4 $1, %zmm0, %ymm2
4186
- ; CHECK-FAST-PERLANE-NEXT: vbroadcastsd %xmm0, %ymm0
4197
+ ; CHECK-FAST-PERLANE-NEXT: vpmovsxbq {{.*#+}} ymm2 = [5,0,7,0]
4187
4198
; CHECK-FAST-PERLANE-NEXT: vxorpd %xmm3, %xmm3, %xmm3
4188
4199
; CHECK-FAST-PERLANE-NEXT: vcmpeqpd %ymm3, %ymm1, %k1
4189
- ; CHECK-FAST-PERLANE-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm2[1],ymm0[1],ymm2[3],ymm0[3]
4200
+ ; CHECK-FAST-PERLANE-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z}
4201
+ ; CHECK-FAST-PERLANE-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
4190
4202
; CHECK-FAST-PERLANE-NEXT: retq
4191
4203
%shuf = shufflevector <8 x double > %vec , <8 x double > undef , <4 x i32 > <i32 5 , i32 0 , i32 7 , i32 0 >
4192
4204
%cmp = fcmp oeq <4 x double > %mask , zeroinitializer
@@ -4493,9 +4505,12 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask2(ptr %vp,
4493
4505
define <4 x double > @test_8xdouble_to_4xdouble_perm_mem_mask3 (ptr %vp ) {
4494
4506
; CHECK-LABEL: test_8xdouble_to_4xdouble_perm_mem_mask3:
4495
4507
; CHECK: # %bb.0:
4496
- ; CHECK-NEXT: vmovapd (%rdi), %ymm1
4497
- ; CHECK-NEXT: vpmovsxbq {{.*#+}} ymm0 = [4,2,1,0]
4498
- ; CHECK-NEXT: vpermi2pd 32(%rdi), %ymm1, %ymm0
4508
+ ; CHECK-NEXT: vpmovsxbq {{.*#+}} xmm0 = [4,2]
4509
+ ; CHECK-NEXT: vmovaps (%rdi), %zmm1
4510
+ ; CHECK-NEXT: vpermpd %zmm1, %zmm0, %zmm0
4511
+ ; CHECK-NEXT: vmovddup 8(%rdi), %xmm2 # xmm2 = mem[0,0]
4512
+ ; CHECK-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
4513
+ ; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4499
4514
; CHECK-NEXT: retq
4500
4515
%vec = load <8 x double >, ptr %vp
4501
4516
%res = shufflevector <8 x double > %vec , <8 x double > undef , <4 x i32 > <i32 4 , i32 2 , i32 1 , i32 0 >
@@ -4504,12 +4519,15 @@ define <4 x double> @test_8xdouble_to_4xdouble_perm_mem_mask3(ptr %vp) {
4504
4519
define <4 x double > @test_masked_8xdouble_to_4xdouble_perm_mem_mask3 (ptr %vp , <4 x double > %vec2 , <4 x double > %mask ) {
4505
4520
; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mem_mask3:
4506
4521
; CHECK: # %bb.0:
4507
- ; CHECK-NEXT: vmovapd (%rdi), %ymm2
4508
- ; CHECK-NEXT: vpmovsxbq {{.*#+}} ymm3 = [4,2,1,0]
4509
- ; CHECK-NEXT: vpermi2pd 32(%rdi), %ymm2, %ymm3
4510
- ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
4511
- ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
4512
- ; CHECK-NEXT: vmovapd %ymm3, %ymm0 {%k1}
4522
+ ; CHECK-NEXT: vpmovsxbq {{.*#+}} xmm2 = [4,2]
4523
+ ; CHECK-NEXT: vmovapd (%rdi), %zmm3
4524
+ ; CHECK-NEXT: vpermpd %zmm3, %zmm2, %zmm2
4525
+ ; CHECK-NEXT: vmovddup 8(%rdi), %xmm4 # xmm4 = mem[0,0]
4526
+ ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0]
4527
+ ; CHECK-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
4528
+ ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
4529
+ ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm1, %k1
4530
+ ; CHECK-NEXT: vmovapd %ymm2, %ymm0 {%k1}
4513
4531
; CHECK-NEXT: retq
4514
4532
%vec = load <8 x double >, ptr %vp
4515
4533
%shuf = shufflevector <8 x double > %vec , <8 x double > undef , <4 x i32 > <i32 4 , i32 2 , i32 1 , i32 0 >
@@ -4521,12 +4539,15 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask3(ptr %vp, <4
4521
4539
define <4 x double > @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask3 (ptr %vp , <4 x double > %mask ) {
4522
4540
; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mem_mask3:
4523
4541
; CHECK: # %bb.0:
4524
- ; CHECK-NEXT: vmovapd (%rdi), %ymm2
4525
- ; CHECK-NEXT: vpmovsxbq {{.*#+}} ymm1 = [4,2,1,0]
4526
- ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
4527
- ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm0, %k1
4528
- ; CHECK-NEXT: vpermi2pd 32(%rdi), %ymm2, %ymm1 {%k1} {z}
4529
- ; CHECK-NEXT: vmovapd %ymm1, %ymm0
4542
+ ; CHECK-NEXT: vpmovsxbq {{.*#+}} xmm1 = [4,2]
4543
+ ; CHECK-NEXT: vmovapd (%rdi), %zmm2
4544
+ ; CHECK-NEXT: vpermpd %zmm2, %zmm1, %zmm1
4545
+ ; CHECK-NEXT: vmovddup 8(%rdi), %xmm3 # xmm3 = mem[0,0]
4546
+ ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0]
4547
+ ; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
4548
+ ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
4549
+ ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm0, %k1
4550
+ ; CHECK-NEXT: vmovapd %ymm1, %ymm0 {%k1} {z}
4530
4551
; CHECK-NEXT: retq
4531
4552
%vec = load <8 x double >, ptr %vp
4532
4553
%shuf = shufflevector <8 x double > %vec , <8 x double > undef , <4 x i32 > <i32 4 , i32 2 , i32 1 , i32 0 >
0 commit comments