Skip to content

Commit a4760e9

Browse files
committed
[SelectionDAG] Convert to OR mask if all insertions are -1
We already did this for insertions of 0 using an AND mask; we can do the same for insertions of -1 using an OR mask.
1 parent 4b1f1f7 commit a4760e9

File tree

4 files changed

+57
-163
lines changed

4 files changed

+57
-163
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

+13-1
Original file line numberDiff line numberDiff line change
@@ -22974,7 +22974,6 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
2297422974
}
2297522975

2297622976
// If all insertions are zero value, try to convert to AND mask.
22977-
// TODO: Do this for -1 with OR mask?
2297822977
if (!LegalOperations && llvm::isNullConstant(InVal) &&
2297922978
all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
2298022979
count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
@@ -22987,6 +22986,19 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
2298722986
DAG.getBuildVector(VT, DL, Mask));
2298822987
}
2298922988

22989+
// If all insertions are -1, try to convert to OR mask.
22990+
if (!LegalOperations && llvm::isAllOnesConstant(InVal) &&
22991+
all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
22992+
count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
22993+
SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
22994+
SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
22995+
SmallVector<SDValue, 8> Mask(NumElts);
22996+
for (unsigned I = 0; I != NumElts; ++I)
22997+
Mask[I] = Ops[I] ? AllOnes : Zero;
22998+
return DAG.getNode(ISD::OR, DL, VT, CurVec,
22999+
DAG.getBuildVector(VT, DL, Mask));
23000+
}
23001+
2299023002
// Failed to find a match in the chain - bail.
2299123003
break;
2299223004
}

llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll

+5-11
Original file line numberDiff line numberDiff line change
@@ -101,19 +101,13 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind {
101101
define i8 @test_v9i8(<9 x i8> %a) nounwind {
102102
; CHECK-LABEL: test_v9i8:
103103
; CHECK: // %bb.0:
104-
; CHECK-NEXT: mov v1.16b, v0.16b
105-
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
106-
; CHECK-NEXT: mov v1.b[9], w8
107-
; CHECK-NEXT: mov v1.b[10], w8
108-
; CHECK-NEXT: mov v1.b[11], w8
109-
; CHECK-NEXT: mov v1.b[12], w8
110-
; CHECK-NEXT: mov v1.b[13], w8
111-
; CHECK-NEXT: mov v1.b[14], w8
112-
; CHECK-NEXT: mov v1.b[15], w8
104+
; CHECK-NEXT: movi v1.2d, #0xffffff00ffffff00
105+
; CHECK-NEXT: fmov x8, d0
106+
; CHECK-NEXT: orr v1.16b, v0.16b, v1.16b
113107
; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
114108
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
115-
; CHECK-NEXT: fmov x8, d0
116-
; CHECK-NEXT: and x8, x8, x8, lsr #32
109+
; CHECK-NEXT: fmov x9, d0
110+
; CHECK-NEXT: and x8, x9, x8, lsr #32
117111
; CHECK-NEXT: and x8, x8, x8, lsr #16
118112
; CHECK-NEXT: lsr x9, x8, #8
119113
; CHECK-NEXT: and w0, w8, w9

llvm/test/CodeGen/X86/avx-cvt-3.ll

+2-6
Original file line numberDiff line numberDiff line change
@@ -48,17 +48,13 @@ define <8 x float> @sitofp_shuffle_zero_v8i32(<8 x i32> %a0) {
4848
define <8 x float> @sitofp_insert_allbits_v8i32(<8 x i32> %a0) {
4949
; X86-LABEL: sitofp_insert_allbits_v8i32:
5050
; X86: # %bb.0:
51-
; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
52-
; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
53-
; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
51+
; X86-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
5452
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
5553
; X86-NEXT: retl
5654
;
5755
; X64-LABEL: sitofp_insert_allbits_v8i32:
5856
; X64: # %bb.0:
59-
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
60-
; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
61-
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
57+
; X64-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
6258
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
6359
; X64-NEXT: retq
6460
%1 = insertelement <8 x i32> %a0, i32 -1, i32 0

llvm/test/CodeGen/X86/insertelement-ones.ll

+37-145
Original file line numberDiff line numberDiff line change
@@ -150,59 +150,32 @@ define <4 x i32> @insert_v4i32_01x3(<4 x i32> %a) {
150150
define <8 x i32> @insert_v8i32_x12345x7(<8 x i32> %a) {
151151
; SSE2-LABEL: insert_v8i32_x12345x7:
152152
; SSE2: # %bb.0:
153-
; SSE2-NEXT: movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
154-
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
155-
; SSE2-NEXT: movl $-1, %eax
156-
; SSE2-NEXT: movd %eax, %xmm2
157-
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
158-
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
153+
; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
154+
; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
159155
; SSE2-NEXT: retq
160156
;
161157
; SSE3-LABEL: insert_v8i32_x12345x7:
162158
; SSE3: # %bb.0:
163-
; SSE3-NEXT: movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
164-
; SSE3-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
165-
; SSE3-NEXT: movl $-1, %eax
166-
; SSE3-NEXT: movd %eax, %xmm2
167-
; SSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
168-
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
159+
; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
160+
; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
169161
; SSE3-NEXT: retq
170162
;
171163
; SSSE3-LABEL: insert_v8i32_x12345x7:
172164
; SSSE3: # %bb.0:
173-
; SSSE3-NEXT: movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
174-
; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
175-
; SSSE3-NEXT: movl $-1, %eax
176-
; SSSE3-NEXT: movd %eax, %xmm2
177-
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
178-
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
165+
; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
166+
; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
179167
; SSSE3-NEXT: retq
180168
;
181169
; SSE41-LABEL: insert_v8i32_x12345x7:
182170
; SSE41: # %bb.0:
183-
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
184-
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3,4,5,6,7]
185-
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
171+
; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
172+
; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
186173
; SSE41-NEXT: retq
187174
;
188-
; AVX1-LABEL: insert_v8i32_x12345x7:
189-
; AVX1: # %bb.0:
190-
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
191-
; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
192-
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
193-
; AVX1-NEXT: retq
194-
;
195-
; AVX2-LABEL: insert_v8i32_x12345x7:
196-
; AVX2: # %bb.0:
197-
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
198-
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
199-
; AVX2-NEXT: retq
200-
;
201-
; AVX512-LABEL: insert_v8i32_x12345x7:
202-
; AVX512: # %bb.0:
203-
; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
204-
; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
205-
; AVX512-NEXT: retq
175+
; AVX-LABEL: insert_v8i32_x12345x7:
176+
; AVX: # %bb.0:
177+
; AVX-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
178+
; AVX-NEXT: retq
206179
%1 = insertelement <8 x i32> %a, i32 -1, i32 0
207180
%2 = insertelement <8 x i32> %1, i32 -1, i32 6
208181
ret <8 x i32> %2
@@ -211,35 +184,27 @@ define <8 x i32> @insert_v8i32_x12345x7(<8 x i32> %a) {
211184
define <8 x i16> @insert_v8i16_x12345x7(<8 x i16> %a) {
212185
; SSE2-LABEL: insert_v8i16_x12345x7:
213186
; SSE2: # %bb.0:
214-
; SSE2-NEXT: movl $65535, %eax # imm = 0xFFFF
215-
; SSE2-NEXT: pinsrw $0, %eax, %xmm0
216-
; SSE2-NEXT: pinsrw $6, %eax, %xmm0
187+
; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
217188
; SSE2-NEXT: retq
218189
;
219190
; SSE3-LABEL: insert_v8i16_x12345x7:
220191
; SSE3: # %bb.0:
221-
; SSE3-NEXT: movl $65535, %eax # imm = 0xFFFF
222-
; SSE3-NEXT: pinsrw $0, %eax, %xmm0
223-
; SSE3-NEXT: pinsrw $6, %eax, %xmm0
192+
; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
224193
; SSE3-NEXT: retq
225194
;
226195
; SSSE3-LABEL: insert_v8i16_x12345x7:
227196
; SSSE3: # %bb.0:
228-
; SSSE3-NEXT: movl $65535, %eax # imm = 0xFFFF
229-
; SSSE3-NEXT: pinsrw $0, %eax, %xmm0
230-
; SSSE3-NEXT: pinsrw $6, %eax, %xmm0
197+
; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
231198
; SSSE3-NEXT: retq
232199
;
233200
; SSE41-LABEL: insert_v8i16_x12345x7:
234201
; SSE41: # %bb.0:
235-
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
236-
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
202+
; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
237203
; SSE41-NEXT: retq
238204
;
239205
; AVX-LABEL: insert_v8i16_x12345x7:
240206
; AVX: # %bb.0:
241-
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
242-
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
207+
; AVX-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
243208
; AVX-NEXT: retq
244209
%1 = insertelement <8 x i16> %a, i16 -1, i32 0
245210
%2 = insertelement <8 x i16> %1, i16 -1, i32 6
@@ -249,60 +214,32 @@ define <8 x i16> @insert_v8i16_x12345x7(<8 x i16> %a) {
249214
define <16 x i16> @insert_v16i16_x12345x789ABCDEx(<16 x i16> %a) {
250215
; SSE2-LABEL: insert_v16i16_x12345x789ABCDEx:
251216
; SSE2: # %bb.0:
252-
; SSE2-NEXT: movl $65535, %eax # imm = 0xFFFF
253-
; SSE2-NEXT: pinsrw $0, %eax, %xmm0
254-
; SSE2-NEXT: pinsrw $6, %eax, %xmm0
255-
; SSE2-NEXT: pinsrw $7, %eax, %xmm1
217+
; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
218+
; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
256219
; SSE2-NEXT: retq
257220
;
258221
; SSE3-LABEL: insert_v16i16_x12345x789ABCDEx:
259222
; SSE3: # %bb.0:
260-
; SSE3-NEXT: movl $65535, %eax # imm = 0xFFFF
261-
; SSE3-NEXT: pinsrw $0, %eax, %xmm0
262-
; SSE3-NEXT: pinsrw $6, %eax, %xmm0
263-
; SSE3-NEXT: pinsrw $7, %eax, %xmm1
223+
; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
224+
; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
264225
; SSE3-NEXT: retq
265226
;
266227
; SSSE3-LABEL: insert_v16i16_x12345x789ABCDEx:
267228
; SSSE3: # %bb.0:
268-
; SSSE3-NEXT: movl $65535, %eax # imm = 0xFFFF
269-
; SSSE3-NEXT: pinsrw $0, %eax, %xmm0
270-
; SSSE3-NEXT: pinsrw $6, %eax, %xmm0
271-
; SSSE3-NEXT: pinsrw $7, %eax, %xmm1
229+
; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
230+
; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
272231
; SSSE3-NEXT: retq
273232
;
274233
; SSE41-LABEL: insert_v16i16_x12345x789ABCDEx:
275234
; SSE41: # %bb.0:
276-
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
277-
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5],xmm2[6],xmm0[7]
278-
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
235+
; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
236+
; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
279237
; SSE41-NEXT: retq
280238
;
281-
; AVX1-LABEL: insert_v16i16_x12345x789ABCDEx:
282-
; AVX1: # %bb.0:
283-
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = [65535,0,0,0]
284-
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
285-
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
286-
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
287-
; AVX1-NEXT: retq
288-
;
289-
; AVX2-LABEL: insert_v16i16_x12345x789ABCDEx:
290-
; AVX2: # %bb.0:
291-
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
292-
; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
293-
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
294-
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm2[7],ymm0[8,9,10,11,12,13,14],ymm2[15]
295-
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
296-
; AVX2-NEXT: retq
297-
;
298-
; AVX512-LABEL: insert_v16i16_x12345x789ABCDEx:
299-
; AVX512: # %bb.0:
300-
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
301-
; AVX512-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
302-
; AVX512-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
303-
; AVX512-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm2[7],ymm0[8,9,10,11,12,13,14],ymm2[15]
304-
; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
305-
; AVX512-NEXT: retq
239+
; AVX-LABEL: insert_v16i16_x12345x789ABCDEx:
240+
; AVX: # %bb.0:
241+
; AVX-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
242+
; AVX-NEXT: retq
306243
%1 = insertelement <16 x i16> %a, i16 -1, i32 0
307244
%2 = insertelement <16 x i16> %1, i16 -1, i32 6
308245
%3 = insertelement <16 x i16> %2, i16 -1, i32 15
@@ -313,33 +250,26 @@ define <16 x i8> @insert_v16i8_x123456789ABCDEx(<16 x i8> %a) {
313250
; SSE2-LABEL: insert_v16i8_x123456789ABCDEx:
314251
; SSE2: # %bb.0:
315252
; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
316-
; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
317253
; SSE2-NEXT: retq
318254
;
319255
; SSE3-LABEL: insert_v16i8_x123456789ABCDEx:
320256
; SSE3: # %bb.0:
321257
; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
322-
; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
323258
; SSE3-NEXT: retq
324259
;
325260
; SSSE3-LABEL: insert_v16i8_x123456789ABCDEx:
326261
; SSSE3: # %bb.0:
327262
; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
328-
; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
329263
; SSSE3-NEXT: retq
330264
;
331265
; SSE41-LABEL: insert_v16i8_x123456789ABCDEx:
332266
; SSE41: # %bb.0:
333-
; SSE41-NEXT: movl $255, %eax
334-
; SSE41-NEXT: pinsrb $0, %eax, %xmm0
335-
; SSE41-NEXT: pinsrb $15, %eax, %xmm0
267+
; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
336268
; SSE41-NEXT: retq
337269
;
338270
; AVX-LABEL: insert_v16i8_x123456789ABCDEx:
339271
; AVX: # %bb.0:
340-
; AVX-NEXT: movl $255, %eax
341-
; AVX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
342-
; AVX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
272+
; AVX-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
343273
; AVX-NEXT: retq
344274
%1 = insertelement <16 x i8> %a, i8 -1, i32 0
345275
%2 = insertelement <16 x i8> %1, i8 -1, i32 15
@@ -350,69 +280,31 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) {
350280
; SSE2-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
351281
; SSE2: # %bb.0:
352282
; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
353-
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255]
354-
; SSE2-NEXT: orps %xmm2, %xmm0
355283
; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
356-
; SSE2-NEXT: orps %xmm2, %xmm1
357284
; SSE2-NEXT: retq
358285
;
359286
; SSE3-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
360287
; SSE3: # %bb.0:
361288
; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
362-
; SSE3-NEXT: movaps {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255]
363-
; SSE3-NEXT: orps %xmm2, %xmm0
364289
; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
365-
; SSE3-NEXT: orps %xmm2, %xmm1
366290
; SSE3-NEXT: retq
367291
;
368292
; SSSE3-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
369293
; SSSE3: # %bb.0:
370294
; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
371-
; SSSE3-NEXT: movaps {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255]
372-
; SSSE3-NEXT: orps %xmm2, %xmm0
373295
; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
374-
; SSSE3-NEXT: orps %xmm2, %xmm1
375296
; SSSE3-NEXT: retq
376297
;
377298
; SSE41-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
378299
; SSE41: # %bb.0:
379-
; SSE41-NEXT: movl $255, %eax
380-
; SSE41-NEXT: pinsrb $0, %eax, %xmm0
381-
; SSE41-NEXT: pinsrb $15, %eax, %xmm0
382-
; SSE41-NEXT: pinsrb $14, %eax, %xmm1
383-
; SSE41-NEXT: pinsrb $15, %eax, %xmm1
300+
; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
301+
; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
384302
; SSE41-NEXT: retq
385303
;
386-
; AVX1-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
387-
; AVX1: # %bb.0:
388-
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = [255,0,0,0]
389-
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
390-
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
391-
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
392-
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
393-
; AVX1-NEXT: retq
394-
;
395-
; AVX2-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
396-
; AVX2: # %bb.0:
397-
; AVX2-NEXT: movl $255, %eax
398-
; AVX2-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
399-
; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
400-
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
401-
; AVX2-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
402-
; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
403-
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
404-
; AVX2-NEXT: retq
405-
;
406-
; AVX512-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
407-
; AVX512: # %bb.0:
408-
; AVX512-NEXT: movl $255, %eax
409-
; AVX512-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
410-
; AVX512-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
411-
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
412-
; AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
413-
; AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
414-
; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
415-
; AVX512-NEXT: retq
304+
; AVX-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
305+
; AVX: # %bb.0:
306+
; AVX-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
307+
; AVX-NEXT: retq
416308
%1 = insertelement <32 x i8> %a, i8 -1, i32 0
417309
%2 = insertelement <32 x i8> %1, i8 -1, i32 15
418310
%3 = insertelement <32 x i8> %2, i8 -1, i32 30

0 commit comments

Comments
 (0)