Skip to content

Commit db505c8

Browse files
committed
[AArch64] Custom lower v4i8 subreg extract.
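A v4i8 subvector extract from a v8i8 vector will usually be scalarized. This patch prevents that by custom lowering it: the v8i8 source is any-extended to v8i16, the corresponding v4i16 subvector is extracted, and the result is truncated back to v4i8. There are a few minor regressions that are fixed up in a follow-up.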
1 parent c8dc571 commit db505c8

16 files changed

+492
-784
lines changed

Diff for: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+13-1
Original file line numberDiff line numberDiff line change
@@ -1426,6 +1426,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
14261426
setOperationAction(ISD::BITCAST, MVT::v2i16, Custom);
14271427
setOperationAction(ISD::BITCAST, MVT::v4i8, Custom);
14281428

1429+
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i8, Custom);
1430+
14291431
setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
14301432
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
14311433
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
@@ -27308,12 +27310,22 @@ void AArch64TargetLowering::ReplaceExtractSubVectorResults(
2730827310
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
2730927311
SDValue In = N->getOperand(0);
2731027312
EVT InVT = In.getValueType();
27313+
SDLoc DL(N);
27314+
27315+
if (N->getValueType(0) == MVT::v4i8 &&
27316+
N->getOperand(0).getValueType() == MVT::v8i8 &&
27317+
(N->getConstantOperandVal(1) == 0 || N->getConstantOperandVal(1) == 4)) {
27318+
SDValue Ext =
27319+
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::v8i16, N->getOperand(0));
27320+
Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Ext,
27321+
N->getOperand(1));
27322+
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::v4i8, Ext));
27323+
}
2731127324

2731227325
// Common code will handle these just fine.
2731327326
if (!InVT.isScalableVector() || !InVT.isInteger())
2731427327
return;
2731527328

27316-
SDLoc DL(N);
2731727329
EVT VT = N->getValueType(0);
2731827330

2731927331
// The following checks bail if this is not a halving operation.

Diff for: llvm/test/CodeGen/AArch64/aarch64-load-ext.ll

+3-6
Original file line numberDiff line numberDiff line change
@@ -273,18 +273,15 @@ define <3 x i16> @fsext_v3i16(ptr %a) {
273273
; CHECK-LE-LABEL: fsext_v3i16:
274274
; CHECK-LE: // %bb.0:
275275
; CHECK-LE-NEXT: ldr s0, [x0]
276-
; CHECK-LE-NEXT: zip1 v0.8b, v0.8b, v0.8b
277-
; CHECK-LE-NEXT: shl v0.4h, v0.4h, #8
278-
; CHECK-LE-NEXT: sshr v0.4h, v0.4h, #8
276+
; CHECK-LE-NEXT: sshll v0.8h, v0.8b, #0
277+
; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0
279278
; CHECK-LE-NEXT: ret
280279
;
281280
; CHECK-BE-LABEL: fsext_v3i16:
282281
; CHECK-BE: // %bb.0:
283282
; CHECK-BE-NEXT: ldr s0, [x0]
284283
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
285-
; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
286-
; CHECK-BE-NEXT: shl v0.4h, v0.4h, #8
287-
; CHECK-BE-NEXT: sshr v0.4h, v0.4h, #8
284+
; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0
288285
; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
289286
; CHECK-BE-NEXT: ret
290287
%x = load <3 x i8>, ptr %a

Diff for: llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll

+9-5
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,8 @@ define void @insert_vec_v16i8_uaddlv_from_v8i8(ptr %0) {
281281
; CHECK-NEXT: movi.2d v2, #0000000000000000
282282
; CHECK-NEXT: uaddlv.8b h1, v0
283283
; CHECK-NEXT: stp q0, q0, [x0, #32]
284-
; CHECK-NEXT: mov.h v2[0], v1[0]
284+
; CHECK-NEXT: mov.b v2[0], v1[0]
285+
; CHECK-NEXT: zip1.8b v2, v2, v2
285286
; CHECK-NEXT: bic.4h v2, #255, lsl #8
286287
; CHECK-NEXT: ushll.4s v2, v2, #0
287288
; CHECK-NEXT: ucvtf.4s v2, v2
@@ -303,8 +304,9 @@ define void @insert_vec_v8i8_uaddlv_from_v8i8(ptr %0) {
303304
; CHECK-NEXT: movi.2d v0, #0000000000000000
304305
; CHECK-NEXT: stp xzr, xzr, [x0, #16]
305306
; CHECK-NEXT: uaddlv.8b h1, v0
306-
; CHECK-NEXT: mov.h v0[0], v1[0]
307-
; CHECK-NEXT: bic.4h v0, #7, lsl #8
307+
; CHECK-NEXT: mov.b v0[0], v1[0]
308+
; CHECK-NEXT: zip1.8b v0, v0, v0
309+
; CHECK-NEXT: bic.4h v0, #255, lsl #8
308310
; CHECK-NEXT: ushll.4s v0, v0, #0
309311
; CHECK-NEXT: ucvtf.4s v0, v0
310312
; CHECK-NEXT: str q0, [x0]
@@ -433,7 +435,8 @@ define void @insert_vec_v8i8_uaddlv_from_v4i32(ptr %0) {
433435
; CHECK-NEXT: movi.2d v1, #0000000000000000
434436
; CHECK-NEXT: stp xzr, xzr, [x0, #16]
435437
; CHECK-NEXT: uaddlv.4s d0, v0
436-
; CHECK-NEXT: mov.h v1[0], v0[0]
438+
; CHECK-NEXT: mov.b v1[0], v0[0]
439+
; CHECK-NEXT: zip1.8b v1, v1, v1
437440
; CHECK-NEXT: bic.4h v1, #255, lsl #8
438441
; CHECK-NEXT: ushll.4s v1, v1, #0
439442
; CHECK-NEXT: ucvtf.4s v1, v1
@@ -457,7 +460,8 @@ define void @insert_vec_v16i8_uaddlv_from_v4i32(ptr %0) {
457460
; CHECK-NEXT: movi.2d v2, #0000000000000000
458461
; CHECK-NEXT: uaddlv.4s d0, v0
459462
; CHECK-NEXT: stp q2, q2, [x0, #32]
460-
; CHECK-NEXT: mov.h v1[0], v0[0]
463+
; CHECK-NEXT: mov.b v1[0], v0[0]
464+
; CHECK-NEXT: zip1.8b v1, v1, v1
461465
; CHECK-NEXT: bic.4h v1, #255, lsl #8
462466
; CHECK-NEXT: ushll.4s v1, v1, #0
463467
; CHECK-NEXT: ucvtf.4s v1, v1

Diff for: llvm/test/CodeGen/AArch64/add.ll

+1-3
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,7 @@ define void @v3i8(ptr %p1, ptr %p2) {
9797
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
9898
; CHECK-SD-NEXT: ldr s0, [x0]
9999
; CHECK-SD-NEXT: ldr s1, [x1]
100-
; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b
101-
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v0.8b
102-
; CHECK-SD-NEXT: add v0.4h, v0.4h, v1.4h
100+
; CHECK-SD-NEXT: uaddl v0.8h, v0.8b, v1.8b
103101
; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b
104102
; CHECK-SD-NEXT: umov w8, v0.h[2]
105103
; CHECK-SD-NEXT: str s1, [sp, #12]

Diff for: llvm/test/CodeGen/AArch64/andorxor.ll

+3-3
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ define void @and_v3i8(ptr %p1, ptr %p2) {
292292
; CHECK-SD-NEXT: ldr s0, [x0]
293293
; CHECK-SD-NEXT: ldr s1, [x1]
294294
; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b
295-
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v0.8b
295+
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v1.8b
296296
; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
297297
; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b
298298
; CHECK-SD-NEXT: umov w8, v0.h[2]
@@ -340,7 +340,7 @@ define void @or_v3i8(ptr %p1, ptr %p2) {
340340
; CHECK-SD-NEXT: ldr s0, [x0]
341341
; CHECK-SD-NEXT: ldr s1, [x1]
342342
; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b
343-
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v0.8b
343+
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v1.8b
344344
; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b
345345
; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b
346346
; CHECK-SD-NEXT: umov w8, v0.h[2]
@@ -388,7 +388,7 @@ define void @xor_v3i8(ptr %p1, ptr %p2) {
388388
; CHECK-SD-NEXT: ldr s0, [x0]
389389
; CHECK-SD-NEXT: ldr s1, [x1]
390390
; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b
391-
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v0.8b
391+
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v1.8b
392392
; CHECK-SD-NEXT: eor v0.8b, v0.8b, v1.8b
393393
; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b
394394
; CHECK-SD-NEXT: umov w8, v0.h[2]

Diff for: llvm/test/CodeGen/AArch64/bitcast-extend.ll

+15-28
Original file line numberDiff line numberDiff line change
@@ -68,20 +68,14 @@ define <4 x i32> @z_i32_v4i32(i32 %x) {
6868
define <4 x i64> @z_i32_v4i64(i32 %x) {
6969
; CHECK-SD-LABEL: z_i32_v4i64:
7070
; CHECK-SD: // %bb.0:
71-
; CHECK-SD-NEXT: fmov s0, w0
72-
; CHECK-SD-NEXT: movi v1.2d, #0x000000000000ff
73-
; CHECK-SD-NEXT: umov w8, v0.b[2]
74-
; CHECK-SD-NEXT: umov w9, v0.b[0]
75-
; CHECK-SD-NEXT: umov w10, v0.b[3]
76-
; CHECK-SD-NEXT: umov w11, v0.b[1]
77-
; CHECK-SD-NEXT: fmov s0, w9
78-
; CHECK-SD-NEXT: fmov s2, w8
79-
; CHECK-SD-NEXT: mov v0.s[1], w11
80-
; CHECK-SD-NEXT: mov v2.s[1], w10
81-
; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
82-
; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
83-
; CHECK-SD-NEXT: and v0.16b, v0.16b, v1.16b
84-
; CHECK-SD-NEXT: and v1.16b, v2.16b, v1.16b
71+
; CHECK-SD-NEXT: fmov s1, w0
72+
; CHECK-SD-NEXT: movi v0.2d, #0x000000000000ff
73+
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v1.8b
74+
; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0
75+
; CHECK-SD-NEXT: ushll2 v2.2d, v1.4s, #0
76+
; CHECK-SD-NEXT: ushll v3.2d, v1.2s, #0
77+
; CHECK-SD-NEXT: and v1.16b, v2.16b, v0.16b
78+
; CHECK-SD-NEXT: and v0.16b, v3.16b, v0.16b
8579
; CHECK-SD-NEXT: ret
8680
;
8781
; CHECK-GI-LABEL: z_i32_v4i64:
@@ -112,9 +106,8 @@ define <4 x i16> @s_i32_v4i16(i32 %x) {
112106
; CHECK-SD-LABEL: s_i32_v4i16:
113107
; CHECK-SD: // %bb.0:
114108
; CHECK-SD-NEXT: fmov s0, w0
115-
; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b
116-
; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
117-
; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
109+
; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
110+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
118111
; CHECK-SD-NEXT: ret
119112
;
120113
; CHECK-GI-LABEL: s_i32_v4i16:
@@ -176,20 +169,14 @@ define <4 x i64> @s_i32_v4i64(i32 %x) {
176169
; CHECK-SD-LABEL: s_i32_v4i64:
177170
; CHECK-SD: // %bb.0:
178171
; CHECK-SD-NEXT: fmov s0, w0
179-
; CHECK-SD-NEXT: umov w8, v0.b[2]
180-
; CHECK-SD-NEXT: umov w9, v0.b[0]
181-
; CHECK-SD-NEXT: umov w10, v0.b[3]
182-
; CHECK-SD-NEXT: umov w11, v0.b[1]
183-
; CHECK-SD-NEXT: fmov s0, w9
184-
; CHECK-SD-NEXT: fmov s1, w8
185-
; CHECK-SD-NEXT: mov v0.s[1], w11
186-
; CHECK-SD-NEXT: mov v1.s[1], w10
172+
; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b
173+
; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
174+
; CHECK-SD-NEXT: ushll2 v1.2d, v0.4s, #0
187175
; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
188-
; CHECK-SD-NEXT: ushll v1.2d, v1.2s, #0
189-
; CHECK-SD-NEXT: shl v0.2d, v0.2d, #56
190176
; CHECK-SD-NEXT: shl v1.2d, v1.2d, #56
191-
; CHECK-SD-NEXT: sshr v0.2d, v0.2d, #56
177+
; CHECK-SD-NEXT: shl v0.2d, v0.2d, #56
192178
; CHECK-SD-NEXT: sshr v1.2d, v1.2d, #56
179+
; CHECK-SD-NEXT: sshr v0.2d, v0.2d, #56
193180
; CHECK-SD-NEXT: ret
194181
;
195182
; CHECK-GI-LABEL: s_i32_v4i64:

Diff for: llvm/test/CodeGen/AArch64/ctlz.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ define void @v3i8(ptr %p1) {
4242
; CHECK-SD: // %bb.0: // %entry
4343
; CHECK-SD-NEXT: sub sp, sp, #16
4444
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
45-
; CHECK-SD-NEXT: movi v0.4h, #8
4645
; CHECK-SD-NEXT: ldr s1, [x0]
47-
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v0.8b
46+
; CHECK-SD-NEXT: movi v0.4h, #8
47+
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v1.8b
4848
; CHECK-SD-NEXT: bic v1.4h, #255, lsl #8
4949
; CHECK-SD-NEXT: clz v1.4h, v1.4h
5050
; CHECK-SD-NEXT: sub v0.4h, v1.4h, v0.4h

Diff for: llvm/test/CodeGen/AArch64/extbinopload.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -1366,11 +1366,11 @@ define <4 x i32> @atomic(ptr %p) {
13661366
; CHECK-LABEL: atomic:
13671367
; CHECK: // %bb.0:
13681368
; CHECK-NEXT: ldar w8, [x0]
1369-
; CHECK-NEXT: movi v0.2d, #0x0000ff000000ff
13701369
; CHECK-NEXT: ldr s1, [x0, #4]
1370+
; CHECK-NEXT: movi v0.2d, #0x0000ff000000ff
13711371
; CHECK-NEXT: fmov s2, w8
13721372
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
1373-
; CHECK-NEXT: zip1 v2.8b, v2.8b, v0.8b
1373+
; CHECK-NEXT: zip1 v2.8b, v2.8b, v2.8b
13741374
; CHECK-NEXT: ushll v1.4s, v1.4h, #3
13751375
; CHECK-NEXT: ushll v2.4s, v2.4h, #0
13761376
; CHECK-NEXT: and v0.16b, v2.16b, v0.16b

Diff for: llvm/test/CodeGen/AArch64/insert-subvector.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,7 @@ define <4 x i8> @load_v4i8_2_2(float %tmp, <4 x i8> %b, ptr %a) {
465465
; CHECK-LABEL: load_v4i8_2_2:
466466
; CHECK: // %bb.0:
467467
; CHECK-NEXT: ldr h0, [x0]
468-
; CHECK-NEXT: zip1 v2.8b, v0.8b, v0.8b
468+
; CHECK-NEXT: ushll v2.8h, v0.8b, #0
469469
; CHECK-NEXT: fmov d0, d1
470470
; CHECK-NEXT: mov v0.s[1], v2.s[0]
471471
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0

0 commit comments

Comments
 (0)