Skip to content

[AArch64] Custom lower v4i8 subreg extract. #133438

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: users/davemgreen/a64-zipoverushll
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1426,6 +1426,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::v2i16, Custom);
setOperationAction(ISD::BITCAST, MVT::v4i8, Custom);

setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i8, Custom);

setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
Expand Down Expand Up @@ -27308,12 +27310,22 @@ void AArch64TargetLowering::ReplaceExtractSubVectorResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
SDValue In = N->getOperand(0);
EVT InVT = In.getValueType();
SDLoc DL(N);

if (N->getValueType(0) == MVT::v4i8 &&
N->getOperand(0).getValueType() == MVT::v8i8 &&
(N->getConstantOperandVal(1) == 0 || N->getConstantOperandVal(1) == 4)) {
SDValue Ext =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::v8i16, N->getOperand(0));
Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Ext,
N->getOperand(1));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::v4i8, Ext));
}

// Common code will handle these just fine.
if (!InVT.isScalableVector() || !InVT.isInteger())
return;

SDLoc DL(N);
EVT VT = N->getValueType(0);

// The following checks bail if this is not a halving operation.
Expand Down
9 changes: 3 additions & 6 deletions llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -273,18 +273,15 @@ define <3 x i16> @fsext_v3i16(ptr %a) {
; CHECK-LE-LABEL: fsext_v3i16:
; CHECK-LE: // %bb.0:
; CHECK-LE-NEXT: ldr s0, [x0]
; CHECK-LE-NEXT: zip1 v0.8b, v0.8b, v0.8b
; CHECK-LE-NEXT: shl v0.4h, v0.4h, #8
; CHECK-LE-NEXT: sshr v0.4h, v0.4h, #8
; CHECK-LE-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: fsext_v3i16:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: ldr s0, [x0]
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
; CHECK-BE-NEXT: shl v0.4h, v0.4h, #8
; CHECK-BE-NEXT: sshr v0.4h, v0.4h, #8
; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
; CHECK-BE-NEXT: ret
%x = load <3 x i8>, ptr %a
Expand Down
14 changes: 9 additions & 5 deletions llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,8 @@ define void @insert_vec_v16i8_uaddlv_from_v8i8(ptr %0) {
; CHECK-NEXT: movi.2d v2, #0000000000000000
; CHECK-NEXT: uaddlv.8b h1, v0
; CHECK-NEXT: stp q0, q0, [x0, #32]
; CHECK-NEXT: mov.h v2[0], v1[0]
; CHECK-NEXT: mov.b v2[0], v1[0]
; CHECK-NEXT: zip1.8b v2, v2, v2
; CHECK-NEXT: bic.4h v2, #255, lsl #8
; CHECK-NEXT: ushll.4s v2, v2, #0
; CHECK-NEXT: ucvtf.4s v2, v2
Expand All @@ -303,8 +304,9 @@ define void @insert_vec_v8i8_uaddlv_from_v8i8(ptr %0) {
; CHECK-NEXT: movi.2d v0, #0000000000000000
; CHECK-NEXT: stp xzr, xzr, [x0, #16]
; CHECK-NEXT: uaddlv.8b h1, v0
; CHECK-NEXT: mov.h v0[0], v1[0]
; CHECK-NEXT: bic.4h v0, #7, lsl #8
; CHECK-NEXT: mov.b v0[0], v1[0]
; CHECK-NEXT: zip1.8b v0, v0, v0
; CHECK-NEXT: bic.4h v0, #255, lsl #8
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: ucvtf.4s v0, v0
; CHECK-NEXT: str q0, [x0]
Expand Down Expand Up @@ -433,7 +435,8 @@ define void @insert_vec_v8i8_uaddlv_from_v4i32(ptr %0) {
; CHECK-NEXT: movi.2d v1, #0000000000000000
; CHECK-NEXT: stp xzr, xzr, [x0, #16]
; CHECK-NEXT: uaddlv.4s d0, v0
; CHECK-NEXT: mov.h v1[0], v0[0]
; CHECK-NEXT: mov.b v1[0], v0[0]
; CHECK-NEXT: zip1.8b v1, v1, v1
; CHECK-NEXT: bic.4h v1, #255, lsl #8
; CHECK-NEXT: ushll.4s v1, v1, #0
; CHECK-NEXT: ucvtf.4s v1, v1
Expand All @@ -457,7 +460,8 @@ define void @insert_vec_v16i8_uaddlv_from_v4i32(ptr %0) {
; CHECK-NEXT: movi.2d v2, #0000000000000000
; CHECK-NEXT: uaddlv.4s d0, v0
; CHECK-NEXT: stp q2, q2, [x0, #32]
; CHECK-NEXT: mov.h v1[0], v0[0]
; CHECK-NEXT: mov.b v1[0], v0[0]
; CHECK-NEXT: zip1.8b v1, v1, v1
; CHECK-NEXT: bic.4h v1, #255, lsl #8
; CHECK-NEXT: ushll.4s v1, v1, #0
; CHECK-NEXT: ucvtf.4s v1, v1
Expand Down
4 changes: 1 addition & 3 deletions llvm/test/CodeGen/AArch64/add.ll
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,7 @@ define void @v3i8(ptr %p1, ptr %p2) {
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
; CHECK-SD-NEXT: ldr s0, [x0]
; CHECK-SD-NEXT: ldr s1, [x1]
; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v0.8b
; CHECK-SD-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: uaddl v0.8h, v0.8b, v1.8b
; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b
; CHECK-SD-NEXT: umov w8, v0.h[2]
; CHECK-SD-NEXT: str s1, [sp, #12]
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AArch64/andorxor.ll
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ define void @and_v3i8(ptr %p1, ptr %p2) {
; CHECK-SD-NEXT: ldr s0, [x0]
; CHECK-SD-NEXT: ldr s1, [x1]
; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v0.8b
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v1.8b
; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b
; CHECK-SD-NEXT: umov w8, v0.h[2]
Expand Down Expand Up @@ -340,7 +340,7 @@ define void @or_v3i8(ptr %p1, ptr %p2) {
; CHECK-SD-NEXT: ldr s0, [x0]
; CHECK-SD-NEXT: ldr s1, [x1]
; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v0.8b
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v1.8b
; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b
; CHECK-SD-NEXT: umov w8, v0.h[2]
Expand Down Expand Up @@ -388,7 +388,7 @@ define void @xor_v3i8(ptr %p1, ptr %p2) {
; CHECK-SD-NEXT: ldr s0, [x0]
; CHECK-SD-NEXT: ldr s1, [x1]
; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v0.8b
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v1.8b
; CHECK-SD-NEXT: eor v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b
; CHECK-SD-NEXT: umov w8, v0.h[2]
Expand Down
43 changes: 15 additions & 28 deletions llvm/test/CodeGen/AArch64/bitcast-extend.ll
Original file line number Diff line number Diff line change
Expand Up @@ -68,20 +68,14 @@ define <4 x i32> @z_i32_v4i32(i32 %x) {
define <4 x i64> @z_i32_v4i64(i32 %x) {
; CHECK-SD-LABEL: z_i32_v4i64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: fmov s0, w0
; CHECK-SD-NEXT: movi v1.2d, #0x000000000000ff
; CHECK-SD-NEXT: umov w8, v0.b[2]
; CHECK-SD-NEXT: umov w9, v0.b[0]
; CHECK-SD-NEXT: umov w10, v0.b[3]
; CHECK-SD-NEXT: umov w11, v0.b[1]
; CHECK-SD-NEXT: fmov s0, w9
; CHECK-SD-NEXT: fmov s2, w8
; CHECK-SD-NEXT: mov v0.s[1], w11
; CHECK-SD-NEXT: mov v2.s[1], w10
; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
; CHECK-SD-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: and v1.16b, v2.16b, v1.16b
; CHECK-SD-NEXT: fmov s1, w0
; CHECK-SD-NEXT: movi v0.2d, #0x000000000000ff
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v1.8b
; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-SD-NEXT: ushll2 v2.2d, v1.4s, #0
; CHECK-SD-NEXT: ushll v3.2d, v1.2s, #0
; CHECK-SD-NEXT: and v1.16b, v2.16b, v0.16b
; CHECK-SD-NEXT: and v0.16b, v3.16b, v0.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: z_i32_v4i64:
Expand Down Expand Up @@ -112,9 +106,8 @@ define <4 x i16> @s_i32_v4i16(i32 %x) {
; CHECK-SD-LABEL: s_i32_v4i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: fmov s0, w0
; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b
; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: s_i32_v4i16:
Expand Down Expand Up @@ -176,20 +169,14 @@ define <4 x i64> @s_i32_v4i64(i32 %x) {
; CHECK-SD-LABEL: s_i32_v4i64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: fmov s0, w0
; CHECK-SD-NEXT: umov w8, v0.b[2]
; CHECK-SD-NEXT: umov w9, v0.b[0]
; CHECK-SD-NEXT: umov w10, v0.b[3]
; CHECK-SD-NEXT: umov w11, v0.b[1]
; CHECK-SD-NEXT: fmov s0, w9
; CHECK-SD-NEXT: fmov s1, w8
; CHECK-SD-NEXT: mov v0.s[1], w11
; CHECK-SD-NEXT: mov v1.s[1], w10
; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b
; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-SD-NEXT: ushll2 v1.2d, v0.4s, #0
; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-SD-NEXT: ushll v1.2d, v1.2s, #0
; CHECK-SD-NEXT: shl v0.2d, v0.2d, #56
; CHECK-SD-NEXT: shl v1.2d, v1.2d, #56
; CHECK-SD-NEXT: sshr v0.2d, v0.2d, #56
; CHECK-SD-NEXT: shl v0.2d, v0.2d, #56
; CHECK-SD-NEXT: sshr v1.2d, v1.2d, #56
; CHECK-SD-NEXT: sshr v0.2d, v0.2d, #56
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: s_i32_v4i64:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/ctlz.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ define void @v3i8(ptr %p1) {
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sub sp, sp, #16
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
; CHECK-SD-NEXT: movi v0.4h, #8
; CHECK-SD-NEXT: ldr s1, [x0]
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v0.8b
; CHECK-SD-NEXT: movi v0.4h, #8
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v1.8b
; CHECK-SD-NEXT: bic v1.4h, #255, lsl #8
; CHECK-SD-NEXT: clz v1.4h, v1.4h
; CHECK-SD-NEXT: sub v0.4h, v1.4h, v0.4h
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/extbinopload.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1366,11 +1366,11 @@ define <4 x i32> @atomic(ptr %p) {
; CHECK-LABEL: atomic:
; CHECK: // %bb.0:
; CHECK-NEXT: ldar w8, [x0]
; CHECK-NEXT: movi v0.2d, #0x0000ff000000ff
; CHECK-NEXT: ldr s1, [x0, #4]
; CHECK-NEXT: movi v0.2d, #0x0000ff000000ff
; CHECK-NEXT: fmov s2, w8
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: zip1 v2.8b, v2.8b, v0.8b
; CHECK-NEXT: zip1 v2.8b, v2.8b, v2.8b
; CHECK-NEXT: ushll v1.4s, v1.4h, #3
; CHECK-NEXT: ushll v2.4s, v2.4h, #0
; CHECK-NEXT: and v0.16b, v2.16b, v0.16b
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/insert-subvector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,7 @@ define <4 x i8> @load_v4i8_2_2(float %tmp, <4 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v4i8_2_2:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: zip1 v2.8b, v0.8b, v0.8b
; CHECK-NEXT: ushll v2.8h, v0.8b, #0
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: mov v0.s[1], v2.s[0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
Expand Down
Loading
Loading