Skip to content

Commit 3ef67c1

Browse files
authored
[CIR][CIRGen][Builtin][Neon] Lower neon_vshlq_v (#1042)
1 parent 723e78a commit 3ef67c1

File tree

2 files changed

+111
-66
lines changed

2 files changed

+111
-66
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp

+5
Original file line number | Diff line number | Diff line change
@@ -2452,6 +2452,11 @@ mlir::Value CIRGenFunction::buildCommonNeonBuiltinExpr(
24522452
: "llvm.aarch64.neon.srhadd";
24532453
break;
24542454
}
2455+
case NEON::BI__builtin_neon_vshlq_v: {
2456+
intrincsName = (intrinicId != altLLVMIntrinsic) ? "llvm.aarch64.neon.ushl"
2457+
: "llvm.aarch64.neon.sshl";
2458+
break;
2459+
}
24552460
case NEON::BI__builtin_neon_vhadd_v:
24562461
case NEON::BI__builtin_neon_vhaddq_v: {
24572462
intrincsName = (intrinicId != altLLVMIntrinsic) ? "llvm.aarch64.neon.uhadd"

clang/test/CIR/CodeGen/AArch64/neon.c

+106 -66
Original file line number | Diff line number | Diff line change
@@ -3405,79 +3405,119 @@ int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
34053405
// return vshl_u64(a, b);
34063406
// }
34073407

3408-
// NYI-LABEL: @test_vshlq_s8(
3409-
// NYI: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %a, <16 x i8> %b)
3410-
// NYI: ret <16 x i8> [[VSHLQ_V_I]]
3411-
// int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
3412-
// return vshlq_s8(a, b);
3413-
// }
3408+
int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
3409+
return vshlq_s8(a, b);
34143410

3415-
// NYI-LABEL: @test_vshlq_s16(
3416-
// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3417-
// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3418-
// NYI: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> %b)
3419-
// NYI: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
3420-
// NYI: ret <8 x i16> [[VSHLQ_V2_I]]
3421-
// int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
3422-
// return vshlq_s16(a, b);
3423-
// }
3411+
// CIR-LABEL: vshlq_s8
3412+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sshl" {{%.*}}, {{%.*}} :
3413+
// CIR-SAME: (!cir.vector<!s8i x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!s8i x 16>
34243414

3425-
// NYI-LABEL: @test_vshlq_s32(
3426-
// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3427-
// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3428-
// NYI: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %a, <4 x i32> %b)
3429-
// NYI: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
3430-
// NYI: ret <4 x i32> [[VSHLQ_V2_I]]
3431-
// int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
3432-
// return vshlq_s32(a, b);
3433-
// }
3415+
// LLVM: {{.*}}test_vshlq_s8(<16 x i8>{{.*}}[[A:%.*]], <16 x i8>{{.*}}[[B:%.*]])
3416+
// LLVM: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
3417+
// LLVM: ret <16 x i8> [[VSHLQ_V_I]]
3418+
}
34343419

3435-
// NYI-LABEL: @test_vshlq_s64(
3436-
// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3437-
// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3438-
// NYI: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %a, <2 x i64> %b)
3439-
// NYI: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
3440-
// NYI: ret <2 x i64> [[VSHLQ_V2_I]]
3441-
// int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
3442-
// return vshlq_s64(a, b);
3443-
// }
3420+
int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
3421+
return vshlq_s16(a, b);
34443422

3445-
// NYI-LABEL: @test_vshlq_u8(
3446-
// NYI: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8> %a, <16 x i8> %b)
3447-
// NYI: ret <16 x i8> [[VSHLQ_V_I]]
3448-
// uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
3449-
// return vshlq_u8(a, b);
3450-
// }
3423+
// CIR-LABEL: vshlq_s16
3424+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sshl" {{%.*}}, {{%.*}} :
3425+
// CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
34513426

3452-
// NYI-LABEL: @test_vshlq_u16(
3453-
// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3454-
// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3455-
// NYI: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %a, <8 x i16> %b)
3456-
// NYI: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
3457-
// NYI: ret <8 x i16> [[VSHLQ_V2_I]]
3458-
// uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
3459-
// return vshlq_u16(a, b);
3460-
// }
3427+
// LLVM: {{.*}}test_vshlq_s16(<8 x i16>{{.*}}[[A:%.*]], <8 x i16>{{.*}}[[B:%.*]])
3428+
// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
3429+
// LLVM: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
3430+
// LLVM: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[A]], <8 x i16> [[B]])
3431+
// LLVM: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
3432+
// LLVM: ret <8 x i16> [[VSHLQ_V2_I]]
3433+
}
34613434

3462-
// NYI-LABEL: @test_vshlq_u32(
3463-
// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3464-
// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3465-
// NYI: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %a, <4 x i32> %b)
3466-
// NYI: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
3467-
// NYI: ret <4 x i32> [[VSHLQ_V2_I]]
3468-
// uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
3469-
// return vshlq_u32(a, b);
3470-
// }
3435+
int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
3436+
return vshlq_s32(a, b);
34713437

3472-
// NYI-LABEL: @test_vshlq_u64(
3473-
// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3474-
// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3475-
// NYI: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %a, <2 x i64> %b)
3476-
// NYI: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
3477-
// NYI: ret <2 x i64> [[VSHLQ_V2_I]]
3478-
// uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
3479-
// return vshlq_u64(a, b);
3480-
// }
3438+
// CIR-LABEL: vshlq_s32
3439+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sshl" {{%.*}}, {{%.*}} :
3440+
// CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
3441+
3442+
// LLVM: {{.*}}test_vshlq_s32(<4 x i32>{{.*}}[[A:%.*]], <4 x i32>{{.*}}[[B:%.*]])
3443+
// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
3444+
// LLVM: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
3445+
// LLVM: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[A]], <4 x i32> [[B]])
3446+
// LLVM: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
3447+
// LLVM: ret <4 x i32> [[VSHLQ_V2_I]]
3448+
}
3449+
3450+
int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
3451+
return vshlq_s64(a, b);
3452+
3453+
// CIR-LABEL: vshlq_s64
3454+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sshl" {{%.*}}, {{%.*}} :
3455+
// CIR-SAME: (!cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
3456+
3457+
// LLVM: {{.*}}test_vshlq_s64(<2 x i64>{{.*}}[[A:%.*]], <2 x i64>{{.*}}[[B:%.*]])
3458+
// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
3459+
// LLVM: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
3460+
// LLVM: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
3461+
// LLVM: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
3462+
// LLVM: ret <2 x i64> [[VSHLQ_V2_I]]
3463+
}
3464+
3465+
uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
3466+
return vshlq_u8(a, b);
3467+
3468+
// CIR-LABEL: vshlq_u8
3469+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.ushl" {{%.*}}, {{%.*}} :
3470+
// CIR-SAME: (!cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>) -> !cir.vector<!u8i x 16>
3471+
3472+
// LLVM: {{.*}}test_vshlq_u8(<16 x i8>{{.*}}[[A:%.*]], <16 x i8>{{.*}}[[B:%.*]])
3473+
// LLVM: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
3474+
// LLVM: ret <16 x i8> [[VSHLQ_V_I]]
3475+
}
3476+
3477+
uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
3478+
return vshlq_u16(a, b);
3479+
3480+
// CIR-LABEL: vshlq_u16
3481+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.ushl" {{%.*}}, {{%.*}} :
3482+
// CIR-SAME: (!cir.vector<!u16i x 8>, !cir.vector<!u16i x 8>) -> !cir.vector<!u16i x 8>
3483+
3484+
// LLVM: {{.*}}test_vshlq_u16(<8 x i16>{{.*}}[[A:%.*]], <8 x i16>{{.*}}[[B:%.*]])
3485+
// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
3486+
// LLVM: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
3487+
// LLVM: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[A]], <8 x i16> [[B]])
3488+
// LLVM: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
3489+
// LLVM: ret <8 x i16> [[VSHLQ_V2_I]]
3490+
}
3491+
3492+
uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
3493+
return vshlq_u32(a, b);
3494+
3495+
// CIR-LABEL: vshlq_u32
3496+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.ushl" {{%.*}}, {{%.*}} :
3497+
// CIR-SAME: (!cir.vector<!u32i x 4>, !cir.vector<!u32i x 4>) -> !cir.vector<!u32i x 4>
3498+
3499+
// LLVM: {{.*}}test_vshlq_u32(<4 x i32>{{.*}}[[A:%.*]], <4 x i32>{{.*}}[[B:%.*]])
3500+
// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
3501+
// LLVM: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
3502+
// LLVM: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[A]], <4 x i32> [[B]])
3503+
// LLVM: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
3504+
// LLVM: ret <4 x i32> [[VSHLQ_V2_I]]
3505+
}
3506+
3507+
uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
3508+
return vshlq_u64(a, b);
3509+
3510+
// CIR-LABEL: vshlq_u64
3511+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.ushl" {{%.*}}, {{%.*}} :
3512+
// CIR-SAME: (!cir.vector<!u64i x 2>, !cir.vector<!u64i x 2>) -> !cir.vector<!u64i x 2>
3513+
3514+
// LLVM: {{.*}}test_vshlq_u64(<2 x i64>{{.*}}[[A:%.*]], <2 x i64>{{.*}}[[B:%.*]])
3515+
// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
3516+
// LLVM: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
3517+
// LLVM: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
3518+
// LLVM: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
3519+
// LLVM: ret <2 x i64> [[VSHLQ_V2_I]]
3520+
}
34813521

34823522
// NYI-LABEL: @test_vqshl_s8(
34833523
// NYI: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> %b)

0 commit comments

Comments
 (0)