Skip to content

Commit 3e5facc

Browse files
Ruhung, Juhung Li, mr-c
authored
neon riscv64: Enable RVV segment load/store only when we have __riscv_zvlsseg flag. (#1285)
Co-authored-by: Juhung Li <[email protected]> Co-authored-by: Michael R. Crusoe <[email protected]>
1 parent 6450e25 commit 3e5facc

File tree

7 files changed

+183
-168
lines changed

7 files changed

+183
-168
lines changed

simde/arm/neon/ld2.h

+30-28
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ simde_vld2_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
5959
simde_vget_high_s8(q)
6060
};
6161
return u;
62-
#elif defined(SIMDE_RISCV_V_NATIVE)
62+
#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
6363
simde_int8x8_private a_[2];
6464
vint8m1x2_t dest = __riscv_vlseg2e8_v_i8m1x2(&ptr[0], 8);
6565
a_[0].sv64 = __riscv_vget_v_i8m1x2_i8m1(dest, 0);
@@ -102,7 +102,7 @@ simde_int16x4x2_t
102102
simde_vld2_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
103103
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
104104
return vld2_s16(ptr);
105-
#elif defined(SIMDE_RISCV_V_NATIVE)
105+
#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
106106
simde_int16x4_private a_[2];
107107
vint16m1x2_t dest = __riscv_vlseg2e16_v_i16m1x2(&ptr[0], 4);
108108
a_[0].sv64 = __riscv_vget_v_i16m1x2_i16m1(dest, 0);
@@ -152,7 +152,7 @@ simde_int32x2x2_t
152152
simde_vld2_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
153153
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
154154
return vld2_s32(ptr);
155-
#elif defined(SIMDE_RISCV_V_NATIVE)
155+
#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
156156
simde_int32x2_private a_[2];
157157
vint32m1x2_t dest = __riscv_vlseg2e32_v_i32m1x2(&ptr[0], 2);
158158
a_[0].sv64 = __riscv_vget_v_i32m1x2_i32m1(dest, 0);
@@ -195,7 +195,7 @@ simde_int64x1x2_t
195195
simde_vld2_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) {
196196
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
197197
return vld2_s64(ptr);
198-
#elif defined(SIMDE_RISCV_V_NATIVE)
198+
#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
199199
simde_int64x1_private a_[2];
200200
vint64m1x2_t dest = __riscv_vlseg2e64_v_i64m1x2(&ptr[0], 1);
201201
a_[0].sv64 = __riscv_vget_v_i64m1x2_i64m1(dest, 0);
@@ -249,7 +249,7 @@ simde_vld2_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
249249
simde_vget_high_u8(q)
250250
};
251251
return u;
252-
#elif defined(SIMDE_RISCV_V_NATIVE)
252+
#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
253253
simde_uint8x8_private a_[2];
254254
vuint8m1x2_t dest = __riscv_vlseg2e8_v_u8m1x2(&ptr[0], 8);
255255
a_[0].sv64 = __riscv_vget_v_u8m1x2_u8m1(dest, 0);
@@ -292,7 +292,7 @@ simde_uint16x4x2_t
292292
simde_vld2_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
293293
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
294294
return vld2_u16(ptr);
295-
#elif defined(SIMDE_RISCV_V_NATIVE)
295+
#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
296296
simde_uint16x4_private a_[2];
297297
vuint16m1x2_t dest = __riscv_vlseg2e16_v_u16m1x2(&ptr[0], 4);
298298
a_[0].sv64 = __riscv_vget_v_u16m1x2_u16m1(dest, 0);
@@ -342,7 +342,7 @@ simde_uint32x2x2_t
342342
simde_vld2_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
343343
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
344344
return vld2_u32(ptr);
345-
#elif defined(SIMDE_RISCV_V_NATIVE)
345+
#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
346346
simde_uint32x2_private a_[2];
347347
vuint32m1x2_t dest = __riscv_vlseg2e32_v_u32m1x2(&ptr[0], 2);
348348
a_[0].sv64 = __riscv_vget_v_u32m1x2_u32m1(dest, 0);
@@ -385,7 +385,7 @@ simde_uint64x1x2_t
385385
simde_vld2_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) {
386386
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
387387
return vld2_u64(ptr);
388-
#elif defined(SIMDE_RISCV_V_NATIVE)
388+
#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
389389
simde_uint64x1_private a_[2];
390390
vuint64m1x2_t dest = __riscv_vlseg2e64_v_u64m1x2(&ptr[0], 1);
391391
a_[0].sv64 = __riscv_vget_v_u64m1x2_u64m1(dest, 0);
@@ -428,7 +428,8 @@ simde_float16x4x2_t
428428
simde_vld2_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
429429
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
430430
return vld2_f16(ptr);
431-
#elif defined(SIMDE_RISCV_V_NATIVE) && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128)
431+
#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) \
432+
&& SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128)
432433
simde_float16x4_private r_[2];
433434
vfloat16m1x2_t dest = __riscv_vlseg2e16_v_f16m1x2((_Float16 *)&ptr[0], 4);
434435
r_[0].sv64 = __riscv_vget_v_f16m1x2_f16m1(dest, 0);
@@ -466,7 +467,7 @@ simde_float32x2x2_t
466467
simde_vld2_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
467468
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
468469
return vld2_f32(ptr);
469-
#elif defined(SIMDE_RISCV_V_NATIVE)
470+
#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
470471
simde_float32x2_private r_[2];
471472
vfloat32m1x2_t dest = __riscv_vlseg2e32_v_f32m1x2(&ptr[0], 2);
472473
r_[0].sv64 = __riscv_vget_v_f32m1x2_f32m1(dest, 0);
@@ -509,7 +510,7 @@ simde_float64x1x2_t
509510
simde_vld2_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) {
510511
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
511512
return vld2_f64(ptr);
512-
#elif defined(SIMDE_RISCV_V_NATIVE)
513+
#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
513514
simde_float64x1_private r_[2];
514515
vfloat64m1x2_t dest = __riscv_vlseg2e64_v_f64m1x2(&ptr[0], 1);
515516
r_[0].sv64 = __riscv_vget_v_f64m1x2_f64m1(dest, 0);
@@ -552,7 +553,7 @@ simde_int8x16x2_t
552553
simde_vld2q_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) {
553554
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
554555
return vld2q_s8(ptr);
555-
#elif defined(SIMDE_RISCV_V_NATIVE)
556+
#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
556557
simde_int8x16_private a_[2];
557558
vint8m1x2_t dest = __riscv_vlseg2e8_v_i8m1x2(&ptr[0], 16);
558559
a_[0].sv128 = __riscv_vget_v_i8m1x2_i8m1(dest, 0);
@@ -602,7 +603,7 @@ simde_int32x4x2_t
602603
simde_vld2q_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
603604
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
604605
return vld2q_s32(ptr);
605-
#elif defined(SIMDE_RISCV_V_NATIVE)
606+
#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
606607
simde_int32x4_private a_[2];
607608
vint32m1x2_t dest = __riscv_vlseg2e32_v_i32m1x2(&ptr[0], 4);
608609
a_[0].sv128 = __riscv_vget_v_i32m1x2_i32m1(dest, 0);
@@ -652,7 +653,7 @@ simde_int16x8x2_t
652653
simde_vld2q_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
653654
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
654655
return vld2q_s16(ptr);
655-
#elif defined(SIMDE_RISCV_V_NATIVE)
656+
#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
656657
simde_int16x8_private r_[2];
657658
vint16m1x2_t dest = __riscv_vlseg2e16_v_i16m1x2(&ptr[0], 8);
658659
r_[0].sv128 = __riscv_vget_v_i16m1x2_i16m1(dest, 0);
@@ -702,7 +703,7 @@ simde_int64x2x2_t
702703
simde_vld2q_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
703704
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
704705
return vld2q_s64(ptr);
705-
#elif defined(SIMDE_RISCV_V_NATIVE)
706+
#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
706707
simde_int64x2_private r_[2];
707708
vint64m1x2_t dest = __riscv_vlseg2e64_v_i64m1x2(&ptr[0], 2);
708709
r_[0].sv128 = __riscv_vget_v_i64m1x2_i64m1(dest, 0);
@@ -739,7 +740,7 @@ simde_uint8x16x2_t
739740
simde_vld2q_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) {
740741
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
741742
return vld2q_u8(ptr);
742-
#elif defined(SIMDE_RISCV_V_NATIVE)
743+
#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
743744
simde_uint8x16_private r_[2];
744745
vuint8m1x2_t dest = __riscv_vlseg2e8_v_u8m1x2(&ptr[0], 16);
745746
r_[0].sv128 = __riscv_vget_v_u8m1x2_u8m1(dest, 0);
@@ -789,7 +790,7 @@ simde_uint16x8x2_t
789790
simde_vld2q_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
790791
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
791792
return vld2q_u16(ptr);
792-
#elif defined(SIMDE_RISCV_V_NATIVE)
793+
#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
793794
simde_uint16x8_private r_[2];
794795
vuint16m1x2_t dest = __riscv_vlseg2e16_v_u16m1x2(&ptr[0], 8);
795796
r_[0].sv128 = __riscv_vget_v_u16m1x2_u16m1(dest, 0);
@@ -839,7 +840,7 @@ simde_uint32x4x2_t
839840
simde_vld2q_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
840841
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
841842
return vld2q_u32(ptr);
842-
#elif defined(SIMDE_RISCV_V_NATIVE)
843+
#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
843844
simde_uint32x4_private r_[2];
844845
vuint32m1x2_t dest = __riscv_vlseg2e32_v_u32m1x2(&ptr[0], 4);
845846
r_[0].sv128 = __riscv_vget_v_u32m1x2_u32m1(dest, 0);
@@ -889,7 +890,7 @@ simde_uint64x2x2_t
889890
simde_vld2q_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
890891
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
891892
return vld2q_u64(ptr);
892-
#elif defined(SIMDE_RISCV_V_NATIVE)
893+
#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
893894
simde_uint64x2_private r_[2];
894895
vuint64m1x2_t dest = __riscv_vlseg2e64_v_u64m1x2(&ptr[0], 2);
895896
r_[0].sv128 = __riscv_vget_v_u64m1x2_u64m1(dest, 0);
@@ -926,7 +927,8 @@ simde_float16x8x2_t
926927
simde_vld2q_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
927928
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
928929
return vld2q_f16(ptr);
929-
#elif defined(SIMDE_RISCV_V_NATIVE) && SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128)
930+
#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG) \
931+
&& SIMDE_ARCH_RISCV_ZVFH && (SIMDE_NATURAL_VECTOR_SIZE >= 128)
930932
simde_float16x8_private r_[2];
931933
vfloat16m1x2_t dest = __riscv_vlseg2e16_v_f16m1x2((_Float16 *)&ptr[0], 8);
932934
r_[0].sv128 = __riscv_vget_v_f16m1x2_f16m1(dest, 0);
@@ -971,7 +973,7 @@ simde_float32x4x2_t
971973
simde_vld2q_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
972974
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
973975
return vld2q_f32(ptr);
974-
#elif defined(SIMDE_RISCV_V_NATIVE)
976+
#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
975977
simde_float32x4_private r_[2];
976978
vfloat32m1x2_t dest = __riscv_vlseg2e32_v_f32m1x2(&ptr[0], 4);
977979
r_[0].sv128 = __riscv_vget_v_f32m1x2_f32m1(dest, 0);
@@ -1021,7 +1023,7 @@ simde_float64x2x2_t
10211023
simde_vld2q_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
10221024
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
10231025
return vld2q_f64(ptr);
1024-
#elif defined(SIMDE_RISCV_V_NATIVE)
1026+
#elif defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
10251027
simde_float64x2_private r_[2];
10261028
vfloat64m1x2_t dest = __riscv_vlseg2e64_v_f64m1x2(&ptr[0], 2);
10271029
r_[0].sv128 = __riscv_vget_v_f64m1x2_f64m1(dest, 0);
@@ -1060,7 +1062,7 @@ simde_vld2_p8(simde_poly8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
10601062
return vld2_p8(ptr);
10611063
#else
10621064
simde_poly8x8_private r_[2];
1063-
#if defined(SIMDE_RISCV_V_NATIVE)
1065+
#if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
10641066
vuint8m1x2_t dest = __riscv_vlseg2e8_v_u8m1x2(&ptr[0], 8);
10651067
r_[0].sv64 = __riscv_vget_v_u8m1x2_u8m1(dest, 0);
10661068
r_[1].sv64 = __riscv_vget_v_u8m1x2_u8m1(dest, 1);
@@ -1095,7 +1097,7 @@ simde_vld2_p16(simde_poly16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
10951097
SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_
10961098
#endif
10971099
simde_poly16x4_private r_[2];
1098-
#if defined(SIMDE_RISCV_V_NATIVE)
1100+
#if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
10991101
vuint16m1x2_t dest = __riscv_vlseg2e16_v_u16m1x2(&ptr[0], 4);
11001102
r_[0].sv64 = __riscv_vget_v_u16m1x2_u16m1(dest, 0);
11011103
r_[1].sv64 = __riscv_vget_v_u16m1x2_u16m1(dest, 1);
@@ -1131,7 +1133,7 @@ simde_vld2_p64(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) {
11311133
#else
11321134
simde_poly64x1_private r_[2];
11331135

1134-
#if defined(SIMDE_RISCV_V_NATIVE)
1136+
#if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
11351137
vuint64m1x2_t dest = __riscv_vlseg2e64_v_u64m1x2(&ptr[0], 1);
11361138
r_[0].sv64 = __riscv_vget_v_u64m1x2_u64m1(dest, 0);
11371139
r_[1].sv64 = __riscv_vget_v_u64m1x2_u64m1(dest, 1);
@@ -1168,7 +1170,7 @@ simde_vld2q_p8(simde_poly8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) {
11681170
#endif
11691171
simde_poly8x16_private r_[2];
11701172

1171-
#if defined(SIMDE_RISCV_V_NATIVE)
1173+
#if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
11721174
vuint8m1x2_t dest = __riscv_vlseg2e8_v_u8m1x2(&ptr[0], 16);
11731175
r_[0].sv128 = __riscv_vget_v_u8m1x2_u8m1(dest, 0);
11741176
r_[1].sv128 = __riscv_vget_v_u8m1x2_u8m1(dest, 1);
@@ -1208,7 +1210,7 @@ simde_vld2q_p16(simde_poly16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
12081210
#endif
12091211
simde_poly16x8_private r_[2];
12101212

1211-
#if defined(SIMDE_RISCV_V_NATIVE)
1213+
#if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
12121214
vuint16m1x2_t dest = __riscv_vlseg2e16_v_u16m1x2(&ptr[0], 8);
12131215
r_[0].sv128 = __riscv_vget_v_u16m1x2_u16m1(dest, 0);
12141216
r_[1].sv128 = __riscv_vget_v_u16m1x2_u16m1(dest, 1);
@@ -1244,7 +1246,7 @@ simde_vld2q_p64(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
12441246
#else
12451247
simde_poly64x2_private r_[2];
12461248

1247-
#if defined(SIMDE_RISCV_V_NATIVE)
1249+
#if defined(SIMDE_RISCV_V_NATIVE) && defined(SIMDE_ARCH_RISCV_ZVLSSEG)
12481250
vuint64m1x2_t dest = __riscv_vlseg2e64_v_u64m1x2(&ptr[0], 2);
12491251
r_[0].sv128 = __riscv_vget_v_u64m1x2_u64m1(dest, 0);
12501252
r_[1].sv128 = __riscv_vget_v_u64m1x2_u64m1(dest, 1);

0 commit comments

Comments
 (0)