  * 2020 Evan Nemerson <[email protected]>
  * 2020 Sean Maher <[email protected]> (Copyright owned by Google, LLC)
  * 2023 Yi-Yen Chung <[email protected]> (Copyright owned by Andes Technology)
+ * 2023 Yung-Cheng Su <[email protected]>
  */

 #if !defined(SIMDE_ARM_NEON_MAX_H)
@@ -96,14 +97,27 @@ simde_vmax_f32(simde_float32x2_t a, simde_float32x2_t b) {
       a_ = simde_float32x2_to_private(a),
       b_ = simde_float32x2_to_private(b);

-    SIMDE_VECTORIZE
-    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+    #if defined(SIMDE_RISCV_V_NATIVE)
       #if !defined(SIMDE_FAST_NANS)
-        r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NANF);
+        vbool32_t va_mask = __riscv_vmseq_vx_u32m1_b32(__riscv_vfclass_v_u32m1(a_.sv64, 2), 512, 2);
+        vbool32_t vb_mask = __riscv_vmseq_vx_u32m1_b32(__riscv_vfclass_v_u32m1(b_.sv64, 2), 512, 2);
+        vbool32_t vab_mask = __riscv_vmnor_mm_b32(va_mask, vb_mask, 2);
+        vfloat32m1_t vnan = __riscv_vfmv_v_f_f32m1(SIMDE_MATH_NANF, 2);
+        r_.sv64 = __riscv_vfmax_vv_f32m1_m(vab_mask, a_.sv64, b_.sv64, 2);
+        r_.sv64 = __riscv_vmerge_vvm_f32m1(vnan, r_.sv64, vab_mask, 2);
       #else
-        r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
+        r_.sv64 = __riscv_vfmax_vv_f32m1(a_.sv64, b_.sv64, 2);
       #endif
-    }
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+        #if !defined(SIMDE_FAST_NANS)
+          r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NANF);
+        #else
+          r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
+        #endif
+      }
+    #endif

     return simde_float32x2_from_private(r_);
   #endif
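Note on the RVV branch in this hunk, not part of the patch itself: __riscv_vfclass_v_u32m1 classifies each lane and bit 0x200 (512) flags a quiet NaN, so the two vmseq compares build per-operand NaN masks, vmnor combines them into a "neither lane is NaN" mask, vfmax runs only under that mask, and vmerge writes NaN into the remaining lanes. That reproduces vmax_f32's NaN propagation when SIMDE_FAST_NANS is not defined. A per-lane scalar model of the same idea, with an invented helper name:

    /* Scalar sketch of the mask-then-merge NaN handling above (helper name is assumed). */
    #include <math.h>

    static float sketch_vmax_f32_lane(float a, float b) {
      int either_nan = isnan(a) || isnan(b); /* complement of the vmnor mask */
      float m = (a > b) ? a : b;             /* what vfmax computes on unmasked lanes */
      return either_nan ? NAN : m;           /* what vmerge does with the NaN splat */
    }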
@@ -124,14 +138,28 @@ simde_vmax_f64(simde_float64x1_t a, simde_float64x1_t b) {
       a_ = simde_float64x1_to_private(a),
       b_ = simde_float64x1_to_private(b);

-    SIMDE_VECTORIZE
-    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+    #if defined(SIMDE_RISCV_V_NATIVE)
       #if !defined(SIMDE_FAST_NANS)
-        r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NAN);
+        simde_float64 nan = SIMDE_MATH_NAN;
+        vbool64_t va_mask = __riscv_vmseq_vx_u64m1_b64(__riscv_vfclass_v_u64m1(a_.sv64, 1), 512, 1);
+        vbool64_t vb_mask = __riscv_vmseq_vx_u64m1_b64(__riscv_vfclass_v_u64m1(b_.sv64, 1), 512, 1);
+        vbool64_t vab_mask = __riscv_vmnor_mm_b64(va_mask, vb_mask, 1);
+        vfloat64m1_t vnan = __riscv_vfmv_v_f_f64m1(nan, 1);
+        r_.sv64 = __riscv_vfmax_vv_f64m1_m(vab_mask, a_.sv64, b_.sv64, 1);
+        r_.sv64 = __riscv_vmerge_vvm_f64m1(vnan, r_.sv64, vab_mask, 1);
       #else
-        r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
+        r_.sv64 = __riscv_vfmax_vv_f64m1(a_.sv64, b_.sv64, 1);
       #endif
-    }
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+        #if !defined(SIMDE_FAST_NANS)
+          r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NAN);
+        #else
+          r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
+        #endif
+      }
+    #endif

     return simde_float64x1_from_private(r_);
   #endif
@@ -154,10 +182,14 @@ simde_vmax_s8(simde_int8x8_t a, simde_int8x8_t b) {
       a_ = simde_int8x8_to_private(a),
       b_ = simde_int8x8_to_private(b);

-    SIMDE_VECTORIZE
-    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-      r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
-    }
+    #if defined(SIMDE_RISCV_V_NATIVE)
+      r_.sv64 = __riscv_vmax_vv_i8m1(a_.sv64, b_.sv64, 8);
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+        r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
+      }
+    #endif

     return simde_int8x8_from_private(r_);
   #endif
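The trailing integer argument to each __riscv_ intrinsic is the requested vector length in elements: a 64-bit NEON-style vector holds 8 int8 lanes, 4 int16, 2 int32 or 1 int64, and the 128-bit "q" variants further down use double those counts. Whichever backend gets compiled in, callers keep using the portable API; a minimal usage sketch with arbitrary test values:

    /* Usage sketch for simde_vmax_s8; input values are arbitrary, expected output in the comment. */
    #include <stdio.h>
    #include "simde/arm/neon.h"

    int main(void) {
      int8_t a[8] = { -3, 7, 0, 5, -128, 127, 2, -2 };
      int8_t b[8] = {  4, 1, 0, 9,   12, -90, 2, -1 };
      int8_t r[8];
      simde_vst1_s8(r, simde_vmax_s8(simde_vld1_s8(a), simde_vld1_s8(b)));
      for (int i = 0 ; i < 8 ; i++) printf("%d ", r[i]); /* prints: 4 7 0 9 12 127 2 -1 */
      printf("\n");
      return 0;
    }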
@@ -180,10 +212,14 @@ simde_vmax_s16(simde_int16x4_t a, simde_int16x4_t b) {
       a_ = simde_int16x4_to_private(a),
       b_ = simde_int16x4_to_private(b);

-    SIMDE_VECTORIZE
-    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-      r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
-    }
+    #if defined(SIMDE_RISCV_V_NATIVE)
+      r_.sv64 = __riscv_vmax_vv_i16m1(a_.sv64, b_.sv64, 4);
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+        r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
+      }
+    #endif

     return simde_int16x4_from_private(r_);
   #endif
@@ -206,10 +242,14 @@ simde_vmax_s32(simde_int32x2_t a, simde_int32x2_t b) {
       a_ = simde_int32x2_to_private(a),
       b_ = simde_int32x2_to_private(b);

-    SIMDE_VECTORIZE
-    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-      r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
-    }
+    #if defined(SIMDE_RISCV_V_NATIVE)
+      r_.sv64 = __riscv_vmax_vv_i32m1(a_.sv64, b_.sv64, 2);
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+        r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
+      }
+    #endif

     return simde_int32x2_from_private(r_);
   #endif
@@ -230,10 +270,14 @@ simde_x_vmax_s64(simde_int64x1_t a, simde_int64x1_t b) {
       a_ = simde_int64x1_to_private(a),
       b_ = simde_int64x1_to_private(b);

-    SIMDE_VECTORIZE
-    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-      r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
-    }
+    #if defined(SIMDE_RISCV_V_NATIVE)
+      r_.sv64 = __riscv_vmax_vv_i64m1(a_.sv64, b_.sv64, 1);
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+        r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
+      }
+    #endif

     return simde_int64x1_from_private(r_);
   #endif
@@ -252,10 +296,14 @@ simde_vmax_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
       a_ = simde_uint8x8_to_private(a),
       b_ = simde_uint8x8_to_private(b);

-    SIMDE_VECTORIZE
-    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-      r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
-    }
+    #if defined(SIMDE_RISCV_V_NATIVE)
+      r_.sv64 = __riscv_vmaxu_vv_u8m1(a_.sv64, b_.sv64, 8);
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+        r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
+      }
+    #endif

     return simde_uint8x8_from_private(r_);
   #endif
@@ -281,6 +329,8 @@ simde_vmax_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
     #if defined(SIMDE_X86_MMX_NATIVE)
       /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */
       r_.m64 = _mm_add_pi16(b_.m64, _mm_subs_pu16(a_.m64, b_.m64));
+    #elif defined(SIMDE_RISCV_V_NATIVE)
+      r_.sv64 = __riscv_vmaxu_vv_u16m1(a_.sv64, b_.sv64, 4);
     #else
       SIMDE_VECTORIZE
       for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
@@ -309,10 +359,14 @@ simde_vmax_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
       a_ = simde_uint32x2_to_private(a),
       b_ = simde_uint32x2_to_private(b);

-    SIMDE_VECTORIZE
-    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-      r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
-    }
+    #if defined(SIMDE_RISCV_V_NATIVE)
+      r_.sv64 = __riscv_vmaxu_vv_u32m1(a_.sv64, b_.sv64, 2);
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+        r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
+      }
+    #endif

     return simde_uint32x2_from_private(r_);
   #endif
@@ -333,10 +387,14 @@ simde_x_vmax_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
       a_ = simde_uint64x1_to_private(a),
       b_ = simde_uint64x1_to_private(b);

-    SIMDE_VECTORIZE
-    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-      r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
-    }
+    #if defined(SIMDE_RISCV_V_NATIVE)
+      r_.sv64 = __riscv_vmaxu_vv_u64m1(a_.sv64, b_.sv64, 1);
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+        r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i];
+      }
+    #endif

     return simde_uint64x1_from_private(r_);
   #endif
@@ -414,6 +472,17 @@ simde_vmaxq_f32(simde_float32x4_t a, simde_float32x4_t b) {
       #endif
     #elif defined(SIMDE_WASM_SIMD128_NATIVE)
       r_.v128 = wasm_f32x4_max(a_.v128, b_.v128);
+    #elif defined(SIMDE_RISCV_V_NATIVE)
+      #if !defined(SIMDE_FAST_NANS)
+        vbool32_t va_mask = __riscv_vmseq_vx_u32m1_b32(__riscv_vfclass_v_u32m1(a_.sv128, 4), 512, 4);
+        vbool32_t vb_mask = __riscv_vmseq_vx_u32m1_b32(__riscv_vfclass_v_u32m1(b_.sv128, 4), 512, 4);
+        vbool32_t vab_mask = __riscv_vmnor_mm_b32(va_mask, vb_mask, 4);
+        vfloat32m1_t vnan = __riscv_vfmv_v_f_f32m1(SIMDE_MATH_NANF, 4);
+        r_.sv128 = __riscv_vfmax_vv_f32m1_m(vab_mask, a_.sv128, b_.sv128, 4);
+        r_.sv128 = __riscv_vmerge_vvm_f32m1(vnan, r_.sv128, vab_mask, 4);
+      #else
+        r_.sv128 = __riscv_vfmax_vv_f32m1(a_.sv128, b_.sv128, 4);
+      #endif
     #else
       SIMDE_VECTORIZE
       for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
@@ -461,6 +530,18 @@ simde_vmaxq_f64(simde_float64x2_t a, simde_float64x2_t b) {
       #endif
     #elif defined(SIMDE_WASM_SIMD128_NATIVE)
       r_.v128 = wasm_f64x2_max(a_.v128, b_.v128);
+    #elif defined(SIMDE_RISCV_V_NATIVE)
+      #if !defined(SIMDE_FAST_NANS)
+        simde_float64 nan = SIMDE_MATH_NAN;
+        vbool64_t va_mask = __riscv_vmseq_vx_u64m1_b64(__riscv_vfclass_v_u64m1(a_.sv128, 2), 512, 2);
+        vbool64_t vb_mask = __riscv_vmseq_vx_u64m1_b64(__riscv_vfclass_v_u64m1(b_.sv128, 2), 512, 2);
+        vbool64_t vab_mask = __riscv_vmnor_mm_b64(va_mask, vb_mask, 2);
+        vfloat64m1_t vnan = __riscv_vfmv_v_f_f64m1(nan, 2);
+        r_.sv128 = __riscv_vfmax_vv_f64m1_m(vab_mask, a_.sv128, b_.sv128, 2);
+        r_.sv128 = __riscv_vmerge_vvm_f64m1(vnan, r_.sv128, vab_mask, 2);
+      #else
+        r_.sv128 = __riscv_vfmax_vv_f64m1(a_.sv128, b_.sv128, 2);
+      #endif
     #else
       SIMDE_VECTORIZE
       for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
@@ -504,6 +585,15 @@ simde_vmaxq_s8(simde_int8x16_t a, simde_int8x16_t b) {
       r_.v128 = wasm_i8x16_max(a_.v128, b_.v128);
     #endif

+    return simde_int8x16_from_private(r_);
+  #elif defined(SIMDE_RISCV_V_NATIVE)
+    simde_int8x16_private
+      r_,
+      a_ = simde_int8x16_to_private(a),
+      b_ = simde_int8x16_to_private(b);
+
+    r_.sv128 = __riscv_vmax_vv_i8m1(a_.sv128, b_.sv128, 16);
+
     return simde_int8x16_from_private(r_);
   #else
     return simde_vbslq_s8(simde_vcgtq_s8(a, b), a, b);
@@ -535,6 +625,15 @@ simde_vmaxq_s16(simde_int16x8_t a, simde_int16x8_t b) {
       r_.v128 = wasm_i16x8_max(a_.v128, b_.v128);
     #endif

+    return simde_int16x8_from_private(r_);
+  #elif defined(SIMDE_RISCV_V_NATIVE)
+    simde_int16x8_private
+      r_,
+      a_ = simde_int16x8_to_private(a),
+      b_ = simde_int16x8_to_private(b);
+
+    r_.sv128 = __riscv_vmax_vv_i16m1(a_.sv128, b_.sv128, 8);
+
     return simde_int16x8_from_private(r_);
   #else
     return simde_vbslq_s16(simde_vcgtq_s16(a, b), a, b);
@@ -566,6 +665,15 @@ simde_vmaxq_s32(simde_int32x4_t a, simde_int32x4_t b) {
       r_.v128 = wasm_i32x4_max(a_.v128, b_.v128);
     #endif

+    return simde_int32x4_from_private(r_);
+  #elif defined(SIMDE_RISCV_V_NATIVE)
+    simde_int32x4_private
+      r_,
+      a_ = simde_int32x4_to_private(a),
+      b_ = simde_int32x4_to_private(b);
+
+    r_.sv128 = __riscv_vmax_vv_i32m1(a_.sv128, b_.sv128, 4);
+
     return simde_int32x4_from_private(r_);
   #else
     return simde_vbslq_s32(simde_vcgtq_s32(a, b), a, b);
@@ -581,6 +689,15 @@ simde_int64x2_t
 simde_x_vmaxq_s64(simde_int64x2_t a, simde_int64x2_t b) {
   #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
     return vec_max(a, b);
+  #elif defined(SIMDE_RISCV_V_NATIVE)
+    simde_int64x2_private
+      r_,
+      a_ = simde_int64x2_to_private(a),
+      b_ = simde_int64x2_to_private(b);
+
+    r_.sv128 = __riscv_vmax_vv_i64m1(a_.sv128, b_.sv128, 2);
+
+    return simde_int64x2_from_private(r_);
   #else
     return simde_vbslq_s64(simde_vcgtq_s64(a, b), a, b);
   #endif
@@ -607,6 +724,15 @@ simde_vmaxq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
       r_.v128 = wasm_u8x16_max(a_.v128, b_.v128);
     #endif

+    return simde_uint8x16_from_private(r_);
+  #elif defined(SIMDE_RISCV_V_NATIVE)
+    simde_uint8x16_private
+      r_,
+      a_ = simde_uint8x16_to_private(a),
+      b_ = simde_uint8x16_to_private(b);
+
+    r_.sv128 = __riscv_vmaxu_vv_u8m1(a_.sv128, b_.sv128, 16);
+
     return simde_uint8x16_from_private(r_);
   #else
     return simde_vbslq_u8(simde_vcgtq_u8(a, b), a, b);
@@ -641,6 +767,15 @@ simde_vmaxq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
       r_.v128 = wasm_u16x8_max(a_.v128, b_.v128);
     #endif

+    return simde_uint16x8_from_private(r_);
+  #elif defined(SIMDE_RISCV_V_NATIVE)
+    simde_uint16x8_private
+      r_,
+      a_ = simde_uint16x8_to_private(a),
+      b_ = simde_uint16x8_to_private(b);
+
+    r_.sv128 = __riscv_vmaxu_vv_u16m1(a_.sv128, b_.sv128, 8);
+
     return simde_uint16x8_from_private(r_);
   #else
     return simde_vbslq_u16(simde_vcgtq_u16(a, b), a, b);
@@ -672,6 +807,15 @@ simde_vmaxq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
       r_.v128 = wasm_u32x4_max(a_.v128, b_.v128);
     #endif

+    return simde_uint32x4_from_private(r_);
+  #elif defined(SIMDE_RISCV_V_NATIVE)
+    simde_uint32x4_private
+      r_,
+      a_ = simde_uint32x4_to_private(a),
+      b_ = simde_uint32x4_to_private(b);
+
+    r_.sv128 = __riscv_vmaxu_vv_u32m1(a_.sv128, b_.sv128, 4);
+
     return simde_uint32x4_from_private(r_);
   #else
     return simde_vbslq_u32(simde_vcgtq_u32(a, b), a, b);
@@ -687,6 +831,15 @@ simde_uint64x2_t
 simde_x_vmaxq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
   #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
     return vec_max(a, b);
+  #elif defined(SIMDE_RISCV_V_NATIVE)
+    simde_uint64x2_private
+      r_,
+      a_ = simde_uint64x2_to_private(a),
+      b_ = simde_uint64x2_to_private(b);
+
+    r_.sv128 = __riscv_vmaxu_vv_u64m1(a_.sv128, b_.sv128, 2);
+
+    return simde_uint64x2_from_private(r_);
   #else
     return simde_vbslq_u64(simde_vcgtq_u64(a, b), a, b);
   #endif