@@ -413,6 +413,19 @@ definitions:
413413 ctype : ["float", "double"]
414414 lscpu_flags : ["avx512f", "avx512vl"]
415415 implementation : " return _mm_mask_add_{{ intrin_tp_full[ctype] }}(vec_a, tsl::to_integral<Vec>(mask), vec_a, vec_b);"
416+ # ARM NEON
# NEON masked add for unsigned integer lanes (vector-register mask).
# vec_b is first combined with mask through tsl::binary_and, and that result is
# added to vec_a with the vaddq intrinsic selected by intrin_tp_full[ctype].
# NOTE(review): presumably every mask lane is all-ones or all-zeros, so inactive
# lanes add 0 and keep vec_a unchanged - confirm against the mask definition.
- target_extension : " neon"
  ctype : ["uint8_t", "uint16_t", "uint32_t", "uint64_t"]
  lscpu_flags : ['neon']
  implementation : |
    auto const masked_b = tsl::binary_and<Vec>(mask, vec_b);
    return vaddq_{{ intrin_tp_full[ctype] }}(vec_a, masked_b);
# NEON masked add for signed-integer and floating-point lanes (vector-register mask).
# The sum vec_a + vec_b is computed for every lane, then vbslq picks the sum where
# the mask bits are set and the original bits of vec_a everywhere else.
# A bit-select is used instead of adding (mask & vec_b): adding +0.0 to an inactive
# float/double lane would turn -0.0 into +0.0 and quiet signalling NaNs, whereas the
# _mm_mask_add_* definitions in this file pass vec_a through unchanged.
# NOTE(review): assumes mask is the unsigned NEON vector type with the same lane
# width as ctype (the first operand type vbslq requires) - confirm.
# NOTE(review): inactive lanes are still added before being discarded, which may set
# floating-point status flags - confirm this is acceptable.
- target_extension : " neon"
  ctype : ["int8_t", "int16_t", "int32_t", "int64_t", "float", "double"]
  lscpu_flags : ['neon']
  implementation : |
    return vbslq_{{ intrin_tp_full[ctype] }}(mask, vaddq_{{ intrin_tp_full[ctype] }}(vec_a, vec_b), vec_a);
416429# SCALAR
417430 - target_extension : " scalar"
418431 ctype : ["uint8_t", "int8_t", "uint16_t", "int16_t", "uint32_t", "int32_t", "uint64_t", "int64_t", "float", "double"]
@@ -554,6 +567,19 @@ definitions:
554567 ctype : ["float", "double"]
555568 lscpu_flags : ["avx512f", "avx512vl"]
556569 implementation : " return _mm_mask_add_{{ intrin_tp_full[ctype] }}(vec_a, mask, vec_a, vec_b);"
570+ # ARM NEON
# NEON masked add for unsigned integer lanes (mask converted via tsl::to_mask).
# The incoming mask is passed through tsl::to_mask<Vec>, the result is combined
# with vec_b through tsl::binary_and, and that value is added to vec_a with the
# vaddq intrinsic selected by intrin_tp_full[ctype].
# NOTE(review): presumably to_mask expands each mask bit to an all-ones or
# all-zeros lane, so inactive lanes add 0 - confirm against to_mask's definition.
- target_extension : " neon"
  ctype : ["uint8_t", "uint16_t", "uint32_t", "uint64_t"]
  lscpu_flags : ['neon']
  implementation : |
    auto const lane_mask = tsl::to_mask<Vec>(mask);
    auto const masked_b = tsl::binary_and<Vec>(lane_mask, vec_b);
    return vaddq_{{ intrin_tp_full[ctype] }}(vec_a, masked_b);
# NEON masked add for signed-integer and floating-point lanes (mask converted via
# tsl::to_mask).
# The sum vec_a + vec_b is computed for every lane, then vbslq picks the sum where
# the converted mask bits are set and the original bits of vec_a everywhere else.
# A bit-select is used instead of adding (mask & vec_b): adding +0.0 to an inactive
# float/double lane would turn -0.0 into +0.0 and quiet signalling NaNs, whereas the
# _mm_mask_add_* definitions in this file pass vec_a through unchanged.
# NOTE(review): assumes tsl::to_mask<Vec> yields the unsigned NEON vector type with
# the same lane width as ctype (the first operand type vbslq requires) - confirm.
# NOTE(review): inactive lanes are still added before being discarded, which may set
# floating-point status flags - confirm this is acceptable.
- target_extension : " neon"
  ctype : ["int8_t", "int16_t", "int32_t", "int64_t", "float", "double"]
  lscpu_flags : ['neon']
  implementation : |
    return vbslq_{{ intrin_tp_full[ctype] }}(tsl::to_mask<Vec>(mask), vaddq_{{ intrin_tp_full[ctype] }}(vec_a, vec_b), vec_a);
557583# SCALAR
558584 - target_extension : " scalar"
559585 ctype : ["uint8_t", "int8_t", "uint16_t", "int16_t", "uint32_t", "int32_t", "uint64_t", "int64_t", "float", "double"]
0 commit comments