diff --git a/BUILD.bazel b/BUILD.bazel index 8ee9fffc4cb..3592b68d1ab 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -66,6 +66,7 @@ MICROKERNEL_DEFS = [ "src/f16-pavgpool/f16-pavgpool-minmax.h", "src/f16-qu8-vcvt/f16-qu8-vcvt.h", "src/f16-qs8-vcvt/f16-qs8-vcvt.h", + "src/f16-raddstoreexpminusmax/f16-raddstoreexpminusmax.h", "src/f16-vabs/f16-vabs.h", "src/f16-vbinary/f16-vadd.h", "src/f16-vbinary/f16-vaddc.h", @@ -113,6 +114,7 @@ MICROKERNEL_DEFS = [ "src/f32-qs8-vcvt/f32-qs8-vcvt.h", "src/f32-qu8-vcvt/f32-qu8-vcvt.h", "src/f32-raddextexp/f32-raddextexp.h", + "src/f32-raddstoreexpminusmax/f32-raddstoreexpminusmax.h", "src/f32-vabs/f32-vabs.h", "src/f32-vbinary/f32-vadd.h", "src/f32-vbinary/f32-vaddc.h", diff --git a/scripts/generate-tests.sh b/scripts/generate-tests.sh index 58e6de3133f..e86dde1606a 100755 --- a/scripts/generate-tests.sh +++ b/scripts/generate-tests.sh @@ -249,8 +249,8 @@ tools/generate-ibilinear-chw-test.py --spec test/f32-ibilinear-chw.yaml --output tools/generate-raddexpminusmax-test.py --spec test/f32-raddexpminusmax.yaml --output test/f32-raddexpminusmax.cc & ### Tests for RAddStoreExpMinusMax micro-kernels -tools/generate-raddstoreexpminusmax-test.py --spec test/f16-raddstoreexpminusmax.yaml --output test/f16-raddstoreexpminusmax.cc & -tools/generate-raddstoreexpminusmax-test.py --spec test/f32-raddstoreexpminusmax.yaml --output test/f32-raddstoreexpminusmax.cc & +tools/generate-raddstoreexpminusmax-test.py --tester RAddStoreExpMinusMaxMicrokernelTester --ukernel f16-raddstoreexpminusmax --output test/f16-raddstoreexpminusmax.cc & +tools/generate-raddstoreexpminusmax-test.py --tester RAddStoreExpMinusMaxMicrokernelTester --ukernel f32-raddstoreexpminusmax --output test/f32-raddstoreexpminusmax.cc & ### Tests for the portable SIMD wrappers. tools/xngen test/f32-simd.cc.in -D ARCH=scalar -D ARCH_MACRO="" -D TEST_REQUIRES="" -o test/f32-simd-scalar.cc & diff --git a/src/f16-raddstoreexpminusmax/f16-raddstoreexpminusmax.h b/src/f16-raddstoreexpminusmax/f16-raddstoreexpminusmax.h new file mode 100644 index 00000000000..614c467bc2a --- /dev/null +++ b/src/f16-raddstoreexpminusmax/f16-raddstoreexpminusmax.h @@ -0,0 +1,76 @@ +// Copyright 2023 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#ifndef XNN_UKERNEL_WITH_PARAMS +#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, element_tile, datatype, params_type, init_params) \ + XNN_UKERNEL(arch_flags, ukernel, element_tile, datatype) +#define XNN_DEFINED_UKERNEL_WITH_PARAMS +#endif + +#ifndef XNN_UKERNEL +#define XNN_UKERNEL(arch_flags, ukernel, element_tile, datatype) \ + XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, element_tile, datatype, void, /*init_params=*/nullptr) +#define XNN_DEFINED_UKERNEL +#endif + +#if XNN_ARCH_X86 || XNN_ARCH_X86_64 +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u16, 16, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u16_acc2, 16, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u32, 32, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u32_acc2 , 32, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u32_acc4, 32, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u40, 40, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u40_acc2, 40, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u40_acc5, 40, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u48, 48, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u48_acc2, 48, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u48_acc3, 48, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u64, 64, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u64_acc2, 64, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u64_acc4, 64, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u72, 72, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u72_acc3 , 72, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u80, 80, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u80_acc2, 80, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u80_acc5, 80, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96, 96, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96_acc2, 96, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96_acc3, 96, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96_acc6, 96, xnn_float16, struct xnn_f16_default_params, NULL) +#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 + +#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fp16_arith, xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u32, 32, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fp16_arith, xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u40, 40, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fp16_arith, xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u32_acc2, 32, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fp16_arith, xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u32_acc4, 32, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fp16_arith, xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u40_acc2, 40, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fp16_arith, xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u40_acc5, 40, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fp16_arith, xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u48, 48, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fp16_arith, xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u48_acc2, 48, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fp16_arith, xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u48_acc3, 48, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fp16_arith, xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u64, 64, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fp16_arith, xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u64_acc2, 64, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fp16_arith, xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u64_acc4, 64, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fp16_arith, xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u72, 72, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fp16_arith, xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u72_acc3, 72, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fp16_arith, xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u80, 80, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fp16_arith, xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u80_acc2, 80, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fp16_arith, xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u80_acc5, 80, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fp16_arith, xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96, 96, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fp16_arith, xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96_acc2, 96, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fp16_arith, xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96_acc3, 96, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fp16_arith, xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96_acc6, 96, xnn_float16, struct xnn_f16_default_params, NULL) +#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) + +#ifdef XNN_DEFINED_UKERNEL_WITH_PARAMS +#undef XNN_DEFINED_UKERNEL_WITH_PARAMS +#undef XNN_UKERNEL_WITH_PARAMS +#endif + +#ifdef XNN_DEFINED_UKERNEL +#undef XNN_DEFINED_UKERNEL +#undef XNN_UKERNEL +#endif \ No newline at end of file diff --git a/src/f32-raddstoreexpminusmax/f32-raddstoreexpminusmax.h b/src/f32-raddstoreexpminusmax/f32-raddstoreexpminusmax.h new file mode 100644 index 00000000000..ea7c29481d1 --- /dev/null +++ b/src/f32-raddstoreexpminusmax/f32-raddstoreexpminusmax.h @@ -0,0 +1,113 @@ +// Copyright 2023 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#ifndef XNN_UKERNEL_WITH_PARAMS +#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, element_tile, datatype, params_type, init_params) \ + XNN_UKERNEL(arch_flags, ukernel, element_tile, datatype) +#define XNN_DEFINED_UKERNEL_WITH_PARAMS +#endif + +#ifndef XNN_UKERNEL +#define XNN_UKERNEL(arch_flags, ukernel, element_tile, datatype) \ + XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, element_tile, datatype, void, /*init_params=*/nullptr) +#define XNN_DEFINED_UKERNEL +#endif + +#if XNN_ARCH_X86 || XNN_ARCH_X86_64 +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u4, 4, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u8_acc2, 8, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u16_acc2, 16, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u16_acc4, 16, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u8, 8, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u16_acc2, 16, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u32_acc2, 32, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u32_acc2, 32, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u32_acc4, 32, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u32_acc4, 32, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u8, 8, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx2, xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u16_acc2, 16, float, struct xnn_f32_default_params, NULL) +#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 + +#if XNN_ENABLE_AVX256SKX && (XNN_ARCH_X86 || XNN_ARCH_X86_64) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx256skx, xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u32_acc2, 32, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx256skx, xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u8, 8, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx256skx, xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u16_acc2, 16, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx256skx, xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u32_acc4, 32, float, struct xnn_f32_default_params, NULL) +#endif //XNN_ENABLE_AVX256SKX && (XNN_ARCH_X86 || XNN_ARCH_X86_64) + +#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u16, 16, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u32_acc2, 32, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u64_acc2, 64, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u64_acc4, 64, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u16, 16, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u32_acc2, 32, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u64_acc2, 64, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u64_acc4, 64, float, struct xnn_f32_default_params, NULL) +#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) + +#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u4, 4, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0,xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u8_acc2, 8, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u16_acc2, 16, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u16_acc4, 16, float, struct xnn_f32_default_params, NULL) +#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD + +#if XNN_ARCH_WASMRELAXEDSIMD +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u4, 4, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u8_acc2, 8, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u16_acc2, 16, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u16_acc4, 16, float, struct xnn_f32_default_params, NULL) +#endif // XNN_ARCH_WASMRELAXEDSIMD + +#if XNN_ENABLE_HVX && XNN_ARCH_HEXAGON +XNN_UKERNEL_WITH_PARAMS(xnn_arch_hvx, xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u32, 32, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_hvx, xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u64_acc2, 64, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_hvx, xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u128_acc2, 128, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_hvx, xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u128_acc4, 128, float, struct xnn_f32_default_params, NULL) +#endif // XNN_ENABLE_HVX && XNN_ARCH_HEXAGON + +#if XNN_ENABLE_RISCV_VECTOR && XNN_ARCH_RISCV +XNN_UKERNEL_WITH_PARAMS(xnn_arch_riscv_vector, xnn_f32_raddstoreexpminusmax_ukernel__rvv_rr2_p6_u2v, 2 * xnn_init_hardware_config()->vlenb / sizeof(float), float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_riscv_vector, xnn_f32_raddstoreexpminusmax_ukernel__rvv_rr2_p6_u4v, 4 * xnn_init_hardware_config()->vlenb / sizeof(float), float, struct xnn_f32_default_params, NULL) +#endif // XNN_ENABLE_RISCV_VECTOR && XNN_ARCH_RISCV + +#if XNN_ARCH_ARM || XNN_ARCH_ARM64 +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u4, 4, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u8_acc2, 8, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u16_acc2, 16, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u16_acc4, 16, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u4, 4, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u8_acc2, 8, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u16_acc2, 16, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u16_acc4, 16, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fma, xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u4, 4, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fma, xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u8_acc2, 8, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fma, xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u16_acc2, 16, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fma, xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u16_acc4, 16, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fma, xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u4, 4, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fma, xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u8_acc2, 8, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fma, xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u16_acc2, 16, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fma, xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u16_acc4, 16, float, struct xnn_f32_default_params, NULL) +#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 + +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u1, 1, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u2_acc2, 2, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u4_acc2, 4, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u4_acc4, 4, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u1, 1, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u2_acc2, 2, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u4_acc2, 4, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u4_acc4, 4, float, struct xnn_f32_default_params, NULL) + +#ifdef XNN_DEFINED_UKERNEL_WITH_PARAMS +#undef XNN_DEFINED_UKERNEL_WITH_PARAMS +#undef XNN_UKERNEL_WITH_PARAMS +#endif + +#ifdef XNN_DEFINED_UKERNEL +#undef XNN_DEFINED_UKERNEL +#undef XNN_UKERNEL +#endif \ No newline at end of file diff --git a/src/xnnpack/raddstoreexpminusmax.h b/src/xnnpack/raddstoreexpminusmax.h index a348ea26222..157bdfed216 100644 --- a/src/xnnpack/raddstoreexpminusmax.h +++ b/src/xnnpack/raddstoreexpminusmax.h @@ -15,148 +15,17 @@ extern "C" { #endif -#define DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(fn_name) \ - XNN_INTERNAL void fn_name( \ - size_t n, \ - const xnn_float16* input, \ - const xnn_float16* max, \ - xnn_float16* output, \ - xnn_float16* sum, \ +#define XNN_UKERNEL(arch_flags, fn_name, element_tile, datatype) \ + XNN_INTERNAL void fn_name( \ + size_t n, \ + const datatype* input, \ + const datatype* max, \ + datatype* output, \ + datatype* sum, \ const void* params); - -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u16) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u16_acc2) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u32) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u32_acc2) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u32_acc4) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u40) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u40_acc2) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u40_acc5) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u48) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u48_acc2) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u48_acc3) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u64) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u64_acc2) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u64_acc4) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u72) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u72_acc3) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u80) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u80_acc2) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u80_acc5) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96_acc2) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96_acc3) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96_acc6) - -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u32) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u32_acc2) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u32_acc4) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u40) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u40_acc2) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u40_acc5) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u48) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u48_acc2) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u48_acc3) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u64) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u64_acc2) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u64_acc4) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u72) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u72_acc3) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u80) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u80_acc2) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u80_acc5) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96_acc2) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96_acc3) -DECLARE_F16_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96_acc6) - - -#define DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(fn_name) \ - XNN_INTERNAL void fn_name( \ - size_t n, \ - const float* input, \ - const float* max, \ - float* output, \ - float* sum, \ - const void* params); - -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u4) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u8_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u16_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u16_acc4) - -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u4) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u8_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u16_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u16_acc4) - -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u4) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u8_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u16_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u16_acc4) - -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u4) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u8_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u16_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u16_acc4) - -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__rvv_rr2_p6_u2v) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__rvv_rr2_p6_u4v) - -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u4) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u8_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u16_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u16_acc4) - -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u8) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u16_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u32_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u32_acc4) - -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u8) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u16_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u32_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u32_acc4) - -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u8) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u16_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u32_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u32_acc4) - -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u16) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u32_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u64_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u64_acc4) - -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u16) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u32_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u64_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u64_acc4) - -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u4) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u8_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u16_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u16_acc4) - -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u4) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u8_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u16_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u16_acc4) - -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u32) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u64_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u128_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u128_acc4) - -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u1) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u2_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u4_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u4_acc4) - -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u1) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u2_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u4_acc2) -DECLARE_F32_RADDSTOREEXPMINUSMAX_UKERNEL_FUNCTION(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u4_acc4) +#include "f16-raddstoreexpminusmax/f16-raddstoreexpminusmax.h" +#include "f32-raddstoreexpminusmax/f32-raddstoreexpminusmax.h" +#undef XNN_UKERNEL #ifdef __cplusplus } /* extern "C" */ diff --git a/test/f16-raddstoreexpminusmax.cc b/test/f16-raddstoreexpminusmax.cc index 5b074041f6f..419d5df9cd9 100644 --- a/test/f16-raddstoreexpminusmax.cc +++ b/test/f16-raddstoreexpminusmax.cc @@ -4,7 +4,7 @@ // LICENSE file in the root directory of this source tree. // // Auto-generated file. Do not edit! -// Specification: test/f16-raddstoreexpminusmax.yaml +// Microkernel: f16-raddstoreexpminusmax // Generator: tools/generate-raddstoreexpminusmax-test.py @@ -14,1631 +14,9 @@ #include "xnnpack/microparams-init.h" #include "xnnpack/raddstoreexpminusmax.h" #include "raddstoreexpminusmax-microkernel-tester.h" - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U32, elements_eq_32) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(32) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u32, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U32, elements_div_32) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 64; elements < 320; elements += 32) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u32, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U32, elements_lt_32) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u32, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U32, elements_gt_32) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 33; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u32, nullptr); - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U32_ACC2, elements_eq_32) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(32) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u32_acc2, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U32_ACC2, elements_div_32) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 64; elements < 320; elements += 32) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u32_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U32_ACC2, elements_lt_32) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u32_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U32_ACC2, elements_gt_32) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 33; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u32_acc2, nullptr); - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U32_ACC4, elements_eq_32) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(32) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u32_acc4, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U32_ACC4, elements_div_32) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 64; elements < 320; elements += 32) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u32_acc4, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U32_ACC4, elements_lt_32) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u32_acc4, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U32_ACC4, elements_gt_32) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 33; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u32_acc4, nullptr); - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U40, elements_eq_40) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(40) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u40, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U40, elements_div_40) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 80; elements < 400; elements += 40) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u40, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U40, elements_lt_40) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 40; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u40, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U40, elements_gt_40) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 41; elements < 80; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u40, nullptr); - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U40_ACC2, elements_eq_40) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(40) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u40_acc2, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U40_ACC2, elements_div_40) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 80; elements < 400; elements += 40) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u40_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U40_ACC2, elements_lt_40) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 40; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u40_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U40_ACC2, elements_gt_40) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 41; elements < 80; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u40_acc2, nullptr); - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U40_ACC5, elements_eq_40) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(40) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u40_acc5, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U40_ACC5, elements_div_40) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 80; elements < 400; elements += 40) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u40_acc5, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U40_ACC5, elements_lt_40) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 40; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u40_acc5, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U40_ACC5, elements_gt_40) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 41; elements < 80; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u40_acc5, nullptr); - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U48, elements_eq_48) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(48) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u48, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U48, elements_div_48) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 96; elements < 480; elements += 48) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u48, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U48, elements_lt_48) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 48; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u48, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U48, elements_gt_48) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 49; elements < 96; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u48, nullptr); - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U48_ACC2, elements_eq_48) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(48) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u48_acc2, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U48_ACC2, elements_div_48) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 96; elements < 480; elements += 48) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u48_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U48_ACC2, elements_lt_48) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 48; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u48_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U48_ACC2, elements_gt_48) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 49; elements < 96; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u48_acc2, nullptr); - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U48_ACC3, elements_eq_48) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(48) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u48_acc3, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U48_ACC3, elements_div_48) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 96; elements < 480; elements += 48) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u48_acc3, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U48_ACC3, elements_lt_48) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 48; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u48_acc3, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U48_ACC3, elements_gt_48) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 49; elements < 96; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u48_acc3, nullptr); - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U64, elements_eq_64) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(64) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u64, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U64, elements_div_64) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 128; elements < 640; elements += 64) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u64, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U64, elements_lt_64) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u64, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U64, elements_gt_64) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 65; elements < 128; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u64, nullptr); - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U64_ACC2, elements_eq_64) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(64) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u64_acc2, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U64_ACC2, elements_div_64) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 128; elements < 640; elements += 64) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u64_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U64_ACC2, elements_lt_64) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u64_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U64_ACC2, elements_gt_64) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 65; elements < 128; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u64_acc2, nullptr); - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U64_ACC4, elements_eq_64) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(64) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u64_acc4, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U64_ACC4, elements_div_64) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 128; elements < 640; elements += 64) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u64_acc4, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U64_ACC4, elements_lt_64) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u64_acc4, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U64_ACC4, elements_gt_64) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 65; elements < 128; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u64_acc4, nullptr); - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U72, elements_eq_72) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(72) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u72, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U72, elements_div_72) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 144; elements < 720; elements += 72) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u72, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U72, elements_lt_72) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 72; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u72, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U72, elements_gt_72) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 73; elements < 144; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u72, nullptr); - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U72_ACC3, elements_eq_72) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(72) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u72_acc3, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U72_ACC3, elements_div_72) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 144; elements < 720; elements += 72) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u72_acc3, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U72_ACC3, elements_lt_72) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 72; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u72_acc3, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U72_ACC3, elements_gt_72) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 73; elements < 144; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u72_acc3, nullptr); - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U80, elements_eq_80) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(80) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u80, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U80, elements_div_80) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 160; elements < 800; elements += 80) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u80, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U80, elements_lt_80) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 80; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u80, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U80, elements_gt_80) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 81; elements < 160; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u80, nullptr); - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U80_ACC2, elements_eq_80) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(80) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u80_acc2, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U80_ACC2, elements_div_80) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 160; elements < 800; elements += 80) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u80_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U80_ACC2, elements_lt_80) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 80; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u80_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U80_ACC2, elements_gt_80) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 81; elements < 160; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u80_acc2, nullptr); - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U80_ACC5, elements_eq_80) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(80) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u80_acc5, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U80_ACC5, elements_div_80) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 160; elements < 800; elements += 80) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u80_acc5, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U80_ACC5, elements_lt_80) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 80; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u80_acc5, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U80_ACC5, elements_gt_80) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 81; elements < 160; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u80_acc5, nullptr); - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U96, elements_eq_96) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(96) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U96, elements_div_96) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 192; elements < 960; elements += 96) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U96, elements_lt_96) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 96; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U96, elements_gt_96) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 97; elements < 192; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96, nullptr); - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U96_ACC2, elements_eq_96) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(96) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96_acc2, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U96_ACC2, elements_div_96) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 192; elements < 960; elements += 96) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U96_ACC2, elements_lt_96) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 96; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U96_ACC2, elements_gt_96) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 97; elements < 192; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96_acc2, nullptr); - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U96_ACC3, elements_eq_96) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(96) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96_acc3, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U96_ACC3, elements_div_96) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 192; elements < 960; elements += 96) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96_acc3, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U96_ACC3, elements_lt_96) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 96; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96_acc3, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U96_ACC3, elements_gt_96) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 97; elements < 192; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96_acc3, nullptr); - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U96_ACC6, elements_eq_96) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(96) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96_acc6, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U96_ACC6, elements_div_96) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 192; elements < 960; elements += 96) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96_acc6, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U96_ACC6, elements_lt_96) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 1; elements < 96; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96_acc6, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__NEONFP16ARITH_RR2_P2_U96_ACC6, elements_gt_96) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t elements = 97; elements < 192; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96_acc6, nullptr); - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U16, elements_eq_16) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(16) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u16, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U16, elements_div_16) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 32; elements < 160; elements += 16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u16, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U16, elements_lt_16) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u16, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U16, elements_gt_16) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 17; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u16, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U16_ACC2, elements_eq_16) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(16) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u16_acc2, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U16_ACC2, elements_div_16) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 32; elements < 160; elements += 16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u16_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U16_ACC2, elements_lt_16) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u16_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U16_ACC2, elements_gt_16) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 17; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u16_acc2, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U32, elements_eq_32) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(32) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u32, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U32, elements_div_32) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 64; elements < 320; elements += 32) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u32, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U32, elements_lt_32) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u32, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U32, elements_gt_32) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 33; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u32, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U32_ACC2, elements_eq_32) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(32) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u32_acc2, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U32_ACC2, elements_div_32) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 64; elements < 320; elements += 32) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u32_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U32_ACC2, elements_lt_32) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u32_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U32_ACC2, elements_gt_32) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 33; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u32_acc2, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U32_ACC4, elements_eq_32) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(32) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u32_acc4, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U32_ACC4, elements_div_32) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 64; elements < 320; elements += 32) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u32_acc4, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U32_ACC4, elements_lt_32) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u32_acc4, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U32_ACC4, elements_gt_32) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 33; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u32_acc4, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U40, elements_eq_40) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(40) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u40, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U40, elements_div_40) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 80; elements < 400; elements += 40) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u40, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U40, elements_lt_40) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 40; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u40, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U40, elements_gt_40) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 41; elements < 80; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u40, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U40_ACC2, elements_eq_40) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(40) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u40_acc2, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U40_ACC2, elements_div_40) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 80; elements < 400; elements += 40) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u40_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U40_ACC2, elements_lt_40) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 40; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u40_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U40_ACC2, elements_gt_40) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 41; elements < 80; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u40_acc2, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U40_ACC5, elements_eq_40) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(40) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u40_acc5, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U40_ACC5, elements_div_40) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 80; elements < 400; elements += 40) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u40_acc5, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U40_ACC5, elements_lt_40) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 40; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u40_acc5, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U40_ACC5, elements_gt_40) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 41; elements < 80; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u40_acc5, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U48, elements_eq_48) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(48) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u48, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U48, elements_div_48) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 96; elements < 480; elements += 48) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u48, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U48, elements_lt_48) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 48; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u48, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U48, elements_gt_48) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 49; elements < 96; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u48, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U48_ACC2, elements_eq_48) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(48) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u48_acc2, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U48_ACC2, elements_div_48) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 96; elements < 480; elements += 48) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u48_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U48_ACC2, elements_lt_48) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 48; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u48_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U48_ACC2, elements_gt_48) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 49; elements < 96; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u48_acc2, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U48_ACC3, elements_eq_48) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(48) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u48_acc3, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U48_ACC3, elements_div_48) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 96; elements < 480; elements += 48) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u48_acc3, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U48_ACC3, elements_lt_48) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 48; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u48_acc3, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U48_ACC3, elements_gt_48) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 49; elements < 96; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u48_acc3, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U64, elements_eq_64) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(64) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u64, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U64, elements_div_64) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 128; elements < 640; elements += 64) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u64, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U64, elements_lt_64) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u64, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U64, elements_gt_64) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 65; elements < 128; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u64, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U64_ACC2, elements_eq_64) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(64) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u64_acc2, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U64_ACC2, elements_div_64) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 128; elements < 640; elements += 64) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u64_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U64_ACC2, elements_lt_64) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u64_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U64_ACC2, elements_gt_64) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 65; elements < 128; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u64_acc2, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U64_ACC4, elements_eq_64) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(64) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u64_acc4, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U64_ACC4, elements_div_64) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 128; elements < 640; elements += 64) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u64_acc4, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U64_ACC4, elements_lt_64) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u64_acc4, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U64_ACC4, elements_gt_64) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 65; elements < 128; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u64_acc4, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U72, elements_eq_72) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(72) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u72, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U72, elements_div_72) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 144; elements < 720; elements += 72) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u72, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U72, elements_lt_72) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 72; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u72, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U72, elements_gt_72) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 73; elements < 144; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u72, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U72_ACC3, elements_eq_72) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(72) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u72_acc3, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U72_ACC3, elements_div_72) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 144; elements < 720; elements += 72) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u72_acc3, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U72_ACC3, elements_lt_72) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 72; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u72_acc3, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U72_ACC3, elements_gt_72) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 73; elements < 144; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u72_acc3, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U80, elements_eq_80) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(80) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u80, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U80, elements_div_80) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 160; elements < 800; elements += 80) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u80, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U80, elements_lt_80) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 80; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u80, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U80, elements_gt_80) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 81; elements < 160; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u80, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U80_ACC2, elements_eq_80) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(80) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u80_acc2, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U80_ACC2, elements_div_80) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 160; elements < 800; elements += 80) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u80_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U80_ACC2, elements_lt_80) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 80; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u80_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U80_ACC2, elements_gt_80) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 81; elements < 160; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u80_acc2, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U80_ACC5, elements_eq_80) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(80) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u80_acc5, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U80_ACC5, elements_div_80) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 160; elements < 800; elements += 80) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u80_acc5, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U80_ACC5, elements_lt_80) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 80; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u80_acc5, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U80_ACC5, elements_gt_80) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 81; elements < 160; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u80_acc5, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U96, elements_eq_96) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(96) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U96, elements_div_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 192; elements < 960; elements += 96) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U96, elements_lt_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 96; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U96, elements_gt_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 97; elements < 192; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U96_ACC2, elements_eq_96) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(96) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96_acc2, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U96_ACC2, elements_div_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 192; elements < 960; elements += 96) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U96_ACC2, elements_lt_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 96; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96_acc2, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U96_ACC2, elements_gt_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 97; elements < 192; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96_acc2, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U96_ACC3, elements_eq_96) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(96) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96_acc3, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U96_ACC3, elements_div_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 192; elements < 960; elements += 96) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96_acc3, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U96_ACC3, elements_lt_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 96; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96_acc3, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U96_ACC3, elements_gt_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 97; elements < 192; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96_acc3, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U96_ACC6, elements_eq_96) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(96) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96_acc6, nullptr); - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U96_ACC6, elements_div_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 192; elements < 960; elements += 96) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96_acc6, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U96_ACC6, elements_lt_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 96; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96_acc6, nullptr); - } - } - - TEST(F16_RADDSTOREEXPMINUSMAX__AVX2_RR1_P2_U96_ACC6, elements_gt_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 97; elements < 192; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96_acc6, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 +#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, element_tile, datatype, params_type, init_params) XNN_TEST_RADDSTOREEXPMINUSMAX_ELEMENT_EQ(ukernel, arch_flags, element_tile, datatype, params_type, init_params); \ +XNN_TEST_RADDSTOREEXPMINUSMAX_ELEMENT_DIV(ukernel, arch_flags, element_tile, datatype, params_type, init_params); \ +XNN_TEST_RADDSTOREEXPMINUSMAX_ELEMENT_LT(ukernel, arch_flags, element_tile, datatype, params_type, init_params); \ +XNN_TEST_RADDSTOREEXPMINUSMAX_ELEMENT_GT(ukernel, arch_flags, element_tile, datatype, params_type, init_params); +#include "f16-raddstoreexpminusmax/f16-raddstoreexpminusmax.h" +#undef XNN_UKERNEL_WITH_PARAMS diff --git a/test/f16-raddstoreexpminusmax.yaml b/test/f16-raddstoreexpminusmax.yaml deleted file mode 100644 index 57f27867577..00000000000 --- a/test/f16-raddstoreexpminusmax.yaml +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright 2022 Google LLC -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# ARN NEON+FP16ARITH -- name: xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u32 -- name: xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u32_acc2 -- name: xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u32_acc4 -- name: xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u40 -- name: xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u40_acc2 -- name: xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u40_acc5 -- name: xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u48 -- name: xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u48_acc2 -- name: xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u48_acc3 -- name: xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u64 -- name: xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u64_acc2 -- name: xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u64_acc4 -- name: xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u72 -- name: xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u72_acc3 -- name: xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u80 -- name: xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u80_acc2 -- name: xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u80_acc5 -- name: xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96 -- name: xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96_acc2 -- name: xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96_acc3 -- name: xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_u96_acc6 - -# x86 AVX2 -- name: xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u16 -- name: xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u16_acc2 -- name: xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u32 -- name: xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u32_acc2 -- name: xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u32_acc4 -- name: xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u40 -- name: xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u40_acc2 -- name: xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u40_acc5 -- name: xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u48 -- name: xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u48_acc2 -- name: xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u48_acc3 -- name: xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u64 -- name: xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u64_acc2 -- name: xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u64_acc4 -- name: xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u72 -- name: xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u72_acc3 -- name: xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u80 -- name: xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u80_acc2 -- name: xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u80_acc5 -- name: xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96 -- name: xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96_acc2 -- name: xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96_acc3 -- name: xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_u96_acc6 diff --git a/test/f32-raddstoreexpminusmax.cc b/test/f32-raddstoreexpminusmax.cc index c6c1b281e32..6cde41f8e19 100644 --- a/test/f32-raddstoreexpminusmax.cc +++ b/test/f32-raddstoreexpminusmax.cc @@ -4,7 +4,7 @@ // LICENSE file in the root directory of this source tree. // // Auto-generated file. Do not edit! -// Specification: test/f32-raddstoreexpminusmax.yaml +// Microkernel: f32-raddstoreexpminusmax // Generator: tools/generate-raddstoreexpminusmax-test.py @@ -14,2190 +14,9 @@ #include "xnnpack/microparams-init.h" #include "xnnpack/raddstoreexpminusmax.h" #include "raddstoreexpminusmax-microkernel-tester.h" - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_LUT64_P2_U4, elements_eq_4) { - TEST_REQUIRES_ARM_NEON; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(4) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u4, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_LUT64_P2_U4, elements_div_4) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 8; elements < 40; elements += 4) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_LUT64_P2_U4, elements_lt_4) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 1; elements < 4; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_LUT64_P2_U4, elements_gt_4) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 5; elements < 8; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u4, nullptr); - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_LUT64_P2_U8_ACC2, elements_eq_8) { - TEST_REQUIRES_ARM_NEON; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(8) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u8_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_LUT64_P2_U8_ACC2, elements_div_8) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 16; elements < 80; elements += 8) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u8_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_LUT64_P2_U8_ACC2, elements_lt_8) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 1; elements < 8; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u8_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_LUT64_P2_U8_ACC2, elements_gt_8) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 9; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u8_acc2, nullptr); - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_LUT64_P2_U16_ACC2, elements_eq_16) { - TEST_REQUIRES_ARM_NEON; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(16) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u16_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_LUT64_P2_U16_ACC2, elements_div_16) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 32; elements < 160; elements += 16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u16_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_LUT64_P2_U16_ACC2, elements_lt_16) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 1; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u16_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_LUT64_P2_U16_ACC2, elements_gt_16) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 17; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u16_acc2, nullptr); - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_LUT64_P2_U16_ACC4, elements_eq_16) { - TEST_REQUIRES_ARM_NEON; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(16) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u16_acc4, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_LUT64_P2_U16_ACC4, elements_div_16) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 32; elements < 160; elements += 16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u16_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_LUT64_P2_U16_ACC4, elements_lt_16) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 1; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u16_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_LUT64_P2_U16_ACC4, elements_gt_16) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 17; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u16_acc4, nullptr); - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_P5_U4, elements_eq_4) { - TEST_REQUIRES_ARM_NEON; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(4) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u4, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_P5_U4, elements_div_4) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 8; elements < 40; elements += 4) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_P5_U4, elements_lt_4) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 1; elements < 4; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_P5_U4, elements_gt_4) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 5; elements < 8; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u4, nullptr); - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_P5_U8_ACC2, elements_eq_8) { - TEST_REQUIRES_ARM_NEON; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(8) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u8_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_P5_U8_ACC2, elements_div_8) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 16; elements < 80; elements += 8) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u8_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_P5_U8_ACC2, elements_lt_8) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 1; elements < 8; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u8_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_P5_U8_ACC2, elements_gt_8) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 9; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u8_acc2, nullptr); - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_P5_U16_ACC2, elements_eq_16) { - TEST_REQUIRES_ARM_NEON; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(16) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u16_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_P5_U16_ACC2, elements_div_16) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 32; elements < 160; elements += 16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u16_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_P5_U16_ACC2, elements_lt_16) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 1; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u16_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_P5_U16_ACC2, elements_gt_16) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 17; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u16_acc2, nullptr); - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_P5_U16_ACC4, elements_eq_16) { - TEST_REQUIRES_ARM_NEON; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(16) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u16_acc4, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_P5_U16_ACC4, elements_div_16) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 32; elements < 160; elements += 16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u16_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_P5_U16_ACC4, elements_lt_16) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 1; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u16_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEON_RR2_P5_U16_ACC4, elements_gt_16) { - TEST_REQUIRES_ARM_NEON; - for (size_t elements = 17; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u16_acc4, nullptr); - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_LUT64_P2_U4, elements_eq_4) { - TEST_REQUIRES_ARM_NEON_FMA; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(4) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u4, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_LUT64_P2_U4, elements_div_4) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 8; elements < 40; elements += 4) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_LUT64_P2_U4, elements_lt_4) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 1; elements < 4; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_LUT64_P2_U4, elements_gt_4) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 5; elements < 8; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u4, nullptr); - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_LUT64_P2_U8_ACC2, elements_eq_8) { - TEST_REQUIRES_ARM_NEON_FMA; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(8) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u8_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_LUT64_P2_U8_ACC2, elements_div_8) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 16; elements < 80; elements += 8) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u8_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_LUT64_P2_U8_ACC2, elements_lt_8) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 1; elements < 8; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u8_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_LUT64_P2_U8_ACC2, elements_gt_8) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 9; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u8_acc2, nullptr); - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_LUT64_P2_U16_ACC2, elements_eq_16) { - TEST_REQUIRES_ARM_NEON_FMA; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(16) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u16_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_LUT64_P2_U16_ACC2, elements_div_16) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 32; elements < 160; elements += 16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u16_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_LUT64_P2_U16_ACC2, elements_lt_16) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 1; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u16_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_LUT64_P2_U16_ACC2, elements_gt_16) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 17; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u16_acc2, nullptr); - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_LUT64_P2_U16_ACC4, elements_eq_16) { - TEST_REQUIRES_ARM_NEON_FMA; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(16) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u16_acc4, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_LUT64_P2_U16_ACC4, elements_div_16) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 32; elements < 160; elements += 16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u16_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_LUT64_P2_U16_ACC4, elements_lt_16) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 1; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u16_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_LUT64_P2_U16_ACC4, elements_gt_16) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 17; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u16_acc4, nullptr); - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_P5_U4, elements_eq_4) { - TEST_REQUIRES_ARM_NEON_FMA; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(4) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u4, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_P5_U4, elements_div_4) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 8; elements < 40; elements += 4) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_P5_U4, elements_lt_4) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 1; elements < 4; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_P5_U4, elements_gt_4) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 5; elements < 8; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u4, nullptr); - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_P5_U8_ACC2, elements_eq_8) { - TEST_REQUIRES_ARM_NEON_FMA; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(8) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u8_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_P5_U8_ACC2, elements_div_8) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 16; elements < 80; elements += 8) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u8_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_P5_U8_ACC2, elements_lt_8) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 1; elements < 8; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u8_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_P5_U8_ACC2, elements_gt_8) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 9; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u8_acc2, nullptr); - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_P5_U16_ACC2, elements_eq_16) { - TEST_REQUIRES_ARM_NEON_FMA; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(16) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u16_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_P5_U16_ACC2, elements_div_16) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 32; elements < 160; elements += 16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u16_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_P5_U16_ACC2, elements_lt_16) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 1; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u16_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_P5_U16_ACC2, elements_gt_16) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 17; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u16_acc2, nullptr); - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_P5_U16_ACC4, elements_eq_16) { - TEST_REQUIRES_ARM_NEON_FMA; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(16) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u16_acc4, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_P5_U16_ACC4, elements_div_16) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 32; elements < 160; elements += 16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u16_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_P5_U16_ACC4, elements_lt_16) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 1; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u16_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_RR1_P5_U16_ACC4, elements_gt_16) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t elements = 17; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u16_acc4, nullptr); - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ENABLE_RISCV_VECTOR && XNN_ARCH_RISCV - TEST(F32_RADDSTOREEXPMINUSMAX__RVV_RR2_P6_U2V, elements_eq_2v) { - TEST_REQUIRES_RISCV_VECTOR; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(2 * xnn_init_hardware_config()->vlenb / sizeof(float)) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__rvv_rr2_p6_u2v, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__RVV_RR2_P6_U2V, elements_div_2v) { - TEST_REQUIRES_RISCV_VECTOR; - for (size_t elements = 4 * xnn_init_hardware_config()->vlenb / sizeof(float); - elements < 20 * xnn_init_hardware_config()->vlenb / sizeof(float); - elements += 2 * xnn_init_hardware_config()->vlenb / sizeof(float)) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__rvv_rr2_p6_u2v, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__RVV_RR2_P6_U2V, elements_lt_2v) { - TEST_REQUIRES_RISCV_VECTOR; - for (size_t elements = 1; - elements < 2 * xnn_init_hardware_config()->vlenb / sizeof(float); - elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__rvv_rr2_p6_u2v, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__RVV_RR2_P6_U2V, elements_gt_2v) { - TEST_REQUIRES_RISCV_VECTOR; - for (size_t elements = 2 * xnn_init_hardware_config()->vlenb / sizeof(float) + 1; - elements < 4 * xnn_init_hardware_config()->vlenb / sizeof(float); - elements += 4) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__rvv_rr2_p6_u2v, nullptr); - } - } -#endif // XNN_ENABLE_RISCV_VECTOR && XNN_ARCH_RISCV - - -#if XNN_ENABLE_RISCV_VECTOR && XNN_ARCH_RISCV - TEST(F32_RADDSTOREEXPMINUSMAX__RVV_RR2_P6_U4V, elements_eq_4v) { - TEST_REQUIRES_RISCV_VECTOR; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(4 * xnn_init_hardware_config()->vlenb / sizeof(float)) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__rvv_rr2_p6_u4v, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__RVV_RR2_P6_U4V, elements_div_4v) { - TEST_REQUIRES_RISCV_VECTOR; - for (size_t elements = 8 * xnn_init_hardware_config()->vlenb / sizeof(float); - elements < 40 * xnn_init_hardware_config()->vlenb / sizeof(float); - elements += 4 * xnn_init_hardware_config()->vlenb / sizeof(float)) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__rvv_rr2_p6_u4v, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__RVV_RR2_P6_U4V, elements_lt_4v) { - TEST_REQUIRES_RISCV_VECTOR; - for (size_t elements = 1; - elements < 4 * xnn_init_hardware_config()->vlenb / sizeof(float); - elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__rvv_rr2_p6_u4v, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__RVV_RR2_P6_U4V, elements_gt_4v) { - TEST_REQUIRES_RISCV_VECTOR; - for (size_t elements = 4 * xnn_init_hardware_config()->vlenb / sizeof(float) + 1; - elements < 8 * xnn_init_hardware_config()->vlenb / sizeof(float); - elements += 8) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__rvv_rr2_p6_u4v, nullptr); - } - } -#endif // XNN_ENABLE_RISCV_VECTOR && XNN_ARCH_RISCV - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDSTOREEXPMINUSMAX__SSE2_RR2_P5_U4, elements_eq_4) { - TEST_REQUIRES_X86_SSE2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(4) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u4, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__SSE2_RR2_P5_U4, elements_div_4) { - TEST_REQUIRES_X86_SSE2; - for (size_t elements = 8; elements < 40; elements += 4) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__SSE2_RR2_P5_U4, elements_lt_4) { - TEST_REQUIRES_X86_SSE2; - for (size_t elements = 1; elements < 4; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__SSE2_RR2_P5_U4, elements_gt_4) { - TEST_REQUIRES_X86_SSE2; - for (size_t elements = 5; elements < 8; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u4, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDSTOREEXPMINUSMAX__SSE2_RR2_P5_U8_ACC2, elements_eq_8) { - TEST_REQUIRES_X86_SSE2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(8) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u8_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__SSE2_RR2_P5_U8_ACC2, elements_div_8) { - TEST_REQUIRES_X86_SSE2; - for (size_t elements = 16; elements < 80; elements += 8) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u8_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__SSE2_RR2_P5_U8_ACC2, elements_lt_8) { - TEST_REQUIRES_X86_SSE2; - for (size_t elements = 1; elements < 8; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u8_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__SSE2_RR2_P5_U8_ACC2, elements_gt_8) { - TEST_REQUIRES_X86_SSE2; - for (size_t elements = 9; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u8_acc2, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDSTOREEXPMINUSMAX__SSE2_RR2_P5_U16_ACC2, elements_eq_16) { - TEST_REQUIRES_X86_SSE2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(16) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u16_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__SSE2_RR2_P5_U16_ACC2, elements_div_16) { - TEST_REQUIRES_X86_SSE2; - for (size_t elements = 32; elements < 160; elements += 16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u16_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__SSE2_RR2_P5_U16_ACC2, elements_lt_16) { - TEST_REQUIRES_X86_SSE2; - for (size_t elements = 1; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u16_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__SSE2_RR2_P5_U16_ACC2, elements_gt_16) { - TEST_REQUIRES_X86_SSE2; - for (size_t elements = 17; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u16_acc2, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDSTOREEXPMINUSMAX__SSE2_RR2_P5_U16_ACC4, elements_eq_16) { - TEST_REQUIRES_X86_SSE2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(16) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u16_acc4, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__SSE2_RR2_P5_U16_ACC4, elements_div_16) { - TEST_REQUIRES_X86_SSE2; - for (size_t elements = 32; elements < 160; elements += 16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u16_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__SSE2_RR2_P5_U16_ACC4, elements_lt_16) { - TEST_REQUIRES_X86_SSE2; - for (size_t elements = 1; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u16_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__SSE2_RR2_P5_U16_ACC4, elements_gt_16) { - TEST_REQUIRES_X86_SSE2; - for (size_t elements = 17; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u16_acc4, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR1_P5_U8, elements_eq_8) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(8) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u8, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR1_P5_U8, elements_div_8) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 16; elements < 80; elements += 8) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u8, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR1_P5_U8, elements_lt_8) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 8; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u8, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR1_P5_U8, elements_gt_8) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 9; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u8, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR1_P5_U16_ACC2, elements_eq_16) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(16) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u16_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR1_P5_U16_ACC2, elements_div_16) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 32; elements < 160; elements += 16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u16_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR1_P5_U16_ACC2, elements_lt_16) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u16_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR1_P5_U16_ACC2, elements_gt_16) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 17; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u16_acc2, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR1_P5_U32_ACC2, elements_eq_32) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(32) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u32_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR1_P5_U32_ACC2, elements_div_32) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 64; elements < 320; elements += 32) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u32_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR1_P5_U32_ACC2, elements_lt_32) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u32_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR1_P5_U32_ACC2, elements_gt_32) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 33; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u32_acc2, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR1_P5_U32_ACC4, elements_eq_32) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(32) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u32_acc4, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR1_P5_U32_ACC4, elements_div_32) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 64; elements < 320; elements += 32) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u32_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR1_P5_U32_ACC4, elements_lt_32) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u32_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR1_P5_U32_ACC4, elements_gt_32) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 33; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u32_acc4, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR2_P5_U8, elements_eq_8) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(8) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u8, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR2_P5_U8, elements_div_8) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 16; elements < 80; elements += 8) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u8, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR2_P5_U8, elements_lt_8) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 8; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u8, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR2_P5_U8, elements_gt_8) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 9; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u8, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR2_P5_U16_ACC2, elements_eq_16) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(16) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u16_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR2_P5_U16_ACC2, elements_div_16) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 32; elements < 160; elements += 16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u16_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR2_P5_U16_ACC2, elements_lt_16) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u16_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR2_P5_U16_ACC2, elements_gt_16) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 17; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u16_acc2, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR2_P5_U32_ACC2, elements_eq_32) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(32) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u32_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR2_P5_U32_ACC2, elements_div_32) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 64; elements < 320; elements += 32) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u32_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR2_P5_U32_ACC2, elements_lt_32) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u32_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR2_P5_U32_ACC2, elements_gt_32) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 33; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u32_acc2, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR2_P5_U32_ACC4, elements_eq_32) { - TEST_REQUIRES_X86_AVX2; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(32) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u32_acc4, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR2_P5_U32_ACC4, elements_div_32) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 64; elements < 320; elements += 32) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u32_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR2_P5_U32_ACC4, elements_lt_32) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u32_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX2_RR2_P5_U32_ACC4, elements_gt_32) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 33; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u32_acc4, nullptr); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ENABLE_AVX256SKX && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDSTOREEXPMINUSMAX__AVX256SKX_RR2_P5_U8, elements_eq_8) { - TEST_REQUIRES_X86_AVX256SKX; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(8) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u8, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX256SKX_RR2_P5_U8, elements_div_8) { - TEST_REQUIRES_X86_AVX256SKX; - for (size_t elements = 16; elements < 80; elements += 8) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u8, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX256SKX_RR2_P5_U8, elements_lt_8) { - TEST_REQUIRES_X86_AVX256SKX; - for (size_t elements = 1; elements < 8; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u8, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX256SKX_RR2_P5_U8, elements_gt_8) { - TEST_REQUIRES_X86_AVX256SKX; - for (size_t elements = 9; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u8, nullptr); - } - } -#endif // XNN_ENABLE_AVX256SKX && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX256SKX && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDSTOREEXPMINUSMAX__AVX256SKX_RR2_P5_U16_ACC2, elements_eq_16) { - TEST_REQUIRES_X86_AVX256SKX; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(16) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u16_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX256SKX_RR2_P5_U16_ACC2, elements_div_16) { - TEST_REQUIRES_X86_AVX256SKX; - for (size_t elements = 32; elements < 160; elements += 16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u16_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX256SKX_RR2_P5_U16_ACC2, elements_lt_16) { - TEST_REQUIRES_X86_AVX256SKX; - for (size_t elements = 1; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u16_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX256SKX_RR2_P5_U16_ACC2, elements_gt_16) { - TEST_REQUIRES_X86_AVX256SKX; - for (size_t elements = 17; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u16_acc2, nullptr); - } - } -#endif // XNN_ENABLE_AVX256SKX && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX256SKX && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDSTOREEXPMINUSMAX__AVX256SKX_RR2_P5_U32_ACC2, elements_eq_32) { - TEST_REQUIRES_X86_AVX256SKX; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(32) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u32_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX256SKX_RR2_P5_U32_ACC2, elements_div_32) { - TEST_REQUIRES_X86_AVX256SKX; - for (size_t elements = 64; elements < 320; elements += 32) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u32_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX256SKX_RR2_P5_U32_ACC2, elements_lt_32) { - TEST_REQUIRES_X86_AVX256SKX; - for (size_t elements = 1; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u32_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX256SKX_RR2_P5_U32_ACC2, elements_gt_32) { - TEST_REQUIRES_X86_AVX256SKX; - for (size_t elements = 33; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u32_acc2, nullptr); - } - } -#endif // XNN_ENABLE_AVX256SKX && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX256SKX && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDSTOREEXPMINUSMAX__AVX256SKX_RR2_P5_U32_ACC4, elements_eq_32) { - TEST_REQUIRES_X86_AVX256SKX; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(32) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u32_acc4, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX256SKX_RR2_P5_U32_ACC4, elements_div_32) { - TEST_REQUIRES_X86_AVX256SKX; - for (size_t elements = 64; elements < 320; elements += 32) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u32_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX256SKX_RR2_P5_U32_ACC4, elements_lt_32) { - TEST_REQUIRES_X86_AVX256SKX; - for (size_t elements = 1; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u32_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX256SKX_RR2_P5_U32_ACC4, elements_gt_32) { - TEST_REQUIRES_X86_AVX256SKX; - for (size_t elements = 33; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u32_acc4, nullptr); - } - } -#endif // XNN_ENABLE_AVX256SKX && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR1_P5_SCALEF_U16, elements_eq_16) { - TEST_REQUIRES_X86_AVX512F; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(16) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u16, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR1_P5_SCALEF_U16, elements_div_16) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 32; elements < 160; elements += 16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u16, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR1_P5_SCALEF_U16, elements_lt_16) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u16, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR1_P5_SCALEF_U16, elements_gt_16) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 17; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u16, nullptr); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR1_P5_SCALEF_U32_ACC2, elements_eq_32) { - TEST_REQUIRES_X86_AVX512F; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(32) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u32_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR1_P5_SCALEF_U32_ACC2, elements_div_32) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 64; elements < 320; elements += 32) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u32_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR1_P5_SCALEF_U32_ACC2, elements_lt_32) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u32_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR1_P5_SCALEF_U32_ACC2, elements_gt_32) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 33; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u32_acc2, nullptr); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR1_P5_SCALEF_U64_ACC2, elements_eq_64) { - TEST_REQUIRES_X86_AVX512F; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(64) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u64_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR1_P5_SCALEF_U64_ACC2, elements_div_64) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 128; elements < 640; elements += 64) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u64_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR1_P5_SCALEF_U64_ACC2, elements_lt_64) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u64_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR1_P5_SCALEF_U64_ACC2, elements_gt_64) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 65; elements < 128; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u64_acc2, nullptr); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR1_P5_SCALEF_U64_ACC4, elements_eq_64) { - TEST_REQUIRES_X86_AVX512F; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(64) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u64_acc4, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR1_P5_SCALEF_U64_ACC4, elements_div_64) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 128; elements < 640; elements += 64) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u64_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR1_P5_SCALEF_U64_ACC4, elements_lt_64) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u64_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR1_P5_SCALEF_U64_ACC4, elements_gt_64) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 65; elements < 128; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u64_acc4, nullptr); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR2_P5_U16, elements_eq_16) { - TEST_REQUIRES_X86_AVX512F; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(16) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u16, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR2_P5_U16, elements_div_16) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 32; elements < 160; elements += 16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u16, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR2_P5_U16, elements_lt_16) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u16, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR2_P5_U16, elements_gt_16) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 17; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u16, nullptr); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR2_P5_U32_ACC2, elements_eq_32) { - TEST_REQUIRES_X86_AVX512F; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(32) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u32_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR2_P5_U32_ACC2, elements_div_32) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 64; elements < 320; elements += 32) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u32_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR2_P5_U32_ACC2, elements_lt_32) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u32_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR2_P5_U32_ACC2, elements_gt_32) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 33; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u32_acc2, nullptr); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR2_P5_U64_ACC2, elements_eq_64) { - TEST_REQUIRES_X86_AVX512F; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(64) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u64_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR2_P5_U64_ACC2, elements_div_64) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 128; elements < 640; elements += 64) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u64_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR2_P5_U64_ACC2, elements_lt_64) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u64_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR2_P5_U64_ACC2, elements_gt_64) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 65; elements < 128; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u64_acc2, nullptr); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR2_P5_U64_ACC4, elements_eq_64) { - TEST_REQUIRES_X86_AVX512F; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(64) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u64_acc4, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR2_P5_U64_ACC4, elements_div_64) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 128; elements < 640; elements += 64) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u64_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR2_P5_U64_ACC4, elements_lt_64) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u64_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__AVX512F_RR2_P5_U64_ACC4, elements_gt_64) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 65; elements < 128; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u64_acc4, nullptr); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_RADDSTOREEXPMINUSMAX__WASMSIMD_RR2_P5_U4, elements_eq_4) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(4) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u4, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMSIMD_RR2_P5_U4, elements_div_4) { - for (size_t elements = 8; elements < 40; elements += 4) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMSIMD_RR2_P5_U4, elements_lt_4) { - for (size_t elements = 1; elements < 4; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMSIMD_RR2_P5_U4, elements_gt_4) { - for (size_t elements = 5; elements < 8; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u4, nullptr); - } - } -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_RADDSTOREEXPMINUSMAX__WASMSIMD_RR2_P5_U8_ACC2, elements_eq_8) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(8) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u8_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMSIMD_RR2_P5_U8_ACC2, elements_div_8) { - for (size_t elements = 16; elements < 80; elements += 8) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u8_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMSIMD_RR2_P5_U8_ACC2, elements_lt_8) { - for (size_t elements = 1; elements < 8; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u8_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMSIMD_RR2_P5_U8_ACC2, elements_gt_8) { - for (size_t elements = 9; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u8_acc2, nullptr); - } - } -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_RADDSTOREEXPMINUSMAX__WASMSIMD_RR2_P5_U16_ACC2, elements_eq_16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(16) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u16_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMSIMD_RR2_P5_U16_ACC2, elements_div_16) { - for (size_t elements = 32; elements < 160; elements += 16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u16_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMSIMD_RR2_P5_U16_ACC2, elements_lt_16) { - for (size_t elements = 1; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u16_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMSIMD_RR2_P5_U16_ACC2, elements_gt_16) { - for (size_t elements = 17; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u16_acc2, nullptr); - } - } -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_RADDSTOREEXPMINUSMAX__WASMSIMD_RR2_P5_U16_ACC4, elements_eq_16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(16) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u16_acc4, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMSIMD_RR2_P5_U16_ACC4, elements_div_16) { - for (size_t elements = 32; elements < 160; elements += 16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u16_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMSIMD_RR2_P5_U16_ACC4, elements_lt_16) { - for (size_t elements = 1; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u16_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMSIMD_RR2_P5_U16_ACC4, elements_gt_16) { - for (size_t elements = 17; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u16_acc4, nullptr); - } - } -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_RADDSTOREEXPMINUSMAX__WASMRELAXEDSIMD_RR2_P5_U4, elements_eq_4) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(4) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u4, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMRELAXEDSIMD_RR2_P5_U4, elements_div_4) { - for (size_t elements = 8; elements < 40; elements += 4) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMRELAXEDSIMD_RR2_P5_U4, elements_lt_4) { - for (size_t elements = 1; elements < 4; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMRELAXEDSIMD_RR2_P5_U4, elements_gt_4) { - for (size_t elements = 5; elements < 8; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u4, nullptr); - } - } -#endif // XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_RADDSTOREEXPMINUSMAX__WASMRELAXEDSIMD_RR2_P5_U8_ACC2, elements_eq_8) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(8) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u8_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMRELAXEDSIMD_RR2_P5_U8_ACC2, elements_div_8) { - for (size_t elements = 16; elements < 80; elements += 8) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u8_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMRELAXEDSIMD_RR2_P5_U8_ACC2, elements_lt_8) { - for (size_t elements = 1; elements < 8; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u8_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMRELAXEDSIMD_RR2_P5_U8_ACC2, elements_gt_8) { - for (size_t elements = 9; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u8_acc2, nullptr); - } - } -#endif // XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_RADDSTOREEXPMINUSMAX__WASMRELAXEDSIMD_RR2_P5_U16_ACC2, elements_eq_16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(16) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u16_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMRELAXEDSIMD_RR2_P5_U16_ACC2, elements_div_16) { - for (size_t elements = 32; elements < 160; elements += 16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u16_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMRELAXEDSIMD_RR2_P5_U16_ACC2, elements_lt_16) { - for (size_t elements = 1; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u16_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMRELAXEDSIMD_RR2_P5_U16_ACC2, elements_gt_16) { - for (size_t elements = 17; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u16_acc2, nullptr); - } - } -#endif // XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_RADDSTOREEXPMINUSMAX__WASMRELAXEDSIMD_RR2_P5_U16_ACC4, elements_eq_16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(16) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u16_acc4, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMRELAXEDSIMD_RR2_P5_U16_ACC4, elements_div_16) { - for (size_t elements = 32; elements < 160; elements += 16) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u16_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMRELAXEDSIMD_RR2_P5_U16_ACC4, elements_lt_16) { - for (size_t elements = 1; elements < 16; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u16_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__WASMRELAXEDSIMD_RR2_P5_U16_ACC4, elements_gt_16) { - for (size_t elements = 17; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u16_acc4, nullptr); - } - } -#endif // XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ENABLE_HVX && XNN_ARCH_HEXAGON - TEST(F32_RADDSTOREEXPMINUSMAX__HVX_RR2_P5_U32, elements_eq_32) { - TEST_REQUIRES_HVX; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(32) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u32, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__HVX_RR2_P5_U32, elements_div_32) { - TEST_REQUIRES_HVX; - for (size_t elements = 64; elements < 320; elements += 32) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u32, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__HVX_RR2_P5_U32, elements_lt_32) { - TEST_REQUIRES_HVX; - for (size_t elements = 1; elements < 32; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u32, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__HVX_RR2_P5_U32, elements_gt_32) { - TEST_REQUIRES_HVX; - for (size_t elements = 33; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u32, nullptr); - } - } -#endif // XNN_ENABLE_HVX && XNN_ARCH_HEXAGON - - -#if XNN_ENABLE_HVX && XNN_ARCH_HEXAGON - TEST(F32_RADDSTOREEXPMINUSMAX__HVX_RR2_P5_U64_ACC2, elements_eq_64) { - TEST_REQUIRES_HVX; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(64) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u64_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__HVX_RR2_P5_U64_ACC2, elements_div_64) { - TEST_REQUIRES_HVX; - for (size_t elements = 128; elements < 640; elements += 64) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u64_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__HVX_RR2_P5_U64_ACC2, elements_lt_64) { - TEST_REQUIRES_HVX; - for (size_t elements = 1; elements < 64; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u64_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__HVX_RR2_P5_U64_ACC2, elements_gt_64) { - TEST_REQUIRES_HVX; - for (size_t elements = 65; elements < 128; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u64_acc2, nullptr); - } - } -#endif // XNN_ENABLE_HVX && XNN_ARCH_HEXAGON - - -#if XNN_ENABLE_HVX && XNN_ARCH_HEXAGON - TEST(F32_RADDSTOREEXPMINUSMAX__HVX_RR2_P5_U128_ACC2, elements_eq_128) { - TEST_REQUIRES_HVX; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(128) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u128_acc2, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__HVX_RR2_P5_U128_ACC2, elements_div_128) { - TEST_REQUIRES_HVX; - for (size_t elements = 256; elements < 1280; elements += 128) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u128_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__HVX_RR2_P5_U128_ACC2, elements_lt_128) { - TEST_REQUIRES_HVX; - for (size_t elements = 1; elements < 128; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u128_acc2, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__HVX_RR2_P5_U128_ACC2, elements_gt_128) { - TEST_REQUIRES_HVX; - for (size_t elements = 129; elements < 256; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u128_acc2, nullptr); - } - } -#endif // XNN_ENABLE_HVX && XNN_ARCH_HEXAGON - - -#if XNN_ENABLE_HVX && XNN_ARCH_HEXAGON - TEST(F32_RADDSTOREEXPMINUSMAX__HVX_RR2_P5_U128_ACC4, elements_eq_128) { - TEST_REQUIRES_HVX; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(128) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u128_acc4, nullptr); - } - - TEST(F32_RADDSTOREEXPMINUSMAX__HVX_RR2_P5_U128_ACC4, elements_div_128) { - TEST_REQUIRES_HVX; - for (size_t elements = 256; elements < 1280; elements += 128) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u128_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__HVX_RR2_P5_U128_ACC4, elements_lt_128) { - TEST_REQUIRES_HVX; - for (size_t elements = 1; elements < 128; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u128_acc4, nullptr); - } - } - - TEST(F32_RADDSTOREEXPMINUSMAX__HVX_RR2_P5_U128_ACC4, elements_gt_128) { - TEST_REQUIRES_HVX; - for (size_t elements = 129; elements < 256; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u128_acc4, nullptr); - } - } -#endif // XNN_ENABLE_HVX && XNN_ARCH_HEXAGON - - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_LUT64_P2_U1, elements_eq_1) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(1) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u1, nullptr); -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_LUT64_P2_U1, elements_gt_1) { - for (size_t elements = 2; elements < 10; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u1, nullptr); - } -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_LUT64_P2_U2_ACC2, elements_eq_2) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(2) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u2_acc2, nullptr); -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_LUT64_P2_U2_ACC2, elements_div_2) { - for (size_t elements = 4; elements < 20; elements += 2) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u2_acc2, nullptr); - } -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_LUT64_P2_U2_ACC2, elements_lt_2) { - for (size_t elements = 1; elements < 2; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u2_acc2, nullptr); - } -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_LUT64_P2_U2_ACC2, elements_gt_2) { - for (size_t elements = 3; elements < 4; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u2_acc2, nullptr); - } -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_LUT64_P2_U4_ACC2, elements_eq_4) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(4) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u4_acc2, nullptr); -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_LUT64_P2_U4_ACC2, elements_div_4) { - for (size_t elements = 8; elements < 40; elements += 4) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u4_acc2, nullptr); - } -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_LUT64_P2_U4_ACC2, elements_lt_4) { - for (size_t elements = 1; elements < 4; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u4_acc2, nullptr); - } -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_LUT64_P2_U4_ACC2, elements_gt_4) { - for (size_t elements = 5; elements < 8; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u4_acc2, nullptr); - } -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_LUT64_P2_U4_ACC4, elements_eq_4) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(4) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u4_acc4, nullptr); -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_LUT64_P2_U4_ACC4, elements_div_4) { - for (size_t elements = 8; elements < 40; elements += 4) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u4_acc4, nullptr); - } -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_LUT64_P2_U4_ACC4, elements_lt_4) { - for (size_t elements = 1; elements < 4; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u4_acc4, nullptr); - } -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_LUT64_P2_U4_ACC4, elements_gt_4) { - for (size_t elements = 5; elements < 8; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u4_acc4, nullptr); - } -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_P5_U1, elements_eq_1) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(1) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u1, nullptr); -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_P5_U1, elements_gt_1) { - for (size_t elements = 2; elements < 10; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u1, nullptr); - } -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_P5_U2_ACC2, elements_eq_2) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(2) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u2_acc2, nullptr); -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_P5_U2_ACC2, elements_div_2) { - for (size_t elements = 4; elements < 20; elements += 2) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u2_acc2, nullptr); - } -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_P5_U2_ACC2, elements_lt_2) { - for (size_t elements = 1; elements < 2; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u2_acc2, nullptr); - } -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_P5_U2_ACC2, elements_gt_2) { - for (size_t elements = 3; elements < 4; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u2_acc2, nullptr); - } -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_P5_U4_ACC2, elements_eq_4) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(4) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u4_acc2, nullptr); -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_P5_U4_ACC2, elements_div_4) { - for (size_t elements = 8; elements < 40; elements += 4) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u4_acc2, nullptr); - } -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_P5_U4_ACC2, elements_lt_4) { - for (size_t elements = 1; elements < 4; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u4_acc2, nullptr); - } -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_P5_U4_ACC2, elements_gt_4) { - for (size_t elements = 5; elements < 8; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u4_acc2, nullptr); - } -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_P5_U4_ACC4, elements_eq_4) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(4) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u4_acc4, nullptr); -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_P5_U4_ACC4, elements_div_4) { - for (size_t elements = 8; elements < 40; elements += 4) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u4_acc4, nullptr); - } -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_P5_U4_ACC4, elements_lt_4) { - for (size_t elements = 1; elements < 4; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u4_acc4, nullptr); - } -} - -TEST(F32_RADDSTOREEXPMINUSMAX__SCALAR_RR2_P5_U4_ACC4, elements_gt_4) { - for (size_t elements = 5; elements < 8; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u4_acc4, nullptr); - } -} \ No newline at end of file +#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, element_tile, datatype, params_type, init_params) XNN_TEST_RADDSTOREEXPMINUSMAX_ELEMENT_EQ(ukernel, arch_flags, element_tile, datatype, params_type, init_params); \ +XNN_TEST_RADDSTOREEXPMINUSMAX_ELEMENT_DIV(ukernel, arch_flags, element_tile, datatype, params_type, init_params); \ +XNN_TEST_RADDSTOREEXPMINUSMAX_ELEMENT_LT(ukernel, arch_flags, element_tile, datatype, params_type, init_params); \ +XNN_TEST_RADDSTOREEXPMINUSMAX_ELEMENT_GT(ukernel, arch_flags, element_tile, datatype, params_type, init_params); +#include "f32-raddstoreexpminusmax/f32-raddstoreexpminusmax.h" +#undef XNN_UKERNEL_WITH_PARAMS diff --git a/test/f32-raddstoreexpminusmax.yaml b/test/f32-raddstoreexpminusmax.yaml deleted file mode 100644 index ed1596e31f7..00000000000 --- a/test/f32-raddstoreexpminusmax.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright 2019 Google LLC -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# ARM NEON -- name: xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u4 -- name: xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u8_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u16_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_u16_acc4 -- name: xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u4 -- name: xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u8_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u16_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_u16_acc4 -- name: xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u4 -- name: xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u8_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u16_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_u16_acc4 -- name: xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u4 -- name: xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u8_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u16_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_u16_acc4 - -# RISC-V Vector -- name: xnn_f32_raddstoreexpminusmax_ukernel__rvv_rr2_p6_u2v -- name: xnn_f32_raddstoreexpminusmax_ukernel__rvv_rr2_p6_u4v - -# x86 SSE2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u4 -- name: xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u8_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u16_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_u16_acc4 - -# x86 AVX2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u8 -- name: xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u16_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u32_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_u32_acc4 - -- name: xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u8 -- name: xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u16_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u32_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr2_p5_u32_acc4 - -# x86 AVX256SKX -- name: xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u8 -- name: xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u16_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u32_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__avx256skx_rr2_p5_u32_acc4 - -# x86 AVX512F -- name: xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u16 -- name: xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u32_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u64_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_u64_acc4 - -- name: xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u16 -- name: xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u32_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u64_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr2_p5_u64_acc4 - -# WAsm SIMD -- name: xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u4 -- name: xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u8_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u16_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_u16_acc4 - -# WAsm Relaxed SIMD -- name: xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u4 -- name: xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u8_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u16_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__wasmrelaxedsimd_rr2_p5_u16_acc4 - -# Hexagon HVX -- name: xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u32 -- name: xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u64_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u128_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__hvx_rr2_p5_u128_acc4 - -# Scalar -- name: xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u1 -- name: xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u2_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u4_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_u4_acc4 -- name: xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u1 -- name: xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u2_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u4_acc2 -- name: xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_u4_acc4 diff --git a/test/raddstoreexpminusmax-microkernel-tester.h b/test/raddstoreexpminusmax-microkernel-tester.h index 5e739474d96..a4cf4219c22 100644 --- a/test/raddstoreexpminusmax-microkernel-tester.h +++ b/test/raddstoreexpminusmax-microkernel-tester.h @@ -122,3 +122,67 @@ class RAddStoreExpMinusMaxMicrokernelTester { size_t elements_{1}; size_t iterations_{15}; }; + +#define XNN_TEST_RADDSTOREEXPMINUSMAX_ELEMENT_EQ( \ + ukernel, arch_flags, element_tile, datatype, params_type, init_params) \ + TEST(ukernel, elements_eq) \ + { \ + TEST_REQUIRES_ARCH_FLAGS(arch_flags); \ + RAddStoreExpMinusMaxMicrokernelTester().elements(element_tile).Test(ukernel, init_params); \ + } + +#define XNN_TEST_RADDSTOREEXPMINUSMAX_ELEMENT_DIV( \ + ukernel, arch_flags, element_tile, datatype, params_type, init_params) \ + TEST(ukernel, elements_div) \ + { \ + TEST_REQUIRES_ARCH_FLAGS(arch_flags); \ + const size_t elements_scale = get_batch_scale(); \ + if (elements_scale == 0) { \ + for (size_t elements = element_tile * 2; elements < element_tile * 10; elements += element_tile) { \ + RAddStoreExpMinusMaxMicrokernelTester().elements(elements).Test(ukernel, init_params); \ + } \ + } \ + else { \ + for (size_t elements = element_tile * 2 * elements_scale; elements < element_tile * 10 * elements_scale; \ + elements += element_tile * elements_scale) { \ + RAddStoreExpMinusMaxMicrokernelTester().elements(elements).Test(ukernel, init_params); \ + } \ + } \ + } + +#define XNN_TEST_RADDSTOREEXPMINUSMAX_ELEMENT_LT( \ + ukernel, arch_flags, element_tile, datatype, params_type, init_params) \ + TEST(ukernel, elements_lt) \ + { \ + TEST_REQUIRES_ARCH_FLAGS(arch_flags); \ + const size_t elements_scale = get_batch_scale(); \ + if (elements_scale == 0) { \ + for (size_t elements = 1; elements < element_tile; elements++) { \ + RAddStoreExpMinusMaxMicrokernelTester().elements(elements).Test(ukernel, init_params); \ + } \ + } \ + else { \ + for (size_t elements = 1; elements < element_tile * elements_scale; elements++) { \ + RAddStoreExpMinusMaxMicrokernelTester().elements(elements).Test(ukernel, init_params); \ + } \ + } \ + } + +#define XNN_TEST_RADDSTOREEXPMINUSMAX_ELEMENT_GT( \ + ukernel, arch_flags, element_tile, datatype, params_type, init_params) \ + TEST(ukernel, elements_gt) \ + { \ + TEST_REQUIRES_ARCH_FLAGS(arch_flags); \ + const size_t elements_scale = get_batch_scale(); \ + if (elements_scale == 0) { \ + for (size_t elements = element_tile + 1; elements < (element_tile == 1 ? 10 : element_tile * 2); elements++) { \ + RAddStoreExpMinusMaxMicrokernelTester().elements(elements).Test(ukernel, init_params); \ + } \ + } \ + else { \ + for (size_t elements = element_tile * elements_scale + 1; \ + elements < (element_tile == 1 ? 10 : element_tile * 2) * elements_scale; elements += element_tile) { \ + RAddStoreExpMinusMaxMicrokernelTester().elements(elements).Test(ukernel, init_params); \ + } \ + } \ + } diff --git a/tools/generate-raddstoreexpminusmax-test.py b/tools/generate-raddstoreexpminusmax-test.py index 862bcce4925..301860a1968 100755 --- a/tools/generate-raddstoreexpminusmax-test.py +++ b/tools/generate-raddstoreexpminusmax-test.py @@ -19,148 +19,40 @@ parser = argparse.ArgumentParser( description='RAddStoreExpMinusMax microkernel test generator') -parser.add_argument("-s", "--spec", metavar="FILE", required=True, - help="Specification (YAML) file") +parser.add_argument("-t", "--tester", metavar="TESTER", required=True, + choices=["RAddStoreExpMinusMaxMicrokernelTester"], + help="Tester class to be used in the generated test") +parser.add_argument("-k", "--ukernel", metavar="FILE", required=True, + help="Microkernel type") parser.add_argument("-o", "--output", metavar="FILE", required=True, help='Output (C++ source) file') parser.set_defaults(defines=list()) -def split_ukernel_name(name): - match = re.fullmatch(r"xnn_(f16|f32)_raddstoreexpminusmax_ukernel__(.+)_u(\d+)(v)?(_acc(\d+))?", name) - if match is None: - raise ValueError("Unexpected microkernel name: " + name) - elements_tile = int(match.group(3)) - vector_tile = bool(match.group(4)) - - arch, isa, assembly = xnncommon.parse_target_name(target_name=match.group(2)) - return elements_tile, vector_tile, arch, isa - - RADDSTOREEXPMINUSMAX_TEST_TEMPLATE = """\ -TEST(${TEST_NAME}, elements_eq_${ELEMENTS_TILE}${ELEMENTS_SUFFIX}) { - $if ISA_CHECK: - ${ISA_CHECK}; - RAddStoreExpMinusMaxMicrokernelTester() - .elements(${ELEMENTS_TILE}${ELEMENTS_SCALE}) - .Test(${TEST_FUNCTION}, ${INIT_FUNCTION}); -} - -$if ELEMENTS_TILE > 1 or ELEMENTS_SCALE != "": - TEST(${TEST_NAME}, elements_div_${ELEMENTS_TILE}${ELEMENTS_SUFFIX}) { - $if ISA_CHECK: - ${ISA_CHECK}; - $if ELEMENTS_SCALE == "": - for (size_t elements = ${ELEMENTS_TILE*2}; elements < ${ELEMENTS_TILE*10}; elements += ${ELEMENTS_TILE}) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(${TEST_FUNCTION}, ${INIT_FUNCTION}); - } - $else: - for (size_t elements = ${ELEMENTS_TILE*2}${ELEMENTS_SCALE}; - elements < ${ELEMENTS_TILE*10}${ELEMENTS_SCALE}; - elements += ${ELEMENTS_TILE}${ELEMENTS_SCALE}) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(${TEST_FUNCTION}, ${INIT_FUNCTION}); - } - } - - TEST(${TEST_NAME}, elements_lt_${ELEMENTS_TILE}${ELEMENTS_SUFFIX}) { - $if ISA_CHECK: - ${ISA_CHECK}; - $if ELEMENTS_SCALE == "": - for (size_t elements = 1; elements < ${ELEMENTS_TILE}; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(${TEST_FUNCTION}, ${INIT_FUNCTION}); - } - $else: - for (size_t elements = 1; - elements < ${ELEMENTS_TILE}${ELEMENTS_SCALE}; - elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(${TEST_FUNCTION}, ${INIT_FUNCTION}); - } - } - -TEST(${TEST_NAME}, elements_gt_${ELEMENTS_TILE}${ELEMENTS_SUFFIX}) { - $if ISA_CHECK: - ${ISA_CHECK}; - $if ELEMENTS_SCALE == "": - for (size_t elements = ${ELEMENTS_TILE+1}; elements < ${10 if ELEMENTS_TILE == 1 else ELEMENTS_TILE*2}; elements++) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(${TEST_FUNCTION}, ${INIT_FUNCTION}); - } - $else: - for (size_t elements = ${ELEMENTS_TILE}${ELEMENTS_SCALE} + 1; - elements < ${10 if ELEMENTS_TILE == 1 else ELEMENTS_TILE*2}${ELEMENTS_SCALE}; - elements += ${ELEMENTS_TILE*2}) { - RAddStoreExpMinusMaxMicrokernelTester() - .elements(elements) - .Test(${TEST_FUNCTION}, ${INIT_FUNCTION}); - } -} +#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, element_tile, datatype, params_type, init_params) \ +XNN_TEST_RADDSTOREEXPMINUSMAX_ELEMENT_EQ(ukernel, arch_flags, ${", ".join(TEST_ARGS)}); +XNN_TEST_RADDSTOREEXPMINUSMAX_ELEMENT_DIV(ukernel, arch_flags, ${", ".join(TEST_ARGS)}); +XNN_TEST_RADDSTOREEXPMINUSMAX_ELEMENT_LT(ukernel, arch_flags, ${", ".join(TEST_ARGS)}); +XNN_TEST_RADDSTOREEXPMINUSMAX_ELEMENT_GT(ukernel, arch_flags, ${", ".join(TEST_ARGS)}); """ - -def generate_test_cases(ukernel, init_fn, elements_tile, vector_tile, isa): - """Generates all tests cases for a RAddStoreExpMinusMax micro-kernel. - - Args: - ukernel: C name of the micro-kernel function. - init_fn: C name of the function to initialize microkernel parameters. - elements_tile: Number of batch elements processed per one iteration of the - inner loop of the micro-kernel. - vector_tile: Indicates if elements_tile is specified in vectors rather than - elements. - isa: instruction set required to run the micro-kernel. Generated unit test - will skip execution if the host processor doesn't support this ISA. - - Returns: - Code for the test case. - """ - _, test_name = ukernel.split("_", 1) - _, datatype, _ = ukernel.split("_", 2) - elements_scale = "" - if vector_tile: - ctype = {"f16": "uint16_t", "f32": "float"}[datatype] - elements_scale = { - "rvv": " * xnn_init_hardware_config()->vlenb / sizeof(%s)" % ctype, - "rvvfp16arith": " * xnn_init_hardware_config()->vlenb / sizeof(%s)" % ctype, - }[isa] - - - return xngen.preprocess(RADDSTOREEXPMINUSMAX_TEST_TEMPLATE, { - "TEST_FUNCTION": ukernel, - "INIT_FUNCTION": init_fn, - "TEST_NAME": test_name.upper().replace("UKERNEL_", ""), - "DATATYPE": datatype, - "ELEMENTS_TILE": elements_tile, - "ELEMENTS_SCALE": elements_scale, - "ELEMENTS_SUFFIX": "v" if vector_tile else "", - "ISA_CHECK": xnncommon.generate_isa_check_macro(isa), - }) - - def main(args): options = parser.parse_args(args) + tester = options.tester + tester_header = { + "RAddStoreExpMinusMaxMicrokernelTester": "raddstoreexpminusmax-microkernel-tester.h", + }[tester] + ukernel = options.ukernel - with codecs.open(options.spec, "r", encoding="utf-8") as spec_file: - spec_yaml = yaml.safe_load(spec_file) - if not isinstance(spec_yaml, list): - raise ValueError("expected a list of micro-kernels in the spec") - - tests = """\ + tests = """\ // Copyright 2019 Google LLC // // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. // // Auto-generated file. Do not edit! -// Specification: {specification} +// Microkernel: {specification} // Generator: {generator} @@ -170,19 +62,27 @@ def main(args): #include "xnnpack/microparams-init.h" #include "xnnpack/raddstoreexpminusmax.h" #include "raddstoreexpminusmax-microkernel-tester.h" -""".format(specification=options.spec, generator=sys.argv[0]) - - for ukernel_spec in spec_yaml: - name = ukernel_spec["name"] - init_fn = ukernel_spec.get("init") - if init_fn is None: - init_fn = "nullptr" - elements_tile, vector_tile, arch, isa = split_ukernel_name(name) - - test_case = generate_test_cases(name, init_fn, elements_tile, vector_tile, isa) - tests += "\n\n" + xnncommon.postprocess_test_case(test_case, arch, isa) - - xnncommon.overwrite_if_changed(options.output, tests) +""".format(specification=options.ukernel, generator=sys.argv[0]) + ukernel_parts = options.ukernel.split("-") + datatype = ukernel_parts[0] + op = ukernel_parts[1] + test_args = ["element_tile"] + test_args.append("datatype") + test_args.append("params_type") + test_args.append("init_params") + tests += xnncommon.make_multiline_macro(xngen.preprocess( + RADDSTOREEXPMINUSMAX_TEST_TEMPLATE, + { + "TEST_ARGS": test_args, + "TESTER": tester, + "DATATYPE": datatype, + }, + )) + folder = datatype + "-" + ("raddstoreexpminusmax" if datatype.startswith("f") else op) + tests += f'#include "{xnncommon._XNNPACK_SRC}{folder}/{options.ukernel}.h"\n' + tests += "#undef XNN_UKERNEL_WITH_PARAMS\n" + + xnncommon.overwrite_if_changed(options.output, tests) if __name__ == "__main__":