Skip to content

Commit

Permalink
Use GEMM compiler microkernel on Cortex X1
Browse files Browse the repository at this point in the history
Performance is identical.

PiperOrigin-RevId: 728745958
  • Loading branch information
alankelly authored and xnnpack-bot committed Feb 19, 2025
1 parent bfbf44d commit 2ff4206
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 12 deletions.
8 changes: 4 additions & 4 deletions cmake/gen/aarch64_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ SET(PROD_AARCH64_ASM_MICROKERNEL_SRCS
src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-cortex-a53.S
src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-cortex-a75-prfm.S
src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-cortex-a75.S
src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-2.S
src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc2.S
src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc4.S
src/f32-gemm/gen/f32-gemm-4x2-minmax-asm-aarch64-neonfma-ld128.S
Expand All @@ -35,13 +36,14 @@ SET(PROD_AARCH64_ASM_MICROKERNEL_SRCS
src/f32-gemm/gen/f32-gemm-4x8-minmax-asm-aarch64-neonfma-cortex-a55.S
src/f32-gemm/gen/f32-gemm-4x8-minmax-asm-aarch64-neonfma-cortex-a75-prfm.S
src/f32-gemm/gen/f32-gemm-4x8-minmax-asm-aarch64-neonfma-cortex-a75.S
src/f32-gemm/gen/f32-gemm-4x8-minmax-asm-aarch64-neonfma-ld128.S
src/f32-gemm/gen/f32-gemm-4x8-minmax-asm-aarch64-neonfma-ld128-2.S
src/f32-gemm/gen/f32-gemm-6x8-minmax-asm-aarch64-neonfma-cortex-a53-prfm.S
src/f32-gemm/gen/f32-gemm-6x8-minmax-asm-aarch64-neonfma-cortex-a53.S
src/f32-gemm/gen/f32-gemm-6x8-minmax-asm-aarch64-neonfma-cortex-a55.S
src/f32-gemm/gen/f32-gemm-6x8-minmax-asm-aarch64-neonfma-cortex-a73.S
src/f32-gemm/gen/f32-gemm-6x8-minmax-asm-aarch64-neonfma-cortex-a75-prfm.S
src/f32-gemm/gen/f32-gemm-6x8-minmax-asm-aarch64-neonfma-ld128.S
src/f32-gemm/gen/f32-gemm-7x8-minmax-asm-aarch64-neonfma-ld128-2.S
src/f32-igemm/f32-igemm-4x8-minmax-asm-aarch64-neonfma-cortex-a55.S
src/f32-igemm/f32-igemm-6x8-minmax-asm-aarch64-neonfma-cortex-a55.S
src/f32-igemm/f32-igemm-6x8-minmax-asm-aarch64-neonfma-cortex-a73.S
Expand Down Expand Up @@ -131,7 +133,6 @@ SET(NON_PROD_AARCH64_ASM_MICROKERNEL_SRCS
src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld64-acc4.S
src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld64-prfm.S
src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld64.S
src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-2.S
src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc2-prfm.S
src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc4-prfm.S
src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-prfm.S
Expand Down Expand Up @@ -160,7 +161,7 @@ SET(NON_PROD_AARCH64_ASM_MICROKERNEL_SRCS
src/f32-gemm/gen/f32-gemm-4x8-minmax-asm-aarch64-neonfma-ld32-2.S
src/f32-gemm/gen/f32-gemm-4x8-minmax-asm-aarch64-neonfma-ld64-2.S
src/f32-gemm/gen/f32-gemm-4x8-minmax-asm-aarch64-neonfma-ld64.S
src/f32-gemm/gen/f32-gemm-4x8-minmax-asm-aarch64-neonfma-ld128-2.S
src/f32-gemm/gen/f32-gemm-4x8-minmax-asm-aarch64-neonfma-ld128.S
src/f32-gemm/gen/f32-gemm-4x12-minmax-asm-aarch64-neonfma-cortex-a53.S
src/f32-gemm/gen/f32-gemm-4x16-minmax-asm-aarch64-neonfma-ld32.S
src/f32-gemm/gen/f32-gemm-4x16-minmax-asm-aarch64-neonfma-ld64.S
Expand All @@ -180,7 +181,6 @@ SET(NON_PROD_AARCH64_ASM_MICROKERNEL_SRCS
src/f32-gemm/gen/f32-gemm-6x8-minmax-asm-aarch64-neonfma-ld128-2.S
src/f32-gemm/gen/f32-gemm-7x8-minmax-asm-aarch64-neonfma-ld32-2.S
src/f32-gemm/gen/f32-gemm-7x8-minmax-asm-aarch64-neonfma-ld64-2.S
src/f32-gemm/gen/f32-gemm-7x8-minmax-asm-aarch64-neonfma-ld128-2.S
src/f32-gemm/gen/f32-gemm-8x8-minmax-asm-aarch64-neonfma-ld32-2.S
src/f32-gemm/gen/f32-gemm-8x8-minmax-asm-aarch64-neonfma-ld64-2.S
src/f32-gemm/gen/f32-gemm-8x8-minmax-asm-aarch64-neonfma-ld128-2.S
Expand Down
8 changes: 4 additions & 4 deletions gen/aarch64_microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ PROD_AARCH64_ASM_MICROKERNEL_SRCS = [
"src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-cortex-a53.S",
"src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-cortex-a75-prfm.S",
"src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-cortex-a75.S",
"src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-2.S",
"src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc2.S",
"src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc4.S",
"src/f32-gemm/gen/f32-gemm-4x2-minmax-asm-aarch64-neonfma-ld128.S",
Expand All @@ -31,13 +32,14 @@ PROD_AARCH64_ASM_MICROKERNEL_SRCS = [
"src/f32-gemm/gen/f32-gemm-4x8-minmax-asm-aarch64-neonfma-cortex-a55.S",
"src/f32-gemm/gen/f32-gemm-4x8-minmax-asm-aarch64-neonfma-cortex-a75-prfm.S",
"src/f32-gemm/gen/f32-gemm-4x8-minmax-asm-aarch64-neonfma-cortex-a75.S",
"src/f32-gemm/gen/f32-gemm-4x8-minmax-asm-aarch64-neonfma-ld128.S",
"src/f32-gemm/gen/f32-gemm-4x8-minmax-asm-aarch64-neonfma-ld128-2.S",
"src/f32-gemm/gen/f32-gemm-6x8-minmax-asm-aarch64-neonfma-cortex-a53-prfm.S",
"src/f32-gemm/gen/f32-gemm-6x8-minmax-asm-aarch64-neonfma-cortex-a53.S",
"src/f32-gemm/gen/f32-gemm-6x8-minmax-asm-aarch64-neonfma-cortex-a55.S",
"src/f32-gemm/gen/f32-gemm-6x8-minmax-asm-aarch64-neonfma-cortex-a73.S",
"src/f32-gemm/gen/f32-gemm-6x8-minmax-asm-aarch64-neonfma-cortex-a75-prfm.S",
"src/f32-gemm/gen/f32-gemm-6x8-minmax-asm-aarch64-neonfma-ld128.S",
"src/f32-gemm/gen/f32-gemm-7x8-minmax-asm-aarch64-neonfma-ld128-2.S",
"src/f32-igemm/f32-igemm-4x8-minmax-asm-aarch64-neonfma-cortex-a55.S",
"src/f32-igemm/f32-igemm-6x8-minmax-asm-aarch64-neonfma-cortex-a55.S",
"src/f32-igemm/f32-igemm-6x8-minmax-asm-aarch64-neonfma-cortex-a73.S",
Expand Down Expand Up @@ -128,7 +130,6 @@ NON_PROD_AARCH64_ASM_MICROKERNEL_SRCS = [
"src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld64-acc4.S",
"src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld64-prfm.S",
"src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld64.S",
"src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-2.S",
"src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc2-prfm.S",
"src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-acc4-prfm.S",
"src/f32-gemm/gen/f32-gemm-1x8-minmax-asm-aarch64-neonfma-ld128-prfm.S",
Expand Down Expand Up @@ -157,7 +158,7 @@ NON_PROD_AARCH64_ASM_MICROKERNEL_SRCS = [
"src/f32-gemm/gen/f32-gemm-4x8-minmax-asm-aarch64-neonfma-ld32-2.S",
"src/f32-gemm/gen/f32-gemm-4x8-minmax-asm-aarch64-neonfma-ld64-2.S",
"src/f32-gemm/gen/f32-gemm-4x8-minmax-asm-aarch64-neonfma-ld64.S",
"src/f32-gemm/gen/f32-gemm-4x8-minmax-asm-aarch64-neonfma-ld128-2.S",
"src/f32-gemm/gen/f32-gemm-4x8-minmax-asm-aarch64-neonfma-ld128.S",
"src/f32-gemm/gen/f32-gemm-4x12-minmax-asm-aarch64-neonfma-cortex-a53.S",
"src/f32-gemm/gen/f32-gemm-4x16-minmax-asm-aarch64-neonfma-ld32.S",
"src/f32-gemm/gen/f32-gemm-4x16-minmax-asm-aarch64-neonfma-ld64.S",
Expand All @@ -177,7 +178,6 @@ NON_PROD_AARCH64_ASM_MICROKERNEL_SRCS = [
"src/f32-gemm/gen/f32-gemm-6x8-minmax-asm-aarch64-neonfma-ld128-2.S",
"src/f32-gemm/gen/f32-gemm-7x8-minmax-asm-aarch64-neonfma-ld32-2.S",
"src/f32-gemm/gen/f32-gemm-7x8-minmax-asm-aarch64-neonfma-ld64-2.S",
"src/f32-gemm/gen/f32-gemm-7x8-minmax-asm-aarch64-neonfma-ld128-2.S",
"src/f32-gemm/gen/f32-gemm-8x8-minmax-asm-aarch64-neonfma-ld32-2.S",
"src/f32-gemm/gen/f32-gemm-8x8-minmax-asm-aarch64-neonfma-ld64-2.S",
"src/f32-gemm/gen/f32-gemm-8x8-minmax-asm-aarch64-neonfma-ld128-2.S",
Expand Down
8 changes: 4 additions & 4 deletions src/configs/gemm-config.c
Original file line number Diff line number Diff line change
Expand Up @@ -608,15 +608,15 @@ static void init_f32_gemm_config(void) {
case cpuinfo_uarch_neoverse_n2:
case cpuinfo_uarch_neoverse_v1:
default:
f32_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_f32_gemm_minmax_ukernel_1x8__asm_aarch64_neonfma_ld128_acc4);
f32_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_f32_gemm_minmax_ukernel_6x8__asm_aarch64_neonfma_ld128);
f32_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_f32_gemm_minmax_ukernel_1x8__asm_aarch64_neonfma_ld128_2);
f32_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_f32_gemm_minmax_ukernel_7x8__asm_aarch64_neonfma_ld128_2);
#if XNN_ENABLE_GEMM_M_SPECIALIZATION
f32_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_f32_gemm_minmax_ukernel_4x8__asm_aarch64_neonfma_ld128);
f32_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_f32_gemm_minmax_ukernel_4x8__asm_aarch64_neonfma_ld128_2);
#endif
f32_gemm_config.init.f32 = xnn_init_f32_minmax_scalar_params;
f32_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_f32_gemm_gio_w;
f32_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_x32_packw_gemm_goi_ukernel_x8__neon_ld4lane_u4_prfm;
f32_gemm_config.mr = 6;
f32_gemm_config.mr = 7;
f32_gemm_config.nr = 8;
break;
}
Expand Down

0 comments on commit 2ff4206

Please sign in to comment.