Skip to content

Commit e2042c0

Browse files
alankelly and xnnpack-bot
authored and committed
Turn on c2 asm f32 kernels
This is the first time that we exploit the broken dependency between gemm & igemm.

PiperOrigin-RevId: 724255293
1 parent 2750c59 commit e2042c0

8 files changed

+899 -402 lines changed

cmake/gen/amd64_microkernels.cmake

+3-3
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@
1111

1212
SET(PROD_AMD64_ASM_MICROKERNEL_SRCS
1313
src/bf16-f32-gemm/gen/bf16-f32-gemm-1x32c2-minmax-asm-amd64-avx512bf16-broadcast.S
14-
src/bf16-f32-gemm/gen/bf16-f32-gemm-7x32c2-minmax-asm-amd64-avx512bf16-broadcast.S)
14+
src/bf16-f32-gemm/gen/bf16-f32-gemm-7x32c2-minmax-asm-amd64-avx512bf16-broadcast.S
15+
src/f32-gemm/gen/f32-gemm-1x32c2-minmax-asm-amd64-avx512f-broadcast.S
16+
src/f32-gemm/gen/f32-gemm-5x32c2-minmax-asm-amd64-avx512f-broadcast.S)
1517

1618
SET(NON_PROD_AMD64_ASM_MICROKERNEL_SRCS
1719
src/bf16-f32-gemm/gen/bf16-f32-gemm-1x16c2-minmax-asm-amd64-avx512bf16-broadcast.S
@@ -42,7 +44,6 @@ SET(NON_PROD_AMD64_ASM_MICROKERNEL_SRCS
4244
src/f32-gemm/gen/f32-gemm-1x16-minmax-asm-amd64-avx512f-broadcast.S
4345
src/f32-gemm/gen/f32-gemm-1x16c2-minmax-asm-amd64-avx512f-broadcast.S
4446
src/f32-gemm/gen/f32-gemm-1x32-minmax-asm-amd64-avx512f-broadcast.S
45-
src/f32-gemm/gen/f32-gemm-1x32c2-minmax-asm-amd64-avx512f-broadcast.S
4647
src/f32-gemm/gen/f32-gemm-1x64-minmax-asm-amd64-avx512f-broadcast.S
4748
src/f32-gemm/gen/f32-gemm-2x16-minmax-asm-amd64-avx512f-broadcast.S
4849
src/f32-gemm/gen/f32-gemm-2x16c2-minmax-asm-amd64-avx512f-broadcast.S
@@ -62,7 +63,6 @@ SET(NON_PROD_AMD64_ASM_MICROKERNEL_SRCS
6263
src/f32-gemm/gen/f32-gemm-5x16-minmax-asm-amd64-avx512f-broadcast.S
6364
src/f32-gemm/gen/f32-gemm-5x16c2-minmax-asm-amd64-avx512f-broadcast.S
6465
src/f32-gemm/gen/f32-gemm-5x32-minmax-asm-amd64-avx512f-broadcast.S
65-
src/f32-gemm/gen/f32-gemm-5x32c2-minmax-asm-amd64-avx512f-broadcast.S
6666
src/f32-gemm/gen/f32-gemm-5x64-minmax-asm-amd64-avx512f-broadcast.S
6767
src/f32-gemm/gen/f32-gemm-6x16-minmax-asm-amd64-avx512f-broadcast.S
6868
src/f32-gemm/gen/f32-gemm-6x16c2-minmax-asm-amd64-avx512f-broadcast.S

gen/amd64_microkernels.bzl

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ Auto-generated file. Do not edit!
88
PROD_AMD64_ASM_MICROKERNEL_SRCS = [
99
"src/bf16-f32-gemm/gen/bf16-f32-gemm-1x32c2-minmax-asm-amd64-avx512bf16-broadcast.S",
1010
"src/bf16-f32-gemm/gen/bf16-f32-gemm-7x32c2-minmax-asm-amd64-avx512bf16-broadcast.S",
11+
"src/f32-gemm/gen/f32-gemm-1x32c2-minmax-asm-amd64-avx512f-broadcast.S",
12+
"src/f32-gemm/gen/f32-gemm-5x32c2-minmax-asm-amd64-avx512f-broadcast.S",
1113
]
1214

1315
NON_PROD_AMD64_ASM_MICROKERNEL_SRCS = [
@@ -39,7 +41,6 @@ NON_PROD_AMD64_ASM_MICROKERNEL_SRCS = [
3941
"src/f32-gemm/gen/f32-gemm-1x16-minmax-asm-amd64-avx512f-broadcast.S",
4042
"src/f32-gemm/gen/f32-gemm-1x16c2-minmax-asm-amd64-avx512f-broadcast.S",
4143
"src/f32-gemm/gen/f32-gemm-1x32-minmax-asm-amd64-avx512f-broadcast.S",
42-
"src/f32-gemm/gen/f32-gemm-1x32c2-minmax-asm-amd64-avx512f-broadcast.S",
4344
"src/f32-gemm/gen/f32-gemm-1x64-minmax-asm-amd64-avx512f-broadcast.S",
4445
"src/f32-gemm/gen/f32-gemm-2x16-minmax-asm-amd64-avx512f-broadcast.S",
4546
"src/f32-gemm/gen/f32-gemm-2x16c2-minmax-asm-amd64-avx512f-broadcast.S",
@@ -59,7 +60,6 @@ NON_PROD_AMD64_ASM_MICROKERNEL_SRCS = [
5960
"src/f32-gemm/gen/f32-gemm-5x16-minmax-asm-amd64-avx512f-broadcast.S",
6061
"src/f32-gemm/gen/f32-gemm-5x16c2-minmax-asm-amd64-avx512f-broadcast.S",
6162
"src/f32-gemm/gen/f32-gemm-5x32-minmax-asm-amd64-avx512f-broadcast.S",
62-
"src/f32-gemm/gen/f32-gemm-5x32c2-minmax-asm-amd64-avx512f-broadcast.S",
6363
"src/f32-gemm/gen/f32-gemm-5x64-minmax-asm-amd64-avx512f-broadcast.S",
6464
"src/f32-gemm/gen/f32-gemm-6x16-minmax-asm-amd64-avx512f-broadcast.S",
6565
"src/f32-gemm/gen/f32-gemm-6x16c2-minmax-asm-amd64-avx512f-broadcast.S",

src/configs/gemm-config.c

+541-134
Large diffs are not rendered by default.

src/operators/convolution-nhwc.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1824,7 +1824,7 @@ enum xnn_status xnn_create_convolution2d_nhwc_f32(
18241824
xnn_code_cache_t code_cache,
18251825
xnn_weights_cache_t weights_cache,
18261826
xnn_operator_t* convolution_op_out) {
1827-
const struct xnn_gemm_config* gemm_config = xnn_init_f32_gemm_config();
1827+
const struct xnn_gemm_config* gemm_config = xnn_init_f32_igemm_config();
18281828
if (gemm_config == NULL) {
18291829
xnn_log_error("failed to create %s operator: unsupported hardware configuration",
18301830
xnn_operator_type_to_string(xnn_operator_type_convolution_nhwc_f32));

src/operators/deconvolution-nhwc.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1107,7 +1107,7 @@ enum xnn_status xnn_create_deconvolution2d_nhwc_f32(
11071107
return xnn_status_invalid_parameter;
11081108
}
11091109

1110-
const struct xnn_gemm_config* gemm_config = xnn_init_f32_gemm_config();
1110+
const struct xnn_gemm_config* gemm_config = xnn_init_f32_igemm_config();
11111111
if (gemm_config == NULL) {
11121112
xnn_log_error("failed to create %s operator: unsupported hardware configuration",
11131113
xnn_operator_type_to_string(xnn_operator_type_deconvolution_nhwc_f32));

src/xnnpack/config.h

+1
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,7 @@ XNN_INTERNAL const struct xnn_gemm_config* xnn_init_bf16_f32_gemm_config();
244244
XNN_INTERNAL const struct xnn_gemm_config* xnn_init_f16_gemm_config();
245245
XNN_INTERNAL const struct xnn_gemm_config* xnn_init_f32_gemm_config();
246246
XNN_INTERNAL const struct xnn_gemm_config* xnn_init_f32_gemm_nr2_config();
247+
XNN_INTERNAL const struct xnn_gemm_config* xnn_init_f32_igemm_config();
247248
XNN_INTERNAL const struct xnn_gemm_config* xnn_init_f32_qc8w_gemm_config();
248249
XNN_INTERNAL const struct xnn_gemm_config* xnn_init_f32_qc4w_gemm_config();
249250
XNN_INTERNAL const struct xnn_gemm_config* xnn_init_pf16_gemm_config();

0 commit comments

Comments
 (0)