Skip to content

Commit 025e3b6

Browse files
fbarchardxnnpack-bot
authored andcommitted
MRx2 GEMM/IGEMM fma3 variant
PiperOrigin-RevId: 719127729
1 parent a108468 commit 025e3b6

29 files changed

+1534
-156
lines changed

bench/f32-gemm-minmax.cc

+33-11
Original file line numberDiff line numberDiff line change
@@ -3421,6 +3421,17 @@
34213421

34223422
BENCHMARK_GEMM(f32_gemm_minmax_ukernel_4x2c4__sse)
34233423

3424+
static void f32_gemm_minmax_ukernel_6x2c4__sse(benchmark::State& state, const char* net) {
3425+
GEMMBenchmark(state,
3426+
xnn_f32_gemm_minmax_ukernel_6x2c4__sse,
3427+
xnn_init_f32_minmax_scalar_params,
3428+
xnn_pack_f32_gemm_goi_w,
3429+
/*mr=*/6, /*nr=*/2, /*kr=*/4, /*sr=*/1,
3430+
/*isa_check=*/nullptr);
3431+
}
3432+
3433+
BENCHMARK_GEMM(f32_gemm_minmax_ukernel_6x2c4__sse)
3434+
34243435
static void f32_gemm_minmax_ukernel_4x8__sse_dup(benchmark::State& state, const char* net) {
34253436
GEMMBenchmark(state,
34263437
xnn_f32_gemm_minmax_ukernel_4x8__sse_dup,
@@ -3487,17 +3498,6 @@
34873498

34883499
BENCHMARK_GEMM(f32_gemm_minmax_ukernel_5x8s4__sse)
34893500

3490-
static void f32_gemm_minmax_ukernel_6x2c4__sse(benchmark::State& state, const char* net) {
3491-
GEMMBenchmark(state,
3492-
xnn_f32_gemm_minmax_ukernel_6x2c4__sse,
3493-
xnn_init_f32_minmax_scalar_params,
3494-
xnn_pack_f32_gemm_goi_w,
3495-
/*mr=*/6, /*nr=*/2, /*kr=*/4, /*sr=*/1,
3496-
/*isa_check=*/nullptr);
3497-
}
3498-
3499-
BENCHMARK_GEMM(f32_gemm_minmax_ukernel_6x2c4__sse)
3500-
35013501
static void f32_gemm_minmax_ukernel_6x8__sse_dup(benchmark::State& state, const char* net) {
35023502
GEMMBenchmark(state,
35033503
xnn_f32_gemm_minmax_ukernel_6x8__sse_dup,
@@ -3531,6 +3531,28 @@
35313531

35323532
BENCHMARK_GEMM(f32_gemm_minmax_ukernel_6x8s4__sse)
35333533

3534+
static void f32_gemm_minmax_ukernel_4x2c4__fma3(benchmark::State& state, const char* net) {
3535+
GEMMBenchmark(state,
3536+
xnn_f32_gemm_minmax_ukernel_4x2c4__fma3,
3537+
xnn_init_f32_minmax_scalar_params,
3538+
xnn_pack_f32_gemm_goi_w,
3539+
/*mr=*/4, /*nr=*/2, /*kr=*/4, /*sr=*/1,
3540+
benchmark::utils::CheckFMA3);
3541+
}
3542+
3543+
BENCHMARK_GEMM(f32_gemm_minmax_ukernel_4x2c4__fma3)
3544+
3545+
static void f32_gemm_minmax_ukernel_6x2c4__fma3(benchmark::State& state, const char* net) {
3546+
GEMMBenchmark(state,
3547+
xnn_f32_gemm_minmax_ukernel_6x2c4__fma3,
3548+
xnn_init_f32_minmax_scalar_params,
3549+
xnn_pack_f32_gemm_goi_w,
3550+
/*mr=*/6, /*nr=*/2, /*kr=*/4, /*sr=*/1,
3551+
benchmark::utils::CheckFMA3);
3552+
}
3553+
3554+
BENCHMARK_GEMM(f32_gemm_minmax_ukernel_6x2c4__fma3)
3555+
35343556
static void f32_gemm_minmax_ukernel_1x8__fma3_broadcast(benchmark::State& state, const char* net) {
35353557
GEMMBenchmark(state,
35363558
xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast,

cmake/gen/avx512f_microkernels.cmake

+6-6
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,9 @@ SET(PROD_AVX512F_MICROKERNEL_SRCS
1515
src/f32-dwconv/gen/f32-dwconv-5f5m5l32c16s1r-minmax-avx512f.c
1616
src/f32-dwconv/gen/f32-dwconv-9p16c-minmax-avx512f.c
1717
src/f32-dwconv/gen/f32-dwconv-25p16c-minmax-avx512f.c
18-
src/f32-gemm/gen/f32-gemm-1x16-minmax-avx512f-broadcast.c
1918
src/f32-gemm/gen/f32-gemm-1x32-minmax-avx512f-broadcast.c
20-
src/f32-gemm/gen/f32-gemm-7x16-minmax-avx512f-broadcast.c
2119
src/f32-gemm/gen/f32-gemm-7x32-minmax-avx512f-broadcast.c
22-
src/f32-igemm/gen/f32-igemm-1x16-minmax-avx512f-broadcast.c
2320
src/f32-igemm/gen/f32-igemm-1x32-minmax-avx512f-broadcast.c
24-
src/f32-igemm/gen/f32-igemm-7x16-minmax-avx512f-broadcast.c
2521
src/f32-igemm/gen/f32-igemm-7x32-minmax-avx512f-broadcast.c
2622
src/f32-raddstoreexpminusmax/gen/f32-raddstoreexpminusmax-avx512f-rr2-p5-u64-acc2.c
2723
src/f32-rdsum/gen/f32-rdsum-7p7x-minmax-avx512f-c64.c
@@ -68,8 +64,6 @@ SET(PROD_AVX512F_MICROKERNEL_SRCS
6864
src/f32-vunary/gen/f32-vabs-avx512f.c
6965
src/f32-vunary/gen/f32-vneg-avx512f.c
7066
src/f32-vunary/gen/f32-vsqr-avx512f.c
71-
src/x32-packw/gen/x32-packw-x16-gemm-gio-avx512f-u8.c
72-
src/x32-packw/gen/x32-packw-x16-gemm-goi-avx512f-u4-prfm.c
7367
src/x32-packw/gen/x32-packw-x32-gemm-gio-avx512f-u8.c
7468
src/x32-packw/gen/x32-packw-x32-gemm-goi-avx512f-u4-prfm.c)
7569

@@ -89,6 +83,7 @@ SET(NON_PROD_AVX512F_MICROKERNEL_SRCS
8983
src/f32-dwconv/gen/f32-dwconv-25p16c-minmax-avx512f-acc2.c
9084
src/f32-dwconv/gen/f32-dwconv-25p32c-minmax-avx512f-acc2.c
9185
src/f32-dwconv/gen/f32-dwconv-25p32c-minmax-avx512f.c
86+
src/f32-gemm/gen/f32-gemm-1x16-minmax-avx512f-broadcast.c
9287
src/f32-gemm/gen/f32-gemm-1x64-minmax-avx512f-broadcast.c
9388
src/f32-gemm/gen/f32-gemm-4x16-minmax-avx512f-broadcast.c
9489
src/f32-gemm/gen/f32-gemm-4x32-minmax-avx512f-broadcast.c
@@ -99,6 +94,7 @@ SET(NON_PROD_AVX512F_MICROKERNEL_SRCS
9994
src/f32-gemm/gen/f32-gemm-6x16-minmax-avx512f-broadcast.c
10095
src/f32-gemm/gen/f32-gemm-6x32-minmax-avx512f-broadcast.c
10196
src/f32-gemm/gen/f32-gemm-6x64-minmax-avx512f-broadcast.c
97+
src/f32-gemm/gen/f32-gemm-7x16-minmax-avx512f-broadcast.c
10298
src/f32-gemm/gen/f32-gemm-7x64-minmax-avx512f-broadcast.c
10399
src/f32-gemm/gen/f32-gemm-8x16-minmax-avx512f-broadcast.c
104100
src/f32-gemm/gen/f32-gemm-8x32-minmax-avx512f-broadcast.c
@@ -133,12 +129,14 @@ SET(NON_PROD_AVX512F_MICROKERNEL_SRCS
133129
src/f32-gemminc/gen/f32-gemminc-6x16-minmax-avx512f-broadcast.c
134130
src/f32-gemminc/gen/f32-gemminc-7x16-minmax-avx512f-broadcast.c
135131
src/f32-gemminc/gen/f32-gemminc-8x16-minmax-avx512f-broadcast.c
132+
src/f32-igemm/gen/f32-igemm-1x16-minmax-avx512f-broadcast.c
136133
src/f32-igemm/gen/f32-igemm-4x16-minmax-avx512f-broadcast.c
137134
src/f32-igemm/gen/f32-igemm-4x32-minmax-avx512f-broadcast.c
138135
src/f32-igemm/gen/f32-igemm-5x16-minmax-avx512f-broadcast.c
139136
src/f32-igemm/gen/f32-igemm-5x32-minmax-avx512f-broadcast.c
140137
src/f32-igemm/gen/f32-igemm-6x16-minmax-avx512f-broadcast.c
141138
src/f32-igemm/gen/f32-igemm-6x32-minmax-avx512f-broadcast.c
139+
src/f32-igemm/gen/f32-igemm-7x16-minmax-avx512f-broadcast.c
142140
src/f32-igemm/gen/f32-igemm-8x16-minmax-avx512f-broadcast.c
143141
src/f32-igemm/gen/f32-igemm-8x32-minmax-avx512f-broadcast.c
144142
src/f32-raddexpminusmax/gen/f32-raddexpminusmax-avx512f-p5-scalef-u64-acc2.c
@@ -277,6 +275,8 @@ SET(NON_PROD_AVX512F_MICROKERNEL_SRCS
277275
src/x32-packw/gen/x32-packw-x16-gemm-gio-avx512f-u1-prfm.c
278276
src/x32-packw/gen/x32-packw-x16-gemm-gio-avx512f-u1.c
279277
src/x32-packw/gen/x32-packw-x16-gemm-gio-avx512f-u8-prfm.c
278+
src/x32-packw/gen/x32-packw-x16-gemm-gio-avx512f-u8.c
279+
src/x32-packw/gen/x32-packw-x16-gemm-goi-avx512f-u4-prfm.c
280280
src/x32-packw/gen/x32-packw-x16-gemm-goi-avx512f-u4.c
281281
src/x32-packw/gen/x32-packw-x32-gemm-gio-avx512f-u1-prfm.c
282282
src/x32-packw/gen/x32-packw-x32-gemm-gio-avx512f-u1.c

cmake/gen/avx_microkernels.cmake

+6-6
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,9 @@ SET(PROD_AVX_MICROKERNEL_SRCS
1717
src/f32-dwconv/gen/f32-dwconv-9p16c-minmax-avx.c
1818
src/f32-dwconv/gen/f32-dwconv-25p8c-minmax-avx.c
1919
src/f32-f16-vcvt/gen/f32-f16-vcvt-avx-u24.c
20-
src/f32-gemm/gen/f32-gemm-1x8-minmax-avx-broadcast.c
2120
src/f32-gemm/gen/f32-gemm-1x16-minmax-avx-broadcast.c
22-
src/f32-gemm/gen/f32-gemm-5x8-minmax-avx-broadcast.c
2321
src/f32-gemm/gen/f32-gemm-5x16-minmax-avx-broadcast.c
24-
src/f32-igemm/gen/f32-igemm-1x8-minmax-avx-broadcast.c
2522
src/f32-igemm/gen/f32-igemm-1x16-minmax-avx-broadcast.c
26-
src/f32-igemm/gen/f32-igemm-5x8-minmax-avx-broadcast.c
2723
src/f32-igemm/gen/f32-igemm-5x16-minmax-avx-broadcast.c
2824
src/f32-qc4w-gemm/gen/f32-qc4w-gemm-1x16-minmax-avx-broadcast.c
2925
src/f32-qc4w-gemm/gen/f32-qc4w-gemm-3x16-minmax-avx-broadcast.c
@@ -105,8 +101,6 @@ SET(PROD_AVX_MICROKERNEL_SRCS
105101
src/qu8-vmul/gen/qu8-vmul-minmax-fp32-avx-mul16-ld64-u16.c
106102
src/qu8-vmulc/gen/qu8-vmulc-minmax-fp32-avx-mul16-ld64-u16.c
107103
src/x8-lut/gen/x8-lut-avx-u64.c
108-
src/x32-packw/gen/x32-packw-x8-gemm-gio-avx-u8.c
109-
src/x32-packw/gen/x32-packw-x8-gemm-goi-avx-u4.c
110104
src/x32-packw/gen/x32-packw-x16-gemm-gio-avx-u8.c
111105
src/x32-packw/gen/x32-packw-x16-gemm-goi-avx-u4.c
112106
src/x32-packw/gen/x32-packw-x16s4-gemm-goi-avx-u4.c
@@ -147,9 +141,11 @@ SET(NON_PROD_AVX_MICROKERNEL_SRCS
147141
src/f32-f16-vcvt/gen/f32-f16-vcvt-avx-u8.c
148142
src/f32-f16-vcvt/gen/f32-f16-vcvt-avx-u16.c
149143
src/f32-f16-vcvt/gen/f32-f16-vcvt-avx-u32.c
144+
src/f32-gemm/gen/f32-gemm-1x8-minmax-avx-broadcast.c
150145
src/f32-gemm/gen/f32-gemm-3x16-minmax-avx-broadcast.c
151146
src/f32-gemm/gen/f32-gemm-4x8-minmax-avx-broadcast.c
152147
src/f32-gemm/gen/f32-gemm-4x16-minmax-avx-broadcast.c
148+
src/f32-gemm/gen/f32-gemm-5x8-minmax-avx-broadcast.c
153149
src/f32-gemm/gen/f32-gemm-6x8-minmax-avx-broadcast.c
154150
src/f32-gemm/gen/f32-gemm-6x16-minmax-avx-broadcast.c
155151
src/f32-gemm/gen/f32-gemm-7x8-minmax-avx-broadcast.c
@@ -163,9 +159,11 @@ SET(NON_PROD_AVX_MICROKERNEL_SRCS
163159
src/f32-gemminc/gen/f32-gemminc-6x8-minmax-avx-broadcast.c
164160
src/f32-gemminc/gen/f32-gemminc-6x16-minmax-avx-broadcast.c
165161
src/f32-gemminc/gen/f32-gemminc-7x8-minmax-avx-broadcast.c
162+
src/f32-igemm/gen/f32-igemm-1x8-minmax-avx-broadcast.c
166163
src/f32-igemm/gen/f32-igemm-3x16-minmax-avx-broadcast.c
167164
src/f32-igemm/gen/f32-igemm-4x8-minmax-avx-broadcast.c
168165
src/f32-igemm/gen/f32-igemm-4x16-minmax-avx-broadcast.c
166+
src/f32-igemm/gen/f32-igemm-5x8-minmax-avx-broadcast.c
169167
src/f32-igemm/gen/f32-igemm-6x8-minmax-avx-broadcast.c
170168
src/f32-igemm/gen/f32-igemm-6x16-minmax-avx-broadcast.c
171169
src/f32-igemm/gen/f32-igemm-7x8-minmax-avx-broadcast.c
@@ -459,7 +457,9 @@ SET(NON_PROD_AVX_MICROKERNEL_SRCS
459457
src/x32-packw/gen/x32-packw-x8-gemm-gio-avx-u1-prfm.c
460458
src/x32-packw/gen/x32-packw-x8-gemm-gio-avx-u1.c
461459
src/x32-packw/gen/x32-packw-x8-gemm-gio-avx-u8-prfm.c
460+
src/x32-packw/gen/x32-packw-x8-gemm-gio-avx-u8.c
462461
src/x32-packw/gen/x32-packw-x8-gemm-goi-avx-u4-prfm.c
462+
src/x32-packw/gen/x32-packw-x8-gemm-goi-avx-u4.c
463463
src/x32-packw/gen/x32-packw-x8s4-gemm-goi-avx-u4-prfm.c
464464
src/x32-packw/gen/x32-packw-x8s4-gemm-goi-avx-u4.c
465465
src/x32-packw/gen/x32-packw-x16-gemm-gio-avx-u1-prfm.c

cmake/gen/fma3_microkernels.cmake

+12-6
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,15 @@ SET(PROD_FMA3_MICROKERNEL_SRCS
2222
src/f32-dwconv/gen/f32-dwconv-5f5m5l8c8s4r-minmax-fma3.c
2323
src/f32-dwconv/gen/f32-dwconv-9p16c-minmax-fma3.c
2424
src/f32-dwconv/gen/f32-dwconv-25p8c-minmax-fma3.c
25-
src/f32-gemm/gen/f32-gemm-1x8-minmax-fma3-broadcast.c
2625
src/f32-gemm/gen/f32-gemm-1x16-minmax-fma3-broadcast.c
2726
src/f32-gemm/gen/f32-gemm-1x16s4-minmax-fma3-broadcast.c
28-
src/f32-gemm/gen/f32-gemm-4x8-minmax-fma3-broadcast.c
27+
src/f32-gemm/gen/f32-gemm-4x2c4-minmax-fma3.c
2928
src/f32-gemm/gen/f32-gemm-4x16s4-minmax-fma3-broadcast.c
30-
src/f32-gemm/gen/f32-gemm-5x8-minmax-fma3-broadcast.c
3129
src/f32-gemm/gen/f32-gemm-5x16-minmax-fma3-broadcast.c
32-
src/f32-igemm/gen/f32-igemm-1x8-minmax-fma3-broadcast.c
3330
src/f32-igemm/gen/f32-igemm-1x16-minmax-fma3-broadcast.c
3431
src/f32-igemm/gen/f32-igemm-1x16s4-minmax-fma3-broadcast.c
35-
src/f32-igemm/gen/f32-igemm-4x8-minmax-fma3-broadcast.c
32+
src/f32-igemm/gen/f32-igemm-4x2c4-minmax-fma3.c
3633
src/f32-igemm/gen/f32-igemm-4x16s4-minmax-fma3-broadcast.c
37-
src/f32-igemm/gen/f32-igemm-5x8-minmax-fma3-broadcast.c
3834
src/f32-igemm/gen/f32-igemm-5x16-minmax-fma3-broadcast-prfm.c
3935
src/f32-qc4w-gemm/gen/f32-qc4w-gemm-1x16-minmax-fma3-broadcast.c
4036
src/f32-qc4w-gemm/gen/f32-qc4w-gemm-3x16-minmax-fma3-broadcast.c
@@ -123,10 +119,14 @@ SET(NON_PROD_FMA3_MICROKERNEL_SRCS
123119
src/f32-dwconv/gen/f32-dwconv-25p8c-minmax-fma3-acc2.c
124120
src/f32-dwconv/gen/f32-dwconv-25p16c-minmax-fma3-acc2.c
125121
src/f32-dwconv/gen/f32-dwconv-25p16c-minmax-fma3.c
122+
src/f32-gemm/gen/f32-gemm-1x8-minmax-fma3-broadcast.c
126123
src/f32-gemm/gen/f32-gemm-3x16-minmax-fma3-broadcast.c
127124
src/f32-gemm/gen/f32-gemm-3x16s4-minmax-fma3-broadcast.c
125+
src/f32-gemm/gen/f32-gemm-4x8-minmax-fma3-broadcast.c
128126
src/f32-gemm/gen/f32-gemm-4x16-minmax-fma3-broadcast.c
127+
src/f32-gemm/gen/f32-gemm-5x8-minmax-fma3-broadcast.c
129128
src/f32-gemm/gen/f32-gemm-5x16s4-minmax-fma3-broadcast.c
129+
src/f32-gemm/gen/f32-gemm-6x2c4-minmax-fma3.c
130130
src/f32-gemm/gen/f32-gemm-6x8-minmax-fma3-broadcast.c
131131
src/f32-gemm/gen/f32-gemm-6x16-minmax-fma3-broadcast.c
132132
src/f32-gemm/gen/f32-gemm-6x16s4-minmax-fma3-broadcast.c
@@ -148,11 +148,15 @@ SET(NON_PROD_FMA3_MICROKERNEL_SRCS
148148
src/f32-gemminc/gen/f32-gemminc-6x16s4-minmax-fma3-broadcast.c
149149
src/f32-gemminc/gen/f32-gemminc-7x8-minmax-fma3-broadcast.c
150150
src/f32-gemminc/gen/f32-gemminc-8x8-minmax-fma3-broadcast.c
151+
src/f32-igemm/gen/f32-igemm-1x8-minmax-fma3-broadcast.c
151152
src/f32-igemm/gen/f32-igemm-3x16-minmax-fma3-broadcast.c
152153
src/f32-igemm/gen/f32-igemm-3x16s4-minmax-fma3-broadcast.c
154+
src/f32-igemm/gen/f32-igemm-4x8-minmax-fma3-broadcast.c
153155
src/f32-igemm/gen/f32-igemm-4x16-minmax-fma3-broadcast.c
156+
src/f32-igemm/gen/f32-igemm-5x8-minmax-fma3-broadcast.c
154157
src/f32-igemm/gen/f32-igemm-5x16-minmax-fma3-broadcast.c
155158
src/f32-igemm/gen/f32-igemm-5x16s4-minmax-fma3-broadcast.c
159+
src/f32-igemm/gen/f32-igemm-6x2c4-minmax-fma3.c
156160
src/f32-igemm/gen/f32-igemm-6x8-minmax-fma3-broadcast.c
157161
src/f32-igemm/gen/f32-igemm-6x16-minmax-fma3-broadcast-prfm.c
158162
src/f32-igemm/gen/f32-igemm-6x16-minmax-fma3-broadcast.c
@@ -167,7 +171,9 @@ SET(NON_PROD_FMA3_MICROKERNEL_SRCS
167171
src/f32-qc4w-gemm/gen/f32-qc4w-gemm-8x16-minmax-fma3-broadcast.c
168172
src/f32-qc8w-gemm/gen/f32-qc8w-gemm-2x16-minmax-fma3-broadcast.c
169173
src/f32-qc8w-gemm/gen/f32-qc8w-gemm-3x16-minmax-fma3-broadcast.c
174+
src/f32-qc8w-gemm/gen/f32-qc8w-gemm-4x2c4-minmax-fma3.c
170175
src/f32-qc8w-gemm/gen/f32-qc8w-gemm-4x16-minmax-fma3-broadcast.c
176+
src/f32-qc8w-gemm/gen/f32-qc8w-gemm-6x2c4-minmax-fma3.c
171177
src/f32-qc8w-gemm/gen/f32-qc8w-gemm-6x16-minmax-fma3-broadcast.c
172178
src/f32-qc8w-gemm/gen/f32-qc8w-gemm-7x16-minmax-fma3-broadcast.c
173179
src/f32-qc8w-gemm/gen/f32-qc8w-gemm-8x16-minmax-fma3-broadcast.c

gen/avx512f_microkernels.bzl

+6-6
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,9 @@ PROD_AVX512F_MICROKERNEL_SRCS = [
1111
"src/f32-dwconv/gen/f32-dwconv-5f5m5l32c16s1r-minmax-avx512f.c",
1212
"src/f32-dwconv/gen/f32-dwconv-9p16c-minmax-avx512f.c",
1313
"src/f32-dwconv/gen/f32-dwconv-25p16c-minmax-avx512f.c",
14-
"src/f32-gemm/gen/f32-gemm-1x16-minmax-avx512f-broadcast.c",
1514
"src/f32-gemm/gen/f32-gemm-1x32-minmax-avx512f-broadcast.c",
16-
"src/f32-gemm/gen/f32-gemm-7x16-minmax-avx512f-broadcast.c",
1715
"src/f32-gemm/gen/f32-gemm-7x32-minmax-avx512f-broadcast.c",
18-
"src/f32-igemm/gen/f32-igemm-1x16-minmax-avx512f-broadcast.c",
1916
"src/f32-igemm/gen/f32-igemm-1x32-minmax-avx512f-broadcast.c",
20-
"src/f32-igemm/gen/f32-igemm-7x16-minmax-avx512f-broadcast.c",
2117
"src/f32-igemm/gen/f32-igemm-7x32-minmax-avx512f-broadcast.c",
2218
"src/f32-raddstoreexpminusmax/gen/f32-raddstoreexpminusmax-avx512f-rr2-p5-u64-acc2.c",
2319
"src/f32-rdsum/gen/f32-rdsum-7p7x-minmax-avx512f-c64.c",
@@ -64,8 +60,6 @@ PROD_AVX512F_MICROKERNEL_SRCS = [
6460
"src/f32-vunary/gen/f32-vabs-avx512f.c",
6561
"src/f32-vunary/gen/f32-vneg-avx512f.c",
6662
"src/f32-vunary/gen/f32-vsqr-avx512f.c",
67-
"src/x32-packw/gen/x32-packw-x16-gemm-gio-avx512f-u8.c",
68-
"src/x32-packw/gen/x32-packw-x16-gemm-goi-avx512f-u4-prfm.c",
6963
"src/x32-packw/gen/x32-packw-x32-gemm-gio-avx512f-u8.c",
7064
"src/x32-packw/gen/x32-packw-x32-gemm-goi-avx512f-u4-prfm.c",
7165
]
@@ -86,6 +80,7 @@ NON_PROD_AVX512F_MICROKERNEL_SRCS = [
8680
"src/f32-dwconv/gen/f32-dwconv-25p16c-minmax-avx512f-acc2.c",
8781
"src/f32-dwconv/gen/f32-dwconv-25p32c-minmax-avx512f-acc2.c",
8882
"src/f32-dwconv/gen/f32-dwconv-25p32c-minmax-avx512f.c",
83+
"src/f32-gemm/gen/f32-gemm-1x16-minmax-avx512f-broadcast.c",
8984
"src/f32-gemm/gen/f32-gemm-1x64-minmax-avx512f-broadcast.c",
9085
"src/f32-gemm/gen/f32-gemm-4x16-minmax-avx512f-broadcast.c",
9186
"src/f32-gemm/gen/f32-gemm-4x32-minmax-avx512f-broadcast.c",
@@ -96,6 +91,7 @@ NON_PROD_AVX512F_MICROKERNEL_SRCS = [
9691
"src/f32-gemm/gen/f32-gemm-6x16-minmax-avx512f-broadcast.c",
9792
"src/f32-gemm/gen/f32-gemm-6x32-minmax-avx512f-broadcast.c",
9893
"src/f32-gemm/gen/f32-gemm-6x64-minmax-avx512f-broadcast.c",
94+
"src/f32-gemm/gen/f32-gemm-7x16-minmax-avx512f-broadcast.c",
9995
"src/f32-gemm/gen/f32-gemm-7x64-minmax-avx512f-broadcast.c",
10096
"src/f32-gemm/gen/f32-gemm-8x16-minmax-avx512f-broadcast.c",
10197
"src/f32-gemm/gen/f32-gemm-8x32-minmax-avx512f-broadcast.c",
@@ -130,12 +126,14 @@ NON_PROD_AVX512F_MICROKERNEL_SRCS = [
130126
"src/f32-gemminc/gen/f32-gemminc-6x16-minmax-avx512f-broadcast.c",
131127
"src/f32-gemminc/gen/f32-gemminc-7x16-minmax-avx512f-broadcast.c",
132128
"src/f32-gemminc/gen/f32-gemminc-8x16-minmax-avx512f-broadcast.c",
129+
"src/f32-igemm/gen/f32-igemm-1x16-minmax-avx512f-broadcast.c",
133130
"src/f32-igemm/gen/f32-igemm-4x16-minmax-avx512f-broadcast.c",
134131
"src/f32-igemm/gen/f32-igemm-4x32-minmax-avx512f-broadcast.c",
135132
"src/f32-igemm/gen/f32-igemm-5x16-minmax-avx512f-broadcast.c",
136133
"src/f32-igemm/gen/f32-igemm-5x32-minmax-avx512f-broadcast.c",
137134
"src/f32-igemm/gen/f32-igemm-6x16-minmax-avx512f-broadcast.c",
138135
"src/f32-igemm/gen/f32-igemm-6x32-minmax-avx512f-broadcast.c",
136+
"src/f32-igemm/gen/f32-igemm-7x16-minmax-avx512f-broadcast.c",
139137
"src/f32-igemm/gen/f32-igemm-8x16-minmax-avx512f-broadcast.c",
140138
"src/f32-igemm/gen/f32-igemm-8x32-minmax-avx512f-broadcast.c",
141139
"src/f32-raddexpminusmax/gen/f32-raddexpminusmax-avx512f-p5-scalef-u64-acc2.c",
@@ -274,6 +272,8 @@ NON_PROD_AVX512F_MICROKERNEL_SRCS = [
274272
"src/x32-packw/gen/x32-packw-x16-gemm-gio-avx512f-u1-prfm.c",
275273
"src/x32-packw/gen/x32-packw-x16-gemm-gio-avx512f-u1.c",
276274
"src/x32-packw/gen/x32-packw-x16-gemm-gio-avx512f-u8-prfm.c",
275+
"src/x32-packw/gen/x32-packw-x16-gemm-gio-avx512f-u8.c",
276+
"src/x32-packw/gen/x32-packw-x16-gemm-goi-avx512f-u4-prfm.c",
277277
"src/x32-packw/gen/x32-packw-x16-gemm-goi-avx512f-u4.c",
278278
"src/x32-packw/gen/x32-packw-x32-gemm-gio-avx512f-u1-prfm.c",
279279
"src/x32-packw/gen/x32-packw-x32-gemm-gio-avx512f-u1.c",

gen/avx_microkernels.bzl

+6-6
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,9 @@ PROD_AVX_MICROKERNEL_SRCS = [
1313
"src/f32-dwconv/gen/f32-dwconv-9p16c-minmax-avx.c",
1414
"src/f32-dwconv/gen/f32-dwconv-25p8c-minmax-avx.c",
1515
"src/f32-f16-vcvt/gen/f32-f16-vcvt-avx-u24.c",
16-
"src/f32-gemm/gen/f32-gemm-1x8-minmax-avx-broadcast.c",
1716
"src/f32-gemm/gen/f32-gemm-1x16-minmax-avx-broadcast.c",
18-
"src/f32-gemm/gen/f32-gemm-5x8-minmax-avx-broadcast.c",
1917
"src/f32-gemm/gen/f32-gemm-5x16-minmax-avx-broadcast.c",
20-
"src/f32-igemm/gen/f32-igemm-1x8-minmax-avx-broadcast.c",
2118
"src/f32-igemm/gen/f32-igemm-1x16-minmax-avx-broadcast.c",
22-
"src/f32-igemm/gen/f32-igemm-5x8-minmax-avx-broadcast.c",
2319
"src/f32-igemm/gen/f32-igemm-5x16-minmax-avx-broadcast.c",
2420
"src/f32-qc4w-gemm/gen/f32-qc4w-gemm-1x16-minmax-avx-broadcast.c",
2521
"src/f32-qc4w-gemm/gen/f32-qc4w-gemm-3x16-minmax-avx-broadcast.c",
@@ -101,8 +97,6 @@ PROD_AVX_MICROKERNEL_SRCS = [
10197
"src/qu8-vmul/gen/qu8-vmul-minmax-fp32-avx-mul16-ld64-u16.c",
10298
"src/qu8-vmulc/gen/qu8-vmulc-minmax-fp32-avx-mul16-ld64-u16.c",
10399
"src/x8-lut/gen/x8-lut-avx-u64.c",
104-
"src/x32-packw/gen/x32-packw-x8-gemm-gio-avx-u8.c",
105-
"src/x32-packw/gen/x32-packw-x8-gemm-goi-avx-u4.c",
106100
"src/x32-packw/gen/x32-packw-x16-gemm-gio-avx-u8.c",
107101
"src/x32-packw/gen/x32-packw-x16-gemm-goi-avx-u4.c",
108102
"src/x32-packw/gen/x32-packw-x16s4-gemm-goi-avx-u4.c",
@@ -144,9 +138,11 @@ NON_PROD_AVX_MICROKERNEL_SRCS = [
144138
"src/f32-f16-vcvt/gen/f32-f16-vcvt-avx-u8.c",
145139
"src/f32-f16-vcvt/gen/f32-f16-vcvt-avx-u16.c",
146140
"src/f32-f16-vcvt/gen/f32-f16-vcvt-avx-u32.c",
141+
"src/f32-gemm/gen/f32-gemm-1x8-minmax-avx-broadcast.c",
147142
"src/f32-gemm/gen/f32-gemm-3x16-minmax-avx-broadcast.c",
148143
"src/f32-gemm/gen/f32-gemm-4x8-minmax-avx-broadcast.c",
149144
"src/f32-gemm/gen/f32-gemm-4x16-minmax-avx-broadcast.c",
145+
"src/f32-gemm/gen/f32-gemm-5x8-minmax-avx-broadcast.c",
150146
"src/f32-gemm/gen/f32-gemm-6x8-minmax-avx-broadcast.c",
151147
"src/f32-gemm/gen/f32-gemm-6x16-minmax-avx-broadcast.c",
152148
"src/f32-gemm/gen/f32-gemm-7x8-minmax-avx-broadcast.c",
@@ -160,9 +156,11 @@ NON_PROD_AVX_MICROKERNEL_SRCS = [
160156
"src/f32-gemminc/gen/f32-gemminc-6x8-minmax-avx-broadcast.c",
161157
"src/f32-gemminc/gen/f32-gemminc-6x16-minmax-avx-broadcast.c",
162158
"src/f32-gemminc/gen/f32-gemminc-7x8-minmax-avx-broadcast.c",
159+
"src/f32-igemm/gen/f32-igemm-1x8-minmax-avx-broadcast.c",
163160
"src/f32-igemm/gen/f32-igemm-3x16-minmax-avx-broadcast.c",
164161
"src/f32-igemm/gen/f32-igemm-4x8-minmax-avx-broadcast.c",
165162
"src/f32-igemm/gen/f32-igemm-4x16-minmax-avx-broadcast.c",
163+
"src/f32-igemm/gen/f32-igemm-5x8-minmax-avx-broadcast.c",
166164
"src/f32-igemm/gen/f32-igemm-6x8-minmax-avx-broadcast.c",
167165
"src/f32-igemm/gen/f32-igemm-6x16-minmax-avx-broadcast.c",
168166
"src/f32-igemm/gen/f32-igemm-7x8-minmax-avx-broadcast.c",
@@ -456,7 +454,9 @@ NON_PROD_AVX_MICROKERNEL_SRCS = [
456454
"src/x32-packw/gen/x32-packw-x8-gemm-gio-avx-u1-prfm.c",
457455
"src/x32-packw/gen/x32-packw-x8-gemm-gio-avx-u1.c",
458456
"src/x32-packw/gen/x32-packw-x8-gemm-gio-avx-u8-prfm.c",
457+
"src/x32-packw/gen/x32-packw-x8-gemm-gio-avx-u8.c",
459458
"src/x32-packw/gen/x32-packw-x8-gemm-goi-avx-u4-prfm.c",
459+
"src/x32-packw/gen/x32-packw-x8-gemm-goi-avx-u4.c",
460460
"src/x32-packw/gen/x32-packw-x8s4-gemm-goi-avx-u4-prfm.c",
461461
"src/x32-packw/gen/x32-packw-x8s4-gemm-goi-avx-u4.c",
462462
"src/x32-packw/gen/x32-packw-x16-gemm-gio-avx-u1-prfm.c",

0 commit comments

Comments
 (0)