Skip to content

Commit 9da59da

Browse files
Added separate block sizes for zen5 and zen4 in dlp classic. (#508)
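Added a new block size map macro for Zen5 (DLP_GEMM_BLKSZ_MAP_ZEN5) and a separate block size map macro for Zen4 (DLP_GEMM_BLKSZ_MAP_ZEN4). In the rows shown in this diff, the two maps differ in the F32F32F32OF32 KC value (1024 for Zen5, 512 for Zen4). Block size selection now checks for a Zen5-like architecture first and uses the Zen4 map on other AVX512-capable machines. This ensures that Zen5 and Zen4 architectures use their own optimized block sizes.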
1 parent 29e8155 commit 9da59da

2 files changed

Lines changed: 31 additions & 2 deletions

File tree

classic/config/dlp_gemm_blksz_map.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
// ID,MC,NC,KC,MR,NR,PACKA_RS,PACKA_CS,PACKB_RS,PACKB_CS: ID = One of the
3434
// AOCL_DLP_OPERATION_TYPE enum.
3535

36-
#define DLP_GEMM_BLKSZ_MAP_ZEN4 \
36+
#define DLP_GEMM_BLKSZ_MAP_ZEN5 \
3737
XMACRO(U8S8S32OS32, 144, 1024, 2048, 6, 64, 4, 24, 4 * 64, 64) \
3838
XMACRO(F32F32F32OF32, 192, 8064, 1024, 6, 64, 1, 6, 64, 1) \
3939
XMACRO(BF16BF16F32OF32, 144, 1024, 4096, 6, 64, 0, 0, 2 * 64, 64 / 2) \
@@ -45,6 +45,18 @@
4545
XMACRO(F32OBF16, 144, 1024, 4096, 6, 64, 0, 0, 2 * 64, 64 / 2) \
4646
XMACRO(BF16U4F32OF32, 144, 1024, 4096, 6, 64, 0, 0, 2 * 64, 64 / 2)
4747

48+
// Zen4 block-size table: one XMACRO row per operation type.
// Row fields, in order: ID, MC, NC, KC, MR, NR, PACKA_RS, PACKA_CS,
// PACKB_RS, PACKB_CS, where ID is one of the AOCL_DLP_OPERATION_TYPE enum.
// NOTE(review): compared with the DLP_GEMM_BLKSZ_MAP_ZEN5 rows visible in
// this change, only F32F32F32OF32 differs (KC is 512 here versus 1024);
// confirm against the full Zen5 table.
#define DLP_GEMM_BLKSZ_MAP_ZEN4 \
49+
XMACRO(U8S8S32OS32, 144, 1024, 2048, 6, 64, 4, 24, 4 * 64, 64) \
50+
XMACRO(F32F32F32OF32, 192, 8064, 512, 6, 64, 1, 6, 64, 1) \
51+
XMACRO(BF16BF16F32OF32, 144, 1024, 4096, 6, 64, 0, 0, 2 * 64, 64 / 2) \
52+
XMACRO(BF16S4F32OF32, 144, 1024, 4096, 6, 64, 0, 0, 2 * 64, 64 / 2) \
53+
XMACRO(F16F16F16OF16, 288, 1024, 2048, 6, 128, 1, 12, 128, 1) \
54+
XMACRO(F32F16F32OF32, 192, 4096, 2048, 6, 64, 1, 6, 64, 1) \
55+
XMACRO(S8S8S32OS32, 144, 1024, 2048, 6, 64, 4, 24, 4 * 64, 64) \
56+
XMACRO(U8S4S32OS32, 144, 1024, 2048, 6, 64, 4, 24, 4 * 64, 64) \
57+
XMACRO(F32OBF16, 144, 1024, 4096, 6, 64, 0, 0, 2 * 64, 64 / 2) \
58+
XMACRO(BF16U4F32OF32, 144, 1024, 4096, 6, 64, 0, 0, 2 * 64, 64 / 2)
59+
4860
#define DLP_GEMM_BLKSZ_MAP_ZEN \
4961
XMACRO(U8S8S32OS32, 144, 1024, 2048, 6, 64, 4, 24, 4 * 64, 64) \
5062
XMACRO(F32F32F32OF32, 144, 8160, 512, 6, 16, 1, 6, 16, 1) \

classic/config/dlp_gemm_config.c

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,15 @@ _dlp_gemm_cntx_init_func_map()
289289

290290
_DLP_GEMM_CNTX_UPD_FUNC_MAP_FOR_CONFIGURED_ARCH()
291291
#endif
292+
} else if (dlp_cpuid_is_avx512_supported() == TRUE) {
293+
#ifdef DLP_KERNELS_ZEN4
294+
DLP_GEMM_KERN_FUNC_MAP_AVX512
295+
DLP_GEMM_PACKA_FUNC_MAP_AVX512
296+
DLP_GEMM_PACKB_FUNC_MAP_AVX512
297+
DLP_GEMM_PACKBMXP_FUNC_MAP_AVX512
298+
299+
_DLP_GEMM_CNTX_UPD_FUNC_MAP_FOR_CONFIGURED_ARCH()
300+
#endif
292301
} else if (dlp_cpuid_is_avx2fma3_supported() == TRUE) {
293302
#ifdef DLP_KERNELS_ZEN3
294303
DLP_GEMM_KERN_FUNC_MAP_AVX2
@@ -376,7 +385,15 @@ _dlp_gemm_cntx_init_blksz_map()
376385
// the blocksize for a particular version of zen id is generalized
377386
// for all machines that support the ISA supported by that particular
378387
// zen id.
379-
if (dlp_cpuid_is_avx512vnni_supported() == TRUE) {
388+
if (dlp_cpuid_is_similar_zen5_arch() == TRUE) {
389+
DLP_GEMM_BLKSZ_MAP_ZEN5
390+
391+
// Fallback to zen3 blocksizes has the same logic for both
392+
// zen5 and zen4.
393+
if (global_dlp_gemmenable_arch == DLP_ARCH_ZEN3) {
394+
DLP_GEMM_BLKSZ_UPD_MAP_ZEN4_TO_ZEN
395+
}
396+
} else if (dlp_cpuid_is_avx512_supported() == TRUE) {
380397
DLP_GEMM_BLKSZ_MAP_ZEN4
381398

382399
if (global_dlp_gemmenable_arch == DLP_ARCH_ZEN3) {

0 commit comments

Comments
 (0)