From b723c1b7b79663583e303e43132ee2e79ed1592c Mon Sep 17 00:00:00 2001 From: Marek Michalowski Date: Thu, 20 Feb 2025 10:18:47 +0000 Subject: [PATCH 1/2] Add thread throttling profile for SGEMM on `NEOVERSEV2` --- CONTRIBUTORS.md | 3 ++- interface/gemm.c | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index f4a93aa1bc..80dd9211fe 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -237,8 +237,9 @@ In chronological order: * [2025-01-10] Add thread throttling profile for SGEMM on NEOVERSEV1 * [2025-01-21] Optimize gemv_t_sve_v1x3 kernel -* Marek Michalowski +* Marek Michalowski * [2025-01-21] Add thread throttling profile for SGEMV on `NEOVERSEV1` + * [2025-02-18] Add thread throttling profile for SGEMM on `NEOVERSEV2` * Ye Tao * [2025-02-03] Optimize SBGEMM kernel on NEOVERSEV1 diff --git a/interface/gemm.c b/interface/gemm.c index 2cd7d7b5c3..67ab42b484 100644 --- a/interface/gemm.c +++ b/interface/gemm.c @@ -198,14 +198,37 @@ static inline int get_gemm_optimal_nthreads_neoversev1(double MNK, int ncpu) { } #endif +#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV2) +static inline int get_gemm_optimal_nthreads_neoversev2(double MNK, int ncpu) { + return + MNK < 125000L ? 1 + : MNK < 1092727L ? MIN(ncpu, 6) + : MNK < 2628072L ? MIN(ncpu, 8) + : MNK < 8000000L ? MIN(ncpu, 12) + : MNK < 20346417L ? MIN(ncpu, 16) + : MNK < 57066625L ? MIN(ncpu, 24) + : MNK < 91125000L ? MIN(ncpu, 28) + : MNK < 238328000L ? MIN(ncpu, 40) + : MNK < 454756609L ? MIN(ncpu, 48) + : MNK < 857375000L ? MIN(ncpu, 56) + : MNK < 1073741824L ? MIN(ncpu, 64) + : ncpu; +} +#endif + static inline int get_gemm_optimal_nthreads(double MNK) { int ncpu = num_cpu_avail(3); #if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) return get_gemm_optimal_nthreads_neoversev1(MNK, ncpu); +#elif defined(NEOVERSEV2) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) + return get_gemm_optimal_nthreads_neoversev2(MNK, ncpu); #elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) if (strcmp(gotoblas_corename(), "neoversev1") == 0) { return get_gemm_optimal_nthreads_neoversev1(MNK, ncpu); } + if (strcmp(gotoblas_corename(), "neoversev2") == 0) { + return get_gemm_optimal_nthreads_neoversev2(MNK, ncpu); + } #endif if ( MNK <= (SMP_THRESHOLD_MIN * (double) GEMM_MULTITHREAD_THRESHOLD) ) { return 1; From 650a062e19e452cf1eb77617b14af4d8a838fc27 Mon Sep 17 00:00:00 2001 From: Marek Michalowski Date: Thu, 20 Feb 2025 10:19:40 +0000 Subject: [PATCH 2/2] Add thread throttling profile for SGEMV on `NEOVERSEV2` --- CONTRIBUTORS.md | 1 + interface/gemv.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 80dd9211fe..99166f5203 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -240,6 +240,7 @@ In chronological order: * Marek Michalowski * [2025-01-21] Add thread throttling profile for SGEMV on `NEOVERSEV1` * [2025-02-18] Add thread throttling profile for SGEMM on `NEOVERSEV2` + * [2025-02-19] Add thread throttling profile for SGEMV on `NEOVERSEV2` * Ye Tao * [2025-02-03] Optimize SBGEMM kernel on NEOVERSEV1 diff --git a/interface/gemv.c b/interface/gemv.c index f91f364eed..4bcdf07c49 100644 --- a/interface/gemv.c +++ b/interface/gemv.c @@ -77,14 +77,30 @@ static inline int get_gemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) { } #endif +#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV2) +static inline int get_gemv_optimal_nthreads_neoversev2(BLASLONG MN, int ncpu) { + return + MN < 24964L ? 1 + : MN < 65536L ? MIN(ncpu, 8) + : MN < 262144L ? MIN(ncpu, 32) + : MN < 1638400L ? MIN(ncpu, 64) + : ncpu; +} +#endif + static inline int get_gemv_optimal_nthreads(BLASLONG MN) { int ncpu = num_cpu_avail(3); #if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) return get_gemv_optimal_nthreads_neoversev1(MN, ncpu); +#elif defined(NEOVERSEV2) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) + return get_gemv_optimal_nthreads_neoversev2(MN, ncpu); #elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) if (strcmp(gotoblas_corename(), "neoversev1") == 0) { return get_gemv_optimal_nthreads_neoversev1(MN, ncpu); } + if (strcmp(gotoblas_corename(), "neoversev2") == 0) { + return get_gemv_optimal_nthreads_neoversev2(MN, ncpu); + } #endif if ( MN < 115200L * GEMM_MULTITHREAD_THRESHOLD )