Skip to content

Add throttling profile for SGEMM and SGEMV on NEOVERSEV2 #5141

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -237,8 +237,10 @@ In chronological order:
* [2025-01-10] Add thread throttling profile for SGEMM on NEOVERSEV1
* [2025-01-21] Optimize gemv_t_sve_v1x3 kernel

* Marek Michalowski <https://github.com/michalowski-arm>
* Marek Michalowski <marek.michalowski@arm.com>
* [2025-01-21] Add thread throttling profile for SGEMV on `NEOVERSEV1`
* [2025-02-18] Add thread throttling profile for SGEMM on `NEOVERSEV2`
* [2025-02-19] Add thread throttling profile for SGEMV on `NEOVERSEV2`

* Ye Tao <[email protected]>
* [2025-02-03] Optimize SBGEMM kernel on NEOVERSEV1
23 changes: 23 additions & 0 deletions interface/gemm.c
Original file line number Diff line number Diff line change
Expand Up @@ -198,14 +198,37 @@ static inline int get_gemm_optimal_nthreads_neoversev1(double MNK, int ncpu) {
}
#endif

#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV2)
/*
 * Thread-throttling profile for GEMM on NEOVERSEV2.
 *
 * MNK  - problem-size measure supplied by the caller (presumably M*N*K;
 *        confirm against the call site).
 * ncpu - upper bound on the thread count that may be returned.
 *
 * Returns 1 when MNK is below the first threshold, MIN(ncpu, cap) for the
 * bucket MNK falls into, and ncpu once MNK reaches the last threshold.
 */
static inline int get_gemm_optimal_nthreads_neoversev2(double MNK, int ncpu) {
  /* Exclusive upper bound on MNK paired with the thread cap for that bucket,
     listed in ascending order so the first match wins. */
  static const struct {
    long upper;
    int cap;
  } buckets[] = {
    {    1092727L,  6 },
    {    2628072L,  8 },
    {    8000000L, 12 },
    {   20346417L, 16 },
    {   57066625L, 24 },
    {   91125000L, 28 },
    {  238328000L, 40 },
    {  454756609L, 48 },
    {  857375000L, 56 },
    { 1073741824L, 64 },
  };
  unsigned int idx;

  /* Smallest problems always run on a single thread, regardless of ncpu. */
  if (MNK < 125000L) {
    return 1;
  }

  for (idx = 0; idx < sizeof(buckets) / sizeof(buckets[0]); idx++) {
    if (MNK < buckets[idx].upper) {
      return MIN(ncpu, buckets[idx].cap);
    }
  }

  /* At or beyond the largest threshold: no throttling. */
  return ncpu;
}
#endif

static inline int get_gemm_optimal_nthreads(double MNK) {
int ncpu = num_cpu_avail(3);
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
return get_gemm_optimal_nthreads_neoversev1(MNK, ncpu);
#elif defined(NEOVERSEV2) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
return get_gemm_optimal_nthreads_neoversev2(MNK, ncpu);
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
if (strcmp(gotoblas_corename(), "neoversev1") == 0) {
return get_gemm_optimal_nthreads_neoversev1(MNK, ncpu);
}
if (strcmp(gotoblas_corename(), "neoversev2") == 0) {
return get_gemm_optimal_nthreads_neoversev2(MNK, ncpu);
}
#endif
if ( MNK <= (SMP_THRESHOLD_MIN * (double) GEMM_MULTITHREAD_THRESHOLD) ) {
return 1;
Expand Down
16 changes: 16 additions & 0 deletions interface/gemv.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,30 @@ static inline int get_gemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) {
}
#endif

#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV2)
/*
 * Thread-throttling profile for GEMV on NEOVERSEV2.
 *
 * MN   - problem-size measure supplied by the caller (presumably M*N;
 *        confirm against the call site).
 * ncpu - upper bound on the thread count that may be returned.
 *
 * Returns 1 when MN is below the first threshold, MIN(ncpu, cap) for the
 * bucket MN falls into, and ncpu once MN reaches the last threshold.
 */
static inline int get_gemv_optimal_nthreads_neoversev2(BLASLONG MN, int ncpu) {
  int cap;

  /* Smallest problems always run on a single thread, regardless of ncpu. */
  if (MN < 24964L) {
    return 1;
  }

  if (MN < 65536L) {
    cap = 8;
  } else if (MN < 262144L) {
    cap = 32;
  } else if (MN < 1638400L) {
    cap = 64;
  } else {
    /* At or beyond the largest threshold: no throttling. */
    return ncpu;
  }

  return MIN(ncpu, cap);
}
#endif

static inline int get_gemv_optimal_nthreads(BLASLONG MN) {
int ncpu = num_cpu_avail(3);
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
return get_gemv_optimal_nthreads_neoversev1(MN, ncpu);
#elif defined(NEOVERSEV2) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
return get_gemv_optimal_nthreads_neoversev2(MN, ncpu);
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
if (strcmp(gotoblas_corename(), "neoversev1") == 0) {
return get_gemv_optimal_nthreads_neoversev1(MN, ncpu);
}
if (strcmp(gotoblas_corename(), "neoversev2") == 0) {
return get_gemv_optimal_nthreads_neoversev2(MN, ncpu);
}
#endif

if ( MN < 115200L * GEMM_MULTITHREAD_THRESHOLD )
Expand Down
Loading