Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 1b85b6a

Browse files
authored Feb 7, 2025
Merge pull request #5108 from taoye9/sbgemm_neoversev1
Add SBGEMM for arm neoversev1
2 parents cae4806 + c748e6a commit 1b85b6a

9 files changed

+1077
-2
lines changed
 

‎CONTRIBUTORS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,3 +240,5 @@ In chronological order:
240240
* Marek Michalowski <https://github.com/michalowski-arm>
241241
* [2025-01-21] Add thread throttling profile for SGEMV on `NEOVERSEV1`
242242

243+
* Ye Tao <ye.tao@arm.com>
244+
* [2025-02-03] Optimize SBGEMM kernel on NEOVERSEV1

‎Makefile.arm64

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ ifeq ($(CORE), NEOVERSEV1)
101101
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
102102
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)))
103103
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11) $(ISCLANG)))
104-
CCOMMON_OPT += -march=armv8.4-a+sve
104+
CCOMMON_OPT += -march=armv8.4-a+sve+bf16
105105
ifeq (1, $(ISCLANG))
106106
CCOMMON_OPT += -mtune=cortex-x1
107107
else
@@ -111,7 +111,7 @@ ifneq ($(F_COMPILER), NAG)
111111
FCOMMON_OPT += -march=armv8.4-a -mtune=neoverse-v1
112112
endif
113113
else
114-
CCOMMON_OPT += -march=armv8.4-a+sve
114+
CCOMMON_OPT += -march=armv8.4-a+sve+bf16
115115
ifneq ($(CROSS), 1)
116116
CCOMMON_OPT += -mtune=native
117117
endif

‎kernel/arm64/KERNEL.NEOVERSEV1

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,17 @@ include $(KERNELDIR)/KERNEL.ARMV8SVE
22

33
SGEMVTKERNEL = gemv_t_sve_v1x3.c
44
DGEMVTKERNEL = gemv_t_sve_v1x3.c
5+
ifeq ($(BUILD_BFLOAT16), 1)
6+
SBGEMM_BETA = sbgemm_beta_neoversev1.c
7+
SBGEMMKERNEL = sbgemm_kernel_$(SBGEMM_UNROLL_M)x$(SBGEMM_UNROLL_N)_neoversev1.c
8+
ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N))
9+
SBGEMMINCOPY = sbgemm_ncopy_$(SBGEMM_UNROLL_M)_neoversev1.c
10+
SBGEMMITCOPY = sbgemm_tcopy_$(SBGEMM_UNROLL_M)_neoversev1.c
11+
SBGEMMINCOPYOBJ = sbgemm_incopy$(TSUFFIX).$(SUFFIX)
12+
SBGEMMITCOPYOBJ = sbgemm_itcopy$(TSUFFIX).$(SUFFIX)
13+
endif
14+
SBGEMMONCOPY = sbgemm_ncopy_$(SBGEMM_UNROLL_N)_neoversev1.c
15+
SBGEMMOTCOPY = sbgemm_tcopy_$(SBGEMM_UNROLL_N)_neoversev1.c
16+
SBGEMMONCOPYOBJ = sbgemm_oncopy$(TSUFFIX).$(SUFFIX)
17+
SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX)
18+
endif

‎kernel/arm64/sbgemm_beta_neoversev1.c

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
/***************************************************************************
2+
* Copyright (c) 2024, The OpenBLAS Project
3+
* All rights reserved.
4+
* Redistribution and use in source and binary forms, with or without
5+
* modification, are permitted provided that the following conditions are
6+
* met:
7+
* 1. Redistributions of source code must retain the above copyright
8+
* notice, this list of conditions and the following disclaimer.
9+
* 2. Redistributions in binary form must reproduce the above copyright
10+
* notice, this list of conditions and the following disclaimer in
11+
* the documentation and/or other materials provided with the
12+
* distribution.
13+
* 3. Neither the name of the OpenBLAS project nor the names of
14+
* its contributors may be used to endorse or promote products
15+
* derived from this software without specific prior written permission.
16+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
* ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26+
* POSSIBILITY OF SUCH DAMAGE.
27+
* *****************************************************************************/
28+
29+
#include "common.h"
30+
31+
/*
 * Scale the output matrix C in place by beta.
 *
 * C is walked as n strips of m contiguous FLOAT elements; consecutive strips
 * start ldc elements apart.  Each strip is processed in groups of eight
 * elements, followed by the (m & 7) leftover elements.
 *
 *   beta == ZERO : every addressed element is overwritten with ZERO (the
 *                  previous contents are not read, so they are not
 *                  propagated into the result).
 *   otherwise    : every addressed element is multiplied by beta.
 *
 * dummy1 .. dummy5 are not used by this routine.
 *
 * Always returns 0.
 */
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta, IFLOAT *dummy2,
          BLASLONG dummy3, IFLOAT *dummy4, BLASLONG dummy5, FLOAT *c,
          BLASLONG ldc) {

  BLASLONG i, j;
  BLASLONG chunk = m >> 3;  /* full groups of 8 elements per strip */
  BLASLONG remain = m & 7;  /* elements left after the full groups */
  FLOAT *c_offset = c;      /* start of the next strip             */
  FLOAT *c_offset1;         /* cursor inside the current strip     */

  if (beta == ZERO) {
    for (j = n; j > 0; j--) {
      c_offset1 = c_offset;
      c_offset += ldc;

      for (i = chunk; i > 0; i--) {
        c_offset1[0] = ZERO;
        c_offset1[1] = ZERO;
        c_offset1[2] = ZERO;
        c_offset1[3] = ZERO;
        c_offset1[4] = ZERO;
        c_offset1[5] = ZERO;
        c_offset1[6] = ZERO;
        c_offset1[7] = ZERO;
        c_offset1 += 8;
      }

      for (i = remain; i > 0; i--) {
        *c_offset1 = ZERO;
        c_offset1++;
      }
    }
  } else {
    for (j = n; j > 0; j--) {
      c_offset1 = c_offset;
      c_offset += ldc;

      for (i = chunk; i > 0; i--) {
        c_offset1[0] *= beta;
        c_offset1[1] *= beta;
        c_offset1[2] *= beta;
        c_offset1[3] *= beta;
        c_offset1[4] *= beta;
        c_offset1[5] *= beta;
        c_offset1[6] *= beta;
        c_offset1[7] *= beta;
        c_offset1 += 8;
      }

      for (i = remain; i > 0; i--) {
        *c_offset1 *= beta;
        c_offset1++;
      }
    }
  }

  return 0;
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/***************************************************************************
2+
* Copyright (c) 2024-2025, The OpenBLAS Project
3+
* All rights reserved.
4+
* Redistribution and use in source and binary forms, with or without
5+
* modification, are permitted provided that the following conditions are
6+
* met:
7+
* 1. Redistributions of source code must retain the above copyright
8+
* notice, this list of conditions and the following disclaimer.
9+
* 2. Redistributions in binary form must reproduce the above copyright
10+
* notice, this list of conditions and the following disclaimer in
11+
* the documentation and/or other materials provided with the
12+
* distribution.
13+
* 3. Neither the name of the OpenBLAS project nor the names of
14+
* its contributors may be used to endorse or promote products
15+
* derived from this software without specific prior written permission.
16+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
* ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26+
* POSSIBILITY OF SUCH DAMAGE.
27+
* *****************************************************************************/
28+
29+
#include <arm_sve.h>
30+
31+
#include "common.h"
32+
33+
#define ALPHA_ONE
34+
#include "sbgemm_kernel_4x4_neoversev1_impl.c"
35+
#undef ALPHA_ONE
36+
#include "sbgemm_kernel_4x4_neoversev1_impl.c"
37+
38+
/*
 * SBGEMM kernel entry point: selects an implementation based on alpha.
 *
 * When alpha compares equal to 1.0f the call is forwarded to
 * sbgemm_kernel_neoversev1_alpha_one(); for every other value it is forwarded
 * to sbgemm_kernel_neoversev1_alpha().  All arguments are passed through
 * unchanged and the callee's return value is returned as-is.
 *
 * NOTE(review): both callees are presumably produced by the two inclusions of
 * sbgemm_kernel_4x4_neoversev1_impl.c in this file (with and without
 * ALPHA_ONE defined) -- confirm against that file.
 */
int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT *A, IFLOAT *B,
          FLOAT *C, BLASLONG ldc) {
  if (alpha == 1.0f) {
    return sbgemm_kernel_neoversev1_alpha_one(m, n, k, alpha, A, B, C, ldc);
  }

  return sbgemm_kernel_neoversev1_alpha(m, n, k, alpha, A, B, C, ldc);
}
46+

0 commit comments

Comments
 (0)
Please sign in to comment.