Skip to content

Commit a2c7029

Browse files
committed
sve - initial SVE backend framework
1 parent 8305768 commit a2c7029

File tree

6 files changed

+333
-2
lines changed

6 files changed

+333
-2
lines changed

Makefile

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ blocked.c := $(sort $(wildcard backends/blocked/*.c))
223223
ceedmemcheck.c := $(sort $(wildcard backends/memcheck/*.c))
224224
opt.c := $(sort $(wildcard backends/opt/*.c))
225225
avx.c := $(sort $(wildcard backends/avx/*.c))
226+
sve.c := $(sort $(wildcard backends/sve/*.c))
226227
xsmm.c := $(sort $(wildcard backends/xsmm/*.c))
227228
cuda.c := $(sort $(wildcard backends/cuda/*.c))
228229
cuda.cpp := $(sort $(wildcard backends/cuda/*.cpp))
@@ -283,6 +284,7 @@ info:
283284
$(info ------------------------------------)
284285
$(info MEMCHK_STATUS = $(MEMCHK_STATUS)$(call backend_status,$(MEMCHK_BACKENDS)))
285286
$(info AVX_STATUS = $(AVX_STATUS)$(call backend_status,$(AVX_BACKENDS)))
287+
$(info SVE_STATUS = $(SVE_STATUS)$(call backend_status,$(SVE_BACKENDS)))
286288
$(info XSMM_DIR = $(XSMM_DIR)$(call backend_status,$(XSMM_BACKENDS)))
287289
$(info OCCA_DIR = $(OCCA_DIR)$(call backend_status,$(OCCA_BACKENDS)))
288290
$(info MAGMA_DIR = $(MAGMA_DIR)$(call backend_status,$(MAGMA_BACKENDS)))
@@ -323,7 +325,7 @@ ifeq ($(MEMCHK),1)
323325
BACKENDS_MAKE += $(MEMCHK_BACKENDS)
324326
endif
325327

326-
# AVX Backed
328+
# AVX Backends
327329
AVX_STATUS = Disabled
328330
AVX_FLAG := $(if $(filter clang,$(CC_VENDOR)),+avx,-mavx)
329331
AVX := $(filter $(AVX_FLAG),$(shell $(CC) $(OPT) -v -E -x c /dev/null 2>&1))
@@ -334,6 +336,17 @@ ifneq ($(AVX),)
334336
BACKENDS_MAKE += $(AVX_BACKENDS)
335337
endif
336338

339+
# SVE Backends
340+
SVE_STATUS = Disabled
341+
SVE_FLAG := $(if $(filter clang,$(CC_VENDOR)),+sve,-msve)
342+
SVE := 1
343+
SVE_BACKENDS = /cpu/self/sve/serial /cpu/self/sve/blocked
344+
ifneq ($(SVE),)
345+
SVE_STATUS = Enabled
346+
libceed.c += $(sve.c)
347+
BACKENDS_MAKE += $(SVE_BACKENDS)
348+
endif
349+
337350
# Collect list of libraries and paths for use in linking and pkg-config
338351
PKG_LIBS =
339352

@@ -410,7 +423,7 @@ ifneq ($(HIP_LIB_DIR),)
410423
BACKENDS_MAKE += $(HIP_BACKENDS)
411424
endif
412425

413-
# MAGMA Backend
426+
# MAGMA Backends
414427
ifneq ($(wildcard $(MAGMA_DIR)/lib/libmagma.*),)
415428
MAGMA_ARCH=$(shell nm -g $(MAGMA_DIR)/lib/libmagma.* | grep -c "hipblas")
416429
ifeq ($(MAGMA_ARCH), 0) #CUDA MAGMA

backends/ceed-backend-list.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ MACRO(CeedRegister_Opt_Blocked, 1, "/cpu/self/opt/blocked")
2222
MACRO(CeedRegister_Opt_Serial, 1, "/cpu/self/opt/serial")
2323
MACRO(CeedRegister_Ref, 1, "/cpu/self/ref/serial")
2424
MACRO(CeedRegister_Ref_Blocked, 1, "/cpu/self/ref/blocked")
25+
MACRO(CeedRegister_Sve_Serial, 1, "/cpu/self/sve/serial")
26+
MACRO(CeedRegister_Sve_Blocked, 1, "/cpu/self/sve/blocked")
2527
MACRO(CeedRegister_Tmpl, 1, "/cpu/self/tmpl")
2628
MACRO(CeedRegister_Tmpl_Sub, 1, "/cpu/self/tmpl/sub")
2729
MACRO(CeedRegister_Xsmm_Blocked, 1, "/cpu/self/xsmm/blocked")

backends/sve/ceed-sve-blocked.c

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2+
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3+
// All Rights reserved. See files LICENSE and NOTICE for details.
4+
//
5+
// This file is part of CEED, a collection of benchmarks, miniapps, software
6+
// libraries and APIs for efficient high-order finite element and spectral
7+
// element discretizations for exascale applications. For more information and
8+
// source code availability see http://github.com/ceed.
9+
//
10+
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11+
// a collaborative effort of two U.S. Department of Energy organizations (Office
12+
// of Science and the National Nuclear Security Administration) responsible for
13+
// the planning and preparation of a capable exascale ecosystem, including
14+
// software, applications, hardware, advanced system engineering and early
15+
// testbed platforms, in support of the nation's exascale computing imperative.
16+
17+
#include <ceed/ceed.h>
18+
#include <ceed/backend.h>
19+
#include <stdbool.h>
20+
#include <string.h>
21+
#include "ceed-sve.h"
22+
23+
//------------------------------------------------------------------------------
24+
// Backend Init
25+
//------------------------------------------------------------------------------
26+
static int CeedInit_Sve(const char *resource, Ceed ceed) {
27+
int ierr;
28+
if (strcmp(resource, "/cpu/self") && strcmp(resource, "/cpu/self/sve") &&
29+
strcmp(resource, "/cpu/self/sve/blocked"))
30+
// LCOV_EXCL_START
31+
return CeedError(ceed, CEED_ERROR_BACKEND,
32+
"SVE backend cannot use resource: %s", resource);
33+
// LCOV_EXCL_STOP
34+
ierr = CeedSetDeterministic(ceed, true); CeedChkBackend(ierr);
35+
36+
// Create reference CEED that implementation will be dispatched
37+
// through unless overridden
38+
Ceed ceed_ref;
39+
CeedInit("/cpu/self/opt/blocked", &ceed_ref);
40+
ierr = CeedSetDelegate(ceed, ceed_ref); CeedChkBackend(ierr);
41+
42+
// TODO: Make f64 and f32 versions
43+
ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "TensorContractCreate",
44+
CeedTensorContractCreate_f64_Sve);
45+
CeedChkBackend(ierr);
46+
47+
return CEED_ERROR_SUCCESS;
48+
}
49+
50+
//------------------------------------------------------------------------------
51+
// Backend Register
52+
//------------------------------------------------------------------------------
53+
CEED_INTERN int CeedRegister_Sve_Blocked(void) {
54+
return CeedRegister("/cpu/self/sve/blocked", CeedInit_Sve, 30);
55+
}
56+
//------------------------------------------------------------------------------

backends/sve/ceed-sve-serial.c

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2+
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3+
// All Rights reserved. See files LICENSE and NOTICE for details.
4+
//
5+
// This file is part of CEED, a collection of benchmarks, miniapps, software
6+
// libraries and APIs for efficient high-order finite element and spectral
7+
// element discretizations for exascale applications. For more information and
8+
// source code availability see http://github.com/ceed.
9+
//
10+
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11+
// a collaborative effort of two U.S. Department of Energy organizations (Office
12+
// of Science and the National Nuclear Security Administration) responsible for
13+
// the planning and preparation of a capable exascale ecosystem, including
14+
// software, applications, hardware, advanced system engineering and early
15+
// testbed platforms, in support of the nation's exascale computing imperative.
16+
17+
#include <ceed/ceed.h>
18+
#include <ceed/backend.h>
19+
#include <stdbool.h>
20+
#include <string.h>
21+
#include "ceed-sve.h"
22+
23+
//------------------------------------------------------------------------------
24+
// Backend Init
25+
//------------------------------------------------------------------------------
26+
static int CeedInit_Sve(const char *resource, Ceed ceed) {
27+
int ierr;
28+
if (strcmp(resource, "/cpu/self")
29+
&& strcmp(resource, "/cpu/self/sve/serial"))
30+
// LCOV_EXCL_START
31+
return CeedError(ceed, CEED_ERROR_BACKEND,
32+
"SVE backend cannot use resource: %s", resource);
33+
// LCOV_EXCL_STOP
34+
ierr = CeedSetDeterministic(ceed, true); CeedChkBackend(ierr);
35+
36+
// Create reference CEED that implementation will be dispatched
37+
// through unless overridden
38+
Ceed ceed_ref;
39+
CeedInit("/cpu/self/opt/serial", &ceed_ref);
40+
ierr = CeedSetDelegate(ceed, ceed_ref); CeedChkBackend(ierr);
41+
42+
// TODO: Make f64 and f32 versions
43+
ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "TensorContractCreate",
44+
CeedTensorContractCreate_f64_Sve);
45+
CeedChkBackend(ierr);
46+
47+
return CEED_ERROR_SUCCESS;
48+
}
49+
50+
//------------------------------------------------------------------------------
51+
// Backend Register
52+
//------------------------------------------------------------------------------
53+
CEED_INTERN int CeedRegister_Sve_Serial(void) {
54+
return CeedRegister("/cpu/self/sve/serial", CeedInit_Sve, 35);
55+
}
56+
//------------------------------------------------------------------------------

backends/sve/ceed-sve-tensor-f64.c

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2+
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3+
// All Rights reserved. See files LICENSE and NOTICE for details.
4+
//
5+
// This file is part of CEED, a collection of benchmarks, miniapps, software
6+
// libraries and APIs for efficient high-order finite element and spectral
7+
// element discretizations for exascale applications. For more information and
8+
// source code availability see http://github.com/ceed.
9+
//
10+
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11+
// a collaborative effort of two U.S. Department of Energy organizations (Office
12+
// of Science and the National Nuclear Security Administration) responsible for
13+
// the planning and preparation of a capable exascale ecosystem, including
14+
// software, applications, hardware, advanced system engineering and early
15+
// testbed platforms, in support of the nation's exascale computing imperative.
16+
17+
#include <ceed/ceed.h>
18+
#include <ceed/backend.h>
19+
#ifdef __ARM_FEATURE_SVE
20+
#include <arm_sve.h>
21+
#endif
22+
#include <stdbool.h>
23+
#include "ceed-sve.h"
24+
25+
//------------------------------------------------------------------------------
26+
// Blocked Tensor Contract
27+
//------------------------------------------------------------------------------
28+
static inline int CeedTensorContract_Sve_Blocked(CeedTensorContract contract,
29+
CeedInt A, CeedInt B, CeedInt C, CeedInt J, const double *restrict t,
30+
CeedTransposeMode t_mode, const CeedInt add, const double *restrict u,
31+
double *restrict v, const CeedInt JJ) {
32+
CeedInt t_stride_0 = B, t_stride_1 = 1;
33+
if (t_mode == CEED_TRANSPOSE) {
34+
t_stride_0 = 1; t_stride_1 = J;
35+
}
36+
37+
for (CeedInt a=0; a<A; a++)
38+
for (CeedInt b=0; b<B; b++)
39+
// Blocks of JJ rows
40+
for (CeedInt j=0; j<(J/JJ)*JJ; j+=JJ)
41+
for (CeedInt jj=0; jj<JJ; jj++) { // unroll
42+
// C vectorization by compiler
43+
int32_t c = 0;
44+
svbool_t pg = svwhilelt_b64(c, C);
45+
do {
46+
// Load u, v into vectors
47+
svfloat64_t u_vec = svld1(pg, &u[(a*B+b)*C+c]);
48+
svfloat64_t v_vec = svld1(pg, &v[(a*J+j+jj)*C+c]);
49+
// Basis matrix value
50+
double tq = t[(j+jj)*t_stride_0 + b*t_stride_1];
51+
// fmadd
52+
svst1(pg, &v[(a*J+j+jj)*C+c], svmla_x(pg, v_vec, u_vec, tq));
53+
// Loop update
54+
c += svcntd();
55+
pg = svwhilelt_b64(c, C);
56+
} while (svptest_any(svptrue_b64(), pg));
57+
}
58+
// Remainder of rows
59+
CeedInt j=(J/JJ)*JJ;
60+
if (j < J) {
61+
for (CeedInt a=0; a<A; a++)
62+
for (CeedInt b=0; b<B; b++)
63+
// Blocks of JJ rows
64+
for (CeedInt jj=0; jj<J-j; jj++) { // not unrolled
65+
// C vectorization by compiler
66+
int32_t c = 0;
67+
svbool_t pg = svwhilelt_b64(c, C);
68+
do {
69+
// Load u, v into vectors
70+
svfloat64_t u_vec = svld1(pg, &u[(a*B+b)*C+c]);
71+
svfloat64_t v_vec = svld1(pg, &v[(a*J+j+jj)*C+c]);
72+
// Basis matrix value
73+
double tq = t[(j+jj)*t_stride_0 + b*t_stride_1];
74+
// fmadd
75+
svst1(pg, &v[(a*J+j+jj)*C+c], svmla_x(pg, v_vec, u_vec, tq));
76+
// Loop update
77+
c += svcntd();
78+
pg = svwhilelt_b64(c, C);
79+
} while (svptest_any(svptrue_b64(), pg));
80+
}
81+
}
82+
return CEED_ERROR_SUCCESS;
83+
}
84+
85+
//------------------------------------------------------------------------------
86+
// Blocked Tensor Contract
87+
//------------------------------------------------------------------------------
88+
static inline int CeedTensorContract_Sve_Serial(CeedTensorContract contract,
89+
CeedInt A, CeedInt B, CeedInt C, CeedInt J, const double *restrict t,
90+
CeedTransposeMode t_mode, const CeedInt add, const double *restrict u,
91+
double *restrict v, const CeedInt JJ) {
92+
CeedInt t_stride_0 = B, t_stride_1 = 1;
93+
if (t_mode == CEED_TRANSPOSE) {
94+
t_stride_0 = 1; t_stride_1 = J;
95+
}
96+
97+
for (CeedInt a=0; a<A; a++)
98+
for (CeedInt b=0; b<B; b++)
99+
for (CeedInt j=0; j<(J/JJ)*JJ; j+=JJ)
100+
for (CeedInt jj=0; jj<JJ; jj++) // unroll
101+
v[a*J+(j+jj)] += t[(j+jj)*t_stride_0 + b*t_stride_1] * u[a*B+b];
102+
103+
CeedInt j=(J/JJ)*JJ;
104+
if (j < J)
105+
for (CeedInt a=0; a<A; a++)
106+
for (CeedInt b=0; b<B; b++)
107+
for (CeedInt jj=0; jj<J-j; jj++) // not unrolled
108+
v[a*J+(j+jj)] += t[(j+jj)*t_stride_0 + b*t_stride_1] * u[a*B+b];
109+
110+
return CEED_ERROR_SUCCESS;
111+
}
112+
113+
//------------------------------------------------------------------------------
114+
// Tensor Contract - Common Sizes
115+
//------------------------------------------------------------------------------
116+
static int CeedTensorContract_Sve_Blocked_8(CeedTensorContract contract,
117+
CeedInt A, CeedInt B, CeedInt C, CeedInt J, const double *restrict t,
118+
CeedTransposeMode t_mode, const CeedInt add, const double *restrict u,
119+
double *restrict v) {
120+
return CeedTensorContract_Sve_Blocked(contract, A, B, C, J, t, t_mode, add, u,
121+
v, 8);
122+
}
123+
static int CeedTensorContract_Sve_Serial_8(CeedTensorContract contract,
124+
CeedInt A, CeedInt B, CeedInt C, CeedInt J, const double *restrict t,
125+
CeedTransposeMode t_mode, const CeedInt add, const double *restrict u,
126+
double *restrict v) {
127+
return CeedTensorContract_Sve_Serial(contract, A, B, C, J, t, t_mode, add, u, v,
128+
8);
129+
}
130+
131+
//------------------------------------------------------------------------------
132+
// Tensor Contract Apply
133+
//------------------------------------------------------------------------------
134+
static int CeedTensorContractApply_Sve(CeedTensorContract contract, CeedInt A,
135+
CeedInt B, CeedInt C, CeedInt J,
136+
const double *restrict t,
137+
CeedTransposeMode t_mode,
138+
const CeedInt add,
139+
const double *restrict u,
140+
double *restrict v) {
141+
if (!add)
142+
for (CeedInt q=0; q<A*J*C; q++)
143+
v[q] = (double) 0.0;
144+
145+
if (C == 1)
146+
CeedTensorContract_Sve_Serial_8(contract, A, B, C, J, t, t_mode, true, u, v);
147+
else
148+
CeedTensorContract_Sve_Blocked_8(contract, A, B, C, J, t, t_mode, true, u, v);
149+
150+
return CEED_ERROR_SUCCESS;
151+
}
152+
153+
//------------------------------------------------------------------------------
154+
// Tensor Contract Destroy
155+
//------------------------------------------------------------------------------
156+
static int CeedTensorContractDestroy_Sve(CeedTensorContract contract) {
157+
return CEED_ERROR_SUCCESS;
158+
}
159+
160+
//------------------------------------------------------------------------------
161+
// Tensor Contract Create
162+
//------------------------------------------------------------------------------
163+
int CeedTensorContractCreate_f64_Sve(CeedBasis basis,
164+
CeedTensorContract contract) {
165+
int ierr;
166+
Ceed ceed;
167+
ierr = CeedTensorContractGetCeed(contract, &ceed); CeedChkBackend(ierr);
168+
169+
ierr = CeedSetBackendFunction(ceed, "TensorContract", contract, "Apply",
170+
CeedTensorContractApply_Sve); CeedChkBackend(ierr);
171+
ierr = CeedSetBackendFunction(ceed, "TensorContract", contract, "Destroy",
172+
CeedTensorContractDestroy_Sve); CeedChkBackend(ierr);
173+
174+
return CEED_ERROR_SUCCESS;
175+
}
176+
//------------------------------------------------------------------------------

backends/sve/ceed-sve.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2+
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3+
// All Rights reserved. See files LICENSE and NOTICE for details.
4+
//
5+
// This file is part of CEED, a collection of benchmarks, miniapps, software
6+
// libraries and APIs for efficient high-order finite element and spectral
7+
// element discretizations for exascale applications. For more information and
8+
// source code availability see http://github.com/ceed.
9+
//
10+
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11+
// a collaborative effort of two U.S. Department of Energy organizations (Office
12+
// of Science and the National Nuclear Security Administration) responsible for
13+
// the planning and preparation of a capable exascale ecosystem, including
14+
// software, applications, hardware, advanced system engineering and early
15+
// testbed platforms, in support of the nation's exascale computing imperative.
16+
17+
#ifndef _ceed_sve_h
18+
#define _ceed_sve_h
19+
20+
#include <ceed/ceed.h>
21+
#include <ceed/backend.h>
22+
23+
CEED_INTERN int CeedTensorContractCreate_f32_Sve(CeedBasis basis,
24+
CeedTensorContract contract);
25+
CEED_INTERN int CeedTensorContractCreate_f64_Sve(CeedBasis basis,
26+
CeedTensorContract contract);
27+
28+
#endif // _ceed_sve_h

0 commit comments

Comments
 (0)