Skip to content

Commit b92d21b

Browse files
committed
Merge branch 'template-backend' [PR #63]
* template-backend: Add template backend, requested in issue #38; split Ref into multiple files and update style
2 parents e339c64 + a718229 commit b92d21b

18 files changed

+841
-457
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@ ceed.pc
1313
# Mac specific
1414
.DS_Store
1515
*.dSYM
16+
*~

Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ examples := $(examples.c:examples/ceed/%.c=$(OBJDIR)/%)
104104
examples += $(examples.f:examples/ceed/%.f=$(OBJDIR)/%)
105105
# backends/[ref & occa & magma]
106106
ref.c := $(sort $(wildcard backends/ref/*.c))
107+
template.c:= $(sort $(wildcard backends/template/*.c))
107108
occa.c := $(sort $(wildcard backends/occa/*.c))
108109
magma_preprocessor := python backends/magma/gccm.py
109110
magma_pre_src := $(filter-out %_tmp.c, $(wildcard backends/magma/ceed-*.c))
@@ -149,6 +150,7 @@ all:;@$(MAKE) $(MFLAGS) V=$(V) this
149150
$(libceed) : LDFLAGS += $(if $(DARWIN), -install_name @rpath/$(notdir $(libceed)))
150151

151152
libceed.c += $(ref.c)
153+
libceed.c += $(template.c)
152154
ifneq ($(wildcard $(OCCA_DIR)/lib/libocca.*),)
153155
$(libceed) : LDFLAGS += -L$(OCCA_DIR)/lib -Wl,-rpath,$(abspath $(OCCA_DIR)/lib)
154156
$(libceed) : LDLIBS += -locca

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,8 @@ be provided with the `-ceed` option, for example:
104104

105105
CEED resource (`-ceed`) | Backend
106106
----------------------- | ---------------------------------
107-
`/cpu/self` | Serial reference implementation
107+
`/cpu/self/ref` | Serial reference implementation
108+
`/cpu/self/tmpl` | Backend template, dispatches to reference
108109
`/cpu/occa` | Serial OCCA kernels
109110
`/gpu/occa` | CUDA OCCA kernels
110111
`/omp/occa` | OpenMP OCCA kernels

backends/ref/ceed-ref-basis.c

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2+
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3+
// All Rights reserved. See files LICENSE and NOTICE for details.
4+
//
5+
// This file is part of CEED, a collection of benchmarks, miniapps, software
6+
// libraries and APIs for efficient high-order finite element and spectral
7+
// element discretizations for exascale applications. For more information and
8+
// source code availability see http://github.com/ceed.
9+
//
10+
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11+
// a collaborative effort of two U.S. Department of Energy organizations (Office
12+
// of Science and the National Nuclear Security Administration) responsible for
13+
// the planning and preparation of a capable exascale ecosystem, including
14+
// software, applications, hardware, advanced system engineering and early
15+
// testbed platforms, in support of the nation's exascale computing imperative.
16+
17+
#include <ceed-impl.h>
18+
#include <string.h>
19+
#include "ceed-ref.h"
20+
21+
// Contracts on the middle index
22+
// NOTRANSPOSE: V_ajc = T_jb U_abc
23+
// TRANSPOSE: V_ajc = T_bj U_abc
24+
// If Add != 0, "=" is replaced by "+="
25+
static int CeedTensorContract_Ref(Ceed ceed,
26+
CeedInt A, CeedInt B, CeedInt C, CeedInt J,
27+
const CeedScalar *t, CeedTransposeMode tmode,
28+
const CeedInt Add,
29+
const CeedScalar *u, CeedScalar *v) {
30+
CeedInt tstride0 = B, tstride1 = 1;
31+
if (tmode == CEED_TRANSPOSE) {
32+
tstride0 = 1; tstride1 = J;
33+
}
34+
35+
for (CeedInt a=0; a<A; a++) {
36+
for (CeedInt j=0; j<J; j++) {
37+
if (!Add) {
38+
for (CeedInt c=0; c<C; c++)
39+
v[(a*J+j)*C+c] = 0;
40+
}
41+
for (CeedInt b=0; b<B; b++) {
42+
for (CeedInt c=0; c<C; c++) {
43+
v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c];
44+
}
45+
}
46+
}
47+
}
48+
return 0;
49+
}
50+
51+
static int CeedBasisApply_Ref(CeedBasis basis, CeedTransposeMode tmode,
52+
CeedEvalMode emode,
53+
const CeedScalar *u, CeedScalar *v) {
54+
int ierr;
55+
const CeedInt dim = basis->dim;
56+
const CeedInt ndof = basis->ndof;
57+
const CeedInt nqpt = ndof*CeedPowInt(basis->Q1d, dim);
58+
const CeedInt add = (tmode == CEED_TRANSPOSE);
59+
60+
if (tmode == CEED_TRANSPOSE) {
61+
const CeedInt vsize = ndof*CeedPowInt(basis->P1d, dim);
62+
for (CeedInt i = 0; i < vsize; i++)
63+
v[i] = (CeedScalar) 0;
64+
}
65+
if (emode & CEED_EVAL_INTERP) {
66+
CeedInt P = basis->P1d, Q = basis->Q1d;
67+
if (tmode == CEED_TRANSPOSE) {
68+
P = basis->Q1d; Q = basis->P1d;
69+
}
70+
CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1;
71+
CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)];
72+
for (CeedInt d=0; d<dim; d++) {
73+
ierr = CeedTensorContract_Ref(basis->ceed, pre, P, post, Q, basis->interp1d,
74+
tmode, add&&(d==dim-1),
75+
d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]);
76+
CeedChk(ierr);
77+
pre /= P;
78+
post *= Q;
79+
}
80+
if (tmode == CEED_NOTRANSPOSE) {
81+
v += nqpt;
82+
} else {
83+
u += nqpt;
84+
}
85+
}
86+
if (emode & CEED_EVAL_GRAD) {
87+
CeedInt P = basis->P1d, Q = basis->Q1d;
88+
// In CEED_NOTRANSPOSE mode:
89+
// u is (P^dim x nc), column-major layout (nc = ndof)
90+
// v is (Q^dim x nc x dim), column-major layout (nc = ndof)
91+
// In CEED_TRANSPOSE mode, the sizes of u and v are switched.
92+
if (tmode == CEED_TRANSPOSE) {
93+
P = basis->Q1d, Q = basis->P1d;
94+
}
95+
CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)];
96+
for (CeedInt p = 0; p < dim; p++) {
97+
CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1;
98+
for (CeedInt d=0; d<dim; d++) {
99+
ierr = CeedTensorContract_Ref(basis->ceed, pre, P, post, Q,
100+
(p==d)?basis->grad1d:basis->interp1d,
101+
tmode, add&&(d==dim-1),
102+
d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]);
103+
CeedChk(ierr);
104+
pre /= P;
105+
post *= Q;
106+
}
107+
if (tmode == CEED_NOTRANSPOSE) {
108+
v += nqpt;
109+
} else {
110+
u += nqpt;
111+
}
112+
}
113+
}
114+
if (emode & CEED_EVAL_WEIGHT) {
115+
if (tmode == CEED_TRANSPOSE)
116+
return CeedError(basis->ceed, 1,
117+
"CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
118+
CeedInt Q = basis->Q1d;
119+
for (CeedInt d=0; d<dim; d++) {
120+
CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d);
121+
for (CeedInt i=0; i<pre; i++) {
122+
for (CeedInt j=0; j<Q; j++) {
123+
for (CeedInt k=0; k<post; k++) {
124+
v[(i*Q + j)*post + k] = basis->qweight1d[j]
125+
* (d == 0 ? 1 : v[(i*Q + j)*post + k]);
126+
}
127+
}
128+
}
129+
}
130+
}
131+
return 0;
132+
}
133+
134+
// Destroy hook for reference-backend bases. Nothing is released here;
// the function always returns 0.
static int CeedBasisDestroy_Ref(CeedBasis basis) {
  (void)basis;
  return 0;
}
137+
138+
int CeedBasisCreateTensorH1_Ref(Ceed ceed, CeedInt dim, CeedInt P1d,
139+
CeedInt Q1d, const CeedScalar *interp1d,
140+
const CeedScalar *grad1d,
141+
const CeedScalar *qref1d,
142+
const CeedScalar *qweight1d,
143+
CeedBasis basis) {
144+
basis->Apply = CeedBasisApply_Ref;
145+
basis->Destroy = CeedBasisDestroy_Ref;
146+
return 0;
147+
}

backends/ref/ceed-ref-operator.c

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2+
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3+
// All Rights reserved. See files LICENSE and NOTICE for details.
4+
//
5+
// This file is part of CEED, a collection of benchmarks, miniapps, software
6+
// libraries and APIs for efficient high-order finite element and spectral
7+
// element discretizations for exascale applications. For more information and
8+
// source code availability see http://github.com/ceed.
9+
//
10+
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11+
// a collaborative effort of two U.S. Department of Energy organizations (Office
12+
// of Science and the National Nuclear Security Administration) responsible for
13+
// the planning and preparation of a capable exascale ecosystem, including
14+
// software, applications, hardware, advanced system engineering and early
15+
// testbed platforms, in support of the nation's exascale computing imperative.
16+
17+
#include <ceed-impl.h>
18+
#include <string.h>
19+
#include "ceed-ref.h"
20+
21+
// Destroy hook for a reference-backend operator: destroys the two vectors
// kept in the backend data (etmp, qdata) and then frees the backend data
// itself. Returns the first nonzero error code encountered, else 0.
static int CeedOperatorDestroy_Ref(CeedOperator op) {
  int ierr;
  CeedOperator_Ref *refdata = op->data;

  ierr = CeedVectorDestroy(&refdata->etmp);
  CeedChk(ierr);
  ierr = CeedVectorDestroy(&refdata->qdata);
  CeedChk(ierr);
  ierr = CeedFree(&op->data);
  CeedChk(ierr);
  return 0;
}
30+
31+
// Apply the operator element by element with the reference backend.
//   qdata    - vector whose host array is passed to the QFunction in
//              per-element slices of Q*qdatasize bytes
//   ustate   - input vector; restricted into the element work vector
//              unless the QFunction's input mode is CEED_EVAL_WEIGHT only
//   residual - output vector; when non-NULL it is zeroed and then receives
//              the transposed restriction of the element results
//   request  - cleared to NULL on return unless it is CEED_REQUEST_IMMEDIATE
//              or CEED_REQUEST_ORDERED; a NULL pointer is ignored
// Returns 0 on success or the first nonzero error code from a callee.
static int CeedOperatorApply_Ref(CeedOperator op, CeedVector qdata,
                                 CeedVector ustate,
                                 CeedVector residual, CeedRequest *request) {
  CeedOperator_Ref *impl = op->data;
  CeedVector etmp;
  CeedInt Q;
  const CeedInt nc = op->basis->ndof, dim = op->basis->dim;
  // Number of scalars each element occupies in the element work vector
  const CeedInt esize = op->Erestrict->elemsize*nc;
  CeedScalar *Eu;
  char *qd;
  int ierr;
  CeedTransposeMode lmode = CEED_NOTRANSPOSE;

  if (!impl->etmp) {
    // Created on first use and kept in the backend data for later calls
    ierr = CeedVectorCreate(op->ceed,
                            nc * op->Erestrict->nelem * op->Erestrict->elemsize,
                            &impl->etmp); CeedChk(ierr);
    // etmp is allocated when CeedVectorGetArray is called below
  }
  etmp = impl->etmp;
  if (op->qf->inmode & ~CEED_EVAL_WEIGHT) {
    ierr = CeedElemRestrictionApply(op->Erestrict, CEED_NOTRANSPOSE,
                                    nc, lmode, ustate, etmp,
                                    CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  }
  ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr);
  ierr = CeedVectorGetArray(etmp, CEED_MEM_HOST, &Eu); CeedChk(ierr);
  ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, (CeedScalar**)&qd);
  CeedChk(ierr);
  for (CeedInt e=0; e<op->Erestrict->nelem; e++) {
    // Quadrature-point work buffers: room for interp (Q*nc), grad
    // (Q*nc*dim) and weights (Q)
    CeedScalar BEu[Q*nc*(dim+2)], BEv[Q*nc*(dim+2)], *out[5] = {0,0,0,0,0};
    const CeedScalar *in[5] = {0,0,0,0,0};
    // TODO: quadrature weights can be computed just once
    ierr = CeedBasisApply(op->basis, CEED_NOTRANSPOSE, op->qf->inmode,
                          &Eu[e*esize], BEu);
    CeedChk(ierr);
    // Hand the QFunction pointers into the packed buffers: slot 0 is
    // interp, slot 1 is grad, slot 4 is the quadrature weights
    CeedScalar *u_ptr = BEu, *v_ptr = BEv;
    if (op->qf->inmode & CEED_EVAL_INTERP) { in[0] = u_ptr; u_ptr += Q*nc; }
    if (op->qf->inmode & CEED_EVAL_GRAD) { in[1] = u_ptr; u_ptr += Q*nc*dim; }
    if (op->qf->inmode & CEED_EVAL_WEIGHT) { in[4] = u_ptr; u_ptr += Q; }
    if (op->qf->outmode & CEED_EVAL_INTERP) { out[0] = v_ptr; v_ptr += Q*nc; }
    if (op->qf->outmode & CEED_EVAL_GRAD) { out[1] = v_ptr; v_ptr += Q*nc*dim; }
    ierr = CeedQFunctionApply(op->qf, &qd[e*Q*op->qf->qdatasize], Q, in, out);
    CeedChk(ierr);
    // The transposed basis action overwrites this element's slice of Eu
    ierr = CeedBasisApply(op->basis, CEED_TRANSPOSE, op->qf->outmode, BEv,
                          &Eu[e*esize]);
    CeedChk(ierr);
  }
  ierr = CeedVectorRestoreArray(etmp, &Eu); CeedChk(ierr);
  ierr = CeedVectorRestoreArray(qdata, (CeedScalar**)&qd); CeedChk(ierr);
  if (residual) {
    CeedScalar *res;
    ierr = CeedVectorGetArray(residual, CEED_MEM_HOST, &res); CeedChk(ierr);
    for (CeedInt i = 0; i < residual->length; i++)
      res[i] = (CeedScalar)0;
    ierr = CeedElemRestrictionApply(op->Erestrict, CEED_TRANSPOSE,
                                    nc, lmode, etmp, residual,
                                    CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
    ierr = CeedVectorRestoreArray(residual, &res); CeedChk(ierr);
  }
  // Guard against a NULL request before writing through it
  if (request && request != CEED_REQUEST_IMMEDIATE
      && request != CEED_REQUEST_ORDERED)
    *request = NULL;
  return 0;
}
94+
95+
// Return the operator's quadrature-data vector, creating it on first use.
// The vector must hold nelem * Q * qdatasize bytes; its length is given in
// CeedScalar units, rounded up so that a byte count that is not a multiple
// of sizeof(CeedScalar) is still fully covered.
//   qdata - receives the vector stored in the backend data
// Returns 0 on success or the first nonzero error code from a callee.
static int CeedOperatorGetQData_Ref(CeedOperator op, CeedVector *qdata) {
  CeedOperator_Ref *impl = op->data;
  int ierr;

  if (!impl->qdata) {
    CeedInt Q;
    ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr);
    // Form the byte count in size_t, then round up to whole scalars
    const size_t nbytes = (size_t)op->Erestrict->nelem * (size_t)Q
                          * op->qf->qdatasize;
    const size_t nscalar = (nbytes + sizeof(CeedScalar) - 1)
                           / sizeof(CeedScalar);
    ierr = CeedVectorCreate(op->ceed, (CeedInt)nscalar, &impl->qdata);
    CeedChk(ierr);
  }
  *qdata = impl->qdata;
  return 0;
}
110+
111+
// Backend part of operator creation: allocates the reference backend data
// with CeedCalloc, attaches it to the operator, and installs the Apply,
// GetQData and Destroy hooks. Returns 0 on success or the allocation error.
int CeedOperatorCreate_Ref(CeedOperator op) {
  int ierr;
  CeedOperator_Ref *refdata;

  ierr = CeedCalloc(1, &refdata);
  CeedChk(ierr);

  op->Apply = CeedOperatorApply_Ref;
  op->GetQData = CeedOperatorGetQData_Ref;
  op->Destroy = CeedOperatorDestroy_Ref;
  op->data = refdata;
  return 0;
}

backends/ref/ceed-ref-qfunction.c

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2+
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3+
// All Rights reserved. See files LICENSE and NOTICE for details.
4+
//
5+
// This file is part of CEED, a collection of benchmarks, miniapps, software
6+
// libraries and APIs for efficient high-order finite element and spectral
7+
// element discretizations for exascale applications. For more information and
8+
// source code availability see http://github.com/ceed.
9+
//
10+
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11+
// a collaborative effort of two U.S. Department of Energy organizations (Office
12+
// of Science and the National Nuclear Security Administration) responsible for
13+
// the planning and preparation of a capable exascale ecosystem, including
14+
// software, applications, hardware, advanced system engineering and early
15+
// testbed platforms, in support of the nation's exascale computing imperative.
16+
17+
#include <ceed-impl.h>
18+
#include <string.h>
19+
#include "ceed-ref.h"
20+
21+
static int CeedQFunctionApply_Ref(CeedQFunction qf, void *qdata, CeedInt Q,
22+
const CeedScalar *const *u,
23+
CeedScalar *const *v) {
24+
int ierr;
25+
ierr = qf->function(qf->ctx, qdata, Q, u, v); CeedChk(ierr);
26+
return 0;
27+
}
28+
29+
// Destroy hook for reference-backend QFunctions. Nothing is released here;
// the function always returns 0.
static int CeedQFunctionDestroy_Ref(CeedQFunction qf) {
  (void)qf;
  return 0;
}
32+
33+
// Backend part of QFunction creation: installs the reference Apply and
// Destroy hooks on the QFunction object. Always returns 0.
int CeedQFunctionCreate_Ref(CeedQFunction qf) {
  qf->Destroy = CeedQFunctionDestroy_Ref;
  qf->Apply = CeedQFunctionApply_Ref;
  return 0;
}

0 commit comments

Comments
 (0)