Skip to content

Commit 8c7519e

Browse files
committed
Merge branch 'origin/blocked-nontensor' [PR #160]
* origin/blocked-nontensor: Add non-tensor bases to /cpu/self/blocked (and thus tmpl)
2 parents 5ccd0c6 + 43547ca commit 8c7519e

File tree

1 file changed

+140
-92
lines changed

1 file changed

+140
-92
lines changed

backends/blocked/ceed-blocked-basis.c

Lines changed: 140 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ static int CeedBasisApply_Blocked(CeedBasis basis, CeedInt nelem,
5151
int ierr;
5252
const CeedInt dim = basis->dim;
5353
const CeedInt ncomp = basis->ncomp;
54-
const CeedInt nqpt = CeedIntPow(basis->Q1d, dim);
54+
CeedInt nqpt;
55+
ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChk(ierr);
5556
const CeedInt add = (tmode == CEED_TRANSPOSE);
5657
const CeedInt blksize = 8;
5758

@@ -64,107 +65,152 @@ static int CeedBasisApply_Blocked(CeedBasis basis, CeedInt nelem,
6465
for (CeedInt i = 0; i < vsize; i++)
6566
v[i] = (CeedScalar) 0.0;
6667
}
67-
switch (emode) {
68-
case CEED_EVAL_INTERP: {
69-
CeedInt P = basis->P1d, Q = basis->Q1d;
70-
if (tmode == CEED_TRANSPOSE) {
71-
P = basis->Q1d; Q = basis->P1d;
72-
}
73-
CeedInt pre = ncomp*CeedIntPow(P, dim-1), post = nelem;
74-
CeedScalar tmp[2][nelem*ncomp*Q*CeedIntPow(P>Q?P:Q, dim-1)];
75-
for (CeedInt d=0; d<dim; d++) {
76-
ierr = CeedTensorContract_Blocked(basis->ceed, pre, P, post, Q,
77-
basis->interp1d, tmode, add&&(d==dim-1),
78-
d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]);
79-
CeedChk(ierr);
80-
pre /= P;
81-
post *= Q;
82-
}
83-
} break;
84-
case CEED_EVAL_GRAD: {
85-
// In CEED_NOTRANSPOSE mode:
86-
// u has shape [dim, ncomp, P^dim, nelem], row-major layout
87-
// v has shape [dim, ncomp, Q^dim, nelem], row-major layout
88-
// In CEED_TRANSPOSE mode, the sizes of u and v are switched.
89-
CeedInt P = basis->P1d, Q = basis->Q1d;
90-
if (tmode == CEED_TRANSPOSE) {
68+
if (basis->tensorbasis) {
69+
switch (emode) {
70+
case CEED_EVAL_INTERP: {
71+
CeedInt P = basis->P1d, Q = basis->Q1d;
72+
if (tmode == CEED_TRANSPOSE) {
73+
P = basis->Q1d; Q = basis->P1d;
74+
}
75+
CeedInt pre = ncomp*CeedIntPow(P, dim-1), post = nelem;
76+
CeedScalar tmp[2][nelem*ncomp*Q*CeedIntPow(P>Q?P:Q, dim-1)];
77+
for (CeedInt d=0; d<dim; d++) {
78+
ierr = CeedTensorContract_Blocked(basis->ceed, pre, P, post, Q,
79+
basis->interp1d, tmode, add&&(d==dim-1),
80+
d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]);
81+
CeedChk(ierr);
82+
pre /= P;
83+
post *= Q;
84+
}
85+
} break;
86+
case CEED_EVAL_GRAD: {
87+
// In CEED_NOTRANSPOSE mode:
88+
// u has shape [dim, ncomp, P^dim, nelem], row-major layout
89+
// v has shape [dim, ncomp, Q^dim, nelem], row-major layout
90+
// In CEED_TRANSPOSE mode, the sizes of u and v are switched.
91+
CeedInt P = basis->P1d, Q = basis->Q1d;
92+
if (tmode == CEED_TRANSPOSE) {
93+
P = basis->Q1d, Q = basis->Q1d;
94+
}
95+
CeedBasis_Blocked *impl = basis->data;
96+
CeedScalar interp[nelem*ncomp*Q*CeedIntPow(P>Q?P:Q, dim-1)];
97+
CeedInt pre = ncomp*CeedIntPow(P, dim-1), post = nelem;
98+
CeedScalar tmp[2][nelem*ncomp*Q*CeedIntPow(P>Q?P:Q, dim-1)];
99+
// Interpolate to quadrature points (NoTranspose)
100+
// or Grad to quadrature points (Transpose)
101+
for (CeedInt d=0; d<dim; d++) {
102+
ierr = CeedTensorContract_Blocked(basis->ceed, pre, P, post, Q,
103+
(tmode == CEED_NOTRANSPOSE
104+
? basis->interp1d
105+
: impl->colograd1d),
106+
tmode, add&&(d>0),
107+
(tmode == CEED_NOTRANSPOSE
108+
? (d==0?u:tmp[d%2])
109+
: u + d*nqpt*ncomp*nelem),
110+
(tmode == CEED_NOTRANSPOSE
111+
? (d==dim-1?interp:tmp[(d+1)%2])
112+
: interp));
113+
CeedChk(ierr);
114+
pre /= P;
115+
post *= Q;
116+
}
117+
// Grad to quadrature points (NoTranspose)
118+
// or Interpolate to dofs (Transpose)
91119
P = basis->Q1d, Q = basis->Q1d;
120+
if (tmode == CEED_TRANSPOSE) {
121+
P = basis->Q1d, Q = basis->P1d;
122+
}
123+
pre = ncomp*CeedIntPow(P, dim-1), post = nelem;
124+
for (CeedInt d=0; d<dim; d++) {
125+
ierr = CeedTensorContract_Blocked(basis->ceed, pre, P, post, Q,
126+
(tmode == CEED_NOTRANSPOSE
127+
? impl->colograd1d
128+
: basis->interp1d),
129+
tmode, add&&(d==dim-1),
130+
(tmode == CEED_NOTRANSPOSE
131+
? interp
132+
: (d==0?interp:tmp[d%2])),
133+
(tmode == CEED_NOTRANSPOSE
134+
? v + d*nqpt*ncomp*nelem
135+
: (d==dim-1?v:tmp[(d+1)%2])));
136+
CeedChk(ierr);
137+
pre /= P;
138+
post *= Q;
139+
}
140+
} break;
141+
case CEED_EVAL_WEIGHT: {
142+
if (tmode == CEED_TRANSPOSE)
143+
return CeedError(basis->ceed, 1,
144+
"CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
145+
CeedInt Q = basis->Q1d;
146+
for (CeedInt d=0; d<dim; d++) {
147+
CeedInt pre = CeedIntPow(Q, dim-d-1), post = CeedIntPow(Q, d);
148+
for (CeedInt i=0; i<pre; i++)
149+
for (CeedInt j=0; j<Q; j++)
150+
for (CeedInt k=0; k<post; k++) {
151+
CeedScalar w = basis->qweight1d[j]
152+
* (d == 0 ? 1 : v[((i*Q + j)*post + k)*nelem]);
153+
for (CeedInt e=0; e<nelem; e++)
154+
v[((i*Q + j)*post + k)*nelem + e] = w;
155+
}
156+
}
157+
} break;
158+
case CEED_EVAL_DIV:
159+
return CeedError(basis->ceed, 1, "CEED_EVAL_DIV not supported");
160+
case CEED_EVAL_CURL:
161+
return CeedError(basis->ceed, 1, "CEED_EVAL_CURL not supported");
162+
case CEED_EVAL_NONE:
163+
return CeedError(basis->ceed, 1,
164+
"CEED_EVAL_NONE does not make sense in this context");
92165
}
93-
CeedBasis_Blocked *impl = basis->data;
94-
CeedScalar interp[nelem*ncomp*Q*CeedIntPow(P>Q?P:Q, dim-1)];
95-
CeedInt pre = ncomp*CeedIntPow(P, dim-1), post = nelem;
96-
CeedScalar tmp[2][nelem*ncomp*Q*CeedIntPow(P>Q?P:Q, dim-1)];
97-
// Interpolate to quadrature points (NoTranspose)
98-
// or Grad to quadrature points (Transpose)
99-
for (CeedInt d=0; d<dim; d++) {
100-
ierr = CeedTensorContract_Blocked(basis->ceed, pre, P, post, Q,
101-
(tmode == CEED_NOTRANSPOSE
102-
? basis->interp1d
103-
: impl->colograd1d),
104-
tmode, add&&(d>0),
105-
(tmode == CEED_NOTRANSPOSE
106-
? (d==0?u:tmp[d%2])
107-
: u + d*nqpt*ncomp*nelem),
108-
(tmode == CEED_NOTRANSPOSE
109-
? (d==dim-1?interp:tmp[(d+1)%2])
110-
: interp));
166+
} else {
167+
// Non-tensor basis
168+
switch (emode) {
169+
case CEED_EVAL_INTERP: {
170+
CeedInt P = basis->P, Q = basis->Q;
171+
if (tmode == CEED_TRANSPOSE) {
172+
P = basis->Q; Q = basis->P;
173+
}
174+
ierr = CeedTensorContract_Blocked(basis->ceed, ncomp, P, nelem, Q,
175+
basis->interp1d, tmode, add, u, v);
111176
CeedChk(ierr);
112-
pre /= P;
113-
post *= Q;
114-
}
115-
// Grad to quadrature points (NoTranspose)
116-
// or Interpolate to dofs (Transpose)
117-
P = basis->Q1d, Q = basis->Q1d;
118-
if (tmode == CEED_TRANSPOSE) {
119-
P = basis->Q1d, Q = basis->P1d;
120177
}
121-
pre = ncomp*CeedIntPow(P, dim-1), post = nelem;
122-
for (CeedInt d=0; d<dim; d++) {
123-
ierr = CeedTensorContract_Blocked(basis->ceed, pre, P, post, Q,
124-
(tmode == CEED_NOTRANSPOSE
125-
? impl->colograd1d
126-
: basis->interp1d),
127-
tmode, add&&(d==dim-1),
128-
(tmode == CEED_NOTRANSPOSE
129-
? interp
130-
: (d==0?interp:tmp[d%2])),
131-
(tmode == CEED_NOTRANSPOSE
132-
? v + d*nqpt*ncomp*nelem
133-
: (d==dim-1?v:tmp[(d+1)%2])));
178+
break;
179+
case CEED_EVAL_GRAD: {
180+
CeedInt P = basis->P, Q = dim*basis->Q;
181+
if (tmode == CEED_TRANSPOSE) {
182+
P = dim*basis->Q; Q = basis->P;
183+
}
184+
ierr = CeedTensorContract_Blocked(basis->ceed, ncomp, P, nelem, Q,
185+
basis->grad1d, tmode, add, u, v);
134186
CeedChk(ierr);
135-
pre /= P;
136-
post *= Q;
137187
}
138-
} break;
139-
case CEED_EVAL_WEIGHT: {
140-
if (tmode == CEED_TRANSPOSE)
188+
break;
189+
case CEED_EVAL_WEIGHT: {
190+
if (tmode == CEED_TRANSPOSE)
191+
return CeedError(basis->ceed, 1,
192+
"CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
193+
for (CeedInt i=0; i<nqpt; i++)
194+
for (CeedInt e=0; e<nelem; e++)
195+
v[i*nelem + e] = basis->qweight1d[i];
196+
} break;
197+
case CEED_EVAL_DIV:
198+
return CeedError(basis->ceed, 1, "CEED_EVAL_DIV not supported");
199+
case CEED_EVAL_CURL:
200+
return CeedError(basis->ceed, 1, "CEED_EVAL_CURL not supported");
201+
case CEED_EVAL_NONE:
141202
return CeedError(basis->ceed, 1,
142-
"CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
143-
CeedInt Q = basis->Q1d;
144-
for (CeedInt d=0; d<dim; d++) {
145-
CeedInt pre = CeedIntPow(Q, dim-d-1), post = CeedIntPow(Q, d);
146-
for (CeedInt i=0; i<pre; i++)
147-
for (CeedInt j=0; j<Q; j++)
148-
for (CeedInt k=0; k<post; k++) {
149-
CeedScalar w = basis->qweight1d[j]
150-
* (d == 0 ? 1 : v[((i*Q + j)*post + k)*nelem]);
151-
for (CeedInt e=0; e<nelem; e++)
152-
v[((i*Q + j)*post + k)*nelem + e] = w;
153-
}
203+
"CEED_EVAL_NONE does not make sense in this context");
154204
}
155-
} break;
156-
case CEED_EVAL_DIV:
157-
return CeedError(basis->ceed, 1, "CEED_EVAL_DIV not supported");
158-
case CEED_EVAL_CURL:
159-
return CeedError(basis->ceed, 1, "CEED_EVAL_CURL not supported");
160-
case CEED_EVAL_NONE:
161-
return CeedError(basis->ceed, 1,
162-
"CEED_EVAL_NONE does not make sense in this context");
163205
}
164206
return 0;
165207
}
166208

167-
static int CeedBasisDestroy_Blocked(CeedBasis basis) {
209+
static int CeedBasisDestroyNonTensor_Blocked(CeedBasis basis) {
210+
return 0;
211+
}
212+
213+
static int CeedBasisDestroyTensor_Blocked(CeedBasis basis) {
168214
CeedBasis_Blocked *impl = basis->data;
169215
int ierr;
170216

@@ -188,7 +234,7 @@ int CeedBasisCreateTensorH1_Blocked(CeedInt dim, CeedInt P1d,
188234
basis->data = impl;
189235

190236
basis->Apply = CeedBasisApply_Blocked;
191-
basis->Destroy = CeedBasisDestroy_Blocked;
237+
basis->Destroy = CeedBasisDestroyTensor_Blocked;
192238
return 0;
193239
}
194240

@@ -201,5 +247,7 @@ int CeedBasisCreateH1_Blocked(CeedElemTopology topo, CeedInt dim,
201247
const CeedScalar *qref,
202248
const CeedScalar *qweight,
203249
CeedBasis basis) {
204-
return CeedError(basis->ceed, 1, "Backend does not implement non-tensor bases");
250+
basis->Apply = CeedBasisApply_Blocked;
251+
basis->Destroy = CeedBasisDestroyNonTensor_Blocked;
252+
return 0;
205253
}

0 commit comments

Comments
 (0)