@@ -51,7 +51,8 @@ static int CeedBasisApply_Blocked(CeedBasis basis, CeedInt nelem,
5151 int ierr ;
5252 const CeedInt dim = basis -> dim ;
5353 const CeedInt ncomp = basis -> ncomp ;
54- const CeedInt nqpt = CeedIntPow (basis -> Q1d , dim );
54+ CeedInt nqpt ;
55+ ierr = CeedBasisGetNumQuadraturePoints (basis , & nqpt ); CeedChk (ierr );
5556 const CeedInt add = (tmode == CEED_TRANSPOSE );
5657 const CeedInt blksize = 8 ;
5758
@@ -64,107 +65,152 @@ static int CeedBasisApply_Blocked(CeedBasis basis, CeedInt nelem,
6465 for (CeedInt i = 0 ; i < vsize ; i ++ )
6566 v [i ] = (CeedScalar ) 0.0 ;
6667 }
67- switch (emode ) {
68- case CEED_EVAL_INTERP : {
69- CeedInt P = basis -> P1d , Q = basis -> Q1d ;
70- if (tmode == CEED_TRANSPOSE ) {
71- P = basis -> Q1d ; Q = basis -> P1d ;
72- }
73- CeedInt pre = ncomp * CeedIntPow (P , dim - 1 ), post = nelem ;
74- CeedScalar tmp [2 ][nelem * ncomp * Q * CeedIntPow (P > Q ?P :Q , dim - 1 )];
75- for (CeedInt d = 0 ; d < dim ; d ++ ) {
76- ierr = CeedTensorContract_Blocked (basis -> ceed , pre , P , post , Q ,
77- basis -> interp1d , tmode , add && (d == dim - 1 ),
78- d == 0 ?u :tmp [d %2 ], d == dim - 1 ?v :tmp [(d + 1 )%2 ]);
79- CeedChk (ierr );
80- pre /= P ;
81- post *= Q ;
82- }
83- } break ;
84- case CEED_EVAL_GRAD : {
85- // In CEED_NOTRANSPOSE mode:
86- // u has shape [dim, ncomp, P^dim, nelem], row-major layout
87- // v has shape [dim, ncomp, Q^dim, nelem], row-major layout
88- // In CEED_TRANSPOSE mode, the sizes of u and v are switched.
89- CeedInt P = basis -> P1d , Q = basis -> Q1d ;
90- if (tmode == CEED_TRANSPOSE ) {
68+ if (basis -> tensorbasis ) {
69+ switch (emode ) {
70+ case CEED_EVAL_INTERP : {
71+ CeedInt P = basis -> P1d , Q = basis -> Q1d ;
72+ if (tmode == CEED_TRANSPOSE ) {
73+ P = basis -> Q1d ; Q = basis -> P1d ;
74+ }
75+ CeedInt pre = ncomp * CeedIntPow (P , dim - 1 ), post = nelem ;
76+ CeedScalar tmp [2 ][nelem * ncomp * Q * CeedIntPow (P > Q ?P :Q , dim - 1 )];
77+ for (CeedInt d = 0 ; d < dim ; d ++ ) {
78+ ierr = CeedTensorContract_Blocked (basis -> ceed , pre , P , post , Q ,
79+ basis -> interp1d , tmode , add && (d == dim - 1 ),
80+ d == 0 ?u :tmp [d %2 ], d == dim - 1 ?v :tmp [(d + 1 )%2 ]);
81+ CeedChk (ierr );
82+ pre /= P ;
83+ post *= Q ;
84+ }
85+ } break ;
86+ case CEED_EVAL_GRAD : {
87+ // In CEED_NOTRANSPOSE mode:
88+ // u has shape [dim, ncomp, P^dim, nelem], row-major layout
89+ // v has shape [dim, ncomp, Q^dim, nelem], row-major layout
90+ // In CEED_TRANSPOSE mode, the sizes of u and v are switched.
91+ CeedInt P = basis -> P1d , Q = basis -> Q1d ;
92+ if (tmode == CEED_TRANSPOSE ) {
93+ P = basis -> Q1d , Q = basis -> Q1d ;
94+ }
95+ CeedBasis_Blocked * impl = basis -> data ;
96+ CeedScalar interp [nelem * ncomp * Q * CeedIntPow (P > Q ?P :Q , dim - 1 )];
97+ CeedInt pre = ncomp * CeedIntPow (P , dim - 1 ), post = nelem ;
98+ CeedScalar tmp [2 ][nelem * ncomp * Q * CeedIntPow (P > Q ?P :Q , dim - 1 )];
99+ // Interpolate to quadrature points (NoTranspose)
100+ // or Grad to quadrature points (Transpose)
101+ for (CeedInt d = 0 ; d < dim ; d ++ ) {
102+ ierr = CeedTensorContract_Blocked (basis -> ceed , pre , P , post , Q ,
103+ (tmode == CEED_NOTRANSPOSE
104+ ? basis -> interp1d
105+ : impl -> colograd1d ),
106+ tmode , add && (d > 0 ),
107+ (tmode == CEED_NOTRANSPOSE
108+ ? (d == 0 ?u :tmp [d %2 ])
109+ : u + d * nqpt * ncomp * nelem ),
110+ (tmode == CEED_NOTRANSPOSE
111+ ? (d == dim - 1 ?interp :tmp [(d + 1 )%2 ])
112+ : interp ));
113+ CeedChk (ierr );
114+ pre /= P ;
115+ post *= Q ;
116+ }
117+ // Grad to quadrature points (NoTranspose)
118+ // or Interpolate to dofs (Transpose)
91119 P = basis -> Q1d , Q = basis -> Q1d ;
120+ if (tmode == CEED_TRANSPOSE ) {
121+ P = basis -> Q1d , Q = basis -> P1d ;
122+ }
123+ pre = ncomp * CeedIntPow (P , dim - 1 ), post = nelem ;
124+ for (CeedInt d = 0 ; d < dim ; d ++ ) {
125+ ierr = CeedTensorContract_Blocked (basis -> ceed , pre , P , post , Q ,
126+ (tmode == CEED_NOTRANSPOSE
127+ ? impl -> colograd1d
128+ : basis -> interp1d ),
129+ tmode , add && (d == dim - 1 ),
130+ (tmode == CEED_NOTRANSPOSE
131+ ? interp
132+ : (d == 0 ?interp :tmp [d %2 ])),
133+ (tmode == CEED_NOTRANSPOSE
134+ ? v + d * nqpt * ncomp * nelem
135+ : (d == dim - 1 ?v :tmp [(d + 1 )%2 ])));
136+ CeedChk (ierr );
137+ pre /= P ;
138+ post *= Q ;
139+ }
140+ } break ;
141+ case CEED_EVAL_WEIGHT : {
142+ if (tmode == CEED_TRANSPOSE )
143+ return CeedError (basis -> ceed , 1 ,
144+ "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE" );
145+ CeedInt Q = basis -> Q1d ;
146+ for (CeedInt d = 0 ; d < dim ; d ++ ) {
147+ CeedInt pre = CeedIntPow (Q , dim - d - 1 ), post = CeedIntPow (Q , d );
148+ for (CeedInt i = 0 ; i < pre ; i ++ )
149+ for (CeedInt j = 0 ; j < Q ; j ++ )
150+ for (CeedInt k = 0 ; k < post ; k ++ ) {
151+ CeedScalar w = basis -> qweight1d [j ]
152+ * (d == 0 ? 1 : v [((i * Q + j )* post + k )* nelem ]);
153+ for (CeedInt e = 0 ; e < nelem ; e ++ )
154+ v [((i * Q + j )* post + k )* nelem + e ] = w ;
155+ }
156+ }
157+ } break ;
158+ case CEED_EVAL_DIV :
159+ return CeedError (basis -> ceed , 1 , "CEED_EVAL_DIV not supported" );
160+ case CEED_EVAL_CURL :
161+ return CeedError (basis -> ceed , 1 , "CEED_EVAL_CURL not supported" );
162+ case CEED_EVAL_NONE :
163+ return CeedError (basis -> ceed , 1 ,
164+ "CEED_EVAL_NONE does not make sense in this context" );
92165 }
93- CeedBasis_Blocked * impl = basis -> data ;
94- CeedScalar interp [nelem * ncomp * Q * CeedIntPow (P > Q ?P :Q , dim - 1 )];
95- CeedInt pre = ncomp * CeedIntPow (P , dim - 1 ), post = nelem ;
96- CeedScalar tmp [2 ][nelem * ncomp * Q * CeedIntPow (P > Q ?P :Q , dim - 1 )];
97- // Interpolate to quadrature points (NoTranspose)
98- // or Grad to quadrature points (Transpose)
99- for (CeedInt d = 0 ; d < dim ; d ++ ) {
100- ierr = CeedTensorContract_Blocked (basis -> ceed , pre , P , post , Q ,
101- (tmode == CEED_NOTRANSPOSE
102- ? basis -> interp1d
103- : impl -> colograd1d ),
104- tmode , add && (d > 0 ),
105- (tmode == CEED_NOTRANSPOSE
106- ? (d == 0 ?u :tmp [d %2 ])
107- : u + d * nqpt * ncomp * nelem ),
108- (tmode == CEED_NOTRANSPOSE
109- ? (d == dim - 1 ?interp :tmp [(d + 1 )%2 ])
110- : interp ));
166+ } else {
167+ // Non-tensor basis
168+ switch (emode ) {
169+ case CEED_EVAL_INTERP : {
170+ CeedInt P = basis -> P , Q = basis -> Q ;
171+ if (tmode == CEED_TRANSPOSE ) {
172+ P = basis -> Q ; Q = basis -> P ;
173+ }
174+ ierr = CeedTensorContract_Blocked (basis -> ceed , ncomp , P , nelem , Q ,
175+ basis -> interp1d , tmode , add , u , v );
111176 CeedChk (ierr );
112- pre /= P ;
113- post *= Q ;
114- }
115- // Grad to quadrature points (NoTranspose)
116- // or Interpolate to dofs (Transpose)
117- P = basis -> Q1d , Q = basis -> Q1d ;
118- if (tmode == CEED_TRANSPOSE ) {
119- P = basis -> Q1d , Q = basis -> P1d ;
120177 }
121- pre = ncomp * CeedIntPow (P , dim - 1 ), post = nelem ;
122- for (CeedInt d = 0 ; d < dim ; d ++ ) {
123- ierr = CeedTensorContract_Blocked (basis -> ceed , pre , P , post , Q ,
124- (tmode == CEED_NOTRANSPOSE
125- ? impl -> colograd1d
126- : basis -> interp1d ),
127- tmode , add && (d == dim - 1 ),
128- (tmode == CEED_NOTRANSPOSE
129- ? interp
130- : (d == 0 ?interp :tmp [d %2 ])),
131- (tmode == CEED_NOTRANSPOSE
132- ? v + d * nqpt * ncomp * nelem
133- : (d == dim - 1 ?v :tmp [(d + 1 )%2 ])));
178+ break ;
179+ case CEED_EVAL_GRAD : {
180+ CeedInt P = basis -> P , Q = dim * basis -> Q ;
181+ if (tmode == CEED_TRANSPOSE ) {
182+ P = dim * basis -> Q ; Q = basis -> P ;
183+ }
184+ ierr = CeedTensorContract_Blocked (basis -> ceed , ncomp , P , nelem , Q ,
185+ basis -> grad1d , tmode , add , u , v );
134186 CeedChk (ierr );
135- pre /= P ;
136- post *= Q ;
137187 }
138- } break ;
139- case CEED_EVAL_WEIGHT : {
140- if (tmode == CEED_TRANSPOSE )
188+ break ;
189+ case CEED_EVAL_WEIGHT : {
190+ if (tmode == CEED_TRANSPOSE )
191+ return CeedError (basis -> ceed , 1 ,
192+ "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE" );
193+ for (CeedInt i = 0 ; i < nqpt ; i ++ )
194+ for (CeedInt e = 0 ; e < nelem ; e ++ )
195+ v [i * nelem + e ] = basis -> qweight1d [i ];
196+ } break ;
197+ case CEED_EVAL_DIV :
198+ return CeedError (basis -> ceed , 1 , "CEED_EVAL_DIV not supported" );
199+ case CEED_EVAL_CURL :
200+ return CeedError (basis -> ceed , 1 , "CEED_EVAL_CURL not supported" );
201+ case CEED_EVAL_NONE :
141202 return CeedError (basis -> ceed , 1 ,
142- "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE" );
143- CeedInt Q = basis -> Q1d ;
144- for (CeedInt d = 0 ; d < dim ; d ++ ) {
145- CeedInt pre = CeedIntPow (Q , dim - d - 1 ), post = CeedIntPow (Q , d );
146- for (CeedInt i = 0 ; i < pre ; i ++ )
147- for (CeedInt j = 0 ; j < Q ; j ++ )
148- for (CeedInt k = 0 ; k < post ; k ++ ) {
149- CeedScalar w = basis -> qweight1d [j ]
150- * (d == 0 ? 1 : v [((i * Q + j )* post + k )* nelem ]);
151- for (CeedInt e = 0 ; e < nelem ; e ++ )
152- v [((i * Q + j )* post + k )* nelem + e ] = w ;
153- }
203+ "CEED_EVAL_NONE does not make sense in this context" );
154204 }
155- } break ;
156- case CEED_EVAL_DIV :
157- return CeedError (basis -> ceed , 1 , "CEED_EVAL_DIV not supported" );
158- case CEED_EVAL_CURL :
159- return CeedError (basis -> ceed , 1 , "CEED_EVAL_CURL not supported" );
160- case CEED_EVAL_NONE :
161- return CeedError (basis -> ceed , 1 ,
162- "CEED_EVAL_NONE does not make sense in this context" );
163205 }
164206 return 0 ;
165207}
166208
167- static int CeedBasisDestroy_Blocked (CeedBasis basis ) {
209+ static int CeedBasisDestroyNonTensor_Blocked (CeedBasis basis ) {
210+ return 0 ;
211+ }
212+
213+ static int CeedBasisDestroyTensor_Blocked (CeedBasis basis ) {
168214 CeedBasis_Blocked * impl = basis -> data ;
169215 int ierr ;
170216
@@ -188,7 +234,7 @@ int CeedBasisCreateTensorH1_Blocked(CeedInt dim, CeedInt P1d,
188234 basis -> data = impl ;
189235
190236 basis -> Apply = CeedBasisApply_Blocked ;
191- basis -> Destroy = CeedBasisDestroy_Blocked ;
237+ basis -> Destroy = CeedBasisDestroyTensor_Blocked ;
192238 return 0 ;
193239}
194240
@@ -201,5 +247,7 @@ int CeedBasisCreateH1_Blocked(CeedElemTopology topo, CeedInt dim,
201247 const CeedScalar * qref ,
202248 const CeedScalar * qweight ,
203249 CeedBasis basis ) {
204- return CeedError (basis -> ceed , 1 , "Backend does not implement non-tensor bases" );
250+ basis -> Apply = CeedBasisApply_Blocked ;
251+ basis -> Destroy = CeedBasisDestroyNonTensor_Blocked ;
252+ return 0 ;
205253}
0 commit comments