@@ -234,6 +234,30 @@ inline __device__ void GradTransposeTensor2d(SharedData_Hip &data, const CeedSca
234234 }
235235}
236236
237+ // ------------------------------------------------------------------------------
238+ // 2D derivatives at quadrature points, nodes and quadrature points collocated
239+ // ------------------------------------------------------------------------------
240+ template <int NUM_COMP, int P_1D, int Q_1D, int T_1D>
241+ inline __device__ void GradTensorCollocatedNodes2d (SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G,
242+ CeedScalar *__restrict__ r_V) {
243+ for (CeedInt comp = 0 ; comp < NUM_COMP; comp++) {
244+ ContractX2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp], c_G, &r_V[comp + 0 * NUM_COMP]);
245+ ContractY2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp], c_G, &r_V[comp + 1 * NUM_COMP]);
246+ }
247+ }
248+
249+ // ------------------------------------------------------------------------------
250+ // 2D derivatives transpose, nodes and quadrature points collocated
251+ // ------------------------------------------------------------------------------
252+ template <int NUM_COMP, int P_1D, int Q_1D, int T_1D>
253+ inline __device__ void GradTransposeTensorCollocatedNodes2d (SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G,
254+ CeedScalar *__restrict__ r_V) {
255+ for (CeedInt comp = 0 ; comp < NUM_COMP; comp++) {
256+ ContractTransposeY2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp + 1 * NUM_COMP], c_G, &r_V[comp]);
257+ ContractTransposeAddX2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp + 0 * NUM_COMP], c_G, &r_V[comp]);
258+ }
259+ }
260+
237261// ------------------------------------------------------------------------------
238262// 2D quadrature weights
239263// ------------------------------------------------------------------------------
@@ -519,6 +543,32 @@ inline __device__ void GradTransposeTensorCollocated3d(SharedData_Hip &data, con
519543 }
520544}
521545
546+ // ------------------------------------------------------------------------------
547+ // 3D derivatives at quadrature points, nodes and quadrature points collocated
548+ // ------------------------------------------------------------------------------
549+ template <int NUM_COMP, int P_1D, int Q_1D, int T_1D>
550+ inline __device__ void GradTensorCollocatedNodes3d (SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G,
551+ CeedScalar *__restrict__ r_V) {
552+ for (CeedInt comp = 0 ; comp < NUM_COMP; comp++) {
553+ ContractX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * P_1D], c_G, &r_V[comp * Q_1D + 0 * NUM_COMP * Q_1D]);
554+ ContractY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * P_1D], c_G, &r_V[comp * Q_1D + 1 * NUM_COMP * Q_1D]);
555+ ContractZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * P_1D], c_G, &r_V[comp * Q_1D + 2 * NUM_COMP * Q_1D]);
556+ }
557+ }
558+
559+ // ------------------------------------------------------------------------------
560+ // 3D derivatives transpose, nodes and quadrature points collocated
561+ // ------------------------------------------------------------------------------
562+ template <int NUM_COMP, int P_1D, int Q_1D, int T_1D>
563+ inline __device__ void GradTransposeTensorCollocatedNodes3d (SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G,
564+ CeedScalar *__restrict__ r_V) {
565+ for (CeedInt comp = 0 ; comp < NUM_COMP; comp++) {
566+ ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 2 * NUM_COMP * Q_1D], c_G, &r_V[comp * P_1D]);
567+ ContractTransposeAddY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 1 * NUM_COMP * Q_1D], c_G, &r_V[comp * P_1D]);
568+ ContractTransposeAddX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 0 * NUM_COMP * Q_1D], c_G, &r_V[comp * P_1D]);
569+ }
570+ }
571+
522572// ------------------------------------------------------------------------------
523573// 3D quadrature weights
524574// ------------------------------------------------------------------------------
0 commit comments