Skip to content

Commit 02219a0

Browse files
committed
hip - collocated nodes/qpts for shared
1 parent 2129291 commit 02219a0

File tree

3 files changed

+273
-6
lines changed

3 files changed

+273
-6
lines changed

backends/hip-shared/ceed-hip-shared-basis.c

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -692,19 +692,22 @@ int CeedBasisCreateTensorH1_Hip_shared(CeedInt dim, CeedInt P_1d, CeedInt Q_1d,
692692
CeedCallBackend(ComputeBasisThreadBlockSizes(dim, P_1d, Q_1d, num_comp, data->block_sizes));
693693

694694
// Compile basis kernels
695+
bool is_collocated = false;
695696
const char basis_kernel_source[] = "// Tensor basis source\n#include <ceed/jit-source/hip/hip-shared-basis-tensor.h>\n";
696697

697698
CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 11, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_T_1D",
698699
CeedIntMax(Q_1d, P_1d), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, "BASIS_NUM_NODES", CeedIntPow(P_1d, dim),
699700
"BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_INTERP_BLOCK_SIZE", data->block_sizes[0], "BASIS_GRAD_BLOCK_SIZE",
700701
data->block_sizes[1], "BASIS_WEIGHT_BLOCK_SIZE", data->block_sizes[2], "BASIS_HAS_COLLOCATED_GRAD",
701702
has_collocated_grad));
702-
CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp));
703-
CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose));
704-
CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTransposeAdd", &data->InterpTransposeAdd));
705-
CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Grad", &data->Grad));
706-
CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "GradTranspose", &data->GradTranspose));
707-
CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "GradTransposeAdd", &data->GradTransposeAdd));
703+
CeedCallBackend(CeedBasisIsCollocated(basis, &is_collocated));
704+
CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, is_collocated ? "InterpCollocated" : "Interp", &data->Interp));
705+
CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, is_collocated ? "InterpCollocatedTranspose" : "InterpTranspose", &data->InterpTranspose));
706+
CeedCallBackend(
707+
CeedGetKernel_Hip(ceed, data->module, is_collocated ? "InterpCollocatedTransposeAdd" : "InterpTransposeAdd", &data->InterpTransposeAdd));
708+
CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, is_collocated ? "GradCollocated" : "Grad", &data->Grad));
709+
CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, is_collocated ? "GradCollocatedTranspose" : "GradTranspose", &data->GradTranspose));
710+
CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, is_collocated ? "GradCollocatedTransposeAdd" : "GradTransposeAdd", &data->GradTransposeAdd));
708711
CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight));
709712

710713
CeedCallBackend(CeedBasisSetData(basis, data));

include/ceed/jit-source/hip/hip-shared-basis-tensor-templates.h

Lines changed: 50 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -234,6 +234,30 @@ inline __device__ void GradTransposeTensor2d(SharedData_Hip &data, const CeedSca
234234
}
235235
}
236236

237+
//------------------------------------------------------------------------------
238+
// 2D derivatives at quadrature points, nodes and quadrature points collocated
239+
//------------------------------------------------------------------------------
240+
template <int NUM_COMP, int P_1D, int Q_1D, int T_1D>
241+
inline __device__ void GradTensorCollocatedNodes2d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G,
242+
CeedScalar *__restrict__ r_V) {
243+
for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
244+
ContractX2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp], c_G, &r_V[comp + 0 * NUM_COMP]);
245+
ContractY2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp], c_G, &r_V[comp + 1 * NUM_COMP]);
246+
}
247+
}
248+
249+
//------------------------------------------------------------------------------
250+
// 2D derivatives transpose, nodes and quadrature points collocated
251+
//------------------------------------------------------------------------------
252+
template <int NUM_COMP, int P_1D, int Q_1D, int T_1D>
253+
inline __device__ void GradTransposeTensorCollocatedNodes2d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G,
254+
CeedScalar *__restrict__ r_V) {
255+
for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
256+
ContractTransposeY2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp + 1 * NUM_COMP], c_G, &r_V[comp]);
257+
ContractTransposeAddX2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp + 0 * NUM_COMP], c_G, &r_V[comp]);
258+
}
259+
}
260+
237261
//------------------------------------------------------------------------------
238262
// 2D quadrature weights
239263
//------------------------------------------------------------------------------
@@ -519,6 +543,32 @@ inline __device__ void GradTransposeTensorCollocated3d(SharedData_Hip &data, con
519543
}
520544
}
521545

546+
//------------------------------------------------------------------------------
547+
// 3D derivatives at quadrature points, nodes and quadrature points collocated
548+
//------------------------------------------------------------------------------
549+
template <int NUM_COMP, int P_1D, int Q_1D, int T_1D>
550+
inline __device__ void GradTensorCollocatedNodes3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G,
551+
CeedScalar *__restrict__ r_V) {
552+
for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
553+
ContractX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * P_1D], c_G, &r_V[comp * Q_1D + 0 * NUM_COMP * Q_1D]);
554+
ContractY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * P_1D], c_G, &r_V[comp * Q_1D + 1 * NUM_COMP * Q_1D]);
555+
ContractZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * P_1D], c_G, &r_V[comp * Q_1D + 2 * NUM_COMP * Q_1D]);
556+
}
557+
}
558+
559+
//------------------------------------------------------------------------------
560+
// 3D derivatives transpose, nodes and quadrature points collocated
561+
//------------------------------------------------------------------------------
562+
template <int NUM_COMP, int P_1D, int Q_1D, int T_1D>
563+
inline __device__ void GradTransposeTensorCollocatedNodes3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G,
564+
CeedScalar *__restrict__ r_V) {
565+
for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
566+
ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 2 * NUM_COMP * Q_1D], c_G, &r_V[comp * P_1D]);
567+
ContractTransposeAddY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 1 * NUM_COMP * Q_1D], c_G, &r_V[comp * P_1D]);
568+
ContractTransposeAddX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 0 * NUM_COMP * Q_1D], c_G, &r_V[comp * P_1D]);
569+
}
570+
}
571+
522572
//------------------------------------------------------------------------------
523573
// 3D quadrature weights
524574
//------------------------------------------------------------------------------

0 commit comments

Comments
 (0)