Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 18 additions & 40 deletions backends/cuda-ref/ceed-cuda-ref-basis.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,12 @@ static int CeedBasisApplyCore_Cuda(CeedBasis basis, bool apply_add, const CeedIn
// Get read/write access to u, v
if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));

// Clear v for transpose operation
if (is_transpose && !apply_add) {
CeedInt num_comp, q_comp, num_nodes, num_qpts;
CeedSize length;

CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp));
CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes));
CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
length = (CeedSize)num_elem * (CeedSize)num_comp * (t_mode == CEED_TRANSPOSE ? (CeedSize)num_nodes : ((CeedSize)num_qpts * (CeedSize)q_comp));
CeedCallCuda(ceed, cudaMemset(d_v, 0, length * sizeof(CeedScalar)));
if (apply_add) {
CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
} else {
// Clear v for transpose operation
if (is_transpose) CeedCallBackend(CeedVectorSetValue(v, 0.0));
CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
}
CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
CeedCallBackend(CeedBasisGetDimension(basis, &dim));
Expand Down Expand Up @@ -203,20 +195,12 @@ static int CeedBasisApplyAtPointsCore_Cuda(CeedBasis basis, bool apply_add, cons
CeedCallBackend(CeedVectorGetArrayRead(x_ref, CEED_MEM_DEVICE, &d_x));
if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));

// Clear v for transpose operation
if (is_transpose && !apply_add) {
CeedInt num_comp, q_comp, num_nodes;
CeedSize length;

CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp));
CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes));
length =
(CeedSize)num_elem * (CeedSize)num_comp * (t_mode == CEED_TRANSPOSE ? (CeedSize)num_nodes : ((CeedSize)max_num_points * (CeedSize)q_comp));
CeedCallCuda(ceed, cudaMemset(d_v, 0, length * sizeof(CeedScalar)));
if (apply_add) {
CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
} else {
// Clear v for transpose operation
if (is_transpose) CeedCallBackend(CeedVectorSetValue(v, 0.0));
CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
}

// Basis action
Expand Down Expand Up @@ -287,18 +271,12 @@ static int CeedBasisApplyNonTensorCore_Cuda(CeedBasis basis, bool apply_add, con
// Get read/write access to u, v
if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));

// Clear v for transpose operation
if (is_transpose && !apply_add) {
CeedInt num_comp, q_comp;
CeedSize length;

CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp));
length = (CeedSize)num_elem * (CeedSize)num_comp * (t_mode == CEED_TRANSPOSE ? (CeedSize)num_nodes : ((CeedSize)num_qpts * (CeedSize)q_comp));
CeedCallCuda(ceed, cudaMemset(d_v, 0, length * sizeof(CeedScalar)));
if (apply_add) {
CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
} else {
// Clear v for transpose operation
if (is_transpose) CeedCallBackend(CeedVectorSetValue(v, 0.0));
CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
}

// Apply basis operation
Expand Down
34 changes: 16 additions & 18 deletions backends/cuda-shared/ceed-cuda-shared-basis.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,11 @@ static int CeedBasisApplyTensorCore_Cuda_shared(CeedBasis basis, bool apply_add,
// Get read/write access to u, v
if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
if (apply_add) {
CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
} else {
CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
}

// Apply basis operation
switch (eval_mode) {
Expand Down Expand Up @@ -297,20 +300,12 @@ static int CeedBasisApplyAtPointsCore_Cuda_shared(CeedBasis basis, bool apply_ad
CeedCallBackend(CeedVectorGetArrayRead(x_ref, CEED_MEM_DEVICE, &d_x));
if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));

// Clear v for transpose operation
if (is_transpose && !apply_add) {
CeedInt num_comp, q_comp, num_nodes;
CeedSize length;

CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp));
CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes));
length =
(CeedSize)num_elem * (CeedSize)num_comp * (t_mode == CEED_TRANSPOSE ? (CeedSize)num_nodes : ((CeedSize)max_num_points * (CeedSize)q_comp));
CeedCallCuda(ceed, cudaMemset(d_v, 0, length * sizeof(CeedScalar)));
if (apply_add) {
CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
} else {
// Clear v for transpose operation
if (is_transpose) CeedCallBackend(CeedVectorSetValue(v, 0.0));
CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
}

// Basis action
Expand Down Expand Up @@ -436,8 +431,11 @@ static int CeedBasisApplyNonTensorCore_Cuda_shared(CeedBasis basis, bool apply_a
// Get read/write access to u, v
if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
if (apply_add) {
CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
} else {
CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
}

// Apply basis operation
switch (eval_mode) {
Expand Down
56 changes: 19 additions & 37 deletions backends/hip-ref/ceed-hip-ref-basis.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,14 @@ static int CeedBasisApplyCore_Hip(CeedBasis basis, bool apply_add, const CeedInt
// Get read/write access to u, v
if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));

// Clear v for transpose operation
if (is_transpose && !apply_add) {
CeedInt num_comp, q_comp, num_nodes, num_qpts;
CeedSize length;

CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp));
CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes));
CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
length = (CeedSize)num_elem * (CeedSize)num_comp * (t_mode == CEED_TRANSPOSE ? (CeedSize)num_nodes : ((CeedSize)num_qpts * (CeedSize)q_comp));
CeedCallHip(ceed, hipMemset(d_v, 0, length * sizeof(CeedScalar)));
if (apply_add) {
CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
} else {
// Clear v for transpose operation
if (is_transpose) CeedCallBackend(CeedVectorSetValue(v, 0.0));
CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
}

CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
CeedCallBackend(CeedBasisGetDimension(basis, &dim));

Expand Down Expand Up @@ -201,20 +194,12 @@ static int CeedBasisApplyAtPointsCore_Hip(CeedBasis basis, bool apply_add, const
CeedCallBackend(CeedVectorGetArrayRead(x_ref, CEED_MEM_DEVICE, &d_x));
if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));

// Clear v for transpose operation
if (is_transpose && !apply_add) {
CeedInt num_comp, q_comp, num_nodes;
CeedSize length;

CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp));
CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes));
length =
(CeedSize)num_elem * (CeedSize)num_comp * (t_mode == CEED_TRANSPOSE ? (CeedSize)num_nodes : ((CeedSize)max_num_points * (CeedSize)q_comp));
CeedCallHip(ceed, hipMemset(d_v, 0, length * sizeof(CeedScalar)));
if (apply_add) {
CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
} else {
// Clear v for transpose operation
if (is_transpose) CeedCallBackend(CeedVectorSetValue(v, 0.0));
CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
}

// Basis action
Expand Down Expand Up @@ -285,15 +270,12 @@ static int CeedBasisApplyNonTensorCore_Hip(CeedBasis basis, bool apply_add, cons
// Get read/write access to u, v
if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));

// Clear v for transpose operation
if (is_transpose && !apply_add) {
CeedSize length;

CeedCallBackend(CeedVectorGetLength(v, &length));
CeedCallHip(ceed, hipMemset(d_v, 0, length * sizeof(CeedScalar)));
if (apply_add) {
CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
} else {
// Clear v for transpose operation
if (is_transpose) CeedCallBackend(CeedVectorSetValue(v, 0.0));
CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
}

// Apply basis operation
Expand Down
34 changes: 16 additions & 18 deletions backends/hip-shared/ceed-hip-shared-basis.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,11 @@ static int CeedBasisApplyTensorCore_Hip_shared(CeedBasis basis, bool apply_add,
// Get read/write access to u, v
if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
if (apply_add) {
CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
} else {
CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
}

// Apply basis operation
switch (eval_mode) {
Expand Down Expand Up @@ -365,20 +368,12 @@ static int CeedBasisApplyAtPointsCore_Hip_shared(CeedBasis basis, bool apply_add
CeedCallBackend(CeedVectorGetArrayRead(x_ref, CEED_MEM_DEVICE, &d_x));
if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));

// Clear v for transpose operation
if (is_transpose && !apply_add) {
CeedInt num_comp, q_comp, num_nodes;
CeedSize length;

CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp));
CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes));
length =
(CeedSize)num_elem * (CeedSize)num_comp * (t_mode == CEED_TRANSPOSE ? (CeedSize)num_nodes : ((CeedSize)max_num_points * (CeedSize)q_comp));
CeedCallHip(ceed, hipMemset(d_v, 0, length * sizeof(CeedScalar)));
if (apply_add) {
CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
} else {
// Clear v for transpose operation
if (is_transpose) CeedCallBackend(CeedVectorSetValue(v, 0.0));
CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
}

// Basis action
Expand Down Expand Up @@ -502,8 +497,11 @@ static int CeedBasisApplyNonTensorCore_Hip_shared(CeedBasis basis, bool apply_ad
// Get read/write access to u, v
if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
if (apply_add) {
CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
} else {
CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
}

// Apply basis operation
switch (eval_mode) {
Expand Down
Loading