Skip to content

Commit c48f2a8

Browse files
authored
Merge pull request #1773 from CEED/jeremy/basis-t1d
Make BASIS_T_1D explicit template parameter
2 parents 7577ddf + 6b92dc4 commit c48f2a8

16 files changed

+492
-492
lines changed

backends/cuda-gen/ceed-cuda-gen-operator-build.cpp

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -469,42 +469,42 @@ static int CeedOperatorBuildKernelBasis_Cuda_gen(std::ostringstream &code, CeedO
469469
std::string function_name = (dim == 1 ? "Interp" : "InterpTensor") + std::to_string(dim) + "d";
470470

471471
code << " CeedScalar r_c" << var_suffix << "[num_comp" << var_suffix << "*" << (dim >= 3 ? Q_name : "1") << "];\n";
472-
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ">(data, r_e" << var_suffix << ", s_B"
473-
<< var_suffix << ", r_c" << var_suffix << ");\n";
472+
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_e" << var_suffix
473+
<< ", s_B" << var_suffix << ", r_c" << var_suffix << ");\n";
474474
} else {
475475
std::string function_name = is_tensor ? ((dim == 1 ? "Interp" : "InterpTensor") + std::to_string(dim) + "d") : "InterpNonTensor";
476476

477477
code << " CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*" << (is_tensor && (dim >= 3) ? Q_name : "1") << "];\n";
478-
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ">(data, r_e" << var_suffix << ", s_B"
479-
<< var_suffix << ", r_q" << var_suffix << ");\n";
478+
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_e" << var_suffix
479+
<< ", s_B" << var_suffix << ", r_q" << var_suffix << ");\n";
480480
}
481481
break;
482482
case CEED_EVAL_GRAD:
483483
if (is_at_points) {
484484
std::string function_name = (dim == 1 ? "Interp" : "InterpTensor") + std::to_string(dim) + "d";
485485

486486
code << " CeedScalar r_c" << var_suffix << "[num_comp" << var_suffix << "*" << (dim >= 3 ? Q_name : "1") << "];\n";
487-
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ">(data, r_e" << var_suffix << ", s_B"
488-
<< var_suffix << ", r_c" << var_suffix << ");\n";
487+
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_e" << var_suffix
488+
<< ", s_B" << var_suffix << ", r_c" << var_suffix << ");\n";
489489
} else if (use_3d_slices) {
490490
std::string function_name = (dim > 1 ? "InterpTensor" : "Interp") + std::to_string(dim) + "d";
491491

492492
code << " CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*" << Q_name << "];\n";
493-
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ">(data, r_e" << var_suffix << ", s_B"
494-
<< var_suffix << ", r_q" << var_suffix << ");\n";
493+
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_e" << var_suffix
494+
<< ", s_B" << var_suffix << ", r_q" << var_suffix << ");\n";
495495
} else if (is_tensor) {
496496
bool is_collocated = dim == 3 && Q_1d >= P_1d;
497497
std::string function_name = (dim == 1 ? "Grad" : (is_collocated ? "GradTensorCollocated" : "GradTensor")) + std::to_string(dim) + "d";
498498

499499
code << " CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*dim*" << (dim >= 3 ? Q_name : "1") << "];\n";
500-
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ">(data, r_e" << var_suffix << ", s_B"
501-
<< var_suffix << ", s_G" << var_suffix << ", r_q" << var_suffix << ");\n";
500+
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_e" << var_suffix
501+
<< ", s_B" << var_suffix << ", s_G" << var_suffix << ", r_q" << var_suffix << ");\n";
502502
} else {
503503
std::string function_name = "GradNonTensor";
504504

505505
code << " CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*dim];\n";
506-
code << " " << function_name << "<num_comp" << var_suffix << ", dim, " << P_name << ", " << Q_name << ">(data, r_e" << var_suffix
507-
<< ", s_G" << var_suffix << ", r_q" << var_suffix << ");\n";
506+
code << " " << function_name << "<num_comp" << var_suffix << ", dim, " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_e"
507+
<< var_suffix << ", s_G" << var_suffix << ", r_q" << var_suffix << ");\n";
508508
}
509509
break;
510510
case CEED_EVAL_WEIGHT: {
@@ -537,40 +537,40 @@ static int CeedOperatorBuildKernelBasis_Cuda_gen(std::ostringstream &code, CeedO
537537
if (is_at_points) {
538538
std::string function_name = (dim == 1 ? "InterpTranspose" : "InterpTransposeTensor") + std::to_string(dim) + "d";
539539

540-
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ">(data, r_c" << var_suffix << ", s_B"
541-
<< var_suffix << ", r_e" << var_suffix << ");\n";
540+
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_c" << var_suffix
541+
<< ", s_B" << var_suffix << ", r_e" << var_suffix << ");\n";
542542
} else {
543543
std::string function_name =
544544
is_tensor ? ((dim == 1 ? "InterpTranspose" : "InterpTransposeTensor") + std::to_string(dim) + "d") : "InterpTransposeNonTensor";
545545

546-
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ">(data, r_q" << var_suffix << ", s_B"
547-
<< var_suffix << ", r_e" << var_suffix << ");\n";
546+
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_q" << var_suffix
547+
<< ", s_B" << var_suffix << ", r_e" << var_suffix << ");\n";
548548
}
549549
break;
550550
case CEED_EVAL_GRAD:
551551
code << " CeedScalar *r_e" << var_suffix << " = r_e_scratch;\n";
552552
if (is_at_points) {
553553
std::string function_name = (dim == 1 ? "InterpTranspose" : "InterpTransposeTensor") + std::to_string(dim) + "d";
554554

555-
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ">(data, r_c" << var_suffix << ", s_B"
556-
<< var_suffix << ", r_e" << var_suffix << ");\n";
555+
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_c" << var_suffix
556+
<< ", s_B" << var_suffix << ", r_e" << var_suffix << ");\n";
557557
} else if (use_3d_slices) {
558558
std::string function_name = (dim == 1 ? "InterpTranspose" : "InterpTransposeTensor") + std::to_string(dim) + "d";
559559

560-
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ">(data, r_q" << var_suffix << ", s_B"
561-
<< var_suffix << ", r_e" << var_suffix << ");\n";
560+
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_q" << var_suffix
561+
<< ", s_B" << var_suffix << ", r_e" << var_suffix << ");\n";
562562
} else if (is_tensor) {
563563
bool is_collocated = dim == 3 && Q_1d >= P_1d;
564564
std::string function_name =
565565
(dim == 1 ? "GradTranspose" : (is_collocated ? "GradTransposeTensorCollocated" : "GradTransposeTensor")) + std::to_string(dim) + "d";
566566

567-
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ">(data, r_q" << var_suffix << ", s_B"
568-
<< var_suffix << ", s_G" << var_suffix << ", r_e" << var_suffix << ");\n";
567+
code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_q" << var_suffix
568+
<< ", s_B" << var_suffix << ", s_G" << var_suffix << ", r_e" << var_suffix << ");\n";
569569
} else {
570570
std::string function_name = "GradTransposeNonTensor";
571571

572-
code << " " << function_name << "<num_comp" << var_suffix << ", dim, " << P_name << ", " << Q_name << ">(data, r_q" << var_suffix
573-
<< ", s_G" << var_suffix << ", r_e" << var_suffix << ");\n";
572+
code << " " << function_name << "<num_comp" << var_suffix << ", dim, " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_q"
573+
<< var_suffix << ", s_G" << var_suffix << ", r_e" << var_suffix << ");\n";
574574
}
575575
break;
576576
// LCOV_EXCL_START
@@ -794,8 +794,8 @@ static int CeedOperatorBuildKernelQFunction_Cuda_gen(std::ostringstream &code, C
794794
break;
795795
case CEED_EVAL_GRAD:
796796
code << " CeedScalar r_s" << var_suffix << "[num_comp" << var_suffix << "*dim];\n";
797-
code << " GradColloSlice3d<num_comp" << var_suffix << ", " << Q_name << ">(data, q, r_q" << var_suffix << ", s_G" << var_suffix
798-
<< ", r_s" << var_suffix << ");\n";
797+
code << " GradColloSlice3d<num_comp" << var_suffix << ", " << Q_name << ", OP_T_1D>(data, q, r_q" << var_suffix << ", s_G"
798+
<< var_suffix << ", r_s" << var_suffix << ");\n";
799799
break;
800800
case CEED_EVAL_WEIGHT:
801801
code << " CeedScalar r_s" << var_suffix << "[1];\n";
@@ -963,7 +963,7 @@ static int CeedOperatorBuildKernelQFunction_Cuda_gen(std::ostringstream &code, C
963963
code << " }\n";
964964
break;
965965
case CEED_EVAL_GRAD:
966-
code << " GradColloSliceTranspose3d<num_comp" << var_suffix << ", " << Q_name << ">(data, q, r_s" << var_suffix << ", s_G"
966+
code << " GradColloSliceTranspose3d<num_comp" << var_suffix << ", " << Q_name << ", OP_T_1D>(data, q, r_s" << var_suffix << ", s_G"
967967
<< var_suffix << ", r_q" << var_suffix << ");\n";
968968
break;
969969
// LCOV_EXCL_START
@@ -1203,7 +1203,7 @@ extern "C" int CeedOperatorBuildKernel_Cuda_gen(CeedOperator op, bool *is_good_b
12031203
code << " data.t_id_y = threadIdx.y;\n";
12041204
code << " data.t_id_z = threadIdx.z;\n";
12051205
code << " data.t_id = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*blockDim.y*blockDim.x;\n";
1206-
code << " data.slice = slice + data.t_id_z*T_1D" << ((!is_tensor || dim == 1) ? "" : "*T_1D") << ";\n";
1206+
code << " data.slice = slice + data.t_id_z*OP_T_1D" << ((!is_tensor || dim == 1) ? "" : "*OP_T_1D") << ";\n";
12071207

12081208
// -- Determine input mat reuse
12091209
FieldReuse_Cuda input_matrix_reuse[CEED_FIELD_MAX];
@@ -1441,7 +1441,7 @@ extern "C" int CeedOperatorBuildKernel_Cuda_gen(CeedOperator op, bool *is_good_b
14411441
{
14421442
bool is_compile_good = false;
14431443

1444-
CeedCallBackend(CeedTryCompile_Cuda(ceed, code.str().c_str(), &is_compile_good, &data->module, 1, "T_1D", CeedIntMax(Q_1d, data->max_P_1d)));
1444+
CeedCallBackend(CeedTryCompile_Cuda(ceed, code.str().c_str(), &is_compile_good, &data->module, 1, "OP_T_1D", CeedIntMax(Q_1d, data->max_P_1d)));
14451445
if (is_compile_good) {
14461446
*is_good_build = true;
14471447
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, operator_name.c_str(), &data->op));

backends/cuda-shared/ceed-cuda-shared-basis.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ static int CeedBasisApplyAtPointsCore_Cuda_shared(CeedBasis basis, bool apply_ad
289289

290290
if (data->moduleAtPoints) CeedCallCuda(ceed, cuModuleUnload(data->moduleAtPoints));
291291
CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
292-
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->moduleAtPoints, 8, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "T_1D",
292+
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->moduleAtPoints, 8, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_T_1D",
293293
CeedIntMax(Q_1d, P_1d), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, "BASIS_NUM_NODES", CeedIntPow(P_1d, dim),
294294
"BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_NUM_PTS", max_num_points));
295295
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "InterpAtPoints", &data->InterpAtPoints));
@@ -630,7 +630,7 @@ int CeedBasisCreateTensorH1_Cuda_shared(CeedInt dim, CeedInt P_1d, CeedInt Q_1d,
630630
const char basis_kernel_source[] = "// Tensor basis source\n#include <ceed/jit-source/cuda/cuda-shared-basis-tensor.h>\n";
631631

632632
CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
633-
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 8, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "T_1D",
633+
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 8, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_T_1D",
634634
CeedIntMax(Q_1d, P_1d), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, "BASIS_NUM_NODES", CeedIntPow(P_1d, dim),
635635
"BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_HAS_COLLOCATED_GRAD", has_collocated_grad));
636636
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Interp", &data->Interp));
@@ -704,7 +704,7 @@ int CeedBasisCreateH1_Cuda_shared(CeedElemTopology topo, CeedInt dim, CeedInt nu
704704
const char basis_kernel_source[] = "// Non-tensor basis source\n#include <ceed/jit-source/cuda/cuda-shared-basis-nontensor.h>\n";
705705

706706
CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
707-
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "T_1D",
707+
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_T_1D",
708708
CeedIntMax(num_qpts, num_nodes), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp));
709709
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Interp", &data->Interp));
710710
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "InterpTranspose", &data->InterpTranspose));

0 commit comments

Comments (0)