Skip to content

Commit 84326e1

Browse files
committed
gpu - collocated nodes/qpts for gen
1 parent 02219a0 commit 84326e1

8 files changed

+531
-70
lines changed

backends/cuda-gen/ceed-cuda-gen-operator-build.cpp

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -261,8 +261,15 @@ static int CeedOperatorBuildKernelFieldData_Cuda_gen(std::ostringstream &code, C
261261

262262
code << tab << "CeedScalar *s_B" << var_suffix << " = " << reuse_var << ";\n";
263263
} else {
264-
code << tab << "__shared__ CeedScalar s_B" << var_suffix << "[" << P_name << "*" << Q_name << "];\n";
265-
code << tab << "LoadMatrix<" << P_name << ", " << Q_name << ">(data, B." << option_name << "[" << i << "], s_B" << var_suffix << ");\n";
264+
bool is_collocated = false;
265+
266+
CeedCallBackend(CeedBasisIsCollocated(basis, &is_collocated));
267+
if (is_collocated && !is_at_points) {
268+
code << tab << "CeedScalar *s_B" << var_suffix << " = NULL;\n";
269+
} else {
270+
code << tab << "__shared__ CeedScalar s_B" << var_suffix << "[" << P_name << "*" << Q_name << "];\n";
271+
code << tab << "LoadMatrix<" << P_name << ", " << Q_name << ">(data, B." << option_name << "[" << i << "], s_B" << var_suffix << ");\n";
272+
}
266273
}
267274
break;
268275
case CEED_EVAL_GRAD:
@@ -293,8 +300,15 @@ static int CeedOperatorBuildKernelFieldData_Cuda_gen(std::ostringstream &code, C
293300

294301
code << tab << "CeedScalar *s_B" << var_suffix << " = " << reuse_var << ";\n";
295302
} else {
296-
code << tab << "__shared__ CeedScalar s_B" << var_suffix << "[" << P_name << "*" << Q_name << "];\n";
297-
code << tab << "LoadMatrix<" << P_name << ", " << Q_name << ">(data, B." << option_name << "[" << i << "], s_B" << var_suffix << ");\n";
303+
bool is_collocated = false;
304+
305+
CeedCallBackend(CeedBasisIsCollocated(basis, &is_collocated));
306+
if (is_collocated && !is_at_points) {
307+
code << tab << "CeedScalar *s_B" << var_suffix << " = NULL;\n";
308+
} else {
309+
code << tab << "__shared__ CeedScalar s_B" << var_suffix << "[" << P_name << "*" << Q_name << "];\n";
310+
code << tab << "LoadMatrix<" << P_name << ", " << Q_name << ">(data, B." << option_name << "[" << i << "], s_B" << var_suffix << ");\n";
311+
}
298312
}
299313
}
300314
if (is_at_points) break; // No G mat for AtPoints
@@ -492,10 +506,11 @@ static int CeedOperatorBuildKernelRestriction_Cuda_gen(std::ostringstream &code,
492506
static int CeedOperatorBuildKernelBasis_Cuda_gen(std::ostringstream &code, CeedOperator_Cuda_gen *data, Tab &tab, CeedInt i,
493507
CeedOperatorField op_field, CeedQFunctionField qf_field, CeedInt max_dim, CeedInt Q_1d,
494508
bool is_input, bool is_all_tensor, bool is_at_points, bool use_3d_slices) {
495-
bool is_tensor = true;
509+
bool is_tensor = true, is_collocated = true;
496510
CeedBasis basis;
497511
CeedCallBackend(CeedOperatorFieldGetBasis(op_field, &basis));
498512
CeedCallBackend(CeedBasisIsTensor(basis, &is_tensor));
513+
CeedCallBackend(CeedBasisIsCollocated(basis, &is_collocated));
499514

500515
std::string var_suffix = (is_input ? "_in_" : "_out_") + std::to_string(i);
501516
std::string P_name = (is_tensor ? "P_1d" : "P") + var_suffix, Q_name = is_tensor ? "Q_1d" : "Q";
@@ -534,9 +549,9 @@ static int CeedOperatorBuildKernelBasis_Cuda_gen(std::ostringstream &code, CeedO
534549
code << tab << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_e" << var_suffix
535550
<< ", s_B" << var_suffix << ", r_c" << var_suffix << ");\n";
536551
} else {
537-
std::string function_name = is_tensor
538-
? ((dim == 1 ? "Interp" : "InterpTensor") + std::to_string(dim) + "d" + (is_all_tensor ? "" : "Flattened"))
539-
: "InterpNonTensor";
552+
std::string function_name = is_tensor ? ((dim == 1 ? "Interp" : "InterpTensor") + std::string(is_collocated ? "CollocatedNodes" : "") +
553+
std::to_string(dim) + "d" + (is_all_tensor ? "" : "Flattened"))
554+
: "InterpNonTensor";
540555
std::string op_t_1d_name = (is_all_tensor || !is_tensor) ? "OP_T_1D" : (P_1d > Q_1d ? P_name : Q_name);
541556

542557
code << tab << "CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*" << (is_all_tensor && (dim >= 3) ? Q_name : "1") << "];\n";
@@ -552,15 +567,18 @@ static int CeedOperatorBuildKernelBasis_Cuda_gen(std::ostringstream &code, CeedO
552567
code << tab << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_e" << var_suffix
553568
<< ", s_B" << var_suffix << ", r_c" << var_suffix << ");\n";
554569
} else if (use_3d_slices) {
555-
std::string function_name = (dim > 1 ? "InterpTensor" : "Interp") + std::to_string(dim) + "d";
570+
std::string function_name =
571+
(dim > 1 ? "InterpTensor" : "Interp") + std::string(is_collocated ? "CollocatedNodes" : "") + std::to_string(dim) + "d";
556572

557573
code << tab << "CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*" << Q_name << "];\n";
558574
code << tab << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_e" << var_suffix
559575
<< ", s_B" << var_suffix << ", r_q" << var_suffix << ");\n";
560576
} else if (is_tensor) {
561-
bool is_collocated = dim == 3 && Q_1d >= P_1d;
562-
std::string function_name = (dim == 1 ? "Grad" : (is_collocated ? "GradTensorCollocated" : "GradTensor")) + std::to_string(dim) + "d" +
563-
(is_all_tensor ? "" : "Flattened");
577+
bool is_collocated_grad = dim == 3 && Q_1d >= P_1d;
578+
std::string function_name =
579+
(dim == 1 ? "Grad"
580+
: ("GradTensor" + std::string(is_collocated ? "CollocatedNodes" : (is_collocated_grad ? "Collocated" : "")))) +
581+
std::to_string(dim) + "d" + (is_all_tensor ? "" : "Flattened");
564582
std::string op_t_1d_name = is_all_tensor ? "OP_T_1D" : (P_1d > Q_1d ? P_name : Q_name);
565583

566584
code << tab << "CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*dim" << var_suffix << "*"
@@ -611,7 +629,8 @@ static int CeedOperatorBuildKernelBasis_Cuda_gen(std::ostringstream &code, CeedO
611629
<< ", s_B" << var_suffix << ", r_e" << var_suffix << ");\n";
612630
} else {
613631
std::string function_name =
614-
is_tensor ? ((dim == 1 ? "InterpTranspose" : "InterpTransposeTensor") + std::to_string(dim) + "d" + (is_all_tensor ? "" : "Flattened"))
632+
is_tensor ? ((dim == 1 ? "InterpTranspose" : "InterpTransposeTensor") + std::string(is_collocated ? "CollocatedNodes" : "") +
633+
std::to_string(dim) + "d" + (is_all_tensor ? "" : "Flattened"))
615634
: "InterpTransposeNonTensor";
616635
std::string op_t_1d_name = (is_all_tensor || !is_tensor) ? "OP_T_1D" : (P_1d > Q_1d ? P_name : Q_name);
617636

@@ -627,14 +646,17 @@ static int CeedOperatorBuildKernelBasis_Cuda_gen(std::ostringstream &code, CeedO
627646
code << tab << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_c" << var_suffix
628647
<< ", s_B" << var_suffix << ", r_e" << var_suffix << ");\n";
629648
} else if (use_3d_slices) {
630-
std::string function_name = (dim == 1 ? "InterpTranspose" : "InterpTransposeTensor") + std::to_string(dim) + "d";
649+
std::string function_name = (dim == 1 ? "InterpTranspose" : "InterpTransposeTensor") + std::string(is_collocated ? "CollocatedNodes" : "") +
650+
std::to_string(dim) + "d";
631651

632652
code << tab << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_q" << var_suffix
633653
<< ", s_B" << var_suffix << ", r_e" << var_suffix << ");\n";
634654
} else if (is_tensor) {
635-
bool is_collocated = dim == 3 && Q_1d >= P_1d;
636-
std::string function_name = (dim == 1 ? "GradTranspose" : (is_collocated ? "GradTransposeTensorCollocated" : "GradTransposeTensor")) +
637-
std::to_string(dim) + "d" + (is_all_tensor ? "" : "Flattened");
655+
bool is_collocated_grad = dim == 3 && Q_1d >= P_1d;
656+
std::string function_name =
657+
(dim == 1 ? "GradTranspose"
658+
: ("GradTransposeTensor" + std::string(is_collocated ? "CollocatedNodes" : (is_collocated_grad ? "Collocated" : "")))) +
659+
std::to_string(dim) + "d" + (is_all_tensor ? "" : "Flattened");
638660
std::string op_t_1d_name = is_all_tensor ? "OP_T_1D" : (P_1d > Q_1d ? P_name : Q_name);
639661

640662
code << tab << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", " << op_t_1d_name << ">(data, r_q"
@@ -870,7 +892,7 @@ static int CeedOperatorBuildKernelQFunction_Cuda_gen(std::ostringstream &code, C
870892
code << tab << "CeedScalar r_s" << var_suffix << "[num_comp" << var_suffix << "];\n";
871893
code << tab << "for (CeedInt j = 0; j < num_comp" << var_suffix << "; j++) {\n";
872894
tab.push();
873-
code << "r_s" << var_suffix << "[j] = r_q" << var_suffix << "[q + j*" << Q_name << "];\n";
895+
code << tab << "r_s" << var_suffix << "[j] = r_q" << var_suffix << "[q + j*" << Q_name << "];\n";
874896
tab.pop();
875897
code << tab << "}\n";
876898
break;

backends/hip-gen/ceed-hip-gen-operator-build.cpp

Lines changed: 36 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -288,8 +288,15 @@ static int CeedOperatorBuildKernelFieldData_Hip_gen(std::ostringstream &code, Ce
288288

289289
code << tab << "CeedScalar *s_B" << var_suffix << " = " << reuse_var << ";\n";
290290
} else {
291-
code << tab << "__shared__ CeedScalar s_B" << var_suffix << "[" << P_name << "*" << Q_name << "];\n";
292-
code << tab << "LoadMatrix<" << P_name << ", " << Q_name << ">(data, B." << option_name << "[" << i << "], s_B" << var_suffix << ");\n";
291+
bool is_collocated = false;
292+
293+
CeedCallBackend(CeedBasisIsCollocated(basis, &is_collocated));
294+
if (is_collocated && !is_at_points) {
295+
code << tab << "CeedScalar *s_B" << var_suffix << " = NULL;\n";
296+
} else {
297+
code << tab << "__shared__ CeedScalar s_B" << var_suffix << "[" << P_name << "*" << Q_name << "];\n";
298+
code << tab << "LoadMatrix<" << P_name << ", " << Q_name << ">(data, B." << option_name << "[" << i << "], s_B" << var_suffix << ");\n";
299+
}
293300
}
294301
break;
295302
case CEED_EVAL_GRAD:
@@ -320,8 +327,15 @@ static int CeedOperatorBuildKernelFieldData_Hip_gen(std::ostringstream &code, Ce
320327

321328
code << tab << "CeedScalar *s_B" << var_suffix << " = " << reuse_var << ";\n";
322329
} else {
323-
code << tab << "__shared__ CeedScalar s_B" << var_suffix << "[" << P_name << "*" << Q_name << "];\n";
324-
code << tab << "LoadMatrix<" << P_name << ", " << Q_name << ">(data, B." << option_name << "[" << i << "], s_B" << var_suffix << ");\n";
330+
bool is_collocated = false;
331+
332+
CeedCallBackend(CeedBasisIsCollocated(basis, &is_collocated));
333+
if (is_collocated && !is_at_points) {
334+
code << tab << "CeedScalar *s_B" << var_suffix << " = NULL;\n";
335+
} else {
336+
code << tab << "__shared__ CeedScalar s_B" << var_suffix << "[" << P_name << "*" << Q_name << "];\n";
337+
code << tab << "LoadMatrix<" << P_name << ", " << Q_name << ">(data, B." << option_name << "[" << i << "], s_B" << var_suffix << ");\n";
338+
}
325339
}
326340
}
327341
if (is_at_points) break; // No G mat for AtPoints
@@ -519,10 +533,11 @@ static int CeedOperatorBuildKernelRestriction_Hip_gen(std::ostringstream &code,
519533
static int CeedOperatorBuildKernelBasis_Hip_gen(std::ostringstream &code, CeedOperator_Hip_gen *data, Tab &tab, CeedInt i, CeedOperatorField op_field,
520534
CeedQFunctionField qf_field, CeedInt max_dim, CeedInt Q_1d, bool is_input, bool is_all_tensor,
521535
bool is_at_points, bool use_3d_slices) {
522-
bool is_tensor = true;
536+
bool is_tensor = true, is_collocated = true;
523537
CeedBasis basis;
524538
CeedCallBackend(CeedOperatorFieldGetBasis(op_field, &basis));
525539
CeedCallBackend(CeedBasisIsTensor(basis, &is_tensor));
540+
CeedCallBackend(CeedBasisIsCollocated(basis, &is_collocated));
526541

527542
std::string var_suffix = (is_input ? "_in_" : "_out_") + std::to_string(i);
528543
std::string P_name = (is_tensor ? "P_1d" : "P") + var_suffix, Q_name = is_tensor ? "Q_1d" : "Q";
@@ -561,9 +576,9 @@ static int CeedOperatorBuildKernelBasis_Hip_gen(std::ostringstream &code, CeedOp
561576
code << tab << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_e" << var_suffix
562577
<< ", s_B" << var_suffix << ", r_c" << var_suffix << ");\n";
563578
} else {
564-
std::string function_name = is_tensor
565-
? ((dim == 1 ? "Interp" : "InterpTensor") + std::to_string(dim) + "d" + (is_all_tensor ? "" : "Flattened"))
566-
: "InterpNonTensor";
579+
std::string function_name = is_tensor ? ((dim == 1 ? "Interp" : "InterpTensor") + std::string(is_collocated ? "CollocatedNodes" : "") +
580+
std::to_string(dim) + "d" + (is_all_tensor ? "" : "Flattened"))
581+
: "InterpNonTensor";
567582
std::string op_t_1d_name = (is_all_tensor || !is_tensor) ? "OP_T_1D" : (P_1d > Q_1d ? P_name : Q_name);
568583

569584
code << tab << "CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*" << (is_all_tensor && (dim >= 3) ? Q_name : "1") << "];\n";
@@ -579,14 +594,15 @@ static int CeedOperatorBuildKernelBasis_Hip_gen(std::ostringstream &code, CeedOp
579594
code << tab << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_e" << var_suffix
580595
<< ", s_B" << var_suffix << ", r_c" << var_suffix << ");\n";
581596
} else if (use_3d_slices) {
582-
std::string function_name = (dim > 1 ? "InterpTensor" : "Interp") + std::to_string(dim) + "d";
597+
std::string function_name =
598+
(dim > 1 ? "InterpTensor" : "Interp") + std::string(is_collocated ? "CollocatedNodes" : "") + std::to_string(dim) + "d";
583599

584600
code << tab << "CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*" << Q_name << "];\n";
585601
code << tab << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_e" << var_suffix
586602
<< ", s_B" << var_suffix << ", r_q" << var_suffix << ");\n";
587603
} else if (is_tensor) {
588-
bool is_collocated = dim == 3 && Q_1d >= P_1d;
589-
std::string function_name = (dim == 1 ? "Grad" : (is_collocated ? "GradTensorCollocated" : "GradTensor")) + std::to_string(dim) + "d" +
604+
bool is_collocated_grad = dim == 3 && Q_1d >= P_1d;
605+
std::string function_name = (dim == 1 ? "Grad" : ("GradTensor" + std::string(is_collocated ? "CollocatedNodes" : (is_collocated_grad ? "Collocated" : "")))) + std::to_string(dim) + "d" +
590606
(is_all_tensor ? "" : "Flattened");
591607
std::string op_t_1d_name = is_all_tensor ? "OP_T_1D" : (P_1d > Q_1d ? P_name : Q_name);
592608

@@ -638,7 +654,8 @@ static int CeedOperatorBuildKernelBasis_Hip_gen(std::ostringstream &code, CeedOp
638654
<< ", s_B" << var_suffix << ", r_e" << var_suffix << ");\n";
639655
} else {
640656
std::string function_name =
641-
is_tensor ? ((dim == 1 ? "InterpTranspose" : "InterpTransposeTensor") + std::to_string(dim) + "d" + (is_all_tensor ? "" : "Flattened"))
657+
is_tensor ? ((dim == 1 ? "InterpTranspose" : "InterpTransposeTensor") + std::string(is_collocated ? "CollocatedNodes" : "") +
658+
std::to_string(dim) + "d" + (is_all_tensor ? "" : "Flattened"))
642659
: "InterpTransposeNonTensor";
643660
std::string op_t_1d_name = (is_all_tensor || !is_tensor) ? "OP_T_1D" : (P_1d > Q_1d ? P_name : Q_name);
644661

@@ -654,14 +671,17 @@ static int CeedOperatorBuildKernelBasis_Hip_gen(std::ostringstream &code, CeedOp
654671
code << tab << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_c" << var_suffix
655672
<< ", s_B" << var_suffix << ", r_e" << var_suffix << ");\n";
656673
} else if (use_3d_slices) {
657-
std::string function_name = (dim == 1 ? "InterpTranspose" : "InterpTransposeTensor") + std::to_string(dim) + "d";
674+
std::string function_name = (dim == 1 ? "InterpTranspose" : "InterpTransposeTensor") + std::string(is_collocated ? "CollocatedNodes" : "") +
675+
std::to_string(dim) + "d";
658676

659677
code << tab << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", OP_T_1D>(data, r_q" << var_suffix
660678
<< ", s_B" << var_suffix << ", r_e" << var_suffix << ");\n";
661679
} else if (is_tensor) {
662-
bool is_collocated = dim == 3 && Q_1d >= P_1d;
663-
std::string function_name = (dim == 1 ? "GradTranspose" : (is_collocated ? "GradTransposeTensorCollocated" : "GradTransposeTensor")) +
664-
std::to_string(dim) + "d" + (is_all_tensor ? "" : "Flattened");
680+
bool is_collocated_grad = dim == 3 && Q_1d >= P_1d;
681+
std::string function_name =
682+
(dim == 1 ? "GradTranspose"
683+
: ("GradTransposeTensor" + std::string(is_collocated ? "CollocatedNodes" : (is_collocated_grad ? "Collocated" : "")))) +
684+
std::to_string(dim) + "d" + (is_all_tensor ? "" : "Flattened");
665685
std::string op_t_1d_name = is_all_tensor ? "OP_T_1D" : (P_1d > Q_1d ? P_name : Q_name);
666686

667687
code << tab << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", " << op_t_1d_name << ">(data, r_q"

0 commit comments

Comments
 (0)