@@ -177,7 +177,7 @@ static int CeedOperatorBuildKernelData_Cuda_gen(Ceed ceed, CeedInt num_input_fie
 // Setup fields
 // ------------------------------------------------------------------------------
 static int CeedOperatorBuildKernelFieldData_Cuda_gen(std::ostringstream &code, CeedOperator_Cuda_gen *data, CeedInt i, CeedOperatorField op_field,
-                                                     CeedQFunctionField qf_field, FieldReuse_Cuda field_reuse, CeedInt Q_1d, bool is_input,
+                                                     CeedQFunctionField qf_field, FieldReuse_Cuda field_reuse, CeedInt Q, CeedInt Q_1d, bool is_input,
                                                      bool is_all_tensor, bool is_at_points, bool use_3d_slices) {
   bool      is_tensor = true;
   CeedBasis basis;
@@ -220,7 +220,7 @@ static int CeedOperatorBuildKernelFieldData_Cuda_gen(std::ostringstream &code, C
     CeedInt P = 0;

     CeedCallBackend(CeedBasisGetNumNodes(basis, &P));
-    code << " const CeedInt P" << var_suffix << " = " << (basis == CEED_BASIS_NONE ? Q_1d : P) << ";\n";
+    code << " const CeedInt P" << var_suffix << " = " << (basis == CEED_BASIS_NONE ? Q : P) << ";\n";
   }
   code << " const CeedInt " << P_name << " = " << (basis == CEED_BASIS_NONE ? Q_1d : P_1d) << ";\n";
   if (eval_mode != CEED_EVAL_WEIGHT) {
@@ -528,10 +528,11 @@ static int CeedOperatorBuildKernelBasis_Cuda_gen(std::ostringstream &code, CeedO
         std::string function_name = is_tensor
                                         ? ((dim == 1 ? "Interp" : "InterpTensor") + std::to_string(dim) + "d" + (is_all_tensor ? "" : "Flattened"))
                                         : "InterpNonTensor";
+        std::string op_t_1d_name = (is_all_tensor || !is_tensor) ? "OP_T_1D" : (P_1d > Q_1d ? P_name : Q_name);

         code << " CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*" << (is_tensor && (dim >= 3) ? Q_name : "1") << "];\n";
-        code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", " << (P_1d > Q_1d ? P_name : Q_name)
-             << ">(data, r_e" << var_suffix << ", s_B" << var_suffix << ", r_q" << var_suffix << ");\n";
+        code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", " << op_t_1d_name << ">(data, r_e"
+             << var_suffix << ", s_B" << var_suffix << ", r_q" << var_suffix << ");\n";
       }
       break;
     case CEED_EVAL_GRAD:
@@ -551,17 +552,18 @@ static int CeedOperatorBuildKernelBasis_Cuda_gen(std::ostringstream &code, CeedO
         bool        is_collocated = dim == 3 && Q_1d >= P_1d;
         std::string function_name = (dim == 1 ? "Grad" : (is_collocated ? "GradTensorCollocated" : "GradTensor")) + std::to_string(dim) + "d" +
                                     (is_all_tensor ? "" : "Flattened");
+        std::string op_t_1d_name = is_all_tensor ? "OP_T_1D" : (P_1d > Q_1d ? P_name : Q_name);

         code << " CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*dim" << var_suffix << "*" << (dim >= 3 ? Q_name : "1")
              << "];\n";
-        code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", " << (P_1d > Q_1d ? P_name : Q_name)
-             << ">(data, r_e" << var_suffix << ", s_B" << var_suffix << ", s_G" << var_suffix << ", r_q" << var_suffix << ");\n";
+        code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", " << op_t_1d_name << ">(data, r_e"
+             << var_suffix << ", s_B" << var_suffix << ", s_G" << var_suffix << ", r_q" << var_suffix << ");\n";
       } else {
         std::string function_name = "GradNonTensor";

         code << " CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*dim" << var_suffix << "];\n";
-        code << " " << function_name << "<num_comp" << var_suffix << ", dim" << var_suffix << ", " << P_name << ", " << Q_name << ", "
-             << (P_1d > Q_1d ? P_name : Q_name) << ">(data, r_e" << var_suffix << ", s_G" << var_suffix << ", r_q" << var_suffix << ");\n";
+        code << " " << function_name << "<num_comp" << var_suffix << ", dim" << var_suffix << ", " << P_name << ", " << Q_name
+             << ", OP_T_1D>(data, r_e" << var_suffix << ", s_G" << var_suffix << ", r_q" << var_suffix << ");\n";
       }
       break;
     case CEED_EVAL_WEIGHT: {
@@ -602,9 +604,10 @@ static int CeedOperatorBuildKernelBasis_Cuda_gen(std::ostringstream &code, CeedO
         std::string function_name =
             is_tensor ? ((dim == 1 ? "InterpTranspose" : "InterpTransposeTensor") + std::to_string(dim) + "d" + (is_all_tensor ? "" : "Flattened"))
                       : "InterpTransposeNonTensor";
+        std::string op_t_1d_name = (is_all_tensor || !is_tensor) ? "OP_T_1D" : (P_1d > Q_1d ? P_name : Q_name);

-        code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", " << (P_1d > Q_1d ? P_name : Q_name)
-             << ">(data, r_q" << var_suffix << ", s_B" << var_suffix << ", r_e" << var_suffix << ");\n";
+        code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", " << op_t_1d_name << ">(data, r_q"
+             << var_suffix << ", s_B" << var_suffix << ", r_e" << var_suffix << ");\n";
       }
       break;
     case CEED_EVAL_GRAD:
@@ -623,14 +626,15 @@ static int CeedOperatorBuildKernelBasis_Cuda_gen(std::ostringstream &code, CeedO
         bool        is_collocated = dim == 3 && Q_1d >= P_1d;
         std::string function_name = (dim == 1 ? "GradTranspose" : (is_collocated ? "GradTransposeTensorCollocated" : "GradTransposeTensor")) +
                                     std::to_string(dim) + "d" + (is_all_tensor ? "" : "Flattened");
+        std::string op_t_1d_name = is_all_tensor ? "OP_T_1D" : (P_1d > Q_1d ? P_name : Q_name);

-        code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", " << (P_1d > Q_1d ? P_name : Q_name)
-             << ">(data, r_q" << var_suffix << ", s_B" << var_suffix << ", s_G" << var_suffix << ", r_e" << var_suffix << ");\n";
+        code << " " << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << ", " << op_t_1d_name << ">(data, r_q"
+             << var_suffix << ", s_B" << var_suffix << ", s_G" << var_suffix << ", r_e" << var_suffix << ");\n";
       } else {
         std::string function_name = "GradTransposeNonTensor";

-        code << " " << function_name << "<num_comp" << var_suffix << ", dim" << var_suffix << ", " << P_name << ", " << Q_name << ", "
-             << (P_1d > Q_1d ? P_name : Q_name) << ">(data, r_q" << var_suffix << ", s_G" << var_suffix << ", r_e" << var_suffix << ");\n";
+        code << " " << function_name << "<num_comp" << var_suffix << ", dim" << var_suffix << ", " << P_name << ", " << Q_name
+             << ", OP_T_1D>(data, r_q" << var_suffix << ", s_G" << var_suffix << ", r_e" << var_suffix << ");\n";
       }
       break;
     // LCOV_EXCL_START
@@ -1160,6 +1164,8 @@ extern "C" int CeedOperatorBuildKernel_Cuda_gen(CeedOperator op, bool *is_good_b
     if (is_at_points) Q_1d = max_num_points;
     else CeedCallBackend(CeedOperatorGetNumQuadraturePoints(op, &Q_1d));
   }
+  if (Q == 0) Q = Q_1d;
+  data->Q    = Q;
   data->Q_1d = Q_1d;

   // Check for restriction only identity operator
@@ -1389,13 +1395,13 @@ extern "C" int CeedOperatorBuildKernel_Cuda_gen(CeedOperator op, bool *is_good_b
   // Initialize constants, and matrices B and G
   code << "\n // Input field constants and basis data\n";
   for (CeedInt i = 0; i < num_input_fields; i++) {
-    CeedCallBackend(CeedOperatorBuildKernelFieldData_Cuda_gen(code, data, i, op_input_fields[i], qf_input_fields[i], input_matrix_reuse[i], Q_1d,
+    CeedCallBackend(CeedOperatorBuildKernelFieldData_Cuda_gen(code, data, i, op_input_fields[i], qf_input_fields[i], input_matrix_reuse[i], Q, Q_1d,
                                                               true, is_all_tensor, is_at_points, use_3d_slices));
   }
   code << "\n // Output field constants and basis data\n";
   for (CeedInt i = 0; i < num_output_fields; i++) {
-    CeedCallBackend(CeedOperatorBuildKernelFieldData_Cuda_gen(code, data, i, op_output_fields[i], qf_output_fields[i], output_matrix_reuse[i], Q_1d,
-                                                              false, is_all_tensor, is_at_points, use_3d_slices));
+    CeedCallBackend(CeedOperatorBuildKernelFieldData_Cuda_gen(code, data, i, op_output_fields[i], qf_output_fields[i], output_matrix_reuse[i], Q,
+                                                              Q_1d, false, is_all_tensor, is_at_points, use_3d_slices));
   }

   // Loop over all elements
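
To make the template-argument change above concrete, here is a small, self-contained C++ sketch (not libCEED code; the helper name SelectOpT1dName and the example name strings "P_1d_in_0", "Q_1d_in_0", "P_in_0", "Q_in_0" are placeholders standing in for P_name/Q_name with a var_suffix) of the op_t_1d_name rule added in the interp case: all-tensor operators and non-tensor bases pass the kernel-wide OP_T_1D constant, while a tensor basis inside a not-all-tensor operator keeps the max(P_1d, Q_1d) name as before. The grad paths reach the same outcome, with the non-tensor grad branch passing OP_T_1D directly.

// sketch_op_t_1d.cpp -- illustrative only, assumptions noted above
#include <iostream>
#include <string>

// Mirrors the op_t_1d_name selection in the CEED_EVAL_INTERP case:
// OP_T_1D for all-tensor operators and non-tensor bases, otherwise the
// name of max(P_1d, Q_1d).
static std::string SelectOpT1dName(bool is_all_tensor, bool is_tensor, int P_1d, int Q_1d,
                                   const std::string &P_name, const std::string &Q_name) {
  if (is_all_tensor || !is_tensor) return "OP_T_1D";
  return (P_1d > Q_1d) ? P_name : Q_name;
}

int main() {
  // Tensor basis in a not-all-tensor (flattened) operator: keep max(P_1d, Q_1d)
  std::cout << SelectOpT1dName(false, true, 4, 3, "P_1d_in_0", "Q_1d_in_0") << "\n";  // P_1d_in_0
  // All-tensor operator: the generated call is instantiated with OP_T_1D
  std::cout << SelectOpT1dName(true, true, 4, 3, "P_1d_in_0", "Q_1d_in_0") << "\n";   // OP_T_1D
  // Non-tensor basis: also OP_T_1D
  std::cout << SelectOpT1dName(false, false, 4, 3, "P_in_0", "Q_in_0") << "\n";       // OP_T_1D
  return 0;
}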