Skip to content

Commit 05c4fc1

Browse files
committed
cuda/hip - use new include pattern for JiT
1 parent 439015d commit 05c4fc1

21 files changed

+133
-395
lines changed

backends/cuda-gen/ceed-cuda-gen-operator-build.cpp

Lines changed: 12 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -696,42 +696,17 @@ extern "C" int CeedOperatorBuildKernel_Cuda_gen(CeedOperator op) {
696696
CeedCallBackend(CeedGetData(ceed, &ceed_data));
697697
CeedCallBackend(cudaGetDeviceProperties(&prop, ceed_data->device_id));
698698
if ((prop.major < 6) && (CEED_SCALAR_TYPE != CEED_SCALAR_FP32)) {
699-
char *atomic_add_source;
700-
const char *atomic_add_path;
701-
702-
CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-atomic-add-fallback.h", &atomic_add_path));
703-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Atomic Add Source -----\n");
704-
CeedCallBackend(CeedLoadSourceToBuffer(ceed, atomic_add_path, &atomic_add_source));
705-
code << atomic_add_source;
706-
CeedCallBackend(CeedFree(&atomic_add_path));
707-
CeedCallBackend(CeedFree(&atomic_add_source));
699+
code << "// AtomicAdd fallback source\n";
700+
code << "#include <ceed/jit-source/cuda/cuda-atomic-add-fallback.h>\n\n";
708701
}
709702
}
710703

711704
// Load basis source files
712705
// TODO: Add non-tensor, AtPoints
713-
{
714-
char *tensor_basis_kernel_source;
715-
const char *tensor_basis_kernel_path;
716-
717-
CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-shared-basis-tensor-templates.h", &tensor_basis_kernel_path));
718-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Tensor Basis Kernel Source -----\n");
719-
CeedCallBackend(CeedLoadSourceToBuffer(ceed, tensor_basis_kernel_path, &tensor_basis_kernel_source));
720-
code << tensor_basis_kernel_source;
721-
CeedCallBackend(CeedFree(&tensor_basis_kernel_path));
722-
CeedCallBackend(CeedFree(&tensor_basis_kernel_source));
723-
}
724-
{
725-
char *cuda_gen_template_source;
726-
const char *cuda_gen_template_path;
727-
728-
CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-gen-templates.h", &cuda_gen_template_path));
729-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Cuda-Gen Template Source -----\n");
730-
CeedCallBackend(CeedLoadSourceToBuffer(ceed, cuda_gen_template_path, &cuda_gen_template_source));
731-
code << cuda_gen_template_source;
732-
CeedCallBackend(CeedFree(&cuda_gen_template_path));
733-
CeedCallBackend(CeedFree(&cuda_gen_template_source));
734-
}
706+
code << "// Tensor basis source\n";
707+
code << "#include <ceed/jit-source/cuda/cuda-shared-basis-tensor-templates.h>\n\n";
708+
code << "// CodeGen operator source\n";
709+
code << "#include <ceed/jit-source/cuda/cuda-gen-templates.h>\n\n";
735710

736711
// Get QFunction name
737712
std::string qfunction_name(qf_data->qfunction_name);
@@ -749,9 +724,13 @@ extern "C" int CeedOperatorBuildKernel_Cuda_gen(CeedOperator op) {
749724

750725
// Add user QFunction source
751726
{
752-
std::string qfunction_source(qf_data->qfunction_source);
727+
const char *source_path;
728+
729+
CeedCallBackend(CeedQFunctionGetSourcePath(qf, &source_path));
730+
CeedCheck(source_path, ceed, CEED_ERROR_UNSUPPORTED, "/gpu/cuda/gen backend requires QFunction source code file");
753731

754-
code << qfunction_source;
732+
code << "// User QFunction source\n";
733+
code << "#include \"" << source_path << "\"\n\n";
755734
}
756735

757736
// Setup

backends/cuda-gen/ceed-cuda-gen-qfunction.c

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ static int CeedQFunctionDestroy_Cuda_gen(CeedQFunction qf) {
2727

2828
CeedCallBackend(CeedQFunctionGetData(qf, &data));
2929
CeedCallCuda(CeedQFunctionReturnCeed(qf), cudaFree(data->d_c));
30-
CeedCallBackend(CeedFree(&data->qfunction_source));
3130
CeedCallBackend(CeedFree(&data));
3231
return CEED_ERROR_SUCCESS;
3332
}
@@ -45,10 +44,6 @@ int CeedQFunctionCreate_Cuda_gen(CeedQFunction qf) {
4544

4645
// Read QFunction source
4746
CeedCallBackend(CeedQFunctionGetKernelName(qf, &data->qfunction_name));
48-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction User Source -----\n");
49-
CeedCallBackend(CeedQFunctionLoadSourceToBuffer(qf, &data->qfunction_source));
50-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction User Source Complete! -----\n");
51-
CeedCheck(data->qfunction_source, ceed, CEED_ERROR_UNSUPPORTED, "/gpu/cuda/gen backend requires QFunction source code file");
5247

5348
CeedCallBackend(CeedSetBackendFunction(ceed, "QFunction", qf, "Apply", CeedQFunctionApply_Cuda_gen));
5449
CeedCallBackend(CeedSetBackendFunction(ceed, "QFunction", qf, "Destroy", CeedQFunctionDestroy_Cuda_gen));

backends/cuda-gen/ceed-cuda-gen.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ typedef struct {
2626

2727
typedef struct {
2828
const char *qfunction_name;
29-
const char *qfunction_source;
3029
void *d_c;
3130
} CeedQFunction_Cuda_gen;
3231

backends/cuda-ref/ceed-cuda-ref-basis.c

Lines changed: 10 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -182,24 +182,17 @@ static int CeedBasisApplyAtPointsCore_Cuda(CeedBasis basis, bool apply_add, cons
182182
}
183183

184184
// -- Compile kernels
185-
char *basis_kernel_source;
186-
const char *basis_kernel_path;
187-
CeedInt num_comp;
185+
const char basis_kernel_source[] = "// AtPoints basis source\n#include <ceed/jit-source/cuda/cuda-ref-basis-tensor-at-points.h>\n";
186+
CeedInt num_comp;
188187

189188
if (data->moduleAtPoints) CeedCallCuda(ceed, cuModuleUnload(data->moduleAtPoints));
190189
CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
191-
CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-basis-tensor-at-points.h", &basis_kernel_path));
192-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
193-
CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source));
194-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
195190
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->moduleAtPoints, 9, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN",
196191
Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp,
197192
"BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_NUM_PTS",
198193
max_num_points, "POINTS_BUFF_LEN", CeedIntPow(Q_1d, dim - 1)));
199194
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "InterpAtPoints", &data->InterpAtPoints));
200195
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->moduleAtPoints, "GradAtPoints", &data->GradAtPoints));
201-
CeedCallBackend(CeedFree(&basis_kernel_path));
202-
CeedCallBackend(CeedFree(&basis_kernel_source));
203196
}
204197

205198
// Get read/write access to u, v
@@ -419,8 +412,6 @@ static int CeedBasisDestroyNonTensor_Cuda(CeedBasis basis) {
419412
int CeedBasisCreateTensorH1_Cuda(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d,
420413
const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis) {
421414
Ceed ceed;
422-
char *basis_kernel_source;
423-
const char *basis_kernel_path;
424415
CeedInt num_comp;
425416
const CeedInt q_bytes = Q_1d * sizeof(CeedScalar);
426417
const CeedInt interp_bytes = q_bytes * P_1d;
@@ -440,19 +431,15 @@ int CeedBasisCreateTensorH1_Cuda(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const
440431
CeedCallCuda(ceed, cudaMemcpy(data->d_grad_1d, grad_1d, interp_bytes, cudaMemcpyHostToDevice));
441432

442433
// Compile basis kernels
434+
const char basis_kernel_source[] = "// Tensor basis source\n#include <ceed/jit-source/cuda/cuda-ref-basis-tensor.h>\n";
435+
443436
CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
444-
CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-basis-tensor.h", &basis_kernel_path));
445-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
446-
CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source));
447-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
448437
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 7, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN",
449438
Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp,
450439
"BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim)));
451440
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Interp", &data->Interp));
452441
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Grad", &data->Grad));
453442
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Weight", &data->Weight));
454-
CeedCallBackend(CeedFree(&basis_kernel_path));
455-
CeedCallBackend(CeedFree(&basis_kernel_source));
456443

457444
CeedCallBackend(CeedBasisSetData(basis, data));
458445

@@ -471,8 +458,6 @@ int CeedBasisCreateTensorH1_Cuda(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const
471458
int CeedBasisCreateH1_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *grad,
472459
const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
473460
Ceed ceed;
474-
char *basis_kernel_source;
475-
const char *basis_kernel_path;
476461
CeedInt num_comp, q_comp_interp, q_comp_grad;
477462
const CeedInt q_bytes = num_qpts * sizeof(CeedScalar);
478463
CeedBasisNonTensor_Cuda *data;
@@ -501,20 +486,16 @@ int CeedBasisCreateH1_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes
501486
}
502487

503488
// Compile basis kernels
489+
const char basis_kernel_source[] = "// Nontensor basis source\n#include <ceed/jit-source/cuda/cuda-ref-basis-nontensor.h>\n";
490+
504491
CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
505-
CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-basis-nontensor.h", &basis_kernel_path));
506-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
507-
CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source));
508-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
509492
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP",
510493
q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_grad, "BASIS_NUM_COMP", num_comp));
511494
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Interp", &data->Interp));
512495
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "InterpTranspose", &data->InterpTranspose));
513496
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Deriv", &data->Deriv));
514497
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "DerivTranspose", &data->DerivTranspose));
515498
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Weight", &data->Weight));
516-
CeedCallBackend(CeedFree(&basis_kernel_path));
517-
CeedCallBackend(CeedFree(&basis_kernel_source));
518499

519500
CeedCallBackend(CeedBasisSetData(basis, data));
520501

@@ -531,8 +512,6 @@ int CeedBasisCreateH1_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes
531512
int CeedBasisCreateHdiv_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *div,
532513
const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
533514
Ceed ceed;
534-
char *basis_kernel_source;
535-
const char *basis_kernel_path;
536515
CeedInt num_comp, q_comp_interp, q_comp_div;
537516
const CeedInt q_bytes = num_qpts * sizeof(CeedScalar);
538517
CeedBasisNonTensor_Cuda *data;
@@ -561,20 +540,16 @@ int CeedBasisCreateHdiv_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nod
561540
}
562541

563542
// Compile basis kernels
543+
const char basis_kernel_source[] = "// Nontensor basis source\n#include <ceed/jit-source/cuda/cuda-ref-basis-nontensor.h>\n";
544+
564545
CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
565-
CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-basis-nontensor.h", &basis_kernel_path));
566-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
567-
CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source));
568-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
569546
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP",
570547
q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_div, "BASIS_NUM_COMP", num_comp));
571548
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Interp", &data->Interp));
572549
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "InterpTranspose", &data->InterpTranspose));
573550
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Deriv", &data->Deriv));
574551
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "DerivTranspose", &data->DerivTranspose));
575552
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Weight", &data->Weight));
576-
CeedCallBackend(CeedFree(&basis_kernel_path));
577-
CeedCallBackend(CeedFree(&basis_kernel_source));
578553

579554
CeedCallBackend(CeedBasisSetData(basis, data));
580555

@@ -591,8 +566,6 @@ int CeedBasisCreateHdiv_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nod
591566
int CeedBasisCreateHcurl_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
592567
const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
593568
Ceed ceed;
594-
char *basis_kernel_source;
595-
const char *basis_kernel_path;
596569
CeedInt num_comp, q_comp_interp, q_comp_curl;
597570
const CeedInt q_bytes = num_qpts * sizeof(CeedScalar);
598571
CeedBasisNonTensor_Cuda *data;
@@ -621,20 +594,16 @@ int CeedBasisCreateHcurl_Cuda(CeedElemTopology topo, CeedInt dim, CeedInt num_no
621594
}
622595

623596
// Compile basis kernels
597+
const char basis_kernel_source[] = "// Nontensor basis source\n#include <ceed/jit-source/cuda/cuda-ref-basis-nontensor.h>\n";
598+
624599
CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
625-
CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-basis-nontensor.h", &basis_kernel_path));
626-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
627-
CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source));
628-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
629600
CeedCallBackend(CeedCompile_Cuda(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP",
630601
q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_curl, "BASIS_NUM_COMP", num_comp));
631602
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Interp", &data->Interp));
632603
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "InterpTranspose", &data->InterpTranspose));
633604
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Deriv", &data->Deriv));
634605
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "DerivTranspose", &data->DerivTranspose));
635606
CeedCallBackend(CeedGetKernel_Cuda(ceed, data->module, "Weight", &data->Weight));
636-
CeedCallBackend(CeedFree(&basis_kernel_path));
637-
CeedCallBackend(CeedFree(&basis_kernel_source));
638607

639608
CeedCallBackend(CeedBasisSetData(basis, data));
640609

backends/cuda-ref/ceed-cuda-ref-operator.c

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1286,8 +1286,6 @@ static inline int CeedOperatorAssembleDiagonalSetup_Cuda(CeedOperator op) {
12861286
//------------------------------------------------------------------------------
12871287
static inline int CeedOperatorAssembleDiagonalSetupCompile_Cuda(CeedOperator op, CeedInt use_ceedsize_idx, const bool is_point_block) {
12881288
Ceed ceed;
1289-
char *diagonal_kernel_source;
1290-
const char *diagonal_kernel_path;
12911289
CeedInt num_input_fields, num_output_fields, num_eval_modes_in = 0, num_eval_modes_out = 0;
12921290
CeedInt num_comp, q_comp, num_nodes, num_qpts;
12931291
CeedBasis basis_in = NULL, basis_out = NULL;
@@ -1351,22 +1349,18 @@ static inline int CeedOperatorAssembleDiagonalSetupCompile_Cuda(CeedOperator op,
13511349
CeedOperatorDiag_Cuda *diag = impl->diag;
13521350

13531351
// Assemble kernel
1354-
CUmodule *module = is_point_block ? &diag->module_point_block : &diag->module;
1355-
CeedInt elems_per_block = 1;
1352+
const char diagonal_kernel_source[] = "// Diagonal assembly source\n#include <ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h>\n";
1353+
CUmodule *module = is_point_block ? &diag->module_point_block : &diag->module;
1354+
CeedInt elems_per_block = 1;
1355+
13561356
CeedCallBackend(CeedBasisGetNumNodes(basis_in, &num_nodes));
13571357
CeedCallBackend(CeedBasisGetNumComponents(basis_in, &num_comp));
13581358
if (basis_in == CEED_BASIS_NONE) num_qpts = num_nodes;
13591359
else CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
1360-
CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-operator-assemble-diagonal.h", &diagonal_kernel_path));
1361-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Diagonal Assembly Kernel Source -----\n");
1362-
CeedCallBackend(CeedLoadSourceToBuffer(ceed, diagonal_kernel_path, &diagonal_kernel_source));
1363-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Diagonal Assembly Source Complete! -----\n");
13641360
CeedCallCuda(ceed, CeedCompile_Cuda(ceed, diagonal_kernel_source, module, 8, "NUM_EVAL_MODES_IN", num_eval_modes_in, "NUM_EVAL_MODES_OUT",
13651361
num_eval_modes_out, "NUM_COMP", num_comp, "NUM_NODES", num_nodes, "NUM_QPTS", num_qpts, "USE_CEEDSIZE",
13661362
use_ceedsize_idx, "USE_POINT_BLOCK", is_point_block ? 1 : 0, "BLOCK_SIZE", num_nodes * elems_per_block));
13671363
CeedCallCuda(ceed, CeedGetKernel_Cuda(ceed, *module, "LinearDiagonal", is_point_block ? &diag->LinearPointBlock : &diag->LinearDiagonal));
1368-
CeedCallBackend(CeedFree(&diagonal_kernel_path));
1369-
CeedCallBackend(CeedFree(&diagonal_kernel_source));
13701364
CeedCallBackend(CeedBasisDestroy(&basis_in));
13711365
CeedCallBackend(CeedBasisDestroy(&basis_out));
13721366
return CEED_ERROR_SUCCESS;
@@ -1481,8 +1475,6 @@ static int CeedOperatorLinearAssembleAddPointBlockDiagonal_Cuda(CeedOperator op,
14811475
static int CeedSingleOperatorAssembleSetup_Cuda(CeedOperator op, CeedInt use_ceedsize_idx) {
14821476
Ceed ceed;
14831477
Ceed_Cuda *cuda_data;
1484-
char *assembly_kernel_source;
1485-
const char *assembly_kernel_path;
14861478
CeedInt num_input_fields, num_output_fields, num_eval_modes_in = 0, num_eval_modes_out = 0;
14871479
CeedInt elem_size_in, num_qpts_in = 0, num_comp_in, elem_size_out, num_qpts_out, num_comp_out, q_comp;
14881480
CeedEvalMode *eval_modes_in = NULL, *eval_modes_out = NULL;
@@ -1589,20 +1581,16 @@ static int CeedSingleOperatorAssembleSetup_Cuda(CeedOperator op, CeedInt use_cee
15891581
}
15901582

15911583
// Compile kernels
1584+
const char assembly_kernel_source[] = "// Full assembly source\n#include <ceed/jit-source/cuda/cuda-ref-operator-assemble.h>\n";
1585+
15921586
CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in));
15931587
CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out));
1594-
CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-operator-assemble.h", &assembly_kernel_path));
1595-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Assembly Kernel Source -----\n");
1596-
CeedCallBackend(CeedLoadSourceToBuffer(ceed, assembly_kernel_path, &assembly_kernel_source));
1597-
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Assembly Source Complete! -----\n");
15981588
CeedCallBackend(CeedCompile_Cuda(ceed, assembly_kernel_source, &asmb->module, 10, "NUM_EVAL_MODES_IN", num_eval_modes_in, "NUM_EVAL_MODES_OUT",
15991589
num_eval_modes_out, "NUM_COMP_IN", num_comp_in, "NUM_COMP_OUT", num_comp_out, "NUM_NODES_IN", elem_size_in,
16001590
"NUM_NODES_OUT", elem_size_out, "NUM_QPTS", num_qpts_in, "BLOCK_SIZE",
16011591
asmb->block_size_x * asmb->block_size_y * asmb->elems_per_block, "BLOCK_SIZE_Y", asmb->block_size_y,
16021592
"USE_CEEDSIZE", use_ceedsize_idx));
16031593
CeedCallBackend(CeedGetKernel_Cuda(ceed, asmb->module, "LinearAssemble", &asmb->LinearAssemble));
1604-
CeedCallBackend(CeedFree(&assembly_kernel_path));
1605-
CeedCallBackend(CeedFree(&assembly_kernel_source));
16061594

16071595
// Load into B_in, in order that they will be used in eval_modes_in
16081596
{

0 commit comments

Comments
 (0)