Skip to content

Commit d3a5b9e

Browse files
committed
hip - gen fallback to shared if error
1 parent ddae501 commit d3a5b9e

File tree

9 files changed

+159
-84
lines changed

9 files changed

+159
-84
lines changed

backends/cuda-gen/ceed-cuda-gen-operator-build.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -937,9 +937,9 @@ extern "C" int CeedOperatorBuildKernel_Cuda_gen(CeedOperator op, bool *is_good_b
937937
return CEED_ERROR_SUCCESS;
938938
}
939939
}
940-
CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
941940

942941
// Check field compatibility
942+
CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
943943
{
944944
bool has_shared_bases = true, is_all_tensor = true, is_all_nontensor = true;
945945

backends/cuda-gen/ceed-cuda-gen-operator.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ static size_t dynamicSMemSize(int threads) { return threads * sizeof(CeedScalar)
9999
// Apply and add to output
100100
//------------------------------------------------------------------------------
101101
static int CeedOperatorApplyAdd_Cuda_gen(CeedOperator op, CeedVector input_vec, CeedVector output_vec, CeedRequest *request) {
102-
bool is_at_points, is_tensor, is_good_run = true;
102+
bool is_at_points, is_tensor, is_run_good = true;
103103
Ceed ceed;
104104
Ceed_Cuda *cuda_data;
105105
CeedInt num_elem, num_input_fields, num_output_fields;
@@ -244,7 +244,7 @@ static int CeedOperatorApplyAdd_Cuda_gen(CeedOperator op, CeedVector input_vec,
244244
}
245245
CeedInt shared_mem = block[0] * block[1] * block[2] * sizeof(CeedScalar);
246246

247-
CeedCallBackend(CeedTryRunKernelDimShared_Cuda(ceed, data->op, grid, block[0], block[1], block[2], shared_mem, &is_good_run, opargs));
247+
CeedCallBackend(CeedTryRunKernelDimShared_Cuda(ceed, data->op, grid, block[0], block[1], block[2], shared_mem, &is_run_good, opargs));
248248

249249
// Restore input arrays
250250
for (CeedInt i = 0; i < num_input_fields; i++) {
@@ -306,7 +306,7 @@ static int CeedOperatorApplyAdd_Cuda_gen(CeedOperator op, CeedVector input_vec,
306306
CeedCallBackend(CeedQFunctionDestroy(&qf));
307307

308308
// Fallback if run was bad (out of resources)
309-
if (!is_good_run) {
309+
if (!is_run_good) {
310310
CeedOperator op_fallback;
311311

312312
data->use_fallback = true;

backends/cuda/ceed-cuda-compile.cpp

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -132,16 +132,14 @@ static int CeedCompileCore_Cuda(Ceed ceed, const char *source, const bool throw_
132132
}
133133
CeedCallBackend(CeedFree(&opts));
134134
*is_compile_good = result == NVRTC_SUCCESS;
135-
if (!*is_compile_good) {
135+
if (!*is_compile_good && throw_error) {
136136
char *log;
137137
size_t log_size;
138138

139-
if (throw_error) {
140-
CeedCallNvrtc(ceed, nvrtcGetProgramLogSize(prog, &log_size));
141-
CeedCallBackend(CeedMalloc(log_size, &log));
142-
CeedCallNvrtc(ceed, nvrtcGetProgramLog(prog, log));
143-
return CeedError(ceed, CEED_ERROR_BACKEND, "%s\n%s", nvrtcGetErrorString(result), log);
144-
}
139+
CeedCallNvrtc(ceed, nvrtcGetProgramLogSize(prog, &log_size));
140+
CeedCallBackend(CeedMalloc(log_size, &log));
141+
CeedCallNvrtc(ceed, nvrtcGetProgramLog(prog, log));
142+
return CeedError(ceed, CEED_ERROR_BACKEND, "%s\n%s", nvrtcGetErrorString(result), log);
145143
}
146144

147145
#if CUDA_VERSION >= 11010

backends/hip-gen/ceed-hip-gen-operator-build.cpp

Lines changed: 76 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -942,7 +942,7 @@ static int CeedOperatorBuildKernelQFunction_Hip_gen(std::ostringstream &code, Ce
942942
//------------------------------------------------------------------------------
943943
// Build single operator kernel
944944
//------------------------------------------------------------------------------
945-
extern "C" int CeedOperatorBuildKernel_Hip_gen(CeedOperator op) {
945+
extern "C" int CeedOperatorBuildKernel_Hip_gen(CeedOperator op, bool *is_good_build) {
946946
bool is_tensor = true, is_at_points = false, use_3d_slices = false;
947947
Ceed ceed;
948948
CeedInt Q_1d, num_input_fields, num_output_fields, dim = 1, max_num_points = 0, coords_comp_stride = 0;
@@ -953,18 +953,77 @@ extern "C" int CeedOperatorBuildKernel_Hip_gen(CeedOperator op) {
953953
CeedOperator_Hip_gen *data;
954954
std::ostringstream code;
955955

956+
CeedCallBackend(CeedOperatorGetData(op, &data));
956957
{
957958
bool is_setup_done;
958959

959960
CeedCallBackend(CeedOperatorIsSetupDone(op, &is_setup_done));
960-
if (is_setup_done) return CEED_ERROR_SUCCESS;
961+
if (is_setup_done) {
962+
*is_good_build = !data->use_fallback;
963+
return CEED_ERROR_SUCCESS;
964+
}
961965
}
962966

967+
// Check field compatibility
968+
CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
969+
{
970+
bool has_shared_bases = true, is_all_tensor = true, is_all_nontensor = true;
971+
972+
for (CeedInt i = 0; i < num_input_fields; i++) {
973+
CeedBasis basis;
974+
975+
CeedCallBackend(CeedOperatorFieldGetBasis(op_input_fields[i], &basis));
976+
if (basis != CEED_BASIS_NONE) {
977+
bool is_tensor = true;
978+
const char *resource;
979+
char *resource_root;
980+
Ceed basis_ceed;
981+
982+
CeedCallBackend(CeedBasisIsTensor(basis, &is_tensor));
983+
is_all_tensor &= is_tensor;
984+
is_all_nontensor &= !is_tensor;
985+
CeedCallBackend(CeedBasisGetCeed(basis, &basis_ceed));
986+
CeedCallBackend(CeedGetResource(basis_ceed, &resource));
987+
CeedCallBackend(CeedGetResourceRoot(basis_ceed, resource, ":", &resource_root));
988+
has_shared_bases &= !strcmp(resource_root, "/gpu/hip/shared");
989+
CeedCallBackend(CeedFree(&resource_root));
990+
CeedCallBackend(CeedDestroy(&basis_ceed));
991+
}
992+
CeedCallBackend(CeedBasisDestroy(&basis));
993+
}
994+
995+
for (CeedInt i = 0; i < num_output_fields; i++) {
996+
CeedBasis basis;
997+
998+
CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[i], &basis));
999+
if (basis != CEED_BASIS_NONE) {
1000+
bool is_tensor = true;
1001+
const char *resource;
1002+
char *resource_root;
1003+
Ceed basis_ceed;
1004+
1005+
CeedCallBackend(CeedBasisIsTensor(basis, &is_tensor));
1006+
is_all_tensor &= is_tensor;
1007+
is_all_nontensor &= !is_tensor;
1008+
1009+
CeedCallBackend(CeedBasisGetCeed(basis, &basis_ceed));
1010+
CeedCallBackend(CeedGetResource(basis_ceed, &resource));
1011+
CeedCallBackend(CeedGetResourceRoot(basis_ceed, resource, ":", &resource_root));
1012+
has_shared_bases &= !strcmp(resource_root, "/gpu/hip/shared");
1013+
CeedCallBackend(CeedFree(&resource_root));
1014+
CeedCallBackend(CeedDestroy(&basis_ceed));
1015+
}
1016+
CeedCallBackend(CeedBasisDestroy(&basis));
1017+
}
1018+
// -- Fallback to ref if not all bases are shared
1019+
if (!has_shared_bases || (!is_all_tensor && !is_all_nontensor)) {
1020+
*is_good_build = false;
1021+
return CEED_ERROR_SUCCESS;
1022+
}
1023+
}
9631024
CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
964-
CeedCallBackend(CeedOperatorGetData(op, &data));
9651025
CeedCallBackend(CeedOperatorGetQFunction(op, &qf));
9661026
CeedCallBackend(CeedQFunctionGetData(qf, &qf_data));
967-
CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
9681027
CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields));
9691028

9701029
// Get operator data
@@ -1225,9 +1284,19 @@ extern "C" int CeedOperatorBuildKernel_Hip_gen(CeedOperator op) {
12251284
// Compile
12261285
CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem));
12271286
CeedCallBackend(BlockGridCalculate_Hip_gen(is_tensor ? dim : 1, num_elem, data->max_P_1d, Q_1d, block_sizes));
1228-
CeedCallBackend(CeedCompile_Hip(ceed, code.str().c_str(), &data->module, 2, "T_1D", block_sizes[0], "BLOCK_SIZE",
1229-
block_sizes[0] * block_sizes[1] * block_sizes[2]));
1230-
CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, operator_name.c_str(), &data->op));
1287+
{
1288+
bool is_compile_good = false;
1289+
1290+
CeedCallBackend(CeedTryCompile_Hip(ceed, code.str().c_str(), &is_compile_good, &data->module, 2, "T_1D", block_sizes[0], "BLOCK_SIZE",
1291+
block_sizes[0] * block_sizes[1] * block_sizes[2]));
1292+
if (is_compile_good) {
1293+
*is_good_build = true;
1294+
CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, operator_name.c_str(), &data->op));
1295+
} else {
1296+
*is_good_build = false;
1297+
data->use_fallback = true;
1298+
}
1299+
}
12311300
CeedCallBackend(CeedOperatorSetSetupDone(op));
12321301
CeedCallBackend(CeedDestroy(&ceed));
12331302
CeedCallBackend(CeedQFunctionDestroy(&qf));

backends/hip-gen/ceed-hip-gen-operator-build.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@
77
#pragma once
88

99
CEED_INTERN int BlockGridCalculate_Hip_gen(CeedInt dim, CeedInt num_elem, CeedInt P_1d, CeedInt Q_1d, CeedInt *block_sizes);
10-
CEED_INTERN int CeedOperatorBuildKernel_Hip_gen(CeedOperator op);
10+
CEED_INTERN int CeedOperatorBuildKernel_Hip_gen(CeedOperator op, bool *is_good_build);

backends/hip-gen/ceed-hip-gen-operator.c

Lines changed: 26 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ static int CeedOperatorDestroy_Hip_gen(CeedOperator op) {
3535
// Apply and add to output
3636
//------------------------------------------------------------------------------
3737
static int CeedOperatorApplyAdd_Hip_gen(CeedOperator op, CeedVector input_vec, CeedVector output_vec, CeedRequest *request) {
38-
bool is_at_points, is_tensor;
38+
bool is_at_points, is_tensor, is_good_run = true;
3939
Ceed ceed;
4040
CeedInt num_elem, num_input_fields, num_output_fields;
4141
CeedEvalMode eval_mode;
@@ -46,62 +46,15 @@ static int CeedOperatorApplyAdd_Hip_gen(CeedOperator op, CeedVector input_vec, C
4646
CeedOperatorField *op_input_fields, *op_output_fields;
4747
CeedOperator_Hip_gen *data;
4848

49-
// Check for shared bases
50-
CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
49+
// Creation of the operator
5150
{
52-
bool has_shared_bases = true, is_all_tensor = true, is_all_nontensor = true;
53-
54-
for (CeedInt i = 0; i < num_input_fields; i++) {
55-
CeedBasis basis;
56-
57-
CeedCallBackend(CeedOperatorFieldGetBasis(op_input_fields[i], &basis));
58-
if (basis != CEED_BASIS_NONE) {
59-
bool is_tensor = true;
60-
const char *resource;
61-
char *resource_root;
62-
Ceed basis_ceed;
63-
64-
CeedCallBackend(CeedBasisIsTensor(basis, &is_tensor));
65-
is_all_tensor &= is_tensor;
66-
is_all_nontensor &= !is_tensor;
67-
CeedCallBackend(CeedBasisGetCeed(basis, &basis_ceed));
68-
CeedCallBackend(CeedGetResource(basis_ceed, &resource));
69-
CeedCallBackend(CeedGetResourceRoot(basis_ceed, resource, ":", &resource_root));
70-
has_shared_bases &= !strcmp(resource_root, "/gpu/hip/shared");
71-
CeedCallBackend(CeedFree(&resource_root));
72-
CeedCallBackend(CeedDestroy(&basis_ceed));
73-
}
74-
CeedCallBackend(CeedBasisDestroy(&basis));
75-
}
51+
bool is_good_build = false;
7652

77-
for (CeedInt i = 0; i < num_output_fields; i++) {
78-
CeedBasis basis;
79-
80-
CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[i], &basis));
81-
if (basis != CEED_BASIS_NONE) {
82-
bool is_tensor = true;
83-
const char *resource;
84-
char *resource_root;
85-
Ceed basis_ceed;
86-
87-
CeedCallBackend(CeedBasisIsTensor(basis, &is_tensor));
88-
is_all_tensor &= is_tensor;
89-
is_all_nontensor &= !is_tensor;
90-
91-
CeedCallBackend(CeedBasisGetCeed(basis, &basis_ceed));
92-
CeedCallBackend(CeedGetResource(basis_ceed, &resource));
93-
CeedCallBackend(CeedGetResourceRoot(basis_ceed, resource, ":", &resource_root));
94-
has_shared_bases &= !strcmp(resource_root, "/gpu/hip/shared");
95-
CeedCallBackend(CeedFree(&resource_root));
96-
CeedCallBackend(CeedDestroy(&basis_ceed));
97-
}
98-
CeedCallBackend(CeedBasisDestroy(&basis));
99-
}
100-
// -- Fallback to ref if not all bases are shared
101-
if (!has_shared_bases || (!is_all_tensor && !is_all_nontensor)) {
53+
CeedCallBackend(CeedOperatorBuildKernel_Hip_gen(op, &is_good_build));
54+
if (!is_good_build) {
10255
CeedOperator op_fallback;
10356

104-
CeedDebug256(CeedOperatorReturnCeed(op), CEED_DEBUG_COLOR_SUCCESS, "Falling back to /gpu/hip/ref CeedOperator due to unsupported bases");
57+
CeedDebug256(CeedOperatorReturnCeed(op), CEED_DEBUG_COLOR_SUCCESS, "Falling back to /gpu/hip/ref CeedOperator due to code generation issue");
10558
CeedCallBackend(CeedOperatorGetFallback(op, &op_fallback));
10659
CeedCallBackend(CeedOperatorApplyAdd(op_fallback, input_vec, output_vec, request));
10760
return CEED_ERROR_SUCCESS;
@@ -113,11 +66,9 @@ static int CeedOperatorApplyAdd_Hip_gen(CeedOperator op, CeedVector input_vec, C
11366
CeedCallBackend(CeedOperatorGetQFunction(op, &qf));
11467
CeedCallBackend(CeedQFunctionGetData(qf, &qf_data));
11568
CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem));
69+
CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
11670
CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields));
11771

118-
// Creation of the operator
119-
CeedCallBackend(CeedOperatorBuildKernel_Hip_gen(op));
120-
12172
// Input vectors
12273
for (CeedInt i = 0; i < num_input_fields; i++) {
12374
CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
@@ -219,17 +170,20 @@ static int CeedOperatorApplyAdd_Hip_gen(CeedOperator op, CeedVector input_vec, C
219170
CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
220171
CeedInt sharedMem = block_sizes[2] * thread_1d * sizeof(CeedScalar);
221172

222-
CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, data->op, grid, block_sizes[0], block_sizes[1], block_sizes[2], sharedMem, opargs));
173+
CeedCallBackend(
174+
CeedTryRunKernelDimShared_Hip(ceed, data->op, grid, block_sizes[0], block_sizes[1], block_sizes[2], sharedMem, &is_good_run, opargs));
223175
} else if (dim == 2) {
224176
CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
225177
CeedInt sharedMem = block_sizes[2] * thread_1d * thread_1d * sizeof(CeedScalar);
226178

227-
CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, data->op, grid, block_sizes[0], block_sizes[1], block_sizes[2], sharedMem, opargs));
179+
CeedCallBackend(
180+
CeedTryRunKernelDimShared_Hip(ceed, data->op, grid, block_sizes[0], block_sizes[1], block_sizes[2], sharedMem, &is_good_run, opargs));
228181
} else if (dim == 3) {
229182
CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
230183
CeedInt sharedMem = block_sizes[2] * thread_1d * thread_1d * sizeof(CeedScalar);
231184

232-
CeedCallBackend(CeedRunKernelDimShared_Hip(ceed, data->op, grid, block_sizes[0], block_sizes[1], block_sizes[2], sharedMem, opargs));
185+
CeedCallBackend(
186+
CeedTryRunKernelDimShared_Hip(ceed, data->op, grid, block_sizes[0], block_sizes[1], block_sizes[2], sharedMem, &is_good_run, opargs));
233187
}
234188

235189
// Restore input arrays
@@ -280,8 +234,21 @@ static int CeedOperatorApplyAdd_Hip_gen(CeedOperator op, CeedVector input_vec, C
280234

281235
// Restore context data
282236
CeedCallBackend(CeedQFunctionRestoreInnerContextData(qf, &qf_data->d_c));
237+
238+
// Cleanup
283239
CeedCallBackend(CeedDestroy(&ceed));
284240
CeedCallBackend(CeedQFunctionDestroy(&qf));
241+
242+
// Fallback if run was bad (out of resources)
243+
if (!is_good_run) {
244+
CeedOperator op_fallback;
245+
246+
data->use_fallback = true;
247+
CeedDebug256(CeedOperatorReturnCeed(op), CEED_DEBUG_COLOR_SUCCESS, "Falling back to /gpu/hip/ref CeedOperator due to kernel execution issue");
248+
CeedCallBackend(CeedOperatorGetFallback(op, &op_fallback));
249+
CeedCallBackend(CeedOperatorApplyAdd(op_fallback, input_vec, output_vec, request));
250+
return CEED_ERROR_SUCCESS;
251+
}
285252
return CEED_ERROR_SUCCESS;
286253
}
287254

backends/hip-gen/ceed-hip-gen.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <hip/hip_runtime.h>
1313

1414
typedef struct {
15+
bool use_fallback;
1516
CeedInt dim;
1617
CeedInt Q_1d;
1718
CeedInt max_P_1d;

0 commit comments

Comments
 (0)