@@ -35,7 +35,7 @@ static int CeedOperatorDestroy_Hip_gen(CeedOperator op) {
3535// Apply and add to output
3636//------------------------------------------------------------------------------
3737static int CeedOperatorApplyAdd_Hip_gen (CeedOperator op , CeedVector input_vec , CeedVector output_vec , CeedRequest * request ) {
38- bool is_at_points , is_tensor ;
38+ bool is_at_points , is_tensor , is_good_run = true ;
3939 Ceed ceed ;
4040 CeedInt num_elem , num_input_fields , num_output_fields ;
4141 CeedEvalMode eval_mode ;
@@ -46,62 +46,15 @@ static int CeedOperatorApplyAdd_Hip_gen(CeedOperator op, CeedVector input_vec, C
4646 CeedOperatorField * op_input_fields , * op_output_fields ;
4747 CeedOperator_Hip_gen * data ;
4848
49- // Check for shared bases
50- CeedCallBackend (CeedOperatorGetFields (op , & num_input_fields , & op_input_fields , & num_output_fields , & op_output_fields ));
49+ // Creation of the operator
5150 {
52- bool has_shared_bases = true, is_all_tensor = true, is_all_nontensor = true;
53-
54- for (CeedInt i = 0 ; i < num_input_fields ; i ++ ) {
55- CeedBasis basis ;
56-
57- CeedCallBackend (CeedOperatorFieldGetBasis (op_input_fields [i ], & basis ));
58- if (basis != CEED_BASIS_NONE ) {
59- bool is_tensor = true;
60- const char * resource ;
61- char * resource_root ;
62- Ceed basis_ceed ;
63-
64- CeedCallBackend (CeedBasisIsTensor (basis , & is_tensor ));
65- is_all_tensor &= is_tensor ;
66- is_all_nontensor &= !is_tensor ;
67- CeedCallBackend (CeedBasisGetCeed (basis , & basis_ceed ));
68- CeedCallBackend (CeedGetResource (basis_ceed , & resource ));
69- CeedCallBackend (CeedGetResourceRoot (basis_ceed , resource , ":" , & resource_root ));
70- has_shared_bases &= !strcmp (resource_root , "/gpu/hip/shared" );
71- CeedCallBackend (CeedFree (& resource_root ));
72- CeedCallBackend (CeedDestroy (& basis_ceed ));
73- }
74- CeedCallBackend (CeedBasisDestroy (& basis ));
75- }
51+ bool is_good_build = false;
7652
77- for (CeedInt i = 0 ; i < num_output_fields ; i ++ ) {
78- CeedBasis basis ;
79-
80- CeedCallBackend (CeedOperatorFieldGetBasis (op_output_fields [i ], & basis ));
81- if (basis != CEED_BASIS_NONE ) {
82- bool is_tensor = true;
83- const char * resource ;
84- char * resource_root ;
85- Ceed basis_ceed ;
86-
87- CeedCallBackend (CeedBasisIsTensor (basis , & is_tensor ));
88- is_all_tensor &= is_tensor ;
89- is_all_nontensor &= !is_tensor ;
90-
91- CeedCallBackend (CeedBasisGetCeed (basis , & basis_ceed ));
92- CeedCallBackend (CeedGetResource (basis_ceed , & resource ));
93- CeedCallBackend (CeedGetResourceRoot (basis_ceed , resource , ":" , & resource_root ));
94- has_shared_bases &= !strcmp (resource_root , "/gpu/hip/shared" );
95- CeedCallBackend (CeedFree (& resource_root ));
96- CeedCallBackend (CeedDestroy (& basis_ceed ));
97- }
98- CeedCallBackend (CeedBasisDestroy (& basis ));
99- }
100- // -- Fallback to ref if not all bases are shared
101- if (!has_shared_bases || (!is_all_tensor && !is_all_nontensor )) {
53+ CeedCallBackend (CeedOperatorBuildKernel_Hip_gen (op , & is_good_build ));
54+ if (!is_good_build ) {
10255 CeedOperator op_fallback ;
10356
104- CeedDebug256 (CeedOperatorReturnCeed (op ), CEED_DEBUG_COLOR_SUCCESS , "Falling back to /gpu/hip/ref CeedOperator due to unsupported bases " );
57+ CeedDebug256 (CeedOperatorReturnCeed (op ), CEED_DEBUG_COLOR_SUCCESS , "Falling back to /gpu/hip/ref CeedOperator due to code generation issue " );
10558 CeedCallBackend (CeedOperatorGetFallback (op , & op_fallback ));
10659 CeedCallBackend (CeedOperatorApplyAdd (op_fallback , input_vec , output_vec , request ));
10760 return CEED_ERROR_SUCCESS ;
@@ -113,11 +66,9 @@ static int CeedOperatorApplyAdd_Hip_gen(CeedOperator op, CeedVector input_vec, C
11366 CeedCallBackend (CeedOperatorGetQFunction (op , & qf ));
11467 CeedCallBackend (CeedQFunctionGetData (qf , & qf_data ));
11568 CeedCallBackend (CeedOperatorGetNumElements (op , & num_elem ));
69+ CeedCallBackend (CeedOperatorGetFields (op , & num_input_fields , & op_input_fields , & num_output_fields , & op_output_fields ));
11670 CeedCallBackend (CeedQFunctionGetFields (qf , NULL , & qf_input_fields , NULL , & qf_output_fields ));
11771
118- // Creation of the operator
119- CeedCallBackend (CeedOperatorBuildKernel_Hip_gen (op ));
120-
12172 // Input vectors
12273 for (CeedInt i = 0 ; i < num_input_fields ; i ++ ) {
12374 CeedCallBackend (CeedQFunctionFieldGetEvalMode (qf_input_fields [i ], & eval_mode ));
@@ -219,17 +170,20 @@ static int CeedOperatorApplyAdd_Hip_gen(CeedOperator op, CeedVector input_vec, C
219170 CeedInt grid = num_elem / block_sizes [2 ] + ((num_elem / block_sizes [2 ] * block_sizes [2 ] < num_elem ) ? 1 : 0 );
220171 CeedInt sharedMem = block_sizes [2 ] * thread_1d * sizeof (CeedScalar );
221172
222- CeedCallBackend (CeedRunKernelDimShared_Hip (ceed , data -> op , grid , block_sizes [0 ], block_sizes [1 ], block_sizes [2 ], sharedMem , opargs ));
173+ CeedCallBackend (
174+ CeedTryRunKernelDimShared_Hip (ceed , data -> op , grid , block_sizes [0 ], block_sizes [1 ], block_sizes [2 ], sharedMem , & is_good_run , opargs ));
223175 } else if (dim == 2 ) {
224176 CeedInt grid = num_elem / block_sizes [2 ] + ((num_elem / block_sizes [2 ] * block_sizes [2 ] < num_elem ) ? 1 : 0 );
225177 CeedInt sharedMem = block_sizes [2 ] * thread_1d * thread_1d * sizeof (CeedScalar );
226178
227- CeedCallBackend (CeedRunKernelDimShared_Hip (ceed , data -> op , grid , block_sizes [0 ], block_sizes [1 ], block_sizes [2 ], sharedMem , opargs ));
179+ CeedCallBackend (
180+ CeedTryRunKernelDimShared_Hip (ceed , data -> op , grid , block_sizes [0 ], block_sizes [1 ], block_sizes [2 ], sharedMem , & is_good_run , opargs ));
228181 } else if (dim == 3 ) {
229182 CeedInt grid = num_elem / block_sizes [2 ] + ((num_elem / block_sizes [2 ] * block_sizes [2 ] < num_elem ) ? 1 : 0 );
230183 CeedInt sharedMem = block_sizes [2 ] * thread_1d * thread_1d * sizeof (CeedScalar );
231184
232- CeedCallBackend (CeedRunKernelDimShared_Hip (ceed , data -> op , grid , block_sizes [0 ], block_sizes [1 ], block_sizes [2 ], sharedMem , opargs ));
185+ CeedCallBackend (
186+ CeedTryRunKernelDimShared_Hip (ceed , data -> op , grid , block_sizes [0 ], block_sizes [1 ], block_sizes [2 ], sharedMem , & is_good_run , opargs ));
233187 }
234188
235189 // Restore input arrays
@@ -280,8 +234,21 @@ static int CeedOperatorApplyAdd_Hip_gen(CeedOperator op, CeedVector input_vec, C
280234
281235 // Restore context data
282236 CeedCallBackend (CeedQFunctionRestoreInnerContextData (qf , & qf_data -> d_c ));
237+
238+ // Cleanup
283239 CeedCallBackend (CeedDestroy (& ceed ));
284240 CeedCallBackend (CeedQFunctionDestroy (& qf ));
241+
242+ // Fallback if run was bad (out of resources)
243+ if (!is_good_run ) {
244+ CeedOperator op_fallback ;
245+
246+ data -> use_fallback = true;
247+ CeedDebug256 (CeedOperatorReturnCeed (op ), CEED_DEBUG_COLOR_SUCCESS , "Falling back to /gpu/hip/ref CeedOperator due to kernel execution issue" );
248+ CeedCallBackend (CeedOperatorGetFallback (op , & op_fallback ));
249+ CeedCallBackend (CeedOperatorApplyAdd (op_fallback , input_vec , output_vec , request ));
250+ return CEED_ERROR_SUCCESS ;
251+ }
285252 return CEED_ERROR_SUCCESS ;
286253}
287254
0 commit comments