3535 CeedChk_Nvrtc (ceed, ierr_q_); \
3636 } while (0 )
3737
38+ // ------------------------------------------------------------------------------
39+ // Call system command and capture stdout + stderr
40+ // ------------------------------------------------------------------------------
41+ static int CeedCallSystem (Ceed ceed, const char *command, const char *message) {
42+ CeedDebug (ceed, " Running command:\n $ %s\n " , command);
43+ FILE *output_stream = popen ((command + std::string (" 2>&1" )).c_str (), " r" );
44+
45+ CeedCheck (output_stream != nullptr , ceed, CEED_ERROR_BACKEND, " Failed to %s with command: %s" , message, command);
46+
47+ char output[4 * CEED_MAX_RESOURCE_LEN];
48+
49+ while (fgets (output, sizeof (output), output_stream) != nullptr ) {
50+ }
51+ CeedDebug (ceed, " Command output:\n %s\n " , output);
52+
53+ CeedCheck (pclose (output_stream) == 0 , ceed, CEED_ERROR_BACKEND, " Failed to %s with error: %s" , message, output);
54+ return CEED_ERROR_SUCCESS;
55+ }
56+
3857// ------------------------------------------------------------------------------
3958// Compile CUDA kernel
4059// ------------------------------------------------------------------------------
@@ -61,9 +80,9 @@ static int CeedCompileCore_Cuda(Ceed ceed, const char *source, const bool throw_
6180 CeedCallBackend (CeedGetIsClang (ceed, &using_clang));
6281
6382 CeedDebug256 (ceed, CEED_DEBUG_COLOR_SUCCESS,
64- using_clang
65- ? " Compiling CUDA with Clang backend (with Rust QFunction support)"
66- : " Compiling CUDA with NVRTC backend (without Rust QFunction support). To use Clang, set the environmental variable GPU_CLANG=1" );
83+ using_clang ? " Compiling CUDA with Clang backend (with Rust QFunction support) "
84+ : " Compiling CUDA with NVRTC backend (without Rust QFunction support). \n To use the Clang backend, set the environment "
85+ " variable GPU_CLANG=1" );
6786
6887 // Get kernel specific options, such as kernel constants
6988 if (num_defines > 0 ) {
@@ -198,12 +217,10 @@ static int CeedCompileCore_Cuda(Ceed ceed, const char *source, const bool throw_
198217 CeedCallBackend (CeedFree (&ptx));
199218 return CEED_ERROR_SUCCESS;
200219 } else {
201- const char *full_filename = " temp-jit .cu" ;
220+ const char *full_filename = " temp_kernel_source .cu" ;
202221 FILE *file = fopen (full_filename, " w" );
203- if (!file) {
204- CeedDebug256 (ceed, CEED_DEBUG_COLOR_ERROR, " Failed to create file. Write access is required for cuda-clang\n " );
205- return 1 ;
206- }
222+
223+ CeedCheck (file, ceed, CEED_ERROR_BACKEND, " Failed to create file. Write access is required for cuda-clang\n " );
207224 fputs (code.str ().c_str (), file);
208225 fclose (file);
209226
@@ -226,26 +243,22 @@ static int CeedCompileCore_Cuda(Ceed ceed, const char *source, const bool throw_
226243
227244 CeedCallBackend (CeedRestoreRustSourceRoots (ceed, &rust_source_dirs));
228245
229- // Compile with rust
230- int err;
231- std::string cmd;
246+ // Compile Rust crate(s) needed
247+ std::string command;
232248
233249 for (CeedInt i = 0 ; i < num_rust_source_dirs; i++) {
234- cmd = " cargo +nightly build --release --target nvptx64-nvidia-cuda --config " + rust_dirs[i] + " /.cargo/config.toml --manifest-path " +
235- rust_dirs[i] + " /Cargo.toml" ;
236- err = system (cmd.c_str ());
237- CeedCheck (!err, ceed, CEED_ERROR_BACKEND, " Failed to build Rust crates for GPU JiT.\n Failed to build Rust crate %d with command: %s" , i,
238- cmd.c_str ());
250+ command = " cargo +nightly build --release --target nvptx64-nvidia-cuda --config " + rust_dirs[i] + " /.cargo/config.toml --manifest-path " +
251+ rust_dirs[i] + " /Cargo.toml" ;
252+ CeedCallBackend (CeedCallSystem (ceed, command.c_str (), " build Rust crate" ));
239253 }
240254
241- cmd = " clang++ -flto=thin --cuda-gpu-arch=sm_" + std::to_string (prop.major ) + std::to_string (prop.minor ) +
242- " --cuda-device-only -emit-llvm -S temp-jit.cu -o kern.ll " ;
243- cmd += opts[4 ];
244- err = system (cmd.c_str ());
245- CeedCheck (!err, ceed, CEED_ERROR_BACKEND, " Failed to compile QFunction source to LLVM IR" );
246-
247- cmd = " llvm-link-20 kern.ll --ignore-non-bitcode --internalize --only-needed -S -o kern2.ll " ;
255+ // Compile wrapper kernel
256+ command = " clang++ -flto=thin --cuda-gpu-arch=sm_" + std::to_string (prop.major ) + std::to_string (prop.minor ) +
257+ " --cuda-device-only -emit-llvm -S temp_kernel_source.cu -o temp_kernel.ll " ;
258+ command += opts[4 ];
259+ CeedCallBackend (CeedCallSystem (ceed, command.c_str (), " JiT kernel source" ));
248260
261+ command = " llvm-link-20 temp_kernel.ll --ignore-non-bitcode --internalize --only-needed -S -o temp_kernel_linked.ll " ;
249262 // Searches for .a files in rust directory
250263 // Note: this is necessary because rust crate names may not match the folder they are in
251264 for (CeedInt i = 0 ; i < num_rust_source_dirs; i++) {
@@ -260,33 +273,45 @@ static int CeedCompileCore_Cuda(Ceed ceed, const char *source, const bool throw_
260273 std::string filename (entry->d_name );
261274
262275 if (filename.size () >= 2 && filename.substr (filename.size () - 2 ) == " .a" ) {
263- cmd += dir + " /" + filename + " " ;
276+ command += dir + " /" + filename + " " ;
264277 }
265278 }
266279 closedir (dp);
267- // Todo : when libceed switches to c++17, switch to std::filesystem for the loop above
280+ // TODO : when libCEED switches to c++17, switch to std::filesystem for the loop above
268281 }
269282
270- CeedDebug (ceed, " Running llvm-link: %s\n " , cmd.c_str ());
271- err = system (cmd.c_str ());
272- CeedCheck (!err, ceed, CEED_ERROR_BACKEND, " Failed to link C and Rust sources with LLVM\n llvm-link command: %s" , cmd.c_str ());
283+ // Link, optimize, and compile final CUDA kernel
284+ CeedCallBackend (CeedCallSystem (ceed, command.c_str (), " link C and Rust source" ));
285+ CeedCallBackend (
286+ CeedCallSystem (ceed, " opt --passes internalize,inline temp_kernel_linked.ll -o temp_kernel_opt.bc" , " optimize linked C and Rust source" ));
287+ CeedCallBackend (CeedCallSystem (
288+ ceed,
289+ (" llc -O3 -mcpu=sm_" + std::to_string (prop.major ) + std::to_string (prop.minor ) + " temp_kernel_opt.bc -o temp_kernel_final.ptx" ).c_str (),
290+ " compile final CUDA kernel" ));
273291
274- err = system (" opt --passes internalize,inline kern2.ll -o kern3.bc" );
275- CeedCheck (!err, ceed, CEED_ERROR_BACKEND, " Failed to Optimize QFunction LLVM IR" );
276-
277- err = system ((" llc -O3 -mcpu=sm_" + std::to_string (prop.major ) + std::to_string (prop.minor ) + " kern3.bc -o kern.ptx" ).c_str ());
278- CeedCheck (!err, ceed, CEED_ERROR_BACKEND, " Failed to compile QFunction LLVM IR)\n " );
279-
280- ifstream ptxfile (" kern.ptx" );
292+ ifstream ptxfile (" temp_kernel_final.ptx" );
281293 ostringstream sstr;
282294
283295 sstr << ptxfile.rdbuf ();
284296
285297 auto ptx_data = sstr.str ();
286298 ptx_size = ptx_data.length ();
287299
288- CeedCallCuda (ceed, cuModuleLoadData (module , ptx_data.c_str ()));
289- CeedCallBackend (CeedFree (&ptx_data));
300+ int result = cuModuleLoadData (module , ptx_data.c_str ());
301+
302+ *is_compile_good = result == 0 ;
303+ if (!*is_compile_good) {
304+ if (throw_error) {
305+ return CeedError (ceed, CEED_ERROR_BACKEND, " Failed to load module data" );
306+ } else {
307+ // LCOV_EXCL_START
308+ CeedDebug256 (ceed, CEED_DEBUG_COLOR_ERROR, " ---------- COMPILE ERROR DETECTED ----------\n " );
309+ CeedDebug (ceed, " Error: Failed to load module data" );
310+ CeedDebug256 (ceed, CEED_DEBUG_COLOR_ERROR, " ---------- BACKEND MAY FALLBACK ----------\n " );
311+ return CEED_ERROR_SUCCESS;
312+ // LCOV_EXCL_STOP
313+ }
314+ }
290315 }
291316 return CEED_ERROR_SUCCESS;
292317}
0 commit comments