Skip to content

Commit 91a4609

Browse files
author
Allen MacFarland
committed
Apply Jeremy's diff
1 parent 4ea74b2 commit 91a4609

File tree

1 file changed

+62
-37
lines changed

1 file changed

+62
-37
lines changed

backends/cuda/ceed-cuda-compile.cpp

Lines changed: 62 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,25 @@
3535
CeedChk_Nvrtc(ceed, ierr_q_); \
3636
} while (0)
3737

38+
//------------------------------------------------------------------------------
39+
// Call system command and capture stdout + stderr
40+
//------------------------------------------------------------------------------
41+
static int CeedCallSystem(Ceed ceed, const char *command, const char *message) {
42+
CeedDebug(ceed, "Running command:\n$ %s\n", command);
43+
FILE *output_stream = popen((command + std::string(" 2>&1")).c_str(), "r");
44+
45+
CeedCheck(output_stream != nullptr, ceed, CEED_ERROR_BACKEND, "Failed to %s with command: %s", message, command);
46+
47+
char output[4 * CEED_MAX_RESOURCE_LEN];
48+
49+
while (fgets(output, sizeof(output), output_stream) != nullptr) {
50+
}
51+
CeedDebug(ceed, "Command output:\n%s\n", output);
52+
53+
CeedCheck(pclose(output_stream) == 0, ceed, CEED_ERROR_BACKEND, "Failed to %s with error: %s", message, output);
54+
return CEED_ERROR_SUCCESS;
55+
}
56+
3857
//------------------------------------------------------------------------------
3958
// Compile CUDA kernel
4059
//------------------------------------------------------------------------------
@@ -61,9 +80,9 @@ static int CeedCompileCore_Cuda(Ceed ceed, const char *source, const bool throw_
6180
CeedCallBackend(CeedGetIsClang(ceed, &using_clang));
6281

6382
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS,
64-
using_clang
65-
? "Compiling CUDA with Clang backend (with Rust QFunction support)"
66-
: "Compiling CUDA with NVRTC backend (without Rust QFunction support). To use Clang, set the environmental variable GPU_CLANG=1");
83+
using_clang ? "Compiling CUDA with Clang backend (with Rust QFunction support)"
84+
: "Compiling CUDA with NVRTC backend (without Rust QFunction support).\nTo use the Clang backend, set the environment "
85+
"variable GPU_CLANG=1");
6786

6887
// Get kernel specific options, such as kernel constants
6988
if (num_defines > 0) {
@@ -198,12 +217,10 @@ static int CeedCompileCore_Cuda(Ceed ceed, const char *source, const bool throw_
198217
CeedCallBackend(CeedFree(&ptx));
199218
return CEED_ERROR_SUCCESS;
200219
} else {
201-
const char *full_filename = "temp-jit.cu";
220+
const char *full_filename = "temp_kernel_source.cu";
202221
FILE *file = fopen(full_filename, "w");
203-
if (!file) {
204-
CeedDebug256(ceed, CEED_DEBUG_COLOR_ERROR, "Failed to create file. Write access is required for cuda-clang\n");
205-
return 1;
206-
}
222+
223+
CeedCheck(file, ceed, CEED_ERROR_BACKEND, "Failed to create file. Write access is required for cuda-clang\n");
207224
fputs(code.str().c_str(), file);
208225
fclose(file);
209226

@@ -226,26 +243,22 @@ static int CeedCompileCore_Cuda(Ceed ceed, const char *source, const bool throw_
226243

227244
CeedCallBackend(CeedRestoreRustSourceRoots(ceed, &rust_source_dirs));
228245

229-
// Compile with rust
230-
int err;
231-
std::string cmd;
246+
// Compile Rust crate(s) needed
247+
std::string command;
232248

233249
for (CeedInt i = 0; i < num_rust_source_dirs; i++) {
234-
cmd = "cargo +nightly build --release --target nvptx64-nvidia-cuda --config " + rust_dirs[i] + "/.cargo/config.toml --manifest-path " +
235-
rust_dirs[i] + "/Cargo.toml";
236-
err = system(cmd.c_str());
237-
CeedCheck(!err, ceed, CEED_ERROR_BACKEND, "Failed to build Rust crates for GPU JiT.\nFailed to build Rust crate %d with command: %s", i,
238-
cmd.c_str());
250+
command = "cargo +nightly build --release --target nvptx64-nvidia-cuda --config " + rust_dirs[i] + "/.cargo/config.toml --manifest-path " +
251+
rust_dirs[i] + "/Cargo.toml";
252+
CeedCallBackend(CeedCallSystem(ceed, command.c_str(), "build Rust crate"));
239253
}
240254

241-
cmd = "clang++ -flto=thin --cuda-gpu-arch=sm_" + std::to_string(prop.major) + std::to_string(prop.minor) +
242-
" --cuda-device-only -emit-llvm -S temp-jit.cu -o kern.ll ";
243-
cmd += opts[4];
244-
err = system(cmd.c_str());
245-
CeedCheck(!err, ceed, CEED_ERROR_BACKEND, "Failed to compile QFunction source to LLVM IR");
246-
247-
cmd = "llvm-link-20 kern.ll --ignore-non-bitcode --internalize --only-needed -S -o kern2.ll ";
255+
// Compile wrapper kernel
256+
command = "clang++ -flto=thin --cuda-gpu-arch=sm_" + std::to_string(prop.major) + std::to_string(prop.minor) +
257+
" --cuda-device-only -emit-llvm -S temp_kernel_source.cu -o temp_kernel.ll ";
258+
command += opts[4];
259+
CeedCallBackend(CeedCallSystem(ceed, command.c_str(), "JiT kernel source"));
248260

261+
command = "llvm-link-20 temp_kernel.ll --ignore-non-bitcode --internalize --only-needed -S -o temp_kernel_linked.ll ";
249262
// Searches for .a files in Rust directory
250263
// Note: this is necessary because rust crate names may not match the folder they are in
251264
for (CeedInt i = 0; i < num_rust_source_dirs; i++) {
@@ -260,33 +273,45 @@ static int CeedCompileCore_Cuda(Ceed ceed, const char *source, const bool throw_
260273
std::string filename(entry->d_name);
261274

262275
if (filename.size() >= 2 && filename.substr(filename.size() - 2) == ".a") {
263-
cmd += dir + "/" + filename + " ";
276+
command += dir + "/" + filename + " ";
264277
}
265278
}
266279
closedir(dp);
267-
// Todo: when libceed switches to c++17, switch to std::filesystem for the loop above
280+
// TODO: when libCEED switches to c++17, switch to std::filesystem for the loop above
268281
}
269282

270-
CeedDebug(ceed, "Running llvm-link: %s\n", cmd.c_str());
271-
err = system(cmd.c_str());
272-
CeedCheck(!err, ceed, CEED_ERROR_BACKEND, "Failed to link C and Rust sources with LLVM\nllvm-link command: %s", cmd.c_str());
283+
// Link, optimize, and compile final CUDA kernel
284+
CeedCallBackend(CeedCallSystem(ceed, command.c_str(), "link C and Rust source"));
285+
CeedCallBackend(
286+
CeedCallSystem(ceed, "opt --passes internalize,inline temp_kernel_linked.ll -o temp_kernel_opt.bc", "optimize linked C and Rust source"));
287+
CeedCallBackend(CeedCallSystem(
288+
ceed,
289+
("llc -O3 -mcpu=sm_" + std::to_string(prop.major) + std::to_string(prop.minor) + " temp_kernel_opt.bc -o temp_kernel_final.ptx").c_str(),
290+
"compile final CUDA kernel"));
273291

274-
err = system("opt --passes internalize,inline kern2.ll -o kern3.bc");
275-
CeedCheck(!err, ceed, CEED_ERROR_BACKEND, "Failed to Optimize QFunction LLVM IR");
276-
277-
err = system(("llc -O3 -mcpu=sm_" + std::to_string(prop.major) + std::to_string(prop.minor) + " kern3.bc -o kern.ptx").c_str());
278-
CeedCheck(!err, ceed, CEED_ERROR_BACKEND, "Failed to compile QFunction LLVM IR)\n");
279-
280-
ifstream ptxfile("kern.ptx");
292+
ifstream ptxfile("temp_kernel_final.ptx");
281293
ostringstream sstr;
282294

283295
sstr << ptxfile.rdbuf();
284296

285297
auto ptx_data = sstr.str();
286298
ptx_size = ptx_data.length();
287299

288-
CeedCallCuda(ceed, cuModuleLoadData(module, ptx_data.c_str()));
289-
CeedCallBackend(CeedFree(&ptx_data));
300+
int result = cuModuleLoadData(module, ptx_data.c_str());
301+
302+
*is_compile_good = result == 0;
303+
if (!*is_compile_good) {
304+
if (throw_error) {
305+
return CeedError(ceed, CEED_ERROR_BACKEND, "Failed to load module data");
306+
} else {
307+
// LCOV_EXCL_START
308+
CeedDebug256(ceed, CEED_DEBUG_COLOR_ERROR, "---------- COMPILE ERROR DETECTED ----------\n");
309+
CeedDebug(ceed, "Error: Failed to load module data");
310+
CeedDebug256(ceed, CEED_DEBUG_COLOR_ERROR, "---------- BACKEND MAY FALLBACK ----------\n");
311+
return CEED_ERROR_SUCCESS;
312+
// LCOV_EXCL_STOP
313+
}
314+
}
290315
}
291316
return CEED_ERROR_SUCCESS;
292317
}

0 commit comments

Comments
 (0)