Skip to content
Merged
Show file tree
Hide file tree
Changes from 58 commits
Commits
Show all changes
66 commits
Select commit Hold shift + click to select a range
6023bab
Switch compiler to clang (not portable)
Jun 25, 2025
857b77a
test add_num
Jun 26, 2025
909af6b
compile with llvm tools
Jun 27, 2025
ad100b2
not working linking
Jul 1, 2025
ba3b719
not fixed
Jul 1, 2025
b71448f
Update ex1-volume.h
Jul 1, 2025
d47fb3d
update
Jul 1, 2025
24b03c2
remove global path
Jul 1, 2025
b089501
changes
Jul 7, 2025
111c301
changes 2
Jul 7, 2025
86b2eb5
crate works
Jul 9, 2025
0b4c4bf
basic gpu rust compilation
Jul 14, 2025
0c427df
still not working
Jul 15, 2025
db640a8
rust source roots basic support
Jul 15, 2025
142f1c0
nvrtc/clang selection
Jul 16, 2025
151965a
cleanup
Jul 16, 2025
789a422
update example (not working)
Jul 18, 2025
8b9b002
add rust example
Jul 21, 2025
40f43ad
Merge branch 'main' into allen-rust-jit
SirAlienTheGreat Jul 21, 2025
3230820
Merge pull request #1 from SirAlienTheGreat/allen-rust-jit
SirAlienTheGreat Jul 21, 2025
a6c33a2
fix merge issue
Jul 21, 2025
321e426
delete temp files
Jul 22, 2025
19ab554
cleanup
Jul 22, 2025
c4fac45
rust qfunc 2d array (needs doc)
Jul 22, 2025
07beb34
cleanup
Jul 22, 2025
10b8f19
more cleanup
Jul 23, 2025
824d903
downgrade back to c++11
Jul 23, 2025
5a32ce8
format
Jul 23, 2025
9a37183
final draft cleanup
Jul 23, 2025
bc6eb8f
formatting + CUDA_CLANG -> GPU_CLANG
Jul 23, 2025
e7a4165
Update cuda CEED_QFUNCTION_RUST
SirAlienTheGreat Jul 23, 2025
2d49fec
fix python
SirAlienTheGreat Jul 25, 2025
0626555
fix python and format
SirAlienTheGreat Jul 25, 2025
27426f6
format fr
SirAlienTheGreat Jul 25, 2025
012797b
update comment
SirAlienTheGreat Jul 25, 2025
5a808fe
fix python fr
SirAlienTheGreat Jul 25, 2025
1e54ea9
Apply error suggestions from code review
SirAlienTheGreat Jul 28, 2025
7d05196
update errors to libceed format
Jul 28, 2025
952d903
Apply suggestions from code review
SirAlienTheGreat Jul 28, 2025
f89960a
add optimization flag
Jul 28, 2025
b9af985
remove line breaks
SirAlienTheGreat Jul 28, 2025
74794ea
Apply suggestions from code review
SirAlienTheGreat Jul 28, 2025
a4de9fb
avoid python in macro better
Jul 28, 2025
df27cf8
Merge branch 'main' of github.com:SirAlienTheGreat/libCEED
Jul 28, 2025
165f8cc
add rust example
Jul 29, 2025
2ceed77
format
Jul 29, 2025
e8cdf68
Apply suggestions from code review
SirAlienTheGreat Jul 29, 2025
195d44f
move rust example to own directory
Jul 29, 2025
3af1088
Merge branch 'main' of github.com:SirAlienTheGreat/libCEED
Jul 29, 2025
a9d0164
Simplify python exclusion logic
SirAlienTheGreat Jul 29, 2025
d51415b
re-fix python
Jul 29, 2025
d8a2093
Update python/build_ceed_cffi.py
SirAlienTheGreat Jul 29, 2025
96e762f
change names and simplify makefile
Jul 30, 2025
4ea74b2
Revert "change names and simplify makefile"
Jul 30, 2025
91a4609
Apply Jeremy's diff
Jul 30, 2025
87760ac
Simplify CeedCallSystem
Jul 30, 2025
ae4cbba
use rust-install llvm tools
Jul 30, 2025
2e43fc9
add gitignores
Jul 30, 2025
46392e4
update paths
Jul 31, 2025
b655e22
example absolute path
Jul 31, 2025
d147647
fix comments
Jul 31, 2025
da3ab3c
apply partial diff
SirAlienTheGreat Jul 31, 2025
8c8bef2
add newline
Jul 31, 2025
af83de8
add makefile
Jul 31, 2025
cd5d2b9
Update examples/rust-qfunctions/Makefile
SirAlienTheGreat Jul 31, 2025
b2d0003
update makefile
Jul 31, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 10 additions & 9 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
[workspace]
members = [
"rust/libceed",
"rust/libceed-sys",
"examples/rust/ex1-volume",
"examples/rust/ex1-volume-vector",
"examples/rust/ex2-surface",
"examples/rust/ex2-surface-vector",
"examples/rust/ex3-volume",
"examples/rust/ex3-volume-vector",
"examples/rust/mesh",
"rust/libceed",
"rust/libceed-sys",
"examples/rust/ex1-volume",
"examples/rust/ex1-volume-vector",
"examples/rust/ex2-surface",
"examples/rust/ex2-surface-vector",
"examples/rust/ex3-volume",
"examples/rust/ex3-volume-vector",
"examples/rust/mesh",
]
exclude = ["examples/rust-qfunctions/ex1-volume-rs"]
236 changes: 190 additions & 46 deletions backends/cuda/ceed-cuda-compile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,14 @@
#include <ceed/backend.h>
#include <ceed/jit-tools.h>
#include <cuda_runtime.h>
#include <dirent.h>
#include <nvrtc.h>
#include <stdarg.h>
#include <string.h>
#include <sys/types.h>

#include <fstream>
#include <iostream>
#include <sstream>

#include "ceed-cuda-common.h"
Expand All @@ -31,9 +35,32 @@
CeedChk_Nvrtc(ceed, ierr_q_); \
} while (0)

//------------------------------------------------------------------------------
// Call system command and capture stdout + stderr
//
// Runs `command` through popen() with stderr redirected into stdout, collects
// everything the command prints, and reports it via CeedDebug.
//
// ceed    - Ceed context used for debug output and error reporting
// command - shell command line to execute
// message - short description of the action, used in error text ("Failed to <message> ...")
//
// Returns CEED_ERROR_SUCCESS when the command could be started and pclose()
// reports 0; otherwise raises a CEED_ERROR_BACKEND error that includes the
// captured output.
//------------------------------------------------------------------------------
static int CeedCallSystem(Ceed ceed, const char *command, const char *message) {
  CeedDebug(ceed, "Running command:\n$ %s\n", command);
  FILE *output_stream = popen((command + std::string(" 2>&1")).c_str(), "r");

  CeedCheck(output_stream != nullptr, ceed, CEED_ERROR_BACKEND, "Failed to %s with command: %s", message, command);

  // Accumulate every chunk that is read: reusing a single fixed buffer would keep only the
  // last chunk, and would leave it uninitialized when the command prints nothing at all
  char        chunk[4 * CEED_MAX_RESOURCE_LEN] = "";
  std::string output;

  while (fgets(chunk, sizeof(chunk), output_stream) != nullptr) {
    output += chunk;
  }
  CeedDebug(ceed, "Command output:\n%s\n", output.c_str());

  CeedCheck(pclose(output_stream) == 0, ceed, CEED_ERROR_BACKEND, "Failed to %s with error: %s", message, output.c_str());
  return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Compile CUDA kernel
//------------------------------------------------------------------------------
using std::ifstream;
using std::ofstream;
using std::ostringstream;

static int CeedCompileCore_Cuda(Ceed ceed, const char *source, const bool throw_error, bool *is_compile_good, CUmodule *module,
const CeedInt num_defines, va_list args) {
size_t ptx_size;
Expand All @@ -48,6 +75,14 @@ static int CeedCompileCore_Cuda(Ceed ceed, const char *source, const bool throw_
cudaFree(0); // Make sure a Context exists for nvrtc

std::ostringstream code;
bool using_clang;

CeedCallBackend(CeedGetIsClang(ceed, &using_clang));

CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS,
using_clang ? "Compiling CUDA with Clang backend (with Rust QFunction support)"
: "Compiling CUDA with NVRTC backend (without Rust QFunction support).\nTo use the Clang backend, set the environment "
"variable GPU_CLANG=1");

// Get kernel specific options, such as kernel constants
if (num_defines > 0) {
Expand Down Expand Up @@ -116,66 +151,175 @@ static int CeedCompileCore_Cuda(Ceed ceed, const char *source, const bool throw_
code << source;

// Create Program
CeedCallNvrtc(ceed, nvrtcCreateProgram(&prog, code.str().c_str(), NULL, 0, NULL, NULL));

// Compile kernel
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- ATTEMPTING TO COMPILE JIT SOURCE ----------\n");
CeedDebug(ceed, "Source:\n%s\n", code.str().c_str());
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- END OF JIT SOURCE ----------\n");
if (CeedDebugFlag(ceed)) {
// LCOV_EXCL_START
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- JiT COMPILER OPTIONS ----------\n");
for (CeedInt i = 0; i < num_opts + num_jit_source_dirs + num_jit_defines; i++) {
CeedDebug(ceed, "Option %d: %s", i, opts[i]);
}
CeedDebug(ceed, "");
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- END OF JiT COMPILER OPTIONS ----------\n");
// LCOV_EXCL_STOP
}
nvrtcResult result = nvrtcCompileProgram(prog, num_opts + num_jit_source_dirs + num_jit_defines, opts);

for (CeedInt i = 0; i < num_jit_source_dirs; i++) {
CeedCallBackend(CeedFree(&opts[num_opts + i]));
}
for (CeedInt i = 0; i < num_jit_defines; i++) {
CeedCallBackend(CeedFree(&opts[num_opts + num_jit_source_dirs + i]));
}
CeedCallBackend(CeedFree(&opts));
*is_compile_good = result == NVRTC_SUCCESS;
if (!*is_compile_good) {
char *log;
size_t log_size;

CeedCallNvrtc(ceed, nvrtcGetProgramLogSize(prog, &log_size));
CeedCallBackend(CeedMalloc(log_size, &log));
CeedCallNvrtc(ceed, nvrtcGetProgramLog(prog, log));
if (throw_error) {
return CeedError(ceed, CEED_ERROR_BACKEND, "%s\n%s", nvrtcGetErrorString(result), log);
} else {
if (!using_clang) {
CeedCallNvrtc(ceed, nvrtcCreateProgram(&prog, code.str().c_str(), NULL, 0, NULL, NULL));

if (CeedDebugFlag(ceed)) {
// LCOV_EXCL_START
CeedDebug256(ceed, CEED_DEBUG_COLOR_ERROR, "---------- COMPILE ERROR DETECTED ----------\n");
CeedDebug(ceed, "Error: %s\nCompile log:\n%s\n", nvrtcGetErrorString(result), log);
CeedDebug256(ceed, CEED_DEBUG_COLOR_WARNING, "---------- BACKEND MAY FALLBACK ----------\n");
CeedCallBackend(CeedFree(&log));
CeedCallNvrtc(ceed, nvrtcDestroyProgram(&prog));
return CEED_ERROR_SUCCESS;
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- JiT COMPILER OPTIONS ----------\n");
for (CeedInt i = 0; i < num_opts + num_jit_source_dirs + num_jit_defines; i++) {
CeedDebug(ceed, "Option %d: %s", i, opts[i]);
}
CeedDebug(ceed, "");
CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- END OF JiT COMPILER OPTIONS ----------\n");
// LCOV_EXCL_STOP
}
}

nvrtcResult result = nvrtcCompileProgram(prog, num_opts + num_jit_source_dirs + num_jit_defines, opts);

for (CeedInt i = 0; i < num_jit_source_dirs; i++) {
CeedCallBackend(CeedFree(&opts[num_opts + i]));
}
for (CeedInt i = 0; i < num_jit_defines; i++) {
CeedCallBackend(CeedFree(&opts[num_opts + num_jit_source_dirs + i]));
}
CeedCallBackend(CeedFree(&opts));
*is_compile_good = result == NVRTC_SUCCESS;
if (!*is_compile_good) {
char *log;
size_t log_size;

CeedCallNvrtc(ceed, nvrtcGetProgramLogSize(prog, &log_size));
CeedCallBackend(CeedMalloc(log_size, &log));
CeedCallNvrtc(ceed, nvrtcGetProgramLog(prog, log));
if (throw_error) {
return CeedError(ceed, CEED_ERROR_BACKEND, "%s\n%s", nvrtcGetErrorString(result), log);
} else {
// LCOV_EXCL_START
CeedDebug256(ceed, CEED_DEBUG_COLOR_ERROR, "---------- COMPILE ERROR DETECTED ----------\n");
CeedDebug(ceed, "Error: %s\nCompile log:\n%s\n", nvrtcGetErrorString(result), log);
CeedDebug256(ceed, CEED_DEBUG_COLOR_ERROR, "---------- BACKEND MAY FALLBACK ----------\n");
CeedCallBackend(CeedFree(&log));
CeedCallNvrtc(ceed, nvrtcDestroyProgram(&prog));
return CEED_ERROR_SUCCESS;
// LCOV_EXCL_STOP
}
}

#if CUDA_VERSION >= 11010
CeedCallNvrtc(ceed, nvrtcGetCUBINSize(prog, &ptx_size));
CeedCallBackend(CeedMalloc(ptx_size, &ptx));
CeedCallNvrtc(ceed, nvrtcGetCUBIN(prog, ptx));
CeedCallNvrtc(ceed, nvrtcGetCUBINSize(prog, &ptx_size));
CeedCallBackend(CeedMalloc(ptx_size, &ptx));
CeedCallNvrtc(ceed, nvrtcGetCUBIN(prog, ptx));
#else
CeedCallNvrtc(ceed, nvrtcGetPTXSize(prog, &ptx_size));
CeedCallBackend(CeedMalloc(ptx_size, &ptx));
CeedCallNvrtc(ceed, nvrtcGetPTX(prog, ptx));
CeedCallNvrtc(ceed, nvrtcGetPTXSize(prog, &ptx_size));
CeedCallBackend(CeedMalloc(ptx_size, &ptx));
CeedCallNvrtc(ceed, nvrtcGetPTX(prog, ptx));
#endif
CeedCallNvrtc(ceed, nvrtcDestroyProgram(&prog));
CeedCallNvrtc(ceed, nvrtcDestroyProgram(&prog));

CeedCallCuda(ceed, cuModuleLoadData(module, ptx));
CeedCallBackend(CeedFree(&ptx));
return CEED_ERROR_SUCCESS;
} else {
const char *full_filename = "temp_kernel_source.cu";
FILE *file = fopen(full_filename, "w");

CeedCallCuda(ceed, cuModuleLoadData(module, ptx));
CeedCallBackend(CeedFree(&ptx));
CeedCheck(file, ceed, CEED_ERROR_BACKEND, "Failed to create file. Write access is required for cuda-clang\n");
fputs(code.str().c_str(), file);
fclose(file);

// Get rust crate directories

const char **rust_source_dirs = nullptr;
int num_rust_source_dirs = 0;

CeedCallBackend(CeedGetRustSourceRoots(ceed, &num_rust_source_dirs, &rust_source_dirs));

std::string rust_dirs[10];

if (num_rust_source_dirs > 0) {
CeedDebug(ceed, "There are %d source dirs, including %s\n", num_rust_source_dirs, rust_source_dirs[0]);
}

for (CeedInt i = 0; i < num_rust_source_dirs; i++) {
rust_dirs[i] = std::string(rust_source_dirs[i]);
}

CeedCallBackend(CeedRestoreRustSourceRoots(ceed, &rust_source_dirs));

// Compile Rust crate(s) needed
std::string command;

for (CeedInt i = 0; i < num_rust_source_dirs; i++) {
command = "cargo +nightly build --release --target nvptx64-nvidia-cuda --config " + rust_dirs[i] + "/.cargo/config.toml --manifest-path " +
rust_dirs[i] + "/Cargo.toml";
CeedCallBackend(CeedCallSystem(ceed, command.c_str(), "build Rust crate"));
}

// Compile wrapper kernel
command = "clang++ -flto=thin --cuda-gpu-arch=sm_" + std::to_string(prop.major) + std::to_string(prop.minor) +
" --cuda-device-only -emit-llvm -S temp_kernel_source.cu -o temp_kernel.ll ";
command += opts[4];
CeedCallBackend(CeedCallSystem(ceed, command.c_str(), "JiT kernel source"));

// the $(find $(rustc +nightly --print sysroot) -name llvm-link) finds the rust-installed llvm-link tool and runs it
command =
"$(find $(rustc +nightly --print sysroot) -name llvm-link) temp_kernel.ll --ignore-non-bitcode --internalize --only-needed -S -o "
"temp_kernel_linked.ll ";

// Searches for .a files in rust directory
// Note: this is necessary because rust crate names may not match the folder they are in
for (CeedInt i = 0; i < num_rust_source_dirs; i++) {
std::string dir = rust_dirs[i] + "/target/nvptx64-nvidia-cuda/release";
DIR *dp = opendir(dir.c_str());

CeedCheck(dp != nullptr, ceed, CEED_ERROR_BACKEND, "Could not open directory: %s", dir.c_str());
struct dirent *entry;

// finds files ending in .a
while ((entry = readdir(dp)) != nullptr) {
std::string filename(entry->d_name);

if (filename.size() >= 2 && filename.substr(filename.size() - 2) == ".a") {
command += dir + "/" + filename + " ";
}
}
closedir(dp);
// TODO: when libCEED switches to c++17, switch to std::filesystem for the loop above
}

// Link, optimize, and compile final CUDA kernel
// note that $(find $(rustc +nightly --print sysroot) -name [llvm tool]) is used to find the rust-installed llvm tool
CeedCallBackend(CeedCallSystem(ceed, command.c_str(), "link C and Rust source"));
CeedCallBackend(CeedCallSystem(
ceed, "$(find $(rustc +nightly --print sysroot) -name opt) --passes internalize,inline temp_kernel_linked.ll -o temp_kernel_opt.bc",
"optimize linked C and Rust source"));
CeedCallBackend(CeedCallSystem(ceed,
("$(find $(rustc +nightly --print sysroot) -name llc) -O3 -mcpu=sm_" + std::to_string(prop.major) +
std::to_string(prop.minor) + " temp_kernel_opt.bc -o temp_kernel_final.ptx")
.c_str(),
"compile final CUDA kernel"));

ifstream ptxfile("temp_kernel_final.ptx");
ostringstream sstr;

sstr << ptxfile.rdbuf();

auto ptx_data = sstr.str();
ptx_size = ptx_data.length();

int result = cuModuleLoadData(module, ptx_data.c_str());

*is_compile_good = result == 0;
if (!*is_compile_good) {
if (throw_error) {
return CeedError(ceed, CEED_ERROR_BACKEND, "Failed to load module data");
} else {
// LCOV_EXCL_START
CeedDebug256(ceed, CEED_DEBUG_COLOR_ERROR, "---------- COMPILE ERROR DETECTED ----------\n");
CeedDebug(ceed, "Error: Failed to load module data");
CeedDebug256(ceed, CEED_DEBUG_COLOR_ERROR, "---------- BACKEND MAY FALLBACK ----------\n");
return CEED_ERROR_SUCCESS;
// LCOV_EXCL_STOP
}
}
}
return CEED_ERROR_SUCCESS;
}

Expand Down
9 changes: 7 additions & 2 deletions examples/ceed/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ OPT ?= -O -g

# Ceed directory
CEED_DIR ?= ../..
CEED_FLAGS ?= -I$(CEED_DIR)/include -std=c11 $(OPT)
CEED_LIBS ?= -Wl,-rpath,$(abspath $(CEED_DIR)/lib) -L$(CEED_DIR)/lib -lceed -lm
CEED_FLAGS ?= -I$(CEED_DIR)/include -std=c11 $(OPT)
CEED_LIBS ?= -Wl,-rpath,$(abspath $(CEED_DIR)/lib) -L$(CEED_DIR)/lib -lceed -L$(CEED_DIR)/examples/ceed -lm

EXAMPLES.c = $(wildcard ex*.c)
EXAMPLES = $(EXAMPLES.c:%.c=%)
Expand All @@ -24,6 +24,11 @@ all: $(EXAMPLES)
# Remove built-in rules
%: %.c

# Special build rule for example 4 (rust)
ex4-volume-rust: ex4-volume-rust.c
cargo +nightly build --release --manifest-path ex4-volume-rs/Cargo.toml --config ex4-volume-rs/.cargo/config.toml
$(LINK.c) $(CEED_FLAGS) $(CEED_LDFLAGS) $< -o $@ $(CEED_LIBS) -L$(CEED_DIR)/examples/ceed/ex4-volume-rs/target/release -lex4_volume_rs

# Rules for building the examples
%: %.c
$(LINK.c) $(CEED_FLAGS) $(CEED_LDFLAGS) $< -o $@ $(CEED_LIBS)
Expand Down
1 change: 1 addition & 0 deletions examples/rust-qfunctions/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
temp_*
6 changes: 6 additions & 0 deletions examples/rust-qfunctions/ex1-volume-rs/.cargo/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[target.nvptx64-nvidia-cuda]
rustflags = [
"-C", "linker-plugin-lto",
]
[unstable]
build-std = ["panic_abort","core", "alloc"]
3 changes: 3 additions & 0 deletions examples/rust-qfunctions/ex1-volume-rs/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
target
registry
Cargo.lock
17 changes: 17 additions & 0 deletions examples/rust-qfunctions/ex1-volume-rs/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[package]
name = "ex1-volume-rs"
version = "0.1.0"
edition = "2021"

[profile.dev]
panic = "abort"

[profile.release]
panic = "abort"

# Compiles the crate as a staticlib (.a archive), used for both CPU and GPU linking
[lib]
crate-type = ["staticlib"]

[dependencies]
ndarray = {version = "0.16.1", default-features = false}
2 changes: 2 additions & 0 deletions examples/rust-qfunctions/ex1-volume-rs/rust-toolchain.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[toolchain]
channel = "nightly"
Loading