Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ object = { version = "0.37", default-features = false, features = [
compiletest_rs = { version = "0.11.0" }
regex = { version = "1.11.1", default-features = false }
rustc-build-sysroot = { workspace = true }
tempfile = { version = "3.13" }
which = { version = "8.0.0", default-features = false, features = ["real-sys", "regex"] }

[lints]
Expand Down
42 changes: 41 additions & 1 deletion src/linker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ pub enum LinkerError {
#[error("failure linking module {0}")]
LinkModuleError(PathBuf),

/// Parsing an IR module failed.
#[error("failure parsing IR module `{0}`: {1}")]
IRParseError(PathBuf, String),

/// Linking a module included in an archive failed.
#[error("failure linking module {1} from {0}")]
LinkArchiveModuleError(PathBuf, PathBuf),
Expand Down Expand Up @@ -168,6 +172,8 @@ enum InputType {
MachO,
/// Archive file. (.a)
Archive,
/// IR file (.ll)
Ir,
}

impl std::fmt::Display for InputType {
Expand All @@ -180,6 +186,7 @@ impl std::fmt::Display for InputType {
Self::Elf => "elf",
Self::MachO => "Mach-O",
Self::Archive => "archive",
Self::Ir => "ir",
}
)
}
Expand Down Expand Up @@ -538,6 +545,21 @@ fn link_data<'ctx>(
data: &[u8],
in_type: InputType,
) -> Result<(), LinkerError> {
if in_type == InputType::Ir {
let mut ir_data = data.to_vec();
ir_data.push(0);
let c_str = CStr::from_bytes_with_nul(&ir_data).expect("We just added the null terminator");

return llvm::link_ir_buffer(context, module, c_str)
.map_err(|e| LinkerError::IRParseError(path.to_owned(), e))
.and_then(|linked| {
if linked {
Ok(())
} else {
Err(LinkerError::LinkModuleError(path.to_owned()))
}
});
}
let bitcode = match in_type {
InputType::Bitcode => Cow::Borrowed(data),
InputType::Elf => match llvm::find_embedded_bitcode(context, data) {
Expand All @@ -551,6 +573,7 @@ fn link_data<'ctx>(
InputType::MachO => return Err(LinkerError::InvalidInputType(path.to_owned())),
// this can't really happen
InputType::Archive => panic!("nested archives not supported duh"),
InputType::Ir => unreachable!(),
};

if !llvm::link_bitcode_buffer(context, module, &bitcode) {
Expand Down Expand Up @@ -825,21 +848,38 @@ fn detect_input_type(data: &[u8]) -> Option<InputType> {
if data.len() < 8 {
return None;
}

match &data[..4] {
b"\x42\x43\xC0\xDE" | b"\xDE\xC0\x17\x0b" => Some(InputType::Bitcode),
b"\x7FELF" => Some(InputType::Elf),
b"\xcf\xfa\xed\xfe" => Some(InputType::MachO),
_ => {
if &data[..8] == b"!<arch>\x0A" {
Some(InputType::Archive)
} else if is_llvm_ir(data) {
Some(InputType::Ir)
} else {
None
}
}
}
}

/// Heuristically decides whether `data` looks like textual LLVM IR (a `.ll` file).
///
/// Leading ASCII whitespace is skipped, and the remaining bytes must begin
/// with one of a fixed set of tokens that commonly open an IR module. Only
/// the very start of the buffer is inspected, so an IR file that opens with
/// anything else (for example an arbitrary `;` comment) is not recognised.
fn is_llvm_ir(data: &[u8]) -> bool {
    // Tokens that may appear first in a textual IR module.
    const IR_LEADERS: [&[u8]; 7] = [
        b"; ModuleID",
        b"source_filename",
        b"target datalayout",
        b"target triple",
        b"define ",
        b"declare ",
        b"!llvm",
    ];

    // Index of the first non-whitespace byte; an all-whitespace (or empty)
    // buffer leaves nothing to inspect.
    let start = data
        .iter()
        .position(|byte| !byte.is_ascii_whitespace())
        .unwrap_or(data.len());
    let body = &data[start..];

    for leader in IR_LEADERS.iter() {
        if body.starts_with(leader) {
            return true;
        }
    }
    false
}

/// Output handle returned by the linker; a thin wrapper around a single
/// private `MemoryBuffer`.
///
/// NOTE(review): presumably this is the value handed back by
/// `link_to_buffer` and exposes the produced bytes — confirm against the
/// accessor methods, which are outside this view.
pub struct LinkerOutput {
// Wrapped buffer; kept private so callers cannot reach it directly.
inner: MemoryBuffer,
}
Expand Down
50 changes: 50 additions & 0 deletions src/llvm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use llvm_sys::{
error::{
LLVMDisposeErrorMessage, LLVMGetErrorMessage, LLVMGetErrorTypeId, LLVMGetStringErrorTypeId,
},
ir_reader::LLVMParseIRInContext,
linker::LLVMLinkModules2,
object::{
LLVMCreateBinary, LLVMDisposeBinary, LLVMDisposeSectionIterator, LLVMGetSectionContents,
Expand Down Expand Up @@ -141,6 +142,55 @@ pub(crate) fn link_bitcode_buffer<'ctx>(
linked
}

/// Links an LLVM IR buffer into the given module.
///
/// The buffer must be null-terminated (hence `CStr`), because LLVM's IR parser
/// requires `RequiresNullTerminator=true` when creating the memory buffer.
/// See `getMemBuffer` with default `RequiresNullTerminator = true`:
/// https://github.com/llvm/llvm-project/blob/bde90624185ea2cead0a8d7231536e2625d78798/llvm/include/llvm/Support/MemoryBuffer.h#L134
/// Called by `LLVMParseIRInContext` follows this path parseIR => parseAssembly => parseAssemblyInto
/// Deeper inside LLVM parser's they rely on the null termination due performance optimization.
/// LLVM's C API does not enforce this at the type level, so callers must guarantee the invariant themselves.
/// See the relevant code inside LLVM's parser:
/// https://github.com/llvm/llvm-project/blob/bde90624185ea2cead0a8d7231536e2625d78798/llvm/lib/AsmParser/Parser.cpp#L30
///
/// Without the null terminator, LLVM hits an assertion in debug builds.
pub(crate) fn link_ir_buffer<'ctx>(
context: &'ctx LLVMContext,
module: &mut LLVMModule<'ctx>,
buffer: &CStr,
) -> Result<bool, String> {
let buffer_name = c"ir_buffer";
let buffer = buffer.to_bytes();
let mem_buffer = unsafe {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you're leaking this you need to call LLVMDisposeMemoryBuffer before returning

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

LLVMCreateMemoryBufferWithMemoryRange(
buffer.as_ptr().cast(),
buffer.len(),
buffer_name.as_ptr(),
0,
)
};

let mut temp_module = ptr::null_mut();
let (ret, message) = Message::with(|error_msg| unsafe {
// LLVMParseIRInContext takes ownership of mem_buffer, so we don't need to dispose of it ourselves.
// https://github.com/llvm/llvm-project/blob/00276b67d36a665119a6a7b39dbba69f45c44e58/llvm/lib/IRReader/IRReader.cpp#L122
LLVMParseIRInContext(
context.as_mut_ptr(),
mem_buffer,
&mut temp_module,
error_msg,
)
});

if ret == 0 {
let linked = unsafe { LLVMLinkModules2(module.as_mut_ptr(), temp_module) } == 0;
Ok(linked)
} else {
Err(message.as_string_lossy().to_string())
}
}

pub(crate) fn target_from_triple(triple: &CStr) -> Result<LLVMTargetRef, String> {
let mut target = ptr::null_mut();
let (ret, message) = Message::with(|message| unsafe {
Expand Down
90 changes: 90 additions & 0 deletions tests/ir_file_test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#![expect(unused_crate_dependencies, reason = "used in lib/bin")]
#![cfg(all(
any(feature = "llvm-20", feature = "llvm-21"),
not(feature = "no-llvm-linking")
))]

use std::ffi::CString;

use bpf_linker::{Linker, LinkerInput, LinkerOptions, OutputType};

/// Builds a minimal textual LLVM IR module for the BPF little-endian target.
///
/// `name` is interpolated into the `ModuleID` comment, the `source_filename`
/// and the name of the single defined function, `@test_{name}`, which
/// returns its `i32` argument plus one.
fn create_test_ir_content(name: &str) -> String {
    // One literal per emitted line; `{{` / `}}` are escaped braces for `format!`.
    format!(
        concat!(
            "; ModuleID = '{module}'\n",
            "source_filename = \"{module}\"\n",
            "target datalayout = \"e-m:e-p:64:64-i64:64-i128:128-n32:64-S128\"\n",
            "target triple = \"bpfel-unknown-none\"\n",
            "\n",
            "define i32 @test_{module}(i32 %x) #0 {{\n",
            "entry:\n",
            "%result = add i32 %x, 1\n",
            "ret i32 %result\n",
            "}}\n",
            "\n",
            "attributes #0 = {{ noinline nounwind optnone }}\n",
            "\n",
            "!llvm.module.flags = !{{!0}}\n",
            "!0 = !{{i32 1, !\"wchar_size\", i32 4}}\n",
        ),
        module = name,
    )
}

/// End-to-end check that textual LLVM IR (`.ll`) buffers are handled by the linker.
///
/// Three cases are exercised against one `Linker`:
/// 1. a well-formed module links and yields a non-empty object,
/// 2. a buffer whose first line is corrupted is rejected,
/// 3. a buffer with an intact IR header but an invalid body is rejected.
///
/// Case 3 exists because case 2 corrupts the `; ModuleID` line, which is one
/// of the prefixes input-type detection keys on; that buffer is presumably
/// rejected before LLVM's IR parser ever sees it. Case 3 keeps the header
/// untouched so the failure has to come from parsing the IR itself.
#[test]
fn test_link_ir_files() {
    let options = LinkerOptions {
        target: None,
        cpu: bpf_linker::Cpu::Generic,
        cpu_features: CString::default(),
        optimize: bpf_linker::OptLevel::No,
        unroll_loops: false,
        ignore_inline_never: false,
        llvm_args: vec![],
        disable_expand_memcpy_in_order: false,
        disable_memory_builtins: false,
        btf: false,
        allow_bpf_trap: false,
    };

    let linker = Linker::new(options);

    // Test 1: Valid IR should link successfully
    {
        let ir_content = create_test_ir_content("valid");

        let result = linker.link_to_buffer(
            [LinkerInput::Buffer {
                name: "valid.ll",
                bytes: ir_content.as_bytes(),
            }],
            OutputType::Object,
            ["test_valid"],
        );

        assert!(
            result.is_ok(),
            "Linking valid IR should succeed: {:?}",
            result.err()
        );

        let output = result.unwrap();
        assert!(!output.as_slice().is_empty(), "Output should not be empty");
    }

    // Test 2: A buffer with a corrupted first line should fail to link
    {
        let valid_content = create_test_ir_content("invalid");
        let invalid_content =
            valid_content.replace("; ModuleID = 'invalid'", ": ModuleXX = 'corrupted'");

        let result = linker.link_to_buffer(
            [LinkerInput::Buffer {
                name: "corrupted.ll",
                bytes: invalid_content.as_bytes(),
            }],
            OutputType::Object,
            Vec::<&str>::new(),
        );

        assert!(result.is_err(), "Linking corrupted IR should fail");
    }

    // Test 3: Recognisable IR header but invalid body should fail to link
    {
        let valid_content = create_test_ir_content("malformed");
        // `%missing` is never defined, so the module is not valid IR even
        // though its leading `; ModuleID` line is unchanged.
        let invalid_content = valid_content.replace("ret i32 %result", "ret i32 %missing");
        assert_ne!(
            invalid_content, valid_content,
            "The IR template no longer contains the instruction this test corrupts"
        );

        let result = linker.link_to_buffer(
            [LinkerInput::Buffer {
                name: "malformed.ll",
                bytes: invalid_content.as_bytes(),
            }],
            OutputType::Object,
            Vec::<&str>::new(),
        );

        assert!(
            result.is_err(),
            "Linking IR with an undefined value should fail"
        );
    }
}
Loading