Skip to content

Commit b4cd414

Browse files
authored
feat: enhance C ABI compatibility, type inference, and JSON serialization (#293)
This commit improves the robustness of the LLVM backend regarding type resolution under opaque pointers, refines C ABI lowering for aggregates, and introduces JSON serialization utilities.

Changes:

- **LLVM Codegen & ABI**:
  - **C ABI Refinement**: Updated `abi_c.rs` to pass small "mixed" aggregates as direct values, allowing LLVM to handle register assignment (INTEGER vs. SSE) more effectively for the C calling convention.
  - **Implicit Main Return**: Implemented implicit `return 0` for the `main` function. If `main` is defined as void-like in Wave, it is now lowered to an `i32` return in LLVM to satisfy standard execution environments.
  - **Symbol Resolution**: Fixed a bug in `gen_function_call` where extern C functions were being looked up by their Wave name instead of their redirected LLVM symbol name.
  - **Address Generation**: Introduced `generate_address_and_type_ir` to provide both the memory address and its corresponding LLVM type, simplifying indexing and field access logic.
- **Type System & Pointers**:
  - **Enhanced Inference**: Implemented `infer_wave_type_of_expr` and `basic_ty_to_wave_ty` to improve Wave-to-LLVM type mapping, especially for complex lvalues.
  - **Legacy Compatibility**: Updated `deref` handling to allow redundant dereferences on already-addressable lvalues (e.g., `deref array[i]`), maintaining compatibility with existing code.
- **Parser & Verification**:
  - **ABI Validation**: Added a verification pass to ensure only `extern(c)` is used, as other ABIs are not yet supported.
- **JSON Utilities**:
  - Added `write_pretty_to` and `write_compact_to` to the `Json` enum.
  - Implemented full JSON string escaping (handles newlines, quotes, slashes, and control characters).
- **CLI & Flags**:
  - Added support for `-Os` (optimize for size) and `-Ofast` (aggressive optimization) flags in the CLI and LLVM pass builder.

These updates stabilize the transition to opaque pointers and provide a more predictable interface for system-level FFI and serialization.
Signed-off-by: LunaStev <luna@lunastev.org>
1 parent bf79572 commit b4cd414

File tree

15 files changed

+387
-315
lines changed

15 files changed

+387
-315
lines changed

front/parser/src/verification.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,15 @@ fn validate_node(
247247
scopes.pop();
248248
}
249249

250+
ASTNode::ExternFunction(ext) => {
251+
if !ext.abi.eq_ignore_ascii_case("c") {
252+
return Err(format!(
253+
"unsupported extern ABI '{}' for function '{}': only extern(c) is currently supported",
254+
ext.abi, ext.name
255+
));
256+
}
257+
}
258+
250259
_ => {}
251260
}
252261

llvm/src/codegen/abi_c.rs

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ pub enum RetLowering<'ctx> {
3838

3939
#[derive(Clone)]
4040
pub struct ExternCInfo<'ctx> {
41+
pub llvm_name: String, // actual LLVM symbol name
4142
pub wave_ret: WaveType, // Wave-level return type (needed when sret => llvm void)
4243
pub ret: RetLowering<'ctx>,
4344
pub params: Vec<ParamLowering<'ctx>>, // per-wave param
@@ -168,9 +169,9 @@ fn classify_param<'ctx>(
168169
return ParamLowering::Direct(it.as_basic_type_enum());
169170
}
170171

171-
// mixed small aggregate: safest is byval (conservative but correct)
172-
let align = td.get_abi_alignment(&t) as u32;
173-
return ParamLowering::ByVal { ty: t.as_any_type_enum(), align };
172+
// mixed small aggregate: keep as direct aggregate value.
173+
// Let LLVM's C ABI lowering split/register-assign correctly.
174+
return ParamLowering::Direct(t);
174175
}
175176

176177
// non-aggregate: direct
@@ -246,9 +247,9 @@ fn classify_ret<'ctx>(
246247
}
247248
}
248249

249-
// mixed small aggregate ret: safest sret
250-
let align = td.get_abi_alignment(&t) as u32;
251-
return RetLowering::SRet { ty: t.as_any_type_enum(), align };
250+
// mixed small aggregate ret: keep direct aggregate value.
251+
// Let LLVM's C ABI lowering pick mixed INTEGER/SSE return registers.
252+
return RetLowering::Direct(t);
252253
}
253254

254255
RetLowering::Direct(t)
@@ -261,6 +262,7 @@ pub fn lower_extern_c<'ctx>(
261262
struct_types: &HashMap<String, inkwell::types::StructType<'ctx>>,
262263
) -> LoweredExtern<'ctx> {
263264
let llvm_name = ext.symbol.as_deref().unwrap_or(ext.name.as_str()).to_string();
265+
let info_llvm_name = llvm_name.clone();
264266

265267
// wave types -> layout types
266268
let wave_param_layout: Vec<BasicTypeEnum<'ctx>> = ext.params.iter()
@@ -311,6 +313,7 @@ pub fn lower_extern_c<'ctx>(
311313
llvm_name,
312314
fn_type,
313315
info: ExternCInfo {
316+
llvm_name: info_llvm_name,
314317
wave_ret: ext.return_type.clone(),
315318
ret,
316319
params,

llvm/src/codegen/address.rs

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,10 @@ fn addr_and_ty<'ctx>(
198198
);
199199

200200
if !slot_ty.is_pointer_type() {
201-
panic!("Cannot deref non-pointer lvalue: {:?}", inner);
201+
// Legacy compatibility:
202+
// allow redundant `deref` on already-addressable lvalues
203+
// like `deref q.rear` and `deref visited[x]`.
204+
return (slot_ptr, slot_ty);
202205
}
203206

204207
let pv = load_ptr_from_slot(context, builder, slot_ptr, "deref_target");
@@ -383,3 +386,23 @@ pub fn generate_address_ir<'ctx>(
383386
)
384387
.0
385388
}
389+
390+
/// Computes the address of an lvalue expression together with an LLVM type.
///
/// This is a thin public wrapper: it forwards every argument unchanged to
/// `addr_and_ty` and returns that function's `(PointerValue, BasicTypeEnum)`
/// tuple as-is. Use it when the caller needs the type alongside the pointer
/// (the commit description mentions indexing and field access).
///
/// NOTE(review): the returned type is presumably the type of the value stored
/// at the returned address; confirm against `addr_and_ty`, whose body is not
/// fully visible here.
pub fn generate_address_and_type_ir<'ctx>(
391+
context: &'ctx Context,
392+
builder: &'ctx Builder<'ctx>,
393+
expr: &Expression,
394+
variables: &mut HashMap<String, VariableInfo<'ctx>>,
395+
module: &'ctx Module<'ctx>,
396+
struct_types: &HashMap<String, StructType<'ctx>>,
397+
struct_field_indices: &HashMap<String, HashMap<String, u32>>,
398+
) -> (PointerValue<'ctx>, BasicTypeEnum<'ctx>) {
399+
addr_and_ty(
400+
context,
401+
builder,
402+
expr,
403+
variables,
404+
module,
405+
struct_types,
406+
struct_field_indices,
407+
)
408+
}

llvm/src/codegen/ir.rs

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@ use crate::codegen::abi_c::{
2828
ExternCInfo, lower_extern_c, apply_extern_c_attrs,
2929
};
3030

31+
/// Returns `true` when a function must be lowered as an implicit `i32 main`.
///
/// That is the case when `name` is exactly `"main"` and the declared return
/// type is either absent (`None`) or `Some(WaveType::Void)`. `generate_ir`
/// uses this both to give such a function an `i32` LLVM function type and to
/// emit a `return 0` when its last block has no terminator.
fn is_implicit_i32_main(name: &str, return_type: &Option<WaveType>) -> bool {
32+
name == "main" && matches!(return_type, None | Some(WaveType::Void))
33+
}
34+
35+
/// Returns `true` when `abi` names an extern ABI this backend can lower.
///
/// Only the C ABI is accepted; the comparison against `"c"` ignores ASCII
/// case, so `"c"` and `"C"` both pass.
fn is_supported_extern_abi(abi: &str) -> bool {
36+
abi.eq_ignore_ascii_case("c")
37+
}
38+
3139
pub unsafe fn generate_ir(ast_nodes: &[ASTNode], opt_flag: &str) -> String {
3240
let context: &'static Context = Box::leak(Box::new(Context::create()));
3341
let module: &'static _ = Box::leak(Box::new(context.create_module("main")));
@@ -189,11 +197,15 @@ pub unsafe fn generate_ir(ast_nodes: &[ASTNode], opt_flag: &str) -> String {
189197
.map(|p| wave_type_to_llvm_type(context, &p.param_type, &struct_types, TypeFlavor::AbiC).into())
190198
.collect();
191199

192-
let fn_type = match return_type {
193-
None | Some(WaveType::Void) => context.void_type().fn_type(&param_types, false),
194-
Some(wave_ret_ty) => {
195-
let llvm_ret_type = wave_type_to_llvm_type(context, wave_ret_ty, &struct_types, TypeFlavor::AbiC);
196-
llvm_ret_type.fn_type(&param_types, false)
200+
let fn_type = if is_implicit_i32_main(name, return_type) {
201+
context.i32_type().fn_type(&param_types, false)
202+
} else {
203+
match return_type {
204+
None | Some(WaveType::Void) => context.void_type().fn_type(&param_types, false),
205+
Some(wave_ret_ty) => {
206+
let llvm_ret_type = wave_type_to_llvm_type(context, wave_ret_ty, &struct_types, TypeFlavor::AbiC);
207+
llvm_ret_type.fn_type(&param_types, false)
208+
}
197209
}
198210
};
199211

@@ -202,6 +214,13 @@ pub unsafe fn generate_ir(ast_nodes: &[ASTNode], opt_flag: &str) -> String {
202214
}
203215

204216
for ext in &extern_functions {
217+
if !is_supported_extern_abi(&ext.abi) {
218+
panic!(
219+
"unsupported extern ABI '{}' for function '{}': only extern(c) is currently supported",
220+
ext.abi, ext.name
221+
);
222+
}
223+
205224
let lowered = lower_extern_c(context, td, ext, &struct_types);
206225

207226
let f = module.add_function(&lowered.llvm_name, lowered.fn_type, None);
@@ -263,13 +282,17 @@ pub unsafe fn generate_ir(ast_nodes: &[ASTNode], opt_flag: &str) -> String {
263282

264283
let current_block = builder.get_insert_block().unwrap();
265284
if current_block.get_terminator().is_none() {
285+
let implicit_i32_main = is_implicit_i32_main(&func_node.name, &func_node.return_type);
266286
let is_void_like = match &func_node.return_type {
267287
None => true,
268288
Some(WaveType::Void) => true,
269289
_ => false,
270290
};
271291

272-
if is_void_like {
292+
if implicit_i32_main {
293+
let zero = context.i32_type().const_zero();
294+
builder.build_return(Some(&zero)).unwrap();
295+
} else if is_void_like {
273296
builder.build_return(None).unwrap();
274297
} else {
275298
panic!("Non-void function '{}' is missing a return statement", func_node.name);
@@ -295,6 +318,7 @@ fn pipeline_from_opt_flag(opt_flag: &str) -> &'static str {
295318
"-O3" => "default<O3>",
296319
"-Os" => "default<Os>",
297320
"-Oz" => "default<Oz>",
321+
"-Ofast" => "default<O3>",
298322
other => panic!("unknown opt flag for LLVM passes: {}", other),
299323
}
300324
}
@@ -522,4 +546,4 @@ fn add_enum_consts_to_globals(
522546

523547
next += 1;
524548
}
525-
}
549+
}

llvm/src/codegen/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ pub mod legacy;
1818
pub mod plan;
1919
pub mod abi_c;
2020

21-
pub use address::generate_address_ir;
21+
pub use address::{generate_address_and_type_ir, generate_address_ir};
2222
pub use format::{wave_format_to_c, wave_format_to_scanf};
2323
pub use ir::generate_ir;
2424
pub use types::{wave_type_to_llvm_type, VariableInfo};

llvm/src/expression/rvalue/assign.rs

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,27 @@ fn wave_to_basic<'ctx, 'a>(env: &ExprGenEnv<'ctx, 'a>, wt: &WaveType) -> BasicTy
4343
wave_type_to_llvm_type(env.context, wt, env.struct_types, TypeFlavor::Value)
4444
}
4545

46+
/// Best-effort reverse mapping from an LLVM basic type to a `WaveType`.
///
/// Called by `wave_type_of_lvalue` to type a struct field from the field's
/// LLVM type. The mapping is lossy:
///
/// * a 1-bit integer becomes `Bool`; any other integer becomes `Int(width)`,
///   because an LLVM integer type carries no signedness;
/// * every pointer becomes `Pointer(Byte)`, because the pointee is not inspected;
/// * arrays recurse on the element type and keep the length;
/// * structs become `Struct(..)` keyed by `resolve_struct_key`.
///
/// Returns `None` for any other LLVM type, or when an array's element type
/// cannot be mapped.
fn basic_to_wave<'ctx, 'a>(env: &ExprGenEnv<'ctx, 'a>, bt: BasicTypeEnum<'ctx>) -> Option<WaveType> {
47+
match bt {
48+
BasicTypeEnum::IntType(it) => {
49+
// NOTE(review): `as u16` would truncate a bit width above 65535 — presumably never produced here; confirm.
let bw = it.get_bit_width() as u16;
50+
if bw == 1 {
51+
Some(WaveType::Bool)
52+
} else {
53+
Some(WaveType::Int(bw))
54+
}
55+
}
56+
BasicTypeEnum::FloatType(ft) => Some(WaveType::Float(ft.get_bit_width() as u16)),
57+
// The pointee type is not recovered; `Byte` is used as a placeholder.
BasicTypeEnum::PointerType(_) => Some(WaveType::Pointer(Box::new(WaveType::Byte))),
58+
BasicTypeEnum::ArrayType(at) => {
59+
// `?` propagates `None` when the element type has no Wave equivalent.
let elem = basic_to_wave(env, at.get_element_type())?;
60+
Some(WaveType::Array(Box::new(elem), at.len()))
61+
}
62+
// NOTE(review): `resolve_struct_key` presumably yields the Wave-level struct name — confirm at its definition.
BasicTypeEnum::StructType(st) => Some(WaveType::Struct(resolve_struct_key(env, st))),
63+
_ => None,
64+
}
65+
}
66+
4667
fn wave_type_of_lvalue<'ctx, 'a>(env: &ExprGenEnv<'ctx, 'a>, e: &Expression) -> Option<WaveType> {
4768
match e {
4869
Expression::Variable(name) => env.variables.get(name).map(|vi| vi.ty.clone()),
@@ -54,7 +75,7 @@ fn wave_type_of_lvalue<'ctx, 'a>(env: &ExprGenEnv<'ctx, 'a>, e: &Expression) ->
5475
match inner_ty {
5576
WaveType::Pointer(t) => Some(*t),
5677
WaveType::String => Some(WaveType::Byte),
57-
_ => None,
78+
other => Some(other),
5879
}
5980
}
6081
Expression::IndexAccess { target, .. } => {
@@ -66,6 +87,26 @@ fn wave_type_of_lvalue<'ctx, 'a>(env: &ExprGenEnv<'ctx, 'a>, e: &Expression) ->
6687
_ => None,
6788
}
6889
}
90+
Expression::FieldAccess { object, field } => {
91+
let object_ty = wave_type_of_lvalue(env, object)?;
92+
let struct_name = match object_ty {
93+
WaveType::Struct(name) => name,
94+
WaveType::Pointer(inner) => match *inner {
95+
WaveType::Struct(name) => name,
96+
_ => return None,
97+
},
98+
_ => return None,
99+
};
100+
101+
let st = *env.struct_types.get(&struct_name)?;
102+
let field_index = env
103+
.struct_field_indices
104+
.get(&struct_name)
105+
.and_then(|m| m.get(field))
106+
.copied()?;
107+
let field_bt = st.get_field_type_at_index(field_index)?;
108+
basic_to_wave(env, field_bt)
109+
}
69110
_ => None,
70111
}
71112
}

llvm/src/expression/rvalue/calls.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ pub(crate) fn gen_function_call<'ctx, 'a>(
336336

337337
if let Some(info) = env.extern_c_info.get(name) {
338338
let function = env.module
339-
.get_function(name)
339+
.get_function(&info.llvm_name)
340340
.unwrap_or_else(|| panic!("Extern function '{}' not found in module (symbol alias?)", name));
341341

342342
if args.len() != info.params.len() {

0 commit comments

Comments
 (0)