Skip to content

Commit 38430a1

Browse files
0xrinegade and claude committed
refactor(ovsm): Split monolithic ir.rs into modular ir/ directory
Split the 6101-line ir.rs into a well-organized directory module:

ir/
├── mod.rs         (52 lines)   - Module exports + comprehensive docs
├── types.rs       (166 lines)  - PrimitiveType, FieldType, StructDef
├── instruction.rs (98 lines)   - IrReg, IrInstruction 3AC enum
├── program.rs     (58 lines)   - BasicBlock, IrProgram CFG types
└── generator.rs   (5797 lines) - IrGenerator with 60+ macro impls

Benefits:
- Clear separation of concerns (types, instructions, program, generator)
- Comprehensive rustdoc with macro category tables
- Navigation aids for the large generator.rs file
- Public API unchanged (same re-exports from mod.rs)
- All 113 ovsm lib tests pass

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent f68998b commit 38430a1

File tree

5 files changed

+411
-342
lines changed

5 files changed

+411
-342
lines changed

crates/ovsm/src/compiler/ir.rs renamed to crates/ovsm/src/compiler/ir/generator.rs

Lines changed: 38 additions & 342 deletions
Original file line number | Diff line number | Diff line change
@@ -1,330 +1,46 @@
1-
//! # Intermediate Representation (IR) for OVSM Compilation
1+
//! IR Generator - transforms typed AST to IR
22
//!
3-
//! Three-address code IR that bridges the gap between OVSM AST
4-
//! and sBPF bytecode. This representation makes optimization
5-
//! and register allocation tractable.
3+
//! This is the main code generation module containing all macro implementations.
4+
//!
5+
//! ## Module Organization
6+
//!
7+
//! The macro implementations are organized by domain. Use your editor's search
8+
//! to jump to sections:
9+
//!
10+
//! | Section | Search Term | Line Range |
11+
//! |---------|-------------|------------|
12+
//! | Struct Macros | `STRUCT MACROS` | ~350-880 |
13+
//! | Account Access | `ACCOUNT ACCESS` | ~920-1380 |
14+
//! | Memory Operations | `MEMORY OPERATIONS` | ~1380-1500 |
15+
//! | Logging & Debug | `LOGGING MACROS` | ~1520-1600 |
16+
//! | System CPI | `SYSTEM PROGRAM CPI` | ~1600-2330 |
17+
//! | SPL Token CPI | `SPL TOKEN CPI` | ~2330-2950 |
18+
//! | System Create/Allocate | `SYSTEM ALLOCATE/ASSIGN` | ~2950-3970 |
19+
//! | Anchor Errors | `ANCHOR ERROR HANDLING` | ~3970-4080 |
20+
//! | PDA Operations | `PDA OPERATIONS` | ~4080-4280 |
21+
//! | Account Assertions | `ACCOUNT ASSERTIONS` | ~4280-4480 |
22+
//! | Zerocopy Access | `ZEROCOPY ACCESS` | ~4480-4620 |
23+
//! | Events | `EVENT EMISSION` | ~4680-4800 |
24+
//! | Sysvars | `SYSVAR ACCESS` | ~4800-5020 |
25+
//! | PDA Cache | `PDA CACHE` | ~5020-5220 |
26+
//! | Control Flow | `CONTROL FLOW` | ~5330-5480 |
27+
//! | Helper Macros | `HELPER MACROS` | ~5480-5700 |
28+
//!
29+
//! ## Adding New Macros
30+
//!
31+
//! 1. Find the appropriate section based on functionality
32+
//! 2. Add an `if name == "macro-name" && args.len() == N` block
33+
//! 3. Use `self.alloc_reg()` for temp registers
34+
//! 4. Use `self.emit(IrInstruction::...)` to generate IR
35+
//! 5. Return `Ok(Some(result_reg))` or `Ok(None)` for void
636
737
use std::collections::HashMap;
838
use crate::{Result, Error};
9-
use super::types::{TypedProgram, TypedStatement, OvsmType};
39+
use crate::compiler::types::{TypedProgram, TypedStatement, OvsmType};
1040
use crate::{Statement, Expression, BinaryOp, UnaryOp};
11-
12-
// =============================================================================
13-
// STRUCT TYPES (compile-time layout)
14-
// =============================================================================
15-
16-
/// Primitive field types (fixed-size scalars)
17-
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
18-
pub enum PrimitiveType {
19-
U8, // 1 byte
20-
U16, // 2 bytes
21-
U32, // 4 bytes
22-
U64, // 8 bytes (default for untyped)
23-
I8, // 1 byte signed
24-
I16, // 2 bytes signed
25-
I32, // 4 bytes signed
26-
I64, // 8 bytes signed (default)
27-
}
28-
29-
impl PrimitiveType {
30-
pub fn size(&self) -> i64 {
31-
match self {
32-
PrimitiveType::U8 | PrimitiveType::I8 => 1,
33-
PrimitiveType::U16 | PrimitiveType::I16 => 2,
34-
PrimitiveType::U32 | PrimitiveType::I32 => 4,
35-
PrimitiveType::U64 | PrimitiveType::I64 => 8,
36-
}
37-
}
38-
39-
pub fn from_str(s: &str) -> Option<Self> {
40-
match s {
41-
"u8" => Some(PrimitiveType::U8),
42-
"u16" => Some(PrimitiveType::U16),
43-
"u32" => Some(PrimitiveType::U32),
44-
"u64" => Some(PrimitiveType::U64),
45-
"i8" => Some(PrimitiveType::I8),
46-
"i16" => Some(PrimitiveType::I16),
47-
"i32" => Some(PrimitiveType::I32),
48-
"i64" => Some(PrimitiveType::I64),
49-
_ => None,
50-
}
51-
}
52-
53-
pub fn to_idl_type(&self) -> &'static str {
54-
match self {
55-
PrimitiveType::U8 => "u8",
56-
PrimitiveType::U16 => "u16",
57-
PrimitiveType::U32 => "u32",
58-
PrimitiveType::U64 => "u64",
59-
PrimitiveType::I8 => "i8",
60-
PrimitiveType::I16 => "i16",
61-
PrimitiveType::I32 => "i32",
62-
PrimitiveType::I64 => "i64",
63-
}
64-
}
65-
}
66-
67-
/// Extended field type supporting primitives, arrays, pubkeys, and nested structs
68-
#[derive(Debug, Clone, PartialEq, Eq)]
69-
pub enum FieldType {
70-
/// Primitive integer types (u8-u64, i8-i64)
71-
Primitive(PrimitiveType),
72-
/// Fixed-size array: [element_type count], e.g., [u32 10] = 40 bytes
73-
Array { element_type: PrimitiveType, count: usize },
74-
/// Solana public key (32 bytes, special handling)
75-
Pubkey,
76-
/// Nested struct reference (resolved at struct definition time)
77-
Struct(String),
78-
}
79-
80-
impl FieldType {
81-
/// Get size in bytes (nested structs return 0 here; use `size_with_structs` to resolve them)
82-
pub fn size(&self) -> i64 {
83-
match self {
84-
FieldType::Primitive(p) => p.size(),
85-
FieldType::Array { element_type, count } => element_type.size() * (*count as i64),
86-
FieldType::Pubkey => 32, // Solana pubkey is always 32 bytes
87-
FieldType::Struct(_) => 0, // Requires struct_defs lookup - use size_with_structs
88-
}
89-
}
90-
91-
/// Get size with struct definitions for nested struct resolution
92-
pub fn size_with_structs(&self, struct_defs: &HashMap<String, StructDef>) -> i64 {
93-
match self {
94-
FieldType::Struct(name) => {
95-
struct_defs.get(name).map(|s| s.total_size).unwrap_or(0)
96-
}
97-
_ => self.size(),
98-
}
99-
}
100-
101-
/// Parse from type string (simple types only - arrays/structs handled separately)
102-
pub fn from_str(s: &str) -> Option<Self> {
103-
if s == "pubkey" {
104-
return Some(FieldType::Pubkey);
105-
}
106-
PrimitiveType::from_str(s).map(FieldType::Primitive)
107-
}
108-
109-
/// Convert to Anchor IDL type string
110-
pub fn to_idl_type(&self) -> String {
111-
match self {
112-
FieldType::Primitive(p) => p.to_idl_type().to_string(),
113-
FieldType::Array { element_type, count } => {
114-
format!("{{ \"array\": [\"{}\", {}] }}", element_type.to_idl_type(), count)
115-
}
116-
FieldType::Pubkey => "publicKey".to_string(),
117-
FieldType::Struct(name) => format!("{{ \"defined\": \"{}\" }}", name),
118-
}
119-
}
120-
121-
/// Check if this is a primitive type for load/store instruction selection
122-
pub fn primitive(&self) -> Option<PrimitiveType> {
123-
match self {
124-
FieldType::Primitive(p) => Some(*p),
125-
_ => None,
126-
}
127-
}
128-
}
129-
130-
/// A field in a struct definition
131-
#[derive(Debug, Clone)]
132-
pub struct StructField {
133-
pub name: String,
134-
pub field_type: FieldType,
135-
pub offset: i64,
136-
/// For array types, the element size
137-
pub element_size: Option<i64>,
138-
/// For array types, the element count
139-
pub array_count: Option<usize>,
140-
}
141-
142-
/// A struct definition (compile-time metadata)
143-
#[derive(Debug, Clone)]
144-
pub struct StructDef {
145-
pub name: String,
146-
pub fields: Vec<StructField>,
147-
pub total_size: i64,
148-
}
149-
150-
impl StructDef {
151-
/// Generate Anchor IDL JSON for this struct
152-
/// This enables TypeScript clients to interact with OVSM programs
153-
pub fn to_anchor_idl(&self) -> String {
154-
let mut fields_json = Vec::new();
155-
for field in &self.fields {
156-
fields_json.push(format!(
157-
r#" {{ "name": "{}", "type": "{}" }}"#,
158-
field.name,
159-
field.field_type.to_idl_type()
160-
));
161-
}
162-
163-
format!(
164-
r#"{{
165-
"name": "{}",
166-
"type": {{
167-
"kind": "struct",
168-
"fields": [
169-
{}
170-
]
171-
}}
172-
}}"#,
173-
self.name,
174-
fields_json.join(",\n")
175-
)
176-
}
177-
}
178-
179-
/// Virtual register (infinite supply, mapped to physical during codegen)
180-
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
181-
pub struct IrReg(pub u32);
182-
183-
impl IrReg {
184-
pub fn new(id: u32) -> Self {
185-
Self(id)
186-
}
187-
}
188-
189-
/// IR instruction (three-address code)
190-
#[derive(Debug, Clone)]
191-
pub enum IrInstruction {
192-
// Constants
193-
/// Load 64-bit integer constant into register
194-
ConstI64(IrReg, i64),
195-
/// Load 64-bit float constant (as bits)
196-
ConstF64(IrReg, u64),
197-
/// Load boolean constant
198-
ConstBool(IrReg, bool),
199-
/// Load null
200-
ConstNull(IrReg),
201-
/// Load string literal (index into string table)
202-
ConstString(IrReg, usize),
203-
204-
// Arithmetic (dst = src1 op src2)
205-
Add(IrReg, IrReg, IrReg),
206-
Sub(IrReg, IrReg, IrReg),
207-
Mul(IrReg, IrReg, IrReg),
208-
Div(IrReg, IrReg, IrReg),
209-
Mod(IrReg, IrReg, IrReg),
210-
211-
// Comparison (dst = src1 op src2, result is 0 or 1)
212-
Eq(IrReg, IrReg, IrReg),
213-
Ne(IrReg, IrReg, IrReg),
214-
Lt(IrReg, IrReg, IrReg),
215-
Le(IrReg, IrReg, IrReg),
216-
Gt(IrReg, IrReg, IrReg),
217-
Ge(IrReg, IrReg, IrReg),
218-
219-
// Logical
220-
And(IrReg, IrReg, IrReg),
221-
Or(IrReg, IrReg, IrReg),
222-
Not(IrReg, IrReg),
223-
224-
// Unary
225-
Neg(IrReg, IrReg),
226-
227-
// Register operations
228-
Move(IrReg, IrReg),
229-
230-
// Control flow
231-
Label(String),
232-
Jump(String),
233-
/// Jump if register is non-zero
234-
JumpIf(IrReg, String),
235-
/// Jump if register is zero
236-
JumpIfNot(IrReg, String),
237-
238-
// Function calls
239-
/// Call function, store result in optional dst
240-
Call(Option<IrReg>, String, Vec<IrReg>),
241-
/// Return with optional value
242-
Return(Option<IrReg>),
243-
244-
// Memory operations
245-
/// Load from memory: dst = *(base + offset) (64-bit)
246-
Load(IrReg, IrReg, i64),
247-
/// Load 1 byte (8-bit) from memory: dst = (u8)*(base + offset)
248-
Load1(IrReg, IrReg, i64),
249-
/// Load 2 bytes (16-bit) from memory: dst = (u16)*(base + offset)
250-
Load2(IrReg, IrReg, i64),
251-
/// Load 4 bytes (32-bit) from memory: dst = (u32)*(base + offset)
252-
Load4(IrReg, IrReg, i64),
253-
/// Store to memory: *(base + offset) = src (64-bit)
254-
Store(IrReg, IrReg, i64),
255-
/// Store 1 byte to memory: *(base + offset) = (u8)src
256-
Store1(IrReg, IrReg, i64),
257-
/// Store 2 bytes (16-bit) to memory: *(base + offset) = (u16)src
258-
Store2(IrReg, IrReg, i64),
259-
/// Store 4 bytes (32-bit) to memory: *(base + offset) = (u32)src
260-
Store4(IrReg, IrReg, i64),
261-
/// Allocate heap memory: dst = alloc(size)
262-
Alloc(IrReg, IrReg),
263-
264-
// Syscalls (Solana-specific)
265-
/// dst = syscall(name, args...)
266-
Syscall(Option<IrReg>, String, Vec<IrReg>),
267-
268-
// Debug
269-
/// Debug log (will be sol_log syscall): Log(ptr_reg, length)
270-
Log(IrReg, usize),
271-
272-
// No-op (placeholder, removed by optimizer)
273-
Nop,
274-
}
275-
276-
/// Basic block in the control flow graph
277-
#[derive(Debug, Clone)]
278-
pub struct BasicBlock {
279-
pub label: String,
280-
pub instructions: Vec<IrInstruction>,
281-
pub successors: Vec<String>,
282-
pub predecessors: Vec<String>,
283-
}
284-
285-
impl BasicBlock {
286-
pub fn new(label: &str) -> Self {
287-
Self {
288-
label: label.to_string(),
289-
instructions: Vec::new(),
290-
successors: Vec::new(),
291-
predecessors: Vec::new(),
292-
}
293-
}
294-
}
295-
296-
/// Complete IR program
297-
#[derive(Debug, Clone)]
298-
pub struct IrProgram {
299-
/// All instructions in linear order
300-
pub instructions: Vec<IrInstruction>,
301-
/// Basic blocks for CFG analysis
302-
pub blocks: HashMap<String, BasicBlock>,
303-
/// String table for string literals
304-
pub string_table: Vec<String>,
305-
/// Entry point label
306-
pub entry_label: String,
307-
/// Variable to register mapping
308-
pub var_registers: HashMap<String, IrReg>,
309-
}
310-
311-
impl IrProgram {
312-
pub fn new() -> Self {
313-
Self {
314-
instructions: Vec::new(),
315-
blocks: HashMap::new(),
316-
string_table: Vec::new(),
317-
entry_label: "entry".to_string(),
318-
var_registers: HashMap::new(),
319-
}
320-
}
321-
}
322-
323-
impl Default for IrProgram {
324-
fn default() -> Self {
325-
Self::new()
326-
}
327-
}
41+
use super::types::{PrimitiveType, FieldType, StructField, StructDef};
42+
use super::instruction::{IrReg, IrInstruction};
43+
use super::program::{BasicBlock, IrProgram};
32844

32945
/// IR Generator - transforms typed AST to IR
33046
pub struct IrGenerator {
@@ -6079,23 +5795,3 @@ impl Default for IrGenerator {
60795795
Self::new()
60805796
}
60815797
}
6082-
6083-
#[cfg(test)]
6084-
mod tests {
6085-
use super::*;
6086-
6087-
#[test]
6088-
fn test_ir_reg() {
6089-
let r1 = IrReg::new(0);
6090-
let r2 = IrReg::new(1);
6091-
assert_ne!(r1, r2);
6092-
}
6093-
6094-
#[test]
6095-
fn test_basic_block() {
6096-
let mut block = BasicBlock::new("test");
6097-
block.instructions.push(IrInstruction::ConstI64(IrReg(0), 42));
6098-
assert_eq!(block.label, "test");
6099-
assert_eq!(block.instructions.len(), 1);
6100-
}
6101-
}

0 commit comments

Comments
 (0)