|
1 | | -//! # Intermediate Representation (IR) for OVSM Compilation |
| 1 | +//! IR Generator - transforms typed AST to IR |
2 | 2 | //! |
3 | | -//! Three-address code IR that bridges the gap between OVSM AST |
4 | | -//! and sBPF bytecode. This representation makes optimization |
5 | | -//! and register allocation tractable. |
| 3 | +//! This is the main code generation module containing all macro implementations. |
| 4 | +//! |
| 5 | +//! ## Module Organization |
| 6 | +//! |
| 7 | +//! The macro implementations are organized by domain. Use your editor's search |
| 8 | +//! to jump to sections: |
| 9 | +//! |
| 10 | +//! | Section | Search Term | Line Range | |
| 11 | +//! |---------|-------------|------------| |
| 12 | +//! | Struct Macros | `STRUCT MACROS` | ~350-880 | |
| 13 | +//! | Account Access | `ACCOUNT ACCESS` | ~920-1380 | |
| 14 | +//! | Memory Operations | `MEMORY OPERATIONS` | ~1380-1500 | |
| 15 | +//! | Logging & Debug | `LOGGING MACROS` | ~1520-1600 | |
| 16 | +//! | System CPI | `SYSTEM PROGRAM CPI` | ~1600-2330 | |
| 17 | +//! | SPL Token CPI | `SPL TOKEN CPI` | ~2330-2950 | |
| 18 | +//! | System Create/Allocate | `SYSTEM ALLOCATE/ASSIGN` | ~2950-3970 | |
| 19 | +//! | Anchor Errors | `ANCHOR ERROR HANDLING` | ~3970-4080 | |
| 20 | +//! | PDA Operations | `PDA OPERATIONS` | ~4080-4280 | |
| 21 | +//! | Account Assertions | `ACCOUNT ASSERTIONS` | ~4280-4480 | |
| 22 | +//! | Zerocopy Access | `ZEROCOPY ACCESS` | ~4480-4620 | |
| 23 | +//! | Events | `EVENT EMISSION` | ~4680-4800 | |
| 24 | +//! | Sysvars | `SYSVAR ACCESS` | ~4800-5020 | |
| 25 | +//! | PDA Cache | `PDA CACHE` | ~5020-5220 | |
| 26 | +//! | Control Flow | `CONTROL FLOW` | ~5330-5480 | |
| 27 | +//! | Helper Macros | `HELPER MACROS` | ~5480-5700 | |
| 28 | +//! |
| 29 | +//! ## Adding New Macros |
| 30 | +//! |
| 31 | +//! 1. Find the appropriate section based on functionality |
| 32 | +//! 2. Add an `if name == "macro-name" && args.len() == N` block |
| 33 | +//! 3. Use `self.alloc_reg()` for temp registers |
| 34 | +//! 4. Use `self.emit(IrInstruction::...)` to generate IR |
| 35 | +//! 5. Return `Ok(Some(result_reg))` or `Ok(None)` for void |
6 | 36 |
|
7 | 37 | use std::collections::HashMap; |
8 | 38 | use crate::{Result, Error}; |
9 | | -use super::types::{TypedProgram, TypedStatement, OvsmType}; |
| 39 | +use crate::compiler::types::{TypedProgram, TypedStatement, OvsmType}; |
10 | 40 | use crate::{Statement, Expression, BinaryOp, UnaryOp}; |
11 | | - |
12 | | -// ============================================================================= |
13 | | -// STRUCT TYPES (compile-time layout) |
14 | | -// ============================================================================= |
15 | | - |
16 | | -/// Primitive field types (fixed-size scalars) |
17 | | -#[derive(Debug, Clone, Copy, PartialEq, Eq)] |
18 | | -pub enum PrimitiveType { |
19 | | - U8, // 1 byte |
20 | | - U16, // 2 bytes |
21 | | - U32, // 4 bytes |
22 | | - U64, // 8 bytes (default for untyped) |
23 | | - I8, // 1 byte signed |
24 | | - I16, // 2 bytes signed |
25 | | - I32, // 4 bytes signed |
26 | | - I64, // 8 bytes signed (default) |
27 | | -} |
28 | | - |
29 | | -impl PrimitiveType { |
30 | | - pub fn size(&self) -> i64 { |
31 | | - match self { |
32 | | - PrimitiveType::U8 | PrimitiveType::I8 => 1, |
33 | | - PrimitiveType::U16 | PrimitiveType::I16 => 2, |
34 | | - PrimitiveType::U32 | PrimitiveType::I32 => 4, |
35 | | - PrimitiveType::U64 | PrimitiveType::I64 => 8, |
36 | | - } |
37 | | - } |
38 | | - |
39 | | - pub fn from_str(s: &str) -> Option<Self> { |
40 | | - match s { |
41 | | - "u8" => Some(PrimitiveType::U8), |
42 | | - "u16" => Some(PrimitiveType::U16), |
43 | | - "u32" => Some(PrimitiveType::U32), |
44 | | - "u64" => Some(PrimitiveType::U64), |
45 | | - "i8" => Some(PrimitiveType::I8), |
46 | | - "i16" => Some(PrimitiveType::I16), |
47 | | - "i32" => Some(PrimitiveType::I32), |
48 | | - "i64" => Some(PrimitiveType::I64), |
49 | | - _ => None, |
50 | | - } |
51 | | - } |
52 | | - |
53 | | - pub fn to_idl_type(&self) -> &'static str { |
54 | | - match self { |
55 | | - PrimitiveType::U8 => "u8", |
56 | | - PrimitiveType::U16 => "u16", |
57 | | - PrimitiveType::U32 => "u32", |
58 | | - PrimitiveType::U64 => "u64", |
59 | | - PrimitiveType::I8 => "i8", |
60 | | - PrimitiveType::I16 => "i16", |
61 | | - PrimitiveType::I32 => "i32", |
62 | | - PrimitiveType::I64 => "i64", |
63 | | - } |
64 | | - } |
65 | | -} |
66 | | - |
67 | | -/// Extended field type supporting primitives, arrays, pubkeys, and nested structs |
68 | | -#[derive(Debug, Clone, PartialEq, Eq)] |
69 | | -pub enum FieldType { |
70 | | - /// Primitive integer types (u8-u64, i8-i64) |
71 | | - Primitive(PrimitiveType), |
72 | | - /// Fixed-size array: [element_type count], e.g., [u32 10] = 40 bytes |
73 | | - Array { element_type: PrimitiveType, count: usize }, |
74 | | - /// Solana public key (32 bytes, special handling) |
75 | | - Pubkey, |
76 | | - /// Nested struct reference (resolved at struct definition time) |
77 | | - Struct(String), |
78 | | -} |
79 | | - |
80 | | -impl FieldType { |
81 | | - /// Get size in bytes (for Array and Struct, needs struct_defs for resolution) |
82 | | - pub fn size(&self) -> i64 { |
83 | | - match self { |
84 | | - FieldType::Primitive(p) => p.size(), |
85 | | - FieldType::Array { element_type, count } => element_type.size() * (*count as i64), |
86 | | - FieldType::Pubkey => 32, // Solana pubkey is always 32 bytes |
87 | | - FieldType::Struct(_) => 0, // Requires struct_defs lookup - use size_with_structs |
88 | | - } |
89 | | - } |
90 | | - |
91 | | - /// Get size with struct definitions for nested struct resolution |
92 | | - pub fn size_with_structs(&self, struct_defs: &HashMap<String, StructDef>) -> i64 { |
93 | | - match self { |
94 | | - FieldType::Struct(name) => { |
95 | | - struct_defs.get(name).map(|s| s.total_size).unwrap_or(0) |
96 | | - } |
97 | | - _ => self.size(), |
98 | | - } |
99 | | - } |
100 | | - |
101 | | - /// Parse from type string (simple types only - arrays/structs handled separately) |
102 | | - pub fn from_str(s: &str) -> Option<Self> { |
103 | | - if s == "pubkey" { |
104 | | - return Some(FieldType::Pubkey); |
105 | | - } |
106 | | - PrimitiveType::from_str(s).map(FieldType::Primitive) |
107 | | - } |
108 | | - |
109 | | - /// Convert to Anchor IDL type string |
110 | | - pub fn to_idl_type(&self) -> String { |
111 | | - match self { |
112 | | - FieldType::Primitive(p) => p.to_idl_type().to_string(), |
113 | | - FieldType::Array { element_type, count } => { |
114 | | - format!("{{ \"array\": [\"{}\", {}] }}", element_type.to_idl_type(), count) |
115 | | - } |
116 | | - FieldType::Pubkey => "publicKey".to_string(), |
117 | | - FieldType::Struct(name) => format!("{{ \"defined\": \"{}\" }}", name), |
118 | | - } |
119 | | - } |
120 | | - |
121 | | - /// Check if this is a primitive type for load/store instruction selection |
122 | | - pub fn primitive(&self) -> Option<PrimitiveType> { |
123 | | - match self { |
124 | | - FieldType::Primitive(p) => Some(*p), |
125 | | - _ => None, |
126 | | - } |
127 | | - } |
128 | | -} |
129 | | - |
130 | | -/// A field in a struct definition |
131 | | -#[derive(Debug, Clone)] |
132 | | -pub struct StructField { |
133 | | - pub name: String, |
134 | | - pub field_type: FieldType, |
135 | | - pub offset: i64, |
136 | | - /// For array types, the element size |
137 | | - pub element_size: Option<i64>, |
138 | | - /// For array types, the element count |
139 | | - pub array_count: Option<usize>, |
140 | | -} |
141 | | - |
142 | | -/// A struct definition (compile-time metadata) |
143 | | -#[derive(Debug, Clone)] |
144 | | -pub struct StructDef { |
145 | | - pub name: String, |
146 | | - pub fields: Vec<StructField>, |
147 | | - pub total_size: i64, |
148 | | -} |
149 | | - |
150 | | -impl StructDef { |
151 | | - /// Generate Anchor IDL JSON for this struct |
152 | | - /// This enables TypeScript clients to interact with OVSM programs |
153 | | - pub fn to_anchor_idl(&self) -> String { |
154 | | - let mut fields_json = Vec::new(); |
155 | | - for field in &self.fields { |
156 | | - fields_json.push(format!( |
157 | | - r#" {{ "name": "{}", "type": "{}" }}"#, |
158 | | - field.name, |
159 | | - field.field_type.to_idl_type() |
160 | | - )); |
161 | | - } |
162 | | - |
163 | | - format!( |
164 | | - r#"{{ |
165 | | - "name": "{}", |
166 | | - "type": {{ |
167 | | - "kind": "struct", |
168 | | - "fields": [ |
169 | | -{} |
170 | | - ] |
171 | | - }} |
172 | | -}}"#, |
173 | | - self.name, |
174 | | - fields_json.join(",\n") |
175 | | - ) |
176 | | - } |
177 | | -} |
178 | | - |
179 | | -/// Virtual register (infinite supply, mapped to physical during codegen) |
180 | | -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] |
181 | | -pub struct IrReg(pub u32); |
182 | | - |
183 | | -impl IrReg { |
184 | | - pub fn new(id: u32) -> Self { |
185 | | - Self(id) |
186 | | - } |
187 | | -} |
188 | | - |
189 | | -/// IR instruction (three-address code) |
190 | | -#[derive(Debug, Clone)] |
191 | | -pub enum IrInstruction { |
192 | | - // Constants |
193 | | - /// Load 64-bit integer constant into register |
194 | | - ConstI64(IrReg, i64), |
195 | | - /// Load 64-bit float constant (as bits) |
196 | | - ConstF64(IrReg, u64), |
197 | | - /// Load boolean constant |
198 | | - ConstBool(IrReg, bool), |
199 | | - /// Load null |
200 | | - ConstNull(IrReg), |
201 | | - /// Load string literal (index into string table) |
202 | | - ConstString(IrReg, usize), |
203 | | - |
204 | | - // Arithmetic (dst = src1 op src2) |
205 | | - Add(IrReg, IrReg, IrReg), |
206 | | - Sub(IrReg, IrReg, IrReg), |
207 | | - Mul(IrReg, IrReg, IrReg), |
208 | | - Div(IrReg, IrReg, IrReg), |
209 | | - Mod(IrReg, IrReg, IrReg), |
210 | | - |
211 | | - // Comparison (dst = src1 op src2, result is 0 or 1) |
212 | | - Eq(IrReg, IrReg, IrReg), |
213 | | - Ne(IrReg, IrReg, IrReg), |
214 | | - Lt(IrReg, IrReg, IrReg), |
215 | | - Le(IrReg, IrReg, IrReg), |
216 | | - Gt(IrReg, IrReg, IrReg), |
217 | | - Ge(IrReg, IrReg, IrReg), |
218 | | - |
219 | | - // Logical |
220 | | - And(IrReg, IrReg, IrReg), |
221 | | - Or(IrReg, IrReg, IrReg), |
222 | | - Not(IrReg, IrReg), |
223 | | - |
224 | | - // Unary |
225 | | - Neg(IrReg, IrReg), |
226 | | - |
227 | | - // Register operations |
228 | | - Move(IrReg, IrReg), |
229 | | - |
230 | | - // Control flow |
231 | | - Label(String), |
232 | | - Jump(String), |
233 | | - /// Jump if register is non-zero |
234 | | - JumpIf(IrReg, String), |
235 | | - /// Jump if register is zero |
236 | | - JumpIfNot(IrReg, String), |
237 | | - |
238 | | - // Function calls |
239 | | - /// Call function, store result in optional dst |
240 | | - Call(Option<IrReg>, String, Vec<IrReg>), |
241 | | - /// Return with optional value |
242 | | - Return(Option<IrReg>), |
243 | | - |
244 | | - // Memory operations |
245 | | - /// Load from memory: dst = *(base + offset) (64-bit) |
246 | | - Load(IrReg, IrReg, i64), |
247 | | - /// Load 1 byte (8-bit) from memory: dst = (u8)*(base + offset) |
248 | | - Load1(IrReg, IrReg, i64), |
249 | | - /// Load 2 bytes (16-bit) from memory: dst = (u16)*(base + offset) |
250 | | - Load2(IrReg, IrReg, i64), |
251 | | - /// Load 4 bytes (32-bit) from memory: dst = (u32)*(base + offset) |
252 | | - Load4(IrReg, IrReg, i64), |
253 | | - /// Store to memory: *(base + offset) = src (64-bit) |
254 | | - Store(IrReg, IrReg, i64), |
255 | | - /// Store 1 byte to memory: *(base + offset) = (u8)src |
256 | | - Store1(IrReg, IrReg, i64), |
257 | | - /// Store 2 bytes (16-bit) to memory: *(base + offset) = (u16)src |
258 | | - Store2(IrReg, IrReg, i64), |
259 | | - /// Store 4 bytes (32-bit) to memory: *(base + offset) = (u32)src |
260 | | - Store4(IrReg, IrReg, i64), |
261 | | - /// Allocate heap memory: dst = alloc(size) |
262 | | - Alloc(IrReg, IrReg), |
263 | | - |
264 | | - // Syscalls (Solana-specific) |
265 | | - /// dst = syscall(name, args...) |
266 | | - Syscall(Option<IrReg>, String, Vec<IrReg>), |
267 | | - |
268 | | - // Debug |
269 | | - /// Debug log (will be sol_log syscall): Log(ptr_reg, length) |
270 | | - Log(IrReg, usize), |
271 | | - |
272 | | - // No-op (placeholder, removed by optimizer) |
273 | | - Nop, |
274 | | -} |
275 | | - |
276 | | -/// Basic block in the control flow graph |
277 | | -#[derive(Debug, Clone)] |
278 | | -pub struct BasicBlock { |
279 | | - pub label: String, |
280 | | - pub instructions: Vec<IrInstruction>, |
281 | | - pub successors: Vec<String>, |
282 | | - pub predecessors: Vec<String>, |
283 | | -} |
284 | | - |
285 | | -impl BasicBlock { |
286 | | - pub fn new(label: &str) -> Self { |
287 | | - Self { |
288 | | - label: label.to_string(), |
289 | | - instructions: Vec::new(), |
290 | | - successors: Vec::new(), |
291 | | - predecessors: Vec::new(), |
292 | | - } |
293 | | - } |
294 | | -} |
295 | | - |
296 | | -/// Complete IR program |
297 | | -#[derive(Debug, Clone)] |
298 | | -pub struct IrProgram { |
299 | | - /// All instructions in linear order |
300 | | - pub instructions: Vec<IrInstruction>, |
301 | | - /// Basic blocks for CFG analysis |
302 | | - pub blocks: HashMap<String, BasicBlock>, |
303 | | - /// String table for string literals |
304 | | - pub string_table: Vec<String>, |
305 | | - /// Entry point label |
306 | | - pub entry_label: String, |
307 | | - /// Variable to register mapping |
308 | | - pub var_registers: HashMap<String, IrReg>, |
309 | | -} |
310 | | - |
311 | | -impl IrProgram { |
312 | | - pub fn new() -> Self { |
313 | | - Self { |
314 | | - instructions: Vec::new(), |
315 | | - blocks: HashMap::new(), |
316 | | - string_table: Vec::new(), |
317 | | - entry_label: "entry".to_string(), |
318 | | - var_registers: HashMap::new(), |
319 | | - } |
320 | | - } |
321 | | -} |
322 | | - |
323 | | -impl Default for IrProgram { |
324 | | - fn default() -> Self { |
325 | | - Self::new() |
326 | | - } |
327 | | -} |
| 41 | +use super::types::{PrimitiveType, FieldType, StructField, StructDef}; |
| 42 | +use super::instruction::{IrReg, IrInstruction}; |
| 43 | +use super::program::{BasicBlock, IrProgram}; |
328 | 44 |
|
329 | 45 | /// IR Generator - transforms typed AST to IR |
330 | 46 | pub struct IrGenerator { |
@@ -6079,23 +5795,3 @@ impl Default for IrGenerator { |
6079 | 5795 | Self::new() |
6080 | 5796 | } |
6081 | 5797 | } |
6082 | | - |
6083 | | -#[cfg(test)] |
6084 | | -mod tests { |
6085 | | - use super::*; |
6086 | | - |
6087 | | - #[test] |
6088 | | - fn test_ir_reg() { |
6089 | | - let r1 = IrReg::new(0); |
6090 | | - let r2 = IrReg::new(1); |
6091 | | - assert_ne!(r1, r2); |
6092 | | - } |
6093 | | - |
6094 | | - #[test] |
6095 | | - fn test_basic_block() { |
6096 | | - let mut block = BasicBlock::new("test"); |
6097 | | - block.instructions.push(IrInstruction::ConstI64(IrReg(0), 42)); |
6098 | | - assert_eq!(block.label, "test"); |
6099 | | - assert_eq!(block.instructions.len(), 1); |
6100 | | - } |
6101 | | -} |
0 commit comments