-
Notifications
You must be signed in to change notification settings - Fork 34
Expand file tree
/
Copy pathIRGen.h
More file actions
362 lines (297 loc) · 13.4 KB
/
IRGen.h
File metadata and controls
362 lines (297 loc) · 13.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
// Copyright (c) 2024-present, Trail of Bits, Inc.
//
// This source code is licensed in accordance with the terms specified in
// the LICENSE file found in the root directory of this source tree.
#pragma once
#include <cstdint>
#include <optional>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <multiplier/Types.h>
#include <multiplier/IR/OpCode.h>
#include <multiplier/IR/BlockKind.h>
#include <multiplier/IR/ObjectKind.h>
#include <multiplier/IR/FunctionKind.h>
#include <multiplier/IR/StructureKind.h>
namespace clang {
class ASTContext;
} // namespace clang
namespace pasta {
class AST;
class Decl;
class Expr;
class FunctionDecl;
class Stmt;
class Type;
class VarDecl;
} // namespace pasta
namespace indexer {
class EntityMapper;
namespace ir {
// ---------------------------------------------------------------------------
// In-memory IR data structures (populated during generation, then serialized)
// ---------------------------------------------------------------------------
struct BranchTargetIR {
uint32_t block_index{0};
};
struct InstructionIR {
mx::ir::OpCode opcode{mx::ir::OpCode::UNREACHABLE};
mx::RawEntityId source_entity_id{mx::kInvalidEntityId};
// Operand instruction indices (into FunctionIR::instructions).
std::vector<uint32_t> operand_indices;
// Parent instruction index (UINT32_MAX for top-level roots).
// Roots have their parent block determined by which BlockIR contains them.
uint32_t parent_instruction_index{UINT32_MAX};
// Block this instruction was emitted into (set by EmitInstruction).
uint32_t parent_block_index{UINT32_MAX};
// True if this instruction was emitted via EmitTopLevel (is a block root).
// SetOperandParents will not reparent root instructions.
bool is_root{false};
// OpCode-specific fields.
uint32_t object_index{0};
mx::RawEntityId type_entity_id{mx::kInvalidEntityId};
mx::RawEntityId target_entity_id{mx::kInvalidEntityId};
int64_t int_value{0}; // sign-extended
uint64_t uint_value{0}; // zero-extended
double float_value{0.0};
uint8_t width{0};
uint32_t size_bytes{0};
uint8_t flags{0};
mx::ir::OpCode compound_op{mx::ir::OpCode::ADD_64};
uint8_t alloca_kind{0}; // AllocaKind sub-opcode for ALLOCA instructions
uint8_t const_op{0}; // ConstOp sub-opcode for CONST instructions
uint8_t cast_op{0}; // CastOp sub-opcode for CAST instructions
uint8_t bitwise_op{0}; // BitwiseOp sub-opcode for BITWISE instructions
uint8_t mem_op{0}; // MemOp sub-opcode for MEMORY instructions
uint8_t float_op{0}; // FloatOp sub-opcode for FLOAT instructions
bool is_big_endian{false}; // target endianness (for READ_MODIFY_WRITE)
uint32_t bit_offset{0}; // BIT_READ/BIT_WRITE: bit offset into the object
uint32_t bit_width{0}; // BIT_READ/BIT_WRITE: number of bits
// Structure index for ENTER_SCOPE/EXIT_SCOPE (into FunctionIR::structures).
uint32_t structure_index{UINT32_MAX};
// Return alloca instruction index for CALL (UINT32_MAX if void).
uint32_t return_alloca_index{UINT32_MAX};
// Terminator data.
std::vector<BranchTargetIR> branch_targets;
// Switch cases: one per case/default in a switch statement.
// Each maps to an IRStructure entity (kind SWITCH_CASE) in the serialized output.
struct SwitchCaseIR {
int64_t low{0};
int64_t high{0};
uint32_t block_index{0};
uint32_t structure_index{UINT32_MAX}; // Index into func.structures
mx::RawEntityId source_entity_id{mx::kInvalidEntityId}; // CaseStmt/DefaultStmt
bool is_default{false};
};
std::vector<SwitchCaseIR> switch_cases;
};
struct BlockIR {
mx::ir::BlockKind kind{mx::ir::BlockKind::GENERIC};
uint32_t parent_structure_index{UINT32_MAX};
// Top-level instruction indices (roots of expression trees).
std::vector<uint32_t> instruction_indices;
// Block indices.
std::vector<uint32_t> successor_indices;
std::vector<uint32_t> predecessor_indices;
// Dominator tree.
std::vector<uint32_t> dominator_indices;
std::vector<uint32_t> post_dominator_indices;
uint32_t immediate_dominator{UINT32_MAX};
uint32_t immediate_post_dominator{UINT32_MAX};
};
struct ObjectIR {
mx::RawEntityId source_decl_id{mx::kInvalidEntityId};
mx::RawEntityId type_entity_id{mx::kInvalidEntityId};
uint32_t size_bytes{0};
uint32_t align_bytes{1};
uint32_t frame_offset{0}; // Offset within the stack frame.
mx::ir::ObjectKind kind{mx::ir::ObjectKind::LOCAL};
};
struct StructureIR {
mx::ir::StructureKind kind{mx::ir::StructureKind::SCOPE};
mx::RawEntityId source_entity_id{mx::kInvalidEntityId};
uint32_t parent_structure_index{UINT32_MAX}; // index into FunctionIR::structures
// Children: interleaved structure and block indices (in source order).
// Each entry is either a structure index (with is_structure=true) or block index.
struct ChildRef {
uint32_t index;
bool is_structure;
};
std::vector<ChildRef> children;
// Object indices for scope kinds (ALLOCAs declared in this scope).
std::vector<uint32_t> object_indices;
// Switch case data (for SWITCH_CASE kind).
int64_t case_low{0};
int64_t case_high{0};
bool is_default{false};
};
struct FunctionIR {
mx::RawEntityId func_decl_entity_id{mx::kInvalidEntityId};
mx::ir::FunctionKind kind{mx::ir::FunctionKind::NORMAL};
std::vector<InstructionIR> instructions;
std::vector<BlockIR> blocks;
std::vector<ObjectIR> objects;
std::vector<StructureIR> structures;
uint32_t entry_block_index{0};
uint32_t body_scope_index{UINT32_MAX}; // FUNCTION_SCOPE structure
std::vector<uint32_t> rpo_block_order;
// Stack frame layout (computed after all objects are collected).
uint32_t frame_size_bytes{0}; // Total fixed frame size.
bool has_dynamic_allocas{false}; // True if frame needs to grow at runtime.
};
// ---------------------------------------------------------------------------
// IRGenerator: builds IR from a pasta::FunctionDecl by walking the PASTA AST
// directly. Builds a statement-level CFG (no Clang CFG dependency).
// Expressions are kept as nested instruction trees within blocks.
// ---------------------------------------------------------------------------
class IRGenerator {
public:
IRGenerator(const pasta::AST &ast, const EntityMapper &em);
std::optional<FunctionIR> Generate(const pasta::FunctionDecl &func);
std::optional<FunctionIR> GenerateGlobalInit(const pasta::VarDecl &var);
private:
const pasta::AST &ast_;
const EntityMapper &em_;
clang::ASTContext &ctx_;
// Cached entity IDs for built-in types (size_t, ptrdiff_t).
mx::RawEntityId size_type_eid_{mx::kInvalidEntityId};
uint8_t size_type_width_{64};
mx::RawEntityId ptrdiff_type_eid_{mx::kInvalidEntityId};
uint8_t ptrdiff_type_width_{64};
FunctionIR func_;
uint32_t current_block_index_{0};
uint32_t next_obj_index_{0};
std::unordered_map<mx::RawEntityId, uint32_t> entity_to_object_;
std::unordered_map<uint32_t, uint32_t> object_to_alloca_; // obj_idx → alloca inst idx
std::unordered_set<mx::RawEntityId> address_taken_;
// Break/continue targets: maps source entity ID of the enclosing
// loop/switch to its exit (break) and continue-target blocks.
struct LoopContext {
uint32_t break_block; // where break goes
uint32_t continue_block; // where continue goes (loops only)
uint32_t structure_index; // structure index of the loop/switch
bool is_switch; // true = switch (break goes here, continue goes to enclosing loop)
};
std::vector<LoopContext> loop_stack_;
// Structure stack for nesting.
uint32_t current_structure_index_{UINT32_MAX};
std::vector<uint32_t> structure_stack_;
// Goto label targets: maps label name to block index.
// Forward gotos create the block on first reference.
std::unordered_map<std::string, uint32_t> label_blocks_;
// Maps case/default statement entity IDs to their block indices,
// so fallthrough can branch to the right block.
std::unordered_map<mx::RawEntityId, uint32_t> case_blocks_;
// Goto compensation: records pending gotos that need scope transitions.
struct PendingGoto {
uint32_t goto_inst_idx; // index of the GOTO instruction
uint32_t source_block_idx; // block containing the goto
uint32_t target_block_idx; // label block
uint32_t source_structure_idx; // structure at goto site
};
std::vector<PendingGoto> pending_gotos_;
// Maps label block index to the structure index active when label was emitted.
std::unordered_map<uint32_t, uint32_t> label_structure_;
// Expression scope for call argument/return allocas.
// UINT32_MAX means no active expression scope.
uint32_t expression_scope_index_{UINT32_MAX};
// --- Structure management ---
uint32_t PushStructure(mx::ir::StructureKind kind,
mx::RawEntityId source_eid = mx::kInvalidEntityId);
void PopStructure();
void AssociateBlockWithStructure(uint32_t block_idx);
void AssociateObjectWithScope(uint32_t obj_idx);
// Emit EXIT_SCOPE for all enclosing scopes up to (but not including)
// the scope at stop_structure_index. Used by break/continue/return/goto.
void EmitScopeExits(uint32_t stop_structure_index);
// --- Block management ---
uint32_t NewBlock(mx::ir::BlockKind kind = mx::ir::BlockKind::GENERIC);
void SwitchToBlock(uint32_t block_idx);
void AddEdge(uint32_t from, uint32_t to);
// --- Branch helpers (reduce boilerplate) ---
// EmitBranch emits an IMPLICIT_GOTO (structural edge).
uint32_t EmitBranch(uint32_t target_block,
mx::RawEntityId source_eid = mx::kInvalidEntityId);
uint32_t EmitBranchWithOpCode(mx::ir::OpCode opcode, uint32_t target_block,
mx::RawEntityId source_eid = mx::kInvalidEntityId);
uint32_t EmitCondBranch(uint32_t cond_idx, uint32_t true_block,
uint32_t false_block,
mx::RawEntityId source_eid = mx::kInvalidEntityId);
// --- Load from lvalue helper ---
uint32_t EmitLoadFromLValue(const pasta::Expr &e);
// --- Object management ---
uint32_t MakeObject(mx::ir::ObjectKind kind,
const pasta::Decl *decl = nullptr);
uint32_t GetOrMakeObject(const pasta::Decl &decl);
// --- Instruction emission ---
// Emits an instruction into the flat list. Does NOT add it as a top-level
// instruction in the current block -- that happens only for statement-level
// roots. Sub-expressions are linked via operand_indices and
// parent_instruction_index.
uint32_t EmitInstruction(InstructionIR inst);
// Emit a top-level instruction (added to the current block's root list).
uint32_t EmitTopLevel(InstructionIR inst);
// Set parent_instruction_index on all operands of inst at `inst_idx`.
void SetOperandParents(uint32_t inst_idx);
// --- Statement emission (builds the CFG) ---
void EmitBody(const pasta::Stmt &body);
void EmitStmt(const pasta::Stmt &s);
void EmitIfStmt(const pasta::Stmt &s);
void EmitWhileStmt(const pasta::Stmt &s);
void EmitDoStmt(const pasta::Stmt &s);
void EmitForStmt(const pasta::Stmt &s);
void EmitSwitchStmt(const pasta::Stmt &s);
void EmitReturnStmt(const pasta::Stmt &s);
void EmitDeclStmt(const pasta::Stmt &s);
void EmitBreakStmt(const pasta::Stmt &s);
void EmitContinueStmt(const pasta::Stmt &s);
void EmitGotoStmt(const pasta::Stmt &s);
void EmitLabelStmt(const pasta::Stmt &s);
// --- Expression scope for calls ---
// Ensures an EXPRESSION_SCOPE exists for the current full-expression.
// Returns the structure index. Pushes one if not already active.
uint32_t EnsureExpressionScope(mx::RawEntityId source_eid);
// Pop the expression scope (called at top-level expression boundaries).
void PopExpressionScope();
// Check if an expression contains any function calls.
bool ContainsCall(const pasta::Expr &e);
// True if the current block already has a terminator.
bool CurrentBlockTerminated() const;
// Switch to a new dead block (after break/return/goto/continue).
void SwitchToDeadBlock();
// --- Initializer emission (decomposes aggregates into element stores) ---
void EmitInitializer(uint32_t dest_addr_idx, const pasta::Expr &init,
mx::RawEntityId source_eid);
// --- Expression emission (builds nested instruction trees) ---
uint32_t EmitRValue(const pasta::Expr &e);
uint32_t EmitLValue(const pasta::Expr &e);
// --- Conditionally-executed flag propagation ---
void MarkConditionallyExecuted(uint32_t inst_idx);
// --- Entity ID helpers ---
mx::RawEntityId EntityIdOf(const pasta::Stmt &s);
mx::RawEntityId EntityIdOf(const pasta::Decl &d);
mx::RawEntityId TypeEntityIdOf(const pasta::Type &t);
// --- Type helpers ---
std::optional<uint32_t> TypeSizeBytes(const pasta::Type &t);
std::optional<uint32_t> TypeAlignBytes(const pasta::Type &t);
// Target's size_t / pointer width in bits.
uint8_t SizeTypeWidth() const;
// Emit a CONST instruction holding a size_t value (target-width unsigned).
uint32_t EmitSizeConst(uint64_t val,
mx::RawEntityId source_eid = mx::kInvalidEntityId);
mx::ir::MemOp DetermineMemOp(bool is_store, bool is_atomic,
unsigned size_bytes, bool is_float = false);
// --- Pre-scan ---
void ScanAddressTaken(const pasta::Stmt &s);
void EmitEntryBlockAllocas(const pasta::Stmt &body);
// --- Post-processing ---
void InsertGotoCompensationBlocks();
void ComputeDominators();
void ComputeRPO();
void VerifyBlocks();
void ComputeFrameLayout();
};
} // namespace ir
} // namespace indexer