Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
168 commits
Select commit Hold shift + click to select a range
a46be7b
Add IR code generation to the indexer
pgoodman Mar 27, 2026
d5467f4
Register IR entities in MX_FOR_EACH_ENTITY_CATEGORY
pgoodman Mar 27, 2026
60bc017
Regenerate Python bindings for IR entity types
pgoodman Mar 27, 2026
ce47696
Update PythonBindings.py for IR entities and regenerate bindings
pgoodman Mar 27, 2026
d637c86
Wire IR entity getters through EntityProvider and fix linker errors
pgoodman Mar 27, 2026
6c29184
Handle PredefinedExpr and fix SQLite IR entity stubs
pgoodman Mar 28, 2026
540261e
Add INIT_LIST opcode for aggregate initialization
pgoodman Mar 28, 2026
af7ea53
Add bidirectional AST↔IR entity mapping
pgoodman Mar 28, 2026
6b7a55b
Handle CXXThisExpr and PredefinedExpr in IR generation
pgoodman Mar 28, 2026
b9d4b6f
Add METHOD_CALL, VIRTUAL_METHOD_CALL, NEW, DELETE opcodes
pgoodman Mar 28, 2026
c364b37
Distinguish PLACEMENT_NEW from NEW opcodes
pgoodman Mar 28, 2026
623a1b7
Handle CXXNullPtrLiteralExpr, CXXBoolLiteralExpr, ParenListExpr
pgoodman Mar 28, 2026
a6b3a1c
Distinguish known-unsupported exprs from truly unhandled ones
pgoodman Mar 28, 2026
7e45400
Remove dead block argument infrastructure
pgoodman Mar 28, 2026
7c5b1eb
Implement read-side IR API
pgoodman Mar 28, 2026
6e013b6
Wire up IR entity providers and implement IRFunction::from
pgoodman Apr 2, 2026
0961d7a
Merge GEP_INDEX into PTR_ADD and fix pointer arithmetic
pgoodman Apr 4, 2026
77e99dd
PTR_ADD carries element type and scale; fix pointer arithmetic
pgoodman Apr 4, 2026
fac9a2c
Remove raw field accessors from IRInstruction base class
pgoodman Apr 4, 2026
a2468c5
Remove operator bool from IR entity classes
pgoodman Apr 4, 2026
46ca1fc
Add field-level comments to IR.capnp
pgoodman Apr 4, 2026
1e63f19
Redesign IR serialization with pool-based compact encoding
pgoodman Apr 4, 2026
590c646
Strip incomplete C++ support from IR
pgoodman Apr 5, 2026
55793da
Add result types to pool, merge parent into pool, verify blocks
pgoodman Apr 5, 2026
2960b68
Move all allocas to the entry block
pgoodman Apr 5, 2026
a042fb8
Add use-def tracking: per-instruction users list
pgoodman Apr 5, 2026
9e9b479
Add IndexVersion with unique index ID and version number
pgoodman Apr 5, 2026
abbb990
Implement IR instruction class hierarchy for program analysis
pgoodman Apr 5, 2026
8f5021b
Fix switch case handling with range support and SwitchCaseValue
pgoodman Apr 5, 2026
1aec6a8
Add IRSwitchCase as a first-class entity type
pgoodman Apr 6, 2026
bca8dfc
Wire SwitchCase entities into switch instruction serialization
pgoodman Apr 6, 2026
8cb3adc
Add VA* instruction classes and make types non-optional
pgoodman Apr 6, 2026
b7c5afe
Add IMPLICIT_UNREACHABLE, FunctionKind, AST→IR entity mapping, and fixes
pgoodman Apr 7, 2026
1f9f10e
Add IRStructure entity type with StructureKind enum (Phase 2 foundation)
pgoodman Apr 7, 2026
f4b92a9
Wire structure generation and serialization into IR pipeline
pgoodman Apr 7, 2026
c6a8b92
Add ENTER_SCOPE/EXIT_SCOPE opcodes and nested SCOPE structures
pgoodman Apr 7, 2026
6169d49
Add block parent structure, scope object association, and body_scope …
pgoodman Apr 7, 2026
ce1245e
Emit SWITCH_CASE structures for each case/default in switch statements
pgoodman Apr 7, 2026
d5e1627
Update IR_TODOS with Phase 2 completion status
pgoodman Apr 7, 2026
d38775e
Emit EXIT_SCOPE on all non-local exit paths
pgoodman Apr 7, 2026
30ece4d
Add implicit SCOPE for for-loop init declarations
pgoodman Apr 7, 2026
c2d08ed
Add MEMSET/MEMCPY opcodes and lower memory intrinsics
pgoodman Apr 7, 2026
9f599f6
Implement Phase 4: global initializer functions
pgoodman Apr 7, 2026
ba5479b
Update IR_TODOS: all planned phases complete
pgoodman Apr 7, 2026
805045b
Add derived IRStructure classes with typed APIs
pgoodman Apr 7, 2026
c3e5cdb
Add FRAME block, PARAM_READ opcode, and function scope entry/exit
pgoodman Apr 7, 2026
e83236f
Add mx-interpret-ir: concrete IR interpreter for testing
pgoodman Apr 7, 2026
6491f75
Global initializer receives pointer to global via PARAM_READ
pgoodman Apr 7, 2026
ed84160
Add GLOBAL_ADDR and FUNC_ADDR opcodes, distinguish address-of kinds
pgoodman Apr 7, 2026
e369801
Skip ALLOCAs for static locals, generate GLOBAL_INITIALIZERs for them
pgoodman Apr 7, 2026
d16f60a
Decompose aggregate initializers into element-wise stores
pgoodman Apr 7, 2026
f43f0f9
Fix init list decomposition and scope/memory semantics
pgoodman Apr 7, 2026
01b8355
Replace SIZE_OF, INC_DEC, COMPOUND_ASSIGN, INIT_LIST with READ_MODIFY…
pgoodman Apr 7, 2026
29b8cb9
Add overflow opcodes, builtin intrinsics, and memmove/choose/generic …
pgoodman Apr 7, 2026
89dd104
Add MEMMOVE, BITWISE_OP, UNDEFINED, overflow RMW, and builtin lowerings
pgoodman Apr 7, 2026
efffb51
Add MULTIMEM, FLOAT_OP, atomics, dynamic alloca, and type query lower…
pgoodman Apr 7, 2026
68eecc4
Rename BITWISE_OP→BITWISE, FLOAT_OP→FLOAT; add size-specific string-t…
pgoodman Apr 7, 2026
e346eae
Merge CONST_INT/CONST_FLOAT/CONST_NULL into unified CONST opcode and …
pgoodman Apr 7, 2026
9be0398
Remove ADDRESS_OF opcode; ALLOCA instructions are now direct pointer …
pgoodman Apr 7, 2026
f0982e1
Add object_to_alloca_ map declaration to IRGenerator header
pgoodman Apr 7, 2026
5668a22
Add goto compensation blocks for scope transitions
pgoodman Apr 7, 2026
242b7a8
Add COMPENSATION block kind and rewrite docs/IR.md
pgoodman Apr 7, 2026
a77a64d
Add LOOP_PREHEADER block kind and reorganize BlockKind enum
pgoodman Apr 7, 2026
79cdf9f
Fix CFG predecessor/successor mismatch in goto compensation blocks
pgoodman Apr 7, 2026
1d25c5c
IRFunction::from() follows redeclarations to find the definition's IR
pgoodman Apr 8, 2026
323b72d
Decl::ir() follows redeclarations to find IR
pgoodman Apr 8, 2026
d594d9f
Add IRFunction::containing(Decl) and IRFunction::containing(Stmt)
pgoodman Apr 8, 2026
68140f1
Add IR gap list and loop prompt for gap-fixing work
pgoodman Apr 8, 2026
dd624e4
Unify LOAD, STORE, ATOMIC_LOAD, ATOMIC_STORE into single MEM opcode
pgoodman Apr 8, 2026
786c665
Add LAST_VALUE opcode for comma operator (Phase B)
pgoodman Apr 8, 2026
3458e9d
Fix pointer decrement bug in RMW emission (Phase C)
pgoodman Apr 8, 2026
f509c97
Implement IRObject::source_declaration() and type() (Phase D)
pgoodman Apr 8, 2026
eb651e3
Update IR_GAPS.md: Phases A-D done
pgoodman Apr 8, 2026
73d1668
Add EnterScopeInst and ExitScopeInst classes (Phase E)
pgoodman Apr 8, 2026
88bbbb9
Add IRBlock::parent_function() and IRFunction::containing(IRBlock/IRI…
pgoodman Apr 8, 2026
9ebc95f
Handle DesignatedInitExpr and ImplicitValueInitExpr (Phase G)
pgoodman Apr 8, 2026
0030152
Update IR_GAPS.md: Phases E-G done
pgoodman Apr 8, 2026
ae2d910
Add 27 missing float builtins to FloatOp enum (Phase H)
pgoodman Apr 8, 2026
d7322f1
Skip bit-field initialization in EmitInitializer (Phase I)
pgoodman Apr 8, 2026
8929450
Interpreter: scope tracking and string ops (Phase J)
pgoodman Apr 8, 2026
fda2950
Update IR_GAPS.md: all planned phases A-J complete
pgoodman Apr 8, 2026
ffe5964
Merge MEM and MULTIMEM opcodes into unified MEMORY opcode
pgoodman Apr 8, 2026
6a3f0bb
Merge MEM+MULTIMEM→MEMORY, add THREAD_LOCAL_PTR, fix pointer compound…
pgoodman Apr 8, 2026
fca53fe
Add size/align accessors to AllocaInst, object accessor to DynamicAll…
pgoodman Apr 8, 2026
2da0666
Add THREAD_LOCAL_INITIALIZER, fix GNU block expressions
pgoodman Apr 8, 2026
4dc3b1d
Rewrite docs/IR.md to reflect current state
pgoodman Apr 8, 2026
8b30633
Fix PTR_DIFF element_size and non-standard memory access sizes
pgoodman Apr 8, 2026
97c9d00
Add BIT_READ/BIT_WRITE for bit-fields, fix atomic RMW size_bytes
pgoodman Apr 8, 2026
4cb9867
Split BIT_READ/BIT_WRITE into endian-specific variants, store bit par…
pgoodman Apr 8, 2026
b164cd8
Remove ATOMIC_CMPXCHG opcode, add endianness to RMW int pool
pgoodman Apr 8, 2026
3cf230e
Add ReadModifyWriteInst::is_atomic(), handle _Atomic compound assign
pgoodman Apr 8, 2026
ee0e395
Emit BIT_READ for bit-fields, atomic loads/stores, switch case compen…
pgoodman Apr 8, 2026
c3cede7
Update docs/IR.md for bit-field access, _Atomic, compensation blocks
pgoodman Apr 8, 2026
c94917b
Implement all missing interpreter sub-opcodes for MEMORY and FLOAT
pgoodman Apr 8, 2026
d66f499
Add mx-print-ir tool and comprehensive IR test suite
pgoodman Apr 8, 2026
825fd5a
Wire up --entity_id in mx-print-ir using index.entity() lookup
pgoodman Apr 8, 2026
773edc8
Fix compile_commands.json to use full clang path
pgoodman Apr 8, 2026
b2e00c4
Add mx-workspace/ to gitignore
pgoodman Apr 8, 2026
858602f
Remove system header includes from IR tests
pgoodman Apr 8, 2026
ae23701
Fix type errors from typed enum fields in IR entity ID structs
pgoodman Apr 8, 2026
fb0980c
Fix MX_VISIT_ENTITY_ID macro redefinition warning, add typed enum fie…
pgoodman Apr 8, 2026
7f939c5
Fix DB corruption: separate try/catch in DatabaseWriterImpl destructor
pgoodman Apr 8, 2026
1f9c124
Remove try/catch from DatabaseWriterImpl destructor — let failures crash
pgoodman Apr 8, 2026
8b01a3b
Use ExclusiveTransaction for DB teardown, assert on failures
pgoodman Apr 9, 2026
4a0b315
Use sqlite3_close_v2 to handle deferred connection close
pgoodman Apr 9, 2026
09f3286
Revert DB destructor to original structure (no try/catch)
pgoodman Apr 9, 2026
f1fcb0c
Revert sqlite3_close_v2 back to sqlite3_close
pgoodman Apr 9, 2026
289ed0a
Isolate ExitRecords in try/catch to prevent checkpoint skip
pgoodman Apr 9, 2026
192c263
Isolate all teardown steps with try/catch
pgoodman Apr 9, 2026
363c522
Remove IRSwitchCase entity type; use IRSwitchCaseStructure instead
pgoodman Apr 9, 2026
5c2f905
Implement EXPRESSION_SCOPE calling convention, ALLOCA sub-opcodes, an…
pgoodman Apr 9, 2026
d3a12d5
Fix interpreter eval model: lazy GetValue + per-block cache clear + s…
pgoodman Apr 9, 2026
f1e8993
Fix switch break emission and interpreter sign-extension
pgoodman Apr 9, 2026
1d321d6
Add pointer shadow map to interpreter; fix switch break codegen
pgoodman Apr 9, 2026
95efbaf
Add test_conditional_exec and test_unsigned test files
pgoodman Apr 9, 2026
10b37f6
Fix interpreter CAST handling: proper SEXT/ZEXT/TRUNC with int64 model
pgoodman Apr 9, 2026
9417b53
Fix PTR_DIFF element size and serialize PTR_DIFF to int pool
pgoodman Apr 9, 2026
ffb1c5a
Fix unsigned CONST values, PTR_DIFF element size, CAST width handling
pgoodman Apr 9, 2026
355d249
Fix dynamic alloca, auto-grow memory, and restore values_.clear()
pgoodman Apr 9, 2026
f92ed95
Fix bit-field assignment codegen and auto-grow memory
pgoodman Apr 9, 2026
2b2748c
Fix float-to-int cast: reinterpret raw double bits from LOAD
pgoodman Apr 9, 2026
3487ba7
Add coerce_float() for float arithmetic with LOADed values
pgoodman Apr 9, 2026
59cacee
Fix pre/post decrement codegen, float coerce, and printer entity IDs
pgoodman Apr 9, 2026
ca9aa8e
Add float LOAD/STORE sub-opcodes and unsigned arithmetic opcodes
pgoodman Apr 9, 2026
282b63c
Add named instruction references: format_ref(), name(), format(), to_…
pgoodman Apr 9, 2026
79c2a70
Add PtrDiffInst class, fix VLA codegen, remove coerce_float kludge
pgoodman Apr 9, 2026
8719161
Fix entity ID packing overflow for new opcodes; fix BinaryInst::from …
pgoodman Apr 9, 2026
d777de1
Fix nested switch case collection for empty case fallthrough
pgoodman Apr 9, 2026
ac7d864
Revert nested case collection — causes indexer crash
pgoodman Apr 9, 2026
7cf26bb
Add expected IR annotations to test_arithmetic, test_pointers, test_c…
pgoodman Apr 9, 2026
fba1b8a
Width-aware unsigned arithmetic and remove hardcoded sizes
pgoodman Apr 9, 2026
aca9cae
Fix empty case fallthrough for nested CaseStmts
pgoodman Apr 9, 2026
096bdf5
Emit UNREACHABLE after noreturn function calls
pgoodman Apr 9, 2026
f34a76f
Add full expected IR output to all 23 test files
pgoodman Apr 9, 2026
f61f2e9
Fix UINT constant display and set both signed/unsigned values
pgoodman Apr 10, 2026
05ecda2
Fix missing float STORE sub-opcodes and regenerate IR annotations
pgoodman Apr 10, 2026
9a76cff
Regenerate IR annotations with float STORE sub-opcodes and correct UI…
pgoodman Apr 10, 2026
be021b0
Remove callee-side RETURN_SLOT object
pgoodman Apr 10, 2026
2929885
Remove callee-side RETURN_SLOT object, clean up interpreter allocation
pgoodman Apr 10, 2026
a059d50
Fix float LOAD for dereference, VLA detection with canonical type
pgoodman Apr 10, 2026
d8b75f6
Regenerate IR annotations from fresh index (RETURN_SLOT removed, floa…
pgoodman Apr 10, 2026
49d9c3b
Use IsVariablyModifiedType() for VLA ALLOCA/DYNAMIC detection
pgoodman Apr 10, 2026
45f0977
Move VLA ALLOCAs from FRAME to declaration point
pgoodman Apr 10, 2026
f4ac29c
Fix string literal MEMCPY size: use actual string length, not padded …
pgoodman Apr 10, 2026
5292a7a
Fix string literal sizes: use ByteLength() not token data size
pgoodman Apr 10, 2026
b92b48b
Fix string literal size: use CharacterByteWidth() for null terminator
pgoodman Apr 10, 2026
82c0ea4
Add wide/unicode string literal tests
pgoodman Apr 10, 2026
4334ac7
Add C23 test file and #embed gap
pgoodman Apr 10, 2026
2429e22
Add MakeIntConst/MakeUint64Const helpers, fix character literal uint_…
pgoodman Apr 10, 2026
b156f61
Use APInt for character literal sign/zero extension
pgoodman Apr 10, 2026
c15bd74
Document byte order assumption in IR serialization
pgoodman Apr 10, 2026
bfd93ac
Revert per-character string literal stores; use AST bytes instead
pgoodman Apr 10, 2026
3b60662
Document STRING_LITERAL object bytes: no trailing null in Bytes()
pgoodman Apr 10, 2026
852c680
Regenerate IR annotations with string literal size fixes
pgoodman Apr 10, 2026
8d177bc
Fix string literal init: always MEMCPY, never scalar STORE
pgoodman Apr 10, 2026
f22452a
Regenerate IR annotations after string literal MEMCPY fix
pgoodman Apr 10, 2026
a699cb1
Use MEMCPY for all aggregate type initialization, not just > 8 bytes
pgoodman Apr 10, 2026
ae4c652
IR codegen: fix type-safety, structural integrity, and dead-code hand…
pgoodman Apr 12, 2026
c7a7dd5
Fix nested switch: collect_cases and emit_case_bodies must not descen…
pgoodman Apr 12, 2026
c6265d7
Remove tracked database/workspace/config files and update .gitignore
pgoodman Apr 12, 2026
52fd45e
Stop swallowing database teardown errors and fix statement reset
pgoodman Apr 12, 2026
4c0bd8e
Width-specific opcodes for all integer, pointer, and atomic operations
pgoodman Apr 13, 2026
71ecd8f
Size BITWISE opcodes, move ABS out of BitwiseOp, remove EXPECT/ASSUME
pgoodman Apr 13, 2026
2dfc002
Size FloatOp sub-opcodes (_32/_64 for all 41 float builtins)
pgoodman Apr 13, 2026
bec076a
Rename BSWAP16/32/64 to BSWAP_16/_32/_64 for consistent naming
pgoodman Apr 13, 2026
a843bc2
Update IR documentation for sized opcodes, bitwise, and float changes
pgoodman Apr 13, 2026
7da098f
Add DCHECK for operand count uint8_t overflow in SerializeIR
pgoodman Apr 14, 2026
31434f9
Add DCHECK for opcode uint8_t overflow in SerializeIR
pgoodman Apr 14, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -577,3 +577,10 @@ builds/
install/

*.db
*.db-shm
*.db-wal
mx-workspace/
.claude/
.mcp.json
*_PROMPT.md
tests/InterpretIR/compile_commands.json
74 changes: 74 additions & 0 deletions IR_GAPS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# IR Gaps and Issues

## DONE
- **MEM opcode** — Unified MEMORY opcode with 70 sub-opcodes (load/store x atomic x LE/BE x 8/16/32/64, bulk memory, string ops, bit-field access, cmpxchg, CONSUME_VA_PARAM).
- **LAST_VALUE** — Comma operator: evaluates all operands, returns last.
- **Pointer decrement fix** — `--ptr` correctly emits CONST(-1) with PTR_ADD.
- **IRObject methods** — source_declaration() and type() implemented.
- **EnterScopeInst/ExitScopeInst** — Instruction classes with scope() accessor.
- **Convenience methods** — IRBlock::parent_function(), IRFunction::containing(IRBlock/IRInstruction).
- **DesignatedInitExpr** — Unwrap to Initializer(). ImplicitValueInitExpr as zero.
- **Float builtins** — 41 FloatOp sub-opcodes.
- **Bit-field init fix** — Uses BIT_WRITE with exact bit_offset/bit_width.
- **Interpreter improvements** — Scope tracking (poison on EXIT_SCOPE), all string/memory ops.
- **Pointer compound assign fix** — `ptr += n` uses PTR_ADD, not ADD.
- **PTR_DIFF element_size** — Extracted from pointee type.
- **_Atomic plain load/store** — Correctly uses atomic ops.
- **BIT_READ** — Emitted in EmitLoadFromLValue for bit-field reads.
- **Switch case compensation** — Compensation blocks for switch→case scope crossings.
- **CFG predecessor fix** — Correct predecessor list updates in compensation.
- **Database teardown** — Isolated try/catch per step, leaked statement detection.
- **Sized opcodes** — All integer, pointer, atomic, overflow, and bitwise opcodes are width-specific (_8/_16/_32/_64). Float opcodes have _32/_64 variants. FloatOp and BitwiseOp sub-opcodes carry width. No unsized arithmetic/pointer opcodes remain in the enum. Interpreter is width-correct at all sizes. OpCode is uint8_t (251 values, gap-packed).
- **ABS opcode** — Moved from BitwiseOp sub-opcode to sized top-level opcode (ABS_8/16/32/64).
- **EXPECT/ASSUME removed** — Compiler hints with no runtime semantics; not emitted to IR.
- **Interpreter precision** — Float _32 ops use float precision, _64 use double. All casts (SEXT, ZEXT, TRUNC, int↔float, float↔float, BITCAST) are width-correct. Unsigned operations use correct unsigned types at each width.
- **Entity ID type safety** — Typed enums in IRBlockId, IRInstructionId, IRStructureId.
- **IRSwitchCase → IRSwitchCaseStructure** — Switch cases are now IRStructure entities.
- **ALLOCA sub-opcodes** — AllocaKind: LOCAL, ARG, RETURN, DYNAMIC. DynamicAllocaInst derived from AllocaInst.
- **EXPRESSION_SCOPE** — New StructureKind for call argument/return allocas.
- **PARAM_PTR** — Renamed from PARAM_READ. Returns pointer to caller's argument alloca.
- **RETURN_ADDRESS** — Renamed from RETURN_PTR (for __builtin_return_address).
- **VA_PACK removed** — Variadic args are regular operands.
- **VA_ARG → CONSUME_VA_PARAM** — New MemOp sub-opcode.
- **ArrayToPointerDecay fix** — Uses EmitLValue (address), not EmitRValue (load).
- **MEMCPY for direct assignment** — `a = b` with lvalue RHS always uses MEMCPY.
- **Scalar size guards** — IsScalarSize() check before DetermineMemOp; MEMCPY fallback for non-1/2/4/8.
- **String literal init** — Non-power-of-2 sizes use MEMCPY.
- **source_statement() assertion fix** — Checks entity ID is StmtId before calling StmtFor.
- **IRObject::source_declaration() assertion fix** — Checks entity ID is DeclId before calling DeclFor.
- **Interpreter moved to bin/InterpretIR/** — Separate from Examples.
- **mx-print-ir human-readable output** — Uses EnumeratorName() for all opcodes, sub-opcodes, kinds.
- **MX_EXPORT on IR enum EnumeratorName** — All IR enum name functions exported from shared library.
- **GNU statement expression** — Already handled: SCOPE + emit children + last expr value.
- **Compound literal** — Already handled: ALLOCA + EmitInitializer + scope-tracked.
- **EXPRESSION_SCOPE at call sites** — Calls wrapped in EXPRESSION_SCOPE with ALLOCA/ARG for each argument and ALLOCA/RETURN for return value. Scope popped at full-expression boundary.
- **RETURN_PTR in callee** — Callee emits RETURN_PTR to get pointer to caller's return storage, stores return value into it before RET.
- **PARAM_PTR without local copy** — Parameters no longer copied into local allocas. PARAM_PTR directly gives pointer to caller's ARG alloca. DeclRefExpr resolves to PARAM_PTR.
- **CallInst::return_alloca()** — Returns the ALLOCA/RETURN instruction for the return value. `has_return_value()` for void check.
- **IRObject string literal bytes** — Not needed; content accessible via AST StringLiteral through source_declaration().

## Remaining Gaps

### Codegen
1. **C++ expressions** — Lambda, new/delete, this, constructors, destructors, etc. emit UNKNOWN. (C-only for now.)

### API
6. **No IRInstruction::result_type() on base class** — Must downcast to get result type. A base-class method would simplify interpreters/printers. Deferred.
7. **No Index::ir_functions() enumerator** — Can't iterate all IR functions from an Index.

### Interpreter
10. **Interpreter is monolithic** — ~1500-line switch in bin/InterpretIR. Plan exists to extract into lib/IR/Interpret/ with ValueFactory/Memory/Driver/Checker policy classes. See docs/InterpreterLibraryPlan.md.
11. **No multi-path exploration** — Single-path concrete execution only. Need COW memory + fork.
12. **No call inlining** — Interpreter doesn't step into callees.
13. **No external function modeling** — malloc/free/memcpy/printf etc. not modeled.

### Codegen / Types
14. **`__int128` / `_BitInt(N>64)`** — Sized integer opcodes only cover 8/16/32/64-bit widths. Wider types (e.g., `__int128`, `_BitInt(128)`) currently round down to `_64`. These could be decomposed into paired 64-bit operations.

### C23
15. **`#embed` directive** — C23's `#embed` for embedding binary data. Not handled.

### Documentation
16. **IR_GAPS.md** — This file (kept up to date).
17. **docs/IR.md** — Updated.
18. **docs/InterpreterLibraryPlan.md** — Written.
57 changes: 57 additions & 0 deletions IR_TODOS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# IR Implementation Progress

## Plan: Structural IR Entities (robust-plotting-rocket.md)

### Phase 1: FunctionKind + sourceDeclEntityId rename — COMPLETE

### Phase 2: IRStructure entity + scope tracking — COMPLETE
- [x] StructureKind enum (18 kinds), IRStructure entity, IRStructureId
- [x] Structure capnp schema, entity providers, pack/unpack
- [x] Structure generation (PushStructure/PopStructure) for all control flow
- [x] FUNCTION_SCOPE, nested SCOPE for CompoundStmt, implicit SCOPE for for-init
- [x] ENTER_SCOPE/EXIT_SCOPE opcodes on all paths (including break/continue/return/goto)
- [x] Block parentStructureId, scope object association via AssociateObjectWithScope
- [x] IRFunction::body_scope(), IRBlock::parent_structure() accessors
- [x] SWITCH_CASE structures for each case/default
- [ ] IRSwitchCase → IRStructure(SWITCH_CASE) full migration (deferred)
- [ ] Full Python bindings (stub exists, requires bootstrap regen)

### Phase 3: Control flow region structures — MERGED INTO PHASE 2

### Phase 4: Global initializer functions — COMPLETE
- [x] GenerateGlobalInit creates synthetic FunctionIR for globals with initializers
- [x] FunctionKind::GLOBAL_INITIALIZER, sourceDeclEntityId = VarDecl
- [x] Entry block with ADDRESS_OF → EmitRValue(init) → STORE → RET
- [x] VarDecl maps to its GLOBAL_INITIALIZER IRFunction

## Known Extensions — PARTIALLY COMPLETE
- [x] MEMSET (opcode 68): dest, byte_value, size — lowered from memset/builtin calls
- [x] MEMCPY (opcode 69): dest, src, size — lowered from memcpy/memmove/builtin calls
- [x] MemsetInst, MemcpyInst instruction class wrappers
- [ ] VAR_INIT block kind + structure kind (for variable initialization regions)

## Known Issues / Lies Remaining
1. IRSwitchCase still separate entity type (coexists with SWITCH_CASE structures)
2. string_bytes() missing on IRObject for string literals
3. Python bindings are stub only for IRStructure
4. Global initializer quality depends on EmitRValue handling of all init expressions
5. **Goto/Duff's device scope compensation**: `goto` that jumps into the middle
of a scope bypasses the normal ENTER_SCOPE path. An interpreter following
the goto would not see the scope entry for variables in that scope. Similarly,
Duff's device-style switch cases can interleave with loop bodies, creating
scope entry paths that don't go through ENTER_SCOPE. We need "compensation
blocks" — synthetic blocks inserted on goto/case edges that emit the
ENTER_SCOPE instructions for any scopes being entered. This is tricky because
the label/case might also be reachable from normal control flow (which already
has the ENTER_SCOPE), so we can't just add ENTER_SCOPE at the label — we need
it on the specific edge. Always using compensation blocks (even for the normal
case) would be the simplest correct solution.

## Decisions Made
- Phase 3 merged into Phase 2
- SWITCH_CASE structures coexist with IRSwitchCase for backward compat
- ENTER_SCOPE/EXIT_SCOPE carry IRStructureId extra in entity pool
- StructureKind embedded in IRStructureId (18 sub_kind offsets)
- Global initializer uses same EmitRValue path as function body codegen
- MEMSET/MEMCPY lowered from memset/memcpy/memmove and all __builtin_ variants
- Goto conservatively exits all scopes to FUNCTION_SCOPE
50 changes: 50 additions & 0 deletions bin/Bootstrap/PASTA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2388,6 +2388,53 @@ MethodListPtr CodeGenerator::RunOnClass(
make_parent("parent_statement", "MX_VISIT_STMT_LINK", "Stmt");
}

// `*::ir()` -- the IR entity corresponding to this AST entity.
// For FunctionDecl, returns IRFunction. For Stmt/Expr, returns IRInstruction.
// For CaseStmt/DefaultStmt, returns IRStructure (SWITCH_CASE). Etc.
//
// Emitted only when generating the `Decl` and `Stmt` classes. The generated
// accessor reads one packed entity ID and yields `std::nullopt` when that ID
// is `kInvalidEntityId`, when it unpacks to something other than the five IR
// ID kinds dispatched on below, or when the entity provider lookup for it
// comes back falsy.
if (class_name == "Decl" || class_name == "Stmt") {
auto sd = storage.AddMethod("UInt64"); // IR entity ID (any IR kind).
// Getter/setter/init names derived from the slot above; `cd_init_name` is
// bound here but not referenced anywhere in this block.
auto [cd_getter_name, cd_setter_name, cd_init_name] = NamesFor(sd);

// Declaration of `ir()` written to the class output stream.
class_os
<< " std::optional<VariantEntity> ir(void) const;\n";

// Visitor entry: `MX_VISIT_ENTITY_ID(<class>, ir, <slot>)`.
serialize_inc_os
<< " MX_VISIT_ENTITY_ID(" << class_name
<< ", ir, " << sd << ")\n";

// Serialization side: store whatever `es.IREntityId(e)` returns via the
// generated setter. NOTE(review): presumably this is `kInvalidEntityId` for
// entities without IR, which is what the getter below tests for -- confirm
// against `IREntityId`.
serialize_cpp_os
<< " b." << cd_setter_name << "(es.IREntityId(e));\n";

// Definition of `<class>::ir()`: unpack the raw ID, then try each IR ID kind
// in turn (function, block, instruction, object, structure). Each branch
// asks the entity provider for the matching implementation and wraps it in
// the corresponding public type. The `p` bound by each `std::get_if` is only
// used as the branch condition; the lookup itself is keyed on `raw`.
lib_cpp_os
<< "std::optional<VariantEntity> " << class_name << "::ir(void) const {\n"
<< " auto raw = impl->reader." << cd_getter_name << "();\n"
<< " if (raw == kInvalidEntityId) return std::nullopt;\n"
<< " auto vid = EntityId(raw).Unpack();\n"
<< " if (auto *p = std::get_if<IRFunctionId>(&vid)) {\n"
<< " if (auto ptr = impl->ep->IRFunctionFor(impl->ep, raw)) {\n"
<< " return IRFunction(std::move(ptr));\n"
<< " }\n"
<< " } else if (auto *p = std::get_if<IRBlockId>(&vid)) {\n"
<< " if (auto ptr = impl->ep->IRBlockFor(impl->ep, raw)) {\n"
<< " return IRBlock(std::move(ptr));\n"
<< " }\n"
<< " } else if (auto *p = std::get_if<IRInstructionId>(&vid)) {\n"
<< " if (auto ptr = impl->ep->IRInstructionFor(impl->ep, raw)) {\n"
<< " return IRInstruction(std::move(ptr));\n"
<< " }\n"
<< " } else if (auto *p = std::get_if<IRObjectId>(&vid)) {\n"
<< " if (auto ptr = impl->ep->IRObjectFor(impl->ep, raw)) {\n"
<< " return IRObject(std::move(ptr));\n"
<< " }\n"
<< " } else if (auto *p = std::get_if<IRStructureId>(&vid)) {\n"
<< " if (auto ptr = impl->ep->IRStructureFor(impl->ep, raw)) {\n"
<< " return IRStructure(std::move(ptr));\n"
<< " }\n"
<< " }\n"
<< " return std::nullopt;\n"
<< "}\n\n";
}

// `Decl::is_definition`
if (class_name == "Decl") {
const auto def = storage.AddMethod("Bool");
Expand Down Expand Up @@ -4089,6 +4136,9 @@ void CodeGenerator::RunOnClassHierarchies(void) {
<< "#ifndef MX_VISIT_BASE\n"
<< "# define MX_VISIT_BASE(...)\n"
<< "#endif\n"
<< "#ifndef MX_VISIT_ENTITY_ID\n"
<< "# define MX_VISIT_ENTITY_ID(...)\n"
<< "#endif\n"
<< "#ifndef MX_VISIT_DECL_LINK\n"
<< "# define MX_VISIT_DECL_LINK(...)\n"
<< "#endif\n"
Expand Down
8 changes: 8 additions & 0 deletions bin/Bootstrap/Python.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,13 @@
#include <multiplier/Frontend.h>
#include <multiplier/Index.h>

#include <multiplier/IR/Function.h>
#include <multiplier/IR/Block.h>
#include <multiplier/IR/Instruction.h>
#include <multiplier/IR/Object.h>
#include <multiplier/IR/OpCode.h>
#include <multiplier/IR/ObjectKind.h>
#include <multiplier/IR/BlockKind.h>

#include <multiplier/Re2.h>
#include <multiplier/Reference.h>
12 changes: 12 additions & 0 deletions bin/Bootstrap/PythonBindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,13 @@ class UserToken;
#include <multiplier/Fragment.h>
#include <multiplier/Frontend.h>
#include <multiplier/Index.h>
#include <multiplier/IR/Function.h>
#include <multiplier/IR/Block.h>
#include <multiplier/IR/Instruction.h>
#include <multiplier/IR/Object.h>
#include <multiplier/IR/OpCode.h>
#include <multiplier/IR/ObjectKind.h>
#include <multiplier/IR/BlockKind.h>
#include <multiplier/Re2.h>
#include <multiplier/Reference.h>

Expand Down Expand Up @@ -1794,6 +1801,11 @@ def wrap(schemas: Iterable[Schema], renamer: Renamer):
"TemplateParameterList",
"Macro",
"Operation",
"IRFunction",
"IRBlock",
"IRInstruction",
"IRObject",
"IRStructure",
)

VariantEntitySchema = make_schema_class("VariantEntity", "Entity", Schema)
Expand Down
1 change: 1 addition & 0 deletions bin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@

add_subdirectory("Examples")
add_subdirectory("Index")
add_subdirectory("InterpretIR")
1 change: 1 addition & 0 deletions bin/Examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,4 @@ define_example("mx-print-type-token-graph" "PrintTypeTokenGraph.cpp")
define_example("mx-find-linked-structures" "FindLinkedStructures.cpp")
define_example("mx-list-declarations-overlapping-macro-expansion"
"ListDeclOverlappingMacroExpansions.cpp")
define_example("mx-print-ir" "PrintIR.cpp")
57 changes: 31 additions & 26 deletions bin/Examples/Harness.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

DEFINE_uint64(entity_id, mx::kInvalidEntityId, "ID of the entity to harness");
DEFINE_string(entity_name, "", "Name of the entity to harness");
DEFINE_bool(deduplicate, true, "Deduplicate like names");

using SeenSet = std::set<mx::PackedFragmentId>;
using WorkList = std::vector<mx::PackedFragmentId>;
Expand Down Expand Up @@ -566,33 +567,35 @@ int main(int argc, char *argv[]) {
std::unordered_map<std::string, mx::RawEntityId> canon_id;

// Figure out what top-level entities need to be renamed.
for (mx::PackedFragmentId frag_id : frags) {
for (mx::Decl tld : index.fragment(frag_id)->top_level_declarations()) {
tld = tld.canonical_declaration();
std::optional<mx::NamedDecl> nd = mx::NamedDecl::from(tld);
if (!nd) {
continue;
}
if (FLAGS_deduplicate) {
for (mx::PackedFragmentId frag_id : frags) {
for (mx::Decl tld : index.fragment(frag_id)->top_level_declarations()) {
tld = tld.canonical_declaration();
std::optional<mx::NamedDecl> nd = mx::NamedDecl::from(tld);
if (!nd) {
continue;
}

std::string_view name_view = nd->name();
if (name_view.empty()) {
continue;
}
std::string_view name_view = nd->name();
if (name_view.empty()) {
continue;
}

std::string name(name_view.data(), name_view.size());
mx::RawEntityId eid = nd->id().Pack();
mx::RawEntityId stored_eid = canon_id.emplace(name, eid).first->second;
if (eid != stored_eid) {
std::cout << "// Renaming " << name_view << '\n';
needs_rename.insert(eid);
needs_rename.insert(stored_eid);
std::string name(name_view.data(), name_view.size());
mx::RawEntityId eid = nd->id().Pack();
mx::RawEntityId stored_eid = canon_id.emplace(name, eid).first->second;
if (eid != stored_eid) {
std::cout << "// Renaming " << name_view << '\n';
needs_rename.insert(eid);
needs_rename.insert(stored_eid);
}
}
}
}

// Make sure our original entity isn't subject to renaming.
for (mx::Decl redecl : entity->redeclarations()) {
needs_rename.erase(redecl.id().Pack());
// Make sure our original entity isn't subject to renaming.
for (mx::Decl redecl : entity->redeclarations()) {
needs_rename.erase(redecl.id().Pack());
}
}

std::cerr
Expand Down Expand Up @@ -642,9 +645,11 @@ int main(int argc, char *argv[]) {
}

std::cout << tag->name();
if (mx::RawEntityId eid = tld.canonical_declaration().id().Pack();
needs_rename.contains(eid)) {
std::cout << '_' << eid;
if (FLAGS_deduplicate) {
if (mx::RawEntityId eid = tld.canonical_declaration().id().Pack();
needs_rename.contains(eid)) {
std::cout << '_' << eid;
}
}
std::cout << ";\n";
}
Expand Down Expand Up @@ -685,7 +690,7 @@ int main(int argc, char *argv[]) {

mx::Decl decl = std::get<mx::Decl>(ent).canonical_declaration();
mx::RawEntityId eid = decl.id().Pack();
if (needs_rename.contains(eid)) {
if (FLAGS_deduplicate && needs_rename.contains(eid)) {
std::cout << '_' << eid;
}
}
Expand Down
Loading
Loading