|
33 | 33 | following the VeloxVM bytecode format specification. |
34 | 34 | """ |
35 | 35 |
|
| 36 | +import io |
36 | 37 | import struct |
37 | 38 | from pathlib import Path |
38 | 39 | from typing import Union, List |
|
41 | 42 |
|
42 | 43 | def write_bytecode_file(path: Union[str, Path], bc: Bytecode): |
43 | 44 | """ |
44 | | - Write bytecode to a .vm file. |
45 | | -
|
46 | | - File format: |
47 | | - - Header (3 bytes): 0x5E, 0xB5, version |
| 45 | + Write bytecode to a .vm file in the v6 format. |
| 46 | +
|
| 47 | + File layout: |
| 48 | + - Header (9 fixed bytes + N-byte program name): |
| 49 | + 0x00 2 Magic (0x5E 0xB5) |
| 50 | + 0x02 2 Version (uint16 LE) |
| 51 | + 0x04 4 Total file length (uint32 LE) |
| 52 | + 0x08 1 Program name length N (uint8, at most 255) |
| 53 | + 0x09 N Program name (UTF-8, no terminator) |
48 | 54 | - String table: count (16-bit) + items (16-bit length + data) |
49 | 55 | - Symbol table: count (16-bit) + items (16-bit length + data) |
50 | 56 | - Expression table: count (16-bit) + items (16-bit length + data) |
51 | | - - Captures section: count (16-bit) + entries. Each entry is |
52 | | - (length:uint16, expr_id:uint16, symbol_id:uint16 ...). The entry |
53 | | - length is the byte count of the entry's payload, i.e. |
54 | | - 2 + 2 * len(symbol_ids). |
| 57 | + - Captures section: count (16-bit) + entries (length:uint16, |
| 58 | + expr_id:uint16, symbol_id:uint16 ...); length is the entry's payload byte count, 2 + 2 * len(symbol_ids). |
55 | 59 |
|
56 | 60 | Args: |
57 | 61 | path: Output file path |
58 | 62 | bc: Bytecode container to write |
59 | 63 | """ |
60 | | - with open(path, 'wb') as f: |
61 | | - # Write header (3 bytes) |
62 | | - f.write(bytes([0x5E, 0xB5, bc.version])) |
63 | | - |
64 | | - # Write string table |
65 | | - _write_table(f, bc.symbol_table.strings, _encode_string_item) |
| 64 | + path = Path(path) |
| 65 | + prog_name = path.stem.encode('utf-8') |
| 66 | + if len(prog_name) > 255: |
| 67 | + raise ValueError(f"Program name too long ({len(prog_name)} bytes, " |
| 68 | + f"max 255): {path.stem}") |
| 69 | + |
| 70 | + # Build the body in memory so we can fill in the total-length header |
| 71 | + # field before any bytes hit disk. |
| 72 | + body = io.BytesIO() |
| 73 | + _write_table(body, bc.symbol_table.strings, _encode_string_item) |
| 74 | + _write_table(body, bc.symbol_table.symbols, _encode_string_item) |
| 75 | + _write_table(body, bc.expressions, _encode_bytes_item) |
| 76 | + _write_captures_section(body, bc.captures) |
| 77 | + body_bytes = body.getvalue() |
| 78 | + |
| 79 | + total_len = 9 + len(prog_name) + len(body_bytes) |
66 | 80 |
|
67 | | - # Write symbol table |
68 | | - _write_table(f, bc.symbol_table.symbols, _encode_string_item) |
69 | | - |
70 | | - # Write expression table |
71 | | - _write_table(f, bc.expressions, _encode_bytes_item) |
72 | | - |
73 | | - # Write captures section |
74 | | - _write_captures_section(f, bc.captures) |
| 81 | + with open(path, 'wb') as f: |
| 82 | + f.write(bytes([0x5E, 0xB5])) # Magic |
| 83 | + f.write(struct.pack('<H', bc.version)) # Version (uint16 LE) |
| 84 | + f.write(struct.pack('<I', total_len)) # Total length (uint32 LE) |
| 85 | + f.write(struct.pack('<B', len(prog_name))) # Name length |
| 86 | + f.write(prog_name) # Name bytes |
| 87 | + f.write(body_bytes) |
75 | 88 |
|
76 | 89 |
|
77 | 90 | def _write_captures_section(f, captures): |
@@ -150,12 +163,16 @@ def read_bytecode_file(path: Union[str, Path]) -> Bytecode: |
150 | 163 | has the authoritative bytecode loader. |
151 | 164 | """ |
152 | 165 | with open(path, 'rb') as f: |
153 | | - # Read header |
| 166 | + # Read v6 fixed prefix (9 bytes: magic, version, total length, name length) |
154 | 167 | magic_bytes = f.read(2) |
155 | 168 | if magic_bytes != bytes([0x5E, 0xB5]): |
156 | 169 | raise ValueError(f"Invalid magic number: {magic_bytes.hex()}") |
157 | 170 |
|
158 | | - version = f.read(1)[0] |
| 171 | + version = struct.unpack('<H', f.read(2))[0] |
| 172 | + _total_len = struct.unpack('<I', f.read(4))[0] |
| 173 | + name_len = struct.unpack('<B', f.read(1))[0] |
| 174 | + if name_len > 0: |
| 175 | + _ = f.read(name_len) # consume program name |
159 | 176 |
|
160 | 177 | # Create bytecode container |
161 | 178 | bc = Bytecode() |
|
0 commit comments