|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import mmap |
| 4 | +import os |
| 5 | +import struct |
| 6 | +import zlib |
| 7 | +from pathlib import Path |
| 8 | +from typing import Any, Final |
| 9 | + |
# -- Constants -----------------------------------------------------------------

# File identification: magic bytes and format version stored in the header.
MAGIC: Final[bytes] = b"SMPG"
VERSION: Final[int] = 1

# Region sizes, in bytes.
PAGE_SIZE: Final[int] = 4096
HEADER_SIZE: Final[int] = 4096
WAL_SIZE: Final[int] = 64 * 1024  # 64KB for initial WAL

# Byte offsets of the header fields, relative to the start of the file.
OFF_MAGIC: Final[int] = 0
OFF_VERSION: Final[int] = 4
OFF_FLAGS: Final[int] = 6
OFF_CRC: Final[int] = 8
OFF_ROOTS: Final[int] = 12  # Pointers to index, string pool, etc.
OFF_WAL_HEAD: Final[int] = 64
OFF_WAL_TAIL: Final[int] = 68

# Type tags carried in the first byte of a WAL record.
WAL_TYPE_INSERT: Final[int] = 0x01
WAL_TYPE_DELETE: Final[int] = 0x02
WAL_TYPE_COMMIT: Final[int] = 0x06
| 31 | + |
| 32 | + |
class WALRecord:
    """One Write-Ahead Log entry: a record type tag plus an opaque payload."""

    def __init__(self, rtype: int, payload: bytes) -> None:
        self.rtype = rtype
        self.payload = payload

    def serialize(self) -> bytes:
        """Return the binary encoding of this record.

        Layout (little-endian, no padding): 1-byte record type, two zero
        bytes, 4-byte payload length, 4-byte CRC-32, then the payload.
        The CRC-32 covers the 7 bytes that precede it followed by the
        payload.
        """
        body = self.payload
        prefix = struct.pack("<BBBI", self.rtype, 0, 0, len(body))
        # Running CRC: checksum the prefix, then continue over the payload.
        checksum = zlib.crc32(body, zlib.crc32(prefix)) & 0xFFFFFFFF
        return b"".join((prefix, struct.pack("<I", checksum), body))
| 45 | + |
| 46 | + |
class MMapFile:
    """Low-level memory-mapped file with header and WAL management.

    The file starts with a ``HEADER_SIZE``-byte header, immediately followed
    by a ``WAL_SIZE``-byte write-ahead-log region. The header stores a CRC-32
    (at ``OFF_CRC``) over the bytes ``OFF_ROOTS .. HEADER_SIZE``, a range that
    includes the WAL head/tail pointers, so every pointer update must be
    followed by ``update_header_crc()``.

    Usable as a context manager: ``with MMapFile(path) as f: ...``.
    """

    def __init__(self, path: Path) -> None:
        self.path = path
        self.fd: int = -1
        self.mmap: mmap.mmap | None = None
        self._size: int = 0
        self._wal_start: int = HEADER_SIZE
        self._wal_end: int = HEADER_SIZE + WAL_SIZE

    # -- Lifecycle -------------------------------------------------------------

    def open(self, create: bool = True) -> None:
        """Open the file and map it into memory.

        A missing file is created (header plus empty WAL) when *create* is
        true. An existing file has its header validated and its WAL replayed.

        Raises:
            FileNotFoundError: the file does not exist and *create* is false.
            ValueError: the existing file is too small, has the wrong magic
                bytes, an unsupported version, or a header CRC mismatch.
        """
        exists = self.path.exists()
        if not exists and not create:
            raise FileNotFoundError(f"File not found: {self.path}")

        mode = os.O_RDWR
        if not exists:
            mode |= os.O_CREAT

        # os.open defaults to 0o777; a data file should not be executable.
        self.fd = os.open(self.path, mode, 0o666)
        try:
            if not exists:
                # Initialize with header + empty WAL
                self._size = HEADER_SIZE + WAL_SIZE
                os.ftruncate(self.fd, self._size)
                self.mmap = mmap.mmap(self.fd, self._size)
                self._init_header()
            else:
                # Size the descriptor we actually hold, not the path.
                self._size = os.fstat(self.fd).st_size
                if self._size < HEADER_SIZE + WAL_SIZE:
                    raise ValueError(
                        f"File too small to be an SMPG file: {self._size} bytes"
                    )
                self.mmap = mmap.mmap(self.fd, self._size)
                self._validate_header()
                self.replay_wal()
        except BaseException:
            # Do not leak the descriptor or mapping when opening fails.
            self._release()
            raise

    def close(self) -> None:
        """Sync and close the file. Safe to call when already closed."""
        try:
            if self.mmap is not None:
                self.mmap.flush()
        finally:
            # Release the mapping and descriptor even if the flush failed.
            self._release()

    def _release(self) -> None:
        """Close the mapping and the descriptor without flushing."""
        mapping, self.mmap = self.mmap, None
        fd, self.fd = self.fd, -1
        try:
            if mapping is not None:
                mapping.close()
        finally:
            if fd != -1:
                os.close(fd)

    def __enter__(self) -> MMapFile:
        self.open()
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        self.close()

    # -- WAL -------------------------------------------------------------------

    def write_wal_record(self, rtype: int, payload: bytes) -> None:
        """Append a record to the WAL region.

        When the record does not fit after the current head, the WAL is
        checkpointed and the record is written at the start of the region.

        Raises:
            RuntimeError: the file is not open.
            ValueError: the serialized record is larger than the whole WAL
                region and therefore can never fit.
        """
        mm = self._require_map()
        record = WALRecord(rtype, payload).serialize()
        rec_size = len(record)

        capacity = self._wal_end - self._wal_start
        if rec_size > capacity:
            # Without this guard the write below would run past the WAL region.
            raise ValueError(
                f"WAL record of {rec_size} bytes exceeds WAL capacity "
                f"of {capacity} bytes"
            )

        head = self._read_u32(OFF_WAL_HEAD)

        # Simple non-circular append for MVP, will make circular later if needed
        if self._wal_start + head + rec_size > self._wal_end:
            self.checkpoint()
            head = 0

        pos = self._wal_start + head
        mm[pos : pos + rec_size] = record

        self._write_u32(OFF_WAL_HEAD, head + rec_size)
        # The head pointer is inside the CRC-covered range; keep the CRC valid.
        self.update_header_crc()

    def checkpoint(self) -> None:
        """Flush the mapping, reset the WAL pointers, and refresh the CRC.

        Raises:
            RuntimeError: the file is not open.
        """
        mm = self._require_map()
        mm.flush()
        self._write_u32(OFF_WAL_HEAD, 0)
        self._write_u32(OFF_WAL_TAIL, 0)
        self.update_header_crc()

    def replay_wal(self) -> None:
        """Replay uncommitted WAL records (stub for now)."""

    # -- Header ----------------------------------------------------------------

    def _init_header(self) -> None:
        """Write initial header metadata."""
        mm = self._require_map()
        mm[OFF_MAGIC : OFF_MAGIC + 4] = MAGIC
        mm[OFF_VERSION : OFF_VERSION + 2] = struct.pack("<H", VERSION)
        mm[OFF_FLAGS : OFF_FLAGS + 2] = struct.pack("<H", 0)
        # WAL pointers (initially empty)
        self._write_u32(OFF_WAL_HEAD, 0)
        self._write_u32(OFF_WAL_TAIL, 0)
        self.update_header_crc()

    def _validate_header(self) -> None:
        """Check magic bytes, version, and header CRC.

        Raises:
            ValueError: on wrong magic bytes, a version newer than
                ``VERSION``, or a stored CRC that does not match the header.
        """
        mm = self._require_map()
        if mm[OFF_MAGIC : OFF_MAGIC + 4] != MAGIC:
            raise ValueError("Invalid magic bytes: not an SMPG file")

        version = struct.unpack("<H", mm[OFF_VERSION : OFF_VERSION + 2])[0]
        if version > VERSION:
            raise ValueError(f"Unsupported version: {version}")

        stored_crc = self._read_u32(OFF_CRC)
        actual_crc = self._header_crc()
        if actual_crc != stored_crc:
            raise ValueError(
                f"Header CRC mismatch: stored {stored_crc:#010x}, "
                f"computed {actual_crc:#010x}"
            )

    def update_header_crc(self) -> None:
        """Recalculate and write header CRC.

        Raises:
            RuntimeError: the file is not open.
        """
        self._write_u32(OFF_CRC, self._header_crc())

    def _header_crc(self) -> int:
        """Return the CRC-32 of the header bytes that follow the CRC field."""
        mm = self._require_map()
        # Magic, version, flags and the CRC field itself are not covered.
        return zlib.crc32(mm[OFF_ROOTS:HEADER_SIZE]) & 0xFFFFFFFF

    # -- Resizing --------------------------------------------------------------

    def grow(self, new_size: int) -> None:
        """Resize the file and remap.

        *new_size* is rounded up to a multiple of ``PAGE_SIZE``. Requests that
        do not exceed the current size are ignored.

        Raises:
            RuntimeError: the file is not open.
            OSError: the file could not be extended; the previous mapping is
                re-established at the old size before the error propagates.
        """
        mm = self._require_map()
        if new_size <= self._size:
            return

        # Ensure aligned to PAGE_SIZE
        new_size = (new_size + PAGE_SIZE - 1) // PAGE_SIZE * PAGE_SIZE

        mm.flush()
        mm.close()
        # Never leave self.mmap pointing at a closed mapping.
        self.mmap = None
        try:
            os.ftruncate(self.fd, new_size)
        except OSError:
            # The file is unchanged; restore a usable mapping at the old size.
            self.mmap = mmap.mmap(self.fd, self._size)
            raise
        self.mmap = mmap.mmap(self.fd, new_size)
        self._size = new_size

    # -- Internal helpers ------------------------------------------------------

    def _require_map(self) -> mmap.mmap:
        """Return the active mapping, raising RuntimeError if not open.

        An explicit check is used instead of ``assert`` so the guard is not
        stripped when Python runs with ``-O``.
        """
        if self.mmap is None:
            raise RuntimeError(f"MMapFile is not open: {self.path}")
        return self.mmap

    def _read_u32(self, offset: int) -> int:
        """Read a little-endian unsigned 32-bit integer at *offset*."""
        mm = self._require_map()
        return struct.unpack("<I", mm[offset : offset + 4])[0]

    def _write_u32(self, offset: int, value: int) -> None:
        """Write *value* as a little-endian unsigned 32-bit integer at *offset*."""
        mm = self._require_map()
        mm[offset : offset + 4] = struct.pack("<I", value)
0 commit comments