diff --git a/.jules/bolt.md b/.jules/bolt.md new file mode 100644 index 0000000..4dc88fe --- /dev/null +++ b/.jules/bolt.md @@ -0,0 +1,3 @@ +## 2024-05-24 - Optimizing Safety Manager Regex Compilation +**Learning:** When pre-compiling `re.Pattern` objects in a class body, a class-level name is visible to a comprehension or generator expression only as its outermost iterable; referencing one in the element expression, an `if` clause, or a nested `for` raises a `NameError` under Python 3. Building the patterns with a generator passed to `tuple`, iterating the class-level list as the outermost iterable, avoids this. By bypassing `re.search`'s cache lookup, a significant speedup in repeated loop executions was achieved for evaluating safety checks. +**Action:** Pre-compile regular expressions as a tuple at the class level for any methods that perform multiple evaluations within `any()` loops. diff --git a/libs/safety_manager.py b/libs/safety_manager.py index 1da4b28..5e93e27 100644 --- a/libs/safety_manager.py +++ b/libs/safety_manager.py @@ -100,6 +100,7 @@ class ExecutionSafetyManager: r'\.to_excel\s*\([^)]*[\'"/]', r'\.to_parquet\s*\([^)]*[\'"/]', ] + _COMPILED_WRITE_PATTERNS = tuple(re.compile(p, re.IGNORECASE) for p in _WRITE_PATTERNS) # BUG FIX (test_blocks_write_function_with_absolute_path): # When code opens a file handle (any mode, including 'r') and then calls @@ -111,6 +112,7 @@ class ExecutionSafetyManager: _WRITE_ON_HANDLE_PATTERNS = [ r"\.write\s*\(", ] + _COMPILED_WRITE_ON_HANDLE_PATTERNS = tuple(re.compile(p, re.IGNORECASE) for p in _WRITE_ON_HANDLE_PATTERNS) # Sensitive POSIX system path prefixes that are ALWAYS blocked (even for reads). _SENSITIVE_POSIX_PREFIXES = [ @@ -121,6 +123,7 @@ class ExecutionSafetyManager: r"/dev/\w+", r"/boot/\w+", ] + _COMPILED_SENSITIVE_POSIX_PREFIXES = tuple(re.compile(p, re.IGNORECASE) for p in _SENSITIVE_POSIX_PREFIXES) # Known-dangerous call targets for .remove() / .unlink() / .rmtree(). 
_DANGEROUS_ATTR_OWNERS = frozenset({"os", "shutil", "pathlib", "path"}) @@ -166,6 +169,7 @@ class ExecutionSafetyManager: r"\bformat\s+[a-z]:", r"\bdiskpart\b", ] + _COMPILED_DESTRUCTIVE_PATTERNS = tuple(re.compile(p) for p in _DESTRUCTIVE_PATTERNS) # ========================= # BUG FIX #2: Shell patterns now use re.search() with \b word boundaries @@ -179,6 +183,7 @@ class ExecutionSafetyManager: r"\bcmd\.exe\b", r"\bbash\b", ] + _COMPILED_SHELL_PATTERNS = tuple(re.compile(p) for p in _SHELL_PATTERNS) def __init__(self, unsafe_mode: bool = False): self.unsafe_mode = unsafe_mode @@ -228,7 +233,7 @@ def _has_write_operation(self, code: str) -> bool: """Return True if *code* contains any write operation that must be blocked in SAFE mode. """ - return any(re.search(p, code, re.IGNORECASE) for p in self._WRITE_PATTERNS) + return any(p.search(code) for p in self._COMPILED_WRITE_PATTERNS) # ========================= # WRITE-ON-HANDLE DETECTION @@ -240,7 +245,7 @@ def _has_write_on_handle(self, code: str) -> bool: """Return True if *code* calls .write() on any object (handle check). This is intentionally only evaluated when an absolute path is present. """ - return any(re.search(p, code, re.IGNORECASE) for p in self._WRITE_ON_HANDLE_PATTERNS) + return any(p.search(code) for p in self._COMPILED_WRITE_ON_HANDLE_PATTERNS) # ========================= # HOST ABSOLUTE PATH CHECK @@ -285,7 +290,7 @@ def _is_host_absolute_path(self, code: str) -> bool: def _is_sensitive_posix_path(self, code: str) -> bool: """Return True if *code* references a sensitive POSIX system path.""" - return any(re.search(p, code, re.IGNORECASE) for p in self._SENSITIVE_POSIX_PREFIXES) + return any(p.search(code) for p in self._COMPILED_SENSITIVE_POSIX_PREFIXES) # ========================= # MAIN CHECK @@ -326,7 +331,7 @@ def assess_execution(self, code: str, mode: str) -> Decision: # (shutdown, reboot, mkfs, dd, format, diskpart) in addition to # filesystem deletes. 
# ========================= - if any(re.search(p, code_lower) for p in self._DESTRUCTIVE_PATTERNS): + if any(p.search(code_lower) for p in self._COMPILED_DESTRUCTIVE_PATTERNS): return Decision(False, ["Destructive operation blocked."]) # ========================= @@ -334,7 +339,7 @@ def assess_execution(self, code: str, mode: str) -> Decision: # BUG FIX #2: Uses _SHELL_PATTERNS with \b word-boundary regex instead # of plain substring `in` check to avoid false positives. # ========================= - if any(re.search(p, code_lower) for p in self._SHELL_PATTERNS): + if any(p.search(code_lower) for p in self._COMPILED_SHELL_PATTERNS): return Decision(False, ["Shell execution is blocked."]) # ========================= @@ -370,7 +375,7 @@ def is_dangerous_operation(self, code: str) -> bool: if not code or not code.strip(): return False code_lower = code.lower() - return any(re.search(p, code_lower) for p in self._DESTRUCTIVE_PATTERNS) + return any(p.search(code_lower) for p in self._COMPILED_DESTRUCTIVE_PATTERNS) # ========================= # ARTIFACT EXPORT