Skip to content

Commit ca6b2c3

Browse files
committed
Pre-compile safety-check regexes in ExecutionSafetyManager
1 parent 2a47494 commit ca6b2c3

2 files changed

Lines changed: 19 additions & 16 deletions

File tree

.jules/bolt.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
## 2025-05-18 - Pre-compiled Regexes for Safety Checks
2+
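**Learning:** Calling `re.search(p, text)` with raw pattern strings inside `any()` sends every call through the `re` module's internal pattern-cache lookup (and a full recompile whenever a pattern has been evicted from that cache), which adds avoidable per-call overhead in tight safety-check loops.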
3+
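**Action:** Pre-compile patterns into a tuple of `re.Pattern` objects using `tuple(re.compile(p) for p in ...)` as class attributes, moving any flags (e.g. `re.IGNORECASE`) into the `re.compile` call, so each check calls `p.search(text)` directly and assessment is faster.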

libs/safety_manager.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ class ExecutionSafetyManager:
7474
# single-quoted raw strings r'...' so that ['"] is unambiguous.
7575
# Using r"...['\""]..." caused the bare trailing `"` to prematurely close
7676
# the outer double-quoted string → E999 SyntaxError at line 74.
77-
_WRITE_PATTERNS = [
77+
_WRITE_PATTERNS = tuple(re.compile(p, re.IGNORECASE) for p in [
7878
# open() explicit write modes — text and binary variants with optional '+'
7979
r'open\s*\([^)]*[\'"]w[btax]?\+?[\'"]',
8080
r'open\s*\([^)]*[\'"]a[btx]?\+?[\'"]',
@@ -99,7 +99,7 @@ class ExecutionSafetyManager:
9999
r'\.to_html\s*\([^)]*[\'"/]',
100100
r'\.to_excel\s*\([^)]*[\'"/]',
101101
r'\.to_parquet\s*\([^)]*[\'"/]',
102-
]
102+
])
103103

104104
# BUG FIX (test_blocks_write_function_with_absolute_path):
105105
# When code opens a file handle (any mode, including 'r') and then calls
@@ -108,19 +108,19 @@ class ExecutionSafetyManager:
108108
# _WRITE_PATTERNS so it is only evaluated in the combined absolute-path
109109
# write check — preventing false positives like sys.stdout.write() on
110110
# purely relative / non-file code paths.
111-
_WRITE_ON_HANDLE_PATTERNS = [
111+
_WRITE_ON_HANDLE_PATTERNS = tuple(re.compile(p, re.IGNORECASE) for p in [
112112
r"\.write\s*\(",
113-
]
113+
])
114114

115115
# Sensitive POSIX system path prefixes that are ALWAYS blocked (even for reads).
116-
_SENSITIVE_POSIX_PREFIXES = [
116+
_SENSITIVE_POSIX_PREFIXES = tuple(re.compile(p, re.IGNORECASE) for p in [
117117
r"/etc/\w+",
118118
r"/root/\w+",
119119
r"/proc/\w+",
120120
r"/sys/\w+",
121121
r"/dev/\w+",
122122
r"/boot/\w+",
123-
]
123+
])
124124

125125
# Known-dangerous call targets for .remove() / .unlink() / .rmtree().
126126
_DANGEROUS_ATTR_OWNERS = frozenset({"os", "shutil", "pathlib", "path"})
@@ -142,7 +142,7 @@ class ExecutionSafetyManager:
142142
# false-positives on SQL DELETE keyword used as a string literal in
143143
# data-analysis code (e.g. cursor.execute("DELETE FROM ...")).
144144
# =========================
145-
_DESTRUCTIVE_PATTERNS = [
145+
_DESTRUCTIVE_PATTERNS = tuple(re.compile(p) for p in [
146146
# Filesystem deletes
147147
r"\bunlink\b",
148148
r"\bunlinksync\b",
@@ -165,20 +165,20 @@ class ExecutionSafetyManager:
165165
r"\bdd\s+if=",
166166
r"\bformat\s+[a-z]:",
167167
r"\bdiskpart\b",
168-
]
168+
])
169169

170170
# =========================
171171
# BUG FIX #2: Shell patterns now use re.search() with \b word boundaries
172172
# instead of plain `in` substring matching. Previously "bash" matched
173173
# any identifier containing "bash" (e.g. "abashed", "bashful").
174174
# =========================
175-
_SHELL_PATTERNS = [
175+
_SHELL_PATTERNS = tuple(re.compile(p) for p in [
176176
r"\bsubprocess\b",
177177
r"\bos\.system\b",
178178
r"\bpowershell\b",
179179
r"\bcmd\.exe\b",
180180
r"\bbash\b",
181-
]
181+
])
182182

183183
def __init__(self, unsafe_mode: bool = False):
184184
self.unsafe_mode = unsafe_mode
@@ -228,7 +228,7 @@ def _has_write_operation(self, code: str) -> bool:
228228
"""Return True if *code* contains any write operation that must be
229229
blocked in SAFE mode.
230230
"""
231-
return any(re.search(p, code, re.IGNORECASE) for p in self._WRITE_PATTERNS)
231+
return any(p.search(code) for p in self._WRITE_PATTERNS)
232232

233233
# =========================
234234
# WRITE-ON-HANDLE DETECTION
@@ -240,7 +240,7 @@ def _has_write_on_handle(self, code: str) -> bool:
240240
"""Return True if *code* calls .write() on any object (handle check).
241241
This is intentionally only evaluated when an absolute path is present.
242242
"""
243-
return any(re.search(p, code, re.IGNORECASE) for p in self._WRITE_ON_HANDLE_PATTERNS)
243+
return any(p.search(code) for p in self._WRITE_ON_HANDLE_PATTERNS)
244244

245245
# =========================
246246
# HOST ABSOLUTE PATH CHECK
@@ -285,7 +285,7 @@ def _is_host_absolute_path(self, code: str) -> bool:
285285

286286
def _is_sensitive_posix_path(self, code: str) -> bool:
287287
"""Return True if *code* references a sensitive POSIX system path."""
288-
return any(re.search(p, code, re.IGNORECASE) for p in self._SENSITIVE_POSIX_PREFIXES)
288+
return any(p.search(code) for p in self._SENSITIVE_POSIX_PREFIXES)
289289

290290
# =========================
291291
# MAIN CHECK
@@ -326,15 +326,15 @@ def assess_execution(self, code: str, mode: str) -> Decision:
326326
# (shutdown, reboot, mkfs, dd, format, diskpart) in addition to
327327
# filesystem deletes.
328328
# =========================
329-
if any(re.search(p, code_lower) for p in self._DESTRUCTIVE_PATTERNS):
329+
if any(p.search(code_lower) for p in self._DESTRUCTIVE_PATTERNS):
330330
return Decision(False, ["Destructive operation blocked."])
331331

332332
# =========================
333333
# SHELL BLOCK
334334
# BUG FIX #2: Uses _SHELL_PATTERNS with \b word-boundary regex instead
335335
# of plain substring `in` check to avoid false positives.
336336
# =========================
337-
if any(re.search(p, code_lower) for p in self._SHELL_PATTERNS):
337+
if any(p.search(code_lower) for p in self._SHELL_PATTERNS):
338338
return Decision(False, ["Shell execution is blocked."])
339339

340340
# =========================
@@ -370,7 +370,7 @@ def is_dangerous_operation(self, code: str) -> bool:
370370
if not code or not code.strip():
371371
return False
372372
code_lower = code.lower()
373-
return any(re.search(p, code_lower) for p in self._DESTRUCTIVE_PATTERNS)
373+
return any(p.search(code_lower) for p in self._DESTRUCTIVE_PATTERNS)
374374

375375
# =========================
376376
# ARTIFACT EXPORT

0 commit comments

Comments
 (0)