Skip to content

Commit fc58e3f

Browse files
committed
Performance: Pre-compile multiple regular expressions in ExecutionSafetyManager
1 parent 2a47494 commit fc58e3f

2 files changed

Lines changed: 49 additions & 27 deletions

File tree

.jules/bolt.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
## 2025-02-23 - [Pre-compile Regex in Tight Loops]
2+
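**Learning:** In Python, calling methods on pre-compiled `re.Pattern` objects (e.g., `p.search(text)`) within tight safety-critical loops avoids the pattern-cache lookup that module-level calls such as `re.search(pattern, text)` perform on every call, which reduces per-call overhead. Furthermore, comprehensions and generator expressions at the class-body level run in their own scope and cannot see class-level names, with one exception: the outermost iterable is evaluated in the class scope. That is why `tuple(re.compile(p) for p in _PATTERNS)` works (a list comprehension would work equally well), whereas referencing another class attribute inside the element expression or an `if` clause raises `NameError`.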
3+
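**Action:** Pre-compile repeated regex checks into `re.Pattern` objects directly as class attributes. When initializing them from other class variables, keep the class variable as the outermost iterable of the generator passed to `tuple()`, and do not reference other class-level names inside the generator's element expression or conditions.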

libs/safety_manager.py

Lines changed: 46 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,39 @@ class ExecutionSafetyManager:
180180
r"\bbash\b",
181181
]
182182

183+
# =========================
184+
# PERFORMANCE OPTIMIZATION: Pre-compile regex lists to avoid re.compile cache lookup overhead
185+
# inside tight analysis loops. Using tuple of pre-compiled Patterns allows executing p.search
186+
# directly. The source lists are visible here because a generator's outermost iterable is evaluated in class scope.
187+
# =========================
188+
_COMPILED_WRITE_PATTERNS = tuple(re.compile(p, re.IGNORECASE) for p in _WRITE_PATTERNS)
189+
_COMPILED_WRITE_ON_HANDLE_PATTERNS = tuple(re.compile(p, re.IGNORECASE) for p in _WRITE_ON_HANDLE_PATTERNS)
190+
_COMPILED_SENSITIVE_POSIX_PREFIXES = tuple(re.compile(p, re.IGNORECASE) for p in _SENSITIVE_POSIX_PREFIXES)
191+
_COMPILED_DESTRUCTIVE_PATTERNS = tuple(re.compile(p) for p in _DESTRUCTIVE_PATTERNS)
192+
_COMPILED_SHELL_PATTERNS = tuple(re.compile(p) for p in _SHELL_PATTERNS)
193+
194+
_posix_system_prefixes = [
195+
r"/etc/\w+",
196+
r"/tmp/\w+",
197+
r"/var/\w+",
198+
r"/usr/\w+",
199+
r"/root/\w+",
200+
r"/home/\w+/",
201+
r"/proc/\w+",
202+
r"/sys/\w+",
203+
r"/dev/\w+",
204+
r"/boot/\w+",
205+
r"/opt/\w+",
206+
r"/mnt/\w+",
207+
r"/media/\w+",
208+
]
209+
_COMPILED_POSIX_SYSTEM_PREFIXES = tuple(re.compile(p, re.IGNORECASE) for p in _posix_system_prefixes)
210+
_COMPILED_WINDOWS_DRIVE_LETTER = re.compile(r"[a-z]:[\\/]")
211+
_COMPILED_POSIX_ABSOLUTE_QUOTED = re.compile(r"""["']/[^"'\s]""")
212+
_COMPILED_OPEN_ARGS = re.compile(r"open\s*\(\s*([\"'][^\"']+[\"'])", re.IGNORECASE)
213+
_COMPILED_WINDOWS_MATCH_DRIVE = re.compile(r"[a-zA-Z]:[\\/]")
214+
_COMPILED_RD_COMMAND = re.compile(r"\brd\s+/s\s+/q\b")
215+
183216
def __init__(self, unsafe_mode: bool = False):
184217
self.unsafe_mode = unsafe_mode
185218

@@ -228,7 +261,7 @@ def _has_write_operation(self, code: str) -> bool:
228261
"""Return True if *code* contains any write operation that must be
229262
blocked in SAFE mode.
230263
"""
231-
return any(re.search(p, code, re.IGNORECASE) for p in self._WRITE_PATTERNS)
264+
return any(p.search(code) for p in self._COMPILED_WRITE_PATTERNS)
232265

233266
# =========================
234267
# WRITE-ON-HANDLE DETECTION
@@ -240,52 +273,38 @@ def _has_write_on_handle(self, code: str) -> bool:
240273
"""Return True if *code* calls .write() on any object (handle check).
241274
This is intentionally only evaluated when an absolute path is present.
242275
"""
243-
return any(re.search(p, code, re.IGNORECASE) for p in self._WRITE_ON_HANDLE_PATTERNS)
276+
return any(p.search(code) for p in self._COMPILED_WRITE_ON_HANDLE_PATTERNS)
244277

245278
# =========================
246279
# HOST ABSOLUTE PATH CHECK
247280
# =========================
248281
def _is_host_absolute_path(self, code: str) -> bool:
249282
"""Return True if *code* references a host absolute path."""
283+
code_lower = code.lower()
250284
# Windows drive-letter path
251-
if re.search(r"[a-z]:[\\/]", code.lower()):
285+
if self._COMPILED_WINDOWS_DRIVE_LETTER.search(code_lower):
252286
return True
253287

254288
# Quoted POSIX absolute path: '/...' or "/..."
255-
if re.search(r"""["']/[^"'\s]""", code):
289+
if self._COMPILED_POSIX_ABSOLUTE_QUOTED.search(code):
256290
return True
257291

258292
# Unquoted well-known POSIX system directory prefixes
259-
_posix_system_prefixes = [
260-
r"/etc/\w+",
261-
r"/tmp/\w+",
262-
r"/var/\w+",
263-
r"/usr/\w+",
264-
r"/root/\w+",
265-
r"/home/\w+/",
266-
r"/proc/\w+",
267-
r"/sys/\w+",
268-
r"/dev/\w+",
269-
r"/boot/\w+",
270-
r"/opt/\w+",
271-
r"/mnt/\w+",
272-
r"/media/\w+",
273-
]
274-
if any(re.search(p, code, re.IGNORECASE) for p in _posix_system_prefixes):
293+
if any(p.search(code) for p in self._COMPILED_POSIX_SYSTEM_PREFIXES):
275294
return True
276295

277296
# open() call whose first positional argument is an absolute path string
278-
open_args = re.findall(r"open\s*\(\s*([\"'][^\"']+[\"'])", code, re.IGNORECASE)
297+
open_args = self._COMPILED_OPEN_ARGS.findall(code)
279298
for arg in open_args:
280299
path = arg.strip("'\"")
281-
if path.startswith("/") or re.match(r"[a-zA-Z]:[\\/]", path):
300+
if path.startswith("/") or self._COMPILED_WINDOWS_MATCH_DRIVE.match(path):
282301
return True
283302

284303
return False
285304

286305
def _is_sensitive_posix_path(self, code: str) -> bool:
287306
"""Return True if *code* references a sensitive POSIX system path."""
288-
return any(re.search(p, code, re.IGNORECASE) for p in self._SENSITIVE_POSIX_PREFIXES)
307+
return any(p.search(code) for p in self._COMPILED_SENSITIVE_POSIX_PREFIXES)
289308

290309
# =========================
291310
# MAIN CHECK
@@ -297,7 +316,7 @@ def assess_execution(self, code: str, mode: str) -> Decision:
297316
code_lower = code.lower()
298317

299318
# HARD BLOCK WINDOWS RECURSIVE DELETE (CRITICAL FIX)
300-
if re.search(r"\brd\s+/s\s+/q\b", code_lower):
319+
if self._COMPILED_RD_COMMAND.search(code_lower):
301320
return Decision(False, ["Recursive deletion is blocked."])
302321

303322
# UNSAFE MODE - still detect dangerous operations but allow with warnings
@@ -326,15 +345,15 @@ def assess_execution(self, code: str, mode: str) -> Decision:
326345
# (shutdown, reboot, mkfs, dd, format, diskpart) in addition to
327346
# filesystem deletes.
328347
# =========================
329-
if any(re.search(p, code_lower) for p in self._DESTRUCTIVE_PATTERNS):
348+
if any(p.search(code_lower) for p in self._COMPILED_DESTRUCTIVE_PATTERNS):
330349
return Decision(False, ["Destructive operation blocked."])
331350

332351
# =========================
333352
# SHELL BLOCK
334353
# BUG FIX #2: Uses _SHELL_PATTERNS with \b word-boundary regex instead
335354
# of plain substring `in` check to avoid false positives.
336355
# =========================
337-
if any(re.search(p, code_lower) for p in self._SHELL_PATTERNS):
356+
if any(p.search(code_lower) for p in self._COMPILED_SHELL_PATTERNS):
338357
return Decision(False, ["Shell execution is blocked."])
339358

340359
# =========================
@@ -370,7 +389,7 @@ def is_dangerous_operation(self, code: str) -> bool:
370389
if not code or not code.strip():
371390
return False
372391
code_lower = code.lower()
373-
return any(re.search(p, code_lower) for p in self._DESTRUCTIVE_PATTERNS)
392+
return any(p.search(code_lower) for p in self._COMPILED_DESTRUCTIVE_PATTERNS)
374393

375394
# =========================
376395
# ARTIFACT EXPORT

0 commit comments

Comments
 (0)