From 672572f8fcab019085741662f32610c0ca014345 Mon Sep 17 00:00:00 2001
From: TurtlesMaster1
Date: Tue, 12 May 2026 20:02:56 -0700
Subject: [PATCH] Warn loudly when CodeExecutor runs LLM code without a Sandbox
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When an Agent is constructed without an explicit `sandbox=` parameter,
`Agent.execute_code` falls through to
`CodeExecutor.execute_and_return_result`, which calls
`exec(code, environment)` where `environment` is
`{'pd': pandas, 'plt': pyplot, 'np': numpy}`. Because the environment
has no `__builtins__` key, CPython silently inserts the full builtins
module, so LLM-generated code can call `__import__('os').system(...)`,
`open(...)`, `subprocess.run(...)`, and read environment variables.

The Docker sandbox extension is the supported mitigation but is opt-in
and not loud about the unsandboxed default. Users who follow the
quickstart and don't pass `sandbox=` to `Agent` are silently exposing
their host to anyone who can influence the LLM's input (a malicious user
prompt, a poisoned CSV column, a tool response).

This commit adds a one-shot `RuntimeWarning` plus a `logger.warning` the
first time `CodeExecutor.execute` runs without a sandbox in a given
process. The warning explains the risk, points to the DockerSandbox
mitigation, and offers an opt-out
(`Config(suppress_unsandboxed_warning=True)`) for users who have
considered the trade-off and want to silence the message.

No behavior change beyond the warning — exec() still runs as before so
this PR is backward compatible. A follow-up PR can propose a
restricted-builtins default for users who don't pass a sandbox.
---
Review notes (free text between "---" and the diffstat; not part of the
commit message and not part of the diff):

* _emit_sandbox_warning_once() stores its one-shot flag on the
  CodeExecutor class itself (CodeExecutor._warned_unsandboxed), so the
  flag is shared by every executor instance rather than kept per instance.
* warnings.warn(..., stacklevel=3) attributes the warning to the caller
  of execute(): level 1 is the helper, level 2 is execute(), level 3 is
  whoever invoked execute().
* execute() calls the helper before its try: block, so anything raised
  while warning (for example when a warnings filter turns RuntimeWarning
  into an error) is not caught by the `except Exception` clause below it.
* NOTE(review): this patch changes only code_executor.py. Confirm that
  Config accepts and stores `suppress_unsandboxed_warning`; if it does
  not, getattr() falls back to False and the opt-out advertised in the
  warning text has no effect.

 pandasai/core/code_execution/code_executor.py | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/pandasai/core/code_execution/code_executor.py b/pandasai/core/code_execution/code_executor.py
index 30862b8af..4cf980486 100644
--- a/pandasai/core/code_execution/code_executor.py
+++ b/pandasai/core/code_execution/code_executor.py
@@ -1,3 +1,5 @@
+import logging
+import warnings
 from typing import Any
 
 from pandasai.config import Config
@@ -5,6 +7,21 @@
 from pandasai.exceptions import CodeExecutionError, NoResultFoundError
 
 
+logger = logging.getLogger(__name__)
+
+
+_SANDBOX_WARNING = (
+    "pandas-ai is about to execute LLM-generated Python code on this host "
+    "without a sandbox. This is the historical default but it is unsafe: "
+    "if the LLM is influenced by untrusted input (a malicious prompt, a "
+    "crafted CSV column, a poisoned tool response) the generated code can "
+    "read files, exfiltrate environment variables, and run arbitrary "
+    "subprocesses. Configure a Sandbox (e.g. pandasai_docker.DockerSandbox) "
+    "via Agent(..., sandbox=...) to mitigate this. Pass "
+    "Config(suppress_unsandboxed_warning=True) to silence this warning."
+)
+
+
 class CodeExecutor:
     """
     Handle the logic on how to handle different lines of code
@@ -14,6 +31,7 @@ class CodeExecutor:
 
     def __init__(self, config: Config) -> None:
         self._environment = get_environment()
+        self._config = config
 
     def add_to_env(self, key: str, value: Any) -> None:
         """
@@ -24,7 +42,28 @@ def add_to_env(self, key: str, value: Any) -> None:
         """
         self._environment[key] = value
 
+    def _emit_sandbox_warning_once(self) -> None:
+        """Warn (once per process) that code is about to run without a sandbox.
+
+        ``exec(code, env)`` where ``env`` has no ``__builtins__`` key causes
+        Python to silently insert the full ``builtins`` module, including
+        ``__import__``, ``open``, ``eval``, ``exec``. LLM-generated code
+        therefore has unrestricted access to the host. The DockerSandbox
+        extension (or any subclass of :class:`pandasai.sandbox.Sandbox`)
+        is the supported mitigation; this warning makes the unsandboxed
+        default explicit so that users who haven't configured a sandbox
+        notice and choose intentionally.
+        """
+        if getattr(self._config, "suppress_unsandboxed_warning", False):
+            return
+        if getattr(CodeExecutor, "_warned_unsandboxed", False):
+            return
+        CodeExecutor._warned_unsandboxed = True
+        warnings.warn(_SANDBOX_WARNING, RuntimeWarning, stacklevel=3)
+        logger.warning(_SANDBOX_WARNING)
+
     def execute(self, code: str) -> dict:
+        self._emit_sandbox_warning_once()
         try:
             exec(code, self._environment)
         except Exception as e: