From 5b22cc08fcace709e45114f1f5f2eed62de4a735 Mon Sep 17 00:00:00 2001 From: Cyrus <1880797+CyrusNuevoDia@users.noreply.github.com> Date: Sun, 8 Feb 2026 19:53:11 -0700 Subject: [PATCH 1/6] Add Monty REPL environment --- README.md | 13 +- pyproject.toml | 1 + rlm/core/types.py | 2 +- rlm/environments/__init__.py | 11 +- rlm/environments/monty_repl.py | 342 +++++++++++++++++++++++++++++++++ tests/test_imports.py | 17 ++ tests/test_monty_repl.py | 30 +++ uv.lock | 64 +++++- 8 files changed, 472 insertions(+), 8 deletions(-) create mode 100644 rlm/environments/monty_repl.py create mode 100644 tests/test_monty_repl.py diff --git a/README.md b/README.md index 75c0f8b8..d2aeb2b1 100644 --- a/README.md +++ b/README.md @@ -77,11 +77,11 @@ make quickstart ## REPL Environments -We support two types of REPL environments -- isolated, and non-isolated. Non-isolated environments (default) run code execution on the same machine as the RLM (e.g. through `exec`), which is pretty reasonable for some local low-risk tasks, like simple benchmarking, but can be problematic if the prompts or tool calls can interact with malicious users. Fully isolated environments used Cloud-based sandboxes (e.g. Prime Sandboxes, [Modal Sandboxes](https://modal.com/docs/guide/sandboxes)) to run code generated by the RLM, ensuring completely isolation from the host process. Environments can be added, but we natively support the following: `local` (default), `modal`, `prime`. +We support two types of REPL environments -- isolated, and non-isolated. Non-isolated environments (default) run code execution on the same machine as the RLM (e.g. through `exec`), which is pretty reasonable for some local low-risk tasks, like simple benchmarking, but can be problematic if the prompts or tool calls can interact with malicious users. Fully isolated environments used Cloud-based sandboxes (e.g. 
Prime Sandboxes, [Modal Sandboxes](https://modal.com/docs/guide/sandboxes)) to run code generated by the RLM, ensuring complete isolation from the host process. Environments can be added, but we natively support the following: `local` (default), `monty`, `modal`, `prime`. ```python rlm = RLM( - environment="...", # "local", "docker", "modal", "prime" + environment="...", # "local", "monty", "docker", "modal", "prime" environment_kwargs={...}, ) ``` @@ -89,6 +89,15 @@ rlm = RLM( ### Local Environments The default `local` environment `LocalREPL` runs in the same process as the RLM itself, with specified global and local namespaces for minimal security. Using this REPL is generally safe, but should not be used for production settings. It also shares the same virtual environment (e.g. Conda or uv) as the host process. +#### Monty +We also support a Monty-based environment called `MontyREPL` that runs code inside the Monty sandboxed interpreter. Monty is fast and sandboxed, but it supports only a limited subset of Python and a small standard library. + +To install Monty support: +```bash +uv pip install -e ".[monty]" +# or: pip install pydantic-monty +``` + #### Docker Docker (*requires [Docker installed](https://docs.docker.com/desktop/setup/install/)*) We also support a Docker-based environment called `DockerREPL` that launches the REPL environment as a Docker image. By default, we use the `python:3.11-slim` image, but the user can specify custom images as well. 
diff --git a/pyproject.toml b/pyproject.toml index 99aff8a4..fea0e6bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ Issues = "https://github.com/alexzhang13/rlm/issues" modal = ["modal>=0.73.0", "dill>=0.3.7"] daytona = ["daytona>=0.128.1", "dill>=0.3.7"] prime = ["prime-sandboxes>=0.2.0", "dill>=0.3.7"] +monty = ["pydantic-monty>=0.0.4"] [build-system] requires = ["setuptools>=61.0"] diff --git a/rlm/core/types.py b/rlm/core/types.py index f20474d4..984f3aca 100644 --- a/rlm/core/types.py +++ b/rlm/core/types.py @@ -13,7 +13,7 @@ "azure_openai", "gemini", ] -EnvironmentType = Literal["local", "docker", "modal", "prime", "daytona"] +EnvironmentType = Literal["local", "docker", "modal", "prime", "daytona", "monty"] def _serialize_value(value: Any) -> Any: diff --git a/rlm/environments/__init__.py b/rlm/environments/__init__.py index eed91b30..7b6cf1a3 100644 --- a/rlm/environments/__init__.py +++ b/rlm/environments/__init__.py @@ -7,12 +7,12 @@ def get_environment( - environment: Literal["local", "modal", "docker", "daytona", "prime"], + environment: Literal["local", "modal", "docker", "daytona", "prime", "monty"], environment_kwargs: dict[str, Any], ) -> BaseEnv: """ Routes a specific environment and the args (as a dict) to the appropriate environment if supported. - Currently supported environments: ['local', 'modal', 'docker', 'daytona', 'prime'] + Currently supported environments: ['local', 'modal', 'docker', 'daytona', 'prime', 'monty'] """ if environment == "local": return LocalREPL(**environment_kwargs) @@ -32,7 +32,12 @@ def get_environment( from rlm.environments.prime_repl import PrimeREPL return PrimeREPL(**environment_kwargs) + elif environment == "monty": + from rlm.environments.monty_repl import MontyREPL + + return MontyREPL(**environment_kwargs) else: raise ValueError( - f"Unknown environment: {environment}. Supported: ['local', 'modal', 'docker', 'daytona', 'prime']" + f"Unknown environment: {environment}. 
Supported: " + "['local', 'modal', 'docker', 'daytona', 'prime', 'monty']" ) diff --git a/rlm/environments/monty_repl.py b/rlm/environments/monty_repl.py new file mode 100644 index 00000000..8ab405b7 --- /dev/null +++ b/rlm/environments/monty_repl.py @@ -0,0 +1,342 @@ +""" +Monty REPL environment that runs Python code in the Monty sandbox. + +Requires: pydantic-monty +""" + +from __future__ import annotations + +import ast +import time +from typing import Any, Literal + +import pydantic_monty + +from rlm.core.comms_utils import LMRequest, send_lm_request, send_lm_request_batched +from rlm.core.types import REPLResult, RLMChatCompletion +from rlm.environments.base_env import NonIsolatedEnv + +RESERVED_NAMES = { + "__rlm_state", + "__rlm_capture_locals", + "__rlm_state_out", + "llm_query", + "llm_query_batched", + "FINAL_VAR", + "SHOW_VARS", + "print", +} + + +class AssignedNameCollector(ast.NodeVisitor): + def __init__(self) -> None: + self.names: set[str] = set() + self.scope_depth = 0 + + def add_target(self, target: ast.AST) -> None: + if isinstance(target, ast.Name): + self.names.add(target.id) + elif isinstance(target, (ast.Tuple, ast.List)): + for item in target.elts: + self.add_target(item) + + def visit_Assign(self, node: ast.Assign) -> None: + if self.scope_depth == 0: + for target in node.targets: + self.add_target(target) + self.generic_visit(node) + + def visit_AnnAssign(self, node: ast.AnnAssign) -> None: + if self.scope_depth == 0: + self.add_target(node.target) + self.generic_visit(node) + + def visit_AugAssign(self, node: ast.AugAssign) -> None: + if self.scope_depth == 0: + self.add_target(node.target) + self.generic_visit(node) + + def visit_For(self, node: ast.For) -> None: + if self.scope_depth == 0: + self.add_target(node.target) + self.generic_visit(node) + + def visit_AsyncFor(self, node: ast.AsyncFor) -> None: + if self.scope_depth == 0: + self.add_target(node.target) + self.generic_visit(node) + + def visit_With(self, node: ast.With) -> 
None: + if self.scope_depth == 0: + for item in node.items: + if item.optional_vars is not None: + self.add_target(item.optional_vars) + self.generic_visit(node) + + def visit_AsyncWith(self, node: ast.AsyncWith) -> None: + if self.scope_depth == 0: + for item in node.items: + if item.optional_vars is not None: + self.add_target(item.optional_vars) + self.generic_visit(node) + + def visit_ExceptHandler(self, node: ast.ExceptHandler) -> None: + if self.scope_depth == 0 and node.name: + self.names.add(node.name) + self.generic_visit(node) + + def visit_Import(self, node: ast.Import) -> None: + if self.scope_depth == 0: + for alias in node.names: + self.names.add(alias.asname or alias.name.split(".")[0]) + self.generic_visit(node) + + def visit_ImportFrom(self, node: ast.ImportFrom) -> None: + if self.scope_depth == 0: + for alias in node.names: + self.names.add(alias.asname or alias.name) + self.generic_visit(node) + + def visit_FunctionDef(self, node: ast.FunctionDef) -> None: + if self.scope_depth == 0: + self.names.add(node.name) + self.scope_depth += 1 + self.generic_visit(node) + self.scope_depth -= 1 + + def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: + if self.scope_depth == 0: + self.names.add(node.name) + self.scope_depth += 1 + self.generic_visit(node) + self.scope_depth -= 1 + + def visit_ClassDef(self, node: ast.ClassDef) -> None: + if self.scope_depth == 0: + self.names.add(node.name) + self.scope_depth += 1 + self.generic_visit(node) + self.scope_depth -= 1 + + +class MontyREPL(NonIsolatedEnv): + """ + Monty REPL environment that runs Python code in a sandboxed interpreter. + + Monty runs in-process but is sandboxed, so this is treated as a non-isolated + environment. 
+ """ + + def __init__( + self, + lm_handler_address: tuple[str, int] | None = None, + context_payload: dict | list | str | None = None, + setup_code: str | None = None, + persistent: bool = False, + depth: int = 1, + resource_limits: pydantic_monty.ResourceLimits | None = None, + **kwargs: Any, + ) -> None: + if persistent: + raise NotImplementedError( + "Persistent REPLs are currently not supported for environment: MontyREPL" + ) + super().__init__(persistent=persistent, depth=depth, **kwargs) + + self.lm_handler_address = lm_handler_address + self.resource_limits = resource_limits + self.locals: dict[str, Any] = {} + self.pending_llm_calls: list[RLMChatCompletion] = [] + self.stdout_parts: list[str] = [] + + self.setup() + + if context_payload is not None: + self.load_context(context_payload) + + if setup_code: + self.execute_code(setup_code) + + def setup(self) -> None: + """Setup the environment.""" + self.locals = {} + self.pending_llm_calls = [] + self.stdout_parts = [] + + def load_context(self, context_payload: dict | list | str) -> None: + """Load context into the environment as context_0 (and 'context' alias).""" + self.locals["context_0"] = context_payload + self.locals["context"] = context_payload + + def execute_code(self, code: str) -> REPLResult: + """Execute code in the Monty sandbox and return result.""" + start_time = time.perf_counter() + self.stdout_parts = [] + self.pending_llm_calls = [] + + wrapper_script = self.build_wrapper_script(code) + external_functions = { + "__rlm_capture_locals": self.capture_locals, + "FINAL_VAR": self.final_var, + "SHOW_VARS": self.show_vars, + "llm_query": self.llm_query, + "llm_query_batched": self.llm_query_batched, + } + + try: + runner = pydantic_monty.Monty( + wrapper_script, + inputs=["__rlm_state"], + external_functions=list(external_functions.keys()), + ) + result = runner.run( + inputs={"__rlm_state": self.locals}, + external_functions=external_functions, + limits=self.resource_limits, + 
print_callback=self.handle_print_callback, + ) + if result is not None: + self.stdout_parts.append(str(result)) + + return REPLResult( + stdout="".join(self.stdout_parts), + stderr="", + locals=self.locals.copy(), + execution_time=time.perf_counter() - start_time, + rlm_calls=self.pending_llm_calls.copy(), + ) + except Exception as exc: + return REPLResult( + stdout="".join(self.stdout_parts), + stderr=f"{type(exc).__name__}: {exc}", + locals=self.locals.copy(), + execution_time=time.perf_counter() - start_time, + rlm_calls=self.pending_llm_calls.copy(), + ) + + def build_wrapper_script(self, user_code: str) -> str: + """Build a wrapper script that restores state, captures output, and persists locals.""" + lines: list[str] = [] + for key in self.locals: + if key.isidentifier(): + lines.append(f"{key} = __rlm_state.get({key!r})") + + lines.append(user_code) + + assigned_names = self.collect_assigned_names(user_code) + persisted_names = { + name + for name in set(self.locals) | assigned_names + if name.isidentifier() and not name.startswith("_") and name not in RESERVED_NAMES + } + + lines.append("__rlm_state_out = {}") + for name in sorted(persisted_names): + lines.append("try:") + lines.append(f" __rlm_state_out[{name!r}] = {name}") + lines.append("except NameError:") + lines.append(" pass") + lines.append("__rlm_capture_locals(__rlm_state_out)") + + return "\n".join(lines) + + def handle_print_callback(self, stream: Literal["stdout", "stderr"], text: str) -> None: + """Collect printed output from Monty.""" + if stream == "stdout": + self.stdout_parts.append(text) + + def capture_locals(self, vars_dict: dict[str, Any]) -> None: + """Capture locals after execution.""" + self.locals = vars_dict.copy() + + @staticmethod + def collect_assigned_names(code: str) -> set[str]: + """Collect names assigned at module scope in the provided code.""" + try: + tree = ast.parse(code) + except SyntaxError: + return set() + + collector = AssignedNameCollector() + 
collector.visit(tree) + return collector.names + + def final_var(self, variable_name: str) -> str: + """Return the value of a variable as a final answer.""" + variable_name = variable_name.strip().strip("\"'") + if variable_name in self.locals: + return str(self.locals[variable_name]) + + available = [k for k in self.locals.keys() if not k.startswith("_")] + if available: + return ( + f"Error: Variable '{variable_name}' not found. " + f"Available variables: {available}. " + "You must create and assign a variable BEFORE calling FINAL_VAR on it." + ) + return ( + f"Error: Variable '{variable_name}' not found. " + "No variables have been created yet. " + "You must create and assign a variable in a REPL block BEFORE calling FINAL_VAR on it." + ) + + def show_vars(self) -> str: + """Show all available variables in the REPL environment.""" + available = {k: type(v).__name__ for k, v in self.locals.items() if not k.startswith("_")} + if not available: + return "No variables created yet. Use ```repl``` blocks to create variables." 
+ return f"Available variables: {available}" + + def llm_query(self, prompt: str, model: str | None = None) -> str: + """Query the LM via socket connection to the handler.""" + if not self.lm_handler_address: + return "Error: No LM handler configured" + + try: + request = LMRequest(prompt=prompt, model=model, depth=self.depth) + response = send_lm_request(self.lm_handler_address, request) + + if not response.success: + return f"Error: {response.error}" + + self.pending_llm_calls.append(response.chat_completion) + return response.chat_completion.response + except Exception as exc: + return f"Error: LM query failed - {exc}" + + def llm_query_batched(self, prompts: list[str], model: str | None = None) -> list[str]: + """Query the LM with multiple prompts concurrently.""" + if not self.lm_handler_address: + return ["Error: No LM handler configured"] * len(prompts) + + try: + responses = send_lm_request_batched( + self.lm_handler_address, prompts, model=model, depth=self.depth + ) + results: list[str] = [] + for response in responses: + if not response.success: + results.append(f"Error: {response.error}") + else: + self.pending_llm_calls.append(response.chat_completion) + results.append(response.chat_completion.response) + + return results + except Exception as exc: + return [f"Error: LM query failed - {exc}"] * len(prompts) + + def cleanup(self) -> None: + """Clean up environment state.""" + self.locals.clear() + self.pending_llm_calls.clear() + self.stdout_parts.clear() + + def __enter__(self) -> MontyREPL: + return self + + def __exit__(self, exc_type, exc_val, exc_tb) -> bool: + self.cleanup() + return False + + def __del__(self) -> None: + self.cleanup() diff --git a/tests/test_imports.py b/tests/test_imports.py index 9b65530f..4b02c263 100644 --- a/tests/test_imports.py +++ b/tests/test_imports.py @@ -182,6 +182,13 @@ def test_prime_repl_import(self): assert PrimeREPL is not None + def test_monty_repl_import(self): + """Test MontyREPL import.""" + 
pytest.importorskip("pydantic_monty") + from rlm.environments.monty_repl import MontyREPL + + assert MontyREPL is not None + def test_get_environment_function(self): """Test get_environment function import.""" from rlm.environments import get_environment @@ -325,6 +332,7 @@ def test_no_circular_imports(self): ("rlm.clients.litellm", "litellm"), ("rlm.environments.modal_repl", "modal"), ("rlm.environments.prime_repl", "prime_sandboxes"), + ("rlm.environments.monty_repl", "pydantic_monty"), ] # Test core modules @@ -501,3 +509,12 @@ def test_all_environment_classes_importable(self): assert isinstance(PrimeREPL, type) except Exception: pass + + # Test optional MontyREPL + try: + pytest.importorskip("pydantic_monty") + from rlm.environments.monty_repl import MontyREPL + + assert isinstance(MontyREPL, type) + except Exception: + pass diff --git a/tests/test_monty_repl.py b/tests/test_monty_repl.py new file mode 100644 index 00000000..080382f6 --- /dev/null +++ b/tests/test_monty_repl.py @@ -0,0 +1,30 @@ +"""Tests for MontyREPL environment.""" + +import pytest + +pytest.importorskip("pydantic_monty") + +from rlm.environments.monty_repl import MontyREPL + + +class TestMontyREPLBasic: + """Basic functionality tests for MontyREPL.""" + + def test_basic_execution_and_print(self): + repl = MontyREPL() + result = repl.execute_code("x = 2 + 3\nprint(x)") + assert "5" in result.stdout + + def test_state_retention_across_blocks(self): + repl = MontyREPL() + repl.execute_code("x = 2 + 3\nprint(x)") + result = repl.execute_code("y = x * 2\nprint(y)") + assert "10" in result.stdout + assert "x" in result.locals + assert "y" in result.locals + + def test_final_var(self): + repl = MontyREPL() + repl.execute_code("x = 10") + result = repl.execute_code("print(FINAL_VAR('x'))") + assert "10" in result.stdout diff --git a/uv.lock b/uv.lock index 653d14c3..c6be32df 100644 --- a/uv.lock +++ b/uv.lock @@ -1549,6 +1549,62 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/36/c7/cfc8e811f061c841d7990b0201912c3556bfeb99cdcb7ed24adc8d6f8704/pydantic_core-2.41.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:56121965f7a4dc965bff783d70b907ddf3d57f6eba29b6d2e5dabfaf07799c51", size = 2145302, upload-time = "2025-11-04T13:43:46.64Z" }, ] +[[package]] +name = "pydantic-monty" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5e/6e/7ed116f2545faef034f65a2d85e3d1989d52293ec1ed98db0e70b8b23d61/pydantic_monty-0.0.4.tar.gz", hash = "sha256:f78b04f057deff3eb676d8e9c4a4754f3fb5ee353d68c3de0a4531c216918df2", size = 657214, upload-time = "2026-02-07T17:48:15.281Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/cf/53816f33283f979ba231267963e0df3637a434d51367b62f71c0dd6d568a/pydantic_monty-0.0.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:0bbfa7963fbb219a6a3fde206d5f01a7197c7375666d1559728269a2706f9526", size = 5460256, upload-time = "2026-02-07T17:48:51.405Z" }, + { url = "https://files.pythonhosted.org/packages/55/d7/527fb81440aaec76516b5fc1ae0c8cbef52c20367da853a82de590ae09f2/pydantic_monty-0.0.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9beff5046a4c59e8b3f3a1816988158c13a35dff20f27f50d6a58e0702e7bc29", size = 5561791, upload-time = "2026-02-07T17:49:22.943Z" }, + { url = "https://files.pythonhosted.org/packages/cb/0d/85f7902bd7c5cc696b3a583425e19c9656d3f661c51409cefd8db06e6404/pydantic_monty-0.0.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61ecfe3be3a4b4c7d9737b1412faff91794536af3d9ec67270d59c058f817a95", size = 5271367, upload-time = "2026-02-07T17:48:16.876Z" }, + { url = "https://files.pythonhosted.org/packages/dd/ae/067e6e5b6ae29536c82d85811b3b984bfc416cf9f0b59f3049926e839973/pydantic_monty-0.0.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:62c25627382335292b0769eaa0a13c42800b2d211519d1d389f4203f8b19f5f9", size = 5543042, 
upload-time = "2026-02-07T17:47:47.796Z" }, + { url = "https://files.pythonhosted.org/packages/9a/ba/9f569a1fda0d643a8ca4ca40d7d808fcad3ddc482ae067a2d0cdc9f18e68/pydantic_monty-0.0.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b3889cd7bff16222cd28892fe968153893ecf95b75e9d25935cd6d36b2f3703", size = 5938069, upload-time = "2026-02-07T17:48:31.507Z" }, + { url = "https://files.pythonhosted.org/packages/09/08/277ad24f8d52cfd74355e28f5681e32ca4375d9bbde538d05ad9a4b96a44/pydantic_monty-0.0.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:74cc63650d03c9585f944ff00cd9890cdd06185513ed0a0d3bcba33248131f82", size = 6129865, upload-time = "2026-02-07T17:49:16.644Z" }, + { url = "https://files.pythonhosted.org/packages/95/8c/8107882b6331d3b6f1734c364e8f3a97309f21f561de0935db2fd6e46ec9/pydantic_monty-0.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:568d1c1ee1f5dd866777fce72b2137d571d57b2c207da7bd87a1d63904d0f8f5", size = 5972716, upload-time = "2026-02-07T17:48:33.41Z" }, + { url = "https://files.pythonhosted.org/packages/7d/50/6614c6196d872cd1f0508d49f7630c6438f7ca43c65e31e836d166735c75/pydantic_monty-0.0.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bd065bd3e9f114308574c17b42c8485a2d1eb8280ef9cc1bd304e0165d47315a", size = 5853098, upload-time = "2026-02-07T17:48:03.829Z" }, + { url = "https://files.pythonhosted.org/packages/18/7d/d34eb7e971bb3f27189b874e539312b7f815d312a1247dc5d4e20c7b4163/pydantic_monty-0.0.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:11ea96ffb50ccd761c7446022d4b9ccf4aae2baa831da5873252ab920d071dcc", size = 5450401, upload-time = "2026-02-07T17:48:07.456Z" }, + { url = "https://files.pythonhosted.org/packages/80/16/0e4b6853da1f7626a316528175842de9850b2e5bbd2e92fc34a36a2109fc/pydantic_monty-0.0.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:01463f078db28ed70cccee83d6e24ba18093ca64be385da93631a134d5880156", size = 5823851, 
upload-time = "2026-02-07T17:48:39.927Z" }, + { url = "https://files.pythonhosted.org/packages/a8/f3/50cec8af1ca02da45f1f990f2fd653cd84f660d2465d75011bcab0e460e6/pydantic_monty-0.0.4-cp311-cp311-win32.whl", hash = "sha256:195d0814e7088b4cfd48fa61ba034ba16a5289fe04789a6628658ed2336e17c8", size = 5427815, upload-time = "2026-02-07T17:48:09.332Z" }, + { url = "https://files.pythonhosted.org/packages/69/c9/d3521421568e72abbcfd47432f1438866a0881abf7611af34ea4439616b8/pydantic_monty-0.0.4-cp311-cp311-win_amd64.whl", hash = "sha256:e159f0fc84f89d6c281ac706f47a5185ae9e3617d807f4d8f8083db01a2755d5", size = 6057652, upload-time = "2026-02-07T17:48:28.631Z" }, + { url = "https://files.pythonhosted.org/packages/1d/da/a0f77b7bc223fd3e4bc6c5ff36d5d7dd416080e73acb3d5990f9c3fc0403/pydantic_monty-0.0.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:b110a14314a922be1fb3802e1da29b5f7dc7f1fed61da9c919c261d8f61b3ffc", size = 5460227, upload-time = "2026-02-07T17:47:56.167Z" }, + { url = "https://files.pythonhosted.org/packages/e6/e0/968729e98af0613104306a5aa5c460499f48c54d734bdd143bc5e6974f50/pydantic_monty-0.0.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a6ead7e1a84507e21e30297d0b131db17f5acde5311175c4fd9d505cd6c9862a", size = 5538013, upload-time = "2026-02-07T17:48:34.772Z" }, + { url = "https://files.pythonhosted.org/packages/68/0e/50003e8626a62ea54a8831872826b2a6311886ba9390927a0da8b71d0791/pydantic_monty-0.0.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:203176419c2793764e36ca963adc65adb40c17e8a10c010e578b5a6484a5e864", size = 5272677, upload-time = "2026-02-07T17:48:41.239Z" }, + { url = "https://files.pythonhosted.org/packages/8d/ad/7bf9f4e79cefeb2aa55e542526186b66801c5ccf0d2273ddc0d1f2442f3d/pydantic_monty-0.0.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:59a1eb39783af3c48e5d0947779008c8abebca7b6bc5ddbbf881daf42f095113", size = 5545980, upload-time = "2026-02-07T17:48:10.7Z" }, + { url = 
"https://files.pythonhosted.org/packages/ff/74/ef62add8a8b8b30812a5f9d0f14657ab0e696d3a91f8ecc114bcb21b0645/pydantic_monty-0.0.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:98d4aab379d849a359df7fd5d053c2bba733a1c430df9e3b3e190efeaef345b4", size = 5939277, upload-time = "2026-02-07T17:49:08.701Z" }, + { url = "https://files.pythonhosted.org/packages/ac/b2/9e65211538cb05e12a8b1391b9fbcb4f33034d62221bc8fd54376a77b5c1/pydantic_monty-0.0.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e4e6bf8928874055c418ad4cd0dac455adc51793e7a9c86da466fb161e7e5df1", size = 6132635, upload-time = "2026-02-07T17:48:01.937Z" }, + { url = "https://files.pythonhosted.org/packages/e0/58/8b595f78261e444d0c4e9e8eb7334c86974c99feaa31747cae4c23c0a617/pydantic_monty-0.0.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe93b13d798d3acaa9e9b9b423ba4e6cdd6175221e4fbe6699c2cc65a455293a", size = 5948529, upload-time = "2026-02-07T17:48:20.53Z" }, + { url = "https://files.pythonhosted.org/packages/18/52/1e91354dfe7eb335a0574f375a5249a342e9ee87d03eb3b5607060a99339/pydantic_monty-0.0.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f4862b22f4b6a8ed9edfb358fe77844066a09f0155377f1a0ad227b98968d8e4", size = 5855376, upload-time = "2026-02-07T17:49:24.515Z" }, + { url = "https://files.pythonhosted.org/packages/03/c7/dec9486dca6a124f2987a56bc167f747a3c82e18e3080b6cce20c6d43c79/pydantic_monty-0.0.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:025577bdf8b85ef975b7bbe0ad5c33730bc8606035d1ad31707b723329968604", size = 5450066, upload-time = "2026-02-07T17:48:18.745Z" }, + { url = "https://files.pythonhosted.org/packages/60/18/6f9a3c2b4ec28cea189ab0dc324e7c1a70643937a0ee835b15e8d49baeae/pydantic_monty-0.0.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:234e04fd2c4a27a73ab4c4d2927cae938c4465e70b78e95c52237f8e77b78085", size = 5826060, upload-time = "2026-02-07T17:48:56.207Z" }, + { url = 
"https://files.pythonhosted.org/packages/c5/e2/9e7c5bcd0b1cf1cc5f6ca7a2037ab5ca85edcd5e938c36649e3b254b1df1/pydantic_monty-0.0.4-cp312-cp312-win32.whl", hash = "sha256:3901097664645da6d49df7f327b503254721cc2c44020fdab781498257f7d2ef", size = 5424456, upload-time = "2026-02-07T17:48:43.99Z" }, + { url = "https://files.pythonhosted.org/packages/14/d2/ed269925e6221bcf7f86eb0d984dc88b7a0f7dec10d68059ebd9930fcea9/pydantic_monty-0.0.4-cp312-cp312-win_amd64.whl", hash = "sha256:520f22a2f10b4d1e51857b5641ae78d04dbfef9677077d6c55536527067ce8d0", size = 6034389, upload-time = "2026-02-07T17:49:12.615Z" }, + { url = "https://files.pythonhosted.org/packages/9b/67/5cfa30e247bc5efd312604d0e6fe8f038b277a4d8a39558ee08a69b9f508/pydantic_monty-0.0.4-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:0a7b1857194e99368f4ade3878a6eb9741ebc7297bdbb21d255bc5608c9e204f", size = 5461244, upload-time = "2026-02-07T17:48:54.797Z" }, + { url = "https://files.pythonhosted.org/packages/c3/fa/9dbe8e4f4d52bed0fdf61ee8ca770ec667701c54c2e60d802fdd48cc0e6f/pydantic_monty-0.0.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:271d0a374db72ba71f641eb93a41d1e857da01e4c6657e48dc5005856660b8d2", size = 5538279, upload-time = "2026-02-07T17:47:58.103Z" }, + { url = "https://files.pythonhosted.org/packages/90/1b/70a8e2103353bb3139bb1bb855ae80d9a68e4e797de8100b22915ce76e90/pydantic_monty-0.0.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ff07c0fd3b624d5907b280ecac22a7a926e4c463b9094ccea26adfd8dc988bae", size = 5273880, upload-time = "2026-02-07T17:49:10.691Z" }, + { url = "https://files.pythonhosted.org/packages/a3/04/3a5d20f8250c1e8b14b9425777f22cb5281068f7c542d4dc65304796b166/pydantic_monty-0.0.4-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:09892488f816df82cd9a512fea6c4cd6b489bb67b3417c918092e3b5a2ac286f", size = 5546146, upload-time = "2026-02-07T17:47:52.116Z" }, + { url = 
"https://files.pythonhosted.org/packages/44/2c/0436ba6480492d6b9b597b3f45771ae18ab13ccb0655878dba523ddd6959/pydantic_monty-0.0.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f54a1df8f1084a83c147139dbefdd753dcf02db7bfdad647ad5f066be0702521", size = 5939953, upload-time = "2026-02-07T17:49:19.572Z" }, + { url = "https://files.pythonhosted.org/packages/4e/6f/834e83d893718386f3966a2956f603e2a802090e7777d81efdc872219ea7/pydantic_monty-0.0.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:32ccdfbfeec80f638ab311eb6f28c63103fdf005237e0ecaf9fe349b492d60e1", size = 6133729, upload-time = "2026-02-07T17:47:54.195Z" }, + { url = "https://files.pythonhosted.org/packages/32/8e/a756ef7c712ce4ed2278cc3809a7552ed41d114d7b68411ac8dcfc1819fe/pydantic_monty-0.0.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e489b3282e44310c49fc0ebdec37242c68d4deabf824457b37e15edf80e7793", size = 5948229, upload-time = "2026-02-07T17:48:59.395Z" }, + { url = "https://files.pythonhosted.org/packages/2b/d3/925805b8ddbc8b17eb0c76cb4e203269246bf6ca7a94fd6a09f455ce823f/pydantic_monty-0.0.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b86319de9fcf4f305b37862af3842b99fdb17689a9e3385cfcc22c4647a3bbf6", size = 5856418, upload-time = "2026-02-07T17:48:48.183Z" }, + { url = "https://files.pythonhosted.org/packages/23/a8/32c66964d3c4853917be736b6ffd4013686487d7edc8c7a7460023627570/pydantic_monty-0.0.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:33d3121aed1286b1f1b67b1101f4d375bbc137c80f59866d45869ffd03b51c52", size = 5450456, upload-time = "2026-02-07T17:49:18.163Z" }, + { url = "https://files.pythonhosted.org/packages/75/c3/ddb73b3da0d4608c77e3db8ad17e4dc87baf9b0de1c9d69c1d3976eb2e9f/pydantic_monty-0.0.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a08e31a1926d9fd6b499bb61fd6b471c6c2083da1ebc2c9665961a9717feb44f", size = 5826233, upload-time = "2026-02-07T17:48:00.048Z" }, + { url 
= "https://files.pythonhosted.org/packages/44/d9/8a8672186f1d1eb8f3dacf852ed4cd59309caa017ec8bd5866f99f750954/pydantic_monty-0.0.4-cp313-cp313-win32.whl", hash = "sha256:d78531b7462e9f66b496a330e1f96372561d1bd8b4a3051f7a07b85a41a04b01", size = 5425846, upload-time = "2026-02-07T17:48:42.683Z" }, + { url = "https://files.pythonhosted.org/packages/51/31/11dc4c64d64220198cd2f25a70bff9a4b12a5563e500297253a0dad59647/pydantic_monty-0.0.4-cp313-cp313-win_amd64.whl", hash = "sha256:2d1a2382a782b5a4f6d73d613ab9e606cfcfdb0c2ff0364da47bc55e8566b2e7", size = 6035012, upload-time = "2026-02-07T17:49:01.575Z" }, + { url = "https://files.pythonhosted.org/packages/8a/7b/9ac7775950abf33048949204234a48d5b1bef293fc87e8d1eed52ce2f4e3/pydantic_monty-0.0.4-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:8da2fb6e2b6b2db9dd6366266011f98b82ed46a1ba359f67a7372fbc6ace6105", size = 5464267, upload-time = "2026-02-07T17:49:26.199Z" }, + { url = "https://files.pythonhosted.org/packages/7a/4b/2ba526a09e70fb3c33d02b78e2cf01188f630f0a2d954239a91fb8942df5/pydantic_monty-0.0.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:15f29cf3d1804ad90a487fffc32d516c800ec567a5b1dec5ae27da1a76218efa", size = 5554320, upload-time = "2026-02-07T17:47:49.997Z" }, + { url = "https://files.pythonhosted.org/packages/ec/51/e569e652a777febab2efa7a134053f62b1a010f454ec31010c511b6e9af8/pydantic_monty-0.0.4-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6534125477f5c562450334d8a94eef40706b2700c54174a106929f86ace1ead7", size = 5275934, upload-time = "2026-02-07T17:48:57.798Z" }, + { url = "https://files.pythonhosted.org/packages/da/8e/feff0ada9aa343696bb07571c96f8beb8c1a9013d2abd596849620c66e9a/pydantic_monty-0.0.4-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e8922a68e4ce810e08fd6e6228f9c356d7a60f028507cc35c1aa60505ca87bf2", size = 5546145, upload-time = "2026-02-07T17:48:24.193Z" }, + { url = 
"https://files.pythonhosted.org/packages/59/84/d8fdf463c2c266ca915708d320a003afcacb9e627d00ab0005b77d158b5c/pydantic_monty-0.0.4-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9cf8c4e14c3ffb9d2cc467cfa33c25825e8cc3d690b47d056f28485d6e7a29a2", size = 5943406, upload-time = "2026-02-07T17:48:36.726Z" }, + { url = "https://files.pythonhosted.org/packages/80/f9/196fcffd12b705269d0f7fead9298e09ddab8e9d1a38d1db02021db982be/pydantic_monty-0.0.4-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:06f4db63658a8a8f76e0dc9a94efdf8bba0dac14596d94e35342440d95ae37a3", size = 6135772, upload-time = "2026-02-07T17:49:05.743Z" }, + { url = "https://files.pythonhosted.org/packages/b3/1e/56bc5a1dddd6d2961002c58decb560f87e68a84f4dd0264a769a8430c3b3/pydantic_monty-0.0.4-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b7bdf48414576958029bad37b66d407b2b90aa6859a6a32b2191ada10d63f82", size = 5966127, upload-time = "2026-02-07T17:48:38.199Z" }, + { url = "https://files.pythonhosted.org/packages/51/0c/cd6295d1f8b086ced40e8ce5ad31a2654ce772a7a7735ccacb1fa9248998/pydantic_monty-0.0.4-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:56c1cfae27ec5af72bf036e59430c2f07d6e2f75666bea1f56904a13aa261f3e", size = 5858649, upload-time = "2026-02-07T17:48:49.594Z" }, + { url = "https://files.pythonhosted.org/packages/ac/65/1c86615f31fc6f000d0e69ec2b609b0b9e7cb91170d565d78defc93cae44/pydantic_monty-0.0.4-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:90cfa051d6b9614f4f19bd27c1d169b5da3e1878f989ee30eab0f5cd4f5e1d2a", size = 5452121, upload-time = "2026-02-07T17:48:25.578Z" }, + { url = "https://files.pythonhosted.org/packages/ee/5e/4a340ba05d83949d711ab200f12441313d4696566f4f18321f9e6d7b66e8/pydantic_monty-0.0.4-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:f7128b2f3a15b92402fd6fa268e58f0e92710368aff65e51993b53db12ecc391", size = 5827769, upload-time = "2026-02-07T17:48:26.985Z" }, + { url 
= "https://files.pythonhosted.org/packages/bc/22/3dc6ce1596cac9bf383889f95f1d5d8026256aced1eba41336ac7134850b/pydantic_monty-0.0.4-cp314-cp314-win32.whl", hash = "sha256:9d9fcf9e74b632b916cbc04cd72fba5bfeeda376d4301afdcba20ac603625110", size = 5427671, upload-time = "2026-02-07T17:49:28.024Z" }, + { url = "https://files.pythonhosted.org/packages/8a/64/f5861d2fadfe0dc390f74b59612af056ccaf535ff54dc04f287d192f686b/pydantic_monty-0.0.4-cp314-cp314-win_amd64.whl", hash = "sha256:c2d05bd93fb65f6dfd9c072158a9827fd544ca5e44e972dbbf2d6010668c4882", size = 6053509, upload-time = "2026-02-07T17:49:20.97Z" }, +] + [[package]] name = "pygments" version = "2.19.2" @@ -1706,7 +1762,7 @@ wheels = [ ] [[package]] -name = "rlm" +name = "rlms" version = "0.1.0" source = { editable = "." } dependencies = [ @@ -1729,6 +1785,9 @@ modal = [ { name = "dill" }, { name = "modal" }, ] +monty = [ + { name = "pydantic-monty" }, +] prime = [ { name = "dill" }, { name = "prime-sandboxes" }, @@ -1758,12 +1817,13 @@ requires-dist = [ { name = "openai", specifier = ">=2.14.0" }, { name = "portkey-ai", specifier = ">=2.1.0" }, { name = "prime-sandboxes", marker = "extra == 'prime'", specifier = ">=0.2.0" }, + { name = "pydantic-monty", marker = "extra == 'monty'", specifier = ">=0.0.4" }, { name = "pytest", specifier = ">=9.0.2" }, { name = "python-dotenv", specifier = ">=1.2.1" }, { name = "requests", specifier = ">=2.32.5" }, { name = "rich", specifier = ">=13.0.0" }, ] -provides-extras = ["modal", "daytona", "prime"] +provides-extras = ["modal", "daytona", "prime", "monty"] [package.metadata.requires-dev] dev = [ From a437c66ea7bed570905fdd0a8f237b0a7930f0bc Mon Sep 17 00:00:00 2001 From: Cyrus <1880797+CyrusNuevoDia@users.noreply.github.com> Date: Sun, 8 Feb 2026 20:01:43 -0700 Subject: [PATCH 2/6] Enable Monty persistent mode --- README.md | 2 +- rlm/core/rlm.py | 2 +- rlm/environments/monty_repl.py | 50 ++++++++++++++++++++++++++-- tests/test_monty_repl.py | 15 +++++++++ 
tests/test_multi_turn_integration.py | 11 ++++++ 5 files changed, 76 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index d2aeb2b1..53755d96 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ rlm = RLM( The default `local` environment `LocalREPL` runs in the same process as the RLM itself, with specified global and local namespaces for minimal security. Using this REPL is generally safe, but should not be used for production settings. It also shares the same virtual environment (e.g. Conda or uv) as the host process. #### Monty -We also support a Monty-based environment called `MontyREPL` that runs code inside the Monty sandboxed interpreter. Monty is fast and sandboxed, but it supports only a limited subset of Python and a small standard library. +We also support a Monty-based environment called `MontyREPL` that runs code inside the Monty sandboxed interpreter. Monty is fast and sandboxed, but it supports only a limited subset of Python and a small standard library. Monty also supports `persistent=True` for multi-turn mode (state retained via host-side locals). To install Monty support: ```bash diff --git a/rlm/core/rlm.py b/rlm/core/rlm.py index 5f303928..a20905a0 100644 --- a/rlm/core/rlm.py +++ b/rlm/core/rlm.py @@ -369,7 +369,7 @@ def _validate_persistent_environment_support(self) -> None: ValueError: If the environment type does not support persistent mode. 
""" # Known environments that support persistence - persistent_supported_environments = {"local"} + persistent_supported_environments = {"local", "monty"} if self.environment_type not in persistent_supported_environments: raise ValueError( diff --git a/rlm/environments/monty_repl.py b/rlm/environments/monty_repl.py index 8ab405b7..de95a528 100644 --- a/rlm/environments/monty_repl.py +++ b/rlm/environments/monty_repl.py @@ -7,6 +7,7 @@ from __future__ import annotations import ast +import copy import time from typing import Any, Literal @@ -148,6 +149,8 @@ def __init__( self.locals: dict[str, Any] = {} self.pending_llm_calls: list[RLMChatCompletion] = [] self.stdout_parts: list[str] = [] + self._context_count = 0 + self._history_count = 0 self.setup() @@ -162,11 +165,12 @@ def setup(self) -> None: self.locals = {} self.pending_llm_calls = [] self.stdout_parts = [] + self._context_count = 0 + self._history_count = 0 def load_context(self, context_payload: dict | list | str) -> None: """Load context into the environment as context_0 (and 'context' alias).""" - self.locals["context_0"] = context_payload - self.locals["context"] = context_payload + self.add_context(context_payload, 0) def execute_code(self, code: str) -> REPLResult: """Execute code in the Monty sandbox and return result.""" @@ -261,6 +265,48 @@ def collect_assigned_names(code: str) -> set[str]: collector.visit(tree) return collector.names + def update_handler_address(self, address: tuple[str, int]) -> None: + """Update the LM handler address for a new completion call.""" + self.lm_handler_address = address + + def add_context( + self, context_payload: dict | list | str, context_index: int | None = None + ) -> int: + """Add a context with versioned variable name.""" + if context_index is None: + context_index = self._context_count + + var_name = f"context_{context_index}" + self.locals[var_name] = context_payload + if context_index == 0: + self.locals["context"] = context_payload + + self._context_count 
= max(self._context_count, context_index + 1) + return context_index + + def get_context_count(self) -> int: + """Return the number of contexts loaded.""" + return self._context_count + + def add_history( + self, message_history: list[dict[str, Any]], history_index: int | None = None + ) -> int: + """Store a conversation's message history as a versioned variable.""" + if history_index is None: + history_index = self._history_count + + var_name = f"history_{history_index}" + self.locals[var_name] = copy.deepcopy(message_history) + if history_index == 0: + self.locals["history"] = self.locals[var_name] + + self._history_count = max(self._history_count, history_index + 1) + return history_index + + def get_history_count(self) -> int: + """Return the number of conversation histories stored.""" + return self._history_count + def final_var(self, variable_name: str) -> str: """Return the value of a variable as a final answer.""" variable_name = variable_name.strip().strip("\"'") diff --git a/tests/test_monty_repl.py b/tests/test_monty_repl.py index 080382f6..7c616a73 100644 --- a/tests/test_monty_repl.py +++ b/tests/test_monty_repl.py @@ -28,3 +28,18 @@ def test_final_var(self): repl.execute_code("x = 10") result = repl.execute_code("print(FINAL_VAR('x'))") assert "10" in result.stdout + + def test_context_and_history_counts(self): + repl = MontyREPL() + assert repl.get_context_count() == 0 + assert repl.get_history_count() == 0 + + repl.add_context({"a": 1}) + assert repl.get_context_count() == 1 + assert "context_0" in repl.locals + assert "context" in repl.locals + + repl.add_history([{"role": "user", "content": "hi"}]) + assert repl.get_history_count() == 1 + assert "history_0" in repl.locals + assert "history" in repl.locals diff --git a/tests/test_multi_turn_integration.py b/tests/test_multi_turn_integration.py index f55de39b..024e9986 100644 --- a/tests/test_multi_turn_integration.py +++ b/tests/test_multi_turn_integration.py @@ -345,6 +345,17 @@ def 
test_local_environment_supported(self): ) assert rlm.persistent is True + def test_monty_environment_supported(self): + """Monty environment should support persistent mode.""" + pytest.importorskip("pydantic_monty") + rlm = RLM( + backend="openai", + backend_kwargs={"model_name": "test"}, + environment="monty", + persistent=True, + ) + assert rlm.persistent is True + class TestMultiTurnEndToEnd: """End-to-end tests simulating realistic multi-turn usage.""" From f6491efee588cb8de485e492ac2fb5887bb1aa95 Mon Sep 17 00:00:00 2001 From: Cyrus <1880797+CyrusNuevoDia@users.noreply.github.com> Date: Sun, 8 Feb 2026 20:03:32 -0700 Subject: [PATCH 3/6] Update README.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 53755d96..d71c2070 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ make quickstart ## REPL Environments -We support two types of REPL environments -- isolated, and non-isolated. Non-isolated environments (default) run code execution on the same machine as the RLM (e.g. through `exec`), which is pretty reasonable for some local low-risk tasks, like simple benchmarking, but can be problematic if the prompts or tool calls can interact with malicious users. Fully isolated environments used Cloud-based sandboxes (e.g. Prime Sandboxes, [Modal Sandboxes](https://modal.com/docs/guide/sandboxes)) to run code generated by the RLM, ensuring completely isolation from the host process. Environments can be added, but we natively support the following: `local` (default), `monty`, `modal`, `prime`. +We support two types of REPL environments -- isolated, and non-isolated. Non-isolated environments (default) run code execution on the same machine as the RLM (e.g. 
through `exec`), which is pretty reasonable for some local low-risk tasks, like simple benchmarking, but can be problematic if the prompts or tool calls can interact with malicious users. Fully isolated environments use Cloud-based sandboxes (e.g. Prime Sandboxes, [Modal Sandboxes](https://modal.com/docs/guide/sandboxes)) to run code generated by the RLM, ensuring complete isolation from the host process. Environments can be added, but we natively support the following: `local` (default), `monty`, `modal`, `prime`. ```python rlm = RLM( From d8c40d164f5d5617b02601ddf6ebce50a8142c81 Mon Sep 17 00:00:00 2001 From: Cyrus <1880797+CyrusNuevoDia@users.noreply.github.com> Date: Sun, 8 Feb 2026 20:14:27 -0700 Subject: [PATCH 4/6] Address Monty REPL review feedback --- rlm/environments/monty_repl.py | 103 +++++++++++++++++++++++++++++---- tests/test_imports.py | 1 + tests/test_monty_repl.py | 10 ++++ 3 files changed, 104 insertions(+), 10 deletions(-) diff --git a/rlm/environments/monty_repl.py b/rlm/environments/monty_repl.py index de95a528..865375e4 100644 --- a/rlm/environments/monty_repl.py +++ b/rlm/environments/monty_repl.py @@ -119,6 +119,43 @@ def visit_ClassDef(self, node: ast.ClassDef) -> None: self.generic_visit(node) self.scope_depth -= 1 + def visit_NamedExpr(self, node: ast.NamedExpr) -> None: + if self.scope_depth == 0: + self.add_target(node.target) + self.generic_visit(node) + + def visit_Match(self, node: ast.Match) -> None: + if self.scope_depth == 0: + for case in node.cases: + self.collect_match_pattern(case.pattern) + self.generic_visit(node) + + def collect_match_pattern(self, pattern: ast.pattern) -> None: + if isinstance(pattern, ast.MatchAs): + if pattern.name: + self.names.add(pattern.name) + if pattern.pattern is not None: + self.collect_match_pattern(pattern.pattern) + elif isinstance(pattern, ast.MatchStar): + if pattern.name: + self.names.add(pattern.name) + elif isinstance(pattern, ast.MatchMapping): + if pattern.rest: + 
self.names.add(pattern.rest) + for subpattern in pattern.patterns: + self.collect_match_pattern(subpattern) + elif isinstance(pattern, ast.MatchSequence): + for subpattern in pattern.patterns: + self.collect_match_pattern(subpattern) + elif isinstance(pattern, ast.MatchClass): + for subpattern in pattern.patterns: + self.collect_match_pattern(subpattern) + for subpattern in pattern.kwd_patterns: + self.collect_match_pattern(subpattern) + elif isinstance(pattern, ast.MatchOr): + for subpattern in pattern.patterns: + self.collect_match_pattern(subpattern) + class MontyREPL(NonIsolatedEnv): """ @@ -149,6 +186,7 @@ def __init__( self.locals: dict[str, Any] = {} self.pending_llm_calls: list[RLMChatCompletion] = [] self.stdout_parts: list[str] = [] + self.stderr_parts: list[str] = [] self._context_count = 0 self._history_count = 0 @@ -165,6 +203,7 @@ def setup(self) -> None: self.locals = {} self.pending_llm_calls = [] self.stdout_parts = [] + self.stderr_parts = [] self._context_count = 0 self._history_count = 0 @@ -176,13 +215,12 @@ def execute_code(self, code: str) -> REPLResult: """Execute code in the Monty sandbox and return result.""" start_time = time.perf_counter() self.stdout_parts = [] + self.stderr_parts = [] self.pending_llm_calls = [] wrapper_script = self.build_wrapper_script(code) external_functions = { "__rlm_capture_locals": self.capture_locals, - "FINAL_VAR": self.final_var, - "SHOW_VARS": self.show_vars, "llm_query": self.llm_query, "llm_query_batched": self.llm_query_batched, } @@ -204,15 +242,20 @@ def execute_code(self, code: str) -> REPLResult: return REPLResult( stdout="".join(self.stdout_parts), - stderr="", + stderr="".join(self.stderr_parts), locals=self.locals.copy(), execution_time=time.perf_counter() - start_time, rlm_calls=self.pending_llm_calls.copy(), ) except Exception as exc: + stderr = "".join(self.stderr_parts) + if stderr: + stderr = f"{stderr.rstrip()}\n{type(exc).__name__}: {exc}" + else: + stderr = f"{type(exc).__name__}: {exc}" 
return REPLResult( stdout="".join(self.stdout_parts), - stderr=f"{type(exc).__name__}: {exc}", + stderr=stderr, locals=self.locals.copy(), execution_time=time.perf_counter() - start_time, rlm_calls=self.pending_llm_calls.copy(), @@ -221,18 +264,56 @@ def execute_code(self, code: str) -> REPLResult: def build_wrapper_script(self, user_code: str) -> str: """Build a wrapper script that restores state, captures output, and persists locals.""" lines: list[str] = [] - for key in self.locals: - if key.isidentifier(): - lines.append(f"{key} = __rlm_state.get({key!r})") - - lines.append(user_code) - assigned_names = self.collect_assigned_names(user_code) persisted_names = { name for name in set(self.locals) | assigned_names if name.isidentifier() and not name.startswith("_") and name not in RESERVED_NAMES } + for name in sorted(persisted_names): + lines.append(f"{name} = __rlm_state.get({name!r})") + + lines.append("def FINAL_VAR(variable_name):") + lines.append(' variable_name = variable_name.strip().strip("\\"\'")') + for name in sorted(persisted_names): + lines.append(f" if variable_name == {name!r}:") + lines.append(" try:") + lines.append(f" return str({name})") + lines.append(" except NameError:") + lines.append(" pass") + lines.append(" available = []") + for name in sorted(persisted_names): + lines.append(" try:") + lines.append(f" {name}") + lines.append(f" available.append({name!r})") + lines.append(" except NameError:") + lines.append(" pass") + lines.append(" if available:") + lines.append( + " return f\"Error: Variable '{variable_name}' not found. " + "Available variables: {available}. " + 'You must create and assign a variable BEFORE calling FINAL_VAR on it."' + ) + lines.append( + " return f\"Error: Variable '{variable_name}' not found. " + "No variables have been created yet. 
" + 'You must create and assign a variable in a REPL block BEFORE calling FINAL_VAR on it."' + ) + + lines.append("def SHOW_VARS():") + lines.append(" available = {}") + for name in sorted(persisted_names): + lines.append(" try:") + lines.append(f" available[{name!r}] = type({name}).__name__") + lines.append(" except NameError:") + lines.append(" pass") + lines.append(" if not available:") + lines.append( + ' return "No variables created yet. Use ```repl``` blocks to create variables."' + ) + lines.append(' return f"Available variables: {available}"') + + lines.append(user_code) lines.append("__rlm_state_out = {}") for name in sorted(persisted_names): @@ -248,6 +329,8 @@ def handle_print_callback(self, stream: Literal["stdout", "stderr"], text: str) """Collect printed output from Monty.""" if stream == "stdout": self.stdout_parts.append(text) + elif stream == "stderr": + self.stderr_parts.append(text) def capture_locals(self, vars_dict: dict[str, Any]) -> None: """Capture locals after execution.""" diff --git a/tests/test_imports.py b/tests/test_imports.py index 4b02c263..18355db7 100644 --- a/tests/test_imports.py +++ b/tests/test_imports.py @@ -517,4 +517,5 @@ def test_all_environment_classes_importable(self): assert isinstance(MontyREPL, type) except Exception: + # Optional dependency may not be available in CI. 
pass diff --git a/tests/test_monty_repl.py b/tests/test_monty_repl.py index 7c616a73..765db1f1 100644 --- a/tests/test_monty_repl.py +++ b/tests/test_monty_repl.py @@ -29,6 +29,16 @@ def test_final_var(self): result = repl.execute_code("print(FINAL_VAR('x'))") assert "10" in result.stdout + def test_final_var_same_block(self): + repl = MontyREPL() + result = repl.execute_code("x = 3\nprint(FINAL_VAR('x'))") + assert "3" in result.stdout + + def test_stderr_capture(self): + repl = MontyREPL() + repl.handle_print_callback("stderr", "oops") + assert "oops" in repl.stderr_parts + def test_context_and_history_counts(self): repl = MontyREPL() assert repl.get_context_count() == 0 From bd730e50f2f160f96e69a85c63553f2dd98afc9a Mon Sep 17 00:00:00 2001 From: Cyrus <1880797+CyrusNuevoDia@users.noreply.github.com> Date: Sun, 8 Feb 2026 20:16:50 -0700 Subject: [PATCH 5/6] Resolve Monty persistence review notes --- rlm/core/rlm.py | 2 +- rlm/environments/monty_repl.py | 4 ---- tests/test_multi_turn_integration.py | 30 +++++++++++++++++++++------- 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/rlm/core/rlm.py b/rlm/core/rlm.py index a20905a0..b7538c0d 100644 --- a/rlm/core/rlm.py +++ b/rlm/core/rlm.py @@ -363,7 +363,7 @@ def _validate_persistent_environment_support(self) -> None: - add_context(payload, index): Add new context for multi-turn conversations - get_context_count(): Return the number of loaded contexts - Currently only 'local' (LocalREPL) supports these methods. + Currently only 'local' (LocalREPL) and 'monty' (MontyREPL) support these methods. Raises: ValueError: If the environment type does not support persistent mode. 
diff --git a/rlm/environments/monty_repl.py b/rlm/environments/monty_repl.py index 865375e4..89bac92a 100644 --- a/rlm/environments/monty_repl.py +++ b/rlm/environments/monty_repl.py @@ -175,10 +175,6 @@ def __init__( resource_limits: pydantic_monty.ResourceLimits | None = None, **kwargs: Any, ) -> None: - if persistent: - raise NotImplementedError( - "Persistent REPLs are currently not supported for environment: MontyREPL" - ) super().__init__(persistent=persistent, depth=depth, **kwargs) self.lm_handler_address = lm_handler_address diff --git a/tests/test_multi_turn_integration.py b/tests/test_multi_turn_integration.py index 024e9986..25530586 100644 --- a/tests/test_multi_turn_integration.py +++ b/tests/test_multi_turn_integration.py @@ -348,13 +348,29 @@ def test_local_environment_supported(self): def test_monty_environment_supported(self): """Monty environment should support persistent mode.""" pytest.importorskip("pydantic_monty") - rlm = RLM( - backend="openai", - backend_kwargs={"model_name": "test"}, - environment="monty", - persistent=True, - ) - assert rlm.persistent is True + responses = [ + "```repl\nx = 2\nprint(x)\n```\nFINAL(done)", + "```repl\nprint(x)\n```\nFINAL(done)", + ] + + with patch.object(rlm_module, "get_client") as mock_get_client: + mock_lm = create_mock_lm(responses) + mock_get_client.return_value = mock_lm + + with RLM( + backend="openai", + backend_kwargs={"model_name": "test"}, + environment="monty", + persistent=True, + ) as rlm: + rlm.completion("First turn") + mock_lm.completion.side_effect = list(responses[1:]) + rlm.completion("Second turn") + + env = rlm._persistent_env + assert env is not None + assert env.get_context_count() == 2 + assert "x" in env.locals class TestMultiTurnEndToEnd: From 58433808e5fe66e3d06fb1d70182074cd4563cdd Mon Sep 17 00:00:00 2001 From: Cyrus <1880797+CyrusNuevoDia@users.noreply.github.com> Date: Mon, 9 Feb 2026 14:37:45 -0700 Subject: [PATCH 6/6] Address final Monty REPL review comments Fix cleanup() 
to clear stderr_parts and reset counters, guard state restoration to avoid silently setting variables to None, remove dead final_var()/show_vars() instance methods, and add execution-level stderr capture test. Co-Authored-By: Claude Opus 4.6 --- rlm/environments/monty_repl.py | 32 +++++--------------------------- tests/test_monty_repl.py | 5 +++++ 2 files changed, 10 insertions(+), 27 deletions(-) diff --git a/rlm/environments/monty_repl.py b/rlm/environments/monty_repl.py index 89bac92a..70b16209 100644 --- a/rlm/environments/monty_repl.py +++ b/rlm/environments/monty_repl.py @@ -267,7 +267,8 @@ def build_wrapper_script(self, user_code: str) -> str: if name.isidentifier() and not name.startswith("_") and name not in RESERVED_NAMES } for name in sorted(persisted_names): - lines.append(f"{name} = __rlm_state.get({name!r})") + lines.append(f"if {name!r} in __rlm_state:") + lines.append(f" {name} = __rlm_state[{name!r}]") lines.append("def FINAL_VAR(variable_name):") lines.append(' variable_name = variable_name.strip().strip("\\"\'")') @@ -386,32 +387,6 @@ def get_history_count(self) -> int: """Return the number of conversation histories stored.""" return self._history_count - def final_var(self, variable_name: str) -> str: - """Return the value of a variable as a final answer.""" - variable_name = variable_name.strip().strip("\"'") - if variable_name in self.locals: - return str(self.locals[variable_name]) - - available = [k for k in self.locals.keys() if not k.startswith("_")] - if available: - return ( - f"Error: Variable '{variable_name}' not found. " - f"Available variables: {available}. " - "You must create and assign a variable BEFORE calling FINAL_VAR on it." - ) - return ( - f"Error: Variable '{variable_name}' not found. " - "No variables have been created yet. " - "You must create and assign a variable in a REPL block BEFORE calling FINAL_VAR on it." 
- ) - - def show_vars(self) -> str: - """Show all available variables in the REPL environment.""" - available = {k: type(v).__name__ for k, v in self.locals.items() if not k.startswith("_")} - if not available: - return "No variables created yet. Use ```repl``` blocks to create variables." - return f"Available variables: {available}" - def llm_query(self, prompt: str, model: str | None = None) -> str: """Query the LM via socket connection to the handler.""" if not self.lm_handler_address: @@ -455,6 +430,9 @@ def cleanup(self) -> None: self.locals.clear() self.pending_llm_calls.clear() self.stdout_parts.clear() + self.stderr_parts.clear() + self._context_count = 0 + self._history_count = 0 def __enter__(self) -> MontyREPL: return self diff --git a/tests/test_monty_repl.py b/tests/test_monty_repl.py index 765db1f1..fc647408 100644 --- a/tests/test_monty_repl.py +++ b/tests/test_monty_repl.py @@ -39,6 +39,11 @@ def test_stderr_capture(self): repl.handle_print_callback("stderr", "oops") assert "oops" in repl.stderr_parts + def test_stderr_from_execution(self): + repl = MontyREPL() + result = repl.execute_code("1 / 0") + assert "ZeroDivisionError" in result.stderr + def test_context_and_history_counts(self): repl = MontyREPL() assert repl.get_context_count() == 0