Skip to content

Commit b38ad84

Browse files
authored
Add simple compaction capabilities, preliminary (#110)
* Add extra compaction capabilities (preliminary)
* Update test parsing behavior; should return None
1 parent bcb9b25 commit b38ad84

File tree

8 files changed

+396
-11
lines changed

8 files changed

+396
-11
lines changed

examples/compaction_example.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
"""
2+
Compaction example: run RLM with compaction enabled and a low threshold
3+
to trigger summarization on gpt-5-nano.
4+
5+
Uses a low compaction_threshold_pct so compaction runs after several iterations.
6+
The task forces many separate REPL turns with substantial output so the root
7+
context grows and compaction definitely triggers. The REPL variable `history`
8+
holds trajectory segments and summaries.
9+
"""
10+
11+
import os
12+
13+
from dotenv import load_dotenv
14+
15+
from rlm import RLM
16+
from rlm.logger import RLMLogger
17+
18+
load_dotenv()
19+
20+
# Low threshold so compaction triggers after a few iterations (~2% of context).
21+
# Use 0.85 in production.
22+
COMPACTION_THRESHOLD_PCT = 0.03
23+
24+
logger = RLMLogger()
25+
rlm = RLM(
26+
backend="portkey",
27+
backend_kwargs={
28+
"model_name": "@openai/gpt-5-nano",
29+
"api_key": os.getenv("PORTKEY_API_KEY"),
30+
},
31+
environment="local",
32+
environment_kwargs={},
33+
max_depth=1,
34+
max_iterations=10,
35+
compaction=True,
36+
compaction_threshold_pct=COMPACTION_THRESHOLD_PCT,
37+
verbose=True,
38+
logger=logger,
39+
)
40+
41+
# Hard task that forces many iterations: find the 50th prime using at least 8
42+
# separate REPL blocks, one per turn. Each block must produce visible output.
43+
# This grows message history (long reasoning + code + execution results) so
44+
# compaction triggers.
45+
prompt = (
46+
"Find the 50th prime number. You MUST use at least 8 separate REPL blocks, "
47+
"each in its own response (one block per message). Do NOT combine steps.\n\n"
48+
"Required structure: "
49+
"Block 1: Define a function is_prime(n) and test it on a few numbers; print the results. "
50+
"Block 2: Write a loop that counts primes and print the first 10 primes. "
51+
"Block 3: Extend to count up to 20 primes and print them. "
52+
"Block 4: Count up to 30 primes and print the 25th. "
53+
"Block 5: Count up to 40 primes and print the 35th. "
54+
"Block 6: Count up to 50 primes and print the 45th. "
55+
"Block 7: Print the full list of the first 50 primes (so we see all 50). "
56+
"Block 8: Set answer = (the 50th prime) and call FINAL_VAR(answer).\n\n"
57+
"Each block must run alone. Show your reasoning briefly before each block. "
58+
"After each block, wait for the execution result before writing the next block."
59+
)
60+
61+
result = rlm.completion(prompt, root_prompt=prompt)
62+
63+
print("Response:", result.response)
64+
print("Execution time:", result.execution_time)
65+
print("Metadata:", result.metadata)

rlm/core/rlm.py

Lines changed: 83 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
build_user_prompt,
2828
)
2929
from rlm.utils.rlm_utils import filter_sensitive_keys
30+
from rlm.utils.token_utils import count_tokens, get_context_limit
3031

3132

3233
class RLM:
@@ -54,6 +55,8 @@ def __init__(
5455
persistent: bool = False,
5556
custom_tools: dict[str, Any] | None = None,
5657
custom_sub_tools: dict[str, Any] | None = None,
58+
compaction: bool = False,
59+
compaction_threshold_pct: float = 0.85,
5760
):
5861
"""
5962
Args:
@@ -74,6 +77,10 @@ def __init__(
7477
values are callable functions. These are injected into the REPL globals.
7578
custom_sub_tools: Dict of custom tools for sub-agents (llm_query calls). If None, inherits
7679
from custom_tools. Pass an empty dict {} to disable tools for sub-agents.
80+
compaction: If True, keep full root model history in REPL variable `history` and compact
81+
when root context reaches compaction_threshold_pct of the model's context limit.
82+
compaction_threshold_pct: When compaction is on, trigger summarization when root
83+
message token count reaches this fraction of the model context limit (default 0.85).
7784
"""
7885
# Store config for spawning per-completion
7986
self.backend = backend
@@ -98,6 +105,9 @@ def __init__(
98105
# Sub-tools: if None, inherit from custom_tools; if {}, no tools for sub-agents
99106
self.custom_sub_tools = custom_sub_tools if custom_sub_tools is not None else custom_tools
100107

108+
self.compaction = compaction
109+
self.compaction_threshold_pct = compaction_threshold_pct
110+
101111
self.depth = depth
102112
self.max_depth = max_depth
103113
self.max_iterations = max_iterations
@@ -181,6 +191,8 @@ def _spawn_completion_context(self, prompt: str | dict[str, Any]):
181191
env_kwargs["custom_tools"] = self.custom_tools
182192
if self.custom_sub_tools is not None:
183193
env_kwargs["custom_sub_tools"] = self.custom_sub_tools
194+
if self.compaction and self.environment_type == "local":
195+
env_kwargs["compaction"] = True
184196
environment: BaseEnv = get_environment(self.environment_type, env_kwargs)
185197

186198
if self.persistent:
@@ -204,7 +216,11 @@ def _setup_prompt(self, prompt: str | dict[str, Any]) -> list[dict[str, Any]]:
204216
query_metadata=metadata,
205217
custom_tools=self.custom_tools,
206218
)
207-
219+
if self.compaction:
220+
message_history[0]["content"] += (
221+
"\n\nThe full conversation history (trajectory segments and any summaries) "
222+
"is available in the REPL variable `history` as a list."
223+
)
208224
return message_history
209225

210226
def completion(
@@ -236,6 +252,18 @@ def completion(
236252
message_history = self._setup_prompt(prompt)
237253

238254
for i in range(self.max_iterations):
255+
if self.compaction and hasattr(environment, "append_compaction_entry"):
256+
current_tokens, threshold_tokens, max_tokens = self._get_compaction_status(
257+
message_history
258+
)
259+
self.verbose.print_compaction_status(
260+
current_tokens, threshold_tokens, max_tokens
261+
)
262+
if current_tokens >= threshold_tokens:
263+
self.verbose.print_compaction()
264+
message_history = self._compact_history(
265+
lm_handler, environment, message_history
266+
)
239267
# Current prompt = message history + additional prompt suffix
240268
context_count = (
241269
environment.get_context_count()
@@ -257,8 +285,14 @@ def completion(
257285
environment=environment,
258286
)
259287

260-
# Check if RLM is done and has a final answer.
261-
final_answer = find_final_answer(iteration.response, environment=environment)
288+
# Check if RLM is done and has a final answer. Prefer FINAL_VAR result from REPL execution.
289+
final_answer = None
290+
for block in iteration.code_blocks:
291+
if getattr(block.result, "final_answer", None):
292+
final_answer = block.result.final_answer
293+
break
294+
if final_answer is None:
295+
final_answer = find_final_answer(iteration.response, environment=environment)
262296
iteration.final_answer = final_answer
263297

264298
# If logger is used, log the iteration.
@@ -294,6 +328,8 @@ def completion(
294328

295329
# Update message history with the new messages.
296330
message_history.extend(new_messages)
331+
if self.compaction and hasattr(environment, "append_compaction_entry"):
332+
environment.append_compaction_entry(new_messages)
297333

298334
# Default behavior: we run out of iterations, provide one final answer
299335
time_end = time.perf_counter()
@@ -317,6 +353,50 @@ def completion(
317353
metadata=self.logger.get_trajectory() if self.logger else None,
318354
)
319355

356+
def _get_compaction_status(self, message_history: list[dict[str, Any]]) -> tuple[int, int, int]:
357+
"""Return (current_tokens, threshold_tokens, max_tokens) for compaction."""
358+
model_name = (
359+
self.backend_kwargs.get("model_name", "unknown") if self.backend_kwargs else "unknown"
360+
)
361+
max_tokens = get_context_limit(model_name)
362+
current_tokens = count_tokens(message_history, model_name)
363+
threshold_tokens = int(self.compaction_threshold_pct * max_tokens)
364+
return current_tokens, threshold_tokens, max_tokens
365+
366+
def _should_compact(self, message_history: list[dict[str, Any]]) -> bool:
367+
"""True when root message history is at or over the compaction threshold."""
368+
current_tokens, threshold_tokens, _ = self._get_compaction_status(message_history)
369+
return current_tokens >= threshold_tokens
370+
371+
def _compact_history(
372+
self,
373+
lm_handler: LMHandler,
374+
environment: BaseEnv,
375+
message_history: list[dict[str, Any]],
376+
) -> list[dict[str, Any]]:
377+
"""
378+
Summarize current trajectory, append summary to REPL history, and return
379+
a short message_history with the summary as the new starting point.
380+
"""
381+
summary_prompt = message_history + [
382+
{
383+
"role": "user",
384+
"content": "Very concisely summarize what you have been doing so far in 1–3 short paragraphs. Be extremely brief. This summary will be used to continue the conversation.",
385+
}
386+
]
387+
summary = lm_handler.completion(summary_prompt)
388+
if hasattr(environment, "append_compaction_entry"):
389+
environment.append_compaction_entry({"type": "summary", "content": summary})
390+
# Keep system + initial assistant (metadata), then summary + continue
391+
new_history = message_history[:2] + [
392+
{"role": "assistant", "content": summary},
393+
{
394+
"role": "user",
395+
"content": "Continue from the above summary. The full history (including this summary) is in the REPL variable `history`. Your next action:",
396+
},
397+
]
398+
return new_history
399+
320400
def _completion_turn(
321401
self,
322402
prompt: str | dict[str, Any],

rlm/core/types.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ class REPLResult:
131131
locals: dict
132132
execution_time: float
133133
llm_calls: list["RLMChatCompletion"]
134+
final_answer: str | None = None
134135

135136
def __init__(
136137
self,
@@ -139,12 +140,14 @@ def __init__(
139140
locals: dict,
140141
execution_time: float = None,
141142
rlm_calls: list["RLMChatCompletion"] = None,
143+
final_answer: str | None = None,
142144
):
143145
self.stdout = stdout
144146
self.stderr = stderr
145147
self.locals = locals
146148
self.execution_time = execution_time
147149
self.rlm_calls = rlm_calls or []
150+
self.final_answer = final_answer
148151

149152
def __str__(self):
150153
return f"REPLResult(stdout={self.stdout}, stderr={self.stderr}, locals={self.locals}, execution_time={self.execution_time}, rlm_calls={len(self.rlm_calls)})"
@@ -156,6 +159,7 @@ def to_dict(self):
156159
"locals": {k: _serialize_value(v) for k, v in self.locals.items()},
157160
"execution_time": self.execution_time,
158161
"rlm_calls": [call.to_dict() for call in self.rlm_calls],
162+
"final_answer": self.final_answer,
159163
}
160164

161165

rlm/environments/local_repl.py

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ def __init__(
132132
depth: int = 1,
133133
custom_tools: dict[str, Any] | None = None,
134134
custom_sub_tools: dict[str, Any] | None = None,
135+
compaction: bool = False,
135136
**kwargs,
136137
):
137138
super().__init__(persistent=persistent, depth=depth, **kwargs)
@@ -142,6 +143,7 @@ def __init__(
142143
self._lock = threading.Lock()
143144
self._context_count: int = 0
144145
self._history_count: int = 0
146+
self.compaction = compaction
145147

146148
# Custom tools: functions available in the REPL
147149
self.custom_tools = custom_tools or {}
@@ -156,6 +158,10 @@ def __init__(
156158
# Setup globals, locals, and modules in environment.
157159
self.setup()
158160

161+
if compaction:
162+
self._compaction_history: list[Any] = []
163+
self.locals["history"] = self._compaction_history
164+
159165
# Load context if provided
160166
if context_payload is not None:
161167
self.load_context(context_payload)
@@ -175,6 +181,8 @@ def setup(self):
175181

176182
# Track LLM calls made during code execution
177183
self._pending_llm_calls: list[RLMChatCompletion] = []
184+
# When FINAL_VAR is called inside a REPL block, we store the value here for the main loop
185+
self._last_final_answer: str | None = None
178186

179187
# Add helper functions
180188
self.globals["FINAL_VAR"] = self._final_var
@@ -192,13 +200,19 @@ def setup(self):
192200
# For non-callable values (constants, data), add to locals
193201
self.locals[name] = value
194202

195-
def _final_var(self, variable_name: str) -> str:
196-
"""Return the value of a variable as a final answer."""
203+
def _final_var(self, variable_name: str | Any) -> str:
204+
"""Return the value of a variable as a final answer for the main model, or stringify a direct value."""
205+
if not isinstance(variable_name, str):
206+
answer = str(variable_name)
207+
self._last_final_answer = answer
208+
return answer
197209
variable_name = variable_name.strip().strip("\"'")
198210
if variable_name in self.locals:
199-
return str(self.locals[variable_name])
211+
answer = str(self.locals[variable_name])
212+
self._last_final_answer = answer
213+
return answer
200214

201-
# Provide helpful error message with available variables
215+
# Provide helpful error message with available variables (do not set _last_final_answer)
202216
available = [k for k in self.locals.keys() if not k.startswith("_")]
203217
if available:
204218
return (
@@ -358,6 +372,17 @@ def get_history_count(self) -> int:
358372
"""Return the number of conversation histories stored."""
359373
return self._history_count
360374

375+
def append_compaction_entry(self, entry: list[dict[str, Any]] | dict[str, Any]) -> None:
376+
"""
377+
Append a trajectory segment or a summary to the compaction history.
378+
379+
Entry is either a list of message dicts (trajectory segment) or
380+
a dict with "type": "summary" and "content": str.
381+
"""
382+
if not self.compaction:
383+
return
384+
self._compaction_history.append(copy.deepcopy(entry))
385+
361386
@contextmanager
362387
def _capture_output(self):
363388
"""Thread-safe context manager to capture stdout/stderr."""
@@ -393,8 +418,10 @@ def _restore_scaffold(self) -> None:
393418
self.globals["SHOW_VARS"] = self._show_vars
394419
elif name == "context" and "context_0" in self.locals:
395420
self.locals["context"] = self.locals["context_0"]
396-
elif name == "history" and "history_0" in self.locals:
421+
elif name == "history" and "history_0" in self.locals and not self.compaction:
397422
self.locals["history"] = self.locals["history_0"]
423+
elif name == "history" and self.compaction:
424+
self.locals["history"] = self._compaction_history
398425

399426
def execute_code(self, code: str) -> REPLResult:
400427
"""Execute code in the persistent namespace and return result."""
@@ -422,12 +449,16 @@ def execute_code(self, code: str) -> REPLResult:
422449
stdout = stdout_buf.getvalue()
423450
stderr = stderr_buf.getvalue() + f"\n{type(e).__name__}: {e}"
424451

452+
final_answer = self._last_final_answer
453+
self._last_final_answer = None
454+
425455
return REPLResult(
426456
stdout=stdout,
427457
stderr=stderr,
428458
locals=self.locals.copy(),
429459
execution_time=time.perf_counter() - start_time,
430460
rlm_calls=self._pending_llm_calls.copy(),
461+
final_answer=final_answer,
431462
)
432463

433464
def __enter__(self):

0 commit comments

Comments
 (0)