[Tunix] Update ModelAgent to accept observations with a "prompts" key rather than "question".

lc5211 · The tunix Authors · commit ad3fde5fe8b6 · 2026-02-26T10:37:08.000-08:00
PiperOrigin-RevId: 875785435
diff --git a/tunix/rl/agentic/agents/model_agent.py b/tunix/rl/agentic/agents/model_agent.py
@@ -15,6 +15,7 @@
 """Agent implementation for single-turn interactions."""
 
 import copy
+from typing import Any, Dict
 
 from tunix.rl.agentic.agents import agent_types
 from tunix.rl.agentic.agents import base_agent
@@ -26,9 +27,31 @@ class ModelAgent(base_agent.ConversationAgentBase):
   def __init__(self, system_prompt: str):
     super().__init__(system_prompt=system_prompt)
 
-  # If you want to handle observations in a special way, you can override
-  # _observation_to_messages. Here, we stick to the default behavior of
-  # ConversationAgentBase.
+  def _observation_to_messages(
+      self, observation: Any, reward: float, done: bool, info: Dict[str, Any]
+  ) -> None:
+    """Convert environment observation into chat messages.
+
+    Default behavior:
+    * If observation is a dict containing "prompts", use it as user content.
+    * If observation is a string, append as a user message.
+    * Otherwise, do nothing.
+
+    Subclasses can override this to handle richer observation formats.
+
+    Args:
+      observation: The observation from the environment.
+      reward: The reward from the environment.
+      done: Whether the episode is done.
+      info: Additional information from the environment.
+    """
+    del reward, done, info  # Unused in default implementation.
+    if isinstance(observation, dict) and "prompts" in observation:
+      self._messages.append(
+          {"role": "user", "content": observation["prompts"]}
+      )
+    elif isinstance(observation, str):
+      self._messages.append({"role": "user", "content": observation})
 
   def update_from_model(self, response: str, **kwargs) -> agent_types.Action:
     """Receive model response and return it as the final action."""