[Tunix] Update BaseAgent to accept observations with a "prompts" key, falling back to "question".

lc5211 · The tunix Authors · commit efb4913f9d00 · 2026-02-26T15:28:22.000-08:00
PiperOrigin-RevId: 875916766
diff --git a/examples/deepscaler/train_deepscaler_nb.py b/examples/deepscaler/train_deepscaler_nb.py
@@ -304,11 +304,6 @@ def process_item(item):
         "Let's think step by step, and put your final answer within \\boxed{}."
     )
     prompt = f"{question} {instruction}"
-    prompt = tokenizer.apply_chat_template(
-        [{"role": "user", "content": prompt}],
-        tokenize=False,
-        add_generation_prompt=True,
-    )
 
     return {
         "prompts": prompt,
@@ -326,7 +321,7 @@ def process_item(item):
 tokenizer_source = MODEL_PATH if NOTEBOOK_ENV == "g3" else MODEL_VERSION
 tokenizer = AutoTokenizer.from_pretrained(tokenizer_source)
 
-chat_parser = parser.QwenChatTemplateParser(tokenizer)
+chat_parser = parser.DefaultChatTemplateParser(tokenizer)
 
 # %%
 train_dataset, test_dataset = create_datasets()
diff --git a/tunix/rl/agentic/agents/base_agent.py b/tunix/rl/agentic/agents/base_agent.py
@@ -168,7 +168,13 @@ def _observation_to_messages(
       info: Additional information from the environment.
     """
     del reward, done, info  # Unused in default implementation.
-    if isinstance(observation, dict) and "question" in observation:
+    # Do not apply a chat template to "prompts" beforehand; doing so would
+    # cause double templating.
+    if isinstance(observation, dict) and "prompts" in observation:
+      self._messages.append(
+          {"role": "user", "content": observation["prompts"]}
+      )
+    elif isinstance(observation, dict) and "question" in observation:
       self._messages.append(
           {"role": "user", "content": observation["question"]}
       )