google · copybara-service · Feb 26, 2026 · Feb 26, 2026
@@ -304,11 +304,6 @@ def process_item(item):
         "Let's think step by step, and put your final answer within \\boxed{}."
     )
     prompt = f"{question} {instruction}"
-    prompt = tokenizer.apply_chat_template(
-        [{"role": "user", "content": prompt}],
-        tokenize=False,
-        add_generation_prompt=True,
-    )
 
     return {
         "prompts": prompt,
@@ -326,7 +321,7 @@ def process_item(item):
 tokenizer_source = MODEL_PATH if NOTEBOOK_ENV == "g3" else MODEL_VERSION
 tokenizer = AutoTokenizer.from_pretrained(tokenizer_source)
 
-chat_parser = parser.QwenChatTemplateParser(tokenizer)
+chat_parser = parser.DefaultChatTemplateParser(tokenizer)
 
 # %%
 train_dataset, test_dataset = create_datasets()

@@ -168,7 +168,13 @@ def _observation_to_messages(
       info: Additional information from the environment.
     """
     del reward, done, info  # Unused in default implementation.
-    if isinstance(observation, dict) and "question" in observation:
+    # The "prompts" value must be raw text with no chat template applied
+    # beforehand; otherwise it would be templated twice.
+    if isinstance(observation, dict) and "prompts" in observation:
+      self._messages.append(
+          {"role": "user", "content": observation["prompts"]}
+      )
+    elif isinstance(observation, dict) and "question" in observation:
       self._messages.append(
           {"role": "user", "content": observation["question"]}
       )