Skip to content

Commit efb4913

Browse files
lc5211 authored and The tunix Authors committed
[Tunix] Update BaseAgent to accept observations with a "prompts" key rather than "question".
PiperOrigin-RevId: 875916766
1 parent 38d3834 commit efb4913

File tree

2 files changed

+8
-7
lines changed

2 files changed

+8
-7
lines changed

examples/deepscaler/train_deepscaler_nb.py

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -304,11 +304,6 @@ def process_item(item):
304304
"Let's think step by step, and put your final answer within \\boxed{}."
305305
)
306306
prompt = f"{question} {instruction}"
307-
prompt = tokenizer.apply_chat_template(
308-
[{"role": "user", "content": prompt}],
309-
tokenize=False,
310-
add_generation_prompt=True,
311-
)
312307

313308
return {
314309
"prompts": prompt,
@@ -326,7 +321,7 @@ def process_item(item):
326321
tokenizer_source = MODEL_PATH if NOTEBOOK_ENV == "g3" else MODEL_VERSION
327322
tokenizer = AutoTokenizer.from_pretrained(tokenizer_source)
328323

329-
chat_parser = parser.QwenChatTemplateParser(tokenizer)
324+
chat_parser = parser.DefaultChatTemplateParser(tokenizer)
330325

331326
# %%
332327
train_dataset, test_dataset = create_datasets()

tunix/rl/agentic/agents/base_agent.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -168,7 +168,13 @@ def _observation_to_messages(
168168
info: Additional information from the environment.
169169
"""
170170
del reward, done, info # Unused in default implementation.
171-
if isinstance(observation, dict) and "question" in observation:
171+
# prompts should not be applied with template beforehand to avoid double
172+
# templating.
173+
if isinstance(observation, dict) and "prompts" in observation:
174+
self._messages.append(
175+
{"role": "user", "content": observation["prompts"]}
176+
)
177+
elif isinstance(observation, dict) and "question" in observation:
172178
self._messages.append(
173179
{"role": "user", "content": observation["question"]}
174180
)

0 commit comments

Comments
 (0)