Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
7939166
Add tests to ensure behavior consistency of chat template application
qgallouedec Feb 16, 2026
985e201
parametrize
qgallouedec Feb 16, 2026
49c7cc6
Support for GPT-OSS
qgallouedec Feb 17, 2026
5c2b319
Update doc
qgallouedec Feb 17, 2026
8e551f5
Add docstyle-ignore directive for gpt_oss_chat_template
qgallouedec Feb 17, 2026
42a3a8d
Merge branch 'main' into more-test-get_training_chat_template
qgallouedec Feb 17, 2026
499a111
Merge branch 'more-test-get_training_chat_template' into support-gpt-oss
qgallouedec Feb 17, 2026
4d9e40d
Merge branch 'main' into support-gpt-oss
qgallouedec Feb 18, 2026
39aed19
Merge branch 'main' into support-gpt-oss
qgallouedec Feb 18, 2026
a3c51d2
Merge branch 'main' into support-gpt-oss
qgallouedec Feb 18, 2026
02fbb6e
be more conservative
qgallouedec Feb 18, 2026
17361c2
not useful
qgallouedec Feb 18, 2026
7416e5f
Merge branch 'main' into support-gpt-oss
qgallouedec Feb 20, 2026
3322788
Merge branch 'main' into support-gpt-oss
qgallouedec Feb 23, 2026
01024d6
address tool analysis being dropped
qgallouedec Feb 24, 2026
7f58bc7
docstyle ignore
qgallouedec Feb 24, 2026
9709506
Merge branch 'main' into support-gpt-oss
qgallouedec Feb 24, 2026
6dea19d
Merge branch 'main' into support-gpt-oss
qgallouedec Feb 25, 2026
e9b5d4f
Merge branch 'main' into support-gpt-oss
qgallouedec Feb 25, 2026
df6cba8
Merge branch 'main' into support-gpt-oss
qgallouedec Feb 26, 2026
fd2c497
Merge branch 'main' into support-gpt-oss
qgallouedec Feb 27, 2026
92af268
Merge branch 'main' into support-gpt-oss
qgallouedec Mar 2, 2026
0d0edf8
Merge branch 'main' into support-gpt-oss
qgallouedec Mar 5, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/source/grpo_trainer.md
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,7 @@ trainer = GRPOTrainer(

Tested with:

- **GPT-OSS** — e.g., `openai/gpt-oss-20b`
- **Qwen3** — e.g., `Qwen/Qwen3-0.6B`

> [!TIP]
Expand All @@ -665,6 +666,8 @@ accelerate launch \
...
```

You can also use `openai/gpt-oss-20b` as the base model.

## Vision-Language Model (VLM) Training

GRPO supports training Vision-Language Models (VLMs) on multimodal datasets containing both text and images.
Expand Down
35 changes: 25 additions & 10 deletions tests/test_chat_template_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,10 +216,25 @@ def test_non_prefix_preserving_template(self):
@pytest.mark.parametrize(
"tokenizer_name",
[
pytest.param("trl-internal-testing/tiny-GptOssForCausalLM", id="gpt-oss"),
pytest.param("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification", id="qwen3"),
],
)
class TestGetTrainingChatTemplate:
@staticmethod
def _replace_end(text: str, old: str, new: str) -> str:
if text.endswith(old):
return text[: -len(old)] + new
return text

def _assert_equal(self, tokenizer_name: str, before: str, after: str) -> None:
# Same as `before == after` but with a special case for GPT-OSS.
# For GPT-OSS, the training template replaces the final <|return|> with <|end|> to ensure prefix preservation,
# so we expect a difference in the output.
if tokenizer_name == "trl-internal-testing/tiny-GptOssForCausalLM":
before = self._replace_end(before, "<|return|>", "<|end|>")
assert before == after

def test_new_chat_template_is_prefix_preserving(self, tokenizer_name):
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
assert is_chat_template_prefix_preserving(tokenizer) is False
Expand All @@ -232,7 +247,7 @@ def test_behavior_unchanged_single_user_no_generation_prompt(self, tokenizer_nam
before = tokenizer.apply_chat_template(messages, tokenize=False)
new_chat_template = get_training_chat_template(tokenizer)
after = tokenizer.apply_chat_template(messages, tokenize=False, chat_template=new_chat_template)
assert before == after
self._assert_equal(tokenizer_name, before, after)

def test_behavior_unchanged_single_user_with_generation_prompt(self, tokenizer_name):
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
Expand All @@ -245,7 +260,7 @@ def test_behavior_unchanged_single_user_with_generation_prompt(self, tokenizer_n
add_generation_prompt=True,
chat_template=new_chat_template,
)
assert before == after
self._assert_equal(tokenizer_name, before, after)

def test_behavior_unchanged_single_user_and_final_assistant_plain_content(self, tokenizer_name):
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
Expand All @@ -256,7 +271,7 @@ def test_behavior_unchanged_single_user_and_final_assistant_plain_content(self,
before = tokenizer.apply_chat_template(messages, tokenize=False)
new_chat_template = get_training_chat_template(tokenizer)
after = tokenizer.apply_chat_template(messages, tokenize=False, chat_template=new_chat_template)
assert before == after
self._assert_equal(tokenizer_name, before, after)

def test_behavior_unchanged_final_assistant_with_reasoning_content(self, tokenizer_name):
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
Expand All @@ -271,7 +286,7 @@ def test_behavior_unchanged_final_assistant_with_reasoning_content(self, tokeniz
before = tokenizer.apply_chat_template(messages, tokenize=False)
new_chat_template = get_training_chat_template(tokenizer)
after = tokenizer.apply_chat_template(messages, tokenize=False, chat_template=new_chat_template)
assert before == after
self._assert_equal(tokenizer_name, before, after)

def test_behavior_unchanged_final_assistant_with_existing_think_tags(self, tokenizer_name):
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
Expand All @@ -285,7 +300,7 @@ def test_behavior_unchanged_final_assistant_with_existing_think_tags(self, token
before = tokenizer.apply_chat_template(messages, tokenize=False)
new_chat_template = get_training_chat_template(tokenizer)
after = tokenizer.apply_chat_template(messages, tokenize=False, chat_template=new_chat_template)
assert before == after
self._assert_equal(tokenizer_name, before, after)

def test_behavior_unchanged_assistant_with_tool_calls(self, tokenizer_name):
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
Expand All @@ -300,7 +315,7 @@ def test_behavior_unchanged_assistant_with_tool_calls(self, tokenizer_name):
before = tokenizer.apply_chat_template(messages, tokenize=False)
new_chat_template = get_training_chat_template(tokenizer)
after = tokenizer.apply_chat_template(messages, tokenize=False, chat_template=new_chat_template)
assert before == after
self._assert_equal(tokenizer_name, before, after)

def test_behavior_unchanged_assistant_with_tool_calls_with_string_arguments(self, tokenizer_name):
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
Expand All @@ -315,7 +330,7 @@ def test_behavior_unchanged_assistant_with_tool_calls_with_string_arguments(self
before = tokenizer.apply_chat_template(messages, tokenize=False)
new_chat_template = get_training_chat_template(tokenizer)
after = tokenizer.apply_chat_template(messages, tokenize=False, chat_template=new_chat_template)
assert before == after
self._assert_equal(tokenizer_name, before, after)

def test_behavior_unchanged_with_tools_with_and_without_system_message(self, tokenizer_name):
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
Expand All @@ -340,7 +355,7 @@ def test_behavior_unchanged_with_tools_with_and_without_system_message(self, tok
before = tokenizer.apply_chat_template(messages, tokenize=False, tools=tools)
new_chat_template = get_training_chat_template(tokenizer)
after = tokenizer.apply_chat_template(messages, tokenize=False, tools=tools, chat_template=new_chat_template)
assert before == after
self._assert_equal(tokenizer_name, before, after)

def test_behavior_unchanged_with_tools_with_system_message(self, tokenizer_name):
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
Expand All @@ -365,7 +380,7 @@ def test_behavior_unchanged_with_tools_with_system_message(self, tokenizer_name)
before = tokenizer.apply_chat_template(messages, tokenize=False, tools=tools)
new_chat_template = get_training_chat_template(tokenizer)
after = tokenizer.apply_chat_template(messages, tokenize=False, tools=tools, chat_template=new_chat_template)
assert before == after
self._assert_equal(tokenizer_name, before, after)

def test_behavior_unchanged_generation_prompt_with_enable_thinking_false(self, tokenizer_name):
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
Expand All @@ -381,7 +396,7 @@ def test_behavior_unchanged_generation_prompt_with_enable_thinking_false(self, t
enable_thinking=False,
chat_template=new_chat_template,
)
assert before == after
self._assert_equal(tokenizer_name, before, after)


@pytest.mark.parametrize(
Expand Down
Loading
Loading