huggingface · robrui · Apr 26, 2026 · Apr 26, 2026 · Apr 26, 2026
diff --git a/trl/experimental/dppo/dppo_trainer.py b/trl/experimental/dppo/dppo_trainer.py
@@ -978,6 +978,11 @@ def _generate_and_score_completions(
             forward_kwargs = {k: v for k, v in prompt_inputs.items() if k not in ["input_ids", "attention_mask"]}
         else:
             forward_kwargs = {}
+            # For text-only models whose forward pass expects token_type_ids (e.g., gemma-3),
+            # create an all-zero tensor with the same shape as prompt_ids. The block below then
+            # extends it with zeros to cover the completion part.
+            if "token_type_ids" in self.model_kwarg_keys:
+                forward_kwargs["token_type_ids"] = torch.zeros_like(prompt_ids)
 
         # If token_type_ids are used, extend them with zeros for the completion part
         if "token_type_ids" in forward_kwargs:

diff --git a/trl/experimental/gfpo/gfpo_trainer.py b/trl/experimental/gfpo/gfpo_trainer.py
@@ -178,6 +178,11 @@ def _generate_and_score_completions(self, inputs):
             forward_kwargs = {k: v for k, v in prompt_inputs.items() if k not in ["input_ids", "attention_mask"]}
         else:
             forward_kwargs = {}
+            # For text-only models whose forward pass expects token_type_ids (e.g., gemma-3),
+            # create an all-zero tensor with the same shape as prompt_ids. The block below then
+            # extends it with zeros to cover the completion part.
+            if "token_type_ids" in self.model_kwarg_keys:
+                forward_kwargs["token_type_ids"] = torch.zeros_like(prompt_ids)
 
         # If token_type_ids are used, extend them with zeros for the completion part
         if "token_type_ids" in forward_kwargs:

diff --git a/trl/experimental/grpo_with_replay_buffer/grpo_with_replay_buffer_trainer.py b/trl/experimental/grpo_with_replay_buffer/grpo_with_replay_buffer_trainer.py
@@ -187,6 +187,11 @@ def _generate_and_score_completions(
             forward_kwargs = {k: v for k, v in prompt_inputs.items() if k not in ["input_ids", "attention_mask"]}
         else:
             forward_kwargs = {}
+            # For text-only models whose forward pass expects token_type_ids (e.g., gemma-3),
+            # create an all-zero tensor with the same shape as prompt_ids. The block below then
+            # extends it with zeros to cover the completion part.
+            if "token_type_ids" in self.model_kwarg_keys:
+                forward_kwargs["token_type_ids"] = torch.zeros_like(prompt_ids)
 
         # If token_type_ids are used, extend them with zeros for the completion part
         if "token_type_ids" in forward_kwargs:

diff --git a/trl/trainer/grpo_trainer.py b/trl/trainer/grpo_trainer.py
@@ -1938,6 +1938,11 @@ def _generate_and_score_completions(
             forward_kwargs = {k: v for k, v in prompt_inputs.items() if k not in ["input_ids", "attention_mask"]}
         else:
             forward_kwargs = {}
+            # For text-only models whose forward pass expects token_type_ids (e.g., gemma-3),
+            # create an all-zero tensor with the same shape as prompt_ids. The block below then
+            # extends it with zeros to cover the completion part.
+            if "token_type_ids" in self.model_kwarg_keys:
+                forward_kwargs["token_type_ids"] = torch.zeros_like(prompt_ids)
 
         # If token_type_ids are used, extend them with zeros for the completion part
         if "token_type_ids" in forward_kwargs:

diff --git a/trl/trainer/rloo_trainer.py b/trl/trainer/rloo_trainer.py
@@ -1171,6 +1171,11 @@ def _generate_and_score_completions(
             forward_kwargs = {k: v for k, v in prompt_inputs.items() if k not in ["input_ids", "attention_mask"]}
         else:
             forward_kwargs = {}
+            # For text-only models whose forward pass expects token_type_ids (e.g., gemma-3),
+            # create an all-zero tensor with the same shape as prompt_ids. The block below then
+            # extends it with zeros to cover the completion part.
+            if "token_type_ids" in self.model_kwarg_keys:
+                forward_kwargs["token_type_ids"] = torch.zeros_like(prompt_ids)
 
         # If token_type_ids are used, extend them with zeros for the completion part
         if "token_type_ids" in forward_kwargs: