Reproduction
from datasets import load_dataset
from trl import GRPOConfig, GRPOTrainer
dataset = load_dataset("trl-internal-testing/zen", 'standard_prompt_only', split="train")
def reward_func1(completions, **kwargs):
    """Reward function that rewards longer completions."""
    return [float(len(completion)) for completion in completions]

training_args = GRPOConfig(
    learning_rate=0.1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_generations=4,
    max_completion_length=8,
    report_to="none",
    eval_on_start=True,
    loss_type='grpo',
)

trainer = GRPOTrainer(
    model="Qwen/Qwen2-0.5B-Instruct",
    reward_funcs=reward_func1,
    args=training_args,
    train_dataset=dataset,
    eval_dataset=dataset,
)
trainer.train()
Output:
File "~/envs/torch_2/lib/python3.11/site-packages/transformers/trainer.py", line 2325, in train
return inner_training_loop(
^^^^^^^^^^^^^^^^^^^^
File "~/envs/torch_2/lib/python3.11/site-packages/transformers/trainer.py", line 2576, in _inner_training_loop
self._evaluate(trial, ignore_keys_for_eval, skip_scheduler=True)
File "~/envs/torch_2/lib/python3.11/site-packages/transformers/trainer.py", line 3170, in _evaluate
metrics = self.evaluate(ignore_keys=ignore_keys_for_eval)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "~/envs/torch_2/lib/python3.11/site-packages/transformers/trainer.py", line 4489, in evaluate
output = eval_loop(
^^^^^^^^^^
File "~/envs/torch_2/lib/python3.11/site-packages/transformers/trainer.py", line 4685, in evaluation_loop
losses, logits, labels = self.prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "~/envs/torch_2/lib/python3.11/site-packages/trl/trainer/grpo_trainer.py", line 2283, in prediction_step
loss = self.compute_loss(model, inputs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "~/envs/torch_2/lib/python3.11/site-packages/trl/extras/profiling.py", line 98, in wrapper
return func(self, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "~/envs/torch_2/lib/python3.11/site-packages/trl/trainer/grpo_trainer.py", line 2106, in compute_loss
return self._compute_loss(model, inputs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "~/envs/torch_2/lib/python3.11/site-packages/trl/trainer/grpo_trainer.py", line 2222, in _compute_loss
loss = loss / self.current_gradient_accumulation_steps
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'GRPOTrainer' object has no attribute 'current_gradient_accumulation_steps'
The attribute current_gradient_accumulation_steps is normally set during the training loop, but eval_on_start=True triggers an evaluation (and therefore _compute_loss) before any training step has run, so the attribute does not exist yet. A possible fix is to initialize current_gradient_accumulation_steps = 1 in __init__ when eval_on_start is True, so the evaluation pass has a value to divide by.
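Until that lands, a user-side workaround is possible (a minimal sketch, reusing the dataset, reward function, and config from the reproduction above, and assuming the attribute is only read for loss scaling in the eval-only prediction step):

# Workaround sketch: pre-set the attribute that _compute_loss divides by,
# so the eval_on_start evaluation pass does not crash. The trainer manages
# the real value once training steps begin; 1 is only a placeholder for the
# evaluation-only pass before training starts.
trainer = GRPOTrainer(
    model="Qwen/Qwen2-0.5B-Instruct",
    reward_funcs=reward_func1,
    args=training_args,
    train_dataset=dataset,
    eval_dataset=dataset,
)
trainer.current_gradient_accumulation_steps = 1
trainer.train()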
System Info
trl env
Copy-paste the following information when reporting an issue:
- Platform: macOS-15.7.2-arm64-arm-64bit
- Python version: 3.11.4
- TRL version: 0.26.0
- PyTorch version: 2.9.0
- accelerator(s): MPS
- Transformers version: 4.57.3
- Accelerate version: 1.12.0
- Accelerate config: not found
- Datasets version: 3.2.0
- HF Hub version: 0.36.0
- bitsandbytes version: not installed
- DeepSpeed version: not installed
- Liger-Kernel version: not installed
- LLM-Blender version: not installed
- OpenAI version: 2.9.0
- PEFT version: not installed
- vLLM version: 0.11.2
Checklist
- I have checked that my issue isn't already filed (see open issues)
- I have included my system information
- Any code provided is minimal, complete, and reproducible (more on MREs)
- Any code provided is properly formatted in code blocks (no screenshots, more on code blocks)
- Any traceback provided is complete