bump trl to 0.29

hjh0119 · hjh0119 · commit aea9b10256d8 · 2026-03-23T17:10:53.000+08:00
diff --git a/README.md b/README.md
@@ -145,7 +145,7 @@ Running Environment:
 | modelscope   | >=1.23       |                     |                                           |
 | peft         | >=0.11,<0.19 |                     |                                           |
 | flash_attn   |              | 2.8.3/3.0.0b1 |                                           |
-| trl          | >=0.15,<0.29 | 0.28.0              | RLHF                                      |
+| trl          | >=0.15,<0.30 | 0.28.0              | RLHF                                      |
 | deepspeed    | >=0.14       | 0.18.8              | Training                                  |
 | vllm         | >=0.5.1      | 0.11.0/0.17.1       | Inference/Deployment                      |
 | sglang       | >=0.4.6      |          | Inference/Deployment                      |
diff --git a/README_CN.md b/README_CN.md
@@ -141,7 +141,7 @@ uv pip install -e . --torch-backend=auto
 | modelscope   | >=1.23       |                     |                    |
 | peft         | >=0.11,<0.19 |                     |                    |
 | flash_attn   |              | 2.8.3/3.0.0b1 |                    |
-| trl          | >=0.15,<0.29 | 0.28.0              | RLHF               |
+| trl          | >=0.15,<0.30 | 0.28.0              | RLHF               |
 | deepspeed    | >=0.14       | 0.18.8              | 训练                 |
 | vllm         | >=0.5.1      | 0.11.0/0.17.1        | 推理/部署              |
 | sglang       | >=0.4.6      |          | 推理/部署              |
diff --git a/docs/source/GetStarted/SWIFT-installation.md b/docs/source/GetStarted/SWIFT-installation.md
@@ -144,7 +144,7 @@ modelscope-registry.us-west-1.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu2
 | modelscope   | >=1.23       |                     |                    |
 | peft         | >=0.11,<0.19 |                     |                    |
 | flash_attn   |              | 2.8.3/3.0.0b1 |                    |
-| trl          | >=0.15,<0.29 | 0.28.0              | RLHF               |
+| trl          | >=0.15,<0.30 | 0.28.0              | RLHF               |
 | deepspeed    | >=0.14       | 0.18.8              | 训练                 |
 | vllm         | >=0.5.1      | 0.11.0/0.17.1       | 推理/部署              |
 | sglang       | >=0.4.6      |          | 推理/部署              |
diff --git a/docs/source/Megatron-SWIFT/Quick-start.md b/docs/source/Megatron-SWIFT/Quick-start.md
@@ -73,7 +73,7 @@ modelscope-registry.us-west-1.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu2
 | transformers | >=4.33       | 4.57.6/5.2.0   |                    |
 | modelscope   | >=1.23       |             |                    |
 | peft         | >=0.11,<0.19 |             |      LoRA          |
-| trl          | >=0.15,<0.29 |       |      RLHF        |
+| trl          | >=0.15,<0.30 |       |      RLHF        |
 
 
 ## 快速入门案例
diff --git a/docs/source_en/GetStarted/SWIFT-installation.md b/docs/source_en/GetStarted/SWIFT-installation.md
@@ -143,7 +143,7 @@ More images can be found [here](https://modelscope.cn/docs/intro/environment-set
 | modelscope   | >=1.23       |                     |                                           |
 | peft         | >=0.11,<0.19 |                     |                                           |
 | flash_attn   |              | 2.8.3/3.0.0b1 |                                           |
-| trl          | >=0.15,<0.29 | 0.28.0              | RLHF                                      |
+| trl          | >=0.15,<0.30 | 0.28.0              | RLHF                                      |
 | deepspeed    | >=0.14       | 0.18.8              | Training                                  |
 | vllm         | >=0.5.1      | 0.11.0/0.17.1       | Inference/Deployment                      |
 | sglang       | >=0.4.6      |          | Inference/Deployment                      |
diff --git a/docs/source_en/Megatron-SWIFT/Quick-start.md b/docs/source_en/Megatron-SWIFT/Quick-start.md
@@ -73,7 +73,7 @@ Recommended Operating Environment:
 | transformers | >=4.33       | 4.57.6/5.2.0    |                    |
 | modelscope   | >=1.23       |             |                    |
 | peft         | >=0.11,<0.19 |             |      LoRA          |
-| trl          | >=0.15,<0.29 |       |      RLHF        |
+| trl          | >=0.15,<0.30 |       |      RLHF        |
 
 
 ## Quick Start Example
diff --git a/requirements/framework.txt b/requirements/framework.txt
@@ -35,6 +35,6 @@ tiktoken
 tqdm
 transformers>=4.33,<5.4.0
 transformers_stream_generator
-trl>=0.15,<0.29
+trl>=0.15,<0.30
 uvicorn
 zstandard
diff --git a/swift/megatron/trainers/dpo_trainer.py b/swift/megatron/trainers/dpo_trainer.py
@@ -13,12 +13,11 @@
 
 
 class DummyDPOTrainer(DPOTrainer):
-    # For reusing the dpo_loss function in TRL.
+    # Lightweight stand-in used only to reuse the dpo_loss function implemented in Swift's DPOTrainer.
     def __init__(self, args):
-        from trl.trainer import FDivergenceConstants
         self.accelerator = namedtuple('Accelerator', ['device'])(device=get_current_device())
         self.f_alpha_divergence_coef = 1.
-        self.f_divergence_params = {FDivergenceConstants.ALPHA_DIVERGENCE_COEF_KEY: self.f_alpha_divergence_coef}
+        self.f_divergence_params = {'alpha_divergence_coef': self.f_alpha_divergence_coef}
         self.reference_free = args.reference_free
         self.label_smoothing = args.label_smoothing
         self.f_divergence_type = args.f_divergence_type
diff --git a/swift/rlhf_trainers/arguments.py b/swift/rlhf_trainers/arguments.py
@@ -28,6 +28,10 @@
 @dataclass
 class DPOConfig(TrainArgumentsMixin, HfDPOConfig):
     ld_alpha: Optional[float] = None  # compat trl==0.15
+    # Fields removed in trl 0.29; redeclared here for backward compatibility.
+    rpo_alpha: Optional[float] = None
+    ref_adapter_name: Optional[str] = None
+    reference_free: Optional[bool] = None
 
     def __post_init__(self):
         TrainArgumentsMixin.__post_init__(self)
diff --git a/swift/rlhf_trainers/dpo_trainer.py b/swift/rlhf_trainers/dpo_trainer.py
diff --git a/swift/rlhf_trainers/gkd_trainer.py b/swift/rlhf_trainers/gkd_trainer.py
diff --git a/swift/rlhf_trainers/reward_trainer.py b/swift/rlhf_trainers/reward_trainer.py
diff --git a/swift/rlhf_trainers/rlhf_mixin.py b/swift/rlhf_trainers/rlhf_mixin.py