|
14 | 14 |
|
15 | 15 | """Tests for agentic_rl_learner.""" |
16 | 16 |
|
| 17 | +import asyncio |
| 18 | +from typing import Any |
17 | 19 | from unittest import mock |
18 | 20 |
|
| 21 | +from absl import logging |
19 | 22 | from absl.testing import absltest |
20 | 23 | from absl.testing import parameterized |
| 24 | +from tunix.rl import rl_cluster as rl_cluster_lib |
| 25 | +from tunix.rl import utils as rl_utils |
21 | 26 | from tunix.rl.agentic import agentic_rl_learner |
22 | 27 | from tunix.rl.rollout import base_rollout |
23 | 28 |
|
@@ -135,6 +140,44 @@ def test_validate_rollout_config_vllm_missing_server_mode(self): |
135 | 140 | algo_config=algo_config, |
136 | 141 | ) |
137 | 142 |
|
def test_train_batch_size_mismatch_raises_error(self):
  """Checks that `train` rejects mismatched micro batch sizes.

  The mocked training config sets `compute_logps_micro_batch_size` to 2 and
  `train_micro_batch_size` to 1; `train` is expected to raise a `ValueError`
  whose message names both values.
  """
  expected_error = (
      r'compute_logps_micro_batch_size \(2\) must be equal to'
      r' train_micro_batch_size \(1\)'
  )
  # `is_sharing_weights` is patched to return False while the learner is
  # built and while `train` runs.
  with mock.patch.object(rl_utils, "is_sharing_weights", return_value=False):
    training_config = mock.Mock(
        compute_logps_micro_batch_size=2,
        train_micro_batch_size=1,
        mini_batch_size=None,
    )
    cluster_config = mock.Mock(
        role_to_mesh={
            rl_cluster_lib.Role.ACTOR: mock.Mock(),
            rl_cluster_lib.Role.ROLLOUT: mock.Mock(),
        },
        training_config=training_config,
        rollout_config=base_rollout.RolloutConfig(
            max_tokens_to_generate=10, return_logprobs=True
        ),
        rollout_engine='generic',
    )
    actor_trainer = mock.Mock(iter_steps=0)
    actor_trainer.restored_global_step.return_value = 0
    rl_cluster = mock.Mock(
        cluster_config=cluster_config,
        actor_trainer=actor_trainer,
        rollout=mock.Mock(),
        tokenizer=mock.Mock(),
    )
    learner = DummyLearner(
        rl_cluster=rl_cluster,
        reward_fns=mock.Mock(),
        algo_config=agentic_rl_learner.AgenticRLConfig(max_response_length=10),
    )
    # A single-example dataset is enough: the error is expected from `train`.
    with self.assertRaisesRegex(ValueError, expected_error):
      learner.train([{'prompt': ['p1']}])
| 180 | + |
138 | 181 |
|
# Run this module's tests through absltest when it is executed as a script.
if __name__ == "__main__":
  absltest.main()
0 commit comments