@@ -395,6 +395,30 @@ def test_expert_parallel_size_plumbed_to_sharding(self):
         self.assertEqual(sampler.args["tensor_parallel_size"], 4)
         self.assertEqual(sampler.args["data_parallel_size"], 1)
 
+    def test_expert_parallel_size_via_engine_kwargs_not_leaked_to_vllm(self):
+        # Regression test: expert_parallel_size passed via engine_kwargs should be
+        # consumed by tunix config processing and translated into
+        # additional_config["sharding"]["sharding_strategy"]["expert_parallelism"].
+        # It must NOT appear as a top-level key in sampler.args, because vLLM's
+        # EngineArgs has no such parameter and would raise an error.
+        mesh = self._make_mock_mesh(8)
+        config = vllm_sampler.VllmConfig(
+            mesh=mesh,
+            init_with_random_weights=False,
+            engine_kwargs={"expert_parallel_size": 2},
+        )
+        sampler = self._make_sampler(config)
+
+        self.assertNotIn(
+            "expert_parallel_size",
+            sampler.args,
+            "expert_parallel_size must not be passed directly to vLLM engine args",
+        )
+        sharding_strategy = sampler.args["additional_config"]["sharding"][
+            "sharding_strategy"
+        ]
+        self.assertEqual(sharding_strategy["expert_parallelism"], 2)
+
     def test_default_expert_parallel_size_is_one(self):
         mesh = self._make_mock_mesh(8)
         config = vllm_sampler.VllmConfig(
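For context, a minimal sketch of the engine_kwargs translation the new test exercises. The helper name `_translate_engine_kwargs` is hypothetical, not the real tunix function; the sketch only shows the shape of the transformation the assertions above rely on.

# Hypothetical sketch, not the actual tunix implementation.
def _translate_engine_kwargs(engine_kwargs: dict) -> dict:
    """Pop expert_parallel_size and fold it into additional_config (sketch)."""
    args = dict(engine_kwargs)  # copy so the caller's dict is not mutated
    # vLLM's EngineArgs has no expert_parallel_size parameter, so strip it
    # here; default to 1 when the caller does not set it.
    ep_size = args.pop("expert_parallel_size", 1)
    strategy = (
        args.setdefault("additional_config", {})
        .setdefault("sharding", {})
        .setdefault("sharding_strategy", {})
    )
    strategy["expert_parallelism"] = ep_size
    return args

# Example: mirrors the assertions in the test above.
args = _translate_engine_kwargs({"expert_parallel_size": 2})
assert "expert_parallel_size" not in args
assert args["additional_config"]["sharding"]["sharding_strategy"][
    "expert_parallelism"
] == 2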