google
diff --git a/‎examples/deepscaler/run_deepscaler_disagg_v5p16.sh‎
Lines changed: 0 additions & 2 deletions b/‎examples/deepscaler/run_deepscaler_disagg_v5p16.sh‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎examples/deepswe/run_deepswe_disagg_v5p_32.sh‎
Lines changed: 0 additions & 2 deletions b/‎examples/deepswe/run_deepswe_disagg_v5p_32.sh‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎examples/rl/grpo/gsm8k/run_qwen3_8b.sh‎
Lines changed: 6 additions & 3 deletions b/‎examples/rl/grpo/gsm8k/run_qwen3_8b.sh‎
Lines changed: 6 additions & 3 deletions
diff --git a/‎tests/cli/grpo_main_test.py‎
Lines changed: 122 additions & 2 deletions b/‎tests/cli/grpo_main_test.py‎
Lines changed: 122 additions & 2 deletions
@@ -64,8 +64,6 @@ python -m tunix.cli.grpo_main \
   model_config.remat_config=3 \
   actor_model_config.mesh.shape="$trainer_mesh" \
   actor_model_config.mesh.axis_names="('fsdp','tp')" \
-  reference_model_config.mesh=null \
-  reference_model_config.same_mesh_as="actor" \
   rollout_model_config.mesh.shape="$rollout_mesh" \
   rollout_model_config.mesh.axis_names="('fsdp','tp')" \
   \
 
@@ -81,8 +81,6 @@ python -m tunix.cli.grpo_main \
   model_config.remat_config=3 \
   actor_model_config.mesh.shape="$trainer_mesh" \
   actor_model_config.mesh.axis_names="('fsdp','tp')" \
-  reference_model_config.mesh=null \
-  reference_model_config.same_mesh_as="actor" \
   rollout_model_config.mesh.shape="$rollout_mesh" \
   rollout_model_config.mesh.axis_names="('fsdp','tp')" \
   \
 
@@ -45,7 +45,11 @@ num_generations="${num_generations:-4}"
 train_mesh="${train_mesh:-(8,1)}"
 rollout_mesh="${rollout_mesh:-(1,8)}"
 
-checkpoint_dir="${checkpoint_dir:-gs://tunix/rl/checkpoints/gsm8k/qwen3/01}"
+# Set rollout_colocate to a role name (e.g. "actor") to colocate the rollout
+# model on that role's devices; its mesh shape still comes from rollout_mesh.
+rollout_colocate="${rollout_colocate:-null}"
+
+checkpoint_dir="${checkpoint_dir-gs://tunix/rl/checkpoints/gsm8k/qwen3/01}"
 checkpoint_suffix="${checkpoint_suffix:-$(printf '%04d' "$((RANDOM % 10000))")}"
 if [[ -n "$checkpoint_dir" && "$checkpoint_dir" != "null" ]]; then
   checkpoint_dir="${checkpoint_dir}_${checkpoint_suffix}"
@@ -79,8 +83,7 @@ python -m tunix.cli.grpo_main \
   model_config.remat_config=3 \
   actor_model_config.mesh.shape="$train_mesh" \
   actor_model_config.mesh.axis_names="('fsdp','tp')" \
-  reference_model_config.mesh=null \
-  reference_model_config.same_mesh_as="actor" \
+  rollout_model_config.colocate_with="$rollout_colocate" \
   rollout_model_config.mesh.shape="$rollout_mesh" \
   rollout_model_config.mesh.axis_names="('fsdp','tp')" \
   \
 
@@ -644,7 +644,6 @@ def test_cli_empty_system_prompt_stays_empty_string(self):
     )
     self.assertEqual(p.config["agentic_grpo_config"]["system_prompt"], "")
 
-
 class SplitMeshConfigTest(absltest.TestCase):
 
   def test_split_mesh_uses_explicit_role_meshes(self):
@@ -688,7 +687,6 @@ def test_split_mesh_uses_explicit_role_meshes(self):
           "shape": "(2,1)",
           "axis_names": "('fsdp','tp')",
       }
-    pipeline.config["reference_model_config"] = {"same_mesh_as": "actor"}
     rollout_model_config = pipeline.config["rollout_model_config"]
     if isinstance(rollout_model_config, omegaconf.dictconfig.DictConfig):
       rollout_model_config["mesh"] = {
@@ -732,6 +730,128 @@ def __init__(self, devices, axis_names, axis_types=None):
         role_to_mesh[rl_cluster_lib.Role.ACTOR],
     )
 
+  def test_colocate_with_reuses_device_slice_with_different_mesh(self):
+    extra = """
+training_mode: "agentic_grpo"
+data_module: "tunix.cli.recipes.deepscaler_data"
+apply_chat_template_to_dataset: false
+data_config:
+  train_data_path: "gs://fake/train.json"
+  eval_data_path: "gs://fake/eval.parquet"
+prompt_key: "prompts"
+reward_functions: []
+verl_compatible: false
+chat_parser_config:
+  type: "default"
+agent_class_path: null
+agent_kwargs: {}
+env_class_path: null
+env_kwargs: {}
+kubernetes_config: null
+agentic_grpo_config:
+  num_generations: 2
+  num_iterations: 1
+  beta: 0.0
+  epsilon: 0.2
+  epsilon_high: 0.28
+  system_prompt: ""
+  max_concurrency: 1
+  off_policy_steps: 0
+  max_turns: 1
+  context_ratio: 1
+sglang_jax_config:
+  mem_fraction_static: 0.8
+vllm_config:
+  hbm_utilization: 0.4
+"""
+    pipeline = _make_pipeline(extra)
+    actor_model_config = pipeline.config["actor_model_config"]
+    if isinstance(actor_model_config, omegaconf.dictconfig.DictConfig):
+      actor_model_config["mesh"] = {
+          "shape": "(2,1)",
+          "axis_names": "('fsdp','tp')",
+      }
+    rollout_model_config = pipeline.config["rollout_model_config"]
+    if isinstance(rollout_model_config, omegaconf.dictconfig.DictConfig):
+      rollout_model_config["colocate_with"] = "actor"
+      rollout_model_config["mesh"] = {
+          "shape": "(1,2)",
+          "axis_names": "('fsdp','tp')",
+      }
+
+    fake_devices = list(range(4))
+
+    class FakeMesh:
+
+      def __init__(self, devices, axis_names, axis_types=None):
+        self.devices = devices
+        self.axis_names = axis_names
+        self.axis_types = axis_types
+
+    with mock.patch.object(grpo_main.jax, "devices", return_value=fake_devices):
+      with mock.patch.object(
+          grpo_main.jax.sharding, "Mesh", side_effect=FakeMesh
+      ):
+        role_to_mesh = pipeline.create_role_to_mesh()
+
+    self.assertSequenceEqual(
+        role_to_mesh[rl_cluster_lib.Role.ACTOR].devices.flatten().tolist(),
+        [0, 1],
+    )
+    self.assertSequenceEqual(
+        role_to_mesh[rl_cluster_lib.Role.ROLLOUT].devices.flatten().tolist(),
+        [0, 1],
+    )
+    self.assertEqual(
+        role_to_mesh[rl_cluster_lib.Role.ACTOR].devices.shape,
+        (2, 1),
+    )
+    self.assertEqual(
+        role_to_mesh[rl_cluster_lib.Role.ROLLOUT].devices.shape,
+        (1, 2),
+    )
+
+  def test_empty_string_colocate_with_is_treated_as_unset(self):
+    extra = """
+training_mode: "agentic_grpo"
+data_module: "tunix.cli.recipes.deepscaler_data"
+apply_chat_template_to_dataset: false
+data_config:
+  train_data_path: "gs://fake/train.json"
+  eval_data_path: "gs://fake/eval.parquet"
+prompt_key: "prompts"
+reward_functions: []
+verl_compatible: false
+chat_parser_config:
+  type: "default"
+agent_class_path: null
+agent_kwargs: {}
+env_class_path: null
+env_kwargs: {}
+kubernetes_config: null
+agentic_grpo_config:
+  num_generations: 2
+  num_iterations: 1
+  beta: 0.0
+  epsilon: 0.2
+  epsilon_high: 0.28
+  system_prompt: ""
+  max_concurrency: 1
+  off_policy_steps: 0
+  max_turns: 1
+  context_ratio: 1
+sglang_jax_config:
+  mem_fraction_static: 0.8
+vllm_config:
+  hbm_utilization: 0.4
+"""
+    pipeline = _make_pipeline(extra)
+    rollout_model_config = pipeline.config["rollout_model_config"]
+    if isinstance(rollout_model_config, omegaconf.dictconfig.DictConfig):
+      rollout_model_config["colocate_with"] = ""
+
+    self.assertEmpty(pipeline._get_colocate_with_map())
+
 
 if __name__ == "__main__":
   absltest.main()
Original file line number	Diff line number	Diff line change
`@@ -64,8 +64,6 @@ python -m tunix.cli.grpo_main \`
`64`	`64`	`model_config.remat_config=3 \`
`65`	`65`	`actor_model_config.mesh.shape="$trainer_mesh" \`
`66`	`66`	`actor_model_config.mesh.axis_names="('fsdp','tp')" \`
`67`		`- reference_model_config.mesh=null \`
`68`		`- reference_model_config.same_mesh_as="actor" \`
`69`	`67`	`rollout_model_config.mesh.shape="$rollout_mesh" \`
`70`	`68`	`rollout_model_config.mesh.axis_names="('fsdp','tp')" \`
`71`	`69`	`\`
Original file line number	Diff line number	Diff line change
`@@ -81,8 +81,6 @@ python -m tunix.cli.grpo_main \`
`81`	`81`	`model_config.remat_config=3 \`
`82`	`82`	`actor_model_config.mesh.shape="$trainer_mesh" \`
`83`	`83`	`actor_model_config.mesh.axis_names="('fsdp','tp')" \`
`84`		`- reference_model_config.mesh=null \`
`85`		`- reference_model_config.same_mesh_as="actor" \`
`86`	`84`	`rollout_model_config.mesh.shape="$rollout_mesh" \`
`87`	`85`	`rollout_model_config.mesh.axis_names="('fsdp','tp')" \`
`88`	`86`	`\`