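[Feature] Tiny fix for wandb run id (THUDM#730)

init_wandb_primary no longer returns the W&B run id; it stores it on
args.wandb_run_id instead (set to None when args.use_wandb is false).
allocate_train_group, create_training_models and create_rollout_manager
drop their wandb_run_id parameter: allocate_train_group and
create_rollout_manager now read args.wandb_run_id directly, so
init_wandb_primary must have run on the same args object beforehand.
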
yitianlian · web-flow · commit 491f252e5d73 · 2025-11-16T11:15:10.000+08:00
diff --git a/slime/ray/placement_group.py b/slime/ray/placement_group.py
@@ -109,32 +109,30 @@ def create_placement_groups(args):
     }
 
 
-def allocate_train_group(args, num_nodes, num_gpus_per_node, pg, wandb_run_id):
+def allocate_train_group(args, num_nodes, num_gpus_per_node, pg):
     return RayTrainGroup(
         args=args,
         num_nodes=num_nodes,
         num_gpus_per_node=num_gpus_per_node,
         pg=pg,
-        wandb_run_id=wandb_run_id,
+        wandb_run_id=args.wandb_run_id,
         num_gpus_per_actor=0.4,
     )
 
 
-def create_training_models(args, pgs, rollout_manager, wandb_run_id):
+def create_training_models(args, pgs, rollout_manager):
     actor_model = allocate_train_group(
         args=args,
         num_nodes=args.actor_num_nodes,
         num_gpus_per_node=args.actor_num_gpus_per_node,
         pg=pgs["actor"],
-        wandb_run_id=wandb_run_id,
     )
     if args.use_critic:
         critic_model = allocate_train_group(
             args=args,
             num_nodes=args.critic_num_nodes,
             num_gpus_per_node=args.critic_num_gpus_per_node,
             pg=pgs["critic"],
-            wandb_run_id=wandb_run_id,
         )
         critic_init_handle = critic_model.async_init(args, role="critic", with_ref=False)
     else:
@@ -159,11 +157,11 @@ def create_training_models(args, pgs, rollout_manager, wandb_run_id):
     return actor_model, critic_model
 
 
-def create_rollout_manager(args, pg, wandb_run_id):
+def create_rollout_manager(args, pg):
     rollout_manager = RolloutManager.options(
         num_cpus=1,
         num_gpus=0,
-    ).remote(args, pg, wandb_run_id=wandb_run_id)
+    ).remote(args, pg, wandb_run_id=args.wandb_run_id)
 
     # calculate num_rollout from num_epoch
     num_rollout_per_epoch = None
diff --git a/slime/utils/wandb_utils.py b/slime/utils/wandb_utils.py
@@ -18,7 +18,8 @@ def _is_offline_mode(args) -> bool:
 
 def init_wandb_primary(args):
     if not args.use_wandb:
-        return None
+        args.wandb_run_id = None
+        return
 
     # Set W&B mode if specified (overrides WANDB_MODE env var)
     if args.wandb_mode:
@@ -71,7 +72,8 @@ def init_wandb_primary(args):
 
     _init_wandb_common()
 
-    return wandb.run.id
+    # Set wandb_run_id in args for easy access throughout the training process
+    args.wandb_run_id = wandb.run.id
 
 
 def _compute_config_for_logging(args):