@@ -154,8 +154,8 @@ def _build_launcher_parts( # noqa: C901
154154 ) -> list [str ]:
155155 fields_set = args .model_fields_set
156156 force_fields = {
157- "model_name " ,
158- "model_size " ,
157+ "model_family_name " ,
158+ "model_recipe_name " ,
159159 "num_gpus" ,
160160 "gpus_per_node" ,
161161 "hf_token" ,
@@ -198,6 +198,15 @@ def add(flag: str, value: Any) -> None:
198198 return
199199 if isinstance (value , bool ):
200200 parts .extend ([flag , "true" if value else "false" ])
201+ elif isinstance (value , (list , tuple )):
202+ if not value :
203+ return
204+ if flag == "--dataset_paths" :
205+ parts .extend ([flag , * [str (x ) for x in value ]])
206+ elif flag == "--profiling_ranks" :
207+ parts .extend ([flag , "," .join (str (x ) for x in value )])
208+ else :
209+ parts .extend ([flag , str (value [0 ]) if len (value ) == 1 else "," .join (str (x ) for x in value )])
201210 else :
202211 sv = str (value )
203212 if sv != "" :
@@ -222,31 +231,38 @@ def add_field(field: str, flag: str, value: Any) -> None:
222231 add_field ("hf_token" , "-hf" , args .hf_token )
223232 add_field ("nemo_home" , "-nh" , args .nemo_home )
224233 add_field ("wandb_key" , "-wdk" , args .wandb_key )
225- add_field ("wandb_prj_name" , "-wdp" , args .wandb_prj_name )
226- add_field ("wandb_exp_name" , "-wdj" , args .wandb_exp_name )
234+ add_field ("wandb_project_name" , "-wdp" , args .wandb_project_name )
235+ add_field ("wandb_entity_name" , "-wde" , args .wandb_entity_name )
236+ add_field ("wandb_experiment_name" , "-wdj" , args .wandb_experiment_name )
237+ add_field ("wandb_save_dir" , "-wds" , args .wandb_save_dir )
238+ add_field ("max_retries" , "--max_retries" , args .max_retries )
227239 if args .dryrun and "dryrun" in fields_set :
228240 parts .append ("-d" )
229241 add_field ("num_gpus" , "-ng" , args .num_gpus )
230242 add_field ("gpus_per_node" , "-gn" , args .gpus_per_node )
231243 if mounts :
232244 add ("-cm" , "," .join (mounts ))
233245
234- # Model flags (Megatron-Bridge r0.2.0 API)
246+ # Model flags (Megatron-Bridge main-branch API)
247+ if args .use_recipes and "use_recipes" in fields_set :
248+ parts .append ("--use_recipes" )
235249 if "enable_vboost" in fields_set :
236250 add_field ("enable_vboost" , "-vb" , bool (args .enable_vboost ))
237- if not args .model_name :
238- raise RuntimeError ("Missing required cmd_args.model_name (maps to -m/--model_name )." )
239- if not args .model_size :
240- raise RuntimeError ("Missing required cmd_args.model_size (maps to -s /--model_size )." )
241- add_field ("model_name " , "-m" , args .model_name )
242- add_field ("model_size " , "-s " , args .model_size )
251+ if not args .model_family_name :
252+ raise RuntimeError ("Missing required cmd_args.model_family_name (maps to -m/--model_family_name )." )
253+ if not args .model_recipe_name :
254+ raise RuntimeError ("Missing required cmd_args.model_recipe_name (maps to -mr /--model_recipe_name )." )
255+ add_field ("model_family_name " , "-m" , args .model_family_name )
256+ add_field ("model_recipe_name " , "-mr " , args .model_recipe_name )
243257 if args .enable_nsys and "enable_nsys" in fields_set :
244258 parts .append ("-en" )
245259 add_field ("domain" , "--domain" , args .domain )
246260 if "use_tokendrop" in fields_set and args .use_tokendrop is not None :
247261 add_field ("use_tokendrop" , "--use_tokendrop" , bool (args .use_tokendrop ))
248262 if "use_megatron_fsdp" in fields_set and args .use_megatron_fsdp is not None :
249263 add_field ("use_megatron_fsdp" , "--use_megatron_fsdp" , bool (args .use_megatron_fsdp ))
264+ if "nccl_ub" in fields_set and args .nccl_ub is not None :
265+ add_field ("nccl_ub" , "--nccl_ub" , bool (args .nccl_ub ))
250266 add_field ("cuda_graph_impl" , "--cuda_graph_impl" , args .cuda_graph_impl )
251267 if args .cuda_graph_scope and "cuda_graph_scope" in fields_set :
252268 add_field (
@@ -264,6 +280,7 @@ def add_field(field: str, flag: str, value: Any) -> None:
264280 # Batch
265281 add_field ("mb" , "-mb" , args .mb )
266282 add_field ("gb" , "-gb" , args .gb )
283+ add_field ("seq_length" , "-sl" , args .seq_length )
267284
268285 # Misc
269286 if "moe_a2a_overlap" in fields_set :
@@ -273,11 +290,44 @@ def add_field(field: str, flag: str, value: Any) -> None:
273290 add_field ("activation_offload_layers" , "-ol" , args .activation_offload_layers )
274291 if args .recompute_modules and "recompute_modules" in fields_set :
275292 parts .extend (["--recompute_modules" , self ._normalize_recompute_modules (args .recompute_modules )])
276- # r0.2.0 supports `--detach` / `--no-detach` flags (no boolean value)
277- if args .detach is True and "detach" in fields_set :
278- parts .append ("--detach" )
279- elif args .detach is False and "detach" in fields_set :
280- parts .append ("--no-detach" )
293+ if "detach" in fields_set and args .detach is not None :
294+ parts .extend (["--detach" , "true" if args .detach else "false" ])
295+
296+ # Optimizer
297+ add_field ("lr" , "--lr" , args .lr )
298+ add_field ("min_lr" , "--min_lr" , args .min_lr )
299+ add_field ("warmup_iters" , "--warmup_iters" , args .warmup_iters )
300+
301+ # Checkpointing
302+ add_field ("pretrained_checkpoint" , "--pretrained_checkpoint" , args .pretrained_checkpoint )
303+ add_field ("save_dir" , "--save_dir" , args .save_dir )
304+ add_field ("load_dir" , "--load_dir" , args .load_dir )
305+ add_field ("save_interval" , "--save_interval" , args .save_interval )
306+ add_field ("most_recent_k" , "--most_recent_k" , args .most_recent_k )
307+ add_field ("save_config_filepath" , "--save_config_filepath" , args .save_config_filepath )
308+
309+ # Data / Tokenizer
310+ add_field ("data" , "--data" , args .data )
311+ add_field ("dataset_paths" , "--dataset_paths" , args .dataset_paths )
312+ add_field ("dataset_root" , "--dataset_root" , args .dataset_root )
313+ add_field ("index_mapping_dir" , "--index_mapping_dir" , args .index_mapping_dir )
314+ add_field ("dataset_name" , "--dataset_name" , args .dataset_name )
315+ if args .packed_sequence and "packed_sequence" in fields_set :
316+ parts .append ("--packed_sequence" )
317+ if args .head_only and "head_only" in fields_set :
318+ parts .append ("--head_only" )
319+ add_field ("tokenizer_type" , "--tokenizer_type" , args .tokenizer_type )
320+ add_field ("tokenizer_model" , "--tokenizer_model" , args .tokenizer_model )
321+ add_field ("vocab_size" , "--vocab_size" , args .vocab_size )
322+
323+ # Profiling (performance group)
324+ add_field ("pytorch_profiler" , "-pyp" , args .pytorch_profiler )
325+ add_field ("profiling_start_step" , "--profiling_start_step" , args .profiling_start_step )
326+ add_field ("profiling_stop_step" , "--profiling_stop_step" , args .profiling_stop_step )
327+ add_field ("record_memory_history" , "-mh" , args .record_memory_history )
328+ if args .profiling_gpu_metrics and "profiling_gpu_metrics" in fields_set :
329+ parts .append ("--profiling_gpu_metrics" )
330+ add_field ("profiling_ranks" , "--profiling_ranks" , args .profiling_ranks )
281331
282332 # Extra user args (dict -> string)
283333 if tdef .extra_cmd_args :
0 commit comments