@@ -1700,10 +1700,10 @@ def get_args_parser() -> ArgumentParser:
17001700 default = "127.0.0.1" ,
17011701 type = str ,
17021702 action = env ,
1703- help = "Address of the master node (rank 0) that only used for static rendezvous. It should "
1704- "be either the IP address or the hostname of rank 0. For single node multi-proc training "
1705- "the --master-addr can simply be 127.0.0.1; IPv6 should have the pattern "
1706- "`[0:0:0:0:0:0:0:1]`." ,
1703+ help = "Address of the master node (rank 0) that is used for static and c10d rendezvous backends "
1704+ "when rdzv_endpoint is not specified. It should be either the IP address or the hostname of rank 0. "
1705+ "For single node multi-proc training the --master-addr can simply be 127.0.0.1; "
1706+ "IPv6 should have the pattern `[0:0:0:0:0:0:0:1]`." ,
17071707 )
17081708 parser .add_argument (
17091709 "--master-port" ,
@@ -1712,7 +1712,7 @@ def get_args_parser() -> ArgumentParser:
17121712 type = int ,
17131713 action = env ,
17141714 help = "Port on the master node (rank 0) to be used for communication during distributed "
1715- "training. It is only used for static rendezvous." ,
1715+ "training. It is used for static and c10d rendezvous backends when rdzv_endpoint is not specified ." ,
17161716 )
17171717 parser .add_argument (
17181718 "--local-addr" ,
@@ -1974,7 +1974,7 @@ def determine_local_world_size(nproc_per_node: str):
19741974
19751975
19761976def get_rdzv_endpoint (args ):
1977- if args .rdzv_backend == "static" and not args .rdzv_endpoint :
1977+ if ( args .rdzv_backend in [ "static" , "c10d" ]) and not args .rdzv_endpoint :
19781978 return f"{ args .master_addr } :{ args .master_port } " # noqa: E231
19791979 return args .rdzv_endpoint
19801980
@@ -2030,9 +2030,9 @@ def config_from_args(args) -> Tuple[LaunchConfig, Union[Callable, str], List[str
20302030 assert 0 < min_nodes <= max_nodes
20312031 assert args .max_restarts >= 0
20322032
2033- if hasattr (args , "master_addr" ) and args .rdzv_backend != "static" and not args .rdzv_endpoint :
2033+ if hasattr (args , "master_addr" ) and args .rdzv_backend not in [ "static" , "c10d" ] and not args .rdzv_endpoint :
20342034 logger .warning (
2035- "master_addr is only used for static rdzv_backend and when rdzv_endpoint "
2035+ "master_addr is only used for static and c10d rdzv_backend when rdzv_endpoint "
20362036 "is not specified."
20372037 )
20382038
0 commit comments