@@ -327,7 +327,7 @@ def _invoke_run_with_any_failed_policy(self, role: str = DEFAULT_ROLE) -> RunRes
327327 # Record failure detection event
328328 record_profiling_event (
329329 ProfilingEvent .FAILURE_DETECTED ,
330- node_id = self ._node_id ,
330+ node_id = self ._rdzv_handler . _this_node ,
331331 rank = self ._worker_group .group_rank ,
332332 )
333333
@@ -359,7 +359,7 @@ def _invoke_run_with_any_failed_policy(self, role: str = DEFAULT_ROLE) -> RunRes
359359 # Record failure detection event
360360 record_profiling_event (
361361 ProfilingEvent .FAILURE_DETECTED ,
362- node_id = self ._node_id ,
362+ node_id = self ._rdzv_handler . _this_node ,
363363 rank = self ._worker_group .group_rank ,
364364 )
365365
@@ -606,7 +606,7 @@ async def send_close_msg():
606606 # Record worker termination event after shutdown is complete
607607 record_profiling_event (
608608 ProfilingEvent .WORKER_TERMINATED ,
609- node_id = self ._node_id ,
609+ node_id = self ._rdzv_handler . _this_node ,
610610 rank = worker_group .group_rank ,
611611 )
612612
@@ -622,7 +622,7 @@ def _start_workers(self, worker_group: WorkerGroup) -> Dict[int, Any]:
622622 # Record worker start start event
623623 record_profiling_event (
624624 ProfilingEvent .WORKER_START_STARTED ,
625- node_id = self ._node_id ,
625+ node_id = self ._rdzv_handler . _this_node ,
626626 rank = worker_group .group_rank ,
627627 )
628628
@@ -700,7 +700,7 @@ def _start_workers(self, worker_group: WorkerGroup) -> Dict[int, Any]:
700700 # Record worker start completion event
701701 record_profiling_event (
702702 ProfilingEvent .WORKER_START_COMPLETED ,
703- node_id = self ._node_id ,
703+ node_id = self ._rdzv_handler . _this_node ,
704704 rank = worker_group .group_rank ,
705705 )
706706
0 commit comments