We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 8e4d7e1 commit 76ddc7f (Copy full SHA for 76ddc7f)
src/nvidia_resiliency_ext/fault_tolerance/_ft_rendezvous.py
@@ -795,7 +795,7 @@ def _add_to_participants(self) -> None:
795
796
# The ranks of the participants will be set once the rendezvous is
797
# complete. If use_infra_group_rank is enabled, store the infrastructure
798
- # rank (SLURM_PROCID or GROUP_RANK) here; otherwise, use placeholder 0.
+ # rank (SLURM_PROCID or GROUP_RANK) here; otherwise, use placeholder -1.
799
if self._settings.use_infra_group_rank:
800
# Try SLURM_PROCID first (set by SLURM), then fall back to GROUP_RANK (set by launcher)
801
infra_rank_str = os.getenv('SLURM_PROCID', os.getenv('GROUP_RANK', '-1'))
0 commit comments