Skip to content

Commit 118d1a2

Browse files
authored
Participant APIs should check whether the quorum has started (#95)
1 parent 87290f5 commit 118d1a2

File tree

2 files changed

+9
-0
lines changed

2 files changed

+9
-0
lines changed

torchft/manager.py

+6
Original file line number | Diff line number | Diff line change
@@ -664,6 +664,9 @@ def participating_rank(self) -> Optional[int]:
664664
Returns:
665665
the rank of the current quorum
666666
"""
667+
if self._quorum_future is None:
668+
return None
669+
667670
self.wait_quorum()
668671

669672
return self._participating_rank
@@ -679,6 +682,9 @@ def num_participants(self) -> int:
679682
Returns:
680683
the number of participants in the current quorum
681684
"""
685+
if self._quorum_future is None:
686+
return 0
687+
682688
self.wait_quorum()
683689

684690
assert self._participating_world_size >= 0, "internal error"

torchft/manager_test.py

+3
Original file line number | Diff line number | Diff line change
@@ -196,6 +196,9 @@ def test_quorum_heal_sync(self, client_mock: MagicMock) -> None:
196196
self.assertEqual(manager._quorum_id, -1)
197197
self.assertEqual(manager.current_step(), 0)
198198

199+
self.assertEqual(manager.num_participants(), 0)
200+
self.assertEqual(manager.participating_rank(), None)
201+
199202
manager.start_quorum()
200203
manager.allreduce(torch.tensor([1.0])).wait()
201204
self.assertFalse(manager._healing)

0 commit comments

Comments
 (0)