1
- import torch . distributed as dist
1
+ import time
2
2
from unittest import TestCase
3
- from torchft .torchft import Lighthouse
3
+
4
+ import torch .distributed as dist
5
+
4
6
from torchft import Manager , ProcessGroupGloo
5
- import time
7
+ from torchft .torchft import Lighthouse
8
+
6
9
7
10
class TestLighthouse (TestCase ):
8
11
def test_join_timeout_behavior (self ) -> None :
@@ -14,7 +17,7 @@ def test_join_timeout_behavior(self) -> None:
14
17
min_replicas = 1 ,
15
18
join_timeout_ms = 100 ,
16
19
)
17
-
20
+
18
21
# Create a manager that tries to join
19
22
try :
20
23
store = dist .TCPStore (
@@ -37,7 +40,7 @@ def test_join_timeout_behavior(self) -> None:
37
40
use_async_quorum = False ,
38
41
lighthouse_addr = lighthouse .address (),
39
42
)
40
-
43
+
41
44
start_time = time .time ()
42
45
manager .start_quorum ()
43
46
time_taken = time .time () - start_time
@@ -46,15 +49,15 @@ def test_join_timeout_behavior(self) -> None:
46
49
finally :
47
50
# Cleanup
48
51
lighthouse .shutdown ()
49
- if ' manager' in locals ():
52
+ if " manager" in locals ():
50
53
manager .shutdown ()
51
-
54
+
52
55
lighthouse = Lighthouse (
53
56
bind = "[::]:0" ,
54
57
min_replicas = 1 ,
55
58
join_timeout_ms = 400 ,
56
59
)
57
-
60
+
58
61
# Create a manager that tries to join
59
62
try :
60
63
store = dist .TCPStore (
@@ -77,7 +80,7 @@ def test_join_timeout_behavior(self) -> None:
77
80
use_async_quorum = False ,
78
81
lighthouse_addr = lighthouse .address (),
79
82
)
80
-
83
+
81
84
start_time = time .time ()
82
85
manager .start_quorum ()
83
86
time_taken = time .time () - start_time
@@ -86,6 +89,5 @@ def test_join_timeout_behavior(self) -> None:
86
89
finally :
87
90
# Cleanup
88
91
lighthouse .shutdown ()
89
- if ' manager' in locals ():
92
+ if " manager" in locals ():
90
93
manager .shutdown ()
91
-
0 commit comments