@@ -27,6 +27,7 @@ use tokio::sync::Mutex;
27
27
use tokio:: task:: JoinSet ;
28
28
use tokio:: time:: sleep;
29
29
use tonic:: service:: Routes ;
30
+ use tonic:: transport:: server:: TcpIncoming ;
30
31
use tonic:: transport:: Server ;
31
32
use tonic:: { Request , Response , Status } ;
32
33
@@ -56,23 +57,25 @@ struct State {
56
57
pub struct Lighthouse {
57
58
state : Mutex < State > ,
58
59
opt : LighthouseOpt ,
60
+ listener : Mutex < Option < tokio:: net:: TcpListener > > ,
61
+ local_addr : SocketAddr ,
59
62
}
60
63
61
64
#[ derive( StructOpt , Debug ) ]
62
65
#[ structopt( ) ]
63
66
pub struct LighthouseOpt {
64
67
// bind is the address to bind the server to.
65
68
#[ structopt( long = "bind" , default_value = "[::]:29510" ) ]
66
- bind : String ,
69
+ pub bind : String ,
67
70
68
71
#[ structopt( long = "join_timeout_ms" , default_value = "60000" ) ]
69
- join_timeout_ms : u64 ,
72
+ pub join_timeout_ms : u64 ,
70
73
71
74
#[ structopt( long = "min_replicas" ) ]
72
- min_replicas : u64 ,
75
+ pub min_replicas : u64 ,
73
76
74
77
#[ structopt( long = "quorum_tick_ms" , default_value = "100" ) ]
75
- quorum_tick_ms : u64 ,
78
+ pub quorum_tick_ms : u64 ,
76
79
}
77
80
78
81
fn quorum_changed ( a : & Vec < QuorumMember > , b : & Vec < QuorumMember > ) -> bool {
@@ -83,9 +86,10 @@ fn quorum_changed(a: &Vec<QuorumMember>, b: &Vec<QuorumMember>) -> bool {
83
86
}
84
87
85
88
impl Lighthouse {
86
- pub fn new ( opt : LighthouseOpt ) -> Arc < Self > {
89
+ pub async fn new ( opt : LighthouseOpt ) -> Result < Arc < Self > > {
87
90
let ( tx, _) = broadcast:: channel ( 16 ) ;
88
- Arc :: new ( Self {
91
+ let listener = tokio:: net:: TcpListener :: bind ( & opt. bind ) . await ?;
92
+ Ok ( Arc :: new ( Self {
89
93
state : Mutex :: new ( State {
90
94
participants : HashMap :: new ( ) ,
91
95
channel : tx,
@@ -94,7 +98,9 @@ impl Lighthouse {
94
98
heartbeats : HashMap :: new ( ) ,
95
99
} ) ,
96
100
opt : opt,
97
- } )
101
+ local_addr : listener. local_addr ( ) ?,
102
+ listener : Mutex :: new ( Some ( listener) ) ,
103
+ } ) )
98
104
}
99
105
100
106
// Checks whether the quorum is valid and an explanation for the state.
@@ -209,13 +215,20 @@ impl Lighthouse {
209
215
}
210
216
}
211
217
212
- async fn _run_grpc ( self : Arc < Self > ) -> Result < ( ) > {
213
- let bind: SocketAddr = self . opt . bind . parse ( ) ?;
214
- info ! (
215
- "Lighthouse listening on: http://{}:{}" ,
218
+ pub fn address ( & self ) -> String {
219
+ format ! (
220
+ "http://{}:{}" ,
216
221
gethostname( ) . into_string( ) . unwrap( ) ,
217
- bind. port( )
218
- ) ;
222
+ self . local_addr. port( )
223
+ )
224
+ }
225
+
226
+ async fn _run_grpc ( self : Arc < Self > ) -> Result < ( ) > {
227
+ info ! ( "Lighthouse listening on: {}" , self . address( ) ) ;
228
+
229
+ let listener = self . listener . lock ( ) . await . take ( ) . unwrap ( ) ;
230
+ let incoming =
231
+ TcpIncoming :: from_listener ( listener, true , None ) . map_err ( |e| anyhow:: anyhow!( e) ) ?;
219
232
220
233
// Setup HTTP endpoints
221
234
let app = Router :: new ( )
@@ -245,7 +258,7 @@ impl Lighthouse {
245
258
// allow non-GRPC connections
246
259
. accept_http1 ( true )
247
260
. add_routes ( routes)
248
- . serve ( bind )
261
+ . serve_with_incoming ( incoming )
249
262
. await
250
263
. map_err ( |e| e. into ( ) )
251
264
}
@@ -429,14 +442,14 @@ mod tests {
429
442
430
443
use crate :: torchftpb:: lighthouse_service_client:: LighthouseServiceClient ;
431
444
432
- fn lighthouse_test_new ( ) -> Arc < Lighthouse > {
445
+ async fn lighthouse_test_new ( ) -> Result < Arc < Lighthouse > > {
433
446
let opt = LighthouseOpt {
434
447
min_replicas : 1 ,
435
- bind : "0.0.0.0:29510 " . to_string ( ) ,
448
+ bind : "[::]:0 " . to_string ( ) ,
436
449
join_timeout_ms : 60 * 60 * 1000 , // 1hr
437
450
quorum_tick_ms : 10 ,
438
451
} ;
439
- Lighthouse :: new ( opt)
452
+ Lighthouse :: new ( opt) . await
440
453
}
441
454
442
455
async fn lighthouse_client_new ( addr : String ) -> Result < LighthouseServiceClient < Channel > > {
@@ -448,8 +461,8 @@ mod tests {
448
461
}
449
462
450
463
#[ tokio:: test]
451
- async fn test_quorum_join_timeout ( ) {
452
- let lighthouse = lighthouse_test_new ( ) ;
464
+ async fn test_quorum_join_timeout ( ) -> Result < ( ) > {
465
+ let lighthouse = lighthouse_test_new ( ) . await ? ;
453
466
assert ! ( !lighthouse. quorum_valid( ) . await . 0 ) ;
454
467
455
468
{
@@ -478,11 +491,13 @@ mod tests {
478
491
}
479
492
480
493
assert ! ( lighthouse. quorum_valid( ) . await . 0 ) ;
494
+
495
+ Ok ( ( ) )
481
496
}
482
497
483
498
#[ tokio:: test]
484
- async fn test_quorum_fast_prev_quorum ( ) {
485
- let lighthouse = lighthouse_test_new ( ) ;
499
+ async fn test_quorum_fast_prev_quorum ( ) -> Result < ( ) > {
500
+ let lighthouse = lighthouse_test_new ( ) . await ? ;
486
501
assert ! ( !lighthouse. quorum_valid( ) . await . 0 ) ;
487
502
488
503
{
@@ -520,23 +535,23 @@ mod tests {
520
535
}
521
536
522
537
assert ! ( lighthouse. quorum_valid( ) . await . 0 ) ;
538
+
539
+ Ok ( ( ) )
523
540
}
524
541
525
542
#[ tokio:: test]
526
- async fn test_lighthouse_e2e ( ) {
543
+ async fn test_lighthouse_e2e ( ) -> Result < ( ) > {
527
544
let opt = LighthouseOpt {
528
545
min_replicas : 1 ,
529
- bind : "0.0.0.0:29510 " . to_string ( ) ,
546
+ bind : "[::]:0 " . to_string ( ) ,
530
547
join_timeout_ms : 1 ,
531
548
quorum_tick_ms : 10 ,
532
549
} ;
533
- let lighthouse = Lighthouse :: new ( opt) ;
550
+ let lighthouse = Lighthouse :: new ( opt) . await ? ;
534
551
535
552
let lighthouse_task = tokio:: spawn ( lighthouse. clone ( ) . run ( ) ) ;
536
553
537
- let mut client = lighthouse_client_new ( "http://localhost:29510" . to_string ( ) )
538
- . await
539
- . unwrap ( ) ;
554
+ let mut client = lighthouse_client_new ( lighthouse. address ( ) ) . await . unwrap ( ) ;
540
555
541
556
{
542
557
let request = tonic:: Request :: new ( LighthouseHeartbeatRequest {
@@ -563,6 +578,7 @@ mod tests {
563
578
}
564
579
565
580
lighthouse_task. abort ( ) ;
581
+ Ok ( ( ) )
566
582
}
567
583
568
584
#[ tokio:: test]
0 commit comments