@@ -12,6 +12,8 @@ use std::time::Duration;
1212use anyhow:: Result ;
1313use tokio:: sync:: broadcast;
1414use tokio:: sync:: Mutex ;
15+ use tokio:: task:: JoinSet ;
16+ use tokio:: time:: sleep;
1517use tonic:: transport:: Server ;
1618use tonic:: { Request , Response , Status } ;
1719
@@ -21,9 +23,9 @@ use crate::torchftpb::lighthouse_service_client::LighthouseServiceClient;
2123use crate :: torchftpb:: manager_service_client:: ManagerServiceClient ;
2224use crate :: torchftpb:: {
2325 manager_service_server:: { ManagerService , ManagerServiceServer } ,
24- CheckpointAddressRequest , CheckpointAddressResponse , LighthouseQuorumRequest ,
25- ManagerQuorumRequest , ManagerQuorumResponse , Quorum , QuorumMember , ShouldCommitRequest ,
26- ShouldCommitResponse ,
26+ CheckpointAddressRequest , CheckpointAddressResponse , LighthouseHeartbeatRequest ,
27+ LighthouseQuorumRequest , ManagerQuorumRequest , ManagerQuorumResponse , Quorum , QuorumMember ,
28+ ShouldCommitRequest , ShouldCommitResponse ,
2729} ;
2830
2931#[ cfg( not( test) ) ]
@@ -99,6 +101,19 @@ impl Manager {
99101 }
100102
101103 pub async fn run ( self : Arc < Self > ) -> Result < ( ) > {
104+ let mut set = JoinSet :: new ( ) ;
105+
106+ set. spawn ( self . clone ( ) . _run_heartbeat ( ) ) ;
107+
108+ set. spawn ( self . clone ( ) . _run_grpc ( ) ) ;
109+
110+ while let Some ( res) = set. join_next ( ) . await {
111+ res??;
112+ }
113+ Ok ( ( ) )
114+ }
115+
116+ async fn _run_grpc ( self : Arc < Self > ) -> Result < ( ) > {
102117 let bind = self . bind . parse ( ) ?;
103118 info ! ( "Manager {} listening on {}" , self . replica_id, bind) ;
104119
@@ -109,6 +124,19 @@ impl Manager {
109124 . map_err ( |e| e. into ( ) )
110125 }
111126
127+ async fn _run_heartbeat ( self : Arc < Self > ) -> Result < ( ) > {
128+ let mut client = self . lighthouse_client_new ( ) . await ?;
129+ loop {
130+ let request = tonic:: Request :: new ( LighthouseHeartbeatRequest {
131+ replica_id : self . replica_id . clone ( ) ,
132+ } ) ;
133+
134+ let response = client. heartbeat ( request) . await ;
135+
136+ sleep ( Duration :: from_millis ( 100 ) ) . await ;
137+ }
138+ }
139+
112140 async fn lighthouse_client_new ( & self ) -> Result < LighthouseServiceClient < Channel > > {
113141 info ! (
114142 "Manager: connecting to lighthouse at {}" ,
@@ -333,17 +361,14 @@ mod tests {
333361 #[ tokio:: test]
334362 async fn test_should_commit ( ) -> Result < ( ) > {
335363 let manager = Manager :: new (
336- "repid " . to_string ( ) ,
364+ "rep_id " . to_string ( ) ,
337365 "lighthouse" . to_string ( ) ,
338366 "addr" . to_string ( ) ,
339367 "0.0.0.0:29531" . to_string ( ) ,
340368 "store_addr" . to_string ( ) ,
341369 2 ,
342370 ) ;
343- println ! ( "manager spawn" ) ;
344- let manager_fut = tokio:: spawn ( manager. run ( ) ) ;
345-
346- println ! ( "should_commit1" ) ;
371+ let manager_fut = tokio:: spawn ( manager. _run_grpc ( ) ) ;
347372
348373 let fut_a = tokio:: spawn ( should_commit ( 0 , true ) ) ;
349374 let fut_b = tokio:: spawn ( should_commit ( 1 , true ) ) ;
@@ -353,8 +378,6 @@ mod tests {
353378 assert ! ( resp_a. should_commit) ;
354379 assert ! ( resp_b. should_commit) ;
355380
356- println ! ( "should_commit2" ) ;
357-
358381 let fut_a = tokio:: spawn ( should_commit ( 0 , true ) ) ;
359382 let fut_b = tokio:: spawn ( should_commit ( 1 , false ) ) ;
360383 let resp_a = fut_a. await ??;
@@ -363,8 +386,6 @@ mod tests {
363386 assert ! ( !resp_a. should_commit) ;
364387 assert ! ( !resp_b. should_commit) ;
365388
366- println ! ( "aborting" ) ;
367-
368389 manager_fut. abort ( ) ;
369390
370391 Ok ( ( ) )
0 commit comments