@@ -200,6 +200,14 @@ handle_event(state_timeout, heartbeat, monitoring, #state{replies = Replies,
200200 % ?TRACE("cannot_renew timeout ~p", [Answer], State#state.id),
201201 {stop , {cannot_renew_registration , {timeout , Answer }}, State }
202202 end ;
% Monitoring-state handler for a promise reply whose result is {nack, {Server, not_found}}.
% The reply's result is replaced by ack or nack, depending on the outcome of the repair
% below, and the rewritten reply is then re-dispatched to handle_event/4 in the same state.
203+ handle_event (info , # promise_reply {result = {nack , {Server , not_found }}} = Reply , monitoring ,
204+ # state {id = Id , term = Term , pid = Pid , last_timestamp = Now } = State ) ->
205+ % we are in the monitoring phase, so when the server reports not_found we repair it with our own term and value
206+ NewResult = case pes_promise :await (repair (Server , Id , not_found , Term , {Pid , self (), Now })) of
207+ ack -> ack ;
208+ _ -> nack % any await result other than ack is counted as nack
209+ end ,
210+ handle_event (info , Reply # promise_reply {result = NewResult }, monitoring , State );
203211handle_event (info , # promise_reply {result = {nack , {Server , OldTerm }}} = Reply , monitoring ,
204212 # state {id = Id , term = Term , pid = Pid , last_timestamp = Now } = State ) ->
205213 % we are in monitoring phase so we can do repair because we surely have the majority,
@@ -244,6 +252,10 @@ handle_event(_EventType, _EventContext, handoff, _State) ->
244252 % postpone every event until the handoff is ready
245253 {keep_state_and_data , [postpone ]};
246254
255+ % update (transfer) is only allowed in the monitoring or registered state: an {update, _} call that reaches this clause in any other state is answered with {error, not_in_proper_state} and state and data are kept
256+ handle_event ({call , From }, {update , _ }, StateName , _State )
257+ when StateName =/= monitoring andalso StateName =/= registered ->
258+ {keep_state_and_data , [{reply , From , {error , not_in_proper_state }}]};
247259% we need to update the guarded pid
248260% @TODO unfortunately, if the new registration does not succeed, the old registration cannot be restored
249261handle_event ({call , From }, {update , NewPid }, _StateName , State ) when node (NewPid ) =:= node () ->
@@ -252,8 +264,7 @@ handle_event({call, From}, {update, NewPid}, _StateName, State) when node(NewPid
252264 % If it goes down and its pid does not match the pid in the state, we simply ignore it.
253265 erlang :monitor (process , NewPid ),
254266 {next_state , commit , State # state {pid = NewPid , caller = From }};
255- handle_event ({call , From }, {update , NewPid }, StateName ,
256- # state {id = Id , term = Term } = State ) ->
267+ handle_event ({call , From }, {update , NewPid }, StateName , # state {id = Id } = State ) ->
257268 % things get complicated: we need to transfer the guard process to the target node
258269 Now = pes_time :now (),
259270 Nodes = pes_cluster :nodes (),
@@ -263,9 +274,8 @@ handle_event({call, From}, {update, NewPid}, StateName,
263274 ),
264275 TargetNode = node (NewPid ),
265276 {ok , NewGuard } = rpc :call (TargetNode , gen_statem , start , [? MODULE , {handoff , NewState }, []]),
266- CurrentTerm = encapsulate_term (Term ),
267277 NewValue = {NewPid , NewGuard , Now },
268- Promises = [repair (Server , Id , CurrentTerm , NewState # state .term , NewValue ) || Server <- Nodes ],
278+ Promises = [force_repair (Server , Id , NewState # state .term , NewValue ) || Server <- Nodes ],
269279 lists :foreach (fun (Promise ) -> pes_promise :await (Promise , ? DEFAULT_TIMEOUT ) end , Promises ),
270280 ok = gen_statem :call (NewGuard , {handoff_ready , StateName }),
271281 gen_statem :reply (From , registered ),
@@ -437,6 +447,9 @@ commit(Node, Id, Term, Value) ->
% Delegates to pes_server_sup:repair/5 for Id on Node.
% OldTerm and Value are passed through unchanged; NewTerm is wrapped by
% encapsulate_term/1 (i.e. becomes {NewTerm, self()}) before the call.
% Returns whatever pes_server_sup:repair/5 returns (callers in this module hand it to
% pes_promise:await, so presumably a promise -- confirm in pes_server_sup).
437447repair (Node , Id , OldTerm , NewTerm , Value ) ->
438448 pes_server_sup :repair (Node , Id , OldTerm , encapsulate_term (NewTerm ), Value ).
439449
% Delegates to pes_server_sup:force_repair/4 for Id on Node.
% Unlike repair/5 no old term is supplied; NewTerm is wrapped by encapsulate_term/1
% (i.e. becomes {NewTerm, self()}) and Value is passed through unchanged.
% NOTE(review): presumably the server overwrites its entry regardless of the stored
% term -- confirm in pes_server_sup:force_repair/4.
450+ force_repair (Node , Id , NewTerm , Value ) ->
451+ pes_server_sup :force_repair (Node , Id , encapsulate_term (NewTerm ), Value ).
452+
% Pairs Term with the pid of the calling process: returns {Term, self()}.
% self() is evaluated in whichever process calls this function.
440453encapsulate_term (Term ) ->
441454 {Term , self ()}.
442455
0 commit comments