@@ -421,42 +421,41 @@ reconcile_children(Group, Overall, Delegate) ->
421421 end .
422422
423423do_reconcile_children (Group , Overall , Delegate ) ->
424- case ? SUPERVISOR :which_children (Delegate ) of
425- Children when is_list (Children ) ->
426- Members = rabbit_nodes :list_members (),
427- TotalSize = length (Members ),
428- Reachable = rabbit_nodes :list_reachable (),
429- % % A strict majority of cluster members must be reachable to keep
430- % % children running locally. On an even split (for example 2|2 of
431- % % four nodes) neither side reaches the majority, so both sides stop
432- % % their children; the majority side, once one exists again, reclaims
433- % % them on partition heal. Clusters with fewer than three members
434- % % never stop children this way, since they cannot have a majority.
435- IsMinority = (TotalSize >= 3 ) andalso (length (Reachable ) < (TotalSize div 2 ) + 1 ),
436- case IsMinority of
437- true ->
438- [begin
439- ? LOG_WARNING (" Mirrored supervisor: node is in a minority partition (~tp /~tp nodes reachable), stopping local instance of child ~tp in group ~tp " ,
440- [length (Reachable ), TotalSize , Id , Group ]),
441- catch ? SUPERVISOR :terminate_child (Delegate , Id ),
442- catch ? SUPERVISOR :delete_child (Delegate , Id )
443- end
444- || {Id , Pid , _ , _ } <- Children , is_pid (Pid )];
445- false ->
446- [case rabbit_db_msup :find_mirror (Group , Id ) of
447- {ok , Owner } when Owner =/= Overall ->
448- ? LOG_WARNING (" Mirrored supervisor: child ~tp in group ~tp is owned by another node ~tp (we are ~tp ), stopping local instance" ,
449- [Id , Group , node (Owner ), node ()]),
450- catch ? SUPERVISOR :terminate_child (Delegate , Id ),
451- catch ? SUPERVISOR :delete_child (Delegate , Id );
452- _ ->
453- ok
454- end
455- || {Id , Pid , _ , _ } <- Children , is_pid (Pid )],
456- reclaim_orphans (Group , Overall , Delegate )
457- end ;
458- _ ->
459- ok
424+ Children = ? SUPERVISOR :which_children (Delegate ),
425+ RunningIds = [Id || {Id , Pid , _ , _ } <- Children , is_pid (Pid )],
426+ Members = rabbit_nodes :list_members (),
427+ TotalSize = length (Members ),
428+ Reachable = rabbit_nodes :list_reachable (),
429+ % % A strict majority of cluster members must be reachable to keep children
430+ % % running locally. On an even split (for example 2|2 of four nodes) neither
431+ % % side reaches the majority, so both sides stop their children; the
432+ % % majority side, once one exists again, reclaims them on partition heal.
433+ % % Clusters with fewer than three members never stop children this way,
434+ % % since they cannot have a majority.
435+ IsMinority = (TotalSize >= 3 ) andalso (length (Reachable ) < (TotalSize div 2 ) + 1 ),
436+ case IsMinority of
437+ true ->
438+ lists :foreach (
439+ fun (Id ) ->
440+ ? LOG_WARNING (" Mirrored supervisor: node is in a minority partition (~tp /~tp nodes reachable), stopping local instance of child ~tp in group ~tp " ,
441+ [length (Reachable ), TotalSize , Id , Group ]),
442+ catch ? SUPERVISOR :terminate_child (Delegate , Id ),
443+ catch ? SUPERVISOR :delete_child (Delegate , Id )
444+ end , RunningIds );
445+ false ->
446+ lists :foreach (
447+ fun (Id ) ->
448+ case rabbit_db_msup :find_mirror (Group , Id ) of
449+ {ok , Owner } when Owner =/= Overall ->
450+ ? LOG_WARNING (" Mirrored supervisor: child ~tp in group ~tp is owned by another node ~tp (we are ~tp ), stopping local instance" ,
451+ [Id , Group , node (Owner ), node ()]),
452+ catch ? SUPERVISOR :terminate_child (Delegate , Id ),
453+ catch ? SUPERVISOR :delete_child (Delegate , Id );
454+ _ ->
455+ ok
456+ end
457+ end , RunningIds ),
458+ reclaim_orphans (Group , Overall , Delegate )
460459 end .
461460
462461reclaim_orphans (Group , Overall , Delegate ) ->
@@ -470,26 +469,31 @@ reclaim_orphans(Group, Overall, Delegate) ->
470469 # if_all {conditions = Conditions }),
471470 case rabbit_khepri :get_many (PathPattern ) of
472471 {ok , Map } ->
473- [case S0 of
474- # mirrored_sup_childspec {mirroring_pid = OwnerPid , childspec = ChildSpec , key = {Group , Id }} ->
475- case lists :member (OwnerPid , ActiveMembers ) of
476- false ->
477- ? LOG_NOTICE (" Mirrored supervisor: reclaiming orphan child ~tp in group ~tp (previous owner ~tp was dead/unreachable)" ,
478- [Id , Group , OwnerPid ]),
479- NewS = S0 # mirrored_sup_childspec {mirroring_pid = Overall },
480- case rabbit_khepri :put (Path , NewS ) of
481- ok ->
482- catch ? SUPERVISOR :start_child (Delegate , ChildSpec );
483- _ ->
484- ok
485- end ;
486- true ->
487- ok
488- end ;
489- _ ->
490- ok
491- end
492- || {Path , S0 } <- maps :to_list (Map )];
472+ lists :foreach (
473+ % % get_many already restricts results to this group via
474+ % % the #if_data_matches condition, so the key's group
475+ % % element is always Group here.
476+ fun ({Path , # mirrored_sup_childspec {mirroring_pid = OwnerPid ,
477+ childspec = ChildSpec ,
478+ key = {_ , Id }} = S0 }) ->
479+ case lists :member (OwnerPid , ActiveMembers ) of
480+ false ->
481+ ? LOG_NOTICE (" Mirrored supervisor: reclaiming orphan child ~tp in group ~tp (previous owner ~tp was dead/unreachable)" ,
482+ [Id , Group , OwnerPid ]),
483+ NewS = S0 # mirrored_sup_childspec {mirroring_pid = Overall },
484+ case rabbit_khepri :put (Path , NewS ) of
485+ ok ->
486+ catch ? SUPERVISOR :start_child (Delegate , ChildSpec ),
487+ ok ;
488+ _ ->
489+ ok
490+ end ;
491+ true ->
492+ ok
493+ end ;
494+ (_ ) ->
495+ ok
496+ end , maps :to_list (Map ));
493497 _ ->
494498 ok
495499 end ;
0 commit comments