@@ -72,7 +72,7 @@ int clusterNodeAddReplica(clusterNode *primary, clusterNode *replica);
7272int clusterAddSlot (clusterNode * n , int slot );
7373int clusterDelSlot (int slot );
7474int clusterDelNodeSlots (clusterNode * node );
75- int clusterMoveNodeSlots (clusterNode * from_node , clusterNode * to_node );
75+ void clusterMoveNodeSlots (clusterNode * from_node , clusterNode * to_node , int * slots , int * importing_slots , int * migrating_slots );
7676void clusterNodeSetSlotBit (clusterNode * n , int slot );
7777static void clusterSetPrimary (clusterNode * n , int closeSlots , int full_sync_required );
7878void clusterHandleReplicaFailover (void );
@@ -2783,7 +2783,7 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
27832783 /* Update importing_slots_from to point to the sender, if it is in the
27842784 * same shard as the previous slot owner */
27852785 if (areInSameShard (sender , server .cluster -> importing_slots_from [j ])) {
2786- serverLog (LL_NOTICE ,
2786+ serverLog (LL_VERBOSE ,
27872787 "Failover occurred in migration source. Update importing "
27882788 "source for slot %d to node %.40s (%s) in shard %.40s." ,
27892789 j , sender -> name , sender -> human_nodename , sender -> shard_id );
@@ -2825,7 +2825,7 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
28252825 (server .cluster -> migrating_slots_to [j ]-> configEpoch < senderConfigEpoch ||
28262826 nodeIsReplica (server .cluster -> migrating_slots_to [j ])) &&
28272827 areInSameShard (server .cluster -> migrating_slots_to [j ], sender )) {
2828- serverLog (LL_NOTICE ,
2828+ serverLog (LL_VERBOSE ,
28292829 "Failover occurred in migration target."
28302830 " Slot %d is now being migrated to node %.40s (%s) in shard %.40s." ,
28312831 j , sender -> name , sender -> human_nodename , sender -> shard_id );
@@ -3717,17 +3717,35 @@ int clusterProcessPacket(clusterLink *link) {
37173717 /* A failover occurred in the shard where `sender` belongs to and `sender` is
37183718 * no longer a primary. Update slot assignment to `sender_claimed_primary`,
37193719 * which is the new primary in the shard. */
3720- int slots = clusterMoveNodeSlots (sender , sender_claimed_primary );
3720+ int slots = 0 , importing_slots = 0 , migrating_slots = 0 ;
3721+ clusterMoveNodeSlots (sender , sender_claimed_primary ,
3722+ & slots , & importing_slots , & migrating_slots );
37213723 /* `primary` is still a `replica` in this observer node's view;
37223724 * update its role and configEpoch */
37233725 clusterSetNodeAsPrimary (sender_claimed_primary );
37243726 sender_claimed_primary -> configEpoch = sender_claimed_config_epoch ;
3725- serverLog (LL_NOTICE ,
3726- "A failover occurred in shard %.40s; node %.40s (%s) lost %d slot(s) and"
3727- " failed over to node %.40s (%s) with a config epoch of %llu" ,
3728- sender -> shard_id , sender -> name , sender -> human_nodename , slots ,
3729- sender_claimed_primary -> name , sender_claimed_primary -> human_nodename ,
3730- (unsigned long long )sender_claimed_primary -> configEpoch );
3727+ if (slots ) {
3728+ serverLog (LL_NOTICE ,
3729+ "A failover occurred in shard %.40s; node %.40s (%s) lost %d slot(s) and"
3730+ " failed over to node %.40s (%s) with a config epoch of %llu" ,
3731+ sender -> shard_id , sender -> name , sender -> human_nodename , slots ,
3732+ sender_claimed_primary -> name , sender_claimed_primary -> human_nodename ,
3733+ (unsigned long long )sender_claimed_primary -> configEpoch );
3734+ }
3735+ if (importing_slots ) {
3736+ serverLog (LL_NOTICE ,
3737+ "A failover occurred in migration source. Update importing "
3738+ "source of %d slot(s) to node %.40s (%s) in shard %.40s." ,
3739+ importing_slots , sender_claimed_primary -> name ,
3740+ sender_claimed_primary -> human_nodename , sender_claimed_primary -> shard_id );
3741+ }
3742+ if (migrating_slots ) {
3743+ serverLog (LL_NOTICE ,
3744+ "A failover occurred in migration target. Update migrating "
3745+ "target of %d slot(s) to node %.40s (%s) in shard %.40s." ,
3746+ migrating_slots , sender_claimed_primary -> name ,
3747+ sender_claimed_primary -> human_nodename , sender_claimed_primary -> shard_id );
3748+ }
37313749 serverAssert (sender -> numslots == 0 );
37323750 }
37333751 } else {
@@ -3748,7 +3766,7 @@ int clusterProcessPacket(clusterLink *link) {
37483766 sender -> flags |= CLUSTER_NODE_REPLICA ;
37493767
37503768 /* Update config and state. */
3751- clusterDoBeforeSleep (CLUSTER_TODO_SAVE_CONFIG | CLUSTER_TODO_UPDATE_STATE );
3769+ clusterDoBeforeSleep (CLUSTER_TODO_SAVE_CONFIG | CLUSTER_TODO_UPDATE_STATE | CLUSTER_TODO_FSYNC_CONFIG );
37523770 }
37533771
37543772 /* Primary node changed for this replica? */
@@ -5886,18 +5904,43 @@ int clusterDelNodeSlots(clusterNode *node) {
58865904/* Transfer slots from `from_node` to `to_node`.
58875905 *
58885906 * Iterates over all cluster slots, transferring each slot covered
5889- * by `from_node` to `to_node`. Counts and returns the number of
5890- * slots transferred. */
5891- int clusterMoveNodeSlots (clusterNode * from_node , clusterNode * to_node ) {
5892- int processed = 0 ;
5907+ * by `from_node` to `to_node`. Includes importing slots and migrating
5908+ * slots. This function is currently only called after a failover occurs
5909+ * within a shard, i.e. moving slots from the old primary to the new
5910+ * primary. It is a special case of clusterUpdateSlotsConfigWith. */
5911+ void clusterMoveNodeSlots (clusterNode * from_node , clusterNode * to_node , int * slots , int * importing_slots , int * migrating_slots ) {
5912+ serverAssert (areInSameShard (from_node , to_node ));
5913+ int processed = 0 , importing_processed = 0 , migrating_processed = 0 ;
5914+
58935915 for (int j = 0 ; j < CLUSTER_SLOTS ; j ++ ) {
58945916 if (clusterNodeCoversSlot (from_node , j )) {
58955917 clusterDelSlot (j );
58965918 clusterAddSlot (to_node , j );
58975919 processed ++ ;
58985920 }
5921+
5922+ if (server .cluster -> importing_slots_from [j ] == from_node ) {
5923+ serverLog (LL_VERBOSE ,
5924+ "Failover occurred in migration source. Update importing "
5925+ "source for slot %d to node %.40s (%s) in shard %.40s." ,
5926+ j , to_node -> name , to_node -> human_nodename , to_node -> shard_id );
5927+ server .cluster -> importing_slots_from [j ] = to_node ;
5928+ importing_processed ++ ;
5929+ }
5930+
5931+ if (server .cluster -> migrating_slots_to [j ] == from_node ) {
5932+ serverLog (LL_VERBOSE ,
5933+ "Failover occurred in migration target."
5934+ " Slot %d is now being migrated to node %.40s (%s) in shard %.40s." ,
5935+ j , to_node -> name , to_node -> human_nodename , to_node -> shard_id );
5936+ server .cluster -> migrating_slots_to [j ] = to_node ;
5937+ migrating_processed ++ ;
5938+ }
58995939 }
5900- return processed ;
5940+
5941+ if (slots ) * slots = processed ;
5942+ if (importing_slots ) * importing_slots = importing_processed ;
5943+ if (migrating_slots ) * migrating_slots = migrating_processed ;
59015944}
59025945
59035946/* Clear the migrating / importing state for all the slots.
0 commit comments