Skip to content

Commit a481fe2

Browse files
authored
Update clusterMoveNodeSlots to also move importing slots and migrating slots (valkey-io#2370)
In valkey-io#2301, we added clusterMoveNodeSlots to implement the logic of moving slots from the old primary to the new primary when myself receives the replica (old primary) message first and the new primary message later in a shard failover. However, because of this, when myself later receives the new primary message again, there is no way to call clusterUpdateSlotsConfigWith, because we have already updated the slots of the new primary beforehand. This results in, for example, importing slots and migrating slots not being updated; see valkey-io#445. In this commit, we also make clusterMoveNodeSlots move importing slots and migrating slots. Fixes valkey-io#2363. Signed-off-by: Binbin <binloveplay1314@qq.com>
1 parent 2a44506 commit a481fe2

File tree

4 files changed

+107
-34
lines changed

4 files changed

+107
-34
lines changed

src/cluster_legacy.c

Lines changed: 59 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ int clusterNodeAddReplica(clusterNode *primary, clusterNode *replica);
7272
int clusterAddSlot(clusterNode *n, int slot);
7373
int clusterDelSlot(int slot);
7474
int clusterDelNodeSlots(clusterNode *node);
75-
int clusterMoveNodeSlots(clusterNode *from_node, clusterNode *to_node);
75+
void clusterMoveNodeSlots(clusterNode *from_node, clusterNode *to_node, int *slots, int *importing_slots, int *migrating_slots);
7676
void clusterNodeSetSlotBit(clusterNode *n, int slot);
7777
static void clusterSetPrimary(clusterNode *n, int closeSlots, int full_sync_required);
7878
void clusterHandleReplicaFailover(void);
@@ -2783,7 +2783,7 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
27832783
/* Update importing_slots_from to point to the sender, if it is in the
27842784
* same shard as the previous slot owner */
27852785
if (areInSameShard(sender, server.cluster->importing_slots_from[j])) {
2786-
serverLog(LL_NOTICE,
2786+
serverLog(LL_VERBOSE,
27872787
"Failover occurred in migration source. Update importing "
27882788
"source for slot %d to node %.40s (%s) in shard %.40s.",
27892789
j, sender->name, sender->human_nodename, sender->shard_id);
@@ -2825,7 +2825,7 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
28252825
(server.cluster->migrating_slots_to[j]->configEpoch < senderConfigEpoch ||
28262826
nodeIsReplica(server.cluster->migrating_slots_to[j])) &&
28272827
areInSameShard(server.cluster->migrating_slots_to[j], sender)) {
2828-
serverLog(LL_NOTICE,
2828+
serverLog(LL_VERBOSE,
28292829
"Failover occurred in migration target."
28302830
" Slot %d is now being migrated to node %.40s (%s) in shard %.40s.",
28312831
j, sender->name, sender->human_nodename, sender->shard_id);
@@ -3717,17 +3717,35 @@ int clusterProcessPacket(clusterLink *link) {
37173717
/* A failover occurred in the shard where `sender` belongs to and `sender` is
37183718
* no longer a primary. Update slot assignment to `sender_claimed_config_epoch`,
37193719
* which is the new primary in the shard. */
3720-
int slots = clusterMoveNodeSlots(sender, sender_claimed_primary);
3720+
int slots = 0, importing_slots = 0, migrating_slots = 0;
3721+
clusterMoveNodeSlots(sender, sender_claimed_primary,
3722+
&slots, &importing_slots, &migrating_slots);
37213723
/* `primary` is still a `replica` in this observer node's view;
37223724
* update its role and configEpoch */
37233725
clusterSetNodeAsPrimary(sender_claimed_primary);
37243726
sender_claimed_primary->configEpoch = sender_claimed_config_epoch;
3725-
serverLog(LL_NOTICE,
3726-
"A failover occurred in shard %.40s; node %.40s (%s) lost %d slot(s) and"
3727-
" failed over to node %.40s (%s) with a config epoch of %llu",
3728-
sender->shard_id, sender->name, sender->human_nodename, slots,
3729-
sender_claimed_primary->name, sender_claimed_primary->human_nodename,
3730-
(unsigned long long)sender_claimed_primary->configEpoch);
3727+
if (slots) {
3728+
serverLog(LL_NOTICE,
3729+
"A failover occurred in shard %.40s; node %.40s (%s) lost %d slot(s) and"
3730+
" failed over to node %.40s (%s) with a config epoch of %llu",
3731+
sender->shard_id, sender->name, sender->human_nodename, slots,
3732+
sender_claimed_primary->name, sender_claimed_primary->human_nodename,
3733+
(unsigned long long)sender_claimed_primary->configEpoch);
3734+
}
3735+
if (importing_slots) {
3736+
serverLog(LL_NOTICE,
3737+
"A failover occurred in migration source. Update importing "
3738+
"source of %d slot(s) to node %.40s (%s) in shard %.40s.",
3739+
importing_slots, sender_claimed_primary->name,
3740+
sender_claimed_primary->human_nodename, sender_claimed_primary->shard_id);
3741+
}
3742+
if (migrating_slots) {
3743+
serverLog(LL_NOTICE,
3744+
"A failover occurred in migration target. Update migrating "
3745+
"target of %d slot(s) to node %.40s (%s) in shard %.40s.",
3746+
migrating_slots, sender_claimed_primary->name,
3747+
sender_claimed_primary->human_nodename, sender_claimed_primary->shard_id);
3748+
}
37313749
serverAssert(sender->numslots == 0);
37323750
}
37333751
} else {
@@ -3748,7 +3766,7 @@ int clusterProcessPacket(clusterLink *link) {
37483766
sender->flags |= CLUSTER_NODE_REPLICA;
37493767

37503768
/* Update config and state. */
3751-
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG | CLUSTER_TODO_UPDATE_STATE);
3769+
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG | CLUSTER_TODO_UPDATE_STATE | CLUSTER_TODO_FSYNC_CONFIG);
37523770
}
37533771

37543772
/* Primary node changed for this replica? */
@@ -5886,18 +5904,43 @@ int clusterDelNodeSlots(clusterNode *node) {
58865904
/* Transfer slots from `from_node` to `to_node`.
58875905
*
58885906
* Iterates over all cluster slots, transferring each slot covered
5889-
* by `from_node` to `to_node`. Counts and returns the number of
5890-
* slots transferred. */
5891-
int clusterMoveNodeSlots(clusterNode *from_node, clusterNode *to_node) {
5892-
int processed = 0;
5907+
* by `from_node` to `to_node`. Includes importing slots and migrating
5908+
* slots. This function is currently only called after a failover occurs
5909+
* within a shard, i.e. moving slots from the old primary to the new
5910+
* primary. It is a special case of clusterUpdateSlotsConfigWith. */
5911+
void clusterMoveNodeSlots(clusterNode *from_node, clusterNode *to_node, int *slots, int *importing_slots, int *migrating_slots) {
5912+
serverAssert(areInSameShard(from_node, to_node));
5913+
int processed = 0, importing_processed = 0, migrating_processed = 0;
5914+
58935915
for (int j = 0; j < CLUSTER_SLOTS; j++) {
58945916
if (clusterNodeCoversSlot(from_node, j)) {
58955917
clusterDelSlot(j);
58965918
clusterAddSlot(to_node, j);
58975919
processed++;
58985920
}
5921+
5922+
if (server.cluster->importing_slots_from[j] == from_node) {
5923+
serverLog(LL_VERBOSE,
5924+
"Failover occurred in migration source. Update importing "
5925+
"source for slot %d to node %.40s (%s) in shard %.40s.",
5926+
j, to_node->name, to_node->human_nodename, to_node->shard_id);
5927+
server.cluster->importing_slots_from[j] = to_node;
5928+
importing_processed++;
5929+
}
5930+
5931+
if (server.cluster->migrating_slots_to[j] == from_node) {
5932+
serverLog(LL_VERBOSE,
5933+
"Failover occurred in migration target."
5934+
" Slot %d is now being migrated to node %.40s (%s) in shard %.40s.",
5935+
j, to_node->name, to_node->human_nodename, to_node->shard_id);
5936+
server.cluster->migrating_slots_to[j] = to_node;
5937+
migrating_processed++;
5938+
}
58995939
}
5900-
return processed;
5940+
5941+
if (slots) *slots = processed;
5942+
if (importing_slots) *importing_slots = importing_processed;
5943+
if (migrating_slots) *migrating_slots = migrating_processed;
59015944
}
59025945

59035946
/* Clear the migrating / importing state for all the slots.

tests/support/cluster_util.tcl

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,3 +433,21 @@ proc check_cluster_node_mark {flag ref_node_index instance_id_to_check} {
433433
proc get_slot_field {slot_output shard_id node_id attrib_id} {
434434
return [lindex [lindex [lindex $slot_output $shard_id] $node_id] $attrib_id]
435435
}
436+
437+
proc get_open_slots {srv_idx} {
438+
set slots [dict get [cluster_get_myself $srv_idx] slots]
439+
if {[regexp {\[.*} $slots slots]} {
440+
set slots [regsub -all {[{}]} $slots ""]
441+
return $slots
442+
} else {
443+
return {}
444+
}
445+
}
446+
447+
proc wait_for_slot_state {srv_idx pattern} {
448+
wait_for_condition 100 100 {
449+
[get_open_slots $srv_idx] eq $pattern
450+
} else {
451+
fail "incorrect slot state on R $srv_idx: expected $pattern; got [get_open_slots $srv_idx]"
452+
}
453+
}

tests/unit/cluster/manual-failover.tcl

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,16 @@ start_cluster 3 1 {tags {external:skip cluster}} {
422422
set R3_shardid [R 3 cluster myshardid]
423423
assert_equal $R0_shardid $R3_shardid
424424

425+
# We also take this opportunity to verify slot migration.
426+
# Move slot 0 from R0 to R1. Move slot 5462 from R1 to R0.
427+
R 0 cluster setslot 0 migrating $R1_nodeid
428+
R 1 cluster setslot 0 importing $R0_nodeid
429+
R 1 cluster setslot 5462 migrating $R0_nodeid
430+
R 0 cluster setslot 5462 importing $R1_nodeid
431+
assert_equal [get_open_slots 0] "\[0->-$R1_nodeid\] \[5462-<-$R1_nodeid\]"
432+
assert_equal [get_open_slots 1] "\[0-<-$R0_nodeid\] \[5462->-$R0_nodeid\]"
433+
wait_for_slot_state 3 "\[0->-$R1_nodeid\] \[5462-<-$R1_nodeid\]"
434+
425435
# Ensure that related nodes do not reconnect.
426436
R 1 debug disable-cluster-reconnection 1
427437
R 2 debug disable-cluster-reconnection 1
@@ -458,13 +468,33 @@ start_cluster 3 1 {tags {external:skip cluster}} {
458468
assert_equal {0-5461} [dict get [cluster_get_node_by_id 1 $R3_nodeid] slots]
459469
assert_equal {0-5461} [dict get [cluster_get_node_by_id 2 $R3_nodeid] slots]
460470

471+
# Check that in the R1 perspective, both migration-source and migration-target
472+
# have moved from R0 to R3.
473+
assert_equal [get_open_slots 0] "\[0->-$R1_nodeid\] \[5462-<-$R1_nodeid\]"
474+
assert_equal [get_open_slots 1] "\[0-<-$R3_nodeid\] \[5462->-$R3_nodeid\]"
475+
assert_equal [get_open_slots 3] "\[0->-$R1_nodeid\] \[5462-<-$R1_nodeid\]"
476+
461477
# A failover occurred in shard, we will only go to this code branch,
462478
# verify we print the logs.
479+
480+
# Both importing slots and migrating slots are moved to R3.
481+
set pattern "*Failover occurred in migration source. Update importing source for slot 0 to node $R3_nodeid () in shard $R3_shardid*"
482+
verify_log_message -1 $pattern $loglines1
483+
set pattern "*Failover occurred in migration target. Slot 5462 is now being migrated to node $R3_nodeid () in shard $R3_shardid*"
484+
verify_log_message -1 $pattern $loglines1
485+
486+
# Both slots are moved to R3.
463487
set R0_slots 5462
464488
set pattern "*A failover occurred in shard $R3_shardid; node $R0_nodeid () lost $R0_slots slot(s) and failed over to node $R3_nodeid*"
465489
verify_log_message -1 $pattern $loglines1
466490
verify_log_message -2 $pattern $loglines2
467491

492+
# Both importing slots and migrating slots are moved to R3.
493+
set pattern "*A failover occurred in migration source. Update importing source of 1 slot(s) to node $R3_nodeid () in shard $R3_shardid*"
494+
verify_log_message -1 $pattern $loglines1
495+
set pattern "*A failover occurred in migration target. Update migrating target of 1 slot(s) to node $R3_nodeid () in shard $R3_shardid*"
496+
verify_log_message -1 $pattern $loglines1
497+
468498
R 1 debug disable-cluster-reconnection 0
469499
R 2 debug disable-cluster-reconnection 0
470500
R 3 debug disable-cluster-reconnection 0

tests/unit/cluster/slot-migration.tcl

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,3 @@
1-
proc get_open_slots {srv_idx} {
2-
set slots [dict get [cluster_get_myself $srv_idx] slots]
3-
if {[regexp {\[.*} $slots slots]} {
4-
set slots [regsub -all {[{}]} $slots ""]
5-
return $slots
6-
} else {
7-
return {}
8-
}
9-
}
10-
111
proc get_cluster_role {srv_idx} {
122
set flags [dict get [cluster_get_myself $srv_idx] flags]
133
set role [lindex $flags 1]
@@ -80,14 +70,6 @@ proc wait_for_role {srv_idx role} {
8070
wait_for_cluster_propagation
8171
}
8272

83-
proc wait_for_slot_state {srv_idx pattern} {
84-
wait_for_condition 100 100 {
85-
[get_open_slots $srv_idx] eq $pattern
86-
} else {
87-
fail "incorrect slot state on R $srv_idx: expected $pattern; got [get_open_slots $srv_idx]"
88-
}
89-
}
90-
9173
# restart a server and wait for it to come back online
9274
proc fail_server {server_id} {
9375
set node_timeout [lindex [R 0 CONFIG GET cluster-node-timeout] 1]

0 commit comments

Comments
 (0)