@@ -583,3 +583,196 @@ start_cluster 3 2 {tags {external:skip cluster}} {
583583 assert_equal [dict get [cluster_get_node_by_id 4 $R4_nodeid ] slaveof] $R3_nodeid
584584 }
585585}
586+
587+ start_cluster 3 2 {tags {external:skip cluster}} {
588+ # This test consists of two phases.
589+ # In the first phase, we create a scenario where two primaries end up in the same shard. See #2279 for more details.
590+ # In the second phase, we test the behavior of the nodes when packets arrive out of order. See #2301 for more details.
591+ #
592+ # The first phase.
593+ # In the R0/R3/R4 shard, R0 is the primary (cluster-allow-replica-migration no), R3 is the replica, and R4 will become a replica later.
594+ # 1. R0 goes down, and R3 triggers a failover and becomes the new primary.
595+ # 2. R0 (old primary) stays down while a new node R4 is added as a replica of R3 (new primary).
596+ # 3. R3 (new primary) goes down, and R4 triggers a failover and becomes the new primary.
597+ # 4. R0 (old primary) and R3 (old primary) come back up and start learning about the new topology.
598+ # 5. R0 (old primary) comes back up thinking it is still the primary, but it has an older config epoch than R4 (new primary).
599+ # 6. R0 (old primary) learns about R4 (new primary) as a new node via gossip and assigns it a random shard_id.
600+ # 7. R0 (old primary) receives a direct ping from R4 (new primary).
601+ # a. R4 (new primary) advertises the same set of slots that R0 (old primary) previously owned.
602+ # b. Since R0 (old primary) assigned a random shard_id to R4 (new primary) earlier, R0 (old primary) thinks
603+ # that it is still a primary and that it lost all its slots to R4 (new primary), which is in another shard.
604+ # R0 (old primary) becomes an empty primary.
605+ # c. R0 (empty primary) then updates the actual shard_id of R4 (new primary) while processing the ping extensions.
606+ # 8. R0 (empty primary) and R4 (new primary) end up as primaries in the same shard while R4 continues to own the slots.
607+ #
608+ # The second phase.
609+ # In the R0/R3/R4 shard, R4 is the primary, R3 is the replica, and R0 is an empty primary.
610+ # 1. We perform a failover on R3 and a replicate on R0 to make R0 a replica of R3.
611+ # 2. When R3 becomes the new primary node, it broadcasts a message to all nodes in the cluster.
612+ # 3. When R4 receives the message, it becomes the new replica and also broadcasts a message to all nodes in the cluster.
613+ # 4. When R0 becomes a replica after the replication, it broadcasts a message to all nodes in the cluster.
614+ # 5. Assume that R1 and R2 receive the messages from R0 and R4 first, and the message from R3 (new primary) only later.
615+ # 6. When R1 receives the message from R0 (sent after the replication), it should see that R0 is a replica and its primary is R3.
616+ # 7. When R2 receives the message from R4 (sent after the failover), it should see that R4 is a replica and its primary is R3.
617+ test "Combine the test cases of #2279 and #2301 to test #2431" {
618+ # ============== Phase 1 start ==============
619+ R 0 config set cluster-allow-replica-migration no
620+
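+ # Sentinel values for DEBUG DROP-CLUSTER-PACKET-FILTER: as used below, -2 drops
+ # every cluster bus packet and -1 clears the filter so packets flow again.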
621+ set CLUSTER_PACKET_TYPE_NONE -1
622+ set CLUSTER_PACKET_TYPE_ALL -2
623+
624+ # We make R4 become a fresh new node.
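+ # (isolate_node is a test helper; here it is assumed to remove R4 from every other
+ # node's view and reset it, so R4 later rejoins the cluster as a brand-new node.)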
625+ isolate_node 4
626+
627+ # Set debug flags on R0 so that no packets can be exchanged when we resume it.
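+ # With the filter set to ALL, every cluster bus packet R0 sends or receives is
+ # dropped, each drop also closes the link, and reconnection is disabled, so R0
+ # stays completely cut off from the cluster bus once it is unpaused.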
628+ R 0 debug disable-cluster-reconnection 1
629+ R 0 debug close-cluster-link-on-packet-drop 1
630+ R 0 debug drop-cluster-packet-filter $CLUSTER_PACKET_TYPE_ALL
631+
632+ # Pause R0 and wait for R3 to become a new primary.
633+ pause_process [srv 0 pid]
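+ # FORCE starts the failover without a handshake with the paused primary R0;
+ # the election itself still needs votes from a majority of the primaries (R1, R2).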
634+ R 3 cluster failover force
635+ wait_for_condition 1000 50 {
636+ [s -3 role] eq {master}
637+ } else {
638+ fail "Failed waiting for R3 to take over primaryship"
639+ }
640+
641+ # Add R4 and wait for R4 to become a replica of R3.
642+ R 4 cluster meet [srv -3 host] [srv -3 port]
643+ wait_for_condition 50 100 {
644+ [cluster_get_node_by_id 4 [R 3 cluster myid]] != {}
645+ } else {
646+ fail "Node R4 never learned about node R3"
647+ }
648+ R 4 cluster replicate [R 3 cluster myid]
649+ wait_for_sync [srv -4 client]
650+
651+ # Pause R3 and wait for R4 to become a new primary.
652+ pause_process [srv -3 pid]
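+ # TAKEOVER makes R4 claim R3's slots and bump its config epoch unilaterally,
+ # without waiting for an election or for agreement from the paused primary R3.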
653+ R 4 cluster failover takeover
654+ wait_for_condition 1000 50 {
655+ [s -4 role] eq {master}
656+ } else {
657+ fail "Failed waiting for R4 to become primary"
658+ }
659+
660+ # Resume R0 and R3
661+ resume_process [srv 0 pid]
662+ resume_process [srv -3 pid]
663+
664+ # Make sure R0 drops all the links so that it won't get the pending packets.
665+ wait_for_condition 1000 50 {
666+ [R 0 cluster links] eq {}
667+ } else {
668+ fail "Failed waiting for R0 to drop all cluster links"
669+ }
670+
671+ # Clear the debug flags on R0 and let it start exchanging packets again.
672+ R 0 debug disable-cluster-reconnection 0
673+ R 0 debug close-cluster-link-on-packet-drop 0
674+ R 0 debug drop-cluster-packet-filter $CLUSTER_PACKET_TYPE_NONE
675+
676+ # ============== Phase 1 end ==============
677+
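+ # wait_for_cluster_propagation waits until every node reports a consistent view of
+ # the cluster configuration, so phase 2 starts from an agreed-upon topology.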
678+ wait_for_cluster_propagation
679+
680+ # ============== Phase 2 start ==============
681+
682+ set R0_nodeid [R 0 cluster myid]
683+ set R1_nodeid [R 1 cluster myid]
684+ set R2_nodeid [R 2 cluster myid]
685+ set R3_nodeid [R 3 cluster myid]
686+ set R4_nodeid [R 4 cluster myid]
687+
688+ set R0_shardid [R 0 cluster myshardid]
689+ set R3_shardid [R 3 cluster myshardid]
690+ set R4_shardid [R 4 cluster myshardid]
691+
692+ # R0 is now an empty primary, R4 is the primary, and R3 is the replica.
693+ # R0 and R4 are both primaries in the same shard; this may change with #2279,
694+ # and the asserts below can be removed then.
695+ assert_equal [s 0 role] "master"
696+ assert_equal [s -3 role] "slave"
697+ assert_equal [s -4 role] "master"
698+ assert_equal $R0_shardid $R3_shardid
699+ assert_equal $R0_shardid $R4_shardid
700+
701+ # Ensure that related nodes do not reconnect after we kill the cluster links.
702+ R 1 debug disable-cluster-reconnection 1
703+ R 2 debug disable-cluster-reconnection 1
704+ R 3 debug disable-cluster-reconnection 1
705+ R 4 debug disable-cluster-reconnection 1
706+
707+ # R3 will do the failover, and R0 will do the replicate with R3:
708+ # R3 becomes the new primary after the failover,
709+ # R4 becomes a replica after the failover,
710+ # R0 becomes a replica after the replicate.
711+ # Before we do that, kill some cluster links to create the test conditions.
712+ # Ensure that R1 and R2 in the other shards do not receive packets from R3 (new primary),
713+ # but first receive packets from R0 and R4 respectively.
714+
715+ # R1 first receives the packet from R0.
716+ # Kill the cluster links between R1 and R3, and between R1 and R4, to ensure that:
717+ # R1 cannot receive messages from R3 (new primary),
718+ # R1 cannot receive messages from R4 (replica),
719+ # and R1 can only receive messages from R0 (new replica).
720+ R 1 debug clusterlink kill all $R3_nodeid
721+ R 3 debug clusterlink kill all $R1_nodeid
722+ R 1 debug clusterlink kill all $R4_nodeid
723+ R 4 debug clusterlink kill all $R1_nodeid
724+
725+ # R2 first receives the packet from R4.
726+ # Kill the cluster links between R2 and R3, and between R2 and R0, to ensure that:
727+ # R2 cannot receive messages from R3 (new primary),
728+ # R2 cannot receive messages from R0 (new replica),
729+ # and R2 can only receive messages from R4 (replica).
730+ R 2 debug clusterlink kill all $R3_nodeid
731+ R 3 debug clusterlink kill all $R2_nodeid
732+ R 2 debug clusterlink kill all $R0_nodeid
733+ R 0 debug clusterlink kill all $R2_nodeid
734+
735+ # R3 does the failover, and R0 does the replicate with R3.
736+ R 3 cluster failover takeover
737+ wait_for_condition 1000 10 {
738+ [cluster_has_flag [cluster_get_node_by_id 0 $R3_nodeid ] master] eq 1
739+ } else {
740+ fail "R3 did not become the primary node"
741+ }
742+ R 0 cluster replicate $R3_nodeid
743+
744+ # Check that, from the perspective of R1 and R2, when they first receive the
745+ # replica's packet, they correctly fix the roles of both the sender and its primary.
746+
747+ # Check that, from the perspective of R1, when it receives the packet from R0,
748+ # R0 is seen as a replica whose primary is R3; this is due to the replicate.
749+ wait_for_condition 1000 10 {
750+ [cluster_has_flag [cluster_get_node_by_id 1 $R0_nodeid ] slave] eq 1 &&
751+ [cluster_has_flag [cluster_get_node_by_id 1 $R3_nodeid ] master] eq 1
752+ } else {
753+ puts "R1 cluster nodes:"
754+ puts [R 1 cluster nodes]
755+ fail "The node is not marked with the correct flag in R1's view"
756+ }
757+
758+ # Check that, from the perspective of R2, when it receives the packet from R4,
759+ # R4 is seen as a replica whose primary is R3; this is due to the failover.
760+ wait_for_condition 1000 10 {
761+ [cluster_has_flag [cluster_get_node_by_id 2 $R4_nodeid ] slave] eq 1 &&
762+ [cluster_has_flag [cluster_get_node_by_id 2 $R3_nodeid ] master] eq 1
763+ } else {
764+ puts "R2 cluster nodes:"
765+ puts [R 2 cluster nodes]
766+ fail "The node is not marked with the correct flag in R2's view"
767+ }
768+
769+ # ============== Phase 2 end ==============
770+
771+ R 0 debug disable-cluster-reconnection 0
772+ R 1 debug disable-cluster-reconnection 0
773+ R 2 debug disable-cluster-reconnection 0
774+ R 3 debug disable-cluster-reconnection 0
775+ R 4 debug disable-cluster-reconnection 0
776+ wait_for_cluster_propagation
777+ }
778+ }