@@ -837,6 +837,17 @@ private void processDeadTabletServer(DeadTabletServerEvent deadTabletServerEvent
837837 coordinatorContext .removeLiveTabletServer (tabletServerId );
838838 coordinatorChannelManager .removeTabletServer (tabletServerId );
839839
840+ // First, refresh the set of live tabletServers in the coordinatorServer metadata cache
841+ // and send it to all tabletServers. This prevents the scenario where a
842+ // NotifyLeaderAndIsrRequest is sent before the UpdateMetadataRequest, which could cause the
843+ // leader to incorrectly adjust the isr.
844+ Set <ServerInfo > serverInfos =
845+ new HashSet <>(coordinatorContext .getLiveTabletServers ().values ());
846+ // update coordinatorServer metadata cache.
847+ serverMetadataCache .updateMetadata (
848+ coordinatorContext .getCoordinatorServerInfo (), serverInfos );
849+ updateTabletServerMetadataCache (serverInfos , null , null , Collections .emptySet ());
850+
840851 TableBucketStateMachine tableBucketStateMachine = tableManager .getTableBucketStateMachine ();
841852 // get all table buckets whose leader is on this server and which are not to be deleted
842853 Set <TableBucket > bucketsWithOfflineLeader =
@@ -865,11 +876,6 @@ private void processDeadTabletServer(DeadTabletServerEvent deadTabletServerEvent
865876 // trigger OfflineReplica state change for those newly offline replicas
866877 replicaStateMachine .handleStateChanges (replicas , OfflineReplica );
867878
868- Set <ServerInfo > serverInfos =
869- new HashSet <>(coordinatorContext .getLiveTabletServers ().values ());
870- // update coordinatorServer metadata cache.
871- serverMetadataCache .updateMetadata (
872- coordinatorContext .getCoordinatorServerInfo (), serverInfos );
873879 // update tabletServer metadata cache by sending an updateMetadata request.
874880 updateTabletServerMetadataCache (serverInfos , null , null , bucketsWithOfflineLeader );
875881 }
0 commit comments