|
27 | 27 | import com.alibaba.fluss.exception.FlussRuntimeException; |
28 | 28 | import com.alibaba.fluss.exception.InvalidCoordinatorException; |
29 | 29 | import com.alibaba.fluss.exception.InvalidUpdateVersionException; |
| 30 | +import com.alibaba.fluss.exception.TabletServerNotAvailableException; |
30 | 31 | import com.alibaba.fluss.exception.UnknownTableOrBucketException; |
31 | 32 | import com.alibaba.fluss.metadata.PhysicalTablePath; |
32 | 33 | import com.alibaba.fluss.metadata.TableBucket; |
|
39 | 40 | import com.alibaba.fluss.rpc.messages.CommitKvSnapshotResponse; |
40 | 41 | import com.alibaba.fluss.rpc.messages.CommitLakeTableSnapshotResponse; |
41 | 42 | import com.alibaba.fluss.rpc.messages.CommitRemoteLogManifestResponse; |
| 43 | +import com.alibaba.fluss.rpc.messages.ControlledShutdownResponse; |
42 | 44 | import com.alibaba.fluss.rpc.messages.PbCommitLakeTableSnapshotRespForTable; |
43 | 45 | import com.alibaba.fluss.rpc.protocol.ApiError; |
44 | 46 | import com.alibaba.fluss.server.coordinator.event.AccessContextEvent; |
45 | 47 | import com.alibaba.fluss.server.coordinator.event.AdjustIsrReceivedEvent; |
46 | 48 | import com.alibaba.fluss.server.coordinator.event.CommitKvSnapshotEvent; |
47 | 49 | import com.alibaba.fluss.server.coordinator.event.CommitLakeTableSnapshotEvent; |
48 | 50 | import com.alibaba.fluss.server.coordinator.event.CommitRemoteLogManifestEvent; |
| 51 | +import com.alibaba.fluss.server.coordinator.event.ControlledShutdownEvent; |
49 | 52 | import com.alibaba.fluss.server.coordinator.event.CoordinatorEvent; |
50 | 53 | import com.alibaba.fluss.server.coordinator.event.CoordinatorEventManager; |
51 | 54 | import com.alibaba.fluss.server.coordinator.event.CreatePartitionEvent; |
|
73 | 76 | import com.alibaba.fluss.server.metadata.CoordinatorMetadataCache; |
74 | 77 | import com.alibaba.fluss.server.metadata.ServerInfo; |
75 | 78 | import com.alibaba.fluss.server.metrics.group.CoordinatorMetricGroup; |
| 79 | +import com.alibaba.fluss.server.utils.ServerRpcMessageUtils; |
76 | 80 | import com.alibaba.fluss.server.zk.ZooKeeperClient; |
77 | 81 | import com.alibaba.fluss.server.zk.data.BucketAssignment; |
78 | 82 | import com.alibaba.fluss.server.zk.data.LakeTableSnapshot; |
|
104 | 108 |
|
105 | 109 | import static com.alibaba.fluss.server.coordinator.statemachine.BucketState.OfflineBucket; |
106 | 110 | import static com.alibaba.fluss.server.coordinator.statemachine.BucketState.OnlineBucket; |
| 111 | +import static com.alibaba.fluss.server.coordinator.statemachine.ReplicaLeaderElectionStrategy.CONTROLLED_SHUTDOWN_ELECTION; |
107 | 112 | import static com.alibaba.fluss.server.coordinator.statemachine.ReplicaState.OfflineReplica; |
108 | 113 | import static com.alibaba.fluss.server.coordinator.statemachine.ReplicaState.OnlineReplica; |
109 | 114 | import static com.alibaba.fluss.server.coordinator.statemachine.ReplicaState.ReplicaDeletionStarted; |
@@ -493,6 +498,11 @@ public void process(CoordinatorEvent event) { |
493 | 498 | completeFromCallable( |
494 | 499 | commitLakeTableSnapshotEvent.getRespCallback(), |
495 | 500 | () -> tryProcessCommitLakeTableSnapshot(commitLakeTableSnapshotEvent)); |
| 501 | + } else if (event instanceof ControlledShutdownEvent) { |
| 502 | + ControlledShutdownEvent controlledShutdownEvent = (ControlledShutdownEvent) event; |
| 503 | + completeFromCallable( |
| 504 | + controlledShutdownEvent.getRespCallback(), |
| 505 | + () -> tryProcessControlledShutdown(controlledShutdownEvent)); |
496 | 506 | } else if (event instanceof AccessContextEvent) { |
497 | 507 | AccessContextEvent<?> accessContextEvent = (AccessContextEvent<?>) event; |
498 | 508 | processAccessContext(accessContextEvent); |
@@ -839,6 +849,7 @@ private void processDeadTabletServer(DeadTabletServerEvent deadTabletServerEvent |
839 | 849 | LOG.info("Tablet server failure callback for {}.", tabletServerId); |
840 | 850 | coordinatorContext.removeOfflineBucketInServer(tabletServerId); |
841 | 851 | coordinatorContext.removeLiveTabletServer(tabletServerId); |
| 852 | + coordinatorContext.shuttingDownTabletServers().remove(tabletServerId); |
842 | 853 | coordinatorChannelManager.removeTabletServer(tabletServerId); |
843 | 854 |
|
844 | 855 | // Here, we will first update alive tabletServer info for all tabletServers and |
@@ -1102,6 +1113,79 @@ private CommitLakeTableSnapshotResponse tryProcessCommitLakeTableSnapshot( |
1102 | 1113 | return response; |
1103 | 1114 | } |
1104 | 1115 |
|
| 1116 | + private ControlledShutdownResponse tryProcessControlledShutdown( |
| 1117 | + ControlledShutdownEvent controlledShutdownEvent) { |
| 1118 | + ControlledShutdownResponse response = new ControlledShutdownResponse(); |
| 1119 | + |
| 1120 | + // TODO here we need to check tabletServerEpoch, avoid to receive controlled shutdown |
| 1121 | + // request from and old tabletServer. |
| 1122 | + int tabletServerEpoch = controlledShutdownEvent.getTabletServerEpoch(); |
| 1123 | + |
| 1124 | + int tabletServerId = controlledShutdownEvent.getTabletServerId(); |
| 1125 | + LOG.info( |
| 1126 | + "Try to process controlled shutdown for tabletServer: {}", |
| 1127 | + controlledShutdownEvent.getTabletServerId()); |
| 1128 | + |
| 1129 | + if (!coordinatorContext.liveOrShuttingDownTabletServers().contains(tabletServerId)) { |
| 1130 | + throw new TabletServerNotAvailableException( |
| 1131 | + "TabletServer" + tabletServerId + " is not available."); |
| 1132 | + } |
| 1133 | + |
| 1134 | + coordinatorContext.shuttingDownTabletServers().add(tabletServerId); |
| 1135 | + LOG.debug( |
| 1136 | + "All shutting down tabletServers: {}", |
| 1137 | + coordinatorContext.shuttingDownTabletServers()); |
| 1138 | + LOG.debug("All live tabletServers: {}", coordinatorContext.liveTabletServerSet()); |
| 1139 | + |
| 1140 | + List<TableBucketReplica> replicasToActOn = |
| 1141 | + coordinatorContext.replicasOnTabletServer(tabletServerId).stream() |
| 1142 | + .filter( |
| 1143 | + replica -> { |
| 1144 | + TableBucket tableBucket = replica.getTableBucket(); |
| 1145 | + return coordinatorContext.getAssignment(tableBucket).size() >= 1 |
| 1146 | + && coordinatorContext |
| 1147 | + .getBucketLeaderAndIsr(tableBucket) |
| 1148 | + .isPresent() |
| 1149 | + && !coordinatorContext.isToBeDeleted(tableBucket); |
| 1150 | + }) |
| 1151 | + .collect(Collectors.toList()); |
| 1152 | + |
| 1153 | + Set<TableBucket> bucketsLedByServer = new HashSet<>(); |
| 1154 | + Set<TableBucketReplica> replicasFollowedByServer = new HashSet<>(); |
| 1155 | + for (TableBucketReplica replica : replicasToActOn) { |
| 1156 | + TableBucket tableBucket = replica.getTableBucket(); |
| 1157 | + if (replica.getReplica() |
| 1158 | + == coordinatorContext.getBucketLeaderAndIsr(tableBucket).get().leader()) { |
| 1159 | + bucketsLedByServer.add(tableBucket); |
| 1160 | + } else { |
| 1161 | + replicasFollowedByServer.add(replica); |
| 1162 | + } |
| 1163 | + } |
| 1164 | + |
| 1165 | + tableBucketStateMachine.handleStateChange( |
| 1166 | + bucketsLedByServer, OnlineBucket, CONTROLLED_SHUTDOWN_ELECTION); |
| 1167 | + |
| 1168 | + coordinatorRequestBatch.newBatch(); |
| 1169 | + replicasFollowedByServer.forEach( |
| 1170 | + replica -> |
| 1171 | + coordinatorRequestBatch.addStopReplicaRequestForTabletServers( |
| 1172 | + Collections.singleton(tabletServerId), |
| 1173 | + replica.getTableBucket(), |
| 1174 | + false, |
| 1175 | + coordinatorContext.getBucketLeaderEpoch(replica.getTableBucket()))); |
| 1176 | + coordinatorRequestBatch.sendRequestToTabletServers( |
| 1177 | + coordinatorContext.getCoordinatorEpoch()); |
| 1178 | + |
| 1179 | + // If the tabletServer is a follower, updates the isr in ZK and notifies the current leader. |
| 1180 | + replicaStateMachine.handleStateChanges(replicasFollowedByServer, OfflineReplica); |
| 1181 | + |
| 1182 | + response.addAllRemainingLeaderBuckets( |
| 1183 | + coordinatorContext.getBucketsWithLeaderIn(tabletServerId).stream() |
| 1184 | + .map(ServerRpcMessageUtils::fromTableBucket) |
| 1185 | + .collect(Collectors.toList())); |
| 1186 | + return response; |
| 1187 | + } |
| 1188 | + |
1105 | 1189 | private void validateFencedEvent(FencedCoordinatorEvent event) { |
1106 | 1190 | TableBucket tb = event.getTableBucket(); |
1107 | 1191 | if (coordinatorContext.getTablePathById(tb.getTableId()) == null) { |
|
0 commit comments