Skip to content

Commit 08758e9

Browse files
authored
[kv] Commit kv snapshot to zk should be done asynchronously and not block coordinator (#1375)
1 parent 49eeec1 commit 08758e9

File tree

4 files changed

+96
-25
lines changed

4 files changed

+96
-25
lines changed

fluss-server/src/main/java/org/apache/fluss/server/coordinator/CompletedSnapshotStoreManager.java

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,17 +26,18 @@
2626
import org.apache.fluss.server.kv.snapshot.SharedKvFileRegistry;
2727
import org.apache.fluss.server.kv.snapshot.ZooKeeperCompletedSnapshotHandleStore;
2828
import org.apache.fluss.server.zk.ZooKeeperClient;
29+
import org.apache.fluss.utils.MapUtils;
2930

3031
import org.slf4j.Logger;
3132
import org.slf4j.LoggerFactory;
3233

33-
import javax.annotation.concurrent.NotThreadSafe;
34+
import javax.annotation.concurrent.ThreadSafe;
3435

3536
import java.util.ArrayList;
36-
import java.util.HashMap;
3737
import java.util.List;
3838
import java.util.Map;
3939
import java.util.Set;
40+
import java.util.concurrent.ConcurrentHashMap;
4041
import java.util.concurrent.Executor;
4142
import java.util.function.Function;
4243

@@ -48,13 +49,14 @@
4849
* {@link CompletedSnapshotStore} not exist for a {@link TableBucket}, it will create a new {@link
4950
* CompletedSnapshotStore} for it.
5051
*/
51-
@NotThreadSafe
52+
@ThreadSafe
5253
public class CompletedSnapshotStoreManager {
5354

5455
private static final Logger LOG = LoggerFactory.getLogger(CompletedSnapshotStoreManager.class);
5556
private final int maxNumberOfSnapshotsToRetain;
5657
private final ZooKeeperClient zooKeeperClient;
57-
private final Map<TableBucket, CompletedSnapshotStore> bucketCompletedSnapshotStores;
58+
private final ConcurrentHashMap<TableBucket, CompletedSnapshotStore>
59+
bucketCompletedSnapshotStores;
5860
private final Executor ioExecutor;
5961
private final Function<ZooKeeperClient, CompletedSnapshotHandleStore>
6062
makeZookeeperCompletedSnapshotHandleStore;
@@ -67,7 +69,7 @@ public CompletedSnapshotStoreManager(
6769
maxNumberOfSnapshotsToRetain > 0, "maxNumberOfSnapshotsToRetain must be positive");
6870
this.maxNumberOfSnapshotsToRetain = maxNumberOfSnapshotsToRetain;
6971
this.zooKeeperClient = zooKeeperClient;
70-
this.bucketCompletedSnapshotStores = new HashMap<>();
72+
this.bucketCompletedSnapshotStores = MapUtils.newConcurrentHashMap();
7173
this.ioExecutor = ioExecutor;
7274
this.makeZookeeperCompletedSnapshotHandleStore = ZooKeeperCompletedSnapshotHandleStore::new;
7375
}
@@ -83,7 +85,7 @@ public CompletedSnapshotStoreManager(
8385
maxNumberOfSnapshotsToRetain > 0, "maxNumberOfSnapshotsToRetain must be positive");
8486
this.maxNumberOfSnapshotsToRetain = maxNumberOfSnapshotsToRetain;
8587
this.zooKeeperClient = zooKeeperClient;
86-
this.bucketCompletedSnapshotStores = new HashMap<>();
88+
this.bucketCompletedSnapshotStores = MapUtils.newConcurrentHashMap();
8789
this.ioExecutor = ioExecutor;
8890
this.makeZookeeperCompletedSnapshotHandleStore = makeZookeeperCompletedSnapshotHandleStore;
8991
}
@@ -191,7 +193,8 @@ private CompletedSnapshotStore createCompletedSnapshotStore(
191193
ioExecutor);
192194
}
193195

194-
public Map<TableBucket, CompletedSnapshotStore> getBucketCompletedSnapshotStores() {
196+
@VisibleForTesting
197+
Map<TableBucket, CompletedSnapshotStore> getBucketCompletedSnapshotStores() {
195198
return bucketCompletedSnapshotStores;
196199
}
197200
}

fluss-server/src/main/java/org/apache/fluss/server/coordinator/CoordinatorEventProcessor.java

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
import org.apache.fluss.server.coordinator.event.EventProcessor;
5757
import org.apache.fluss.server.coordinator.event.FencedCoordinatorEvent;
5858
import org.apache.fluss.server.coordinator.event.NewTabletServerEvent;
59+
import org.apache.fluss.server.coordinator.event.NotifyKvSnapshotOffsetEvent;
5960
import org.apache.fluss.server.coordinator.event.NotifyLeaderAndIsrResponseReceivedEvent;
6061
import org.apache.fluss.server.coordinator.event.watcher.TableChangeWatcher;
6162
import org.apache.fluss.server.coordinator.event.watcher.TabletServerChangeWatcher;
@@ -117,6 +118,7 @@ public class CoordinatorEventProcessor implements EventProcessor {
117118
private static final Logger LOG = LoggerFactory.getLogger(CoordinatorEventProcessor.class);
118119

119120
private final ZooKeeperClient zooKeeperClient;
121+
private final ExecutorService ioExecutor;
120122
private final CoordinatorContext coordinatorContext;
121123
private final ReplicaStateMachine replicaStateMachine;
122124
private final TableBucketStateMachine tableBucketStateMachine;
@@ -190,6 +192,7 @@ public CoordinatorEventProcessor(
190192
this.lakeTableTieringManager = lakeTableTieringManager;
191193
this.coordinatorMetricGroup = coordinatorMetricGroup;
192194
this.internalListenerName = conf.getString(ConfigOptions.INTERNAL_LISTENER_NAME);
195+
this.ioExecutor = ioExecutor;
193196
}
194197

195198
public CoordinatorEventManager getCoordinatorEventManager() {
@@ -455,9 +458,10 @@ public void process(CoordinatorEvent event) {
455458
adjustIsrReceivedEvent.getLeaderAndIsrMap())));
456459
} else if (event instanceof CommitKvSnapshotEvent) {
457460
CommitKvSnapshotEvent commitKvSnapshotEvent = (CommitKvSnapshotEvent) event;
458-
CompletableFuture<CommitKvSnapshotResponse> callback =
459-
commitKvSnapshotEvent.getRespCallback();
460-
completeFromCallable(callback, () -> tryProcessCommitKvSnapshot(commitKvSnapshotEvent));
461+
tryProcessCommitKvSnapshot(
462+
commitKvSnapshotEvent, commitKvSnapshotEvent.getRespCallback());
463+
} else if (event instanceof NotifyKvSnapshotOffsetEvent) {
464+
processNotifyKvSnapshotOffsetEvent((NotifyKvSnapshotOffsetEvent) event);
461465
} else if (event instanceof CommitRemoteLogManifestEvent) {
462466
CommitRemoteLogManifestEvent commitRemoteLogManifestEvent =
463467
(CommitRemoteLogManifestEvent) event;
@@ -936,21 +940,40 @@ private void validateLeaderAndIsr(TableBucket tableBucket, LeaderAndIsr newLeade
936940
}
937941
}
938942

939-
private CommitKvSnapshotResponse tryProcessCommitKvSnapshot(CommitKvSnapshotEvent event)
940-
throws Exception {
943+
private void tryProcessCommitKvSnapshot(
944+
CommitKvSnapshotEvent event, CompletableFuture<CommitKvSnapshotResponse> callback) {
941945
// validate
942-
validateFencedEvent(event);
946+
try {
947+
validateFencedEvent(event);
948+
} catch (Exception e) {
949+
callback.completeExceptionally(e);
950+
return;
951+
}
952+
// commit the kv snapshot asynchronously
953+
ioExecutor.execute(
954+
() -> {
955+
try {
956+
TableBucket tb = event.getTableBucket();
957+
CompletedSnapshot completedSnapshot =
958+
event.getAddCompletedSnapshotData().getCompletedSnapshot();
959+
// add completed snapshot
960+
CompletedSnapshotStore completedSnapshotStore =
961+
completedSnapshotStoreManager.getOrCreateCompletedSnapshotStore(tb);
962+
// this involves IO operation (ZK), so we do it in ioExecutor
963+
completedSnapshotStore.add(completedSnapshot);
964+
coordinatorEventManager.put(
965+
new NotifyKvSnapshotOffsetEvent(
966+
tb, completedSnapshot.getLogOffset()));
967+
callback.complete(new CommitKvSnapshotResponse());
968+
} catch (Exception e) {
969+
callback.completeExceptionally(e);
970+
}
971+
});
972+
}
943973

974+
private void processNotifyKvSnapshotOffsetEvent(NotifyKvSnapshotOffsetEvent event) {
944975
TableBucket tb = event.getTableBucket();
945-
CompletedSnapshot completedSnapshot =
946-
event.getAddCompletedSnapshotData().getCompletedSnapshot();
947-
// add completed snapshot
948-
CompletedSnapshotStore completedSnapshotStore =
949-
completedSnapshotStoreManager.getOrCreateCompletedSnapshotStore(tb);
950-
completedSnapshotStore.add(completedSnapshot);
951-
952-
// send notify snapshot request to all replicas.
953-
// TODO: this should be moved after sending AddCompletedSnapshotResponse
976+
long logOffset = event.getLogOffset();
954977
coordinatorRequestBatch.newBatch();
955978
coordinatorContext
956979
.getBucketLeaderAndIsr(tb)
@@ -961,10 +984,9 @@ private CommitKvSnapshotResponse tryProcessCommitKvSnapshot(CommitKvSnapshotEven
961984
coordinatorContext.getFollowers(
962985
tb, leaderAndIsr.leader()),
963986
tb,
964-
completedSnapshot.getLogOffset()));
987+
logOffset));
965988
coordinatorRequestBatch.sendNotifyKvSnapshotOffsetRequest(
966989
coordinatorContext.getCoordinatorEpoch());
967-
return new CommitKvSnapshotResponse();
968990
}
969991

970992
private CommitRemoteLogManifestResponse tryProcessCommitRemoteLogManifest(
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.fluss.server.coordinator.event;
20+
21+
import org.apache.fluss.metadata.TableBucket;
22+
23+
/** An event for notify kv snapshot offset to local tablet servers. */
24+
public class NotifyKvSnapshotOffsetEvent implements CoordinatorEvent {
25+
26+
private final TableBucket tableBucket;
27+
private final long logOffset;
28+
29+
public NotifyKvSnapshotOffsetEvent(TableBucket tableBucket, long logOffset) {
30+
this.tableBucket = tableBucket;
31+
this.logOffset = logOffset;
32+
}
33+
34+
public TableBucket getTableBucket() {
35+
return tableBucket;
36+
}
37+
38+
public long getLogOffset() {
39+
return logOffset;
40+
}
41+
}

fluss-server/src/test/java/org/apache/fluss/server/coordinator/CoordinatorEventProcessorTest.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -763,7 +763,11 @@ void testNotifyOffsetsWithShrinkISR(@TempDir Path tempDir) throws Exception {
763763
completedSnapshot, coordinatorEpoch, bucketLeaderEpoch),
764764
responseCompletableFuture2));
765765
responseCompletableFuture2.get();
766-
verifyReceiveRequestExceptFor(3, leader, NotifyKvSnapshotOffsetRequest.class);
766+
retry(
767+
Duration.ofMinutes(1),
768+
() ->
769+
verifyReceiveRequestExceptFor(
770+
3, leader, NotifyKvSnapshotOffsetRequest.class));
767771
}
768772

769773
@Test
@@ -1082,6 +1086,7 @@ private void verifyReceiveRequestExceptFor(
10821086
.hasMessage("No requests pending for inbound response.");
10831087
} else {
10841088
// should contain NotifyKvSnapshotOffsetRequest
1089+
assertThat(testTabletServerGateway.pendingRequestSize()).isNotZero();
10851090
assertThat(testTabletServerGateway.getRequest(0)).isInstanceOf(requestClass);
10861091
}
10871092
}

0 commit comments

Comments
 (0)