Skip to content

Commit 6e8ab07

Browse files
authored
[server] Metadata RPC get table/partition metadata from serverCache instead of Zookeeper (#930)
1 parent abb3b86 commit 6e8ab07

File tree

14 files changed

+488
-316
lines changed

14 files changed

+488
-316
lines changed

fluss-server/src/main/java/com/alibaba/fluss/server/RpcServiceBase.java

Lines changed: 111 additions & 107 deletions
Large diffs are not rendered by default.

fluss-server/src/main/java/com/alibaba/fluss/server/coordinator/CoordinatorContext.java

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,8 @@ public class CoordinatorContext {
7878
// a map of partition assignment, <table_id, partition_id> -> <bucket, bucket_replicas>
7979
private final Map<TablePartition, Map<Integer, List<Integer>>> partitionAssignments =
8080
new HashMap<>();
81-
// a map from partition_id -> partition_name
82-
private final Map<Long, String> partitionNameById = new HashMap<>();
81+
// a map from partition_id -> physicalTablePath
82+
private final Map<Long, PhysicalTablePath> pathByPartitionId = new HashMap<>();
8383
private final Map<PhysicalTablePath, Long> partitionIdByPath = new HashMap<>();
8484

8585
// a map from table_id to the table path
@@ -229,7 +229,7 @@ public void putTableInfo(TableInfo tableInfo) {
229229
}
230230

231231
public void putPartition(long partitionId, PhysicalTablePath physicalTablePath) {
232-
this.partitionNameById.put(partitionId, physicalTablePath.getPartitionName());
232+
this.pathByPartitionId.put(partitionId, physicalTablePath);
233233
this.partitionIdByPath.put(physicalTablePath, partitionId);
234234
}
235235

@@ -250,11 +250,20 @@ public boolean containsTableId(long tableId) {
250250
}
251251

252252
public boolean containsPartitionId(long partitionId) {
253-
return this.partitionNameById.containsKey(partitionId);
253+
return this.pathByPartitionId.containsKey(partitionId);
254254
}
255255

256256
public @Nullable String getPartitionName(long partitionId) {
257-
return this.partitionNameById.get(partitionId);
257+
PhysicalTablePath physicalTablePath = pathByPartitionId.get(partitionId);
258+
if (physicalTablePath == null) {
259+
return null;
260+
} else {
261+
return physicalTablePath.getPartitionName();
262+
}
263+
}
264+
265+
public Optional<PhysicalTablePath> getPhysicalTablePath(long partitionId) {
266+
return Optional.ofNullable(pathByPartitionId.get(partitionId));
258267
}
259268

260269
public Optional<Long> getPartitionId(PhysicalTablePath physicalTablePath) {
@@ -599,10 +608,10 @@ public void removePartition(TablePartition tablePartition) {
599608
bucket)));
600609
}
601610

602-
String partitionName = partitionNameById.remove(tablePartition.getPartitionId());
603-
if (partitionName != null) {
604-
TablePath tablePath = getTablePathById(tablePartition.getTableId());
605-
partitionIdByPath.remove(PhysicalTablePath.of(tablePath, partitionName));
611+
PhysicalTablePath physicalTablePath =
612+
pathByPartitionId.remove(tablePartition.getPartitionId());
613+
if (physicalTablePath != null) {
614+
partitionIdByPath.remove(physicalTablePath);
606615
}
607616
}
608617

@@ -616,7 +625,7 @@ private void clearTablesState() {
616625
tablePathById.clear();
617626
tableIdByPath.clear();
618627
tableInfoById.clear();
619-
partitionNameById.clear();
628+
pathByPartitionId.clear();
620629
partitionIdByPath.clear();
621630
}
622631

fluss-server/src/main/java/com/alibaba/fluss/server/coordinator/CoordinatorService.java

Lines changed: 111 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,12 @@
3131
import com.alibaba.fluss.metadata.DataLakeFormat;
3232
import com.alibaba.fluss.metadata.DatabaseDescriptor;
3333
import com.alibaba.fluss.metadata.PartitionSpec;
34+
import com.alibaba.fluss.metadata.PhysicalTablePath;
3435
import com.alibaba.fluss.metadata.ResolvedPartitionSpec;
36+
import com.alibaba.fluss.metadata.TableBucket;
3537
import com.alibaba.fluss.metadata.TableDescriptor;
38+
import com.alibaba.fluss.metadata.TableInfo;
39+
import com.alibaba.fluss.metadata.TablePartition;
3640
import com.alibaba.fluss.metadata.TablePath;
3741
import com.alibaba.fluss.rpc.gateway.CoordinatorGateway;
3842
import com.alibaba.fluss.rpc.messages.AdjustIsrRequest;
@@ -61,8 +65,11 @@
6165
import com.alibaba.fluss.rpc.messages.DropTableResponse;
6266
import com.alibaba.fluss.rpc.messages.LakeTieringHeartbeatRequest;
6367
import com.alibaba.fluss.rpc.messages.LakeTieringHeartbeatResponse;
68+
import com.alibaba.fluss.rpc.messages.MetadataRequest;
69+
import com.alibaba.fluss.rpc.messages.MetadataResponse;
6470
import com.alibaba.fluss.rpc.messages.PbHeartbeatReqForTable;
6571
import com.alibaba.fluss.rpc.messages.PbHeartbeatRespForTable;
72+
import com.alibaba.fluss.rpc.netty.server.Session;
6673
import com.alibaba.fluss.rpc.protocol.ApiError;
6774
import com.alibaba.fluss.security.acl.AclBinding;
6875
import com.alibaba.fluss.security.acl.AclBindingFilter;
@@ -72,6 +79,7 @@
7279
import com.alibaba.fluss.server.authorizer.AclCreateResult;
7380
import com.alibaba.fluss.server.authorizer.AclDeleteResult;
7481
import com.alibaba.fluss.server.authorizer.Authorizer;
82+
import com.alibaba.fluss.server.coordinator.event.AccessContextEvent;
7583
import com.alibaba.fluss.server.coordinator.event.AdjustIsrReceivedEvent;
7684
import com.alibaba.fluss.server.coordinator.event.CommitKvSnapshotEvent;
7785
import com.alibaba.fluss.server.coordinator.event.CommitLakeTableSnapshotEvent;
@@ -81,9 +89,13 @@
8189
import com.alibaba.fluss.server.entity.LakeTieringTableInfo;
8290
import com.alibaba.fluss.server.kv.snapshot.CompletedSnapshot;
8391
import com.alibaba.fluss.server.kv.snapshot.CompletedSnapshotJsonSerde;
92+
import com.alibaba.fluss.server.metadata.BucketMetadata;
93+
import com.alibaba.fluss.server.metadata.PartitionMetadata;
8494
import com.alibaba.fluss.server.metadata.ServerMetadataCache;
95+
import com.alibaba.fluss.server.metadata.TableMetadata;
8596
import com.alibaba.fluss.server.zk.ZooKeeperClient;
8697
import com.alibaba.fluss.server.zk.data.BucketAssignment;
98+
import com.alibaba.fluss.server.zk.data.LeaderAndIsr;
8799
import com.alibaba.fluss.server.zk.data.PartitionAssignment;
88100
import com.alibaba.fluss.server.zk.data.TableAssignment;
89101
import com.alibaba.fluss.server.zk.data.TableRegistration;
@@ -93,9 +105,11 @@
93105
import javax.annotation.Nullable;
94106

95107
import java.io.UncheckedIOException;
108+
import java.util.ArrayList;
96109
import java.util.HashMap;
97110
import java.util.List;
98111
import java.util.Map;
112+
import java.util.Optional;
99113
import java.util.concurrent.CompletableFuture;
100114
import java.util.function.Supplier;
101115

@@ -121,6 +135,7 @@ public final class CoordinatorService extends RpcServiceBase implements Coordina
121135
private final int defaultReplicationFactor;
122136
private final Supplier<EventManager> eventManagerSupplier;
123137
private final Supplier<Integer> coordinatorEpochSupplier;
138+
private final ServerMetadataCache metadataCache;
124139

125140
// null if the cluster hasn't configured datalake format
126141
private final @Nullable DataLakeFormat dataLakeFormat;
@@ -137,13 +152,7 @@ public CoordinatorService(
137152
@Nullable Authorizer authorizer,
138153
@Nullable LakeCatalog lakeCatalog,
139154
LakeTableTieringManager lakeTableTieringManager) {
140-
super(
141-
remoteFileSystem,
142-
ServerType.COORDINATOR,
143-
zkClient,
144-
metadataCache,
145-
metadataManager,
146-
authorizer);
155+
super(remoteFileSystem, ServerType.COORDINATOR, zkClient, metadataManager, authorizer);
147156
this.defaultBucketNumber = conf.getInt(ConfigOptions.DEFAULT_BUCKET_NUMBER);
148157
this.defaultReplicationFactor = conf.getInt(ConfigOptions.DEFAULT_REPLICATION_FACTOR);
149158
this.eventManagerSupplier =
@@ -153,6 +162,7 @@ public CoordinatorService(
153162
this.dataLakeFormat = conf.getOptional(ConfigOptions.DATALAKE_FORMAT).orElse(null);
154163
this.lakeCatalog = lakeCatalog;
155164
this.lakeTableTieringManager = lakeTableTieringManager;
165+
this.metadataCache = metadataCache;
156166
checkState(
157167
(dataLakeFormat == null) == (lakeCatalog == null),
158168
"dataLakeFormat and lakeCatalog must both be null or both non-null, but dataLakeFormat is %s, lakeCatalog is %s.",
@@ -406,6 +416,28 @@ public CompletableFuture<DropPartitionResponse> dropPartition(DropPartitionReque
406416
return CompletableFuture.completedFuture(response);
407417
}
408418

419+
@Override
420+
public CompletableFuture<MetadataResponse> metadata(MetadataRequest request) {
421+
String listenerName = currentListenerName();
422+
Session session = currentSession();
423+
424+
AccessContextEvent<MetadataResponse> metadataResponseAccessContextEvent =
425+
new AccessContextEvent<>(
426+
ctx ->
427+
makeMetadataResponse(
428+
request,
429+
listenerName,
430+
session,
431+
authorizer,
432+
metadataCache,
433+
(tablePath) -> getTableMetadata(ctx, tablePath),
434+
ctx::getPhysicalTablePath,
435+
(physicalTablePath) ->
436+
getPartitionMetadata(ctx, physicalTablePath)));
437+
eventManagerSupplier.get().put(metadataResponseAccessContextEvent);
438+
return metadataResponseAccessContextEvent.getResultFuture();
439+
}
440+
409441
public CompletableFuture<AdjustIsrResponse> adjustIsr(AdjustIsrRequest request) {
410442
CompletableFuture<AdjustIsrResponse> response = new CompletableFuture<>();
411443
eventManagerSupplier
@@ -545,4 +577,76 @@ private void validateHeartbeatRequest(
545577
heartbeatReqForTable.getTableId()));
546578
}
547579
}
580+
581+
private TableMetadata getTableMetadata(CoordinatorContext ctx, TablePath tablePath) {
582+
// always get table info from zk.
583+
TableInfo tableInfo = metadataManager.getTable(tablePath);
584+
long tableId = ctx.getTableIdByPath(tablePath);
585+
List<BucketMetadata> bucketMetadataList;
586+
if (tableId == TableInfo.UNKNOWN_TABLE_ID) {
587+
// TODO: no need to get the assignment from zk once the client metadata cache is refactored. Tracked by
588+
// https://github.com/alibaba/fluss/issues/483
589+
// get table assignment from zk.
590+
bucketMetadataList =
591+
getTableMetadataFromZk(
592+
zkClient, tablePath, tableInfo.getTableId(), tableInfo.isPartitioned());
593+
} else {
594+
// get table assignment from coordinatorContext.
595+
bucketMetadataList =
596+
getBucketMetadataFromContext(
597+
ctx, tableId, null, ctx.getTableAssignment(tableId));
598+
}
599+
return new TableMetadata(tableInfo, bucketMetadataList);
600+
}
601+
602+
private PartitionMetadata getPartitionMetadata(
603+
CoordinatorContext ctx, PhysicalTablePath partitionPath) {
604+
TablePath tablePath =
605+
new TablePath(partitionPath.getDatabaseName(), partitionPath.getTableName());
606+
String partitionName = partitionPath.getPartitionName();
607+
long tableId = ctx.getTableIdByPath(tablePath);
608+
if (tableId == TableInfo.UNKNOWN_TABLE_ID) {
609+
// TODO: no need to get the assignment from zk once the client metadata cache is refactored. Tracked by
610+
// https://github.com/alibaba/fluss/issues/483
611+
return getPartitionMetadataFromZk(partitionPath, zkClient);
612+
} else {
613+
Optional<Long> partitionIdOpt = ctx.getPartitionId(partitionPath);
614+
if (partitionIdOpt.isPresent()) {
615+
long partitionId = partitionIdOpt.get();
616+
List<BucketMetadata> bucketMetadataList =
617+
getBucketMetadataFromContext(
618+
ctx,
619+
tableId,
620+
partitionId,
621+
ctx.getPartitionAssignment(
622+
new TablePartition(tableId, partitionId)));
623+
return new PartitionMetadata(
624+
tableId, partitionName, partitionId, bucketMetadataList);
625+
} else {
626+
return getPartitionMetadataFromZk(partitionPath, zkClient);
627+
}
628+
}
629+
}
630+
631+
private static List<BucketMetadata> getBucketMetadataFromContext(
632+
CoordinatorContext ctx,
633+
long tableId,
634+
@Nullable Long partitionId,
635+
Map<Integer, List<Integer>> tableAssigment) {
636+
List<BucketMetadata> bucketMetadataList = new ArrayList<>();
637+
tableAssigment.forEach(
638+
(bucketId, serverIds) -> {
639+
TableBucket tableBucket = new TableBucket(tableId, partitionId, bucketId);
640+
Optional<LeaderAndIsr> optLeaderAndIsr = ctx.getBucketLeaderAndIsr(tableBucket);
641+
Integer leader = optLeaderAndIsr.map(LeaderAndIsr::leader).orElse(null);
642+
BucketMetadata bucketMetadata =
643+
new BucketMetadata(
644+
bucketId,
645+
leader,
646+
ctx.getBucketLeaderEpoch(tableBucket),
647+
serverIds);
648+
bucketMetadataList.add(bucketMetadata);
649+
});
650+
return bucketMetadataList;
651+
}
548652
}

fluss-server/src/main/java/com/alibaba/fluss/server/coordinator/event/AccessContextEvent.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,9 @@
2222
import java.util.function.Function;
2323

2424
/**
25-
* An event designed to safely access the {@link CoordinatorContext}. This is intended solely for
26-
* testing purposes. Since {@link CoordinatorContext} is not thread-safe, directly accessing it in
27-
* tests can lead to unsafe operations. This event ensures safe access to the {@link
28-
* CoordinatorContext} during testing
25+
* An event designed to safely access the {@link CoordinatorContext}. Since {@link
26+
* CoordinatorContext} is not thread-safe, directly accessing it from other threads can lead to unsafe
27+
* operations. This event ensures safe access to the {@link CoordinatorContext}.
2928
*
3029
* @param <T> the type of the result of the access operation
3130
*/

fluss-server/src/main/java/com/alibaba/fluss/server/metadata/ServerMetadataSnapshot.java

Lines changed: 17 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@
1616

1717
package com.alibaba.fluss.server.metadata;
1818

19+
import com.alibaba.fluss.annotation.VisibleForTesting;
1920
import com.alibaba.fluss.cluster.Cluster;
2021
import com.alibaba.fluss.cluster.ServerNode;
2122
import com.alibaba.fluss.cluster.TabletServerInfo;
2223
import com.alibaba.fluss.metadata.PhysicalTablePath;
23-
import com.alibaba.fluss.metadata.TableInfo;
2424
import com.alibaba.fluss.metadata.TablePath;
2525

2626
import javax.annotation.Nullable;
@@ -49,9 +49,7 @@ public class ServerMetadataSnapshot {
4949
private final Map<Long, TablePath> pathByTableId;
5050
// partition table.
5151
private final Map<PhysicalTablePath, Long> partitionIdByPath;
52-
private final Map<Long, String> partitionNameById;
53-
54-
private final Map<Long, TableInfo> tableInfoByTableId;
52+
private final Map<Long, PhysicalTablePath> physicalPathByPartitionId;
5553

5654
// a map of bucket metadata of non-partitioned tables, table_id -> <bucket, bucketMetadata>
5755
private final Map<Long, Map<Integer, BucketMetadata>> bucketMetadataMapForTables;
@@ -66,7 +64,6 @@ public ServerMetadataSnapshot(
6664
Map<TablePath, Long> tableIdByPath,
6765
Map<Long, TablePath> pathByTableId,
6866
Map<PhysicalTablePath, Long> partitionIdByPath,
69-
Map<Long, TableInfo> tableInfoByTableId,
7067
Map<Long, Map<Integer, BucketMetadata>> bucketMetadataMapForTables,
7168
Map<Long, Map<Integer, BucketMetadata>> bucketMetadataMapForPartitions) {
7269
this.coordinatorServer = coordinatorServer;
@@ -76,14 +73,12 @@ public ServerMetadataSnapshot(
7673
this.pathByTableId = Collections.unmodifiableMap(pathByTableId);
7774

7875
this.partitionIdByPath = Collections.unmodifiableMap(partitionIdByPath);
79-
Map<Long, String> tempPartitionNameById = new HashMap<>();
76+
Map<Long, PhysicalTablePath> tempPhysicalPathByPartitionId = new HashMap<>();
8077
partitionIdByPath.forEach(
8178
((physicalTablePath, partitionId) ->
82-
tempPartitionNameById.put(
83-
partitionId, physicalTablePath.getPartitionName())));
84-
this.partitionNameById = Collections.unmodifiableMap(tempPartitionNameById);
79+
tempPhysicalPathByPartitionId.put(partitionId, physicalTablePath)));
80+
this.physicalPathByPartitionId = Collections.unmodifiableMap(tempPhysicalPathByPartitionId);
8581

86-
this.tableInfoByTableId = Collections.unmodifiableMap(tableInfoByTableId);
8782
this.bucketMetadataMapForTables = Collections.unmodifiableMap(bucketMetadataMapForTables);
8883
this.bucketMetadataMapForPartitions =
8984
Collections.unmodifiableMap(bucketMetadataMapForPartitions);
@@ -98,7 +93,6 @@ public static ServerMetadataSnapshot empty() {
9893
Collections.emptyMap(),
9994
Collections.emptyMap(),
10095
Collections.emptyMap(),
101-
Collections.emptyMap(),
10296
Collections.emptyMap());
10397
}
10498

@@ -151,12 +145,8 @@ public Optional<Long> getPartitionId(PhysicalTablePath physicalTablePath) {
151145
return Optional.ofNullable(partitionIdByPath.get(physicalTablePath));
152146
}
153147

154-
public Optional<String> getPartitionName(long partitionId) {
155-
return Optional.ofNullable(partitionNameById.get(partitionId));
156-
}
157-
158-
public Optional<TableInfo> getTableInfo(long tableId) {
159-
return Optional.ofNullable(tableInfoByTableId.get(tableId));
148+
public Optional<PhysicalTablePath> getPhysicalTablePath(long partitionId) {
149+
return Optional.ofNullable(physicalPathByPartitionId.get(partitionId));
160150
}
161151

162152
public Map<Integer, BucketMetadata> getBucketMetadataForTable(long tableId) {
@@ -171,15 +161,21 @@ public Map<PhysicalTablePath, Long> getPartitionIdByPath() {
171161
return partitionIdByPath;
172162
}
173163

174-
public Map<Long, TableInfo> getTableInfoByTableId() {
175-
return tableInfoByTableId;
176-
}
177-
178164
public Map<Long, Map<Integer, BucketMetadata>> getBucketMetadataMapForTables() {
179165
return bucketMetadataMapForTables;
180166
}
181167

182168
public Map<Long, Map<Integer, BucketMetadata>> getBucketMetadataMapForPartitions() {
183169
return bucketMetadataMapForPartitions;
184170
}
171+
172+
@VisibleForTesting
173+
public @Nullable ServerInfo getCoordinatorServer() {
174+
return coordinatorServer;
175+
}
176+
177+
@VisibleForTesting
178+
public Map<Integer, ServerInfo> getAliveTabletServers() {
179+
return aliveTabletServers;
180+
}
185181
}

0 commit comments

Comments
 (0)