Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,8 @@ public static NetworkClient createNetworkClient(AbstractConfig config,
hostResolver,
clientTelemetrySender,
config.getLong(CommonClientConfigs.METADATA_RECOVERY_REBOOTSTRAP_TRIGGER_MS_CONFIG),
MetadataRecoveryStrategy.forName(config.getString(CommonClientConfigs.METADATA_RECOVERY_STRATEGY_CONFIG))
MetadataRecoveryStrategy.forName(config.getString(CommonClientConfigs.METADATA_RECOVERY_STRATEGY_CONFIG)),
config.getBoolean(CommonClientConfigs.METADATA_CLUSTER_CHECK_ENABLE_CONFIG)
);
} catch (Throwable t) {
closeQuietly(selector, "Selector");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,14 @@
*/
public interface MetadataUpdater extends Closeable {

/**
* Gets the current cluster id without blocking.
* <p>
* This default implementation always returns {@code null}, i.e. it reports the cluster id as
* unknown. Implementations that can determine the cluster id override this method.
*
* @return the cluster id, or {@code null} if unknown
*/
default String clusterId() {
return null;
}

/**
* Gets the current cluster info without blocking.
*/
Expand Down
59 changes: 49 additions & 10 deletions clients/src/main/java/org/apache/kafka/clients/NetworkClient.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.apache.kafka.clients;

import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.ClusterResource;
import org.apache.kafka.common.KafkaException;
import org.apache.kafka.common.Node;
import org.apache.kafka.common.TopicPartition;
Expand Down Expand Up @@ -120,6 +121,9 @@ private enum State {

private final MetadataRecoveryStrategy metadataRecoveryStrategy;

/* Whether to send the cluster ID and node ID on ApiVersions RPC for checking by the broker */
private final boolean metadataClusterCheckEnable;

private final Time time;

/**
Expand Down Expand Up @@ -154,7 +158,8 @@ public NetworkClient(Selectable selector,
boolean discoverBrokerVersions,
ApiVersions apiVersions,
LogContext logContext,
MetadataRecoveryStrategy metadataRecoveryStrategy) {
MetadataRecoveryStrategy metadataRecoveryStrategy,
boolean metadataClusterCheckEnable) {
this(selector,
metadata,
clientId,
Expand All @@ -171,7 +176,8 @@ public NetworkClient(Selectable selector,
apiVersions,
logContext,
Long.MAX_VALUE,
metadataRecoveryStrategy);
metadataRecoveryStrategy,
metadataClusterCheckEnable);
}

public NetworkClient(Selectable selector,
Expand All @@ -190,7 +196,8 @@ public NetworkClient(Selectable selector,
ApiVersions apiVersions,
LogContext logContext,
long rebootstrapTriggerMs,
MetadataRecoveryStrategy metadataRecoveryStrategy) {
MetadataRecoveryStrategy metadataRecoveryStrategy,
boolean metadataClusterCheckEnable) {
this(null,
metadata,
selector,
Expand All @@ -211,7 +218,8 @@ public NetworkClient(Selectable selector,
new DefaultHostResolver(),
null,
rebootstrapTriggerMs,
metadataRecoveryStrategy);
metadataRecoveryStrategy,
metadataClusterCheckEnable);
}

public NetworkClient(Selectable selector,
Expand All @@ -230,7 +238,8 @@ public NetworkClient(Selectable selector,
ApiVersions apiVersions,
Sensor throttleTimeSensor,
LogContext logContext,
MetadataRecoveryStrategy metadataRecoveryStrategy) {
MetadataRecoveryStrategy metadataRecoveryStrategy,
boolean metadataClusterCheckEnable) {
this(null,
metadata,
selector,
Expand All @@ -251,7 +260,8 @@ public NetworkClient(Selectable selector,
new DefaultHostResolver(),
null,
Long.MAX_VALUE,
metadataRecoveryStrategy);
metadataRecoveryStrategy,
metadataClusterCheckEnable);
}

public NetworkClient(Selectable selector,
Expand All @@ -269,7 +279,8 @@ public NetworkClient(Selectable selector,
boolean discoverBrokerVersions,
ApiVersions apiVersions,
LogContext logContext,
MetadataRecoveryStrategy metadataRecoveryStrategy) {
MetadataRecoveryStrategy metadataRecoveryStrategy,
boolean metadataClusterCheckEnable) {
this(metadataUpdater,
null,
selector,
Expand All @@ -290,7 +301,8 @@ public NetworkClient(Selectable selector,
new DefaultHostResolver(),
null,
Long.MAX_VALUE,
metadataRecoveryStrategy);
metadataRecoveryStrategy,
metadataClusterCheckEnable);
}

public NetworkClient(MetadataUpdater metadataUpdater,
Expand All @@ -313,7 +325,8 @@ public NetworkClient(MetadataUpdater metadataUpdater,
HostResolver hostResolver,
ClientTelemetrySender clientTelemetrySender,
long rebootstrapTriggerMs,
MetadataRecoveryStrategy metadataRecoveryStrategy) {
MetadataRecoveryStrategy metadataRecoveryStrategy,
boolean metadataClusterCheckEnable) {
/* It would be better if we could pass `DefaultMetadataUpdater` from the public constructor, but it's not
* possible because `DefaultMetadataUpdater` is an inner class and it can only be instantiated after the
* super constructor is invoked.
Expand Down Expand Up @@ -346,6 +359,7 @@ public NetworkClient(MetadataUpdater metadataUpdater,
this.telemetrySender = (clientTelemetrySender != null) ? new TelemetrySender(clientTelemetrySender) : null;
this.rebootstrapTriggerMs = rebootstrapTriggerMs;
this.metadataRecoveryStrategy = metadataRecoveryStrategy;
this.metadataClusterCheckEnable = metadataClusterCheckEnable;
}

/**
Expand Down Expand Up @@ -1024,7 +1038,10 @@ private void handleApiVersionsResponse(List<ClientResponse> responses,
InFlightRequest req, long now, ApiVersionsResponse apiVersionsResponse) {
final String node = req.destination;
if (apiVersionsResponse.data().errorCode() != Errors.NONE.code()) {
if (req.request.version() == 0 || apiVersionsResponse.data().errorCode() != Errors.UNSUPPORTED_VERSION.code()) {
if (metadataRecoveryStrategy == MetadataRecoveryStrategy.REBOOTSTRAP && apiVersionsResponse.data().errorCode() == Errors.REBOOTSTRAP_REQUIRED.code()) {
Copy link
Copy Markdown
Collaborator

@mingyen066 mingyen066 May 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we clean up the connection here like handleRebootstrap does?

/**
* Performs a client rebootstrap when the metadata recovery strategy is {@code REBOOTSTRAP} and
* the metadata updater reports that a rebootstrap is needed for the given time and trigger
* interval. Every node returned by the updater is closed on the selector before the updater
* is asked to rebootstrap. Does nothing when either condition is false.
*
* @param responses list handed to {@code processDisconnection} for each node that was
*                  connecting or connected (NOTE(review): presumably collects the
*                  disconnect responses; confirm against processDisconnection)
* @param now       the time value forwarded to the updater and to disconnection processing
*                  (NOTE(review): presumably current time in milliseconds; confirm)
*/
private void handleRebootstrap(List<ClientResponse> responses, long now) {
if (metadataRecoveryStrategy == MetadataRecoveryStrategy.REBOOTSTRAP && metadataUpdater.needsRebootstrap(now, rebootstrapTriggerMs)) {
this.metadataUpdater.fetchNodes().forEach(node -> {
String nodeId = node.idString();
// The selector-level close is issued for every fetched node, whatever its connection state.
this.selector.close(nodeId);
// Disconnection is only logged and processed for nodes that are connecting or connected.
if (connectionStates.isConnecting(nodeId) || connectionStates.isConnected(nodeId)) {
log.info("Disconnecting from node {} due to client rebootstrap.", nodeId);
processDisconnection(responses, nodeId, now, ChannelState.LOCAL_CLOSE);
}
});
// Only after all nodes have been closed is the updater told to rebootstrap.
metadataUpdater.rebootstrap(now);
}
}

log.info("Rebootstrap requested by server due to cluster metadata mismatch.");
metadataUpdater.rebootstrap(now);
} else if (req.request.version() == 0 || apiVersionsResponse.data().errorCode() != Errors.UNSUPPORTED_VERSION.code()) {
log.warn("Received error {} from node {} when making an ApiVersionsRequest with correlation id {}. Disconnecting.",
Errors.forCode(apiVersionsResponse.data().errorCode()), node, req.header.correlationId());
this.selector.close(node);
Expand Down Expand Up @@ -1107,6 +1124,19 @@ private void handleInitiateApiVersionRequests(long now) {
// not before ready.
this.connectionStates.checkingApiVersions(node);
ApiVersionsRequest.Builder apiVersionRequestBuilder = entry.getValue();
// If we know the cluster ID and node ID we are connecting to, we can include
// those details in the ApiVersions request for checking in the broker,
// provided that the metadata recovery strategy is not NONE and the cluster check is enabled. (KIP-1242)
if (metadataRecoveryStrategy != MetadataRecoveryStrategy.NONE && metadataClusterCheckEnable) {
String clusterId = this.metadataUpdater.clusterId();
int nodeId = Integer.parseInt(node);
// When connecting to coordinators, the client uses large positive node ID
// values which do not match the target broker's node ID. Exclude those.
if (clusterId != null && nodeId > 0 && nodeId < Integer.MAX_VALUE / 2) {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should it be nodeId >= 0?

apiVersionRequestBuilder.setClusterId(clusterId);
apiVersionRequestBuilder.setNodeId(nodeId);
}
}
ClientRequest clientRequest = newClientRequest(node, apiVersionRequestBuilder, now, true);
doSend(clientRequest, true, now);
iter.remove();
Expand Down Expand Up @@ -1193,6 +1223,15 @@ class DefaultMetadataUpdater implements MetadataUpdater {
this.inProgress = null;
}

/**
 * Reads the cluster id from the cluster resource of {@code metadata.fetch()}.
 *
 * @return the cluster id held by that cluster resource, or {@code null} when the
 *         cluster resource itself is {@code null}
 */
@Override
public String clusterId() {
    final ClusterResource resource = metadata.fetch().clusterResource();
    return resource == null ? null : resource.clusterId();
}

@Override
public List<Node> fetchNodes() {
return metadata.fetch().nodes();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,14 @@ public Builder(
this.data = data.duplicate();
}

/**
* Sets the cluster ID on this builder's request data.
* <p>
* NOTE(review): the {@code isValid()} check shown further down in this file returns
* {@code false}, for version 5 and above, when only one of cluster ID (non-null) and node ID
* (not -1) is set, so this is presumably meant to be called together with
* {@link #setNodeId(int)}.
*
* @param clusterId the cluster ID to store in the request data
*/
public void setClusterId(String clusterId) {
this.data.setClusterId(clusterId);
}

/**
* Sets the node ID on this builder's request data.
* <p>
* NOTE(review): the {@code isValid()} check shown further down in this file returns
* {@code false}, for version 5 and above, when only one of cluster ID (non-null) and node ID
* (not -1) is set, so this is presumably meant to be called together with
* {@link #setClusterId(String)}.
*
* @param nodeId the node ID to store in the request data
*/
public void setNodeId(int nodeId) {
this.data.setNodeId(nodeId);
}

@Override
public ApiVersionsRequest build(short version) {
return new ApiVersionsRequest(data, version);
Expand Down Expand Up @@ -94,6 +102,13 @@ public boolean hasUnsupportedRequestVersion() {
}

public boolean isValid() {
if (version() >= 5) {
// Either cluster ID and node ID are both specified, or neither is.
if ((data.clusterId() == null && data.nodeId() != -1) || (data.clusterId() != null && data.nodeId() == -1)) {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we document in ApiVersionsRequest.json that both fields must be defined together?

return false;
}
}

if (version() >= 3) {
return SOFTWARE_NAME_VERSION_PATTERN.matcher(data.clientSoftwareName()).matches() &&
SOFTWARE_NAME_VERSION_PATTERN.matcher(data.clientSoftwareVersion()).matches();
Expand Down
Loading
Loading