3434import org .apache .kafka .clients .admin .AlterConfigOp ;
3535import org .apache .kafka .clients .admin .AlterConfigsResult ;
3636import org .apache .kafka .clients .admin .Config ;
37- import org .apache .kafka .common .KafkaException ;
3837import org .apache .kafka .common .KafkaFuture ;
39- import org .apache .kafka .common .config .ConfigException ;
4038import org .apache .kafka .common .config .ConfigResource ;
4139import org .apache .kafka .common .errors .SslAuthenticationException ;
4240
@@ -190,7 +188,7 @@ public KafkaRoller(Reconciliation reconciliation, Vertx vertx, PodOperator podOp
190188 private boolean maybeInitBrokerAdminClient () {
191189 if (this .brokerAdminClient == null ) {
192190 try {
193- this .brokerAdminClient = adminClient (nodes . stream (). filter ( NodeRef :: broker ). collect ( Collectors . toSet ()) , false );
191+ this .brokerAdminClient = adminClient (nodes , false );
194192 } catch (ForceableProblem | FatalProblem e ) {
195193 LOGGER .warnCr (reconciliation , "Failed to create brokerAdminClient." , e );
196194 return false ;
@@ -206,13 +204,7 @@ private boolean maybeInitBrokerAdminClient() {
206204 private boolean maybeInitControllerAdminClient () {
207205 if (this .controllerAdminClient == null ) {
208206 try {
209- // TODO: Currently, when running in KRaft mode Kafka does not support using Kafka Admin API with controller
210- // nodes. This is tracked in https://github.com/strimzi/strimzi-kafka-operator/issues/9692.
211- // Therefore use broker nodes of the cluster to initialise adminClient for quorum health check.
212- // Once Kafka Admin API is supported for controllers, nodes.stream().filter(NodeRef:controller)
213- // can be used here. Until then pass an empty set of nodes so the client is initialized with
214- // the brokers service.
215- this .controllerAdminClient = adminClient (Set .of (), false );
207+ this .controllerAdminClient = adminClient (nodes , true );
216208 } catch (ForceableProblem | FatalProblem e ) {
217209 LOGGER .warnCr (reconciliation , "Failed to create controllerAdminClient." , e );
218210 return false ;
@@ -400,7 +392,7 @@ private void restartIfNecessary(NodeRef nodeRef, RestartContext restartContext)
400392 if (!restartContext .podStuck ) {
401393 // We want to give pods a chance to get ready before we try to connect to them or consider them for rolling.
402394 // This is important especially for pods which were just started. But only in case when they are not stuck.
403- // If the pod is stuck, it suggests that it is running already for some time and it will not become ready.
395+ // If the pod is stuck, it suggests that it is running already for some time, and it will not become ready.
404396 // Waiting for it would likely just waste time.
405397 LOGGER .debugCr (reconciliation , "Waiting for pod {} to become ready before checking its state" , nodeRef .podName ());
406398 try {
@@ -583,32 +575,12 @@ private void checkIfRestartOrReconfigureRequired(NodeRef nodeRef, boolean isCont
583575
584576 if (isController ) {
585577 if (maybeInitControllerAdminClient ()) {
586- String controllerQuorumFetchTimeout = CONTROLLER_QUORUM_FETCH_TIMEOUT_MS_CONFIG_DEFAULT ;
587- String desiredConfig = kafkaConfigProvider .apply (nodeRef .nodeId ());
588-
589- if (desiredConfig != null ) {
590- OrderedProperties orderedProperties = new OrderedProperties ();
591- controllerQuorumFetchTimeout = orderedProperties .addStringPairs (desiredConfig ).asMap ().getOrDefault (CONTROLLER_QUORUM_FETCH_TIMEOUT_MS_CONFIG_NAME , CONTROLLER_QUORUM_FETCH_TIMEOUT_MS_CONFIG_DEFAULT );
592- }
593-
594- restartContext .quorumCheck = quorumCheck (controllerAdminClient , Long .parseLong (controllerQuorumFetchTimeout ));
578+ restartContext .quorumCheck = quorumCheck (controllerAdminClient , nodeRef );
595579 } else {
596- //TODO When https://github.com/strimzi/strimzi-kafka-operator/issues/9692 is complete
597- // we should change this logic to immediately restart this pod because we cannot connect to it.
598- if (isBroker ) {
599- // If it is a combined node (controller and broker) and the admin client cannot be initialised,
600- // restart this pod. There is no reason to continue as we won't be able to
601- // connect an admin client to this pod for other checks later.
602- LOGGER .infoCr (reconciliation , "KafkaQuorumCheck cannot be initialised for {} because none of the brokers do not seem to responding to connection attempts. " +
603- "Restarting pod because it is a combined node so it is one of the brokers that is not responding." , nodeRef );
604- reasonToRestartPod .add (RestartReason .POD_UNRESPONSIVE );
605- markRestartContextWithForceRestart (restartContext );
606- return ;
607- } else {
608- // If it is a controller only node throw an UnforceableProblem, so we try again until the backOff
609- // is finished, then it will move on to the next controller and eventually the brokers.
610- throw new UnforceableProblem ("KafkaQuorumCheck cannot be initialised for " + nodeRef + " because none of the brokers do not seem to responding to connection attempts" );
611- }
580+ LOGGER .infoCr (reconciliation , "Pod {} needs to be restarted, because it does not seem to responding to connection attempts" , nodeRef );
581+ reasonToRestartPod .add (RestartReason .POD_UNRESPONSIVE );
582+ markRestartContextWithForceRestart (restartContext );
583+ return ;
612584 }
613585 }
614586
@@ -811,7 +783,6 @@ private void awaitReadiness(Pod pod, long timeout, TimeUnit unit) throws FatalPr
811783 * @param <E> The exception type
812784 * @return The result of the future
813785 * @throws E The exception type returned from {@code exceptionMapper}.
814- * @throws TimeoutException If the given future is not completed before the timeout.
815786 * @throws InterruptedException If the waiting was interrupted.
816787 */
817788 private static <T , E extends Exception > T await (Future <T > future , long timeout , TimeUnit unit ,
@@ -855,36 +826,35 @@ protected Future<Void> restart(Pod pod, RestartContext restartContext) {
855826 * Returns an AdminClient instance bootstrapped from the given nodes: the controller nodes on the
856827 * control plane port when isController is true, otherwise the broker nodes on the replication port.
857828 */
858- /* test */ Admin adminClient (Set <NodeRef > nodes , boolean ceShouldBeFatal ) throws ForceableProblem , FatalProblem {
859- // If no nodes are passed initialize the admin client using the brokers service
860- // TODO when https://github.com/strimzi/strimzi-kafka-operator/issues/9692 is completed review whether
861- // this function can be reverted to expect nodes to be non empty
862- String bootstrapHostnames ;
863- if (nodes .isEmpty ()) {
864- bootstrapHostnames = String .format ("%s:%s" , DnsNameGenerator .of (namespace , KafkaResources .bootstrapServiceName (cluster )).serviceDnsName (), KafkaCluster .REPLICATION_PORT );
865- } else {
866- bootstrapHostnames = nodes .stream ().map (node -> DnsNameGenerator .podDnsName (namespace , KafkaResources .brokersServiceName (cluster ), node .podName ()) + ":" + KafkaCluster .REPLICATION_PORT ).collect (Collectors .joining ("," ));
867- }
868-
829+ /* test */ Admin adminClient (Set <NodeRef > nodes , boolean isController ) throws ForceableProblem , FatalProblem {
830+ String bootstrapHostnames = null ;
869831 try {
870- LOGGER .debugCr (reconciliation , "Creating AdminClient for {}" , bootstrapHostnames );
871- return adminClientProvider .createAdminClient (bootstrapHostnames , coTlsPemIdentity .pemTrustSet (), coTlsPemIdentity .pemAuthIdentity ());
872- } catch (KafkaException e ) {
873- if (ceShouldBeFatal && (e instanceof ConfigException
874- || e .getCause () instanceof ConfigException )) {
875- throw new FatalProblem ("An error while try to create an admin client with bootstrap brokers " + bootstrapHostnames , e );
832+ if (isController ) {
833+ bootstrapHostnames = nodes .stream ().filter (NodeRef ::controller ).map (node -> DnsNameGenerator .podDnsName (namespace , KafkaResources .brokersServiceName (cluster ), node .podName ()) + ":" + KafkaCluster .CONTROLPLANE_PORT ).collect (Collectors .joining ("," ));
834+ LOGGER .debugCr (reconciliation , "Creating AdminClient for {}" , bootstrapHostnames );
835+ return adminClientProvider .createControllerAdminClient (bootstrapHostnames , coTlsPemIdentity .pemTrustSet (), coTlsPemIdentity .pemAuthIdentity ());
876836 } else {
877- throw new ForceableProblem ("An error while try to create an admin client with bootstrap brokers " + bootstrapHostnames , e );
837+ bootstrapHostnames = nodes .stream ().filter (NodeRef ::broker ).map (node -> DnsNameGenerator .podDnsName (namespace , KafkaResources .brokersServiceName (cluster ), node .podName ()) + ":" + KafkaCluster .REPLICATION_PORT ).collect (Collectors .joining ("," ));
838+ LOGGER .debugCr (reconciliation , "Creating AdminClient for {}" , bootstrapHostnames );
839+ return adminClientProvider .createAdminClient (bootstrapHostnames , coTlsPemIdentity .pemTrustSet (), coTlsPemIdentity .pemAuthIdentity ());
878840 }
879841 } catch (RuntimeException e ) {
880- throw new ForceableProblem ("An error while try to create an admin client with bootstrap brokers " + bootstrapHostnames , e );
842+ throw new ForceableProblem ("An error while try to create an admin client with bootstrap " + bootstrapHostnames , e );
881843 }
882844 }
883845
884- /* test */ KafkaQuorumCheck quorumCheck (Admin ac , long controllerQuorumFetchTimeoutMs ) {
885- return new KafkaQuorumCheck (reconciliation , ac , vertx , controllerQuorumFetchTimeoutMs );
846+ /* test */ KafkaQuorumCheck quorumCheck (Admin ac , NodeRef nodeRef ) {
847+ String controllerQuorumFetchTimeout = CONTROLLER_QUORUM_FETCH_TIMEOUT_MS_CONFIG_DEFAULT ;
848+ String desiredConfig = kafkaConfigProvider .apply (nodeRef .nodeId ());
849+
850+ if (desiredConfig != null ) {
851+ OrderedProperties orderedProperties = new OrderedProperties ();
852+ controllerQuorumFetchTimeout = orderedProperties .addStringPairs (desiredConfig ).asMap ().getOrDefault (CONTROLLER_QUORUM_FETCH_TIMEOUT_MS_CONFIG_NAME , CONTROLLER_QUORUM_FETCH_TIMEOUT_MS_CONFIG_DEFAULT );
853+ }
854+ return new KafkaQuorumCheck (reconciliation , ac , vertx , Long .parseLong (controllerQuorumFetchTimeout ));
886855 }
887856
857+
888858 /* test */ KafkaAvailability availability (Admin ac ) {
889859 return new KafkaAvailability (reconciliation , ac );
890860 }
0 commit comments