Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.security.GeneralSecurityException;
import java.time.Duration;

/**
* Creates HTTP client and interacts with Kafka Agent's REST endpoint
Expand All @@ -34,6 +35,11 @@ public class KafkaAgentClient {
private static final String BROKER_STATE_REST_PATH = "/v1/broker-state/";
private static final int KAFKA_AGENT_HTTPS_PORT = 8443;
private static final char[] KEYSTORE_PASSWORD = "changeit".toCharArray();
// Bounds the connect and full HTTP request lifecycle so that a broker which accepts the TCP connection but
// never produces a response (e.g. alive but stuck on IO) cannot block the KafkaRoller's single-threaded
// executor indefinitely. The Kafka Agent only serves a small broker-state JSON, so 10 seconds is well above
// the expected response time on a healthy broker yet small enough to keep the roller responsive.
/* test */ static final Duration HTTP_REQUEST_TIMEOUT = Duration.ofSeconds(10);
private final String namespace;
private final Reconciliation reconciliation;
private final String cluster;
Expand Down Expand Up @@ -92,20 +98,45 @@ private HttpClient createHttpClient() {
SSLContext sslContext = SSLContext.getInstance("TLS");
sslContext.init(keyManagerFactory.getKeyManagers(), trustManagerFactory.getTrustManagers(), null);

return HttpClient.newBuilder()
return httpClientBuilder()
.sslContext(sslContext)
.build();
Comment on lines +101 to 103
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I did not realize it before. But why not add the SSL context as a parameter and have it return the HTTP client instead of the half-finished builder?

Copy link
Copy Markdown
Contributor

@tinaselenge tinaselenge Apr 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand these new methods make it easier to test but are they really necessary? It seems to me it is quite straight forward to set the timeout without additional methods that are called only once. Otherwise I agree with Jakub that it makes more sense that the method returns complete HTTP client, rather than finish building here.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Taking into account that this method was written for adding tests which seem to be not that useful, I agree that we should just remove the method and configure timeout and SSLContext here.

Copy link
Copy Markdown
Contributor

@tinaselenge tinaselenge May 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@chon3806 Are you happy with the suggestions in above comments? So you could set the timeout directly here instead of a separate method, for example:

 return HttpClient.newBuilder()
                    .connectTimeout(HTTP_REQUEST_TIMEOUT)
                    .sslContext(sslContext)
                    .build();

} catch (GeneralSecurityException | IOException e) {
throw new RuntimeException("Failed to configure HTTP client", e);
}
}

/**
Comment thread
scholzj marked this conversation as resolved.
* Returns an {@link HttpClient.Builder} pre-configured with {@link #HTTP_REQUEST_TIMEOUT} as the connect timeout.
* Centralising the timeout wiring here keeps {@link #createHttpClient()} focused on TLS setup and lets tests
* assert that the configured connect timeout is applied without needing a TLS identity.
*
* @return an {@link HttpClient.Builder} with the connect timeout already applied
*/
/* test */ static HttpClient.Builder httpClientBuilder() {
return HttpClient.newBuilder().connectTimeout(HTTP_REQUEST_TIMEOUT);
}

/**
* Builds a {@code GET} {@link HttpRequest} for the given URI with {@link #HTTP_REQUEST_TIMEOUT} applied as the
Comment thread
scholzj marked this conversation as resolved.
* full request timeout. Centralising the timeout wiring here lets tests assert the request timeout without
* needing a live HTTP endpoint.
*
* @param uri the target URI for the request
*
* @return a {@code GET} {@link HttpRequest} for {@code uri} with the request timeout already applied
*/
/* test */ static HttpRequest buildRequest(URI uri) {
return HttpRequest.newBuilder()
.uri(uri)
.timeout(HTTP_REQUEST_TIMEOUT)
.GET()
.build();
}

String doGet(URI uri) {
try {
HttpRequest req = HttpRequest.newBuilder()
.uri(uri)
.GET()
.build();
HttpRequest req = buildRequest(uri);

var response = httpClient.send(req, HttpResponse.BodyHandlers.ofString());
if (response.statusCode() != 200) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@
import io.strimzi.operator.common.Reconciliation;
import org.junit.jupiter.api.Test;

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.time.Duration;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
Expand Down Expand Up @@ -60,4 +65,30 @@ public void testErrorResponse() {
assertEquals(0, actual.remainingLogsToRecover());
assertEquals(0, actual.remainingSegmentsToRecover());
}

/**
* Regression guard: a previous version of KafkaAgentClient configured no timeout on the per-request side,
* which let a broker stuck on IO block the KafkaRoller's single-threaded executor indefinitely. Verify that
* requests built via the package-private helper carry the configured timeout.
*/
@Test
public void testBuildRequestAppliesHttpRequestTimeout() {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar to my other comment, I'm not sure if these tests are really that helpful. We set the timeout on the client and request, then testing if these timeouts are set here. If we want to make sure, we are not indefinitely waiting for a response, maybe simulating a slow response that exceeds the timeout and check for timeout error to really test the behaviour. But even that might be overkill?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think testing it like this might be tricky and questionable. What is it we want to test?

  • That the JDK uses the timeouts we configured and does not just ignore them?
  • Or that they help with the problem in Kafka / KafkaAgent?

For the first one I would argue it is probably not our concern. For the second one, it might be hard to actually make sure the test hangs in the right way how it would in production. So I wonder how reliably can we replicate the same situation.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure these new tests add much value and we set timeouts in many places for requests and we don't test them like that. So I was trying to suggest alternative that is more for your second point but wasn't anyway sure if that is the right thing to do either.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that is a fair point that the current test is probably not much useful. I'm just not sure how easy it is to make it useful.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that this test is not really useful, I left a similar comment for the other one.

HttpRequest request = KafkaAgentClient.buildRequest(URI.create("https://example.invalid/v1/broker-state/"));
Duration timeout = request.timeout().orElseThrow(() -> new AssertionError("HTTP request timeout was not configured"));
assertEquals(KafkaAgentClient.HTTP_REQUEST_TIMEOUT, timeout, "Request timeout must equal HTTP_REQUEST_TIMEOUT");
assertTrue(timeout.toMillis() > 0, "HTTP request timeout must be positive but was " + timeout);
}

/**
* Regression guard: a previous version of KafkaAgentClient configured no connect timeout, which let an
* unresponsive broker block the KafkaRoller's single-threaded executor indefinitely. Verify that clients
* built via the package-private helper carry the configured connect timeout.
*/
@Test
public void testHttpClientBuilderAppliesConnectTimeout() {
HttpClient client = KafkaAgentClient.httpClientBuilder().build();
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the end the httpClientBuilder() is just setting the timeout on the JDK HttpClient class so this test is only checking that the timeout is set properly but ... it's not our goal testing that a JDK class works imho, so I can't see this test really useful.

Duration connectTimeout = client.connectTimeout().orElseThrow(() -> new AssertionError("HTTP connect timeout was not configured"));
assertEquals(KafkaAgentClient.HTTP_REQUEST_TIMEOUT, connectTimeout, "Connect timeout must equal HTTP_REQUEST_TIMEOUT");
assertTrue(connectTimeout.toMillis() > 0, "HTTP connect timeout must be positive but was " + connectTimeout);
}
}
Loading