Skip to content

Commit 1083b8e

Browse files
authored
Attemp to fix flakiness CoreFeaturesIT (strimzi#11758)
Signed-off-by: see-quick <maros.orsak159@gmail.com>
1 parent 6a7a507 commit 1083b8e

3 files changed

Lines changed: 25 additions & 26 deletions

File tree

.azure/build-pipeline.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ stages:
3232
artifactRunVersion: ''
3333
artifactRunId: ''
3434
variables:
35-
STRIMZI_TEST_CONTAINER_LOGGING_ENABLED: false
35+
# set to false when none error occurs during 2 weeks - https://github.com/strimzi/strimzi-kafka-operator/issues/11839
36+
STRIMZI_TEST_CONTAINER_LOGGING_ENABLED: true
3637

3738
# Builds Strimzi docs
3839
- stage: build_docs

topic-operator/src/main/java/io/strimzi/operator/topic/TopicOperatorMain.java

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ public class TopicOperatorMain implements Liveness, Readiness {
5151
/* test */ final BatchingTopicController controller;
5252

5353
private SharedIndexInformer<KafkaTopic> informer; // guarded by this
54-
Thread shutdownHook; // guarded by this
54+
/* test */ volatile Thread shutdownHook;
5555

5656
private final ResourceEventHandler<KafkaTopic> resourceEventHandler;
5757
private final HealthCheckAndMetricsServer healthAndMetricsServer;
@@ -93,7 +93,7 @@ synchronized void start() {
9393
throw new IllegalStateException();
9494
}
9595

96-
shutdownHook = new Thread(this::shutdown, "TopicOperator-shutdown-hook");
96+
shutdownHook = new Thread(this::stop, "TopicOperator-shutdown-hook");
9797
LOGGER.infoOp("Installing shutdown hook");
9898
Runtime.getRuntime().addShutdownHook(shutdownHook);
9999
LOGGER.infoOp("Starting health and metrics");
@@ -119,28 +119,17 @@ synchronized void start() {
119119
}
120120

121121
synchronized void stop() {
122+
// Execute the actual shutdown sequence (idempotent).
122123
if (shutdownHook == null) {
123-
throw new IllegalStateException();
124+
LOGGER.debugOp("Already shut down.");
125+
return;
124126
}
125-
// shutdown(), will be be invoked indirectly by calling
126-
// hook.run() has the side effect of nullifying this.shutdownHook
127-
// so retain a reference now so we have something to call
128-
// removeShutdownHook() with.
129-
var hook = shutdownHook;
130-
// Call run (not start()) on the thread so that shutdown() is executed
131-
// on this thread.
132-
shutdown();
133-
// stop() is _not_ called from the shutdown hook, so calling
134-
// removeShutdownHook() should not cause IAE.
135-
Runtime.getRuntime().removeShutdownHook(hook);
136-
}
137-
138-
private synchronized void shutdown() {
127+
shutdownHook = null;
128+
LOGGER.infoOp("Shutdown initiated");
139129
// Note: This method can be invoked on either via the shutdown hook thread or
140130
// on the thread(s) on which stop()/start() are called
141-
LOGGER.infoOp("Shutdown initiated");
142131
try {
143-
shutdownHook = null;
132+
// Idempotent resource teardown.
144133
if (informer != null) {
145134
informer.stop();
146135
informer = null;
@@ -152,6 +141,7 @@ private synchronized void shutdown() {
152141
this.healthAndMetricsServer.stop();
153142
LOGGER.infoOp("Shutdown completed normally");
154143
} catch (InterruptedException e) {
144+
Thread.currentThread().interrupt();
155145
LOGGER.infoOp("Interrupted during shutdown");
156146
throw new RuntimeException(e);
157147
}
@@ -170,7 +160,7 @@ public static void main(String[] args) {
170160
public boolean isAlive() {
171161
boolean running;
172162
synchronized (this) {
173-
running = informer.isRunning();
163+
running = informer != null && informer.isRunning();
174164
}
175165
if (!running) {
176166
LOGGER.infoOp("isAlive returning false because informer is not running");
@@ -184,7 +174,7 @@ public boolean isAlive() {
184174
public boolean isReady() {
185175
boolean running;
186176
synchronized (this) {
187-
running = informer.isRunning();
177+
running = informer != null && informer.isRunning();
188178
}
189179
if (!running) {
190180
LOGGER.infoOp("isReady returning false because informer is not running");

topic-operator/src/test/java/io/strimzi/operator/topic/CoreFeaturesIT.java

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,8 @@ public static void afterAll() {
143143

144144
@AfterEach
145145
public void afterEach() {
146-
if (operator != null) {
146+
// Only check operator state if it wasn't intentionally shut down
147+
if (operator != null && operator.shutdownHook != null) {
147148
assertTrue(operator.queue.isAlive());
148149
assertTrue(operator.queue.isReady());
149150
}
@@ -158,6 +159,11 @@ public void afterEach() {
158159
kafkaAdminClient = null;
159160
}
160161

162+
if (kafkaAdminClientOp != null) {
163+
kafkaAdminClientOp.close();
164+
kafkaAdminClientOp = null;
165+
}
166+
161167
if (kafkaCluster != null) {
162168
kafkaCluster.stop();
163169
kafkaCluster = null;
@@ -2095,9 +2101,11 @@ public void shouldTerminateIfQueueFull() throws InterruptedException, TimeoutExc
20952101
// then
20962102
assertNull(operator.shutdownHook, "Expect the operator to shutdown");
20972103

2098-
// finally, because the @After method of this class asserts that the operator is running
2099-
// we start a new operator
2100-
kafkaAdminClientOp = null;
2104+
// clean up the shutdown operator and start a new one
2105+
if (kafkaAdminClientOp != null) {
2106+
kafkaAdminClientOp.close();
2107+
kafkaAdminClientOp = null;
2108+
}
21012109
operator = null;
21022110
maybeStartOperator(topicOperatorConfig(NAMESPACE, kafkaCluster));
21032111
}

0 commit comments

Comments
 (0)