Skip to content

Commit c9c800c

Browse files
Introduce graceful shutdown capability for Listener containers (#157)
* Include listener graceful-shutdown details in the app.yml template documentation. Signed-off-by: Nithin <[email protected]>
* Introduce graceful shutdown capability for Listener containers. Signed-off-by: Nithin <[email protected]>
1 parent 2db7973 commit c9c800c

File tree

2 files changed

+89
-9
lines changed

2 files changed

+89
-9
lines changed

src/main/java/hlf/java/rest/client/listener/DynamicKafkaListener.java

Lines changed: 87 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,17 @@
77
import java.util.List;
88
import java.util.concurrent.CompletableFuture;
99
import java.util.concurrent.ExecutionException;
10+
import java.util.concurrent.atomic.AtomicInteger;
1011
import lombok.Getter;
1112
import lombok.SneakyThrows;
1213
import lombok.extern.slf4j.Slf4j;
1314
import org.apache.kafka.clients.consumer.ConsumerRecord;
1415
import org.springframework.beans.factory.annotation.Autowired;
16+
import org.springframework.beans.factory.annotation.Value;
1517
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
1618
import org.springframework.cloud.context.scope.refresh.RefreshScopeRefreshedEvent;
1719
import org.springframework.context.annotation.Configuration;
20+
import org.springframework.context.event.ContextClosedEvent;
1821
import org.springframework.context.event.ContextRefreshedEvent;
1922
import org.springframework.context.event.EventListener;
2023
import org.springframework.core.task.TaskExecutor;
@@ -51,6 +54,17 @@ public class DynamicKafkaListener {
5154

5255
@Autowired private CommonErrorHandler topicTransactionErrorHandler;
5356

57+
private final AtomicInteger inFlightRecords = new AtomicInteger(0);
58+
59+
@Value("${kafka.general.consumer-shutdown-timeout-in-sec:30}")
60+
private int shutdownTimeoutInSeconds;
61+
62+
@EventListener(ContextClosedEvent.class)
63+
public void onContextClosed() {
64+
log.info("Application context closing, performing graceful Kafka shutdown");
65+
performGracefulShutdown();
66+
}
67+
5468
@EventListener
5569
public void handleEvent(ContextRefreshedEvent event) {
5670
log.info("Initializing Kafka Consumers..");
@@ -128,58 +142,122 @@ private Object determineMessageListenerForTransactions(KafkaProperties.Consumer
128142
*/
129143
private Object getMultithreadedBatchAcknowledgingMessageListener() {
130144
return new BatchAcknowledgingMessageListener<String, String>() {
131-
132145
@SneakyThrows
133146
@Override
134147
public void onMessage(
135148
List<ConsumerRecord<String, String>> consumerRecords, Acknowledgment acknowledgment) {
136149
log.debug("Consumer got assigned with a Batch of size : {}", consumerRecords.size());
137150

151+
// Track the number of records we're processing
152+
inFlightRecords.addAndGet(consumerRecords.size());
153+
138154
List<CompletableFuture<Void>> transactionSubmissionTasks = new ArrayList<>();
139155

140156
// Dispatch workers for asynchronously processing Individual records
141157
for (ConsumerRecord<String, String> message : consumerRecords) {
142158
transactionSubmissionTasks.add(
143159
CompletableFuture.runAsync(
144160
() -> {
145-
transactionConsumer.listen(message);
161+
try {
162+
transactionConsumer.listen(message);
163+
} finally {
164+
// No need to decrement here as we'll do it after all tasks complete or fail
165+
}
146166
},
147167
defaultTaskExecutor));
148168
}
149169

170+
boolean batchSuccess = true;
171+
int failedIndex = -1;
172+
150173
for (int i = 0; i < transactionSubmissionTasks.size(); i++) {
151174
try {
152175
transactionSubmissionTasks.get(i).get();
153176
} catch (InterruptedException | ExecutionException e) {
177+
batchSuccess = false;
178+
failedIndex = i;
154179

155180
final Throwable cause = e.getCause();
156181

157182
if (cause instanceof ServiceException) {
158183
log.error(
159184
"One of the Consumer Record in Async Batch Processor failed with message {}",
160185
cause.getMessage());
161-
throw new BatchListenerFailedException(
162-
"Failed to process a Consumer Record from the Batch", i);
163186
}
164187

165188
if (cause instanceof InterruptedException) {
166189
throw e;
167190
}
168191
}
169192
}
193+
194+
// Always decrement the counter for all records in the batch
195+
inFlightRecords.addAndGet(-consumerRecords.size());
196+
170197
// If the entire Records were processed successfully, Ack & commit the entire Batch
171-
acknowledgment.acknowledge();
198+
if (batchSuccess) {
199+
acknowledgment.acknowledge();
200+
} else {
201+
throw new BatchListenerFailedException(
202+
"Failed to process a Consumer Record from the Batch", failedIndex);
203+
}
172204
}
173205
};
174206
}
175207

176208
private Object getPerRecordAcknowledgingListener() {
177-
178209
return (AcknowledgingMessageListener<String, String>)
179210
(message, acknowledgment) -> {
180-
transactionConsumer.listen(message);
181-
// Manually ack the single Record
182-
acknowledgment.acknowledge();
211+
try {
212+
// Increment counter before processing
213+
inFlightRecords.incrementAndGet();
214+
215+
transactionConsumer.listen(message);
216+
// Manually ack the single Record
217+
acknowledgment.acknowledge();
218+
} finally {
219+
// Always decrement counter, even if exception occurred
220+
inFlightRecords.decrementAndGet();
221+
}
183222
};
184223
}
224+
225+
private void performGracefulShutdown() {
226+
log.info("Starting graceful shutdown of Kafka consumers");
227+
228+
// Stop all containers from polling new messages
229+
if (!CollectionUtils.isEmpty(existingContainers)) {
230+
existingContainers.forEach(
231+
container -> {
232+
log.info("Stopping container: {}", container.metrics().keySet().iterator().next());
233+
container.stop();
234+
});
235+
}
236+
237+
// Wait for in-flight messages to be processed
238+
log.info(
239+
"All Kafka containers stopped from polling. Waiting for {} in-flight records to be processed...",
240+
inFlightRecords.get());
241+
242+
long startTime = System.currentTimeMillis();
243+
244+
try {
245+
while (inFlightRecords.get() > 0
246+
&& System.currentTimeMillis() - startTime < (shutdownTimeoutInSeconds * 1000L)) {
247+
log.info("Still waiting for {} records to be acknowledged", inFlightRecords.get());
248+
Thread.sleep(500);
249+
}
250+
} catch (InterruptedException e) {
251+
Thread.currentThread().interrupt();
252+
log.error("Interrupted during shutdown wait", e);
253+
}
254+
255+
if (inFlightRecords.get() > 0) {
256+
log.warn("{} records were not acknowledged before shutdown timeout", inFlightRecords.get());
257+
} else {
258+
log.info("All records successfully processed and acknowledged");
259+
}
260+
261+
log.info("Kafka consumer graceful shutdown completed");
262+
}
185263
}

src/main/resources/application.template

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ fabric:
4040
chaincodeId: chaincode-id of the deployed chaincode in this Channel
4141
listenerTopics: topics to which event messages will be sent
4242
kafka:
43+
general:
44+
consumer-shutdown-timeout-in-sec: <Timeout for graceful shutdown of consumers. During SIGINT this will be the max grace duration for the in-flight records to get committed before exiting the application>
4345
integration-points: <Note : This is a list and Multiple integration points can be configured>
4446
- groupId: test_group_id
4547
enableParallelListenerCapabilities: boolean

0 commit comments

Comments
 (0)