@@ -48,7 +48,11 @@ public List<StreamThread> streamThreads() {
public void setStreamThreadStateListener(final StreamThread.StateListener listener) {
if (state == State.CREATED) {
for (final StreamThread thread : threads) {
thread.setStateListener(listener);
StreamThread.StateListener originalListener = thread.getStateListener();
thread.setStateListener((t, newState, oldState) -> {
originalListener.onChange(t, newState, oldState);
listener.onChange(t, newState, oldState);
});
Member

Why do we need this change? It seems you are trying to allow registering a second state listener? In general, we only allow registering one -- why do we need two? (Also, if we need multiple, why limit it to two?)

Contributor Author

Because it overrides the default one, and then KafkaStreams#state doesn't work.
With this change, if we add a special state listener, we keep the old one as well.
If for some reason we would like to add two additional state listeners, this implementation will work too: we would just call the method twice, it would wrap the listener twice, and all three listeners would be called instead of one.

The reason I changed the method was pretty much that I wanted both to have a listener and to be able to call KafkaStreams#state and get the actual state.

}
} else {
throw new IllegalStateException("Can only set StateListener in CREATED state. " +
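To illustrate the wrapping the author describes, here is a minimal, self-contained sketch of chaining state listeners so that registering a new one keeps all previously registered ones firing. The types and names (ChainableStateListener, ListenerChainSketch, String-based states) are hypothetical simplifications, not part of the PR:

interface ChainableStateListener {
    void onChange(String threadName, String newState, String oldState);
}

final class ListenerChainSketch {
    // Start with a no-op default, standing in for the listener KafkaStreams
    // installs itself so that KafkaStreams#state keeps working.
    private ChainableStateListener current = (t, n, o) -> { };

    // Each registration wraps the current listener: the old one fires first,
    // then the new one, so every registered listener sees every transition.
    void register(final ChainableStateListener listener) {
        final ChainableStateListener previous = current;
        current = (t, n, o) -> {
            previous.onChange(t, n, o);
            listener.onChange(t, n, o);
        };
    }

    void fireTransition(final String threadName, final String newState, final String oldState) {
        current.onChange(threadName, newState, oldState);
    }

    public static void main(final String[] args) {
        final ListenerChainSketch chain = new ListenerChainSketch();
        chain.register((t, n, o) -> System.out.println("first: " + o + " -> " + n));
        chain.register((t, n, o) -> System.out.println("second: " + o + " -> " + n));
        chain.fireTransition("thread-1", "RUNNING", "STARTING");  // both listeners fire
    }
}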
@@ -0,0 +1,212 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.integration;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.LongSerializer;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.serialization.StringSerializer;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.common.utils.MockTime;
import org.apache.kafka.streams.CloseOptions;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.TopologyWrapper;
import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster;
import org.apache.kafka.streams.integration.utils.IntegrationTestUtils;
import org.apache.kafka.streams.processor.StateStore;
import org.apache.kafka.streams.processor.StateStoreContext;
import org.apache.kafka.streams.processor.internals.StreamThread;
import org.apache.kafka.streams.state.KeyValueStore;
import org.apache.kafka.streams.state.StoreBuilder;
import org.apache.kafka.streams.state.internals.AbstractStoreBuilder;
import org.apache.kafka.streams.state.internals.CacheFlushListener;
import org.apache.kafka.streams.state.internals.CachedStateStore;
import org.apache.kafka.streams.state.internals.RocksDBStore;
import org.apache.kafka.test.MockApiProcessorSupplier;
import org.apache.kafka.test.TestUtils;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInfo;

import java.io.IOException;
import java.time.Duration;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicInteger;

import static org.apache.kafka.streams.utils.TestUtils.safeUniqueTestName;
import static org.junit.jupiter.api.Assertions.assertEquals;

public class RebalanceTaskClosureIntegrationTest {

private static final int NUM_BROKERS = 1;
protected static final String INPUT_TOPIC_NAME = "input-topic";
private static final int NUM_PARTITIONS = 3;

private final EmbeddedKafkaCluster cluster = new EmbeddedKafkaCluster(NUM_BROKERS);

private KafkaStreamsWrapper streams1;
private KafkaStreamsWrapper streams2;
private String safeTestName;

@BeforeEach
public void before(final TestInfo testInfo) throws InterruptedException, IOException {
cluster.start();
cluster.createTopic(INPUT_TOPIC_NAME, NUM_PARTITIONS, 1);
safeTestName = safeUniqueTestName(testInfo);
}

@AfterEach
public void after() {
cluster.stop();
if (streams1 != null) {
streams1.close(Duration.ofSeconds(30));
}
if (streams2 != null) {
streams2.close(Duration.ofSeconds(30));
}
}

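// Test outline (a sketch summarizing the review discussion below):
// 1. Start the first Streams app and wait until it is RUNNING.
// 2. Start a second app. The resulting rebalance recycles active tasks that
//    must become standbys, which calls clearCache() on the test store;
//    clearCache() is used as a hook that blocks the first app's stream thread.
// 3. Produce one record so the consumer's poll returns data and does not
//    re-run the rebalance callbacks (see the comment thread on the produce call).
// 4. Close both apps. The state listener releases the blocked thread once it
//    reaches PENDING_SHUTDOWN, letting the rebalance finish.
// 5. Assert that every store init() was matched by a close().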
@Test
public void shouldClosePendingTasksToInitAfterRebalance() throws Exception {
Member

Might be good to add a test description at the top, explaining exactly what we are testing. Something like:

Starting two KS apps
Blocking the first KS app during the rebalance (which is triggered when the second app starts) because <explanation>

and so forth. I am not sure I understand the test yet, and I am sure that if somebody comes back to it at some point in the future, they might struggle, too.

final CountDownLatch recycleLatch = new CountDownLatch(1);
final CountDownLatch pendingShutdownLatch = new CountDownLatch(1);
// Count how many times we initialize and close stores
final AtomicInteger initCount = new AtomicInteger();
final AtomicInteger closeCount = new AtomicInteger();
final StoreBuilder<KeyValueStore<Bytes, byte[]>> storeBuilder = new AbstractStoreBuilder<>("testStateStore", Serdes.Integer(), Serdes.ByteArray(), new MockTime()) {

@Override
public KeyValueStore<Bytes, byte[]> build() {
return new TestRocksDBStore(name, recycleLatch, pendingShutdownLatch, initCount, closeCount);
}
};

final TopologyWrapper topology = new TopologyWrapper();
topology.addSource("ingest", INPUT_TOPIC_NAME);
topology.addProcessor("my-processor", new MockApiProcessorSupplier<>(), "ingest");
topology.addStateStore(storeBuilder, "my-processor");

streams1 = new KafkaStreamsWrapper(topology, props("1"));
streams1.setStreamThreadStateListener((t, newState, oldState) -> {
if (newState == StreamThread.State.PENDING_SHUTDOWN) {
pendingShutdownLatch.countDown();
}
});
streams1.start();

TestUtils.waitForCondition(() -> streams1.state() == KafkaStreams.State.RUNNING, "Streams never reached RUNNING state");

streams2 = new KafkaStreamsWrapper(topology, props("2"));
streams2.start();

TestUtils.waitForCondition(() -> streams2.state() == KafkaStreams.State.RUNNING, "Streams never reached RUNNING state");

// Starting the second KS app triggered a rebalance, which in turn will recycle active tasks that need to become standbys.
// That's exactly what we are waiting for.
recycleLatch.await();

// Send a record so that poll returns data. If pollForFetches returns nothing, the consumer retries the whole sequence of actions,
// including the rebalance callbacks -- which we don't want, because we just staged the right condition.
IntegrationTestUtils.produceKeyValuesSynchronously(INPUT_TOPIC_NAME, List.of(new KeyValue<>(1L, "key")),
Member

Not sure if I understand. "disable retries on the consumer" -- what retries are you referring to, and what consumer exactly?

"it retries the whole sequence of actions, including the rebalance callbacks" -- what sequence are you referring to? What do you mean by "rebalance callbacks"?

"which we don't want, because we just staged the right condition" -- Why do we not want this? Not sure what the "right condition" is, and why we would "lose it" w/o sending an input record.

Contributor Author

I was talking about this code: https://github.com/apache/kafka/blob/trunk/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ClassicKafkaConsumer.java#L639-L671

So updateAssignmentMetadataIfNeeded calls onAssignment and so on, which in turn at some point calls clearCache, where we set up a synchronization point in this test.
However, if pollForFetches returned nothing, we retry the whole thing again, which calls onAssignment again, and that causes problems in the test, because we will not know how many times onAssignment gets called. It can be one, it can be two, it can be more. So in order to always call it exactly once, we want pollForFetches to return something to break the loop right after we get to the right place (in our case, clearCache).

I will update the comment to make it more clear what I mean
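To make the loop concrete, here is a paraphrased, self-contained sketch of the shape of that poll loop. The method names updateAssignmentMetadataIfNeeded and pollForFetches come from the reply above; everything else (PollLoopSketch, the stub bodies, the missing timer logic) is a hypothetical simplification, not the real consumer code:

import java.util.List;

// Paraphrased sketch (not the real ClassicKafkaConsumer code): an empty fetch
// repeats the whole iteration, including updateAssignmentMetadataIfNeeded(),
// which is what re-runs onAssignment() -> clearCache() when no record arrives.
final class PollLoopSketch {
    private int iterations = 0;

    private void updateAssignmentMetadataIfNeeded() {
        // Stand-in: rebalance callbacks (onAssignment -> ... -> clearCache) may run here.
        iterations++;
    }

    private List<String> pollForFetches() {
        // Stand-in: returns nothing for the first two iterations, then a record.
        return iterations < 3 ? List.of() : List.of("record");
    }

    List<String> poll() {
        do {
            updateAssignmentMetadataIfNeeded();
            final List<String> fetch = pollForFetches();
            if (!fetch.isEmpty()) {
                return fetch;  // a non-empty fetch is the only thing that breaks the loop
            }
        } while (true);
    }

    public static void main(final String[] args) {
        final PollLoopSketch sketch = new PollLoopSketch();
        // Prints "[record]" after three iterations -- i.e. the callbacks ran three times.
        System.out.println(sketch.poll() + " after " + sketch.iterations + " iterations");
    }
}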

TestUtils.producerConfig(cluster.bootstrapServers(), LongSerializer.class, StringSerializer.class, new Properties()), cluster.time);
// Now we can close both apps. The StreamThreadStateListener will unblock the clearCache call, letting the rebalance finish.
// We don't want it to happen any sooner, because we want the stream thread to stop before it gets to moving messages from task registry to state updater.
Member

"before it gets to moving messages from task registry to state updater" -- do you mean "tasks" instead of "messages"?

streams1.close(CloseOptions.groupMembershipOperation(CloseOptions.GroupMembershipOperation.LEAVE_GROUP));
streams2.close(CloseOptions.groupMembershipOperation(CloseOptions.GroupMembershipOperation.LEAVE_GROUP));

assertEquals(initCount.get(), closeCount.get());
}

private Properties props(final String storePathSuffix) {
final Properties streamsConfiguration = new Properties();

streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, safeTestName);
streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers());
streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
Member

This is the default IIRC -- no need to set it

streamsConfiguration.put(ConsumerConfig.DEFAULT_API_TIMEOUT_MS_CONFIG, 1000);
streamsConfiguration.put(ConsumerConfig.REQUEST_TIMEOUT_MS_CONFIG, 1000);
streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getPath() + "/" + storePathSuffix);
streamsConfiguration.put(StreamsConfig.STATESTORE_CACHE_MAX_BYTES_CONFIG, 0);
streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 100L);
streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.LongSerde.class);
streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.StringSerde.class);
streamsConfiguration.put(StreamsConfig.NUM_STREAM_THREADS_CONFIG, 1);
Member

Same. Already the default.

streamsConfiguration.put(StreamsConfig.NUM_STANDBY_REPLICAS_CONFIG, 1);

return streamsConfiguration;
}

private static class TestRocksDBStore extends RocksDBStore implements CachedStateStore<Bytes, byte[]> {

private final CountDownLatch recycleLatch;
private final CountDownLatch pendingShutdownLatch;
private final AtomicInteger initCount;
private final AtomicInteger closeCount;

public TestRocksDBStore(final String name,
final CountDownLatch recycleLatch,
final CountDownLatch pendingShutdownLatch,
final AtomicInteger initCount,
final AtomicInteger closeCount) {
super(name, "rocksdb");
this.recycleLatch = recycleLatch;
this.pendingShutdownLatch = pendingShutdownLatch;
this.initCount = initCount;
this.closeCount = closeCount;
}

@Override
public void init(final StateStoreContext stateStoreContext,
final StateStore root) {
initCount.incrementAndGet();
super.init(stateStoreContext, root);
}

@Override
public boolean setFlushListener(final CacheFlushListener<Bytes, byte[]> listener,
final boolean sendOldValues) {
return false;
Member

Why not return super.setFlushListener(...)?

Contributor Author

As we only implement the CachedStateStore interface here, there is no super implementation to call.

}

@Override
public void flushCache() {
}
Member

Should we call super.flushCache()?

Contributor Author

As we only implement the CachedStateStore interface here, there is no super implementation to call.


@Override
public void clearCache() {
Member

Should we call super.clearCache() at some point (not sure if in the beginning or at the end of the method)?

Contributor Author

As we only implement the CachedStateStore interface here, there is no super implementation to call.

// clearCache() is called while a task is being recycled, so we use it as a synchronization hook
recycleLatch.countDown();
try {
pendingShutdownLatch.await();
} catch (final InterruptedException e) {
Thread.currentThread().interrupt(); // restore the interrupt flag before rethrowing
throw new RuntimeException(e);
}
}

@Override
public synchronized void close() {
closeCount.incrementAndGet();
super.close();
}
}

}
@@ -1414,10 +1414,19 @@ void shutdown(final boolean clean) {
// TODO: change type to `StreamTask`
final Set<Task> activeTasks = new TreeSet<>(Comparator.comparing(Task::id));
activeTasks.addAll(tasks.activeTasks());
final Set<Task> standbyTasks = new TreeSet<>(Comparator.comparing(Task::id));
standbyTasks.addAll(tasks.standbyTasks());
for (Task pendingTask : tasks.pendingTasksToInit()) {
if (pendingTask.isActive()) {
activeTasks.add(pendingTask);
} else {
standbyTasks.add(pendingTask);
}
}

executeAndMaybeSwallow(
clean,
() -> closeAndCleanUpTasks(activeTasks, standbyTaskIterable(), clean),
() -> closeAndCleanUpTasks(activeTasks, standbyTasks, clean),
e -> firstException.compareAndSet(null, e),
e -> log.warn("Ignoring an exception while unlocking remaining task directories.", e)
);
@@ -1523,7 +1532,7 @@ private Collection<Task> tryCloseCleanActiveTasks(final Collection<Task> activeT
final boolean clean,
final AtomicReference<RuntimeException> firstException) {
if (!clean) {
return activeTaskIterable();
return activeTasksToClose;
Member

For my own understanding: the original code here was not wrong, in the sense that before this PR the activeTasksToClose that got passed in was the same as activeTaskIterable()? But with this PR we also include pending tasks in activeTasksToClose, and that's why we need to update the code here?

Contributor Author

The reason I decided to change this is what happens if we follow the call:

  • https://github.com/apache/kafka/blob/trunk/streams/src/main/java/org/apache/kafka/streams/processor/internals/TaskManager.java#L1405 -- here we pass two collections of tasks to close. The active one used to be activeTaskIterable() as well, but that was changed at some point.
  • Then we get into closeAndCleanUpTasks, and from there into tryCloseCleanActiveTasks and tryCloseCleanStandbyTasks.
  • At this point we check whether we even need to try to close them cleanly; if not, the method "returns the set of active tasks that must be closed dirty".
  • Here it makes sense to return the collection we received, not a different one, because it's a potential place for a bug: we ask the method to close one set of tasks, but it closes a different set.

Even without my change it's already a bit incorrect, because we take the active tasks from the task registry and from the state updater: https://github.com/apache/kafka/blob/trunk/streams/src/main/java/org/apache/kafka/streams/processor/internals/TaskManager.java#L1708-L1709. That isn't really a big deal, because we handle the state updater before this call, so there should be nothing there.
But if that changes, it will be pretty hard to notice the slight change in behavior.

On top of that, as you mentioned, this list doesn't contain the pending tasks, so if we keep it as is, we will not close them on a dirty shutdown.
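A minimal illustration of the contract argued for here (all names hypothetical, not PR code): a close helper should report its dirty-close candidates from exactly the collection it was handed, so the caller's "close these" set and the helper's "closed dirty" set cannot diverge:

import java.util.Collection;
import java.util.Set;

// Hypothetical sketch of the return-what-you-received contract.
final class DirtyCloseContractSketch {
    // Returns the tasks that must still be closed dirty.
    static Collection<String> tryCloseClean(final Collection<String> tasksToClose, final boolean clean) {
        if (!clean) {
            // Return the collection we received, not one re-read from a registry
            // that may have drifted from the caller's view.
            return tasksToClose;
        }
        // ... attempt a clean close here; on success nothing is left to close dirty ...
        return Set.of();
    }

    public static void main(final String[] args) {
        // Prints both task ids: with clean == false, the input comes straight back.
        System.out.println(tryCloseClean(Set.of("0_0", "0_1"), false));
    }
}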

}
final Comparator<Task> byId = Comparator.comparing(Task::id);
final Set<Task> tasksToCommit = new TreeSet<>(byId);
@@ -1616,7 +1625,7 @@ private Collection<Task> tryCloseCleanStandbyTasks(final Collection<Task> standb
final boolean clean,
final AtomicReference<RuntimeException> firstException) {
if (!clean) {
return standbyTaskIterable();
return standbyTasksToClose;
}
final Set<Task> tasksToCloseDirty = new TreeSet<>(Comparator.comparing(Task::id));

@@ -193,7 +193,9 @@ public synchronized void removeTask(final Task taskToRemove) {
throw new IllegalStateException("Attempted to remove a task that is not closed or suspended: " + taskId);
}

if (taskToRemove.isActive()) {
if (pendingTasksToInit.contains(taskToRemove)) {
Member

We throw above if a task is not CLOSED or SUSPENDED, but a pendingTaskToInit should be in state CREATED? -- Do we need to update the above condition?

Are we testing this code path properly? If no test fails, it seems we never call removeTask() with a pending task?

Contributor Author

It's supposed to be CLOSED too.
I added it because in TaskManager, when we close a task, we remove it from the task registry (e.g. https://github.com/apache/kafka/blob/trunk/streams/src/main/java/org/apache/kafka/streams/processor/internals/TaskManager.java#L1405), so we expect the pending task to be closed as well.

pendingTasksToInit.remove(taskToRemove);
} else if (taskToRemove.isActive()) {
if (activeTasksPerId.remove(taskId) == null) {
throw new IllegalArgumentException("Attempted to remove an active task that is not owned: " + taskId);
}
@@ -203,7 +205,7 @@ public synchronized void removeTask(final Task taskToRemove) {
throw new IllegalArgumentException("Attempted to remove a standby task that is not owned: " + taskId);
}
}
failedTaskIds.remove(taskToRemove.id());
failedTaskIds.remove(taskId);
}

@Override
@@ -301,6 +303,11 @@ public synchronized Collection<Task> activeTasks() {
return Collections.unmodifiableCollection(activeTasksPerId.values());
}

@Override
public synchronized Collection<Task> standbyTasks() {
return Collections.unmodifiableCollection(standbyTasksPerId.values());
}

/**
* All tasks returned by any of the getters are read-only and should NOT be modified;
* and the returned task could be modified by other threads concurrently
@@ -71,6 +71,8 @@ public interface TasksRegistry {

Collection<Task> activeTasks();

Collection<Task> standbyTasks();

Set<Task> allTasks();

Set<Task> allNonFailedTasks();
@@ -312,12 +312,8 @@ public void shouldLockTasksOnClose() {
final StreamTask activeTask1 = statefulTask(taskId00, taskId00ChangelogPartitions)
.inState(State.RUNNING)
.withInputPartitions(taskId00Partitions).build();
final StreamTask activeTask2 = statefulTask(taskId01, taskId01ChangelogPartitions)
.inState(State.RUNNING)
.withInputPartitions(taskId01Partitions).build();
final TasksRegistry tasks = mock(TasksRegistry.class);
final TaskManager taskManager = setUpTaskManager(ProcessingMode.AT_LEAST_ONCE, tasks, true);
when(tasks.allTasks()).thenReturn(Set.of(activeTask1, activeTask2));
final KafkaFuture<Void> mockFuture = KafkaFuture.completedFuture(null);
when(schedulingTaskManager.lockTasks(any())).thenReturn(mockFuture);

@@ -3501,7 +3497,6 @@ public void shouldCloseActiveTasksAndIgnoreExceptionsOnUncleanShutdown() {
.when(task02).suspend();
doThrow(new RuntimeException("whatever")).when(activeTaskCreator).close();

when(tasks.allTasks()).thenReturn(Set.of(task00, task01, task02));
when(tasks.activeTasks()).thenReturn(Set.of(task00, task01, task02));

taskManager.shutdown(false);
@@ -3535,6 +3530,7 @@ public void shouldCloseStandbyTasksOnShutdown() {

when(stateUpdater.tasks()).thenReturn(Set.of(standbyTask00)).thenReturn(Set.of());
when(stateUpdater.standbyTasks()).thenReturn(Set.of(standbyTask00));
when(tasks.standbyTasks()).thenReturn(Set.of(standbyTask00));

final CompletableFuture<StateUpdater.RemovedTaskResult> futureForStandbyTask = new CompletableFuture<>();
when(stateUpdater.remove(taskId00)).thenReturn(futureForStandbyTask);
@@ -3667,6 +3663,34 @@ public void shouldShutDownStateUpdaterAndCloseDirtyTasksFailedDuringRemoval() {
verify(removedFailedStandbyTaskDuringRemoval).closeDirty();
}

@Test
public void shouldShutDownPendingTasksToInit() {
final TasksRegistry tasks = mock(TasksRegistry.class);
final TaskManager taskManager = setUpTaskManager(ProcessingMode.AT_LEAST_ONCE, tasks);

final StandbyTask standbyTask00 = standbyTask(taskId00, taskId00ChangelogPartitions)
.inState(State.RUNNING)
.withInputPartitions(taskId00Partitions)
.build();

final StreamTask activeTask01 = statefulTask(taskId01, taskId00ChangelogPartitions)
.inState(State.RUNNING)
.withInputPartitions(taskId00Partitions).build();

when(tasks.pendingTasksToInit()).thenReturn(Set.of(standbyTask00, activeTask01));

taskManager.shutdown(true);

verify(standbyTask00).prepareCommit(true);
verify(standbyTask00).postCommit(true);
verify(standbyTask00).suspend();
verify(standbyTask00).closeClean();

verify(activeTask01).prepareCommit(true);
verify(activeTask01).suspend();
verify(activeTask01).closeClean();
}

@Test
public void shouldInitializeNewStandbyTasks() {
final StandbyTask task01 = standbyTask(taskId01, taskId01ChangelogPartitions)