Skip to content

Commit 820771e

Browse files
ConnectionPool: fix continuation leak when idle-timer reschedules (#641)
When the idle-timeout timer fires while a lease request is queued (because a keep-alive is consuming the connection's only stream), `connectionIdleTimerTriggered` takes the `rescheduleIdleTimer` path. That path replaced the stale idle timer with a fresh one but dropped the old timer's `cancellationContinuation` — a `CheckedContinuation<Void, Never>` registered by `ConnectionPool.runTimer`'s child task in `withCheckedContinuation`. Dropped without being resumed, the Swift runtime eventually emits: SWIFT TASK CONTINUATION MISUSE: runTimer(_:in:) leaked its continuation without resuming it. Fix: - `ConnectionState.rescheduleIdleTimer()` and `ConnectionGroup.rescheduleIdleTimer(_:)` now also return the old idle timer's `TimerCancellationToken`. - `PoolStateMachine.connectionIdleTimerTriggered(_:)` forwards that token via `.makeConnectionsCancelAndScheduleTimers` so `ConnectionPool.runConnectionAction` resumes the continuation before scheduling the new timer. A new regression test in `PoolStateMachineTests` drives the state machine into the exact race and asserts the returned action surfaces the old token. The test fails on `main` and passes with this fix. Signed-off-by: Lukas Tetzlaff <lukastetzlaff@gmail.com> Co-authored-by: Fabian Fett <fabianfett@apple.com>
1 parent b0bbd59 commit 820771e

4 files changed

Lines changed: 114 additions & 6 deletions

File tree

Sources/ConnectionPoolModule/PoolStateMachine+ConnectionGroup.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -698,7 +698,7 @@ extension PoolStateMachine {
698698
}
699699

700700
@inlinable
701-
mutating func rescheduleIdleTimer(_ connectionID: Connection.ID) -> ConnectionTimer? {
701+
mutating func rescheduleIdleTimer(_ connectionID: Connection.ID) -> (ConnectionTimer, TimerCancellationToken?)? {
702702
guard let index = self.connections.firstIndex(where: { $0.id == connectionID }) else {
703703
// because of a race this connection (connection close runs against trigger of timeout)
704704
// was already removed from the state machine.

Sources/ConnectionPoolModule/PoolStateMachine+ConnectionState.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -336,16 +336,16 @@ extension PoolStateMachine {
336336
}
337337

338338
@inlinable
339-
mutating func rescheduleIdleTimer() -> ConnectionTimer? {
339+
mutating func rescheduleIdleTimer() -> (ConnectionTimer, TimerCancellationToken?)? {
340340
switch self.state {
341341
case .backingOff, .starting:
342342
preconditionFailure("Invalid state: \(self.state)")
343343

344-
case .idle(let connection, let maxStreams, let keepAlive, .some):
344+
case .idle(let connection, let maxStreams, let keepAlive, .some(let oldIdleTimer)):
345345
let idleTimerState = self._nextTimer()
346346
let idleTimer = ConnectionTimer(timerID: idleTimerState.timerID, connectionID: self.id, usecase: .idleTimeout)
347347
self.state = .idle(connection, maxStreams: maxStreams, keepAlive: keepAlive, idleTimer: idleTimerState)
348-
return idleTimer
348+
return (idleTimer, oldIdleTimer.cancellationContinuation)
349349

350350
case .idle(_,_,_, .none):
351351
return nil

Sources/ConnectionPoolModule/PoolStateMachine.swift

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -620,8 +620,26 @@ struct PoolStateMachine<
620620
// might pickup the lease request (which would cancel the idle timeout) or the lease request might
621621
// already be handled by another (currently busy connection), in which case the idle timeout can
622622
// trigger eventually.
623-
let timer = self.connections.rescheduleIdleTimer(connectionID)
624-
return .init(request: .none, connection: .scheduleTimers(timer.map { [self.mapTimers($0)] } ?? []))
623+
guard let (newTimer, oldCancellationToken) = self.connections.rescheduleIdleTimer(connectionID) else {
624+
return .none()
625+
}
626+
let scheduledTimers = Max2Sequence(self.mapTimers(newTimer))
627+
// We must propagate the old idle timer's cancellation continuation so that the
628+
// `ConnectionPool.runTimer` child task that stored it can be resumed. Dropping it here
629+
// produces a "SWIFT TASK CONTINUATION MISUSE: runTimer(_:in:) leaked its continuation
630+
// without resuming it" runtime warning (the stored `CheckedContinuation` is never
631+
// resumed and eventually deallocated).
632+
if let oldCancellationToken {
633+
return .init(
634+
request: .none,
635+
connection: .makeConnectionsCancelAndScheduleTimers(
636+
.init(),
637+
.init(element: oldCancellationToken),
638+
scheduledTimers
639+
)
640+
)
641+
}
642+
return .init(request: .none, connection: .scheduleTimers(scheduledTimers))
625643
}
626644

627645
guard let closeAction = self.connections.closeConnectionIfIdle(connectionID) else {

Tests/ConnectionPoolModuleTests/PoolStateMachineTests.swift

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1398,6 +1398,96 @@ typealias TestPoolStateMachine = PoolStateMachine<
13981398
// Should only lease 1 (the waiting request count)
13991399
#expect(leasedRequests.count == 1)
14001400
}
1401+
1402+
/// Regression test: when the idle-timeout timer fires while a lease request is waiting
1403+
/// in the queue (because a keep-alive is currently consuming the connection's only
1404+
/// stream), `connectionIdleTimerTriggered` reschedules a fresh idle timer. Prior to the
1405+
/// fix, the old idle timer's `TimerCancellationToken` was silently dropped, which in
1406+
/// `ConnectionPool.runTimer` manifests as:
1407+
///
1408+
/// SWIFT TASK CONTINUATION MISUSE: runTimer(_:in:) leaked its continuation
1409+
/// without resuming it.
1410+
///
1411+
/// because the stored `CheckedContinuation<Void, Never>` is never resumed. This test
1412+
/// drives the state machine into that exact race and asserts the returned action
1413+
/// surfaces the old cancellation token so the caller can resume it.
1414+
@available(macOS 13.0, iOS 16.0, tvOS 16.0, watchOS 9.0, *)
1415+
@Test func testIdleTimerReschedulePreservesOldCancellationToken() {
1416+
var configuration = PoolConfiguration()
1417+
configuration.minimumConnectionCount = 0
1418+
configuration.maximumConnectionSoftLimit = 1
1419+
configuration.maximumConnectionHardLimit = 1
1420+
configuration.keepAliveDuration = .seconds(2)
1421+
configuration.idleTimeoutDuration = .seconds(4)
1422+
1423+
var stateMachine = TestPoolStateMachine(
1424+
configuration: configuration,
1425+
generator: .init(),
1426+
timerCancellationTokenType: MockTimerCancellationToken.self,
1427+
clock: MockClock()
1428+
)
1429+
1430+
// Lease a request — triggers creation of a demand connection.
1431+
let request1 = MockRequest(connectionType: MockConnection.self)
1432+
let leaseAction1 = stateMachine.leaseConnection(request1)
1433+
guard case .makeConnection(let makeRequest, _) = leaseAction1.connection else {
1434+
Issue.record("expected .makeConnection, got \(leaseAction1.connection)")
1435+
return
1436+
}
1437+
1438+
// Establish the connection — because the request is queued it is leased immediately.
1439+
let connection = MockConnection(id: makeRequest.connectionID)
1440+
let establishedAction = stateMachine.connectionEstablished(connection, maxStreams: 1)
1441+
#expect(establishedAction.request == .leaseConnection(.init(element: request1), connection))
1442+
1443+
// Release the connection — parks with both keep-alive and idle timers because this
1444+
// is a demand connection (index >= minimumConcurrentConnections).
1445+
let releaseAction = stateMachine.releaseConnection(connection, streams: 1)
1446+
let keepAliveTimer = TestPoolStateMachine.Timer(
1447+
.init(timerID: 0, connectionID: connection.id, usecase: .keepAlive),
1448+
duration: configuration.keepAliveDuration!
1449+
)
1450+
let idleTimer = TestPoolStateMachine.Timer(
1451+
.init(timerID: 1, connectionID: connection.id, usecase: .idleTimeout),
1452+
duration: configuration.idleTimeoutDuration
1453+
)
1454+
#expect(releaseAction.connection == .scheduleTimers([keepAliveTimer, idleTimer]))
1455+
1456+
// Register cancellation tokens for both timers (simulates the child task in
1457+
// `ConnectionPool.runTimer` storing its continuation via `timerScheduled`).
1458+
let keepAliveToken = MockTimerCancellationToken(keepAliveTimer)
1459+
let idleToken = MockTimerCancellationToken(idleTimer)
1460+
#expect(stateMachine.timerScheduled(keepAliveTimer, cancelContinuation: keepAliveToken) == nil)
1461+
#expect(stateMachine.timerScheduled(idleTimer, cancelContinuation: idleToken) == nil)
1462+
1463+
// Keep-alive fires — the connection transitions to keepAlive: .running, idleTimer: .some.
1464+
let keepAliveFired = stateMachine.timerTriggered(keepAliveTimer)
1465+
#expect(keepAliveFired.connection == .runKeepAlive(connection, keepAliveToken))
1466+
1467+
// Queue a second request. Because the running keep-alive consumes the only stream,
1468+
// the request stays in the queue.
1469+
let request2 = MockRequest(connectionType: MockConnection.self)
1470+
_ = stateMachine.leaseConnection(request2)
1471+
1472+
// Idle timer fires while the keep-alive is still running and the queue is non-empty.
1473+
// This is the `rescheduleIdleTimer` code path.
1474+
let newIdleTimer = TestPoolStateMachine.Timer(
1475+
.init(timerID: 2, connectionID: connection.id, usecase: .idleTimeout),
1476+
duration: configuration.idleTimeoutDuration
1477+
)
1478+
let idleFired = stateMachine.timerTriggered(idleTimer)
1479+
1480+
// The returned action must carry the old idle timer's cancellation token so the
1481+
// pool can resume its continuation. Before the fix this was an empty
1482+
// `.scheduleTimers([newIdleTimer])` and `idleToken` was dropped on the floor.
1483+
#expect(
1484+
idleFired.connection == .makeConnectionsCancelAndScheduleTimers(
1485+
.init(),
1486+
.init(element: idleToken),
1487+
.init(newIdleTimer)
1488+
)
1489+
)
1490+
}
14011491
}
14021492

14031493
struct SomeError: Error {}

0 commit comments

Comments
 (0)