2929#import " MTRMetricsCollector.h"
3030#import " MTRMetrics_Internal.h"
3131
32+ #if DEBUG
33+ #import " MTRCommissioningOperation_Test.h"
34+ #endif
35+
36+ #include < math.h>
37+ #include < stdatomic.h>
38+
3239#include < controller/ExampleOperationalCredentialsIssuer.h>
3340#include < lib/core/CHIPError.h>
3441#include < setup_payload/SetupPayload.h>
3542
3643using namespace chip ;
3744using namespace chip ::Tracing::DarwinFramework;
3845
46+ // Interval after which the post-PASE watchdog fires. The watchdog releases the
47+ // controller's single in-flight commissioning slot if a client establishes PASE
48+ // (and asks to be notified via paseSessionEstablishmentComplete:) but then never
49+ // calls commissionNodeWithID: or cancelCommissioningForNodeID:. Without it,
50+ // currentCommissioning is never cleared and every subsequent commissioning
51+ // attempt fails with CHIP_ERROR_BUSY (0xDB) until the process restarts.
52+ //
53+ // Five minutes is well past the commissionee fail-safe (60-180s): the watchdog
54+ // is not racing the on-device handshake (the fail-safe handles that), it only
55+ // reclaims our local slot for a client that has gone away, so it is generous
56+ // enough not to tear a slow-but-live client out from under the user.
57+ static const NSTimeInterval kMTRPostPASEWatchdogInterval = 5 * 60 ;
58+
59+ #if DEBUG
60+ // Test-only override for kMTRPostPASEWatchdogInterval so tests can exercise the
61+ // watchdog without waiting five minutes. Atomic because the test setter may run
62+ // on a different thread than the _delegateQueue read in -_armPostPASEWatchdog.
63+ // Only compiled into DEBUG (test) builds; never ships in the release framework.
64+ static _Atomic (NSTimeInterval ) sMTRPostPASEWatchdogIntervalForTesting = 0;
65+ #endif
66+
3967@interface MTRCommissioningOperationDeviceAttestationDelegate : NSObject <MTRDeviceAttestationDelegate>
4068@property (nonatomic , weak ) MTRCommissioningOperation * commissioningOperation;
4169@end
@@ -49,6 +77,41 @@ @implementation MTRCommissioningOperation {
4977 id <MTRCommissioningDelegate> __weak _delegate;
5078 dispatch_queue_t _delegateQueue;
5179 MTRDeviceController_Concrete * __weak _controller;
80+ // Watchdog timer armed once PASE has been established to bound how long we
81+ // hold the controller in the "in-progress commissioning" state if the
82+ // client drops the flow without ever calling commissionNodeWithID: or
83+ // cancelCommissioningForNodeID:. Without this watchdog,
84+ // currentCommissioning is never cleared and every subsequent commissioning
85+ // attempt hits CHIP_ERROR_BUSY (0xDB) until the process restarts.
86+ //
87+ // _postPASEWatchdog is only ever created, cancelled, or cleared on
88+ // _delegateQueue (the queue all delegate callbacks and the timer's
89+ // event_handler run on), so it needs no separate lock.
90+ dispatch_source_t _postPASEWatchdog;
91+ // Backing storage for isWaitingAfterPASEEstablished. Accessed via C11
92+ // atomics because it is read and written from arbitrary client queues
93+ // (MTRDeviceController_Concrete maintains it) as well as _delegateQueue.
94+ _Atomic (BOOL ) _isWaitingAfterPASEEstablished;
95+ }
96+
97+ @synthesize commissioningID = _commissioningID;
98+
99+ - (BOOL )isWaitingAfterPASEEstablished
100+ {
101+ return atomic_load_explicit (&_isWaitingAfterPASEEstablished, memory_order_relaxed);
102+ }
103+
104+ - (void )setIsWaitingAfterPASEEstablished : (BOOL )isWaitingAfterPASEEstablished
105+ {
106+ BOOL previous = atomic_exchange_explicit (&_isWaitingAfterPASEEstablished, isWaitingAfterPASEEstablished, memory_order_relaxed);
107+ if (previous && !isWaitingAfterPASEEstablished) {
108+ // The client has either started commissioning (via commissionNodeWithID:)
109+ // or moved on; the watchdog no longer needs to fire. Cancel it on
110+ // _delegateQueue, which is where _postPASEWatchdog is owned.
111+ dispatch_async (_delegateQueue, ^{
112+ [self _cancelPostPASEWatchdog ];
113+ });
114+ }
52115}
53116
54117- (instancetype )initWithParameters : (MTRCommissioningParameters *)parameters
@@ -92,8 +155,14 @@ - (instancetype)initWithParameters:(MTRCommissioningParameters *)parameters
92155 }
93156
94157 _parameters = [parameters copy ];
95- _setupPayload = payload;
96- _commissioningID = commissioningID;
158+ // Honor the `copy` semantic on the @property declaration: a caller passing
159+ // an NSMutableString must not be able to mutate our stored payload after
160+ // construction. The implicit-cancel path in setupCommissioningSessionWithPayload:
161+ // compares the stored payload against the caller's freshly-derived
162+ // pairingCode via -isEqualToString:, so any post-construction drift here
163+ // would silently corrupt that decision.
164+ _setupPayload = [payload copy ];
165+ _commissioningID = [commissioningID copy ];
97166 _isInternallyCreated = isInternallyCreated;
98167 _delegate = delegate;
99168 _delegateQueue = queue;
@@ -104,6 +173,112 @@ - (instancetype)initWithParameters:(MTRCommissioningParameters *)parameters
104173 return self;
105174}
106175
176+ - (void )dealloc
177+ {
178+ // Defensive: ensure the watchdog timer doesn't outlive us. No other strong
179+ // reference can exist (or we wouldn't be deallocating) and the timer's
180+ // event_handler retains self for its duration, so it cannot be in flight
181+ // here; a synchronous cancel is safe.
182+ if (_postPASEWatchdog) {
183+ dispatch_source_cancel (_postPASEWatchdog);
184+ _postPASEWatchdog = nil ;
185+ }
186+ }
187+
188+ // Must be called on _delegateQueue.
189+ - (BOOL )_armPostPASEWatchdog
190+ {
191+ if (_postPASEWatchdog) {
192+ // Already armed; should not happen, but be defensive.
193+ return YES ;
194+ }
195+
196+ NSTimeInterval intervalSeconds = kMTRPostPASEWatchdogInterval ;
197+ #if DEBUG
198+ NSTimeInterval intervalOverride = atomic_load_explicit (&sMTRPostPASEWatchdogIntervalForTesting , memory_order_relaxed);
199+ if (intervalOverride > 0 ) {
200+ intervalSeconds = intervalOverride;
201+ }
202+ #endif
203+
204+ dispatch_source_t timer = dispatch_source_create (DISPATCH_SOURCE_TYPE_TIMER, 0 , 0 , _delegateQueue);
205+ if (!timer) {
206+ MTR_LOG_ERROR (" %@ failed to create post-PASE watchdog timer" , self);
207+ return NO ;
208+ }
209+
210+ dispatch_source_set_timer (timer,
211+ dispatch_time (DISPATCH_TIME_NOW, (int64_t ) (intervalSeconds * NSEC_PER_SEC)),
212+ DISPATCH_TIME_FOREVER,
213+ 1 * NSEC_PER_SEC);
214+
215+ mtr_weakify (self);
216+ dispatch_source_set_event_handler (timer, ^{
217+ mtr_strongify (self);
218+ if (!self) {
219+ return ;
220+ }
221+ // Late-fire guard: the timer block may already have been enqueued on
222+ // _delegateQueue ahead of a cancel that flipped the waiting flag back
223+ // to NO. If the client has legitimately advanced, do not fire.
224+ if (!self.isWaitingAfterPASEEstablished ) {
225+ return ;
226+ }
227+ [self _firePostPASEWatchdog ];
228+ });
229+
230+ dispatch_resume (timer);
231+ _postPASEWatchdog = timer;
232+ return YES ;
233+ }
234+
235+ // Must be called on _delegateQueue (it mutates _postPASEWatchdog).
236+ - (void )_cancelPostPASEWatchdog
237+ {
238+ if (_postPASEWatchdog) {
239+ dispatch_source_cancel (_postPASEWatchdog);
240+ _postPASEWatchdog = nil ;
241+ }
242+ }
243+
244+ // Must be called on _delegateQueue (it is the watchdog's event_handler).
245+ - (void )_firePostPASEWatchdog
246+ {
247+ [self _cancelPostPASEWatchdog ];
248+
249+ MTRDeviceController_Concrete * strongController = _controller;
250+ MTR_LOG_ERROR (" %@ post-PASE watchdog fired for commissioning %@ -- client never advanced past paseSessionEstablishmentComplete; cancelling commissioning to release controller" , self, _commissioningID);
251+
252+ if (!strongController) {
253+ // Controller is gone -- the wedge we wanted to prevent is moot (no
254+ // controller state to reclaim), but still drive our own terminal error
255+ // so the delegate state settles.
256+ [self _dispatchCommissioningError: [MTRError errorForCHIPErrorCode: CHIP_ERROR_TIMEOUT]];
257+ return ;
258+ }
259+
260+ // If a successor MTRCommissioningOperation has already taken our slot, that
261+ // path already cleared our delegate via commissioningDone:, so do not
262+ // dispatch a terminal error -- it would tear down the successor's state.
263+ if (strongController.currentCommissioning != self) {
264+ MTR_LOG (" %@ post-PASE watchdog fired but commissioning %@ was already replaced; suppressing spurious timeout" , self, _commissioningID);
265+ return ;
266+ }
267+
268+ // Single-notification contract: we want to deliver exactly one terminal
269+ // error -- TIMEOUT, the user-visible "we gave up waiting for you to advance
270+ // past PASE" signal -- rather than letting -_cancelCommissioning: emit its
271+ // own CANCELLED. Unlike the client-initiated -stop path (which must leave
272+ // isWaitingAfterPASEEstablished YES so -_cancelCommissioning: synthesizes
273+ // the completion that frees the controller), here *we* own the terminal
274+ // notification, so clear the flag first -- on _delegateQueue, where it is
275+ // owned -- so -_cancelCommissioning: observes NO and stays quiet. The
276+ // watchdog timer is already cancelled above.
277+ atomic_store_explicit (&_isWaitingAfterPASEEstablished, NO , memory_order_relaxed);
278+ [self stop ];
279+ [self _dispatchCommissioningError: [MTRError errorForCHIPErrorCode: CHIP_ERROR_TIMEOUT]];
280+ }
281+
107282static inline void emitMetricForSetupPayload (NSString * payload)
108283{
109284 std::vector<SetupPayload> payloads;
@@ -169,15 +344,52 @@ - (void)startWithController:(MTRDeviceController *)controller
169344
170345- (BOOL )stop
171346{
347+ // Tear down the watchdog *timer* but deliberately do NOT clear
348+ // isWaitingAfterPASEEstablished here. -_cancelCommissioning:forNodeID:error:
349+ // reads that flag to decide whether it must synthesize the
350+ // OnCommissioningComplete(CANCELLED) notification itself: when we are parked
351+ // at the post-PASE wait state there is no in-flight PASE/commissioning on
352+ // the C++ side, so StopPairing produces no callback and the only thing that
353+ // drives the client's completion (and releases the controller's single
354+ // commissioning slot) is that synthesized notification. If we cleared the
355+ // flag before calling into the controller, -_cancelCommissioning: would
356+ // observe NO, skip the synthesized completion, and the operation would hang
357+ // forever -- wedging the controller for all subsequent commissioning.
358+ // -_cancelCommissioning: itself clears the flag once it has fired the
359+ // notification.
360+ //
361+ // Cancelling the timer is sufficient to satisfy the watchdog's purpose for
362+ // this code path: an explicit stop is exactly the "client advanced/gave up"
363+ // signal the watchdog exists to substitute for, so it must not fire after.
364+ dispatch_async (_delegateQueue, ^{
365+ [self _cancelPostPASEWatchdog ];
366+ });
367+
172368 MTRDeviceController_Concrete * strongController = _controller;
173369 if (!strongController) {
174370 // Nothing to do; controller is gone, so we are stopped no matter what.
175371 return NO ;
176372 }
177-
178373 return [strongController stopCommissioning: self forCommissioningID: _commissioningID];
179374}
180375
376+ // Clears the waiting-after-PASE flag and tears down the watchdog. Safe to call
377+ // from any queue: the flag write is atomic, and the dispatch_source cancel is
378+ // bounced onto _delegateQueue, which owns _postPASEWatchdog.
379+ //
380+ // Use this on terminal paths that do NOT route through -_cancelCommissioning:
381+ // (e.g. -_dispatchCommissioningError: and the terminal-success notification),
382+ // where there is no downstream consumer of the flag. Do NOT use it ahead of a
383+ // -_cancelCommissioning: call that needs to observe the flag as YES so it can
384+ // synthesize OnCommissioningComplete (see -stop).
385+ - (void )_clearPostPASEWaiting
386+ {
387+ atomic_store_explicit (&_isWaitingAfterPASEEstablished, NO , memory_order_relaxed);
388+ dispatch_async (_delegateQueue, ^{
389+ [self _cancelPostPASEWatchdog ];
390+ });
391+ }
392+
181393- (void )_earlyFailCommissioning : (CHIP_ERROR)error
182394{
183395 // This handles failures before we have cleared the way for doing a commissioning at all, and in
@@ -208,6 +420,10 @@ - (void)_dispatchCommissioningError:(NSError *)error withMetrics:(MTRMetrics *)m
208420
209421- (void )_dispatchCommissioningError : (NSError *)error forCommissioningID : (NSNumber *)commissioningID withMetrics : (MTRMetrics *)metrics
210422{
423+ // Any terminal error path implies the watchdog (if armed) no longer needs
424+ // to fire, so clear the waiting flag and tear it down.
425+ [self _clearPostPASEWaiting ];
426+
211427 MTRDeviceController_Concrete * strongController = _controller;
212428
213429 MTR_LOG (" %@ Device commissioning failed with controller %@ metrics %@" , self, strongController, metrics);
@@ -263,7 +479,21 @@ - (void)controller:(MTRDeviceController *)controller commissioningSessionEstabli
263479 // commissioning ourselves if not.
264480 if ([strongDelegate respondsToSelector: @selector (commissioning:paseSessionEstablishmentComplete: )]) {
265481 dispatch_async (_delegateQueue, ^{
266- self.isWaitingAfterPASEEstablished = YES ;
482+ // Arm the watchdog and set the waiting flag on _delegateQueue, which
483+ // is where all watchdog mutations happen. A client that handles
484+ // paseSessionEstablishmentComplete: but then never calls
485+ // commissionNodeWithID: or cancelCommissioningForNodeID: would
486+ // otherwise permanently wedge the controller's commissioning state.
487+ BOOL armed = [self _armPostPASEWatchdog ];
488+ if (!armed) {
489+ // Failing to arm the watchdog means we cannot bound the
490+ // post-PASE wait -- the very wedge this logic prevents. Surface
491+ // a no-memory error so the client tears down.
492+ MTR_LOG_ERROR (" %@ failed to arm post-PASE watchdog; aborting commissioning to avoid an unbounded wait" , self);
493+ [self _dispatchCommissioningError: [MTRError errorForCHIPErrorCode: CHIP_ERROR_NO_MEMORY]];
494+ return ;
495+ }
496+ atomic_store_explicit (&self->_isWaitingAfterPASEEstablished , YES , memory_order_relaxed);
267497 [strongDelegate commissioning: self paseSessionEstablishmentComplete: error];
268498 });
269499
@@ -303,6 +533,12 @@ - (void)controller:(MTRDeviceController *)controller
303533 return ;
304534 }
305535
536+ // Defensive: on the success path the controller should already have flipped
537+ // isWaitingAfterPASEEstablished back to NO (from commissionNodeWithID:), but
538+ // this is the terminal success notification, so clear and tear down the
539+ // watchdog unconditionally rather than depending on that upstream invariant.
540+ [self _clearPostPASEWaiting ];
541+
306542 id <MTRCommissioningDelegate> strongDelegate = _delegate;
307543 MTRDeviceController_Concrete * strongController = _controller;
308544
@@ -502,3 +738,22 @@ - (void)deviceAttestationCompletedForController:(MTRDeviceController *)controlle
502738}
503739
504740@end
741+
742+ #if DEBUG
743+ #pragma mark - PostPASEWatchdogTesting
744+
745+ @implementation MTRCommissioningOperation (PostPASEWatchdogTesting)
746+
747+ + (void )setPostPASEWatchdogIntervalForTesting : (NSTimeInterval )interval
748+ {
749+ // Clamp non-finite, negative, or absurdly large values to 0 (== "use the
750+ // production interval"). _armPostPASEWatchdog casts (interval *
751+ // NSEC_PER_SEC) to int64_t, which is UB (and UBSan-trapping) for +inf, NaN,
752+ // or values that overflow int64_t after the multiply; 1e9 seconds is well
753+ // past any plausible test interval and leaves ample headroom.
754+ NSTimeInterval clamped = (isfinite (interval) && interval > 0 && interval < 1e9 ) ? interval : 0 ;
755+ atomic_store_explicit (&sMTRPostPASEWatchdogIntervalForTesting , clamped, memory_order_relaxed);
756+ }
757+
758+ @end
759+ #endif // DEBUG
0 commit comments