2929#import " MTRMetricsCollector.h"
3030#import " MTRMetrics_Internal.h"
3131
32+ #if DEBUG
33+ #import " MTRCommissioningOperation_Test.h"
34+ #endif
35+
36+ #include < math.h>
37+ #include < stdatomic.h>
38+
3239#include < controller/ExampleOperationalCredentialsIssuer.h>
3340#include < lib/core/CHIPError.h>
3441#include < setup_payload/SetupPayload.h>
3542
3643using namespace chip ;
3744using namespace chip ::Tracing::DarwinFramework;
3845
46+ // Interval after which the post-PASE watchdog fires. The watchdog releases the
47+ // controller's single in-flight commissioning slot if a client establishes PASE
48+ // (and asks to be notified via paseSessionEstablishmentComplete:) but then never
49+ // calls commissionNodeWithID: or cancelCommissioningForNodeID:. Without it,
50+ // currentCommissioning is never cleared and every subsequent commissioning
51+ // attempt fails with CHIP_ERROR_BUSY (0xDB) until the process restarts.
52+ //
53+ // Five minutes is well past the commissionee fail-safe (60-180s): the watchdog
54+ // is not racing the on-device handshake (the fail-safe handles that), it only
55+ // reclaims our local slot for a client that has gone away, so it is generous
56+ // enough not to tear a slow-but-live client out from under the user.
57+ static const NSTimeInterval kMTRPostPASEWatchdogInterval = 5 * 60 ;
58+
59+ #if DEBUG
60+ // Test-only override for kMTRPostPASEWatchdogInterval so tests can exercise the
61+ // watchdog without waiting five minutes. Atomic because the test setter may run
62+ // on a different thread than the _delegateQueue read in -_armPostPASEWatchdog.
63+ // Only compiled into DEBUG (test) builds; never ships in the release framework.
64+ static _Atomic (NSTimeInterval ) sMTRPostPASEWatchdogIntervalForTesting = 0;
65+ #endif
66+
3967@interface MTRCommissioningOperationDeviceAttestationDelegate : NSObject <MTRDeviceAttestationDelegate>
4068@property (nonatomic , weak ) MTRCommissioningOperation * commissioningOperation;
4169@end
@@ -49,6 +77,41 @@ @implementation MTRCommissioningOperation {
4977 id <MTRCommissioningDelegate> __weak _delegate;
5078 dispatch_queue_t _delegateQueue;
5179 MTRDeviceController_Concrete * __weak _controller;
80+ // Watchdog timer armed once PASE has been established to bound how long we
81+ // hold the controller in the "in-progress commissioning" state if the
82+ // client drops the flow without ever calling commissionNodeWithID: or
83+ // cancelCommissioningForNodeID:. Without this watchdog,
84+ // currentCommissioning is never cleared and every subsequent commissioning
85+ // attempt hits CHIP_ERROR_BUSY (0xDB) until the process restarts.
86+ //
87+ // _postPASEWatchdog is only ever created, cancelled, or cleared on
88+ // _delegateQueue (the queue all delegate callbacks and the timer's
89+ // event_handler run on), so it needs no separate lock.
90+ dispatch_source_t _postPASEWatchdog;
91+ // Backing storage for isWaitingAfterPASEEstablished. Accessed via C11
92+ // atomics because it is read and written from arbitrary client queues
93+ // (MTRDeviceController_Concrete maintains it) as well as _delegateQueue.
94+ _Atomic (BOOL ) _isWaitingAfterPASEEstablished;
95+ }
96+
97+ @synthesize commissioningID = _commissioningID;
98+
99+ - (BOOL )isWaitingAfterPASEEstablished
100+ {
101+ return atomic_load_explicit (&_isWaitingAfterPASEEstablished, memory_order_relaxed);
102+ }
103+
104+ - (void )setIsWaitingAfterPASEEstablished : (BOOL )isWaitingAfterPASEEstablished
105+ {
106+ BOOL previous = atomic_exchange_explicit (&_isWaitingAfterPASEEstablished, isWaitingAfterPASEEstablished, memory_order_relaxed);
107+ if (previous && !isWaitingAfterPASEEstablished) {
108+ // The client has either started commissioning (via commissionNodeWithID:)
109+ // or moved on; the watchdog no longer needs to fire. Cancel it on
110+ // _delegateQueue, which is where _postPASEWatchdog is owned.
111+ dispatch_async (_delegateQueue, ^{
112+ [self _cancelPostPASEWatchdog ];
113+ });
114+ }
52115}
53116
54117- (instancetype )initWithParameters : (MTRCommissioningParameters *)parameters
@@ -92,8 +155,14 @@ - (instancetype)initWithParameters:(MTRCommissioningParameters *)parameters
92155 }
93156
94157 _parameters = [parameters copy ];
95- _setupPayload = payload;
96- _commissioningID = commissioningID;
158+ // Honor the `copy` semantic on the @property declaration: a caller passing
159+ // an NSMutableString must not be able to mutate our stored payload after
160+ // construction. The implicit-cancel path in setupCommissioningSessionWithPayload:
161+ // compares the stored payload against the caller's freshly-derived
162+ // pairingCode via -isEqualToString:, so any post-construction drift here
163+ // would silently corrupt that decision.
164+ _setupPayload = [payload copy ];
165+ _commissioningID = [commissioningID copy ];
97166 _isInternallyCreated = isInternallyCreated;
98167 _delegate = delegate;
99168 _delegateQueue = queue;
@@ -104,6 +173,110 @@ - (instancetype)initWithParameters:(MTRCommissioningParameters *)parameters
104173 return self;
105174}
106175
176+ - (void )dealloc
177+ {
178+ // Defensive: ensure the watchdog timer doesn't outlive us. No other strong
179+ // reference can exist (or we wouldn't be deallocating) and the timer's
180+ // event_handler retains self for its duration, so it cannot be in flight
181+ // here; a synchronous cancel is safe.
182+ if (_postPASEWatchdog) {
183+ dispatch_source_cancel (_postPASEWatchdog);
184+ _postPASEWatchdog = nil ;
185+ }
186+ }
187+
188+ // Must be called on _delegateQueue.
189+ - (BOOL )_armPostPASEWatchdog
190+ {
191+ if (_postPASEWatchdog) {
192+ // Already armed; should not happen, but be defensive.
193+ return YES ;
194+ }
195+
196+ NSTimeInterval intervalSeconds = kMTRPostPASEWatchdogInterval ;
197+ #if DEBUG
198+ NSTimeInterval intervalOverride = atomic_load_explicit (&sMTRPostPASEWatchdogIntervalForTesting , memory_order_relaxed);
199+ if (intervalOverride > 0 ) {
200+ intervalSeconds = intervalOverride;
201+ }
202+ #endif
203+
204+ dispatch_source_t timer = dispatch_source_create (DISPATCH_SOURCE_TYPE_TIMER, 0 , 0 , _delegateQueue);
205+ if (!timer) {
206+ MTR_LOG_ERROR (" %@ failed to create post-PASE watchdog timer" , self);
207+ return NO ;
208+ }
209+
210+ dispatch_source_set_timer (timer,
211+ dispatch_time (DISPATCH_TIME_NOW, (int64_t ) (intervalSeconds * NSEC_PER_SEC)),
212+ DISPATCH_TIME_FOREVER,
213+ 1 * NSEC_PER_SEC);
214+
215+ mtr_weakify (self);
216+ dispatch_source_set_event_handler (timer, ^{
217+ mtr_strongify (self);
218+ if (!self) {
219+ return ;
220+ }
221+ // Late-fire guard: the timer block may already have been enqueued on
222+ // _delegateQueue ahead of a cancel that flipped the waiting flag back
223+ // to NO. If the client has legitimately advanced, do not fire.
224+ if (!self.isWaitingAfterPASEEstablished ) {
225+ return ;
226+ }
227+ [self _firePostPASEWatchdog ];
228+ });
229+
230+ dispatch_resume (timer);
231+ _postPASEWatchdog = timer;
232+ return YES ;
233+ }
234+
235+ // Must be called on _delegateQueue (it mutates _postPASEWatchdog).
236+ - (void )_cancelPostPASEWatchdog
237+ {
238+ if (_postPASEWatchdog) {
239+ dispatch_source_cancel (_postPASEWatchdog);
240+ _postPASEWatchdog = nil ;
241+ }
242+ }
243+
244+ // Must be called on _delegateQueue (it is the watchdog's event_handler).
245+ - (void )_firePostPASEWatchdog
246+ {
247+ [self _cancelPostPASEWatchdog ];
248+
249+ MTRDeviceController_Concrete * strongController = _controller;
250+ MTR_LOG_ERROR (" %@ post-PASE watchdog fired for commissioning %@ -- client never advanced past paseSessionEstablishmentComplete; cancelling commissioning to release controller" , self, _commissioningID);
251+
252+ if (!strongController) {
253+ // Controller is gone -- the wedge we wanted to prevent is moot (no
254+ // controller state to reclaim), but still drive our own terminal error
255+ // so the delegate state settles.
256+ [self _dispatchCommissioningError: [MTRError errorForCHIPErrorCode: CHIP_ERROR_TIMEOUT]];
257+ return ;
258+ }
259+
260+ // If a successor MTRCommissioningOperation has already taken our slot, that
261+ // path already cleared our delegate via commissioningDone:, so do not
262+ // dispatch a terminal error -- it would tear down the successor's state.
263+ if (strongController.currentCommissioning != self) {
264+ MTR_LOG (" %@ post-PASE watchdog fired but commissioning %@ was already replaced; suppressing spurious timeout" , self, _commissioningID);
265+ return ;
266+ }
267+
268+ // Single-notification contract: -stop clears isWaitingAfterPASEEstablished
269+ // *before* it calls into the controller, so -_cancelCommissioning observes
270+ // the flag as NO and does NOT synthesize its own OnCommissioningComplete
271+ // (CANCELLED). We then deliver exactly one terminal error -- TIMEOUT, the
272+ // user-visible "we gave up waiting for you to advance past PASE" signal --
273+ // via the standard failure path. Routing TIMEOUT (rather than letting
274+ // _cancelCommissioning emit CANCELLED) keeps the client's error specific to
275+ // the watchdog, and clearing the flag first avoids double-notifying.
276+ [self stop ];
277+ [self _dispatchCommissioningError: [MTRError errorForCHIPErrorCode: CHIP_ERROR_TIMEOUT]];
278+ }
279+
107280static inline void emitMetricForSetupPayload (NSString * payload)
108281{
109282 std::vector<SetupPayload> payloads;
@@ -169,15 +342,32 @@ - (void)startWithController:(MTRDeviceController *)controller
169342
170343- (BOOL )stop
171344{
345+ // Clear the waiting flag (atomic) and tear down the watchdog before driving
346+ // the controller transition. The atomic flag doubles as the late-fire
347+ // guard: any watchdog event_handler already enqueued on _delegateQueue ahead
348+ // of the cancel sees the flag is NO and bails out instead of routing a
349+ // spurious timeout.
350+ [self _clearPostPASEWaiting ];
351+
172352 MTRDeviceController_Concrete * strongController = _controller;
173353 if (!strongController) {
174354 // Nothing to do; controller is gone, so we are stopped no matter what.
175355 return NO ;
176356 }
177-
178357 return [strongController stopCommissioning: self forCommissioningID: _commissioningID];
179358}
180359
360+ // Clears the waiting-after-PASE flag and tears down the watchdog. Safe to call
361+ // from any queue: the flag write is atomic, and the dispatch_source cancel is
362+ // bounced onto _delegateQueue, which owns _postPASEWatchdog.
363+ - (void )_clearPostPASEWaiting
364+ {
365+ atomic_store_explicit (&_isWaitingAfterPASEEstablished, NO , memory_order_relaxed);
366+ dispatch_async (_delegateQueue, ^{
367+ [self _cancelPostPASEWatchdog ];
368+ });
369+ }
370+
181371- (void )_earlyFailCommissioning : (CHIP_ERROR)error
182372{
183373 // This handles failures before we have cleared the way for doing a commissioning at all, and in
@@ -208,6 +398,10 @@ - (void)_dispatchCommissioningError:(NSError *)error withMetrics:(MTRMetrics *)m
208398
209399- (void )_dispatchCommissioningError : (NSError *)error forCommissioningID : (NSNumber *)commissioningID withMetrics : (MTRMetrics *)metrics
210400{
401+ // Any terminal error path implies the watchdog (if armed) no longer needs
402+ // to fire, so clear the waiting flag and tear it down.
403+ [self _clearPostPASEWaiting ];
404+
211405 MTRDeviceController_Concrete * strongController = _controller;
212406
213407 MTR_LOG (" %@ Device commissioning failed with controller %@ metrics %@" , self, strongController, metrics);
@@ -263,7 +457,21 @@ - (void)controller:(MTRDeviceController *)controller commissioningSessionEstabli
263457 // commissioning ourselves if not.
264458 if ([strongDelegate respondsToSelector: @selector (commissioning:paseSessionEstablishmentComplete: )]) {
265459 dispatch_async (_delegateQueue, ^{
266- self.isWaitingAfterPASEEstablished = YES ;
460+ // Arm the watchdog and set the waiting flag on _delegateQueue, which
461+ // is where all watchdog mutations happen. A client that handles
462+ // paseSessionEstablishmentComplete: but then never calls
463+ // commissionNodeWithID: or cancelCommissioningForNodeID: would
464+ // otherwise permanently wedge the controller's commissioning state.
465+ BOOL armed = [self _armPostPASEWatchdog ];
466+ if (!armed) {
467+ // Failing to arm the watchdog means we cannot bound the
468+ // post-PASE wait -- the very wedge this logic prevents. Surface
469+ // a no-memory error so the client tears down.
470+ MTR_LOG_ERROR (" %@ failed to arm post-PASE watchdog; aborting commissioning to avoid an unbounded wait" , self);
471+ [self _dispatchCommissioningError: [MTRError errorForCHIPErrorCode: CHIP_ERROR_NO_MEMORY]];
472+ return ;
473+ }
474+ atomic_store_explicit (&self->_isWaitingAfterPASEEstablished , YES , memory_order_relaxed);
267475 [strongDelegate commissioning: self paseSessionEstablishmentComplete: error];
268476 });
269477
@@ -303,6 +511,12 @@ - (void)controller:(MTRDeviceController *)controller
303511 return ;
304512 }
305513
514+ // Defensive: on the success path the controller should already have flipped
515+ // isWaitingAfterPASEEstablished back to NO (from commissionNodeWithID:), but
516+ // this is the terminal success notification, so clear and tear down the
517+ // watchdog unconditionally rather than depending on that upstream invariant.
518+ [self _clearPostPASEWaiting ];
519+
306520 id <MTRCommissioningDelegate> strongDelegate = _delegate;
307521 MTRDeviceController_Concrete * strongController = _controller;
308522
@@ -502,3 +716,22 @@ - (void)deviceAttestationCompletedForController:(MTRDeviceController *)controlle
502716}
503717
504718@end
719+
720+ #if DEBUG
721+ #pragma mark - PostPASEWatchdogTesting
722+
723+ @implementation MTRCommissioningOperation (PostPASEWatchdogTesting)
724+
725+ + (void )setPostPASEWatchdogIntervalForTesting : (NSTimeInterval )interval
726+ {
727+ // Clamp non-finite, negative, or absurdly large values to 0 (== "use the
728+ // production interval"). _armPostPASEWatchdog casts (interval *
729+ // NSEC_PER_SEC) to int64_t, which is UB (and UBSan-trapping) for +inf, NaN,
730+ // or values that overflow int64_t after the multiply; 1e9 seconds is well
731+ // past any plausible test interval and leaves ample headroom.
732+ NSTimeInterval clamped = (isfinite (interval) && interval > 0 && interval < 1e9 ) ? interval : 0 ;
733+ atomic_store_explicit (&sMTRPostPASEWatchdogIntervalForTesting , clamped, memory_order_relaxed);
734+ }
735+
736+ @end
737+ #endif // DEBUG
0 commit comments