@@ -177,3 +177,152 @@ def test_frequency_calculation_respects_interval(self, tmp_path):
177177 # Should be 30/60 + 180/60 = 0.5 + 3 = 3.5 minutes
178178 # NOT 2 checks * some_default
179179 assert result ['total_minutes' ] == 3.5
180+
181+
182+ class TestNotificationIdempotency :
183+ """Tests that unconfigured notification channels don't block last_notified_status.
184+
185+ Regression test for a bug where adding webhooks to the dispatcher caused
186+ every check to re-trigger notifications. The webhook channel returned
187+ success=False with "Not configured" when WEBHOOK_URLS was unset, which
188+ prevented last_notified_status from being saved. On subsequent runs, the
189+ checker saw last_notified_status != current_reported and entered the
190+ "recovering missed notification" path, spamming all channels every 3 minutes.
191+ """
192+
193+ def _make_status (self , status , timestamp = '2026-03-01T12:00:00' ):
194+ """Helper to build a status dict."""
195+ return {
196+ 'status' : status ,
197+ 'description' : f'{ status } description' ,
198+ 'confidence' : 0.99 ,
199+ 'probabilities' : {'green' : 0.99 , 'yellow' : 0.005 , 'red' : 0.005 },
200+ 'detection' : {'trains' : [{'id' : 'TT' , 'x' : 500 }], 'delays_platforms' : [],
201+ 'delays_segments' : [], 'delays_bunching' : [], 'delay_summaries' : []},
202+ 'image_path' : '/tmp/test.jpg' ,
203+ 'image_dimensions' : {'width' : 1860 , 'height' : 800 },
204+ 'timestamp' : timestamp ,
205+ }
206+
207+ def _run_check_status (self , cache_before , notify_results , detection_status = 'red' ):
208+ """Run check_status with mocked dependencies and return the final cache.
209+
210+ Sets up a scenario where the reported status is already `detection_status`
211+ (hysteresis has already transitioned), and the new detection also returns
212+ `detection_status`, so the notification path can be exercised via the
213+ "recovering missed notification" logic.
214+ """
215+ from api import check_status as cs_module
216+
217+ saved_caches = []
218+
219+ def fake_write_cache (data ):
220+ saved_caches .append (data .copy ())
221+ return True
222+
223+ download_result = {
224+ 'success' : True ,
225+ 'filepath' : '/tmp/test.jpg' ,
226+ 'width' : 1860 ,
227+ 'height' : 800 ,
228+ }
229+ detection_result = {
230+ 'status' : detection_status ,
231+ 'description' : 'Not operating' ,
232+ 'status_confidence' : 0.99 ,
233+ 'probabilities' : {'green' : 0.005 , 'yellow' : 0.005 , 'red' : 0.99 },
234+ 'detection' : {'trains' : [], 'delays_platforms' : [],
235+ 'delays_segments' : [], 'delays_bunching' : [], 'delay_summaries' : []},
236+ }
237+
238+ with patch .object (cs_module , 'download_muni_image' , return_value = download_result ), \
239+ patch .object (cs_module , 'detect_muni_status' , return_value = detection_result ), \
240+ patch .object (cs_module , 'read_cache' , return_value = cache_before ), \
241+ patch .object (cs_module , 'write_cache' , side_effect = fake_write_cache ), \
242+ patch .object (cs_module , 'write_cached_image' , return_value = True ), \
243+ patch .object (cs_module , 'write_cached_badge' , return_value = True ), \
244+ patch .object (cs_module , 'notify_status_change' , return_value = notify_results ), \
245+ patch .object (cs_module , 'log_status_check' , return_value = 1 ), \
246+ patch ('lib.analytics.check_database_health' , return_value = {'exists' : True , 'has_data' : True , 'check_count' : 1 }), \
247+ patch .object (cs_module , 'archive_image' , return_value = False ), \
248+ patch .object (cs_module , 'should_archive_baseline' , return_value = False ):
249+ cs_module .check_status (should_write_cache = True )
250+
251+ return saved_caches
252+
253+ def _make_cache_with_missed_notification (self ):
254+ """Build a cache simulating a missed notification.
255+
256+ The reported status has already transitioned to red (via hysteresis),
257+ but last_notified_status is still 'green' because the previous
258+ notification run failed to save it. This triggers the "recovering
259+ missed notification" path on the next check.
260+ """
261+ red_status = self ._make_status ('red' , '2026-03-01T12:06:00' )
262+ return {
263+ # 3 consecutive red statuses — hysteresis has already transitioned
264+ 'statuses' : [
265+ self ._make_status ('red' , '2026-03-01T12:06:00' ),
266+ self ._make_status ('red' , '2026-03-01T12:03:00' ),
267+ self ._make_status ('red' , '2026-03-01T12:00:00' ),
268+ ],
269+ 'reported_status' : red_status ,
270+ 'best_status' : red_status ,
271+ 'pending_status' : None ,
272+ 'pending_streak' : 0 ,
273+ 'cached_at' : '2026-03-01T12:06:00' ,
274+ 'last_successful_check' : '2026-03-01T12:06:00' ,
275+ 'consecutive_failures' : 0 ,
276+ 'last_error' : None ,
277+ # KEY: last_notified_status is still 'green' — the notification was missed
278+ 'last_notified_status' : 'green' ,
279+ }
280+
281+ def test_unconfigured_channel_does_not_block_last_notified (self ):
282+ """Unconfigured channels should NOT prevent last_notified_status from being saved.
283+
284+ This is the core regression test. When a channel returns success=False
285+ with "Not configured", it should be treated as a no-op, not a failure.
286+ If last_notified_status is not saved, every subsequent check will
287+ re-trigger notifications via the "recovering missed notification" path.
288+ """
289+ cache_before = self ._make_cache_with_missed_notification ()
290+
291+ # Dispatcher returns: RSS succeeded, others "Not configured"
292+ notify_results = {
293+ 'rss' : {'success' : True },
294+ 'bluesky' : {'success' : False , 'error' : 'Not configured: BLUESKY_HANDLE' },
295+ 'mastodon' : {'success' : False , 'error' : 'Not configured: MASTODON_ACCESS_TOKEN' },
296+ 'webhooks' : {'success' : False , 'error' : 'Not configured: no WEBHOOK_URLS' },
297+ }
298+
299+ saved_caches = self ._run_check_status (cache_before , notify_results )
300+
301+ # The final cache write should have last_notified_status updated to 'red'
302+ # Bug behavior: last_notified_status stays 'green' because all_succeeded=False
303+ final_cache = saved_caches [- 1 ]
304+ assert final_cache ['last_notified_status' ] == 'red' , \
305+ "last_notified_status should be updated even when unconfigured channels return success=False"
306+
307+ def test_real_failure_blocks_last_notified (self ):
308+ """A genuine notification failure SHOULD prevent last_notified_status update.
309+
310+ This ensures we didn't over-correct — if a configured channel actually
311+ fails (network error, auth error, etc.), we should NOT update
312+ last_notified_status so recovery is attempted on the next run.
313+ """
314+ cache_before = self ._make_cache_with_missed_notification ()
315+
316+ # Dispatcher returns: RSS succeeded, but Bluesky had a real failure
317+ notify_results = {
318+ 'rss' : {'success' : True },
319+ 'bluesky' : {'success' : False , 'error' : 'HTTP 500: Internal Server Error' },
320+ 'webhooks' : {'success' : False , 'error' : 'Not configured: no WEBHOOK_URLS' },
321+ }
322+
323+ saved_caches = self ._run_check_status (cache_before , notify_results )
324+
325+ # last_notified_status should NOT be updated because Bluesky actually failed
326+ final_cache = saved_caches [- 1 ]
327+ assert final_cache .get ('last_notified_status' ) != 'red' , \
328+ "last_notified_status should NOT be updated when a real channel failure occurs"
0 commit comments