@@ -297,6 +297,49 @@ print(sum(1 for r in results if isinstance(r, dict) and r.get("status") == "miss
297297PY
298298}
299299
300+ # Count the authoritative *expected* screenshot set: the number of golden PNGs
301+ # stored in the reference directory. The reference dir is the manifest -- it is
302+ # the single source of truth for how many screenshots a suite must produce. We
303+ # deliberately do NOT derive the expected count from whatever the harness chose
304+ # to deliver, because a harness that silently drops a test (a hang, a crash, a
305+ # transport that never delivered the frame) simply omits it from its delivered
306+ # set, leaving no "missing_actual" record behind. Counting goldens instead means
307+ # a dropped test is always visible as an uncovered golden. Top-level *.png only;
308+ # the reference dirs are flat <testName>.png sets.
309+ cn1ss_count_reference () {
310+ local dir=" $1 "
311+ if [ -z " $dir " ] || [ ! -d " $dir " ]; then
312+ echo 0
313+ return
314+ fi
315+ local n
316+ n=$( find " $dir " -maxdepth 1 -name ' *.png' -type f 2> /dev/null | wc -l)
317+ echo " ${n// [^0-9]/ } "
318+ }
319+
320+ # Count the goldens that were actually rendered AND compared against their
321+ # reference, i.e. results with status "equal" or "different". A "missing_actual"
322+ # (listed but no image), a "missing_expected" (new image with no golden yet) and
323+ # any test that never appeared at all are all NOT covered. expected - covered is
324+ # therefore the number of expected screenshots that failed to materialise.
325+ cn1ss_count_covered () {
326+ local json=" $1 "
327+ if [ -z " $json " ] || [ ! -s " $json " ]; then
328+ echo 0
329+ return
330+ fi
331+ python3 - " $json " << 'PY '
332+ import json, sys
333+ try:
334+ data = json.load(open(sys.argv[1]))
335+ results = data.get("results", []) if isinstance(data, dict) else []
336+ except Exception:
337+ print(0)
338+ sys.exit(0)
339+ print(sum(1 for r in results if isinstance(r, dict) and r.get("status") in ("equal", "different")))
340+ PY
341+ }
342+
300343# Shared function to generate report, compare screenshots, and post PR comment
301344cn1ss_process_and_report () {
302345 local platform_title=" $1 "
@@ -411,31 +454,52 @@ cn1ss_process_and_report() {
411454 return 15
412455 fi
413456
414- # Missing-screenshot regression guard. Every expected test must produce its
415- # screenshot; a test that runs but emits nothing is recorded as status
416- # "missing_actual". When a test hangs or the rendering pipeline crashes
417- # partway (the Metal DialogTheme hang), every test from that point on
418- # becomes missing_actual and the suite silently drops from 122 captures to
419- # 107 - all still listed, just unproduced. We fail when the number of
420- # missing screenshots exceeds CN1SS_ALLOWED_MISSING (default 0: no missing
421- # screenshots tolerated). A pipeline with a known, steady-state gap raises
422- # its own tolerance (e.g. the iOS jobs set CN1SS_ALLOWED_MISSING=2 for
423- # OrientationLock + MutableImageReadback, which do not render on either iOS
424- # backend). Set CN1SS_SKIP_COUNT_CHECK=1 to bypass while intentionally
425- # seeding a brand new reference set. Enforced on every pipeline that opts
426- # into strict mode (CN1SS_FAIL_ON_MISMATCH=1).
427- if [ " ${CN1SS_SKIP_COUNT_CHECK:- 0} " != " 1" ]; then
428- local missing_count allowed_missing
429- missing_count=$( cn1ss_count_missing " $compare_json_out " )
430- missing_count=" ${missing_count// [^0-9]/ } " ; : " ${missing_count:= 999999} "
457+ # ------------------------------------------------------------------------
458+ # Screenshot count-regression guard. DO NOT WEAKEN OR REMOVE.
459+ #
460+ # Every golden in the reference directory must be re-produced and compared
461+ # on every run. The reference set is the manifest: expected == number of
462+ # golden PNGs (cn1ss_count_reference), covered == goldens that were rendered
463+ # and compared (cn1ss_count_covered, i.e. status equal|different). When a
464+ # test hangs, crashes or its frame never gets delivered, it drops out of the
465+ # comparison entirely and `covered` falls below `expected` -- which is the
466+ # ONLY reliable signal, because a dropped test leaves no per-test record
467+ # behind to count (the older missing_actual-only check was blind to this and
468+ # let suites silently shrink from 124 captures to 58 while still going green).
469+ #
470+ # We fail when expected - covered exceeds CN1SS_ALLOWED_MISSING (default 0:
471+ # no uncovered goldens tolerated). A pipeline with a known, steady-state gap
472+ # sets its own tolerance and documents why (e.g. the iOS jobs allow 2 for
473+ # OrientationLock + MutableImageReadback, which do not render on the iOS
474+ # backends). CN1SS_MIN_SCREENSHOTS can raise the floor above the on-disk
475+ # golden count (useful before the reference set is fully seeded). The only
476+ # bypass is CN1SS_SKIP_COUNT_CHECK=1, reserved for the deliberate, manual act
477+ # of seeding a brand new reference set; it is loud in the log so it can never
478+ # be mistaken for normal operation.
479+ # ------------------------------------------------------------------------
480+ if [ " ${CN1SS_SKIP_COUNT_CHECK:- 0} " = " 1" ]; then
481+ cn1ss_log " WARNING: CN1SS_SKIP_COUNT_CHECK=1 -- screenshot count-regression guard BYPASSED. This must only be used while intentionally seeding a new reference set."
482+ else
483+ local expected_count covered_count uncovered_count allowed_missing min_floor
484+ expected_count=$( cn1ss_count_reference " $ref_dir " )
485+ expected_count=" ${expected_count// [^0-9]/ } " ; : " ${expected_count:= 0} "
486+ min_floor=" ${CN1SS_MIN_SCREENSHOTS:- 0} "
487+ min_floor=" ${min_floor// [^0-9]/ } " ; : " ${min_floor:= 0} "
488+ if [ " $min_floor " -gt " $expected_count " ]; then
489+ expected_count=" $min_floor "
490+ fi
491+ covered_count=$( cn1ss_count_covered " $compare_json_out " )
492+ covered_count=" ${covered_count// [^0-9]/ } " ; : " ${covered_count:= 0} "
431493 allowed_missing=" ${CN1SS_ALLOWED_MISSING:- 0} "
432494 allowed_missing=" ${allowed_missing// [^0-9]/ } " ; : " ${allowed_missing:= 0} "
433- if [ " $missing_count " -gt " $allowed_missing " ]; then
434- cn1ss_log " FATAL: $missing_count screenshot(s) missing (no image produced) but only $allowed_missing tolerated (CN1SS_ALLOWED_MISSING)."
435- cn1ss_log " A test failed to emit its screenshot - the suite likely hung or crashed before finishing. See the 'missing actual' entries above."
495+ uncovered_count=$(( expected_count - covered_count ))
496+ [ " $uncovered_count " -lt 0 ] && uncovered_count=0
497+ if [ " $uncovered_count " -gt " $allowed_missing " ]; then
498+ cn1ss_log " FATAL: $uncovered_count of $expected_count expected screenshot(s) were not produced and compared (only $covered_count covered); $allowed_missing tolerated (CN1SS_ALLOWED_MISSING)."
499+ cn1ss_log " A test failed to emit its screenshot, or the suite hung/crashed before finishing. The golden set under the comparison directory is the source of truth for how many screenshots must be produced."
436500 return 17
437501 fi
438- cn1ss_log " Missing-screenshot check passed: $missing_count missing <= $allowed_missing tolerated."
502+ cn1ss_log " Screenshot count check passed: $covered_count of $expected_count goldens covered ( $uncovered_count uncovered <= $allowed_missing tolerated) ."
439503 fi
440504 fi
441505
0 commit comments