Update docker/build-push-action digest to 53b7df9 #3748

Workflow file for this run

	name: CI

	on:
	  workflow_dispatch:
	  merge_group: {}
	  push:
	    branches:
	      - main
	    paths-ignore:
	      # pyproject.toml is deliberately NOT listed here. Auto-release
	      # works through a bump PR (auto-release.yml opens
	      # auto/bump-vX.Y.Z with auto-merge). The squash-merge that lands
	      # on main contains only a pyproject.toml diff and MUST trigger
	      # CI, otherwise the workflow_run listener in auto-release.yml
	      # never fires to tag + publish. The historical CI→release→CI
	      # loop is already prevented by the bot-author filter and the
	      # "tag exists?" check inside auto-release.yml.

	      # Documentation & prose
	      - "docs/**"
	      - "!docs/operations/ci-topology.md"
	      - "!docs/observability/**"
	      - "*.md"
	      - "!README.md"
	      - "LICENSE"
	      - "CONTRIBUTORS.md"
	      # Runtime state (never committed)
	      - ".sdd/**"
	      # Non-Python packages & SDKs
	      - "sdk/typescript/**"
	      - "packages/vscode/**"
	      - "packages/cursor-plugin/**"
	      - "packaging/**"
	      - "Formula/**"
	      # Deployment & infra configs
	      - "deploy/**"
	      - "docker/**"
	      - "docker-compose.yaml"
	      - "Dockerfile"
	      - "action.yml"
	      - "action/**"
	      # CI tool configs (codecov/sonar tweaks must not re-run tests)
	      - "codecov.yml"
	      - "sonar-project.properties"
	      # GitHub meta (templates, labels, funding; ci.yml is not ignored)
	      - ".github/ISSUE_TEMPLATE/**"
	      - ".github/FUNDING.yml"
	      - ".github/CODEOWNERS"
	      - ".github/pull_request_template.md"
	      - ".github/dependabot.yml"
	      - ".github/labeler.yml"
	      - ".github/release-drafter.yml"
	      - ".github/copilot-instructions.md"
	      - ".github/codeql/**"
	      # Non-code project files
	      - "marketing/**"
	      - "benchmarks/**"
	      - "examples/**"
	      - "plans/**"
	      - "agents/**"
	      - "commands/**"
	      - "rules/**"
	      - ".bernstein/**"
	      - ".plugin/**"
	      - "scripts/gen_tickets_*.py"
	      - "scripts/gen_roadmap_*.py"
	      - "scripts/generate_benchmark_docs.py"
	  pull_request:
	    # Same ignore list as `push` above; keep the two in sync.
	    paths-ignore:
	      # Documentation & prose
	      - "docs/**"
	      - "!docs/operations/ci-topology.md"
	      - "!docs/observability/**"
	      - "*.md"
	      - "!README.md"
	      - "LICENSE"
	      - "CONTRIBUTORS.md"
	      # Runtime state (never committed)
	      - ".sdd/**"
	      # Non-Python packages & SDKs
	      - "sdk/typescript/**"
	      - "packages/vscode/**"
	      - "packages/cursor-plugin/**"
	      - "packaging/**"
	      - "Formula/**"
	      # Deployment & infra configs
	      - "deploy/**"
	      - "docker/**"
	      - "docker-compose.yaml"
	      - "Dockerfile"
	      - "action.yml"
	      - "action/**"
	      # CI tool configs
	      - "codecov.yml"
	      - "sonar-project.properties"
	      # GitHub meta
	      - ".github/ISSUE_TEMPLATE/**"
	      - ".github/FUNDING.yml"
	      - ".github/CODEOWNERS"
	      - ".github/pull_request_template.md"
	      - ".github/dependabot.yml"
	      - ".github/labeler.yml"
	      - ".github/release-drafter.yml"
	      - ".github/copilot-instructions.md"
	      - ".github/codeql/**"
	      # Non-code project files
	      - "marketing/**"
	      - "benchmarks/**"
	      - "examples/**"
	      - "plans/**"
	      - "agents/**"
	      - "commands/**"
	      - "rules/**"
	      - ".bernstein/**"
	      - ".plugin/**"
	      - "scripts/gen_tickets_*.py"
	      - "scripts/gen_roadmap_*.py"
	      - "scripts/generate_benchmark_docs.py"

	# Concurrency policy for heavy CI (see #1273):
	#
	# - Pull requests: per-PR group (keyed off pull_request.number, stable
	#   across pushes to the same PR), cancel-in-progress=true. New commits
	#   on the same PR cancel older CI runs so reviewers only ever wait on
	#   the latest push and we don't burn minutes on stale SHAs.
	#
	# - Every other event (pushes to main, merge_group, manual dispatch):
	#   group keyed off github.ref, cancel-in-progress=true. A rapid wave
	#   of merges on `main` (13 commits in 90 min during the May 2026 META
	#   wave) used to saturate the runner queue because each sha-unique
	#   group kept its own full-matrix run alive; with a branch-scoped
	#   group the latest push supersedes stale ones. Per-SHA main
	#   observability is provided separately by main-sha-marker.yml, which
	#   is keyed by github.sha and is not cancellable by newer main pushes.
	#
	# The `&& ... || ...` expression in `group:` is the Actions ternary
	# idiom: it yields `pr-<number>` for pull_request events and
	# `branch-<ref>` otherwise.
	concurrency:
	  group: ci-${{ github.workflow }}-${{ github.event_name == 'pull_request' && format('pr-{0}', github.event.pull_request.number) || format('branch-{0}', github.ref) }}
	  cancel-in-progress: true

	# Workflow-wide token scope is read-only by default; any job that
	# needs more must request it explicitly in its own `permissions:`
	# (Scorecard token-permissions, Sonar S8264).
	permissions:
	  contents: read

	jobs:
	# ─── Planner (determines which downstream jobs may legitimately skip) ──
	#
	# Inspired by pypa/pip's CI: a planner job classifies the PR (or push)
	# diff and emits boolean outputs. Downstream skips are then either
	# "intentional" (planner said so) or suspicious (cancelled / crashed).
	# The aggregator gate at the bottom uses these outputs to distinguish
	# the two and refuses to pass on suspicious skips. See #1273.
	determine-changes:
	  name: Determine changes
	  runs-on: ubuntu-latest
	  timeout-minutes: 3
	  permissions:
	    contents: read
	  outputs:
	    python_changed: ${{ steps.classify.outputs.python_changed }}
	    tests_changed: ${{ steps.classify.outputs.tests_changed }}
	    gha_workflows_changed: ${{ steps.classify.outputs.gha_workflows_changed }}
	    docs_only: ${{ steps.classify.outputs.docs_only }}
	    # macos_sensitive: true when the diff touches platform-specific code
	    # paths whose macOS behaviour cannot be exercised on ubuntu/windows
	    # runners. Used by the `test` matrix gate (see #1468) to skip the
	    # macos-latest cells on PRs that do not need them, freeing the
	    # macOS hosted-runner pool during burst-merge waves. The nightly
	    # workflow (ci-macos-nightly.yml) provides the safety-net coverage.
	    macos_sensitive: ${{ steps.classify.outputs.macos_sensitive }}
	  steps:
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7
	      with:
	        persist-credentials: false
	        fetch-depth: 0
	    - id: classify
	      name: Classify changed paths
	      env:
	        BASE_REF: ${{ github.base_ref }}
	        EVENT_NAME: ${{ github.event_name }}
	        # Passed through the environment instead of being interpolated
	        # into the script text, so the value is never parsed as shell.
	        BEFORE_SHA: ${{ github.event.before }}
	      run: |
	        # On pull_request, diff against the base branch. On push to main
	        # (or any other event), diff against the previous commit so
	        # the planner stays useful for filter-skipped downstream jobs.
	        #
	        # NOTE: checkout above uses fetch-depth: 0, so HEAD has full history.
	        # However, origin/${BASE_REF} is NOT fetched by default in the PR
	        # checkout (which is on the merge ref) - we must fetch it explicitly.
	        # Using --depth=1 here previously caused `...` (merge-base) diffs to
	        # fail when the merge-base wasn't in the shallow window, especially
	        # after parallel main merges. Fetch the base ref WITHOUT --depth.
	        CHANGED=""
	        diff_failed=false
	        if [ "$EVENT_NAME" = "pull_request" ]; then
	          # Fetch base ref with full history (no --depth) so merge-base resolves.
	          git fetch --no-tags origin "refs/heads/${BASE_REF}:refs/remotes/origin/${BASE_REF}" || true
	          # If the local clone is still shallow for any reason, unshallow it.
	          if [ -f "$(git rev-parse --git-dir)/shallow" ]; then
	            git fetch --unshallow origin "${BASE_REF}" || git fetch origin "${BASE_REF}" || true
	          fi
	          # Diagnostic output - keeps future flakes debuggable in the action log.
	          echo "::group::git diagnostic"
	          git remote -v || true
	          git log --oneline -5 || true
	          echo "origin/${BASE_REF} -> $(git rev-parse "origin/${BASE_REF}" 2>&1 || echo UNRESOLVED)"
	          echo "HEAD -> $(git rev-parse HEAD 2>&1 || echo UNRESOLVED)"
	          echo "::endgroup::"
	          # Keep stderr out of CHANGED: a warning printed by a *successful*
	          # diff must not be classified as if it were a changed path.
	          diff_err="$(mktemp)"
	          if ! CHANGED=$(git diff --name-only "origin/${BASE_REF}...HEAD" 2>"$diff_err"); then
	            echo "::warning::git diff failed against origin/${BASE_REF}; falling back to safe over-broad result"
	            echo "diff stderr: $(cat "$diff_err")"
	            diff_failed=true
	          fi
	        else
	          # `before` is empty on events without it and 000... on the first
	          # push of a branch; fall back to HEAD~1 in both cases. If the diff
	          # itself fails, list every tracked file (over-broad but safe).
	          diff_base="$BEFORE_SHA"
	          if [ -z "$diff_base" ] || [ "$diff_base" = "0000000000000000000000000000000000000000" ]; then
	            diff_base="HEAD~1"
	          fi
	          CHANGED=$(git diff --name-only "${diff_base}...HEAD" 2>/dev/null || git ls-files)
	        fi
	        # Fail-safe fallback: when diff fails for any reason, emit the safe
	        # over-broad classification so downstream jobs run anyway. Correctness
	        # wins over efficiency - never fail this job for a clone-shape issue.
	        if [ "$diff_failed" = "true" ]; then
	          {
	            echo "python_changed=true"
	            echo "tests_changed=true"
	            echo "gha_workflows_changed=true"
	            echo "docs_only=false"
	            echo "macos_sensitive=true"
	          } | tee -a "$GITHUB_OUTPUT"
	          exit 0
	        fi
	        echo "Changed files:"
	        printf '%s\n' "$CHANGED" | sed 's/^/ /'

	        # Pure-shell classification - auditable in `actionlint`, no
	        # sub-shell variable round-tripping through python.
	        python_changed=false
	        tests_changed=false
	        gha_workflows_changed=false
	        docs_only=true
	        macos_sensitive=false
	        # Classify each changed path via grep. Using grep instead of
	        # bash `case` to avoid linter warnings on overlapping patterns
	        # (case-globs cannot cross slashes anyway).
	        while IFS= read -r f; do
	          [ -z "$f" ] && continue
	          # matched_meta stays false only for paths that fit none of the
	          # known categories; such a path disqualifies docs_only.
	          matched_meta=false
	          if printf '%s\n' "$f" | grep -Eq '^src/.*\.py$'; then
	            python_changed=true; docs_only=false; matched_meta=true
	          fi
	          if printf '%s\n' "$f" | grep -Eq '^tests/'; then
	            tests_changed=true; docs_only=false; matched_meta=true
	          fi
	          if printf '%s\n' "$f" | grep -Eq '^\.github/workflows/.*\.(yml|yaml)$'; then
	            gha_workflows_changed=true; docs_only=false; matched_meta=true
	          fi
	          if printf '%s\n' "$f" | grep -Eq '^docs/|\.md$|^LICENSE$|^CONTRIBUTORS\.md$'; then
	            matched_meta=true
	          fi
	          if [ "$matched_meta" = "false" ]; then
	            docs_only=false
	          fi
	          # macOS-sensitive paths (see #1468). Modules with branches on
	          # `sys.platform == "darwin"` or that wrap macOS-only APIs
	          # (Keychain via `keyring`, AppKit notifications, Foundation
	          # clipboard, `launchd` daemon installer). When any of these
	          # change, the macOS matrix cell must run on the PR to catch
	          # regressions before merge. Otherwise it skips and the
	          # nightly ci-macos-nightly.yml workflow catches drift.
	          #
	          # The two workflow files are listed as well: changes to the CI
	          # workflow itself must re-validate the macOS cell on the PR so
	          # we never ship a broken matrix config (the nightly workflow
	          # runs the merged config).
	          if printf '%s\n' "$f" | grep -Eq \
	            -e '^src/bernstein/core/tunnels/' \
	            -e '^src/bernstein/core/daemon/' \
	            -e '^src/bernstein/core/config/platform_compat\.py$' \
	            -e '^src/bernstein/core/security/vault/' \
	            -e '^src/bernstein/core/security/resource_limits\.py$' \
	            -e '^src/bernstein/core/persistence/runtime_state\.py$' \
	            -e '^src/bernstein/core/communication/notifications\.py$' \
	            -e '^src/bernstein/core/preview/' \
	            -e '^src/bernstein/tui/clipboard\.py$' \
	            -e '^src/bernstein/cli/display/splash_screen\.py$' \
	            -e '^src/bernstein/bridges/openclaw_gateway\.py$' \
	            -e '^tests/integration/test_adapter_e2e\.py$' \
	            -e '^scripts/run_tests\.py$' \
	            -e '^\.github/workflows/ci\.yml$' \
	            -e '^\.github/workflows/ci-macos-nightly\.yml$'; then
	            macos_sensitive=true
	          fi
	        done <<< "$CHANGED"
	        # If nothing changed at all (e.g. workflow_dispatch on a clean ref),
	        # treat as "not docs-only" so we don't intentionally skip tests.
	        if [ -z "$CHANGED" ]; then
	          docs_only=false
	        fi
	        {
	          echo "python_changed=$python_changed"
	          echo "tests_changed=$tests_changed"
	          echo "gha_workflows_changed=$gha_workflows_changed"
	          echo "docs_only=$docs_only"
	          echo "macos_sensitive=$macos_sensitive"
	        } | tee -a "$GITHUB_OUTPUT"
	# ─── Fast checks (never cancelled, <2 min each) ───────────────────────

	repo-hygiene:
	  name: Repo hygiene
	  runs-on: ubuntu-latest
	  timeout-minutes: 5
	  permissions:
	    contents: read
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411  # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7
	      with:
	        persist-credentials: false
	        # Full clone so ``bernstein agents-md verify`` can resolve the
	        # default branch via ``git rev-parse --verify origin/main``.
	        # The default depth=1 single-ref fetch leaves origin/main
	        # unfetched and the verify step silently drops the
	        # git-workflow section, producing drift against locally-synced
	        # content.
	        fetch-depth: 0
	    - name: Establish origin/HEAD
	      # actions/checkout does not set ``refs/remotes/origin/HEAD`` even
	      # with full fetch. The generator's first-choice resolver reads
	      # that symbolic-ref; this step makes the result match a
	      # developer checkout where ``git remote set-head origin -a``
	      # has run.
	      run: git remote set-head origin -a
	    - name: Assert .sdd is not tracked
	      run: |
	        TRACKED="$(git ls-files '.sdd')"
	        if [ -n "$TRACKED" ]; then
	          echo "::error::.sdd must never be committed to git"
	          printf '%s\n' "$TRACKED"
	          exit 1
	        fi
	    - name: Check for merge conflict markers in source files
	      run: |
	        # Scan every tracked Python/YAML/Markdown/TOML file. The list is
	        # read NUL-delimited so paths containing spaces or other unusual
	        # characters are handled as single entries.
	        conflicts_found=false
	        while IFS= read -r -d '' f; do
	          if grep -qE '^(<{7} |>{7} )' "$f" 2>/dev/null; then
	            conflicts_found=true
	            echo "::error file=$f::Unresolved merge conflict markers in $f"
	          fi
	        done < <(git ls-files -z -- '*.py' '*.yaml' '*.yml' '*.md' '*.toml')
	        if [ "$conflicts_found" = "true" ]; then
	          exit 1
	        fi
	    - name: Check Python syntax in scripts/
	      run: |
	        syntax_errors=false
	        for f in scripts/*.py; do
	          # An unmatched glob is left as the literal pattern; skip it
	          # instead of reporting a bogus syntax error.
	          [ -e "$f" ] || continue
	          if ! python3 -m py_compile "$f" 2>/dev/null; then
	            syntax_errors=true
	            echo "::error file=$f::Syntax error in $f"
	          fi
	        done
	        if [ "$syntax_errors" = "true" ]; then
	          exit 1
	        fi
	    - uses: ./.github/actions/bootstrap
	    - name: AGENTS.md cross-CLI sync drift check
	      # Fails if AGENTS.md / CLAUDE.md / CONVENTIONS.md / .aider.conf.yml /
	      # .goosehints / .cursor/rules/*.mdc drift from `bernstein agents-md
	      # generate`. Run `uv run bernstein agents-md sync` locally to fix.
	      run: uv run bernstein agents-md verify --workdir .

	lint:
	  name: Lint
	  permissions:
	    contents: read
	  runs-on: ubuntu-latest
	  timeout-minutes: 10
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411  # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7
	      with:
	        persist-credentials: false
	    - uses: ./.github/actions/bootstrap
	    # Ruff: lint rules first, then a formatting check (no rewrite).
	    - run: uv run ruff check src/
	    - run: uv run ruff format --check src/
	    - name: Architecture contracts (import-linter)
	      run: uv run lint-imports
	    - name: Route broad-except policy (#1723)
	      # The script rejects a bare `except Exception:` in
	      # src/bernstein/core/routes/**.py unless a `bot-ack:` or
	      # `intentional-broad-except` marker sits within 3 lines of it.
	      run: uv run python scripts/check_routes_broad_except.py

	spelling:
	  name: Spelling (typos)
	  permissions:
	    contents: read
	  runs-on: ubuntu-latest
	  timeout-minutes: 5
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411  # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7
	      with:
	        persist-credentials: false
	    # Spell-check the checked-out tree with the SHA-pinned typos action.
	    - uses: crate-ci/typos@bee27e3a4fd1ea2111cf90ab89cd076c870fce14  # v1

	actionlint:
	  name: Workflow lint
	  permissions:
	    contents: read
	  runs-on: ubuntu-latest
	  timeout-minutes: 5
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411  # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7
	      with:
	        persist-credentials: false
	    - uses: reviewdog/action-actionlint@6fb7acc99f4a1008869fa8a0f09cfca740837d9d  # v1
	      with:
	        reporter: github-check
	        # `level` is a severity tag, not a filter, so on its own it
	        # does not keep the annotation cap from being hit.
	        level: error
	        # `fail_on_error` is deprecated; `fail_level: error` replaces it.
	        fail_level: error
	        # `-shellcheck=` disables the embedded shellcheck (empty path =
	        # disabled). Without this our workflows emit ~21
	        # SC2016/SC2221/SC2222/SC2034 warnings that flood the
	        # 22-annotation GitHub check_run cap, which is itself reported
	        # as an error and fails reviewdog.
	        actionlint_flags: -shellcheck=

	lineage-gate:
	  # ADR-009 Lineage Gate - required check. CI generates a minimal signed
	  # lineage fixture and verifies it so the job always exercises the gate
	  # logic even when runtime `.sdd/lineage/log.jsonl` is absent.
	  name: Lineage Gate
	  runs-on: ubuntu-latest
	  timeout-minutes: 5
	  permissions:
	    contents: read
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411  # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7
	      with:
	        persist-credentials: false
	    - uses: ./.github/actions/bootstrap
	    - name: Run lineage gate
	      run: |
	        set -euo pipefail
	        # The fixture lives under the runner temp dir; the path is exported
	        # so the inline Python below can read it from the environment.
	        LINEAGE_FIXTURE="${RUNNER_TEMP}/lineage-fixture"
	        export LINEAGE_FIXTURE
	        # Step 1: write a one-entry lineage log, an agent card carrying a
	        # freshly generated public key, and a detached signature for the
	        # entry.
	        uv run python - <<'PY'
	        import hashlib
	        import json
	        import os
	        from pathlib import Path

	        from bernstein.core.lineage.entry import LineageEntry, canonicalise, entry_hash
	        from bernstein.core.lineage.identity import generate_keypair, sign_detached

	        root = Path(os.environ["LINEAGE_FIXTURE"])
	        log = root / "lineage" / "log.jsonl"
	        cards = root / "agents" / "agent:ci"
	        log.parent.mkdir(parents=True, exist_ok=True)
	        cards.mkdir(parents=True, exist_ok=True)

	        private_key, public_key = generate_keypair()
	        (cards / "card.json").write_text(
	            json.dumps(
	                {
	                    "protocolVersion": "a2a/1.0",
	                    "agent_id": "agent:ci",
	                    "kid": "ci-fixture",
	                    "public_key_pem": public_key,
	                }
	            ),
	            encoding="utf-8",
	        )

	        entry = LineageEntry(
	            v=1,
	            artefact_path="ci/lineage-fixture.txt",
	            artefact_kind="file",
	            content_hash="sha256:" + ("1" * 64),
	            parent_hashes=[],
	            agent_id="agent:ci",
	            agent_card_kid="ci-fixture",
	            tool_call_id="ci-lineage-gate",
	            span_id="ci-lineage-gate",
	            ts_ns=1,
	            operator_hmac="0" * 64,
	        )
	        canonical = canonicalise(entry)
	        log.write_bytes(canonical + b"\n")

	        # Signature path: signatures/<first 2 hex of path hash>/<path hash>/<entry digest>.jws
	        jws = sign_detached(canonical, private_key, kid="ci-fixture")
	        path_hash = hashlib.sha256(entry.artefact_path.encode()).hexdigest()
	        entry_digest = entry_hash(entry).replace("sha256:", "")
	        sig_dir = log.parent / "signatures" / path_hash[:2] / path_hash
	        sig_dir.mkdir(parents=True, exist_ok=True)
	        (sig_dir / f"{entry_digest}.jws").write_text(jws, encoding="utf-8")
	        PY
	        # Step 2: run the gate script against the generated fixture.
	        uv run python scripts/check_lineage.py \
	          --log "${LINEAGE_FIXTURE}/lineage/log.jsonl" \
	          --cards "${LINEAGE_FIXTURE}/agents"

	# ─── Medium checks (cancel old runs, 5-20 min) ────────────────────────

	typecheck:
	  name: Type check report
	  needs: [lint]  # only run if lint passes (fast-fail)
	  runs-on: ubuntu-latest
	  timeout-minutes: 20
	  permissions:
	    contents: read
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411  # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7
	      with:
	        persist-credentials: false
	    - uses: ./.github/actions/bootstrap
	    - name: Run pyright (advisory)
	      run: |
	        # Advisory only: print the last line of pyright's output and
	        # never fail the step (`|| true`).
	        uv run pyright 2>&1 | tail -1 || true
	        echo "::notice::Typecheck is advisory while module decomposition shims are being typed"

	dead-code:
	  name: Dead code (Vulture)
	  permissions:
	    contents: read
	  runs-on: ubuntu-latest
	  timeout-minutes: 10
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411  # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7
	      with:
	        persist-credentials: false
	    - uses: ./.github/actions/bootstrap
	    # Install vulture as a uv tool, then scan src/ together with the
	    # whitelist file at a minimum confidence of 80.
	    - run: uv tool install vulture
	    - run: vulture src/ vulture_whitelist.py --min-confidence 80 --exclude "tests,docs"

	dist-size:
	  name: Package size check
	  runs-on: ubuntu-latest
	  timeout-minutes: 10
	  permissions:
	    contents: read
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411  # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7
	      with:
	        persist-credentials: false
	    - uses: ./.github/actions/bootstrap
	    - name: Build and check size
	      run: |
	        uv build
	        MAX_SIZE=$((10 * 1024 * 1024))
	        # Fail with an explicit message when the build produced no wheel,
	        # mirroring the check in the install-smoke jobs.
	        shopt -s nullglob
	        wheels=(dist/*.whl)
	        if [ "${#wheels[@]}" -eq 0 ]; then
	          echo "::error::no wheel found under dist/"
	          exit 1
	        fi
	        for f in "${wheels[@]}"; do
	          # GNU stat first, BSD stat as the fallback.
	          SIZE=$(stat -c%s "$f" 2>/dev/null || stat -f%z "$f")
	          echo "$f: $SIZE bytes"
	          if [ "$SIZE" -gt "$MAX_SIZE" ]; then
	            echo "::error::Wheel $f exceeds 10MB limit ($SIZE bytes)"
	            exit 1
	          fi
	        done
	    - name: Upload built wheel for downstream install-smoke jobs
	      # Shared artifact consumed by install-smoke-pipx and
	      # install-smoke-uv. Building once and reusing avoids running
	      # `uv build` on every matrix cell (4 + 2 = 6 cells today) and
	      # ensures every smoke runs against bit-identical wheels.
	      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
	      with:
	        name: install-smoke-wheel
	        path: dist/*.whl
	        if-no-files-found: error
	        retention-days: 1

	# ─── Install-path smoke (built wheel, not editable) ──────────────────
	#
	# The two install paths the README documents first - pipx and
	# `uv tool install` - have no other coverage that exercises the
	# built wheel end to end. Editable installs (`pip install -e .`)
	# hide a class of packaging bugs: missing package-data, broken
	# `console_scripts`, entry-point loading errors, dependency-resolver
	# regressions. These jobs install from the wheel produced by
	# `dist-size`, then run `bernstein --version`, `bernstein --help`,
	# and an importlib-based packaged-resource probe (deliberately not
	# `bernstein doctor --json`) to confirm the install paths the README
	# documents first still work end to end.
	install-smoke-pipx:
	  name: Install smoke - pipx (${{ matrix.os }}, Python ${{ matrix.python-version }})
	  needs: [dist-size]
	  runs-on: ${{ matrix.os }}
	  timeout-minutes: 15
	  permissions:
	    contents: read
	  strategy:
	    fail-fast: false
	    matrix:
	      os: [ubuntu-latest, macos-latest]
	      # Matrix tracks `requires-python = ">=3.12"` in pyproject.toml.
	      # 3.11 is intentionally excluded: pipx / uv install would refuse
	      # the wheel for a Python the package does not support, which
	      # would just confirm the floor we already pin.
	      python-version: ["3.12", "3.13"]
	  steps:
	    - name: Harden runner (audit mode)
	      if: runner.os == 'Linux'
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411  # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/setup-python@ece7cb06caefa5fff74198d8649806c4678c61a1  # v6.3.0
	      with:
	        python-version: ${{ matrix.python-version }}
	    - name: Download built wheel
	      uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c  # v8.0.1
	      with:
	        name: install-smoke-wheel
	        path: dist
	    - name: Fail fast if wheel exceeds 25 MB
	      # Independent of the 10 MB gate in `dist-size`: that one tracks
	      # day-to-day growth and is tuned tight. This one is the smoke
	      # job's own hard ceiling - catches accidental bundling
	      # regressions (binary blobs, vendored deps) that would slow
	      # pipx installs for everyone.
	      shell: bash
	      run: |
	        set -euo pipefail
	        MAX_SIZE=$((25 * 1024 * 1024))
	        shopt -s nullglob
	        wheels=(dist/*.whl)
	        if [ "${#wheels[@]}" -eq 0 ]; then
	          echo "::error::no wheel found under dist/"
	          exit 1
	        fi
	        for f in "${wheels[@]}"; do
	          # GNU stat first, BSD stat (macOS runners) as the fallback.
	          SIZE=$(stat -c%s "$f" 2>/dev/null || stat -f%z "$f")
	          echo "$f: $SIZE bytes"
	          if [ "$SIZE" -gt "$MAX_SIZE" ]; then
	            echo "::error::wheel $f exceeds 25 MB install-smoke ceiling ($SIZE bytes)"
	            exit 1
	          fi
	        done
	    - name: Install uv (SHA-pinned, vendors pipx)
	      # Scorecard pinned-dependencies: pip cannot be hash-pinned for a
	      # single bootstrap step without a maintained requirements file,
	      # so we route the pipx install through SHA-pinned uv instead.
	      # `uv tool install pipx` puts pipx on PATH via uv's tool dir.
	      uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39  # v8.2.0
	      with:
	        enable-cache: true
	    - name: Install pipx via uv
	      shell: bash
	      run: |
	        set -euo pipefail
	        uv tool install pipx
	        # Expose uv-managed tool shims and pipx's own bin dir on PATH.
	        UV_TOOL_BIN_DIR="$(uv tool dir --bin)"
	        echo "$UV_TOOL_BIN_DIR" >> "$GITHUB_PATH"
	        echo "$HOME/.local/bin" >> "$GITHUB_PATH"
	        "$UV_TOOL_BIN_DIR/pipx" ensurepath
	    - name: pipx install the built wheel
	      # Install from the wheel, never editable. Editable installs
	      # bypass package-data inclusion and entry-point registration -
	      # the exact regression class this job is here to catch.
	      shell: bash
	      run: |
	        set -euo pipefail
	        wheels=(dist/*.whl)
	        # `command -v` is the POSIX way to resolve the interpreter that
	        # setup-python put first on PATH.
	        pipx install --python "$(command -v python)" "${wheels[0]}"
	    - name: bernstein --version (exit zero)
	      shell: bash
	      run: bernstein --version
	    - name: bernstein --help (exit zero)
	      shell: bash
	      run: bernstein --help
	    - name: Verify packaged resources load via importlib
	      # Confirms package-data survived the wheel build by reading a
	      # bundled MCP tool schema and a force-included default template
	      # through importlib.resources. Uses the pipx-managed interpreter
	      # so we exercise the same site-packages layout end users get.
	      # We deliberately avoid `bernstein doctor --json` here: doctor
	      # is a dev-environment diagnostic that probes optional tools
	      # (uv, ruff, pytest, pyright) and git context, neither of which
	      # exists in a fresh pipx venv. This probe is narrowly scoped to
	      # the regression class the smoke job is meant to catch.
	      shell: bash
	      run: |
	        set -euo pipefail
	        BERNSTEIN_PYTHON="$(pipx environment --value PIPX_LOCAL_VENVS)/bernstein/bin/python"
	        # Same guard as the uv smoke job: fail with a clear message when
	        # the venv interpreter is not where we expect it.
	        if [ ! -x "$BERNSTEIN_PYTHON" ]; then
	          echo "::error::cannot find pipx-managed bernstein interpreter at $BERNSTEIN_PYTHON"
	          exit 1
	        fi
	        "$BERNSTEIN_PYTHON" -c '
	        import importlib
	        import importlib.resources as ir

	        cli_mod = importlib.import_module("bernstein.cli.main")
	        assert callable(getattr(cli_mod, "cli")), "bernstein.cli.main:cli missing"

	        schema_pkg = ir.files("bernstein.mcp.tool_schemas")
	        schemas = [p.name for p in schema_pkg.iterdir() if p.name.endswith(".json")]
	        assert schemas, "no MCP tool schemas shipped in wheel"

	        tpl_pkg = ir.files("bernstein._default_templates")
	        assert any(tpl_pkg.iterdir()), "no default templates shipped in wheel"
	        print(f"packaged resources OK: {len(schemas)} MCP schemas")
	        '

	install-smoke-uv:
	  # Leaner mirror of install-smoke-pipx for the `uv tool install`
	  # path. uv is the second install command documented in the README
	  # and rounds out coverage for the two paths most likely to surface
	  # packaging regressions. We run a smaller matrix (one Python
	  # version per OS) because the pipx job already exercises the
	  # cross-Python combinatorics; uv shares the same wheel and
	  # console-scripts entry point, so the marginal coverage of
	  # repeating the pipx job's 4-cell matrix is not worth the runner
	  # spend.
	  name: Install smoke - uv tool (${{ matrix.os }})
	  needs: [dist-size]
	  runs-on: ${{ matrix.os }}
	  timeout-minutes: 15
	  permissions:
	    contents: read
	  strategy:
	    fail-fast: false
	    matrix:
	      os: [ubuntu-latest, macos-latest]
	  steps:
	    - name: Harden runner (audit mode)
	      if: runner.os == 'Linux'
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411  # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7
	      with:
	        persist-credentials: false
	    - uses: ./.github/actions/bootstrap
	    - uses: actions/setup-python@ece7cb06caefa5fff74198d8649806c4678c61a1  # v6.3.0
	      with:
	        python-version: "3.14"
	    - name: Download built wheel
	      uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c  # v8.0.1
	      with:
	        name: install-smoke-wheel
	        path: dist
	    - name: Fail fast if wheel exceeds 25 MB
	      shell: bash
	      run: |
	        set -euo pipefail
	        MAX_SIZE=$((25 * 1024 * 1024))
	        shopt -s nullglob
	        wheels=(dist/*.whl)
	        if [ "${#wheels[@]}" -eq 0 ]; then
	          echo "::error::no wheel found under dist/"
	          exit 1
	        fi
	        for f in "${wheels[@]}"; do
	          # GNU stat first, BSD stat (macOS runners) as the fallback.
	          SIZE=$(stat -c%s "$f" 2>/dev/null || stat -f%z "$f")
	          echo "$f: $SIZE bytes"
	          if [ "$SIZE" -gt "$MAX_SIZE" ]; then
	            echo "::error::wheel $f exceeds 25 MB install-smoke ceiling ($SIZE bytes)"
	            exit 1
	          fi
	        done
	    - name: uv tool install the built wheel
	      # `uv tool install` is the install command the README documents
	      # second. As with pipx we install from the wheel, never
	      # editable, so packaging bugs surface here and not in user
	      # reports.
	      shell: bash
	      run: |
	        set -euo pipefail
	        wheels=(dist/*.whl)
	        uv tool install "${wheels[0]}"
	        # Ask uv where it put the tool shims instead of assuming
	        # ~/.local/bin, the same way the pipx smoke job does.
	        uv tool dir --bin >> "$GITHUB_PATH"
	    - name: bernstein --version (exit zero)
	      shell: bash
	      run: bernstein --version
	    - name: bernstein --help (exit zero)
	      shell: bash
	      run: bernstein --help
	    - name: Verify packaged resources load via importlib
	      # Mirror of the pipx job's resource probe but resolved through
	      # the uv-managed tool venv. See pipx counterpart for rationale.
	      shell: bash
	      run: |
	        set -euo pipefail
	        UV_TOOL_DIR="$(uv tool dir)"
	        BERNSTEIN_PYTHON="$UV_TOOL_DIR/bernstein/bin/python"
	        if [ ! -x "$BERNSTEIN_PYTHON" ]; then
	          echo "::error::cannot find uv-managed bernstein interpreter at $BERNSTEIN_PYTHON"
	          exit 1
	        fi
	        "$BERNSTEIN_PYTHON" -c '
	        import importlib
	        import importlib.resources as ir

	        cli_mod = importlib.import_module("bernstein.cli.main")
	        assert callable(getattr(cli_mod, "cli")), "bernstein.cli.main:cli missing"

	        schema_pkg = ir.files("bernstein.mcp.tool_schemas")
	        schemas = [p.name for p in schema_pkg.iterdir() if p.name.endswith(".json")]
	        assert schemas, "no MCP tool schemas shipped in wheel"

	        tpl_pkg = ir.files("bernstein._default_templates")
	        assert any(tpl_pkg.iterdir()), "no default templates shipped in wheel"
	        print(f"packaged resources OK: {len(schemas)} MCP schemas")
	        '

	# ─── CI hardening 2026 (medium, parallel, ≤8 min each) ───────────────

	property-tests:
	  # Hypothesis property suite, run at PR time under the `smoke`
	  # profile (50 examples per property) so each file completes in
	  # well under a minute. Its purpose is to catch hash-chain /
	  # signature / canonicalisation regressions that the fixed inputs
	  # of unit tests let through.
	  name: Property tests (Hypothesis smoke)
	  needs: [lint]
	  permissions:
	    contents: read
	  runs-on: ubuntu-latest
	  timeout-minutes: 10
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411  # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7
	      with:
	        persist-credentials: false
	    - uses: ./.github/actions/bootstrap
	    - name: Run Hypothesis property suite (smoke profile)
	      run: uv run pytest tests/property/ -q --no-cov --timeout=60
	      env:
	        HYPOTHESIS_PROFILE: smoke

	snapshot-tests:
	  # Syrupy snapshot suite. It pins the JSONL field order / shape of
	  # the audit log and lineage record, so silent wire-format drift is
	  # caught before downstream parsers break.
	  name: Snapshot tests (syrupy)
	  needs: [lint]
	  permissions:
	    contents: read
	  runs-on: ubuntu-latest
	  timeout-minutes: 5
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411  # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7
	      with:
	        persist-credentials: false
	    - uses: ./.github/actions/bootstrap
	    - name: Run snapshot tests
	      run: uv run pytest tests/snapshot/ -q --no-cov

	schemathesis-smoke:
	  # OpenAPI fuzz on the FastAPI task server. The smoke profile
	  # fuzzes only the critical-surface allow-list (task CRUD, health,
	  # openapi.json, metrics) with 5 examples per endpoint and the
	  # `not_a_server_error` check. Heavier sweeps live in nightly.
	  name: Schemathesis smoke
	  needs: [lint]
	  permissions:
	    contents: read
	  runs-on: ubuntu-latest
	  # The smoke profile takes ~7m30s of wall-clock time. That raced
	  # the previous 8-minute window, got cancelled, and failed the CI
	  # gate aggregator. 20m gives headroom; uv setup is already cached
	  # via bootstrap and the in-process ASGI schema build is not
	  # separately cacheable.
	  timeout-minutes: 20
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411  # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7
	      with:
	        persist-credentials: false
	    - uses: ./.github/actions/bootstrap
	    - name: Run Schemathesis smoke profile
	      run: uv run pytest tests/contract/ -q --no-cov --timeout=30 -p no:warnings
	      env:
	        SCHEMATHESIS_PROFILE: smoke
	        BERNSTEIN_AUTH_DISABLED: "1"

	semgrep:
	  # Runs the project's own Semgrep rules from .semgrep.yml. Only
	  # ERROR-severity findings fail the PR; WARNING is advisory
	  # (annotation only).
	  name: Semgrep (custom rules)
	  runs-on: ubuntu-latest
	  needs:
	    - lint
	  timeout-minutes: 5
	  permissions:
	    contents: read
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7
	      with:
	        persist-credentials: false
	    - uses: ./.github/actions/bootstrap
	    - name: Install Semgrep (isolated env)
	      # semgrep pins click<8.2 and opentelemetry-sdk<1.38, both below the
	      # project floors (click>=8.3.3, opentelemetry-sdk>=1.41.1).
	      # `uv tool` (pipx-equivalent) gives it a venv of its own so those
	      # transitive pins never reach the project resolver.
	      run: uv tool install semgrep
	    - name: Run Semgrep (ERROR-only fail gate)
	      run: uv tool run semgrep --config .semgrep.yml --metrics off --severity ERROR --error src/

	bandit:
	  # Bandit static security analysis, gated on HIGH severity only. Main
	  # carries ~30 accepted MEDIUM findings (urlopen with timeout,
	  # hardcoded localhost in dev), so MEDIUM is not gated. The PR fails
	  # only when it introduces a new HIGH; HIGHs that already exist are
	  # recorded in `.bandit-baseline.json` and tracked as follow-ups.
	  name: Bandit (security)
	  runs-on: ubuntu-latest
	  needs:
	    - lint
	  timeout-minutes: 5
	  permissions:
	    contents: read
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7
	      with:
	        persist-credentials: false
	    - uses: ./.github/actions/bootstrap
	    - name: Run Bandit (HIGH severity gate via baseline)
	      # `.bandit-baseline.json` holds the 11 known HIGH findings on main;
	      # with `-b`, only HIGH issues that are new in this PR fail the step.
	      run: uv run bandit -r src/ --severity-level high -b .bandit-baseline.json

	pip-audit:
	  # CVE scan of PyPI dependencies. Production dependencies are strict
	  # (any vulnerability fails the job); dev dependencies are advisory
	  # (continue-on-error). The free PyPI advisory DB is updated daily.
	  name: pip-audit (deps)
	  runs-on: ubuntu-latest
	  needs:
	    - lint
	  timeout-minutes: 8
	  permissions:
	    contents: read
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7
	      with:
	        persist-credentials: false
	    - uses: ./.github/actions/bootstrap
	    - name: Export production requirements (no project, hashed)
	      # The audit runs on the resolved lockfile with bernstein itself
	      # left out: `pip-audit` chokes on editable distributions even with
	      # `--skip-editable` under `--strict`, so it is given a flat
	      # requirements file generated from `uv.lock`.
	      run: >-
	        uv export --no-dev --no-emit-project
	        --format requirements-txt -o /tmp/req-prod.txt
	    - name: Export dev requirements (no project, hashed)
	      run: >-
	        uv export --no-emit-project
	        --format requirements-txt -o /tmp/req-dev.txt
	    - name: Production deps (strict)
	      # `--no-deps` is safe because the exported file is fully pinned and
	      # `uv.lock` has already resolved the transitive closure.
	      # `--disable-pip` avoids creating a sub-venv just to run
	      # `pip install`.
	      #
	      # `--ignore-vuln PYSEC-2025-183` (CVE-2025-45768) is a disputed
	      # advisory against pyjwt, pulled in transitively via `mcp`. It
	      # covers every released version (introduced at 0, no fix version
	      # published). The maintainer disputes it because the key length is
	      # chosen by the calling application, not the library. With nothing
	      # to upgrade to, it is ignored and the rationale is kept here.
	      run: >-
	        uv run pip-audit -r /tmp/req-prod.txt --strict --disable-pip
	        --no-deps --ignore-vuln PYSEC-2025-183
	    - name: Dev deps (advisory)
	      continue-on-error: true
	      run: >-
	        uv run pip-audit -r /tmp/req-dev.txt --strict --disable-pip
	        --no-deps --ignore-vuln PYSEC-2025-183

	beartype:
	  # Runtime type-contract enforcement via beartype.claw. The lineage
	  # unit tests run with BEARTYPE_USE_CLAW=enable so that a type-contract
	  # violation in a claw-instrumented module surfaces as a test failure.
	  name: Beartype (type contracts)
	  runs-on: ubuntu-latest
	  needs:
	    - lint
	  timeout-minutes: 15
	  permissions:
	    contents: read
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7
	      with:
	        persist-credentials: false
	    - uses: ./.github/actions/bootstrap
	    - name: Run lineage-signer tests under beartype claw
	      # The beartype-claw allow-list is kept in tests/_beartype_claw.py.
	      # At present the strict zone contains only
	      # ``bernstein.core.persistence.lineage_signer``; widen the list as
	      # more modules become beartype-clean.
	      run: >-
	        uv run pytest tests/unit/ -q --no-cov --timeout=120
	        -k 'lineage_signer or lineage_record or lineage_export'
	      env:
	        BEARTYPE_USE_CLAW: enable

	mutmut-diff:
	  # Mutation-testing report limited to the Python files under src/ that
	  # this PR adds or modifies. Advisory: the mutmut step is
	  # continue-on-error and its commands end in `|| true`, so the
	  # mutation score is reported but never fails the job.
	  name: Mutation report (diff-only)
	  needs: [lint]
	  runs-on: ubuntu-latest
	  timeout-minutes: 20
	  if: github.event_name == 'pull_request'
	  permissions:
	    contents: read
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7
	      with:
	        persist-credentials: false
	        # Full history so the three-dot diff against the base branch can
	        # locate the merge base.
	        fetch-depth: 0
	    - uses: ./.github/actions/bootstrap
	    - name: Compute changed src/ files
	      id: diff
	      env:
	        BASE_REF: ${{ github.base_ref }}
	      run: |
	        # --diff-filter=d drops deleted paths: a file removed by the PR
	        # no longer exists in the checkout and must not be handed to
	        # mutmut as a path to mutate. The `|| true` keeps a no-match grep
	        # (exit 1) from failing the step if the shell runs with pipefail.
	        CHANGED=$(git diff --name-only --diff-filter=d \
	          "origin/${BASE_REF}...HEAD" \
	          | { grep '^src/.*\.py$' || true; } | tr '\n' ',' | sed 's/,$//')
	        if [ -z "$CHANGED" ]; then
	          echo "no Python files changed; skipping mutation step"
	          echo "skip=true" >> "$GITHUB_OUTPUT"
	          exit 0
	        fi
	        echo "paths=$CHANGED" >> "$GITHUB_OUTPUT"
	        echo "Mutating: $CHANGED"
	    - name: Run mutmut on changed files
	      if: steps.diff.outputs.skip != 'true'
	      continue-on-error: true # advisory - score reported, not enforced
	      env:
	        # Comma-separated list produced by the `diff` step above.
	        DIFF_PATHS: ${{ steps.diff.outputs.paths }}
	      run: |
	        uv sync --group dev
	        # mutmut 3.x reads paths_to_mutate from pyproject.toml /
	        # mutmut_config.py instead of CLI; we rewrite the config to
	        # the diff paths for this run only, then revert.
	        cp mutmut_config.py mutmut_config.py.bak
	        PATHS=$(echo "${DIFF_PATHS}" | tr ',' '\n' | sed 's/.*/ "&",/')
	        {
	          echo "paths_to_mutate = ["
	          echo "$PATHS"
	          echo "]"
	          echo "test_command = \"python -m pytest tests/unit/ -x -q --no-header --override-ini=addopts=\""
	          echo "tests_dir = \"tests/unit/\""
	        } > mutmut_config.py
	        uv run mutmut run || true
	        mv mutmut_config.py.bak mutmut_config.py
	        uv run mutmut results || true

	diff-coverage:
	  # LEVEL 1 of the coverage ratchet - diff-cover report: lines touched in
	  # this PR are compared with the committed diff-coverage floor. The floor lives
	  # in .coverage-baseline.json (key: diff_coverage_floor_percent) so it
	  # is a single source of truth that the weekly bump workflow nudges up
	  # over time (see docs/operations/coverage-ratchet.md). Reuses the
	  # coverage.xml uploaded by the main test job.
	  #
	  # ADVISORY: the diff-cover step is continue-on-error, so this report's
	  # result stays `success` even when diff coverage is below the floor.
	  # The report is outside the CI-gate `needs` set until PR coverage
	  # artifacts are reliable enough for a blocking threshold.
	  name: Diff coverage report
	  needs: [test]
	  runs-on: ubuntu-latest
	  timeout-minutes: 5
	  if: github.event_name == 'pull_request'
	  permissions:
	    contents: read
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7
	      with:
	        persist-credentials: false
	        # Full history so diff-cover can diff against origin/<base>.
	        fetch-depth: 0
	    - uses: ./.github/actions/bootstrap
	    - name: Download coverage report
	      # NOTE(review): no `run-id` is given, so this looks only at
	      # artifacts of the current workflow run, and the `coverage-report`
	      # artifact is uploaded by a job gated to pushes on main. On a PR
	      # run the download is therefore expected to find nothing - confirm
	      # whether a cross-run download was intended.
	      uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
	      with:
	        name: coverage-report
	      continue-on-error: true # main may not have generated coverage on the PR's commit
	    - name: Resolve diff-coverage floor from baseline
	      id: floor
	      # Single source of truth: the floor the weekly bump nudges up.
	      # Falls back to 80 if the baseline file is somehow absent so a
	      # missing file never silently disables the gate.
	      run: |
	        if [ -f .coverage-baseline.json ]; then
	          floor=$(uv run python scripts/coverage_ratchet.py show-floor \
	            --baseline .coverage-baseline.json)
	        else
	          echo "::warning::.coverage-baseline.json missing; defaulting diff floor to 80"
	          floor=80
	        fi
	        echo "value=${floor}" >> "$GITHUB_OUTPUT"
	        echo "Diff-coverage floor: ${floor}%"
	    - name: Run diff-cover
	      continue-on-error: true # advisory until all PRs reliably have coverage.xml
	      env:
	        BASE_REF: ${{ github.base_ref }}
	        FLOOR: ${{ steps.floor.outputs.value }}
	      run: |
	        if [ ! -f coverage.xml ]; then
	          echo "::warning::No coverage.xml found - skipping diff-cover"
	          exit 0
	        fi
	        # diff-cover exits non-zero when coverage is below --fail-under.
	        # Capture that status instead of letting `bash -e` abort here,
	        # so the markdown report still reaches the step summary in the
	        # one case where a reader most needs it.
	        status=0
	        uv run diff-cover coverage.xml \
	          --compare-branch="origin/${BASE_REF}" \
	          --fail-under="${FLOOR}" \
	          --markdown-report diff-coverage.md || status=$?
	        if [ -f diff-coverage.md ]; then
	          cat diff-coverage.md >> "$GITHUB_STEP_SUMMARY" || true
	        fi
	        # Preserve diff-cover's verdict for the step outcome.
	        exit "${status}"

	pyright-strict-zone:
	  # Pyright in strict mode over the security and protocols.cluster
	  # subtrees. The repo-wide pyright run remains advisory (basic mode via
	  # tool.pyright); in the strict zone any new error in the listed
	  # packages fails the PR.
	  name: Pyright strict (security + cluster)
	  runs-on: ubuntu-latest
	  needs:
	    - lint
	  timeout-minutes: 10
	  permissions:
	    contents: read
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7
	      with:
	        persist-credentials: false
	    - uses: ./.github/actions/bootstrap
	    - name: Run pyright strict on the curated allow-list
	      # pyrightconfig.strict.json holds the strict-zone allow-list, while
	      # the repo-wide run is configured in pyproject.toml [tool.pyright].
	      # Extend the allow-list as files become strict-clean.
	      run: uv run pyright --project pyrightconfig.strict.json

	# ─── Slow checks (cancel old runs, 15-45 min) ─────────────────────────

	adapter-integration:
	  # End-to-end adapter tests driven by the fake-CLI harness in
	  # tests/integration/fake_cli. Real subprocesses are spawned (no Popen
	  # mocks), so PATH resolution, env filtering, exit-code mapping, and
	  # output capture all go through an actual fork/exec. Windows is
	  # skipped: the wrappers are POSIX shell scripts and the adapters use
	  # ``start_new_session=True``; unit tests cover the same argv/env logic
	  # there via mocked Popen.
	  #
	  # The macOS half lives in the adapter-integration-macos job (gated by
	  # determine-changes.outputs.macos_sensitive, the `macos-needed` label,
	  # or push events) to ease pressure on the hosted macOS runner pool
	  # during burst-merge waves - see #1468. ci-macos-nightly.yml runs the
	  # full macOS matrix daily as the safety net for regressions the path
	  # gate misses.
	  name: Adapter integration (fake-CLI)
	  runs-on: ubuntu-latest
	  needs:
	    - lint
	  timeout-minutes: 15
	  permissions:
	    contents: read
	  strategy:
	    fail-fast: false
	    matrix:
	      python-version:
	        - "3.13"
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7
	      with:
	        persist-credentials: false
	    - uses: ./.github/actions/bootstrap
	      with:
	        python-version: ${{ matrix.python-version }}
	    - name: Run fake-CLI adapter integration tests
	      run: uv run pytest tests/integration/test_adapter_e2e.py -x -q --timeout=60

	adapter-integration-macos:
	  # macOS half of adapter-integration. It runs when any one of three
	  # conditions holds (see #1468):
	  #   1. the event is a push (including merges to main), so the
	  #      release train gets a fresh macOS signal for every commit that
	  #      reaches main;
	  #   2. the PR diff touches macOS-sensitive paths (the planner sets
	  #      macos_sensitive=true);
	  #   3. the PR carries the `macos-needed` label (operator opt-in for
	  #      cross-platform work the path filter cannot detect).
	  # In every other case the job is skipped on PRs and
	  # ci-macos-nightly.yml acts as the safety net.
	  name: Adapter integration (fake-CLI, macOS)
	  runs-on: macos-latest
	  needs:
	    - lint
	    - determine-changes
	  if: >-
	    github.event_name == 'push' ||
	    needs.determine-changes.outputs.macos_sensitive == 'true' ||
	    contains(github.event.pull_request.labels.*.name, 'macos-needed')
	  timeout-minutes: 15
	  permissions:
	    contents: read
	  strategy:
	    fail-fast: false
	    matrix:
	      python-version:
	        - "3.13"
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7
	      with:
	        persist-credentials: false
	    - uses: ./.github/actions/bootstrap
	      with:
	        python-version: ${{ matrix.python-version }}
	    - name: Run fake-CLI adapter integration tests
	      run: uv run pytest tests/integration/test_adapter_e2e.py -x -q --timeout=60

	test:
	# Sharded across 4 parallel runners per (os, python) cell. The unit
	# suite runs each of ~1.4k test files in its own subprocess (the
	# OOM-avoidance model documented in the coverage step below); per-file
	# Python startup + full-package import is a fixed ~2.7s/file, so at
	# 1.4k files / 4 local workers a single runner spent 25+ min purely on
	# startup churn (the file count crossed a threshold when discovery
	# widened to rglob). Fanning the file list out over TEST_SHARD_COUNT
	# runners (each runs `run_tests.py --shard i/N`, a deterministic
	# disjoint slice) cuts each runner to ~1/4 of the files. The matrix
	# `.result` ci-gate reads is `failure` if ANY shard cell fails, so all
	# shards are still required - no shard can silently skip.
	name: Test (${{ matrix.os }}, Python ${{ matrix.python-version }}, shard ${{ matrix.shard }})
	needs: [lint] # fast-fail: don't waste 45min if lint fails
	runs-on: ${{ matrix.os }}
	# Per-shard the unit suite is ~1/4 of the old wall time. Push-time
	# coverage is collected during the existing ubuntu/3.13 shard runs and
	# combined by coverage-report below, so no shard performs a serial
	# full-suite coverage rerun.
	timeout-minutes: 90
	permissions:
	contents: read
	env:
	# Single source of truth for the shard count. The `--shard i/N`
	# denominator below and the per-shard slice both key off this; bump
	# it (and the `shard:` list) together to rescale the fan-out.
	TEST_SHARD_COUNT: "4"
	# Main push coverage runs are slower than local file runs; keep the
	# per-file guard, but allow heavy adapter contract files to finish.
	BERNSTEIN_TEST_FILE_TIMEOUT_SECONDS: "600"
	strategy:
	fail-fast: false
	matrix:
	# macOS removed from the default matrix to relieve hosted-runner
	# saturation during burst-merge waves (see #1468). The test-macos
	# job below runs the same suite on macOS when the diff is
	# macOS-sensitive, the PR carries the `macos-needed` label, or
	# the event is a push. ci-macos-nightly.yml provides a daily
	# safety-net run of the full macOS matrix.
	os: [ubuntu-latest, windows-latest]
	python-version: ["3.12", "3.13"]
	# Fan the per-file suite out across 4 deterministic shards per
	# cell. Keep this list in sync with TEST_SHARD_COUNT above.
	shard: [1, 2, 3, 4]
	exclude:
	# Coverage/JUnit upload only on ubuntu; skip duplicate slow jobs on Windows for 3.12
	- os: windows-latest
	python-version: "3.12"
	steps:
	- name: Harden runner (audit mode)
	uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
	with:
	egress-policy: audit
	- uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7
	with:
	persist-credentials: false
	- uses: ./.github/actions/bootstrap
	with:
	python-version: ${{ matrix.python-version }}
	# The base branch is fetched into refs/remotes/origin/<base> so the next
	# step can pass it to `run_tests.py --affected`. Only PR runs on
	# non-Windows runners perform this fetch.
	- name: Fetch base ref for impacted-test selection
	if: github.event_name == 'pull_request' && runner.os != 'Windows'
	env:
	BASE_REF: ${{ github.base_ref }}
	run: \|
	git fetch --no-tags --depth=1 origin \
	"refs/heads/${BASE_REF}:refs/remotes/origin/${BASE_REF}"
	# `--coverage` is added only for push events on the Linux / Python 3.13
	# cells; `--affected` is added only on PRs whose base ref resolved.
	- name: Run isolated test suite (Linux/macOS)
	if: runner.os != 'Windows'
	env:
	BASE_REF: ${{ github.base_ref }}
	EVENT_NAME: ${{ github.event_name }}
	PYTHON_VERSION: ${{ matrix.python-version }}
	SHARD: ${{ matrix.shard }}
	SHARD_COUNT: ${{ env.TEST_SHARD_COUNT }}
	run: \|
	# `--shard i/N` runs a deterministic disjoint slice of the
	# discovered (and, on PRs, affected) file list. On PRs the
	# affected set is sharded too, so each runner runs ~1/N of the
	# impacted files.
	coverage_args=()
	if [ "${EVENT_NAME}" = "push" ] && [ "${RUNNER_OS}" = "Linux" ] && [ "${PYTHON_VERSION}" = "3.13" ]; then
	coverage_args=(--coverage)
	fi
	if [ "${EVENT_NAME}" = "pull_request" ] && \
	git rev-parse --verify "refs/remotes/origin/${BASE_REF}" >/dev/null 2>&1; then
	uv run python scripts/run_tests.py --parallel 4 \
	"${coverage_args[@]}" \
	--shard "${SHARD}/${SHARD_COUNT}" \
	--affected "refs/remotes/origin/${BASE_REF}"
	else
	uv run python scripts/run_tests.py --parallel 4 \
	"${coverage_args[@]}" \
	--shard "${SHARD}/${SHARD_COUNT}"
	fi
	# The Windows variant always runs its full shard slice: it passes
	# neither `--affected` nor `--coverage`, and adds `-x`. The step is
	# continue-on-error, so a failure here does not fail the job.
	- name: Run isolated test suite (Windows)
	if: runner.os == 'Windows'
	continue-on-error: true # Windows has Unix-only tests (chmod, SIGKILL) that are skipped but some may remain
	shell: pwsh
	env:
	SHARD: ${{ matrix.shard }}
	SHARD_COUNT: ${{ env.TEST_SHARD_COUNT }}
	run: uv run python scripts/run_tests.py -x --parallel 4 --shard "${env:SHARD}/${env:SHARD_COUNT}"
	- name: Run capability-matrix spawn-refusal integration tests (Linux/macOS)
	# Pinned to shard 1 so this runs exactly once per (os, python) cell
	# rather than once per shard - the unit suite is sharded, this
	# integration probe is not. Lethal-trifecta integration coverage:
	# every supported OS/Python cell exercises the AgentSpawner
	# spawn-refusal path so we catch regressions in the structural rule
	# before they ship. See
	# tests/integration/test_capability_matrix_spawn_refusal.py.
	if: runner.os != 'Windows' && matrix.shard == 1
	run: \|
	uv run pytest tests/integration/test_capability_matrix_spawn_refusal.py \
	-x -q --tb=short --timeout=120
	- name: Run capability-matrix spawn-refusal integration tests (Windows)
	# Pinned to shard 1 (see Linux/macOS counterpart): one run per cell.
	if: runner.os == 'Windows' && matrix.shard == 1
	shell: pwsh
	run: \|
	uv run pytest tests/integration/test_capability_matrix_spawn_refusal.py `
	-x -q --tb=short --timeout=120
	# Renames the shard's data file to `.coverage.<shard>` so that files
	# from different shards keep distinct names once coverage-report
	# downloads them into a single directory. Fails loudly if the test step
	# produced no `.coverage` file.
	- name: Prepare coverage shard artifact
	if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13' && github.event_name == 'push'
	run: \|
	if [ ! -f .coverage ]; then
	echo "::error::.coverage was not generated for shard ${{ matrix.shard }}"
	exit 1
	fi
	mv .coverage ".coverage.${{ matrix.shard }}"
	- name: Upload coverage shard artifact
	if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13' && github.event_name == 'push'
	uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
	with:
	name: coverage-data-${{ matrix.shard }}
	path: .coverage.${{ matrix.shard }}
	if-no-files-found: error
	# `.coverage.<shard>` is a dotfile, hence the explicit opt-in below.
	include-hidden-files: true
	retention-days: 1

	coverage-report:
	  # Combines the per-shard coverage data files uploaded by the `test`
	  # job into one coverage.xml, publishes it as the `coverage-report`
	  # artifact, and sends it to Codecov. Runs only for pushes to main.
	  name: Coverage report
	  runs-on: ubuntu-latest
	  needs:
	    - test
	  if: github.event_name == 'push' && github.ref == 'refs/heads/main'
	  timeout-minutes: 10
	  permissions:
	    contents: read
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7
	      with:
	        persist-credentials: false
	    - uses: ./.github/actions/bootstrap
	      with:
	        python-version: "3.13"
	    - name: Download coverage shard artifacts
	      # Every `coverage-data-*` artifact is unpacked into the single
	      # directory coverage-shards/ (merge-multiple).
	      uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
	      with:
	        path: coverage-shards
	        pattern: coverage-data-*
	        merge-multiple: true
	    - name: Merge coverage shards
	      # Refuses to combine unless at least 4 shard files arrived, so a
	      # missing shard cannot silently shrink the report. The final
	      # `test -s` fails the step if coverage.xml is empty or absent.
	      run: |
	        found=$(find coverage-shards -name '.coverage.*' -type f | wc -l | tr -d ' ')
	        if [ "$found" -ge 4 ]; then
	          uv run python -m coverage combine coverage-shards/.coverage.*
	          uv run python -m coverage xml --ignore-errors -o coverage.xml
	          test -s coverage.xml
	        else
	          echo "::error::expected 4 coverage shard files, found $found"
	          find coverage-shards -maxdepth 2 -type f -print
	          exit 1
	        fi
	    - name: Upload coverage report artifact
	      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
	      with:
	        name: coverage-report
	        path: coverage.xml
	        retention-days: 1
	        if-no-files-found: error
	    - name: Upload coverage to Codecov
	      uses: codecov/codecov-action@fb8b3582c8e4def4969c97caa2f19720cb33a72f # v7.0.0
	      with:
	        token: ${{ secrets.CODECOV_TOKEN }}
	        files: coverage.xml
	        fail_ci_if_error: false

	test-macos:
	# macOS half of the test matrix, split out for #1468 (macOS hosted
	# runner saturation). Runs unconditionally on push (so every commit
	# that reaches main has a fresh macOS signal), and on PRs only when
	# one of the gate conditions is met:
	# - the planner classified the diff as macos_sensitive (touched
	# a module with `sys.platform == "darwin"` branches, the
	# tunnels driver layer, the daemon installer, the runtime
	# state code, the macOS clipboard/notifications wrappers, or
	# ci.yml / ci-macos-nightly.yml themselves);
	# - the PR carries the `macos-needed` label (operator opt-in for
	# cross-platform work the path filter cannot detect).
	# Otherwise this job is skipped on the PR and ci-macos-nightly.yml
	# provides the safety-net coverage at 06:00 UTC each day.
	#
	# Coverage / JUnit / Codecov uploads are NOT mirrored here; they
	# remain ubuntu-only (matches the previous matrix gating).
	#
	# macOS sharding decision (this PR): the ubuntu `test` job fans out
	# across a `shard` matrix dimension, but macOS CANNOT - this job's
	# `name:` MUST stay the literal string below (branch-protection
	# required-context + required-check-canary.yml both pin it; a matrix
	# `shard` dimension would template the name and break the lock). So
	# instead of sharding macOS, we shrink its per-push workload to a
	# single deterministic quarter of the file list
	# (`--shard 1/MACOS_PUSH_SHARD_COUNT`). This fits the time budget
	# without the 90-min wall the full macOS suite hit, keeps a real
	# (deterministic, ~1/4) macOS signal on every commit that lands on
	# main, and leaves ci-macos-nightly.yml to run the FULL macOS matrix
	# as the safety net. On PRs the job already runs `--affected`
	# (impacted slice), so macos_sensitive PRs still exercise exactly the
	# touched code on macOS.
	# The merge queue (merge_group) skips this job entirely (see the
	# ci-gate MACOS_SKIP_EVENTS handling) - the post-merge push is what
	# carries the macOS signal.
	#
	# Literal job name -- NOT templated. The branch-protection required
	# context for macOS test runs depends on this exact string; when the
	# job is skipped via the `if:` gate, GitHub posts the templated form
	# verbatim, which never matches a required-context rule. The literal
	# form keeps the name resolvable in every state (success, fail, skip).
	# required-check-canary.yml asserts this remains literal.
	name: Test (macos-latest, Python 3.13)
	needs: [lint, determine-changes]
	if: >-
	github.event_name == 'push' \|\|
	needs.determine-changes.outputs.macos_sensitive == 'true' \|\|
	contains(github.event.pull_request.labels.*.name, 'macos-needed')
	runs-on: macos-latest
	timeout-minutes: 90
	permissions:
	contents: read
	env:
	# Per-push macOS runs only shard 1 of this many - a deterministic
	# ~1/4 subset of the file list. ci-macos-nightly.yml runs the full
	# matrix daily as the safety net.
	MACOS_PUSH_SHARD_COUNT: "4"
	strategy:
	fail-fast: false
	matrix:
	python-version: ["3.13"]
	steps:
	- name: Harden runner (audit mode)
	uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
	with:
	egress-policy: audit
	- uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7
	with:
	persist-credentials: false
	- uses: ./.github/actions/bootstrap
	with:
	python-version: ${{ matrix.python-version }}
	# Fetches the base branch into refs/remotes/origin/<base> so the next
	# step can hand it to `run_tests.py --affected`.
	- name: Fetch base ref for impacted-test selection
	if: github.event_name == 'pull_request'
	env:
	BASE_REF: ${{ github.base_ref }}
	run: \|
	git fetch --no-tags --depth=1 origin \
	"refs/heads/${BASE_REF}:refs/remotes/origin/${BASE_REF}"
	- name: Run isolated test suite
	env:
	BASE_REF: ${{ github.base_ref }}
	EVENT_NAME: ${{ github.event_name }}
	# NOTE(review): the job-level `env` above already defines this
	# variable for every step, so this re-export looks redundant -
	# confirm before removing.
	MACOS_PUSH_SHARD_COUNT: ${{ env.MACOS_PUSH_SHARD_COUNT }}
	run: \|
	# On PRs: run only the affected slice (impacted by the diff).
	# On push to main: run a single deterministic quarter of the
	# file list (`--shard 1/N`) so the macOS cell fits its time
	# budget; ci-macos-nightly.yml runs the full matrix daily.
	if [ "${EVENT_NAME}" = "pull_request" ] && \
	git rev-parse --verify "refs/remotes/origin/${BASE_REF}" >/dev/null 2>&1; then
	uv run python scripts/run_tests.py --parallel 4 --affected "refs/remotes/origin/${BASE_REF}"
	else
	uv run python scripts/run_tests.py --parallel 4 \
	--shard "1/${MACOS_PUSH_SHARD_COUNT}"
	fi
	# This job has a single matrix cell, so the integration probe needs no
	# shard pin and runs on every invocation of the job.
	- name: Run capability-matrix spawn-refusal integration tests
	run: \|
	uv run pytest tests/integration/test_capability_matrix_spawn_refusal.py \
	-x -q --tb=short --timeout=120

	# ─── Post-pipeline (conditional, never cancelled) ──────────────────────

	autofix:
	  # Post-merge lint auto-fixer. On pushes to main that were not made by
	  # the bernstein bots it applies ruff fixes and formatting to src/,
	  # untracks any committed .sdd files, and pushes the result back as a
	  # bot commit. The job is continue-on-error, so a failure here never
	  # turns the workflow run red.
	  name: Auto-fix lint
	  runs-on: ubuntu-latest
	  needs: [lint, repo-hygiene]
	  if: >-
	    github.event_name == 'push' &&
	    github.ref == 'refs/heads/main' &&
	    github.actor != 'bernstein[bot]' &&
	    github.actor != 'bernstein-orchestrator[bot]'
	  continue-on-error: true
	  timeout-minutes: 15
	  permissions:
	    # Needed by the final `git push`.
	    contents: write
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7
	      with:
	        persist-credentials: false
	        # The loop check below inspects the last 3 commit subjects.
	        fetch-depth: 5
	    - name: Check for autofix loop
	      id: loop_check
	      # Sets `skip=true` when the three most recent commits are all
	      # auto-fix commits; every later step is gated on that output.
	      run: |
	        RECENT=$(git log --oneline -3 --format='%s' | grep -c "style: auto-fix" || true)
	        if [ "$RECENT" -ge 3 ]; then
	          echo "::warning::Autofix loop detected - last 3 commits are all auto-fix. Skipping."
	          echo "skip=true" >> "$GITHUB_OUTPUT"
	          exit 0
	        fi
	    - if: steps.loop_check.outputs.skip != 'true'
	      uses: ./.github/actions/bootstrap
	    - name: Auto-fix ruff
	      if: steps.loop_check.outputs.skip != 'true'
	      # `|| true`: unfixable findings must not stop the formatter run.
	      run: |
	        uv run ruff check src/ --fix --unsafe-fixes || true
	        uv run ruff format src/
	    - name: Purge tracked .sdd files
	      if: steps.loop_check.outputs.skip != 'true'
	      # Stages the removal from the index only; the files stay on disk.
	      run: |
	        TRACKED="$(git ls-files '.sdd')"
	        if [ -n "$TRACKED" ]; then
	          echo "$TRACKED" | xargs git rm --cached --
	        fi
	    - name: Commit and push fixes
	      if: steps.loop_check.outputs.skip != 'true'
	      env:
	        # Checkout ran with persist-credentials: false, so git holds no
	        # token and a bare `git push` cannot authenticate. The token is
	        # handed explicitly to the one command that needs it.
	        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	      run: |
	        git add src/ tests/ scripts/
	        git diff --cached --quiet && echo "Nothing to fix" && exit 0
	        git config user.name "bernstein[bot]"
	        git config user.email "bernstein-bot@users.noreply.github.com"
	        git commit -m "style: auto-fix ruff lint and format"
	        # GITHUB_REF is refs/heads/main here (enforced by the job `if:`).
	        git push "https://x-access-token:${GH_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" "HEAD:${GITHUB_REF}"

	close-ci-issues:
	  # After a green CI gate on a push to main, closes every open issue
	  # labelled `ci-fix`, leaving a comment that links the run and commit.
	  name: Close resolved CI issues
	  runs-on: ubuntu-latest
	  needs:
	    - ci-gate
	  if: >-
	    success() &&
	    needs.ci-gate.result == 'success' &&
	    github.ref == 'refs/heads/main' &&
	    github.event_name == 'push'
	  timeout-minutes: 5
	  permissions:
	    contents: read
	    issues: write
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7
	      with:
	        persist-credentials: false
	    - name: Close ci-fix issues
	      env:
	        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	        GATE_RESULT: ${{ needs.ci-gate.result }}
	        RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
	        COMMIT_SHA: ${{ github.sha }}
	      # Best effort throughout: a failed listing yields an empty list and
	      # a failed close is ignored, so this step never fails the job.
	      run: |
	        open_issues=$(gh issue list --label ci-fix --state open --json number --jq '.[].number' 2>/dev/null || echo "")
	        for issue in $open_issues; do
	          gh issue close "$issue" --comment "CI aggregate gate result: ${GATE_RESULT}. Run: ${RUN_URL}. Commit: \`${COMMIT_SHA}\`." || true
	        done

	# Note: the previous `self-heal` issue-creating job was superseded by the
	# `bernstein-ci-fix` workflow (.github/workflows/bernstein-ci-fix.yml),
	# which on a CI failure first attempts an auto-heal PR and only falls
	# back to opening a `ci-fix` issue when no diff is produced.

	pr-summary:
	  # Posts (or updates in place) one PR comment that tabulates the result
	  # of every job listed under `needs`. The comment is found again on
	  # later runs through the hidden `<!-- ci-summary -->` marker.
	  name: PR CI summary
	  runs-on: ubuntu-latest
	  # `!cancelled()` is required: without a status-check function GitHub
	  # applies an implicit `success()`, which skips this job as soon as any
	  # needed job fails or is skipped (test-macos is skipped on most PRs).
	  # That is exactly when the summary is useful.
	  if: github.event_name == 'pull_request' && !cancelled()
	  needs:
	    - repo-hygiene
	    - lint
	    - typecheck
	    - test
	    - test-macos
	    - spelling
	    - dead-code
	    - actionlint
	    - dist-size
	    - property-tests
	    - snapshot-tests
	    - schemathesis-smoke
	    - semgrep
	    - bandit
	    - pip-audit
	    - beartype
	    - pyright-strict-zone
	  timeout-minutes: 5
	  permissions:
	    pull-requests: write
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
	      with:
	        egress-policy: audit
	    - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
	      with:
	        script: |
	          const marker = '<!-- ci-summary -->';
	          const jobs = [
	            { name: 'Repo hygiene', result: '${{ needs.repo-hygiene.result }}' },
	            { name: 'Lint', result: '${{ needs.lint.result }}' },
	            { name: 'Type check report', result: '${{ needs.typecheck.result }}' },
	            { name: 'Tests', result: '${{ needs.test.result }}' },
	            { name: 'Tests (macOS)', result: '${{ needs.test-macos.result }}' },
	            { name: 'Spelling', result: '${{ needs.spelling.result }}' },
	            { name: 'Dead code', result: '${{ needs.dead-code.result }}' },
	            { name: 'Workflow lint', result: '${{ needs.actionlint.result }}' },
	            { name: 'Dist size', result: '${{ needs.dist-size.result }}' },
	            { name: 'Property (Hypothesis)', result: '${{ needs.property-tests.result }}' },
	            { name: 'Snapshot (syrupy)', result: '${{ needs.snapshot-tests.result }}' },
	            { name: 'Schemathesis smoke', result: '${{ needs.schemathesis-smoke.result }}' },
	            { name: 'Semgrep custom', result: '${{ needs.semgrep.result }}' },
	            { name: 'Bandit', result: '${{ needs.bandit.result }}' },
	            { name: 'pip-audit', result: '${{ needs.pip-audit.result }}' },
	            { name: 'Beartype', result: '${{ needs.beartype.result }}' },
	            { name: 'Pyright strict zone', result: '${{ needs.pyright-strict-zone.result }}' },
	          ];
	          const icon = (r) => r === 'success' ? '✅' : r === 'failure' ? '❌' : r === 'skipped' ? '⏭️' : '⚠️';
	          let body = `${marker}\n### CI Summary\n\n`;
	          body += '| Check | Result |\n|-------|--------|\n';
	          for (const j of jobs) {
	            body += `| ${j.name} | ${icon(j.result)} ${j.result} |\n`;
	          }
	          body += '\nCoverage and detailed reports are available via Codecov and the Checks tab.';

	          // Paginate: a single listComments call returns only the first
	          // page, so on a long PR thread the marker comment would be
	          // missed and a duplicate summary posted on every run.
	          const comments = await github.paginate(github.rest.issues.listComments, {
	            owner: context.repo.owner,
	            repo: context.repo.repo,
	            issue_number: context.issue.number,
	            per_page: 100,
	          });
	          const existing = comments.find(c => c.body && c.body.includes(marker));
	          if (existing) {
	            await github.rest.issues.updateComment({
	              owner: context.repo.owner,
	              repo: context.repo.repo,
	              comment_id: existing.id,
	              body,
	            });
	          } else {
	            await github.rest.issues.createComment({
	              owner: context.repo.owner,
	              repo: context.repo.repo,
	              issue_number: context.issue.number,
	              body,
	            });
	          }

	# ─── Aggregator gate (closes #1273) ────────────────────────────────────
	#
	# SINGLE required status check for branch protection.
	#
	# Why this exists:
	# * `ci.yml` has 20+ jobs (incl. an OS x Python matrix that fans out
	# `test (ubuntu-latest, 3.12)` etc.) - listing every contextual name
	# in branch-protection is fragile because matrix names drift with
	# each matrix change.
	# * A `skipped` job auto-passes branch protection. So does a job that
	# was never enqueued because an upstream `needs:` was `cancelled`.
	# Either condition can let a red commit reach `main`.
	#
	# This job fails on ANY non-success result - including `cancelled`,
	# `timed_out`, `action_required`, or a job that never ran. `skipped`
	# passes ONLY when the planner job (`determine-changes`) classified
	# the diff in a way that makes the skip intentional (docs-only PRs,
	# event-gated jobs, etc.). A `skipped` result for a job that should
	# have run is treated as a failure - pattern borrowed from pypa/pip's
	# CI aggregator (see PR #1273 for the discussion).
	#
	# Operator: after this PR merges, replace every required check in
	# branch protection with this single context (name shown in UI:
	# `CI gate`). The exact `gh api` invocation is in PR #1273's body.
	#
	# Excluded from `needs:`:
	# * `autofix` - runs only on push to main, mutates the tree
	# * `close-ci-issues` - post-gate issue update, must not gate merges
	# * `pr-summary` - cosmetic PR comment, must not gate merges
	# * `typecheck` - advisory report while repo-wide pyright is being typed
	# * `mutmut-diff` - advisory report until mutation score enforcement is enabled
	# * `diff-coverage` - advisory report until PR coverage artifacts are reliable
	# * `ci-gate` itself - would deadlock the dependency graph
	ci-gate:
	  name: CI gate
	  runs-on: ubuntu-latest
	  # `always()` ensures the gate fires even when an upstream job is
	  # `cancelled`. `!cancelled()` lets the gate itself be cancelled when
	  # the user cancels the whole workflow run (so we don't try to
	  # report "cancelled" on a manually aborted run).
	  if: always() && !cancelled()
	  # Every job listed here contributes one entry to the `needs` context
	  # that the roll-up step inspects.
	  needs:
	    - determine-changes
	    - repo-hygiene
	    - lint
	    - spelling
	    - actionlint
	    - lineage-gate
	    - dead-code
	    - dist-size
	    - install-smoke-pipx
	    - install-smoke-uv
	    - property-tests
	    - snapshot-tests
	    - schemathesis-smoke
	    - semgrep
	    - bandit
	    - pip-audit
	    - beartype
	    - pyright-strict-zone
	    - adapter-integration
	    - adapter-integration-macos
	    - test
	    - coverage-report
	    - test-macos
	  # The roll-up only parses JSON already present in the job context, so
	  # a short timeout is enough.
	  timeout-minutes: 3
	  # The job's token is limited to read-only repository contents.
	  permissions:
	    contents: read
	  steps:
	    - name: Harden runner (audit mode)
	      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
	      with:
	        egress-policy: audit
	    - id: roll-up
	      name: Roll up needs.*.result with conditional allowed-skips
	      env:
	        # Whole `needs` context as JSON: job id -> object carrying `result`.
	        NEEDS_JSON: ${{ toJSON(needs) }}
	        # Planner outputs as JSON; the script reads the string flags
	        # `docs_only` and `macos_sensitive` from it.
	        PLAN_JSON: ${{ toJSON(needs.determine-changes.outputs) }}
	        EVENT_NAME: ${{ github.event_name }}
	      run: |
	        # Write JSONs to disk so the python heredoc can read them
	        # without worrying about shell quoting on multiline content.
	        printf '%s' "$NEEDS_JSON" > results.json
	        printf '%s' "$PLAN_JSON" > plan.json
	        python3 - <<'PY'
	        import json, os, sys
	        with open("results.json") as fh:
	            data = json.load(fh)
	        with open("plan.json") as fh:
	            # Fall back to an empty mapping if the planner outputs
	            # serialised to `null`: every flag then reads as false, so
	            # skipped jobs are still reported as failures (fail closed)
	            # instead of the script dying with an AttributeError.
	            plan = json.load(fh) or {}
	        event = os.environ.get("EVENT_NAME", "")

	        # Jobs that intentionally skip on docs-only changes (no python
	        # / tests / workflows touched). Mirrors paths-ignore at the top.
	        DOCS_ONLY_SKIPPABLE = {
	            "test",
	            "schemathesis-smoke",
	            "snapshot-tests",
	            "property-tests",
	            "beartype",
	            "adapter-integration",
	            "pyright-strict-zone",
	            "semgrep",
	            "bandit",
	            "pip-audit",
	            "dead-code",
	            "dist-size",
	            "install-smoke-pipx",
	            "install-smoke-uv",
	        }
	        # macOS-gated jobs (see #1468): on PRs these are skipped unless
	        # the diff is macos_sensitive or the PR carries the
	        # `macos-needed` label. Always run on push events.
	        MACOS_GATED = {"test-macos", "adapter-integration-macos"}
	        # Push-only jobs are required on native main pushes but skip
	        # intentionally on PR, workflow_dispatch, and merge_group runs.
	        PUSH_ONLY = {"coverage-report"}
	        # Events under which a macOS-gated skip is intentional. PRs and
	        # manual dispatch already gate macOS behind macos_sensitive /
	        # label. merge_group must be included too: a merge queue runs CI
	        # on a synthetic merge_group ref where `github.event.pull_request`
	        # is null, so the `macos-needed` label and `push`-only branches of
	        # the job `if:` can never be true and these jobs always skip. The
	        # merged commit still triggers a native `push` to main that runs
	        # the full macOS suite un-gated, and ci-macos-nightly.yml covers
	        # regression -- so tolerating the skip here is what keeps the queue
	        # from wedging without losing macOS coverage on what actually lands.
	        MACOS_SKIP_EVENTS = ("pull_request", "workflow_dispatch", "merge_group")

	        # Planner flags arrive as the strings "true" / "false".
	        docs_only = plan.get("docs_only") == "true"
	        macos_sensitive = plan.get("macos_sensitive") == "true"
	        # Read the PR labels via the event payload. The aggregator
	        # runs inside ci.yml so the same event.pull_request.labels
	        # array used by the job `if:` is available here too. The
	        # GITHUB_EVENT_PATH file is the canonical source.
	        macos_labelled = False
	        try:
	            with open(os.environ["GITHUB_EVENT_PATH"]) as fh:
	                payload = json.load(fh)
	            labels = (payload.get("pull_request") or {}).get("labels") or []
	            macos_labelled = any(
	                (lbl.get("name") == "macos-needed") for lbl in labels
	            )
	        except Exception:
	            # Deliberately best-effort: an unreadable payload is treated
	            # as "no label".
	            macos_labelled = False

	        # Collect every job that neither succeeded nor skipped for an
	        # allowed reason.
	        bad = []
	        for name, info in data.items():
	            r = info.get("result")
	            if r == "success":
	                continue
	            if r == "skipped":
	                if name in PUSH_ONLY and event != "push":
	                    continue
	                if docs_only and name in DOCS_ONLY_SKIPPABLE:
	                    continue
	                # macOS-gated jobs are allowed to skip on:
	                # - PRs when diff is not macos_sensitive AND no
	                #   `macos-needed` label.
	                # - workflow_dispatch (manual re-runs from hotfix
	                #   agents) when diff is not macos_sensitive.
	                # - merge_group (merge-queue ref) where the label/push
	                #   branches of the job `if:` cannot be true; the
	                #   post-merge push to main runs macOS un-gated.
	                # On native push events macOS must run.
	                # Nightly ci-macos-nightly.yml covers regression.
	                if (
	                    name in MACOS_GATED
	                    and event in MACOS_SKIP_EVENTS
	                    and not macos_sensitive
	                    and not macos_labelled
	                ):
	                    continue
	            # Anything reaching this point (failure, cancelled, an
	            # unexpected skip, ...) blocks the gate.
	            bad.append((name, r))

	        if bad:
	            print("::error::CI gate FAILED - these jobs were not success "
	                  "and not intentionally skipped:")
	            for n, r in bad:
	                print(f" - {n}: result={r}")
	            sys.exit(1)
	        print(f"CI gate: all required jobs passed "
	              f"(or intentionally skipped). docs_only={docs_only}, event={event}")
	        for n, info in sorted(data.items()):
	            print(f" {n}: {info.get('result')}")
	        PY

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

Update docker/build-push-action digest to 53b7df9 #3748

Workflow file

Update docker/build-push-action digest to 53b7df9 #3748

Uh oh!

Workflow file for this run