Update docker/build-push-action digest to 53b7df9 #3748
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: CI | |
| on: | |
| workflow_dispatch: | |
| merge_group: {} | |
| push: | |
| branches: [main] | |
| paths-ignore: | |
| # NOTE: pyproject.toml IS NOT ignored. Auto-release uses a PR-based | |
| # bump flow (auto-release.yml → opens auto/bump-vX.Y.Z PR with | |
| # auto-merge); the resulting squash-merge to main carries only a | |
| # pyproject.toml diff and MUST trigger CI so the workflow_run | |
| # listener in auto-release.yml can fire and tag + publish. The old | |
| # CI→release→CI loop concern is already handled by the bot-author | |
| # filter and the "tag exists?" check inside auto-release.yml. | |
| # Documentation & prose | |
| - "docs/**" | |
| - "!docs/operations/ci-topology.md" | |
| - "!docs/observability/**" | |
| - "*.md" | |
| - "!README.md" | |
| - "LICENSE" | |
| - "CONTRIBUTORS.md" | |
| # Runtime state (never committed) | |
| - ".sdd/**" | |
| # Non-Python packages & SDKs | |
| - "sdk/typescript/**" | |
| - "packages/vscode/**" | |
| - "packages/cursor-plugin/**" | |
| - "packaging/**" | |
| - "Formula/**" | |
| # Deployment & infra configs | |
| - "deploy/**" | |
| - "docker/**" | |
| - "docker-compose.yaml" | |
| - "Dockerfile" | |
| - "action.yml" | |
| - "action/**" | |
| # CI tool configs (don't re-run tests for codecov/sonar tweaks) | |
| - "codecov.yml" | |
| - "sonar-project.properties" | |
| # GitHub meta (templates, labels, funding - not ci.yml) | |
| - ".github/ISSUE_TEMPLATE/**" | |
| - ".github/FUNDING.yml" | |
| - ".github/CODEOWNERS" | |
| - ".github/pull_request_template.md" | |
| - ".github/dependabot.yml" | |
| - ".github/labeler.yml" | |
| - ".github/release-drafter.yml" | |
| - ".github/copilot-instructions.md" | |
| - ".github/codeql/**" | |
| # Non-code project files | |
| - "marketing/**" | |
| - "benchmarks/**" | |
| - "examples/**" | |
| - "plans/**" | |
| - "agents/**" | |
| - "commands/**" | |
| - "rules/**" | |
| - ".bernstein/**" | |
| - ".plugin/**" | |
| - "scripts/gen_tickets_*.py" | |
| - "scripts/gen_roadmap_*.py" | |
| - "scripts/generate_benchmark_docs.py" | |
| pull_request: | |
| paths-ignore: | |
| # Documentation & prose | |
| - "docs/**" | |
| - "!docs/operations/ci-topology.md" | |
| - "!docs/observability/**" | |
| - "*.md" | |
| - "!README.md" | |
| - "LICENSE" | |
| - "CONTRIBUTORS.md" | |
| # Runtime state (never committed) | |
| - ".sdd/**" | |
| # Non-Python packages & SDKs | |
| - "sdk/typescript/**" | |
| - "packages/vscode/**" | |
| - "packages/cursor-plugin/**" | |
| - "packaging/**" | |
| - "Formula/**" | |
| # Deployment & infra configs | |
| - "deploy/**" | |
| - "docker/**" | |
| - "docker-compose.yaml" | |
| - "Dockerfile" | |
| - "action.yml" | |
| - "action/**" | |
| # CI tool configs | |
| - "codecov.yml" | |
| - "sonar-project.properties" | |
| # GitHub meta | |
| - ".github/ISSUE_TEMPLATE/**" | |
| - ".github/FUNDING.yml" | |
| - ".github/CODEOWNERS" | |
| - ".github/pull_request_template.md" | |
| - ".github/dependabot.yml" | |
| - ".github/labeler.yml" | |
| - ".github/release-drafter.yml" | |
| - ".github/copilot-instructions.md" | |
| - ".github/codeql/**" | |
| # Non-code project files | |
| - "marketing/**" | |
| - "benchmarks/**" | |
| - "examples/**" | |
| - "plans/**" | |
| - "agents/**" | |
| - "commands/**" | |
| - "rules/**" | |
| - ".bernstein/**" | |
| - ".plugin/**" | |
| - "scripts/gen_tickets_*.py" | |
| - "scripts/gen_roadmap_*.py" | |
| - "scripts/generate_benchmark_docs.py" | |
| # Concurrency policy for heavy CI (see #1273): | |
| # | |
| # - Pull requests: per-PR group (keyed off pull_request.number, stable | |
| # across pushes to the same PR), cancel-in-progress=true. New commits | |
| # on the same PR cancel older CI runs so reviewers only ever wait on | |
| # the latest push and we don't burn minutes on stale SHAs. | |
| # | |
| # - Pushes to main: branch-scoped group, cancel-in-progress=true. | |
| # A rapid wave of merges can cancel older heavy CI runs on main so | |
| # the latest push supersedes stale ones. Per-SHA main observability is | |
| # provided separately by main-sha-marker.yml, which is keyed by | |
| # github.sha and is not cancellable by newer main pushes. | |
| # | |
| # The conditional in `group:` selects the right key per event type, and | |
| # `cancel-in-progress` fires for both pull_request and push: a rapid | |
| # wave of merges on `main` (13 commits in 90 min during the May 2026 | |
| # META wave) used to saturate the runner queue because each sha-unique | |
| # group kept its own full-matrix run alive. Branch-scoped grouping + | |
| # always-cancel-in-progress lets the latest push supersede stale ones. | |
| concurrency: | |
| group: ci-${{ github.workflow }}-${{ github.event_name == 'pull_request' && format('pr-{0}', github.event.pull_request.number) || format('branch-{0}', github.ref) }} | |
| cancel-in-progress: true | |
| # Default-deny for the workflow token; individual jobs escalate only | |
| # the scopes they actually need (Scorecard token-permissions, Sonar S8264). | |
| permissions: | |
| contents: read | |
| jobs: | |
| # ─── Planner (determines which downstream jobs may legitimately skip) ── | |
| # | |
| # Inspired by pypa/pip's CI: a planner job classifies the PR (or push) | |
| # diff and emits boolean outputs. Downstream skips are then either | |
| # "intentional" (planner said so) or suspicious (cancelled / crashed). | |
| # The aggregator gate at the bottom uses these outputs to distinguish | |
| # the two and refuses to pass on suspicious skips. See #1273. | |
| determine-changes: | |
| name: Determine changes | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 3 | |
| permissions: | |
| contents: read | |
| outputs: | |
| python_changed: ${{ steps.classify.outputs.python_changed }} | |
| tests_changed: ${{ steps.classify.outputs.tests_changed }} | |
| gha_workflows_changed: ${{ steps.classify.outputs.gha_workflows_changed }} | |
| docs_only: ${{ steps.classify.outputs.docs_only }} | |
| # macos_sensitive: true when the diff touches platform-specific code | |
| # paths whose macOS behaviour cannot be exercised on ubuntu/windows | |
| # runners. Used by the `test` matrix gate (see #1468) to skip the | |
| # macos-latest cells on PRs that do not need them, freeing the | |
| # macOS hosted-runner pool during burst-merge waves. The nightly | |
| # workflow (ci-macos-nightly.yml) provides the safety-net coverage. | |
| macos_sensitive: ${{ steps.classify.outputs.macos_sensitive }} | |
| steps: | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| fetch-depth: 0 | |
- id: classify
  name: Classify changed paths
  env:
    BASE_REF: ${{ github.base_ref }}
    EVENT_NAME: ${{ github.event_name }}
    # Event payload values reach the script as environment data rather
    # than being spliced into the script text by `${{ }}` expansion.
    BEFORE_SHA: ${{ github.event.before }}
  run: |
    # Classifies the changed paths of this event and writes five booleans
    # to $GITHUB_OUTPUT: python_changed, tests_changed,
    # gha_workflows_changed, docs_only and macos_sensitive.
    #
    # On pull_request, diff against the base branch. On push to main
    # (or any other push event), diff against the previous commit so
    # the planner stays useful for filter-skipped downstream jobs.
    #
    # NOTE: checkout above uses fetch-depth: 0, so HEAD has full history.
    # However, origin/${BASE_REF} is NOT fetched by default in the PR
    # checkout (which is on the merge ref) - we must fetch it explicitly.
    # Using --depth=1 here previously caused `...` (merge-base) diffs to
    # fail when the merge-base wasn't in the shallow window, especially
    # after parallel main merges. Fetch the base ref WITHOUT --depth.
    CHANGED=""
    diff_failed=false
    if [ "$EVENT_NAME" = "pull_request" ]; then
      # Fetch base ref with full history (no --depth) so merge-base resolves.
      git fetch --no-tags origin "refs/heads/${BASE_REF}:refs/remotes/origin/${BASE_REF}" || true
      # If the local clone is still shallow for any reason, unshallow it.
      if [ -f "$(git rev-parse --git-dir)/shallow" ]; then
        git fetch --unshallow origin "${BASE_REF}" || git fetch origin "${BASE_REF}" || true
      fi
      # Diagnostic output - keeps future flakes debuggable in the action log.
      echo "::group::git diagnostic"
      git remote -v || true
      git log --oneline -5 || true
      echo "origin/${BASE_REF} -> $(git rev-parse "origin/${BASE_REF}" 2>&1 || echo UNRESOLVED)"
      echo "HEAD -> $(git rev-parse HEAD 2>&1 || echo UNRESOLVED)"
      echo "::endgroup::"
      # Keep stderr out of CHANGED: a warning printed by a *successful*
      # diff must not be classified as if it were a changed path.
      diff_err="$(mktemp)"
      if ! CHANGED=$(git diff --name-only "origin/${BASE_REF}...HEAD" 2>"$diff_err"); then
        echo "::warning::git diff failed against origin/${BASE_REF}; falling back to safe over-broad result"
        echo "diff stderr: $(cat "$diff_err")"
        diff_failed=true
      fi
    else
      # `before` may be 000... on first push of a branch (and is empty for
      # events that carry no `before`); fall back to HEAD~1.
      BEFORE="${BEFORE_SHA:-}"
      if [ -z "$BEFORE" ] || [ "$BEFORE" = "0000000000000000000000000000000000000000" ]; then
        CHANGED=$(git diff --name-only "HEAD~1...HEAD" 2>/dev/null || git ls-files)
      else
        CHANGED=$(git diff --name-only "${BEFORE}...HEAD" 2>/dev/null || git ls-files)
      fi
    fi
    # Fail-safe fallback: when diff fails for any reason, emit the safe
    # over-broad classification so downstream jobs run anyway. Correctness
    # wins over efficiency - never fail this job for a clone-shape issue.
    if [ "$diff_failed" = "true" ]; then
      {
        echo "python_changed=true"
        echo "tests_changed=true"
        echo "gha_workflows_changed=true"
        echo "docs_only=false"
        echo "macos_sensitive=true"
      } | tee -a "$GITHUB_OUTPUT"
      exit 0
    fi
    echo "Changed files:"
    printf '%s\n' "$CHANGED" | sed 's/^/ /'
    # Pure-shell classification - auditable in `actionlint`, no
    # sub-shell variable round-tripping through python.
    python_changed=false
    tests_changed=false
    gha_workflows_changed=false
    docs_only=true
    macos_sensitive=false
    # Classify each changed path via grep. Using grep instead of
    # bash `case` to avoid linter warnings on overlapping patterns
    # (case-globs cannot cross slashes anyway).
    path_matches() {
      # $1: extended regex tested against the path currently held in $f.
      printf '%s\n' "$f" | grep -Eq "$1"
    }
    # macOS-sensitive paths (see #1468). Modules with branches on
    # `sys.platform == "darwin"` or that wrap macOS-only APIs
    # (Keychain via `keyring`, AppKit notifications, Foundation
    # clipboard, `launchd` daemon installer). When any of these
    # change, the macOS matrix cell must run on the PR to catch
    # regressions before merge. Otherwise it skips and the
    # nightly ci-macos-nightly.yml workflow catches drift.
    macos_sensitive_patterns=(
      '^src/bernstein/core/tunnels/'
      '^src/bernstein/core/daemon/'
      '^src/bernstein/core/config/platform_compat\.py$'
      '^src/bernstein/core/security/vault/'
      '^src/bernstein/core/security/resource_limits\.py$'
      '^src/bernstein/core/persistence/runtime_state\.py$'
      '^src/bernstein/core/communication/notifications\.py$'
      '^src/bernstein/core/preview/'
      '^src/bernstein/tui/clipboard\.py$'
      '^src/bernstein/cli/display/splash_screen\.py$'
      '^src/bernstein/bridges/openclaw_gateway\.py$'
      '^tests/integration/test_adapter_e2e\.py$'
      '^scripts/run_tests\.py$'
      # Changes to the CI workflow itself must re-validate the
      # macOS cell on the PR so we never ship a broken matrix
      # config (the nightly workflow runs the *merged* config).
      '^\.github/workflows/ci\.yml$'
      '^\.github/workflows/ci-macos-nightly\.yml$'
    )
    while IFS= read -r f; do
      [ -z "$f" ] && continue
      # matched_meta stays false only for paths that fit none of the
      # categories below; such a path disqualifies docs_only.
      matched_meta=false
      if path_matches '^src/.*\.py$'; then
        python_changed=true; docs_only=false; matched_meta=true
      fi
      if path_matches '^tests/'; then
        tests_changed=true; docs_only=false; matched_meta=true
      fi
      if path_matches '^\.github/workflows/.*\.(yml|yaml)$'; then
        gha_workflows_changed=true; docs_only=false; matched_meta=true
      fi
      if path_matches '^docs/|\.md$|^LICENSE$|^CONTRIBUTORS\.md$'; then
        matched_meta=true
      fi
      if [ "$matched_meta" = "false" ]; then
        docs_only=false
      fi
      # One hit is enough; stop probing once the flag is set.
      for pattern in "${macos_sensitive_patterns[@]}"; do
        if path_matches "$pattern"; then
          macos_sensitive=true
          break
        fi
      done
    done <<< "$CHANGED"
    # If nothing changed at all (e.g. workflow_dispatch on a clean ref),
    # treat as "not docs-only" so we don't intentionally skip tests.
    if [ -z "$CHANGED" ]; then
      docs_only=false
    fi
    echo "python_changed=$python_changed" | tee -a "$GITHUB_OUTPUT"
    echo "tests_changed=$tests_changed" | tee -a "$GITHUB_OUTPUT"
    echo "gha_workflows_changed=$gha_workflows_changed" | tee -a "$GITHUB_OUTPUT"
    echo "docs_only=$docs_only" | tee -a "$GITHUB_OUTPUT"
    echo "macos_sensitive=$macos_sensitive" | tee -a "$GITHUB_OUTPUT"
| # ─── Fast checks (never cancelled, <2 min each) ─────────────────────── | |
| repo-hygiene: | |
| name: Repo hygiene | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 5 | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| # Full clone so ``bernstein agents-md verify`` can resolve the | |
| # default branch via ``git rev-parse --verify origin/main``. | |
| # The default depth=1 single-ref fetch leaves origin/main | |
| # unfetched and the verify step silently drops the | |
| # git-workflow section, producing drift against locally-synced | |
| # content. | |
| fetch-depth: 0 | |
| - name: Establish origin/HEAD | |
| # actions/checkout does not set ``refs/remotes/origin/HEAD`` even | |
| # with full fetch. The generator's first-choice resolver reads | |
| # that symbolic-ref; this step makes the result match a | |
| # developer checkout where ``git remote set-head origin -a`` | |
| # has run. | |
| run: git remote set-head origin -a | |
- name: Assert .sdd is not tracked
  run: |
    # `git ls-files` prints every tracked path under .sdd; any output at
    # all means runtime state was committed.
    tracked_sdd="$(git ls-files '.sdd')"
    if [ -z "$tracked_sdd" ]; then
      exit 0
    fi
    echo "::error::.sdd must never be committed to git"
    printf '%s\n' "$tracked_sdd"
    exit 1
- name: Check for merge conflict markers in source files
  run: |
    # Scan tracked Python / YAML / Markdown / TOML files for lines that
    # start with a 7-character `<` or `>` conflict marker and a space.
    #
    # Paths are read NUL-delimited: an unquoted `$(git ls-files ...)`
    # would split on whitespace and glob-expand, so a file with a space
    # in its name would be skipped without any error being shown.
    CONFLICTS=""
    while IFS= read -r -d '' f; do
      if grep -qE '^(<{7} |>{7} )' "$f" 2>/dev/null; then
        CONFLICTS="$CONFLICTS $f"
        echo "::error file=$f::Unresolved merge conflict markers in $f"
      fi
    done < <(git ls-files -z -- '*.py' '*.yaml' '*.yml' '*.md' '*.toml')
    # Report every offending file before failing, not just the first.
    if [ -n "$CONFLICTS" ]; then
      exit 1
    fi
- name: Check Python syntax in scripts/
  run: |
    # Byte-compile every top-level script and fail if any does not parse.
    #
    # nullglob: with no matching files the loop body is skipped instead
    # of reporting a bogus syntax error for the literal `scripts/*.py`.
    shopt -s nullglob
    ERRORS=""
    for f in scripts/*.py; do
      # stderr is left visible so the log shows the offending line, not
      # only the file name carried by the annotation below.
      if ! python3 -m py_compile "$f"; then
        ERRORS="$ERRORS $f"
        echo "::error file=$f::Syntax error in $f"
      fi
    done
    # Collect all failures first so one run lists every broken script.
    if [ -n "$ERRORS" ]; then
      exit 1
    fi
| - uses: ./.github/actions/bootstrap | |
| - name: AGENTS.md cross-CLI sync drift check | |
| # Fails if AGENTS.md / CLAUDE.md / CONVENTIONS.md / .aider.conf.yml / | |
| # .goosehints / .cursor/rules/*.mdc drift from `bernstein agents-md | |
| # generate`. Run `uv run bernstein agents-md sync` locally to fix. | |
| run: uv run bernstein agents-md verify --workdir . | |
| lint: | |
| name: Lint | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 10 | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| - uses: ./.github/actions/bootstrap | |
| - run: uv run ruff check src/ | |
| - run: uv run ruff format --check src/ | |
| - name: Architecture contracts (import-linter) | |
| run: uv run lint-imports | |
| - name: Route broad-except policy (#1723) | |
| # Fails if a bare `except Exception:` appears in | |
| # src/bernstein/core/routes/**.py without a `bot-ack:` or | |
| # `intentional-broad-except` marker within 3 lines. | |
| run: uv run python scripts/check_routes_broad_except.py | |
| spelling: | |
| name: Spelling (typos) | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 5 | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| - uses: crate-ci/typos@bee27e3a4fd1ea2111cf90ab89cd076c870fce14 # v1 | |
| actionlint: | |
| name: Workflow lint | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 5 | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| - uses: reviewdog/action-actionlint@6fb7acc99f4a1008869fa8a0f09cfca740837d9d # v1 | |
| with: | |
| reporter: github-check | |
| level: error | |
| fail_level: error | |
| actionlint_flags: -shellcheck= | |
| # `actionlint_flags: -shellcheck=` disables the embedded | |
| # shellcheck (empty path = disabled). Without this our workflows | |
| # emit ~21 SC2016/SC2221/SC2222/SC2034 warnings that flood the | |
| # 22-annotation GitHub check_run cap, which is itself reported | |
| # as an error and fails reviewdog. `level` is a severity tag, | |
| # not a filter, so it alone does not stop the cap being hit. | |
| # `fail_on_error` is deprecated; `fail_level: error` replaces it. | |
| lineage-gate: | |
| # ADR-009 Lineage Gate - required check. CI generates a minimal signed | |
| # lineage fixture and verifies it so the job always exercises the gate | |
| # logic even when runtime `.sdd/lineage/log.jsonl` is absent. | |
| name: Lineage Gate | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 5 | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| - uses: ./.github/actions/bootstrap | |
- name: Run lineage gate
  run: |
    set -euo pipefail
    # Scratch directory for the generated fixture; exported so the inline
    # Python below can read it from the environment.
    export LINEAGE_FIXTURE="${RUNNER_TEMP}/lineage-fixture"
    uv run python - <<'PY'
    import hashlib
    import json
    import os
    from pathlib import Path

    from bernstein.core.lineage.entry import LineageEntry, canonicalise, entry_hash
    from bernstein.core.lineage.identity import generate_keypair, sign_detached

    # Fixture layout: <root>/lineage/log.jsonl and <root>/agents/agent:ci/card.json
    fixture_root = Path(os.environ["LINEAGE_FIXTURE"])
    log_path = fixture_root / "lineage" / "log.jsonl"
    card_dir = fixture_root / "agents" / "agent:ci"
    log_path.parent.mkdir(parents=True, exist_ok=True)
    card_dir.mkdir(parents=True, exist_ok=True)

    # Fresh keypair per run: the public half is published in the agent
    # card, the private half signs the single log entry below.
    private_key, public_key = generate_keypair()
    agent_card = {
        "protocolVersion": "a2a/1.0",
        "agent_id": "agent:ci",
        "kid": "ci-fixture",
        "public_key_pem": public_key,
    }
    (card_dir / "card.json").write_text(json.dumps(agent_card), encoding="utf-8")

    fixture_entry = LineageEntry(
        v=1,
        artefact_path="ci/lineage-fixture.txt",
        artefact_kind="file",
        content_hash="sha256:" + ("1" * 64),
        parent_hashes=[],
        agent_id="agent:ci",
        agent_card_kid="ci-fixture",
        tool_call_id="ci-lineage-gate",
        span_id="ci-lineage-gate",
        ts_ns=1,
        operator_hmac="0" * 64,
    )

    # The log holds the canonical bytes of the entry, one entry per line.
    payload = canonicalise(fixture_entry)
    log_path.write_bytes(payload + b"\n")
    detached_jws = sign_detached(payload, private_key, kid="ci-fixture")

    # Signature location:
    #   signatures/<first 2 hex of sha256(path)>/<sha256(path)>/<entry hash>.jws
    artefact_digest = hashlib.sha256(fixture_entry.artefact_path.encode()).hexdigest()
    entry_digest = entry_hash(fixture_entry).replace("sha256:", "")
    signature_dir = log_path.parent / "signatures" / artefact_digest[:2] / artefact_digest
    signature_dir.mkdir(parents=True, exist_ok=True)
    (signature_dir / f"{entry_digest}.jws").write_text(detached_jws, encoding="utf-8")
    PY
    # Point the gate script at the freshly written log and agent cards.
    uv run python scripts/check_lineage.py \
      --log "${LINEAGE_FIXTURE}/lineage/log.jsonl" \
      --cards "${LINEAGE_FIXTURE}/agents"
| # ─── Medium checks (cancel old runs, 5-20 min) ──────────────────────── | |
| typecheck: | |
| name: Type check report | |
| needs: [lint] # only run if lint passes (fast-fail) | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 20 | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| - uses: ./.github/actions/bootstrap | |
| - name: Run pyright (advisory) | |
| run: | | |
| uv run pyright 2>&1 | tail -1 || true | |
| echo "::notice::Typecheck is advisory while module decomposition shims are being typed" | |
| dead-code: | |
| name: Dead code (Vulture) | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 10 | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| - uses: ./.github/actions/bootstrap | |
| - run: uv tool install vulture | |
| - run: vulture src/ vulture_whitelist.py --min-confidence 80 --exclude "tests,docs" | |
| dist-size: | |
| name: Package size check | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 10 | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| - uses: ./.github/actions/bootstrap | |
- name: Build and check size
  run: |
    # Build the distribution, then fail if any wheel is larger than 10 MB.
    uv build
    MAX_SIZE=$((10 * 1024 * 1024))
    # Same explicit no-wheel guard as the install-smoke jobs: without it
    # an empty dist/ surfaces as a `stat` failure on the literal glob.
    shopt -s nullglob
    wheels=(dist/*.whl)
    if [ "${#wheels[@]}" -eq 0 ]; then
      echo "::error::no wheel found under dist/"
      exit 1
    fi
    for f in "${wheels[@]}"; do
      # `stat -c%s` first, `stat -f%z` as the fallback when that fails.
      SIZE=$(stat -c%s "$f" 2>/dev/null || stat -f%z "$f")
      echo "$f: $SIZE bytes"
      if [ "$SIZE" -gt "$MAX_SIZE" ]; then
        echo "::error::Wheel $f exceeds 10MB limit ($SIZE bytes)"
        exit 1
      fi
    done
| - name: Upload built wheel for downstream install-smoke jobs | |
| # Shared artifact consumed by install-smoke-pipx and | |
| # install-smoke-uv. Building once and reusing avoids running | |
| # `uv build` on every matrix cell (4 pipx + 2 uv = 6 cells today) and | |
| # ensures every smoke runs against bit-identical wheels. | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| with: | |
| name: install-smoke-wheel | |
| path: dist/*.whl | |
| if-no-files-found: error | |
| retention-days: 1 | |
| # ─── Install-path smoke (built wheel, not editable) ────────────────── | |
| # | |
| # The two install paths the README documents first - pipx and | |
| # `uv tool install` - have no other coverage that exercises the | |
| # *built* wheel end to end. Editable installs (`pip install -e .`) | |
| # hide a class of packaging bugs: missing package-data, broken | |
| # `console_scripts`, entry-point loading errors, dependency-resolver | |
| # regressions. These jobs install from the wheel produced by | |
| # `dist-size`, then run `bernstein --version`, `bernstein --help`, | |
| # and `bernstein doctor --json` to confirm the dominant install path | |
| # documented first in README still works end to end. | |
| install-smoke-pipx: | |
| name: Install smoke - pipx (${{ matrix.os }}, Python ${{ matrix.python-version }}) | |
| needs: [dist-size] | |
| runs-on: ${{ matrix.os }} | |
| timeout-minutes: 15 | |
| permissions: | |
| contents: read | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| os: [ubuntu-latest, macos-latest] | |
| # Matrix tracks `requires-python = ">=3.12"` in pyproject.toml. | |
| # 3.11 is intentionally excluded: pipx / uv install would refuse | |
| # the wheel for a Python the package does not support, which | |
| # would just confirm the floor we already pin. | |
| python-version: ["3.12", "3.13"] | |
| steps: | |
| - name: Harden runner (audit mode) | |
| if: runner.os == 'Linux' | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/setup-python@ece7cb06caefa5fff74198d8649806c4678c61a1 # v6.3.0 | |
| with: | |
| python-version: ${{ matrix.python-version }} | |
| - name: Download built wheel | |
| uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 | |
| with: | |
| name: install-smoke-wheel | |
| path: dist | |
- name: Fail fast if wheel exceeds 25 MB
  # Separate from the 10 MB gate in `dist-size`, which is tuned tight to
  # track day-to-day growth. This is the smoke job's own hard ceiling,
  # aimed at accidental bundling regressions (binary blobs, vendored
  # deps) that would slow pipx installs for everyone.
  shell: bash
  run: |
    set -euo pipefail
    shopt -s nullglob
    limit_bytes=$((25 * 1024 * 1024))
    candidates=(dist/*.whl)
    # nullglob leaves the array empty when the download produced nothing.
    if (( ${#candidates[@]} == 0 )); then
      echo "::error::no wheel found under dist/"
      exit 1
    fi
    for whl in "${candidates[@]}"; do
      # Try `stat -c%s`; fall back to `stat -f%z` when that form fails.
      bytes=$(stat -c%s "$whl" 2>/dev/null || stat -f%z "$whl")
      echo "$whl: $bytes bytes"
      if (( bytes > limit_bytes )); then
        echo "::error::wheel $whl exceeds 25 MB install-smoke ceiling ($bytes bytes)"
        exit 1
      fi
    done
| - name: Install uv (SHA-pinned, vendors pipx) | |
| # Scorecard pinned-dependencies: pip cannot be hash-pinned for a | |
| # single bootstrap step without a maintained requirements file, | |
| # so we route the pipx install through SHA-pinned uv instead. | |
| # `uv tool install pipx` puts pipx on PATH via uv's tool dir. | |
| uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # v8.2.0 | |
| with: | |
| enable-cache: true | |
- name: Install pipx via uv
  shell: bash
  run: |
    set -euo pipefail
    uv tool install pipx
    # Later steps need both uv's tool shim directory and pipx's own bin
    # directory on PATH; $GITHUB_PATH takes one directory per line.
    tool_bin="$(uv tool dir --bin)"
    printf '%s\n' "$tool_bin" "$HOME/.local/bin" >> "$GITHUB_PATH"
    # PATH changes only apply to subsequent steps, so call pipx by its
    # full path here.
    "$tool_bin/pipx" ensurepath
- name: pipx install the built wheel
  # Always the wheel, never an editable install: editable installs skip
  # package-data inclusion and entry-point registration, which is the
  # exact regression class this job exists to catch.
  shell: bash
  run: |
    set -euo pipefail
    built=(dist/*.whl)
    # Pin pipx to the interpreter selected by setup-python for this cell.
    pipx install \
      --python "$(which python)" \
      "${built[0]}"
| - name: bernstein --version (exit zero) | |
| shell: bash | |
| run: bernstein --version | |
| - name: bernstein --help (exit zero) | |
| shell: bash | |
| run: bernstein --help | |
- name: Verify packaged resources load via importlib
  # Checks that package-data survived the wheel build: a bundled MCP
  # tool schema and a force-included default template are both read
  # through importlib.resources, using the pipx-managed interpreter so
  # the probe sees the same site-packages layout end users get.
  # `bernstein doctor --json` is deliberately not used: doctor is a
  # dev-environment diagnostic that probes optional tools (uv, ruff,
  # pytest, pyright) and git context, neither of which exists in a
  # fresh pipx venv. This probe stays narrowly scoped to the regression
  # class the smoke job is meant to catch.
  shell: bash
  run: |
    set -euo pipefail
    BERNSTEIN_PYTHON="$(pipx environment --value PIPX_LOCAL_VENVS)/bernstein/bin/python"
    "$BERNSTEIN_PYTHON" -c '
    from importlib import import_module
    from importlib import resources

    # The console-script target must import and expose a callable `cli`.
    entry_module = import_module("bernstein.cli.main")
    assert callable(getattr(entry_module, "cli")), "bernstein.cli.main:cli missing"

    # At least one JSON schema must ship inside the schema package.
    json_schemas = [
        item.name
        for item in resources.files("bernstein.mcp.tool_schemas").iterdir()
        if item.name.endswith(".json")
    ]
    assert json_schemas, "no MCP tool schemas shipped in wheel"

    # The default-templates package must not be empty.
    templates = resources.files("bernstein._default_templates")
    assert any(templates.iterdir()), "no default templates shipped in wheel"
    print(f"packaged resources OK: {len(json_schemas)} MCP schemas")
    '
| install-smoke-uv: | |
| # Leaner mirror of install-smoke-pipx for the `uv tool install` | |
| # path. uv is the second install command documented in the README | |
| # and rounds out coverage for the two paths most likely to surface | |
| # packaging regressions. We run a smaller matrix (one Python | |
| # version per OS) because the pipx job already exercises the | |
| # cross-Python combinatorics; uv shares the same wheel and | |
| # console-scripts entry point, so the marginal coverage of mirroring | |
| # the pipx job's full OS x Python matrix is not worth the runner spend. | |
| name: Install smoke - uv tool (${{ matrix.os }}) | |
| needs: [dist-size] | |
| runs-on: ${{ matrix.os }} | |
| timeout-minutes: 15 | |
| permissions: | |
| contents: read | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| os: [ubuntu-latest, macos-latest] | |
| steps: | |
| - name: Harden runner (audit mode) | |
| if: runner.os == 'Linux' | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| - uses: ./.github/actions/bootstrap | |
| - uses: actions/setup-python@ece7cb06caefa5fff74198d8649806c4678c61a1 # v6.3.0 | |
| with: | |
| python-version: "3.14" | |
| - name: Download built wheel | |
| uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 | |
| with: | |
| name: install-smoke-wheel | |
| path: dist | |
- name: Fail fast if wheel exceeds 25 MB
  shell: bash
  run: |
    set -euo pipefail
    # Hard ceiling for the downloaded wheel(s), in bytes.
    ceiling=$((25 * 1024 * 1024))
    shopt -s nullglob
    found=(dist/*.whl)
    # With nullglob an unmatched pattern yields an empty array.
    if (( ${#found[@]} == 0 )); then
      echo "::error::no wheel found under dist/"
      exit 1
    fi
    for wheel in "${found[@]}"; do
      # Try `stat -c%s`; fall back to `stat -f%z` when that form fails.
      wheel_bytes=$(stat -c%s "$wheel" 2>/dev/null || stat -f%z "$wheel")
      echo "$wheel: $wheel_bytes bytes"
      if (( wheel_bytes > ceiling )); then
        echo "::error::wheel $wheel exceeds 25 MB install-smoke ceiling ($wheel_bytes bytes)"
        exit 1
      fi
    done
| - name: uv tool install the built wheel | |
| # `uv tool install` is the install command the README documents | |
| # second. As with pipx we install from the wheel, never | |
| # editable, so packaging bugs surface here and not in user | |
| # reports. | |
| shell: bash | |
| run: | | |
| set -euo pipefail | |
| # nullglob + explicit check: an unmatched glob must fail loudly here | |
| # instead of handing the literal pattern "dist/*.whl" to uv. | |
| shopt -s nullglob | |
| wheels=(dist/*.whl) | |
| if [ "${#wheels[@]}" -eq 0 ]; then | |
| echo "::error::no wheel found under dist/" | |
| exit 1 | |
| fi | |
| uv tool install "${wheels[0]}" | |
| # Ask uv where it put the executables rather than assuming | |
| # ~/.local/bin; the location follows UV_TOOL_BIN_DIR / XDG_BIN_HOME. | |
| uv tool dir --bin >> "$GITHUB_PATH" | |
| - name: bernstein --version (exit zero) | |
| shell: bash | |
| run: bernstein --version | |
| - name: bernstein --help (exit zero) | |
| shell: bash | |
| run: bernstein --help | |
| - name: Verify packaged resources load via importlib | |
| # Mirror of the pipx job's resource probe but resolved through | |
| # the uv-managed tool venv. See pipx counterpart for rationale. | |
| # | |
| # The inline Python asserts three things: bernstein.cli.main exposes | |
| # a callable `cli`, bernstein.mcp.tool_schemas holds at least one | |
| # .json resource, and bernstein._default_templates is not empty. | |
| shell: bash | |
| run: | | |
| set -euo pipefail | |
| # Locate the interpreter inside the tool environment uv created for | |
| # bernstein. The bin/python layout is the POSIX one; this job's | |
| # matrix only contains ubuntu-latest and macos-latest. | |
| UV_TOOL_DIR="$(uv tool dir)" | |
| BERNSTEIN_PYTHON="$UV_TOOL_DIR/bernstein/bin/python" | |
| if [ ! -x "$BERNSTEIN_PYTHON" ]; then | |
| echo "::error::cannot find uv-managed bernstein interpreter at $BERNSTEIN_PYTHON" | |
| exit 1 | |
| fi | |
| "$BERNSTEIN_PYTHON" -c ' | |
| import importlib | |
| import importlib.resources as ir | |
| cli_mod = importlib.import_module("bernstein.cli.main") | |
| assert callable(getattr(cli_mod, "cli")), "bernstein.cli.main:cli missing" | |
| schema_pkg = ir.files("bernstein.mcp.tool_schemas") | |
| schemas = [p.name for p in schema_pkg.iterdir() if p.name.endswith(".json")] | |
| assert schemas, "no MCP tool schemas shipped in wheel" | |
| tpl_pkg = ir.files("bernstein._default_templates") | |
| assert any(tpl_pkg.iterdir()), "no default templates shipped in wheel" | |
| print(f"packaged resources OK: {len(schemas)} MCP schemas") | |
| ' | |
| # ─── CI hardening 2026 (medium, parallel, ≤8 min each) ─────────────── | |
| property-tests: | |
| # Hypothesis property suite. PR-time runs the `smoke` profile (50 | |
| # examples per property) so every file finishes in well under a | |
| # minute. Catches hash-chain / signature / canonicalisation | |
| # regressions that escape unit tests' fixed inputs. | |
| name: Property tests (Hypothesis smoke) | |
| needs: [lint] | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 10 | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| - uses: ./.github/actions/bootstrap | |
| - name: Run Hypothesis property suite (smoke profile) | |
| env: | |
| HYPOTHESIS_PROFILE: smoke | |
| run: uv run pytest tests/property/ -q --no-cov --timeout=60 | |
| snapshot-tests: | |
| # Syrupy snapshot suite. Locks JSONL field order / shape for the | |
| # audit log + lineage record so silent wire-format drift is caught | |
| # before downstream parsers break. | |
| name: Snapshot tests (syrupy) | |
| needs: [lint] | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 5 | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| - uses: ./.github/actions/bootstrap | |
| - name: Run snapshot tests | |
| run: uv run pytest tests/snapshot/ -q --no-cov | |
| schemathesis-smoke: | |
| # OpenAPI fuzz on the FastAPI task server. Smoke profile fuzzes | |
| # only the critical-surface allow-list (task CRUD, health, | |
| # openapi.json, metrics) with 5 examples per endpoint and the | |
| # `not_a_server_error` check. Heavier sweeps live in nightly. | |
| name: Schemathesis smoke | |
| needs: [lint] | |
| runs-on: ubuntu-latest | |
| # Smoke profile wall-clock runs ~7m30s, which raced the previous | |
| # 8-minute window and got cancelled, failing the CI gate aggregator. | |
| # Widen to 20m for headroom; uv setup is already cached via bootstrap | |
| # and the in-process ASGI schema build is not separately cacheable. | |
| timeout-minutes: 20 | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| - uses: ./.github/actions/bootstrap | |
| - name: Run Schemathesis smoke profile | |
| env: | |
| SCHEMATHESIS_PROFILE: smoke | |
| BERNSTEIN_AUTH_DISABLED: "1" | |
| run: uv run pytest tests/contract/ -q --no-cov --timeout=30 -p no:warnings | |
| semgrep: | |
| # Project-specific Semgrep rules (.semgrep.yml). ERROR severity | |
| # fails PR; WARNING is advisory (annotation only). | |
| name: Semgrep (custom rules) | |
| needs: [lint] | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 5 | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| - uses: ./.github/actions/bootstrap | |
| - name: Install Semgrep (isolated env) | |
| # semgrep pins click<8.2 + opentelemetry-sdk<1.38, which conflict | |
| # with our project floors (click>=8.3.3, opentelemetry-sdk>=1.41.1). | |
| # Install in its own venv via `uv tool` (pipx-equivalent) so its | |
| # transitive pins never touch the project resolver. | |
| run: uv tool install semgrep | |
| - name: Run Semgrep (ERROR-only fail gate) | |
| run: | | |
| uv tool run semgrep --config .semgrep.yml --metrics off --severity ERROR --error src/ | |
| bandit: | |
| # Bandit static security analyzer. HIGH severity only - there are | |
| # ~30 MEDIUM findings on main that are accepted patterns | |
| # (urlopen with timeout, hardcoded localhost in dev). Fails PR | |
| # only on new HIGH-severity introductions; pre-existing HIGHs are | |
| # captured in `.bandit-baseline.json` and tracked as follow-ups. | |
| name: Bandit (security) | |
| needs: [lint] | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 5 | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| - uses: ./.github/actions/bootstrap | |
| - name: Run Bandit (HIGH severity gate via baseline) | |
| run: | | |
| # Baseline file `.bandit-baseline.json` captures the 11 known | |
| # HIGH findings on main; the gate below fails only on NEW | |
| # HIGH-severity issues introduced by the PR. | |
| uv run bandit -r src/ --severity-level high \ | |
| -b .bandit-baseline.json | |
| pip-audit: | |
| # PyPI CVE scan. Production deps are strict (any vulnerability | |
| # fails); dev deps are advisory (continue-on-error). The free | |
| # PyPI advisory DB is updated daily. | |
| name: pip-audit (deps) | |
| needs: [lint] | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 8 | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| - uses: ./.github/actions/bootstrap | |
| - name: Export production requirements (no project, hashed) | |
| # Audit the resolved lockfile without including bernstein itself. | |
| # `pip-audit` chokes on editable distributions even with | |
| # `--skip-editable` under `--strict`, so we feed it a flat | |
| # requirements file generated from `uv.lock`. | |
| run: uv export --no-dev --no-emit-project --format requirements-txt -o /tmp/req-prod.txt | |
| - name: Export dev requirements (no project, hashed) | |
| run: uv export --no-emit-project --format requirements-txt -o /tmp/req-dev.txt | |
| - name: Production deps (strict) | |
| # `--no-deps` is safe: the exported file is fully pinned and the | |
| # transitive closure is resolved by `uv.lock`. `--disable-pip` | |
| # avoids spinning up a sub-venv just to run `pip install`. | |
| # | |
| # `--ignore-vuln PYSEC-2025-183` (CVE-2025-45768): disputed advisory | |
| # against pyjwt that affects all released versions (introduced at 0, | |
| # no fix version published) and is pulled in transitively via `mcp`. | |
| # The maintainer disputes it because the key length is chosen by the | |
| # calling application, not the library. There is nothing to upgrade | |
| # to, so it is ignored with the rationale recorded here. | |
| run: uv run pip-audit -r /tmp/req-prod.txt --strict --disable-pip --no-deps --ignore-vuln PYSEC-2025-183 | |
| - name: Dev deps (advisory) | |
| continue-on-error: true | |
| run: uv run pip-audit -r /tmp/req-dev.txt --strict --disable-pip --no-deps --ignore-vuln PYSEC-2025-183 | |
| beartype: | |
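| # Runtime type-check enforcement via beartype.claw. The stated target is | |
| # the public APIs in core.security / core.agents / core.protocols.cluster, | |
| # but the step below currently selects only the lineage_* unit tests | |
| # (`-k 'lineage_signer or lineage_record or lineage_export'`) and runs | |
| # them with BEARTYPE_USE_CLAW=enable, so a type contract violation in | |
| # the claw-covered code surfaces as a test failure. | |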
| name: Beartype (type contracts) | |
| needs: [lint] | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 15 | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| - uses: ./.github/actions/bootstrap | |
| - name: Run lineage-signer tests under beartype claw | |
| # The beartype-claw allow-list is in tests/_beartype_claw.py. | |
| # Today only ``bernstein.core.persistence.lineage_signer`` is | |
| # in the strict zone - widen as more modules reach | |
| # beartype-clean status. | |
| env: | |
| BEARTYPE_USE_CLAW: enable | |
| run: | | |
| uv run pytest tests/unit/ -q --no-cov --timeout=120 \ | |
| -k 'lineage_signer or lineage_record or lineage_export' | |
| mutmut-diff: | |
| # Mutation testing report on PR-changed files only. Computes a | |
| # mutation score over the src/ Python files changed in this PR. | |
| # Advisory: the mutmut step is continue-on-error and its commands end | |
| # in `|| true`, so the score is reported but never fails this job. | |
| name: Mutation report (diff-only) | |
| needs: [lint] | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 20 | |
| if: github.event_name == 'pull_request' | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| fetch-depth: 0 | |
| - uses: ./.github/actions/bootstrap | |
| - name: Compute changed src/ files | |
| id: diff | |
| env: | |
| BASE_REF: ${{ github.base_ref }} | |
| run: | | |
| # Build a comma-separated list of the Python files under src/ that | |
| # differ between the merge base with the PR base branch and HEAD | |
| # (three-dot diff). --diff-filter=d drops deleted files: they are | |
| # gone from the checkout, so they cannot be mutated. | |
| CHANGED=$(git diff --name-only --diff-filter=d \ | |
| "origin/${BASE_REF}...HEAD" \ | |
| | grep '^src/.*\.py$' | tr '\n' ',' | sed 's/,$//') | |
| # Outputs: `skip=true` when nothing qualifies, otherwise `paths`. | |
| if [ -z "$CHANGED" ]; then | |
| echo "no Python files changed; skipping mutation step" | |
| echo "skip=true" >> "$GITHUB_OUTPUT" | |
| exit 0 | |
| fi | |
| echo "paths=$CHANGED" >> "$GITHUB_OUTPUT" | |
| echo "Mutating: $CHANGED" | |
| - name: Run mutmut on changed files | |
| # Temporarily overwrites mutmut_config.py so that paths_to_mutate is | |
| # the file list from DIFF_PATHS, runs mutmut, restores the original | |
| # config, then prints the results. Skipped when the diff step set | |
| # skip=true. | |
| if: steps.diff.outputs.skip != 'true' | |
| continue-on-error: true # advisory - score reported, not enforced | |
| env: | |
| DIFF_PATHS: ${{ steps.diff.outputs.paths }} | |
| run: | | |
| uv sync --group dev | |
| # mutmut 3.x reads paths_to_mutate from pyproject.toml / | |
| # mutmut_config.py instead of CLI; we rewrite the config to | |
| # the diff paths for this run only, then revert. | |
| cp mutmut_config.py mutmut_config.py.bak | |
| # One quoted, comma-terminated list entry per changed file. | |
| PATHS=$(echo "${DIFF_PATHS}" | tr ',' '\n' | sed 's/.*/ "&",/') | |
| { | |
| echo "paths_to_mutate = [" | |
| echo "$PATHS" | |
| echo "]" | |
| echo "test_command = \"python -m pytest tests/unit/ -x -q --no-header --override-ini=addopts=\"" | |
| echo "tests_dir = \"tests/unit/\"" | |
| } > mutmut_config.py | |
| # `|| true` keeps a non-zero mutmut exit from aborting the script | |
| # before the original config is moved back into place. | |
| uv run mutmut run || true | |
| mv mutmut_config.py.bak mutmut_config.py | |
| uv run mutmut results || true | |
| diff-coverage: | |
| # LEVEL 1 of the coverage ratchet - diff-cover report: lines touched in | |
| # this PR are compared with the committed diff-coverage floor. The floor lives | |
| # in .coverage-baseline.json (key: diff_coverage_floor_percent) so it | |
| # is a single source of truth that the weekly bump workflow nudges up | |
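| # over time (see docs/operations/coverage-ratchet.md). Consumes the | |
| # coverage.xml published as the `coverage-report` artifact, which the | |
| # coverage-report job uploads on pushes to main only; when the artifact | |
| # cannot be downloaded the diff-cover step emits a warning and is skipped. | |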
| # | |
| # ADVISORY: the diff-cover step is continue-on-error, so this report's | |
| # result stays `success` even when diff coverage is below the floor. | |
| # The report is outside the CI-gate `needs` set until PR coverage | |
| # artifacts are reliable enough for a blocking threshold. | |
| name: Diff coverage report | |
| needs: [test] | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 5 | |
| if: github.event_name == 'pull_request' | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| fetch-depth: 0 | |
| - uses: ./.github/actions/bootstrap | |
| - name: Download coverage report | |
| uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 | |
| with: | |
| name: coverage-report | |
| continue-on-error: true # main may not have generated coverage on the PR's commit | |
| - name: Resolve diff-coverage floor from baseline | |
| id: floor | |
| # Single source of truth: the floor the weekly bump nudges up. | |
| # Falls back to 80 if the baseline file is somehow absent so a | |
| # missing file never silently disables the gate. | |
| run: | | |
| if [ -f .coverage-baseline.json ]; then | |
| floor=$(uv run python scripts/coverage_ratchet.py show-floor \ | |
| --baseline .coverage-baseline.json) | |
| else | |
| echo "::warning::.coverage-baseline.json missing; defaulting diff floor to 80" | |
| floor=80 | |
| fi | |
| echo "value=${floor}" >> "$GITHUB_OUTPUT" | |
| echo "Diff-coverage floor: ${floor}%" | |
| - name: Run diff-cover | |
| # Compares coverage of the lines changed against origin/BASE_REF with | |
| # the FLOOR resolved by the previous step and appends the markdown | |
| # report to the job summary. | |
| continue-on-error: true # advisory until all PRs reliably have coverage.xml | |
| env: | |
| BASE_REF: ${{ github.base_ref }} | |
| FLOOR: ${{ steps.floor.outputs.value }} | |
| run: | | |
| if [ ! -f coverage.xml ]; then | |
| echo "::warning::No coverage.xml found - skipping diff-cover" | |
| exit 0 | |
| fi | |
| # diff-cover exits non-zero when coverage is below --fail-under. The | |
| # default shell runs with `-e`, so capture the status instead of | |
| # letting it abort the script before the report reaches the summary | |
| # - a below-floor run is exactly when the report is needed. | |
| status=0 | |
| uv run diff-cover coverage.xml \ | |
| --compare-branch="origin/${BASE_REF}" \ | |
| --fail-under="${FLOOR}" \ | |
| --markdown-report diff-coverage.md || status=$? | |
| if [ -f diff-coverage.md ]; then | |
| cat diff-coverage.md >> "$GITHUB_STEP_SUMMARY" || true | |
| fi | |
| # Preserve diff-cover's verdict as the step outcome. | |
| exit "$status" | |
| pyright-strict-zone: | |
| # Pyright strict mode against the security and protocols.cluster | |
| # subtrees. The repo-wide pyright run stays advisory (basic mode | |
| # via tool.pyright); the strict zone fails PR on any new error | |
| # in the listed packages. | |
| name: Pyright strict (security + cluster) | |
| needs: [lint] | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 10 | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| - uses: ./.github/actions/bootstrap | |
| - name: Run pyright strict on the curated allow-list | |
| # The strict-zone allow-list is in pyrightconfig.strict.json. The | |
| # repo-wide pyright run uses pyproject.toml [tool.pyright]. Add | |
| # files to the allow-list as they reach strict cleanliness. | |
| run: uv run pyright --project pyrightconfig.strict.json | |
| # ─── Slow checks (cancel old runs, 15-45 min) ───────────────────────── | |
| adapter-integration: | |
| # End-to-end adapter tests against the fake-CLI harness in | |
| # tests/integration/fake_cli. Spawns real subprocesses (no Popen | |
| # mocks) so PATH-resolution, env filtering, exit-code mapping, and | |
| # output capture are exercised against actual fork/exec. Skipped on | |
| # Windows because the wrappers are POSIX shell scripts and the | |
| # adapters use ``start_new_session=True``; unit tests cover the same | |
| # argv/env logic on Windows via mocked Popen. | |
| # | |
| # macOS coverage moved to the adapter-integration-macos job below | |
| # (gated by determine-changes.outputs.macos_sensitive, the | |
| # `macos-needed` label, or push events) to relieve the hosted macOS | |
| # runner pool during burst-merge waves - see #1468. The | |
| # ci-macos-nightly.yml workflow runs the full macOS matrix daily as | |
| # the safety-net for regressions that slip past the path gate. | |
| name: Adapter integration (fake-CLI) | |
| needs: [lint] | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 15 | |
| permissions: | |
| contents: read | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| python-version: ["3.13"] | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| - uses: ./.github/actions/bootstrap | |
| with: | |
| python-version: ${{ matrix.python-version }} | |
| - name: Run fake-CLI adapter integration tests | |
| run: uv run pytest tests/integration/test_adapter_e2e.py -x -q --timeout=60 | |
| adapter-integration-macos: | |
| # macOS half of adapter-integration. Gated on three conditions (see | |
| # #1468): | |
| # 1. push events (incl. merges to main) always run macOS so the | |
| # release-train sees a fresh signal on every commit that | |
| # reaches main; | |
| # 2. PRs whose diff touches macOS-sensitive paths (the planner | |
| # sets macos_sensitive=true); | |
| # 3. PRs that carry the `macos-needed` label (operator opt-in for | |
| # cross-platform work that the path filter cannot detect). | |
| # Otherwise this job is skipped on PRs and ci-macos-nightly.yml | |
| # provides the safety net. | |
| name: Adapter integration (fake-CLI, macOS) | |
| needs: [lint, determine-changes] | |
| if: >- | |
| github.event_name == 'push' || | |
| needs.determine-changes.outputs.macos_sensitive == 'true' || | |
| contains(github.event.pull_request.labels.*.name, 'macos-needed') | |
| runs-on: macos-latest | |
| timeout-minutes: 15 | |
| permissions: | |
| contents: read | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| python-version: ["3.13"] | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| - uses: ./.github/actions/bootstrap | |
| with: | |
| python-version: ${{ matrix.python-version }} | |
| - name: Run fake-CLI adapter integration tests | |
| run: uv run pytest tests/integration/test_adapter_e2e.py -x -q --timeout=60 | |
| test: | |
| # Sharded across 4 parallel runners per (os, python) cell. The unit | |
| # suite runs each of ~1.4k test files in its own subprocess (the | |
| # OOM-avoidance model documented in the coverage step below); per-file | |
| # Python startup + full-package import is a fixed ~2.7s/file, so at | |
| # 1.4k files / 4 local workers a single runner spent 25+ min purely on | |
| # startup churn (the file count crossed a threshold when discovery | |
| # widened to rglob). Fanning the file list out over TEST_SHARD_COUNT | |
| # runners (each runs `run_tests.py --shard i/N`, a deterministic | |
| # disjoint slice) cuts each runner to ~1/4 of the files. The matrix | |
| # `.result` ci-gate reads is `failure` if ANY shard cell fails, so all | |
| # shards are still required - no shard can silently skip. | |
| name: Test (${{ matrix.os }}, Python ${{ matrix.python-version }}, shard ${{ matrix.shard }}) | |
| needs: [lint] # fast-fail: don't waste 45min if lint fails | |
| runs-on: ${{ matrix.os }} | |
| # Per-shard the unit suite is ~1/4 of the old wall time. Push-time | |
| # coverage is collected during the existing ubuntu/3.13 shard runs and | |
| # combined by coverage-report below, so no shard performs a serial | |
| # full-suite coverage rerun. | |
| timeout-minutes: 90 | |
| permissions: | |
| contents: read | |
| env: | |
| # Single source of truth for the shard count. The `--shard i/N` | |
| # denominator below and the per-shard slice both key off this; bump | |
| # it (and the `shard:` list) together to rescale the fan-out. | |
| TEST_SHARD_COUNT: "4" | |
| # Main push coverage runs are slower than local file runs; keep the | |
| # per-file guard, but allow heavy adapter contract files to finish. | |
| BERNSTEIN_TEST_FILE_TIMEOUT_SECONDS: "600" | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| # macOS removed from the default matrix to relieve hosted-runner | |
| # saturation during burst-merge waves (see #1468). The test-macos | |
| # job below runs the same suite on macOS when the diff is | |
| # macOS-sensitive, the PR carries the `macos-needed` label, or | |
| # the event is a push. ci-macos-nightly.yml provides a daily | |
| # safety-net run of the full macOS matrix. | |
| os: [ubuntu-latest, windows-latest] | |
| python-version: ["3.12", "3.13"] | |
| # Fan the per-file suite out across 4 deterministic shards per | |
| # cell. Keep this list in sync with TEST_SHARD_COUNT above. | |
| shard: [1, 2, 3, 4] | |
| exclude: | |
| # Coverage/JUnit upload only on ubuntu; skip duplicate slow jobs on Windows for 3.12 | |
| - os: windows-latest | |
| python-version: "3.12" | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| - uses: ./.github/actions/bootstrap | |
| with: | |
| python-version: ${{ matrix.python-version }} | |
| - name: Fetch base ref for impacted-test selection | |
| if: github.event_name == 'pull_request' && runner.os != 'Windows' | |
| env: | |
| BASE_REF: ${{ github.base_ref }} | |
| run: | | |
| git fetch --no-tags --depth=1 origin \ | |
| "refs/heads/${BASE_REF}:refs/remotes/origin/${BASE_REF}" | |
| - name: Run isolated test suite (Linux/macOS) | |
| if: runner.os != 'Windows' | |
| env: | |
| BASE_REF: ${{ github.base_ref }} | |
| EVENT_NAME: ${{ github.event_name }} | |
| PYTHON_VERSION: ${{ matrix.python-version }} | |
| SHARD: ${{ matrix.shard }} | |
| SHARD_COUNT: ${{ env.TEST_SHARD_COUNT }} | |
| run: | | |
| # The argument list is assembled once and passed to a single | |
| # run_tests.py invocation. | |
| runner_args=(--parallel 4) | |
| # Coverage is collected only on push, on Linux, under Python 3.13. | |
| if [ "${EVENT_NAME}" = "push" ] && [ "${RUNNER_OS}" = "Linux" ] && [ "${PYTHON_VERSION}" = "3.13" ]; then | |
| runner_args+=(--coverage) | |
| fi | |
| # `--shard i/N` selects a deterministic disjoint slice of the | |
| # discovered file list; every run gets it. | |
| runner_args+=(--shard "${SHARD}/${SHARD_COUNT}") | |
| # On PRs with the base ref available locally, restrict the run to | |
| # the affected files as well, so each runner handles ~1/N of the | |
| # impacted set. | |
| if [ "${EVENT_NAME}" = "pull_request" ] && \ | |
| git rev-parse --verify "refs/remotes/origin/${BASE_REF}" >/dev/null 2>&1; then | |
| runner_args+=(--affected "refs/remotes/origin/${BASE_REF}") | |
| fi | |
| uv run python scripts/run_tests.py "${runner_args[@]}" | |
| - name: Run isolated test suite (Windows) | |
| if: runner.os == 'Windows' | |
| continue-on-error: true # Windows has Unix-only tests (chmod, SIGKILL) that are skipped but some may remain | |
| shell: pwsh | |
| env: | |
| SHARD: ${{ matrix.shard }} | |
| SHARD_COUNT: ${{ env.TEST_SHARD_COUNT }} | |
| run: uv run python scripts/run_tests.py -x --parallel 4 --shard "${env:SHARD}/${env:SHARD_COUNT}" | |
| - name: Run capability-matrix spawn-refusal integration tests (Linux/macOS) | |
| # Pinned to shard 1 so this runs exactly once per (os, python) cell | |
| # rather than once per shard - the unit suite is sharded, this | |
| # integration probe is not. Lethal-trifecta integration coverage: | |
| # every supported OS/Python cell exercises the AgentSpawner | |
| # spawn-refusal path so we catch regressions in the structural rule | |
| # before they ship. See | |
| # tests/integration/test_capability_matrix_spawn_refusal.py. | |
| if: runner.os != 'Windows' && matrix.shard == 1 | |
| run: | | |
| uv run pytest tests/integration/test_capability_matrix_spawn_refusal.py \ | |
| -x -q --tb=short --timeout=120 | |
| - name: Run capability-matrix spawn-refusal integration tests (Windows) | |
| # Pinned to shard 1 (see Linux/macOS counterpart): one run per cell. | |
| if: runner.os == 'Windows' && matrix.shard == 1 | |
| shell: pwsh | |
| run: | | |
| uv run pytest tests/integration/test_capability_matrix_spawn_refusal.py ` | |
| -x -q --tb=short --timeout=120 | |
| - name: Prepare coverage shard artifact | |
| # Renames the shard's .coverage data file to .coverage.<shard> so the | |
| # upload step below publishes a distinct file per shard. Fails when | |
| # the test run produced no .coverage file. | |
| if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13' && github.event_name == 'push' | |
| env: | |
| # Passed through env rather than interpolated into the script, | |
| # matching how the other steps in this job receive expressions. | |
| SHARD: ${{ matrix.shard }} | |
| run: | | |
| if [ ! -f .coverage ]; then | |
| echo "::error::.coverage was not generated for shard ${SHARD}" | |
| exit 1 | |
| fi | |
| mv .coverage ".coverage.${SHARD}" | |
| - name: Upload coverage shard artifact | |
| if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13' && github.event_name == 'push' | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| with: | |
| name: coverage-data-${{ matrix.shard }} | |
| path: .coverage.${{ matrix.shard }} | |
| if-no-files-found: error | |
| include-hidden-files: true | |
| retention-days: 1 | |
| coverage-report: | |
| name: Coverage report | |
| needs: [test] | |
| if: github.event_name == 'push' && github.ref == 'refs/heads/main' | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 10 | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| - uses: ./.github/actions/bootstrap | |
| with: | |
| python-version: "3.13" | |
| - name: Download coverage shard artifacts | |
| uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 | |
| with: | |
| pattern: coverage-data-* | |
| path: coverage-shards | |
| merge-multiple: true | |
| - name: Merge coverage shards | |
| # Combines the downloaded per-shard data files into one coverage.xml | |
| # and fails if fewer shard files than expected were downloaded. | |
| env: | |
| # Number of coverage shard files the test job uploads on push. | |
| # Keep in sync with the test job's TEST_SHARD_COUNT; that value is | |
| # job-level env there and is not visible from this job. | |
| EXPECTED_SHARD_COUNT: "4" | |
| run: | | |
| shard_count=$(find coverage-shards -name '.coverage.*' -type f | wc -l | tr -d ' ') | |
| if [ "$shard_count" -lt "$EXPECTED_SHARD_COUNT" ]; then | |
| echo "::error::expected ${EXPECTED_SHARD_COUNT} coverage shard files, found $shard_count" | |
| find coverage-shards -maxdepth 2 -type f -print | |
| exit 1 | |
| fi | |
| uv run python -m coverage combine coverage-shards/.coverage.* | |
| uv run python -m coverage xml --ignore-errors -o coverage.xml | |
| # An empty or missing coverage.xml fails the step. | |
| test -s coverage.xml | |
| - name: Upload coverage report artifact | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| with: | |
| name: coverage-report | |
| path: coverage.xml | |
| if-no-files-found: error | |
| retention-days: 1 | |
| - name: Upload coverage to Codecov | |
| uses: codecov/codecov-action@fb8b3582c8e4def4969c97caa2f19720cb33a72f # v7.0.0 | |
| with: | |
| files: coverage.xml | |
| fail_ci_if_error: false | |
| token: ${{ secrets.CODECOV_TOKEN }} | |
| test-macos: | |
| # macOS half of the test matrix, split out for #1468 (macOS hosted | |
| # runner saturation). Runs unconditionally on push (so every commit | |
| # that reaches main has a fresh macOS signal), and on PRs only when | |
| # one of the gate conditions is met: | |
| # - the planner classified the diff as macos_sensitive (touched | |
| # a module with `sys.platform == "darwin"` branches, the | |
| # tunnels driver layer, the daemon installer, the runtime | |
| # state code, the macOS clipboard/notifications wrappers, or | |
| # ci.yml / ci-macos-nightly.yml themselves); | |
| # - the PR carries the `macos-needed` label (operator opt-in for | |
| # cross-platform work the path filter cannot detect). | |
| # Otherwise this job is skipped on the PR and ci-macos-nightly.yml | |
| # provides the safety-net coverage at 06:00 UTC each day. | |
| # | |
| # Coverage / JUnit / Codecov uploads are NOT mirrored here; they | |
| # remain ubuntu-only (matches the previous matrix gating). | |
| # | |
| # macOS sharding decision: the ubuntu `test` job fans out | |
| # across a `shard` matrix dimension, but macOS CANNOT - this job's | |
| # `name:` MUST stay the literal string below (branch-protection | |
| # required-context + required-check-canary.yml both pin it; a matrix | |
| # `shard` dimension would template the name and break the lock). So | |
| # instead of sharding macOS, we shrink its per-push workload to a | |
| # single deterministic quarter of the file list | |
| # (`--shard 1/MACOS_PUSH_SHARD_COUNT`). This fits the time budget without | |
| # the 90-min wall the full macOS suite hit, keeps a real (deterministic, ~1/4) | |
| # macOS signal on every commit that lands on main, and leaves | |
| # ci-macos-nightly.yml to run the FULL macOS matrix as the safety | |
| # net. On PRs the job already runs `--affected` (impacted slice), so | |
| # macos_sensitive PRs still exercise exactly the touched code on macOS. | |
| # The merge queue (merge_group) skips this job entirely (see the | |
| # ci-gate MACOS_SKIP_EVENTS handling) - the post-merge push is what | |
| # carries the macOS signal. | |
| # | |
| # Literal job name -- NOT templated. The branch-protection required | |
| # context for macOS test runs depends on this exact string; when the | |
| # job is skipped via the `if:` gate, GitHub posts the templated form | |
| # verbatim, which never matches a required-context rule. The literal | |
| # form keeps the name resolvable in every state (success, fail, skip). | |
| # required-check-canary.yml asserts this remains literal. | |
| name: Test (macos-latest, Python 3.13) | |
| needs: [lint, determine-changes] | |
| if: >- | |
| github.event_name == 'push' || | |
| needs.determine-changes.outputs.macos_sensitive == 'true' || | |
| contains(github.event.pull_request.labels.*.name, 'macos-needed') | |
| runs-on: macos-latest | |
| timeout-minutes: 90 | |
| permissions: | |
| contents: read | |
| env: | |
| # Per-push macOS runs only shard 1 of this many - a deterministic | |
| # ~1/4 subset of the file list. ci-macos-nightly.yml runs the full | |
| # matrix daily as the safety net. | |
| MACOS_PUSH_SHARD_COUNT: "4" | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| python-version: ["3.13"] | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| - uses: ./.github/actions/bootstrap | |
| with: | |
| python-version: ${{ matrix.python-version }} | |
| - name: Fetch base ref for impacted-test selection | |
| if: github.event_name == 'pull_request' | |
| env: | |
| BASE_REF: ${{ github.base_ref }} | |
| run: | | |
| git fetch --no-tags --depth=1 origin \ | |
| "refs/heads/${BASE_REF}:refs/remotes/origin/${BASE_REF}" | |
| - name: Run isolated test suite | |
| env: | |
| BASE_REF: ${{ github.base_ref }} | |
| EVENT_NAME: ${{ github.event_name }} | |
| MACOS_PUSH_SHARD_COUNT: ${{ env.MACOS_PUSH_SHARD_COUNT }} | |
| run: | | |
| # On PRs: run only the affected slice (impacted by the diff). | |
| # On push to main: run a single deterministic quarter of the | |
| # file list (`--shard 1/N`) so the macOS cell fits its time | |
| # budget; ci-macos-nightly.yml runs the full matrix daily. | |
| if [ "${EVENT_NAME}" = "pull_request" ] && \ | |
| git rev-parse --verify "refs/remotes/origin/${BASE_REF}" >/dev/null 2>&1; then | |
| uv run python scripts/run_tests.py --parallel 4 --affected "refs/remotes/origin/${BASE_REF}" | |
| else | |
| uv run python scripts/run_tests.py --parallel 4 \ | |
| --shard "1/${MACOS_PUSH_SHARD_COUNT}" | |
| fi | |
| - name: Run capability-matrix spawn-refusal integration tests | |
| run: | | |
| uv run pytest tests/integration/test_capability_matrix_spawn_refusal.py \ | |
| -x -q --tb=short --timeout=120 | |
| # ─── Post-pipeline (conditional, never cancelled) ────────────────────── | |
| autofix: | |
| # Post-pipeline job: on a push to main that was not made by one of the bot | |
| # accounts named in `if:`, apply ruff fixes/formatting under src/, untrack | |
| # any committed .sdd files, and push the result as one commit. | |
| # `continue-on-error: true` keeps a failure here from failing the run. | |
| name: Auto-fix lint | |
| runs-on: ubuntu-latest | |
| needs: [lint, repo-hygiene] | |
| if: github.event_name == 'push' && github.ref == 'refs/heads/main' && github.actor != 'bernstein[bot]' && github.actor != 'bernstein-orchestrator[bot]' | |
| continue-on-error: true | |
| timeout-minutes: 15 | |
| permissions: | |
| contents: write | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| # The loop check below only inspects the last 3 commits. | |
| fetch-depth: 5 | |
| - name: Check for autofix loop | |
| id: loop_check | |
| run: | | |
| # Count auto-fix subjects among the last 3 commits. `grep -c` exits 1 | |
| # when it counts zero matches (while still printing 0), hence `|| true`. | |
| RECENT=$(git log --oneline -3 --format='%s' | grep -c "style: auto-fix" || true) | |
| if [ "$RECENT" -ge 3 ]; then | |
| echo "::warning::Autofix loop detected - last 3 commits are all auto-fix. Skipping." | |
| echo "skip=true" >> "$GITHUB_OUTPUT" | |
| exit 0 | |
| fi | |
| - if: steps.loop_check.outputs.skip != 'true' | |
| uses: ./.github/actions/bootstrap | |
| - name: Auto-fix ruff | |
| if: steps.loop_check.outputs.skip != 'true' | |
| run: | | |
| # `ruff check` exits non-zero when unfixable findings remain; that must | |
| # not stop the formatter from running. | |
| uv run ruff check src/ --fix --unsafe-fixes || true | |
| uv run ruff format src/ | |
| - name: Purge tracked .sdd files | |
| if: steps.loop_check.outputs.skip != 'true' | |
| run: | | |
| # Drop anything under .sdd from the index (files stay on disk). | |
| # --ignore-unmatch makes this a no-op when nothing is tracked, and | |
| # letting git walk the path avoids word-splitting file names. | |
| git rm -r --cached --ignore-unmatch -- .sdd | |
| - name: Commit and push fixes | |
| if: steps.loop_check.outputs.skip != 'true' | |
| env: | |
| # checkout ran with persist-credentials: false, so no token is left in | |
| # the git config; hand it to the push explicitly instead. | |
| GH_TOKEN: ${{ github.token }} | |
| run: | | |
| git add src/ tests/ scripts/ | |
| if git diff --cached --quiet; then | |
| echo "Nothing to fix" | |
| exit 0 | |
| fi | |
| git config user.name "bernstein[bot]" | |
| git config user.email "bernstein-bot@users.noreply.github.com" | |
| git commit -m "style: auto-fix ruff lint and format" | |
| # The job `if:` pins GITHUB_REF to refs/heads/main. | |
| git push "https://x-access-token:${GH_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" "HEAD:${GITHUB_REF}" | |
| close-ci-issues: | |
| # Post-gate housekeeping: once `ci-gate` succeeds on a push to main, close | |
| # every open issue labelled `ci-fix` with a comment linking this run. | |
| name: Close resolved CI issues | |
| runs-on: ubuntu-latest | |
| needs: [ci-gate] | |
| if: > | |
| success() && | |
| needs.ci-gate.result == 'success' && | |
| github.ref == 'refs/heads/main' && | |
| github.event_name == 'push' | |
| timeout-minutes: 5 | |
| permissions: | |
| contents: read | |
| issues: write | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7 | |
| with: | |
| persist-credentials: false | |
| - name: Close ci-fix issues | |
| env: | |
| GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| # Expressions are expanded here rather than inside the script, so their | |
| # values reach the shell as data instead of as script text. | |
| GATE_RESULT: ${{ needs.ci-gate.result }} | |
| RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} | |
| COMMIT_SHA: ${{ github.sha }} | |
| run: | | |
| # Best-effort: a failed listing yields an empty list and a failed close | |
| # is ignored. --limit lifts gh's default cap of 30 results. | |
| ISSUES=$(gh issue list --label ci-fix --state open --limit 1000 --json number --jq '.[].number' 2>/dev/null || echo "") | |
| for NUM in $ISSUES; do | |
| gh issue close "$NUM" --comment "CI aggregate gate result: ${GATE_RESULT}. Run: ${RUN_URL}. Commit: \`${COMMIT_SHA}\`." || true | |
| done | |
| # Note: the previous `self-heal` issue-creating job was superseded by the | |
| # `bernstein-ci-fix` workflow (.github/workflows/bernstein-ci-fix.yml), | |
| # which on a CI failure first attempts an auto-heal PR and only falls | |
| # back to opening a `ci-fix` issue when no diff is produced. | |
| pr-summary: | |
| # Cosmetic PR comment: renders one table row per upstream job result and | |
| # upserts it as a single comment identified by a hidden HTML marker. | |
| name: PR CI summary | |
| runs-on: ubuntu-latest | |
| # A status-check function is required here. Without one GitHub applies an | |
| # implicit `success()`, which skips this job whenever any job in `needs:` | |
| # failed or was skipped - exactly the runs whose summary matters. | |
| if: github.event_name == 'pull_request' && !cancelled() | |
| needs: | |
| - repo-hygiene | |
| - lint | |
| - typecheck | |
| - test | |
| - test-macos | |
| - spelling | |
| - dead-code | |
| - actionlint | |
| - dist-size | |
| - property-tests | |
| - snapshot-tests | |
| - schemathesis-smoke | |
| - semgrep | |
| - bandit | |
| - pip-audit | |
| - beartype | |
| - pyright-strict-zone | |
| timeout-minutes: 5 | |
| permissions: | |
| pull-requests: write | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 | |
| with: | |
| script: | | |
| const marker = '<!-- ci-summary -->'; | |
| const jobs = [ | |
| { name: 'Repo hygiene', result: '${{ needs.repo-hygiene.result }}' }, | |
| { name: 'Lint', result: '${{ needs.lint.result }}' }, | |
| { name: 'Type check report', result: '${{ needs.typecheck.result }}' }, | |
| { name: 'Tests', result: '${{ needs.test.result }}' }, | |
| { name: 'Tests (macOS)', result: '${{ needs.test-macos.result }}' }, | |
| { name: 'Spelling', result: '${{ needs.spelling.result }}' }, | |
| { name: 'Dead code', result: '${{ needs.dead-code.result }}' }, | |
| { name: 'Workflow lint', result: '${{ needs.actionlint.result }}' }, | |
| { name: 'Dist size', result: '${{ needs.dist-size.result }}' }, | |
| { name: 'Property (Hypothesis)', result: '${{ needs.property-tests.result }}' }, | |
| { name: 'Snapshot (syrupy)', result: '${{ needs.snapshot-tests.result }}' }, | |
| { name: 'Schemathesis smoke', result: '${{ needs.schemathesis-smoke.result }}' }, | |
| { name: 'Semgrep custom', result: '${{ needs.semgrep.result }}' }, | |
| { name: 'Bandit', result: '${{ needs.bandit.result }}' }, | |
| { name: 'pip-audit', result: '${{ needs.pip-audit.result }}' }, | |
| { name: 'Beartype', result: '${{ needs.beartype.result }}' }, | |
| { name: 'Pyright strict zone', result: '${{ needs.pyright-strict-zone.result }}' }, | |
| ]; | |
| // Map a job result to a glyph; any other value (e.g. `cancelled`) | |
| // falls through to the warning sign. | |
| const icon = (r) => r === 'success' ? '✅' : r === 'failure' ? '❌' : r === 'skipped' ? '⏭️' : '⚠️'; | |
| const rows = jobs.map((j) => `| ${j.name} | ${icon(j.result)} ${j.result} |\n`).join(''); | |
| const body = | |
| `${marker}\n### CI Summary\n\n` + | |
| '| Check | Result |\n|-------|--------|\n' + | |
| rows + | |
| '\nCoverage and detailed reports are available via Codecov and the Checks tab.'; | |
| const { owner, repo } = context.repo; | |
| const issue_number = context.issue.number; | |
| // Walk every page: on a busy PR the marker comment may not be on the | |
| // first page, and missing it would post a duplicate summary. | |
| const comments = await github.paginate(github.rest.issues.listComments, { | |
| owner, | |
| repo, | |
| issue_number, | |
| per_page: 100, | |
| }); | |
| const existing = comments.find((c) => c.body?.includes(marker)); | |
| if (existing) { | |
| await github.rest.issues.updateComment({ | |
| owner, | |
| repo, | |
| comment_id: existing.id, | |
| body, | |
| }); | |
| } else { | |
| await github.rest.issues.createComment({ | |
| owner, | |
| repo, | |
| issue_number, | |
| body, | |
| }); | |
| } | |
| # ─── Aggregator gate (closes #1273) ──────────────────────────────────── | |
| # | |
| # SINGLE required status check for branch protection. | |
| # | |
| # Why this exists: | |
| # * `ci.yml` has 20+ jobs (incl. an OS x Python matrix that fans out | |
| # `test (ubuntu-latest, 3.12)` etc.) - listing every contextual name | |
| # in branch-protection is fragile because matrix names drift with | |
| # each matrix change. | |
| # * A `skipped` job auto-passes branch protection. So does a job that | |
| # was never enqueued because an upstream `needs:` was `cancelled`. | |
| # Either condition can let a red commit reach `main`. | |
| # | |
| # This job fails on ANY non-success result - including `cancelled`, | |
| # `timed_out`, `action_required`, or a job that never ran. `skipped` | |
| # passes ONLY when the planner job (``determine-changes``) classified | |
| # the diff in a way that makes the skip intentional (docs-only PRs, | |
| # event-gated jobs, etc.). A `skipped` result for a job that should | |
| # have run is treated as a failure - pattern borrowed from pypa/pip's | |
| # CI aggregator (see PR #1273 for the discussion). | |
| # | |
| # Operator: after this PR merges, replace every required check in | |
| # branch protection with this single context (name shown in UI: | |
| # ``CI gate``). The exact `gh api` invocation is in PR #1273's body. | |
| # | |
| # Excluded from `needs:`: | |
| # * `autofix` - runs only on push to main, mutates the tree | |
| # * `close-ci-issues` - post-gate issue update, must not gate merges | |
| # * `pr-summary` - cosmetic PR comment, must not gate merges | |
| # * `typecheck` - advisory report while repo-wide pyright is being typed | |
| # * `mutmut-diff` - advisory report until mutation score enforcement is enabled | |
| # * `diff-coverage` - advisory report until PR coverage artifacts are reliable | |
| # * `ci-gate` itself - would deadlock the dependency graph | |
| ci-gate: | |
| # Aggregator job: reads the result of every job in `needs:` and exits | |
| # non-zero unless each one is `success` or a `skipped` that the rules in | |
| # the roll-up script below classify as intentional. | |
| name: CI gate | |
| runs-on: ubuntu-latest | |
| # `always()` ensures the gate fires even when an upstream job is | |
| # `cancelled`. `!cancelled()` lets the gate itself be cancelled when | |
| # the user cancels the whole workflow run (so we don't try to | |
| # report "cancelled" on a manually aborted run). | |
| if: always() && !cancelled() | |
| needs: | |
| - determine-changes | |
| - repo-hygiene | |
| - lint | |
| - spelling | |
| - actionlint | |
| - lineage-gate | |
| - dead-code | |
| - dist-size | |
| - install-smoke-pipx | |
| - install-smoke-uv | |
| - property-tests | |
| - snapshot-tests | |
| - schemathesis-smoke | |
| - semgrep | |
| - bandit | |
| - pip-audit | |
| - beartype | |
| - pyright-strict-zone | |
| - adapter-integration | |
| - adapter-integration-macos | |
| - test | |
| - coverage-report | |
| - test-macos | |
| timeout-minutes: 3 | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Harden runner (audit mode) | |
| uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 | |
| with: | |
| egress-policy: audit | |
| - id: roll-up | |
| name: Roll up needs.*.result with conditional allowed-skips | |
| env: | |
| # The contexts are serialised to JSON here and handed to the script | |
| # as environment variables rather than expanded into its text. | |
| NEEDS_JSON: ${{ toJSON(needs) }} | |
| PLAN_JSON: ${{ toJSON(needs.determine-changes.outputs) }} | |
| EVENT_NAME: ${{ github.event_name }} | |
| run: | | |
| # Write JSONs to disk so the python heredoc can read them | |
| # without worrying about shell quoting on multiline content. | |
| printf '%s' "$NEEDS_JSON" > results.json | |
| printf '%s' "$PLAN_JSON" > plan.json | |
| python3 - <<'PY' | |
| import json, os, sys | |
| # data: the `needs` context (job name -> object; only `result` is read). | |
| # plan: the outputs of the determine-changes job. | |
| data = json.load(open("results.json")) | |
| plan = json.load(open("plan.json")) | |
| event = os.environ.get("EVENT_NAME", "") | |
| # Jobs that intentionally skip on docs-only changes (no python | |
| # / tests / workflows touched). Mirrors paths-ignore at the top. | |
| DOCS_ONLY_SKIPPABLE = { | |
| "test", | |
| "schemathesis-smoke", | |
| "snapshot-tests", | |
| "property-tests", | |
| "beartype", | |
| "adapter-integration", | |
| "pyright-strict-zone", | |
| "semgrep", | |
| "bandit", | |
| "pip-audit", | |
| "dead-code", | |
| "dist-size", | |
| "install-smoke-pipx", | |
| "install-smoke-uv", | |
| } | |
| # macOS-gated jobs (see #1468): on PRs these are skipped unless | |
| # the diff is macos_sensitive or the PR carries the | |
| # `macos-needed` label. Always run on push events. | |
| MACOS_GATED = {"test-macos", "adapter-integration-macos"} | |
| # Push-only jobs are required on native main pushes but skip | |
| # intentionally on PR, workflow_dispatch, and merge_group runs. | |
| PUSH_ONLY = {"coverage-report"} | |
| # Events under which a macOS-gated skip is intentional. PRs and | |
| # manual dispatch already gate macOS behind macos_sensitive / | |
| # label. merge_group must be included too: a merge queue runs CI | |
| # on a synthetic merge_group ref where `github.event.pull_request` | |
| # is null, so the `macos-needed` label and `push`-only branches of | |
| # the job `if:` can never be true and these jobs always skip. The | |
| # merged commit still triggers a native `push` to main on which | |
| # the macOS jobs run un-gated (test-macos runs its `--shard 1/N` | |
| # per-push subset there, not the full suite), and | |
| # ci-macos-nightly.yml covers | |
| # regression -- so tolerating the skip here is what keeps the queue | |
| # from wedging without losing macOS coverage on what actually lands. | |
| MACOS_SKIP_EVENTS = ("pull_request", "workflow_dispatch", "merge_group") | |
| # Job outputs arrive as strings, hence the comparison with "true". | |
| docs_only = plan.get("docs_only") == "true" | |
| macos_sensitive = plan.get("macos_sensitive") == "true" | |
| # Read the PR labels via the event payload. The aggregator | |
| # runs inside ci.yml so the same event.pull_request.labels | |
| # array used by the job `if:` is available here too. The | |
| # GITHUB_EVENT_PATH file is the canonical source. | |
| macos_labelled = False | |
| try: | |
| with open(os.environ["GITHUB_EVENT_PATH"]) as fh: | |
| payload = json.load(fh) | |
| labels = (payload.get("pull_request") or {}).get("labels") or [] | |
| macos_labelled = any( | |
| (lbl.get("name") == "macos-needed") for lbl in labels | |
| ) | |
| except Exception: | |
| # Best-effort: any failure to read or parse the payload is | |
| # treated as "label absent". | |
| macos_labelled = False | |
| # (job name, result) pairs that fail the gate. | |
| bad = [] | |
| for name, info in data.items(): | |
| r = info.get("result") | |
| if r == "success": | |
| continue | |
| if r == "skipped": | |
| if name in PUSH_ONLY and event != "push": | |
| continue | |
| if docs_only and name in DOCS_ONLY_SKIPPABLE: | |
| continue | |
| # macOS-gated jobs are allowed to skip on: | |
| # - PRs when diff is not macos_sensitive AND no | |
| # `macos-needed` label. | |
| # - workflow_dispatch (manual re-runs from hotfix | |
| # agents) when diff is not macos_sensitive. | |
| # - merge_group (merge-queue ref) where the label/push | |
| # branches of the job `if:` cannot be true; the | |
| # post-merge push to main runs macOS un-gated. | |
| # On native push events macOS must run. | |
| # Nightly ci-macos-nightly.yml covers regression. | |
| if ( | |
| name in MACOS_GATED | |
| and event in MACOS_SKIP_EVENTS | |
| and not macos_sensitive | |
| and not macos_labelled | |
| ): | |
| continue | |
| # Everything that reaches this point - a skip no rule above | |
| # allowed, or any other non-success result - fails the gate. | |
| bad.append((name, r)) | |
| if bad: | |
| print("::error::CI gate FAILED - these jobs were not success " | |
| "and not intentionally skipped:") | |
| for n, r in bad: | |
| print(f" - {n}: result={r}") | |
| sys.exit(1) | |
| print(f"CI gate: all required jobs passed " | |
| f"(or intentionally skipped). docs_only={docs_only}, event={event}") | |
| for n, info in sorted(data.items()): | |
| print(f" {n}: {info.get('result')}") | |
| PY | |