Adapter contract drift #223
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: Adapter contract drift | |
| # Capability-only drift detection for adapter CLIs (refs #1291). | |
| # | |
| # For every contract under tests/contract/contracts/<name>.yaml we: | |
| # 1. Install the upstream CLI per contract.install.spec. | |
| # 2. Invoke ``bernstein adapters contract-check <name> --json``, which | |
| # verifies the CLI's ``--help`` still advertises every required | |
| # flag / subcommand. When a contract names a secret_env and that | |
| # secret is configured on the runner, the configured model-list | |
| # command is run too. | |
| # 3. Upload the per-adapter result JSON as an artifact. | |
| # | |
| # An aggregator job downloads every artifact. If any adapter failed it | |
| # opens (or refreshes) an issue titled "Adapter contract drift - <list>" | |
| # and fails the workflow. Drift is a hard fail per the refined design; | |
| # there is no batched auto-PR. | |
| # | |
| # Triggers: | |
| # * Three times daily on schedule so upstream releases surface fast. | |
| # * On PRs touching adapters or contracts so contract edits get | |
| # validated immediately. | |
| # * Manual dispatch for operator-driven re-runs. | |
# Triggers: a fixed daily schedule, pull requests that touch the adapters,
# the contracts or this workflow file, and manual dispatch.
on:
  schedule:
    # Minute 0 of hours 6, 14 and 22, every day.
    - cron: '0 6,14,22 * * *'
  pull_request:
    paths:
      - 'src/bernstein/adapters/**'
      - 'tests/contract/**'
      - '.github/workflows/adapter-contract-drift.yml'
  workflow_dispatch:

# At most one in-flight run per ref; a newer run cancels the older one.
concurrency:
  group: adapter-contract-drift-${{ github.ref }}
  cancel-in-progress: true

# Workflow-wide default token scope; jobs restate what they need.
permissions:
  contents: read
| jobs: | |
# One matrix row per adapter contract. fail-fast is off, so a failing row
# does not cancel the remaining adapters.
check:
  name: ${{ matrix.adapter }}
  runs-on: ubuntu-latest
  timeout-minutes: 5
  strategy:
    fail-fast: false
    matrix:
      # Each entry is used as tests/contract/contracts/<entry>.yaml and as
      # the argument to `bernstein adapters contract-check`.
      adapter:
        - claude
        - gemini
        - codex
        - aider
        - opencode
        - aichat
        - crush
        - amp
        - continue_dev
        - plandex
        - goose
        - q_dev
        - gptme
        - forge
        - qwen
  # Read-only token for the matrix rows.
  permissions:
    contents: read
  steps:
# Egress is only audited here, not blocked.
- name: Harden runner (audit mode)
  uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411  # v2.19.4
  with:
    egress-policy: audit

# Check out the repository without persisting the token in git config.
- uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7
  with:
    persist-credentials: false

- uses: actions/setup-python@ece7cb06caefa5fff74198d8649806c4678c61a1  # v6.3.0
  with:
    # aider-chat does not yet support Python 3.14; the aider matrix row
    # stays on 3.13 until upstream lands 3.14 compatibility. Every other
    # row runs on 3.14.
    python-version: ${{ matrix.adapter == 'aider' && '3.13' || '3.14' }}

- uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e  # v6
  with:
    node-version: '24'

# Repository-local action from .github/actions/bootstrap.
# NOTE(review): presumably provides `uv`, which the next step calls - confirm.
- uses: ./.github/actions/bootstrap
- name: Install bernstein
  # ``bernstein.cli.main`` pulls in cryptography/lineage transitively when
  # the module is loaded, so the package is installed together with its
  # runtime dependencies. Cache hits keep this under 30s per matrix job.
  run: |
    set -euo pipefail
    # Editable install of the checked-out tree into the runner's Python.
    uv pip install --system -e .
# Reads install.method, install.spec and binary from the adapter's contract
# file and publishes them as the step outputs `method`, `spec` and `binary`.
- name: Read contract metadata
  id: contract
  env:
    # Handed over through the environment instead of being spliced into
    # the Python source, so the script text is identical for every row.
    ADAPTER: ${{ matrix.adapter }}
  run: |
    set -euo pipefail
    python - <<'PY' >> "$GITHUB_OUTPUT"
    import os
    from pathlib import Path
    import yaml
    contract_path = Path("tests/contract/contracts") / f"{os.environ['ADAPTER']}.yaml"
    data = yaml.safe_load(contract_path.read_text())
    if not isinstance(data, dict):
        # Empty or malformed contract: fail with a readable message rather
        # than an AttributeError from .get() on a non-mapping.
        raise SystemExit(f"{contract_path}: contract must be a YAML mapping")
    install = data.get("install") or {}
    fields = {
        "method": install.get("method", ""),
        "spec": install.get("spec", ""),
        "binary": data.get("binary", ""),
    }
    lines = []
    for key, value in fields.items():
        # An explicit null in the contract becomes an empty output, not the
        # literal string "None".
        text = "" if value is None else str(value)
        # key=value outputs are single-line; a line break inside a value
        # would let the contract file define additional step outputs.
        if "\n" in text or "\r" in text:
            raise SystemExit(f"{contract_path}: field {key!r} contains a line break")
        lines.append(f"{key}={text}")
    # Only written once every field has been validated.
    print("\n".join(lines))
    PY
- name: Install upstream CLI
  # Scorecard pinned-dependencies (NPM/PIP/PIPX): the install SPEC comes
  # from tests/contract/contracts/*.yaml and is the adapter's
  # *upstream-recommended* version pin. Hash-pinning at this layer is
  # impossible without forking every upstream CLI's release pipeline; the
  # contract YAML *is* the pinning surface.
  env:
    # Outputs of the "Read contract metadata" step.
    METHOD: ${{ steps.contract.outputs.method }}
    SPEC: ${{ steps.contract.outputs.spec }}
    BINARY: ${{ steps.contract.outputs.binary }}
  run: |
    set -euo pipefail
    echo "Installing $BINARY via $METHOD: $SPEC"
    if [[ "$METHOD" == "npm" ]]; then
      npm install -g "$SPEC"
    elif [[ "$METHOD" == "pipx" ]]; then
      python -m pip install --user pipx
      python -m pipx install "$SPEC"
      # pipx default user bin, exposed to the following steps.
      echo "$HOME/.local/bin" >> "$GITHUB_PATH"
    elif [[ "$METHOD" == "curl" ]]; then
      # Free-form install scripts vary wildly. We deliberately do not
      # curl-pipe-bash an arbitrary URL in CI; nothing is installed for
      # this method and only a notice is emitted.
      echo "::notice::curl-installed CLI '$BINARY' is not auto-installed in CI; running help-only check"
    elif [[ "$METHOD" == "cargo" ]]; then
      # Cargo builds are slow; nothing is installed for this method either.
      echo "::notice::cargo-installed CLI '$BINARY' is not auto-installed in CI; running help-only check"
    else
      # Unrecognised (or empty) method: warn and carry on without installing.
      echo "::warning::unknown install method '$METHOD' for ${{ matrix.adapter }}"
    fi
# Runs the checker for this adapter, keeps its stdout/stderr under
# drift-out/, publishes the exit code as the `exit` output, and maps that
# exit code onto the job result.
- name: Run contract check
  id: check
  env:
    # When a secret exists on this runner the checker can use it for the
    # optional model-list check; when absent the adapter degrades to
    # help-only coverage.
    ANTHROPIC_API_KEY: ${{ secrets.ADAPTER_CONTRACT_ANTHROPIC_API_KEY }}
    OPENAI_API_KEY: ${{ secrets.ADAPTER_CONTRACT_OPENAI_API_KEY }}
    GEMINI_API_KEY: ${{ secrets.ADAPTER_CONTRACT_GEMINI_API_KEY }}
    ADAPTER: ${{ matrix.adapter }}
  run: |
    set -euo pipefail
    mkdir -p drift-out
    result_json="drift-out/$ADAPTER.json"
    result_err="drift-out/$ADAPTER.err"
    # Capture the exit status without tripping errexit.
    rc=0
    bernstein adapters contract-check "$ADAPTER" --json > "$result_json" 2> "$result_err" || rc=$?
    echo "exit=$rc" >> "$GITHUB_OUTPUT"
    echo "--- stdout ---"
    cat "$result_json" || true
    echo "--- stderr ---"
    cat "$result_err" || true
    # Exit codes:
    #   0 - contract holds.
    #   2 - capability/model drift (hard fail by design).
    #   3 - upstream CLI runtime failure (--help broken in the sandbox
    #       env). Not drift; surfaced as a warning so an operator
    #       investigates the CLI install without blocking unrelated PRs
    #       on a transient upstream issue.
    # Anything else means the checker itself broke; that is a workflow
    # error so drift is never silently green-lit.
    if [[ "$rc" -eq 0 ]]; then
      exit 0
    fi
    if [[ "$rc" -eq 3 ]]; then
      echo "::warning::contract-check exited 3 for $ADAPTER (upstream CLI runtime failure, not drift)"
      exit 0
    fi
    if [[ "$rc" -eq 2 ]]; then
      echo "::error::contract drift in $ADAPTER"
    else
      echo "::error::contract-check exited $rc for $ADAPTER (checker error, not drift)"
    fi
    exit 1
# Publishes the checker's JSON for the aggregate job. Runs even when an
# earlier step failed; a missing file only produces a warning.
- name: Upload result
  if: always()
  uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
  with:
    # Artifact name carries the `drift-` prefix the aggregate job's
    # download pattern matches.
    name: drift-${{ matrix.adapter }}
    path: drift-out/${{ matrix.adapter }}.json
    if-no-files-found: warn
    retention-days: 14
# Collects every per-adapter artifact after the matrix finishes. Runs
# regardless of the matrix outcome so failures can be reported.
aggregate:
  name: Aggregate drift report
  needs: check
  if: always()
  runs-on: ubuntu-latest
  timeout-minutes: 5
  permissions:
    contents: read
    # The steps below create, edit and close the tracking issue.
    issues: write
  steps:
# Egress is only audited here, not blocked.
- name: Harden runner (audit mode)
  uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411  # v2.19.4
  with:
    egress-policy: audit

# Check out the repository without persisting the token in git config.
- uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7
  with:
    persist-credentials: false

# Fetch every drift-<adapter> artifact and flatten them into drift-out/.
- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c  # v8.0.1
  with:
    path: drift-out
    pattern: 'drift-*'
    merge-multiple: true
# Reads every drift-out/*.json and publishes three step outputs:
#   failed  - comma-separated adapters with failures or unreadable artifacts
#   details - one markdown bullet per failure, or "(none)"
#   notes   - adapters that only reported a skipped_reason, or "(none)"
- name: Summarise drift
  id: summarise
  run: |
    set -euo pipefail
    python - <<'PY' >> "$GITHUB_OUTPUT"
    import json
    import secrets
    from pathlib import Path
    failed = []
    help_only = []
    details = []
    for p in sorted(Path("drift-out").glob("*.json")):
        try:
            data = json.loads(p.read_text())
            # Valid JSON that is not an object (e.g. a list or a bare
            # string) is as unusable as unparseable JSON; report it the
            # same way instead of crashing on .get() below.
            if not isinstance(data, dict):
                raise ValueError(f"expected a JSON object, got {type(data).__name__}")
        except Exception as exc:
            details.append(f"- {p.stem}: unreadable artifact ({exc})")
            failed.append(p.stem)
            continue
        adapter = data.get("adapter", p.stem)
        caps = data.get("capability_failures") or []
        models = data.get("model_failures") or []
        skipped = data.get("skipped_reason") or ""
        if caps or models:
            failed.append(adapter)
            for line in caps + models:
                details.append(f"- **{adapter}**: {line}")
        if skipped and not (caps or models):
            help_only.append(f"- {adapter}: {skipped}")
    # Multi-line outputs use the heredoc form. The delimiter is random per
    # value: the text comes from checker results, and a fixed delimiter
    # appearing on a line of its own would end the value early and let the
    # remaining lines be parsed as further outputs.
    def emit(key: str, value: str) -> None:
        delimiter = f"ghadelimiter_{secrets.token_hex(16)}"
        print(f"{key}<<{delimiter}")
        print(value)
        print(delimiter)
    emit("failed", ",".join(failed))
    emit("details", "\n".join(details) or "(none)")
    emit("notes", "\n".join(help_only) or "(none)")
    PY
# When the summary lists failed adapters: rewrite the title and body of the
# open tracking issue if one exists, otherwise create a new one.
- name: Open or refresh tracking issue on failure
  if: steps.summarise.outputs.failed != ''
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    REPO: ${{ github.repository }}
    RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
    FAILED: ${{ steps.summarise.outputs.failed }}
    DETAILS: ${{ steps.summarise.outputs.details }}
    NOTES: ${{ steps.summarise.outputs.notes }}
  run: |
    set -euo pipefail
    TITLE="Adapter contract drift - ${FAILED}"
    BODY="Adapter contract drift detected on $(date -u +%Y-%m-%dT%H:%MZ).
    ## Capability / model failures
    ${DETAILS}
    ## Help-only coverage (informational)
    ${NOTES}
    Workflow run: ${RUN_URL}
    Refs #1291."
    # The search is a loose word match on titles, so it can also return
    # issues that merely mention these words. Only an issue whose title
    # starts with the exact prefix this workflow writes is treated as the
    # tracking issue; otherwise `gh issue edit` below could overwrite an
    # unrelated issue's title and body.
    EXISTING=$(gh issue list \
      --repo "$REPO" \
      --search "in:title Adapter contract drift" \
      --state open \
      --json number,title \
      --jq 'map(select(.title | startswith("Adapter contract drift - "))) | .[0].number // ""')
    if [ -n "$EXISTING" ]; then
      echo "Refreshing existing tracking issue #$EXISTING"
      gh issue edit "$EXISTING" \
        --repo "$REPO" \
        --title "$TITLE" --body "$BODY"
    else
      gh issue create \
        --repo "$REPO" \
        --title "$TITLE" \
        --label "ci,contract-drift,bot" \
        --body "$BODY"
    fi
- name: Auto-close tracking issue when all adapters pass
  # When the summariser finds no failures we close any open
  # "Adapter contract drift - " tracking issue so the inbox does not stay
  # red after the upstream regression is fixed. Idempotent: if nothing is
  # open this is a no-op.
  #
  # Limited to schedule / workflow_dispatch so a PR-time run cannot close
  # an issue that tracks main-only drift (PR may not have the fix; only
  # the scheduled main run is authoritative).
  #
  # Also requires the whole `check` matrix to have succeeded. The summary
  # only sees artifacts that were uploaded, so a row that failed before
  # producing its JSON (for example during install) leaves `failed` empty
  # even though that adapter never reported clean.
  if: >-
    steps.summarise.outputs.failed == '' &&
    needs.check.result == 'success' &&
    (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    REPO: ${{ github.repository }}
    RUN_ID: ${{ github.run_id }}
    RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  run: |
    set -euo pipefail
    # The search is a loose word match on titles; keep only issues whose
    # title starts with the exact prefix this workflow writes, so issues
    # that merely mention these words are never closed.
    OPEN=$(gh issue list \
      --repo "$REPO" \
      --search "in:title Adapter contract drift" \
      --state open \
      --json number,title \
      --jq '.[] | select(.title | startswith("Adapter contract drift - ")) | .number')
    if [ -z "$OPEN" ]; then
      echo "::notice::No open adapter-contract-drift tracking issue to close."
      exit 0
    fi
    for NUM in $OPEN; do
      echo "Closing tracking issue #${NUM} (all adapters green on run ${RUN_ID})."
      # Best effort: a failed close is reported as a warning and does not
      # fail the job or stop the remaining issues from being closed.
      gh issue close "$NUM" \
        --repo "$REPO" \
        --comment "All adapters reported clean on workflow run ${RUN_URL}." \
        --reason completed \
        || echo "::warning::could not close tracking issue #${NUM}"
    done
# Last step: turn a non-empty `failed` summary into a failed job, after the
# tracking issue has been handled.
- name: Fail when any adapter failed
  if: steps.summarise.outputs.failed != ''
  env:
    FAILED: ${{ steps.summarise.outputs.failed }}
  run: |
    printf '%s\n' "::error::adapter contract drift: ${FAILED}"
    exit 1