Skip to content

Post-CI dispatcher #394

Post-CI dispatcher

Post-CI dispatcher #394

name: Post-CI dispatcher
# Single `workflow_run` listener for completed CI runs. It consolidates the
# fan-out previously spread across five sibling workflows (auto-release,
# auto-heal, bernstein-ci-fix, bisect-on-red, telegram-notify). Each child is
# now a reusable workflow (`on: workflow_call:`) and runs only when the
# dispatcher's routing logic selects it.
#
# Routing summary:
#   - telegram-notify  -> fires on any conclusion other than success,
#                         skipped, or neutral.
#   - auto-release     -> fires on the main branch only.
#   - auto-heal        -> fires on failure on main, gated on canonical repo
#                         and recursion guards.
#   - bernstein-ci-fix -> fires on failure on main only when auto-heal did
#                         NOT open a heal PR. Prevents the parallel race
#                         that previously caused both heals to fight over
#                         the same SHA.
#   - bisect-on-red    -> fires on failure on main.
#
# Each child reusable workflow re-checks its own preconditions, so the
# dispatcher gates are best-effort routing rather than security boundaries.
#
# Safety note (zizmor dangerous-triggers): the workflow_run trigger is
# intentional. The dispatcher only reads metadata fields (conclusion, sha,
# branch, repository, ...) for its job-level `if:` conditions and child
# inputs. No attacker-controlled value is interpolated into a `run:` script
# in this file; the single `run:` step receives the event fields through
# `env:` variables. The child reusable workflows enforce their own
# canonical-repo and recursion guards.
# Trigger: runs once whenever a run of the workflow named "CI" completes for
# a branch called `main`.
on:  # zizmor: ignore[dangerous-triggers]
  workflow_run:
    workflows:
      - CI
    types:
      - completed
    branches:
      - main
# One concurrency group per upstream head SHA. A dispatcher run that is
# already in progress is never cancelled by a newer one for the same commit.
concurrency:
  group: 'post-ci-dispatcher-${{ github.event.workflow_run.head_sha }}'
  cancel-in-progress: false
# Workflow-level permissions stay minimal (nothing is granted here). Every
# job below that calls a reusable child declares its own job-level
# `permissions:` block, scoped to the union of the scopes that the called
# workflow's jobs request.
#
# Why per-job blocks are required: a reusable workflow inherits the
# *calling job's* GITHUB_TOKEN permissions, and the GitHub Actions
# validator rejects the call at workflow_run boot (zero check_runs,
# conclusion: startup_failure) when the caller grants less than any callee
# job requests. The repo default for GITHUB_TOKEN is `read` only, so with
# just an empty `permissions: {}` at this level and no job-level grants,
# every callee with a `contents: write` / `pull-requests: write` /
# `issues: write` / `attestations: write` / `id-token: write` job failed
# the call before any job was scheduled. That was the cause of the silent
# startup_failure on every dispatcher run since the consolidation merged.
permissions: {}
jobs:
# Resolve the upstream CI run metadata exactly once and expose it as job
# outputs, so every routed child receives the same values through its
# workflow_call inputs and every routing `if:` reads from one place.
meta:
  name: Resolve upstream metadata
  runs-on: ubuntu-latest
  timeout-minutes: 2
  permissions:
    contents: read
  # Each output mirrors one field of the triggering workflow_run payload;
  # short_sha is the first 12 characters of head_sha.
  outputs:
    head_sha: ${{ steps.out.outputs.head_sha }}
    short_sha: ${{ steps.out.outputs.short_sha }}
    head_branch: ${{ steps.out.outputs.head_branch }}
    head_repo: ${{ steps.out.outputs.head_repo }}
    conclusion: ${{ steps.out.outputs.conclusion }}
    run_id: ${{ steps.out.outputs.run_id }}
    run_name: ${{ steps.out.outputs.run_name }}
    html_url: ${{ steps.out.outputs.html_url }}
    display_title: ${{ steps.out.outputs.display_title }}
    actor_login: ${{ steps.out.outputs.actor_login }}
    event: ${{ steps.out.outputs.event }}
  steps:
    - name: Harden runner (audit mode)
      uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
      with:
        egress-policy: audit
    - name: Capture event metadata
      id: out
      # Event fields are handed to the script as environment variables and
      # never interpolated into the script text itself.
      env:
        HEAD_SHA: ${{ github.event.workflow_run.head_sha }}
        HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
        HEAD_REPO: ${{ github.event.workflow_run.head_repository.full_name }}
        CONCLUSION: ${{ github.event.workflow_run.conclusion }}
        RUN_ID: ${{ github.event.workflow_run.id }}
        RUN_NAME: ${{ github.event.workflow_run.name }}
        HTML_URL: ${{ github.event.workflow_run.html_url }}
        DISPLAY_TITLE: ${{ github.event.workflow_run.display_title }}
        ACTOR_LOGIN: ${{ github.event.workflow_run.actor.login }}
        EVENT: ${{ github.event.workflow_run.event }}
      run: |
        set -euo pipefail

        # emit KEY VALUE: print exactly one `KEY=VALUE` record.
        # $GITHUB_OUTPUT is line-oriented, so a CR/LF inside a value (for
        # example in a run title) would start a second record and could
        # override another output such as `conclusion` or `head_repo`,
        # which the routing gates depend on. Collapse CR/LF to a space so
        # each value stays on its own line. Single-line values are written
        # unchanged.
        emit() {
          local value
          value=${2//[$'\r\n']/ }
          printf '%s=%s\n' "$1" "${value}"
        }

        {
          emit head_sha "${HEAD_SHA}"
          emit short_sha "${HEAD_SHA:0:12}"
          emit head_branch "${HEAD_BRANCH}"
          emit head_repo "${HEAD_REPO}"
          emit conclusion "${CONCLUSION}"
          emit run_id "${RUN_ID}"
          emit run_name "${RUN_NAME}"
          emit html_url "${HTML_URL}"
          emit display_title "${DISPLAY_TITLE}"
          emit actor_login "${ACTOR_LOGIN}"
          emit event "${EVENT}"
        } >> "$GITHUB_OUTPUT"
telegram-notify:
  name: Telegram notify
  needs: meta
  # Route every upstream conclusion except success, skipped and neutral.
  if: >-
    needs.meta.outputs.conclusion != 'success' &&
    needs.meta.outputs.conclusion != 'skipped' &&
    needs.meta.outputs.conclusion != 'neutral'
  uses: ./.github/workflows/telegram-notify.yml
  with:
    conclusion: ${{ needs.meta.outputs.conclusion }}
    head_branch: ${{ needs.meta.outputs.head_branch }}
    head_sha: ${{ needs.meta.outputs.head_sha }}
    run_id: ${{ needs.meta.outputs.run_id }}
    html_url: ${{ needs.meta.outputs.html_url }}
  # Explicit secret list instead of `secrets: inherit` (which zizmor flags
  # as `secrets-inherit`): only the two Telegram credentials are forwarded,
  # so no other repository secret is exposed to the called workflow.
  secrets:
    TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
    TELEGRAM_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
  # Union of the permissions requested by the called workflow's jobs.
  permissions:
    actions: read
    contents: read
auto-release:
  name: Auto-release
  needs: meta
  # Route only CI runs for the canonical repository's main branch.
  # head_branch alone is not enough: a workflow_run raised by a fork pull
  # request reports the fork's branch name, which may also be `main`. The
  # head_repo comparison mirrors the canonical-repo gate already used by
  # auto-heal and bernstein-ci-fix, so this write-scoped job is not routed
  # for fork runs. The upstream conclusion is deliberately not gated here;
  # it is passed through as an input for the child to evaluate.
  if: >-
    needs.meta.outputs.head_branch == 'main' &&
    needs.meta.outputs.head_repo == github.repository
  # Union of permissions the called workflow's jobs request: it tags
  # the release commit (contents:write) and opens / closes the
  # auto-release-skipped issue (issues:write).
  permissions:
    contents: write
    issues: write
  uses: ./.github/workflows/auto-release.yml
  # auto-release only needs Telegram credentials for its alert job.
  # GITHUB_TOKEN is provided automatically to reusable workflows.
  secrets:
    TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
    TELEGRAM_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
  with:
    conclusion: ${{ needs.meta.outputs.conclusion }}
    head_branch: ${{ needs.meta.outputs.head_branch }}
    head_sha: ${{ needs.meta.outputs.head_sha }}
    run_id: ${{ needs.meta.outputs.run_id }}
    html_url: ${{ needs.meta.outputs.html_url }}
auto-heal:
  name: Auto-heal v2
  needs: meta
  # Gate: a failed CI run on main of the canonical repository. The last
  # three clauses are recursion guards: skip runs whose title carries one
  # of the heal commit prefixes and runs started by github-actions[bot].
  if: >-
    needs.meta.outputs.conclusion == 'failure' &&
    needs.meta.outputs.head_branch == 'main' &&
    needs.meta.outputs.head_repo == github.repository &&
    !startsWith(needs.meta.outputs.display_title, 'fix(ci-heal-v2):') &&
    !startsWith(needs.meta.outputs.display_title, 'fix(ci-heal):') &&
    needs.meta.outputs.actor_login != 'github-actions[bot]'
  uses: ./.github/workflows/auto-heal.yml
  with:
    head_sha: ${{ needs.meta.outputs.head_sha }}
    run_id: ${{ needs.meta.outputs.run_id }}
    display_title: ${{ needs.meta.outputs.display_title }}
  # Explicit secret list; GITHUB_TOKEN is provided automatically.
  # The Telegram pair lets auto-heal optionally ping on heal outcomes; both
  # are declared `required: false` on the called workflow so repos without
  # them still run cleanly. GLITCHTIP_DSN is forwarded as well.
  # NOTE(review): confirm GLITCHTIP_DSN is likewise optional on the callee.
  secrets:
    TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
    TELEGRAM_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
    GLITCHTIP_DSN: ${{ secrets.GLITCHTIP_DSN }}
  # Union of the permissions requested by the called workflow's jobs:
  #   contents + pull-requests (write) -> push heal branches, open heal PRs
  #   attestations + id-token (write)  -> attest provenance on the heal commit
  #   actions (read)                   -> read upstream CI run metadata
  permissions:
    contents: write
    pull-requests: write
    attestations: write
    id-token: write
    actions: read
bernstein-ci-fix:
  name: Bernstein CI fix
  # Depends on auto-heal so the two fixers run one after the other instead
  # of racing on the same failing SHA (ticket acceptance: auto-heal and
  # bernstein-ci-fix call each other via dispatcher when one fails instead
  # of both firing in parallel).
  needs: [meta, auto-heal]
  # always() keeps this job eligible when auto-heal was skipped or did not
  # succeed. It then runs only for a real CI failure on main of the
  # canonical repo, outside the recursion guards (title prefix `auto-heal:`
  # and the three bot actors), and only when auto-heal did NOT open a heal
  # PR: auto-heal was skipped, or reported an empty / `skipped_no_jobs` /
  # `failed_validation` heal_outcome.
  if: >-
    always() &&
    needs.meta.outputs.conclusion == 'failure' &&
    needs.meta.outputs.head_branch == 'main' &&
    needs.meta.outputs.head_repo == github.repository &&
    !startsWith(needs.meta.outputs.display_title, 'auto-heal:') &&
    needs.meta.outputs.actor_login != 'bernstein[bot]' &&
    needs.meta.outputs.actor_login != 'bernstein-orchestrator[bot]' &&
    needs.meta.outputs.actor_login != 'github-actions[bot]' &&
    (needs.auto-heal.result == 'skipped' ||
    needs.auto-heal.outputs.heal_outcome == '' ||
    needs.auto-heal.outputs.heal_outcome == 'skipped_no_jobs' ||
    needs.auto-heal.outputs.heal_outcome == 'failed_validation')
  uses: ./.github/workflows/bernstein-ci-fix.yml
  with:
    head_sha: ${{ needs.meta.outputs.head_sha }}
    head_branch: ${{ needs.meta.outputs.head_branch }}
    run_id: ${{ needs.meta.outputs.run_id }}
    display_title: ${{ needs.meta.outputs.display_title }}
    actor_login: ${{ needs.meta.outputs.actor_login }}
  # Explicit secret list; GITHUB_TOKEN is provided automatically.
  # GEMINI_API_KEY feeds the LLM triage agent and is declared
  # `required: false` on the called workflow, so jobs that need it gate on
  # a non-empty value rather than failing the whole reusable call.
  secrets:
    GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
    GLITCHTIP_DSN: ${{ secrets.GLITCHTIP_DSN }}
    # OPENROUTER_API_KEY_FREE: ${{ secrets.OPENROUTER_API_KEY_FREE }}
    # Re-enable the line above once OPENROUTER_API_KEY_FREE is defined on
    # the repo. The Tier-3 shadow-mode job (#1715) gates on
    # vars.BERNSTEIN_CI_SELF_DRIVE == 'tier3' (default off), so keeping the
    # pass commented out while the operator has not defined the secret is
    # the documented graceful-degrade posture: Tier-3 records a
    # flag_off / skipped outcome and the rest of the dispatcher chain
    # continues to run.
  # Union of the permissions requested by the called workflow's jobs:
  #   contents + pull-requests (write) -> push auto-heal branches, open PRs
  #   issues (write)                   -> open fallback ci-fix issues
  #   actions (read)                   -> read upstream CI run + PR metadata
  permissions:
    contents: write
    pull-requests: write
    issues: write
    actions: read
bisect-on-red:
  name: Bisect on red
  needs: meta
  # Route only failed CI runs for the canonical repository's main branch.
  # head_branch alone is not enough: a workflow_run raised by a fork pull
  # request reports the fork's branch name, which may also be `main`. The
  # head_repo comparison mirrors the canonical-repo gate already used by
  # auto-heal and bernstein-ci-fix, so this job (which holds issue and
  # pull-request write scopes) is not routed for fork runs.
  if: >-
    needs.meta.outputs.conclusion == 'failure' &&
    needs.meta.outputs.head_branch == 'main' &&
    needs.meta.outputs.head_repo == github.repository
  # Union of permissions the called workflow's jobs request: it reads
  # the failing commit's git history (contents:read) and posts the
  # culprit comment on the suspected PR plus opens a bisect-on-red
  # issue (pull-requests:write, issues:write).
  permissions:
    contents: read
    pull-requests: write
    issues: write
  uses: ./.github/workflows/bisect-on-red.yml
  # bisect-on-red reads only inputs and GITHUB_TOKEN (auto-provided).
  # No repository secrets are forwarded, replacing the prior
  # `secrets: inherit` blanket pass.
  with:
    head_sha: ${{ needs.meta.outputs.head_sha }}
    run_id: ${{ needs.meta.outputs.run_id }}
    html_url: ${{ needs.meta.outputs.html_url }}