Skip to content

Auto-heal v2

Auto-heal v2 #88

Workflow file for this run

name: "Auto-heal v2"
# Self-healing CI workflow, version 2. The v1 workflow (zero-LLM,
# four repair recipes) is kept under .github/workflows/auto-heal-v1.yml
# as an emergency fallback (gated behind ``if: false``) so an operator
# can revert by flipping a single boolean.
#
# v2 splits the heal pipeline into four layers (detection / classification
# / repair / safety) and wires in Bernstein-native primitives:
#
# * Bayesian per-class confidence (autoheal-bayes.json)
# * Multi-arm-bandit strategy selection (autoheal-bandit.json)
# * Flake vs real-fail distinguisher (flake_detector)
# * Failure clustering / bucketize (bucketize)
# * Shadow-mode quarantine for new strategies (shadow_mode)
# * Cordon-zone enforcement (cordon)
# * Cost circuit-breaker (cost_guard)
# * Lineage v2 attestation (autoheal-history.jsonl + lineage_writer)
# * Decision-log entry (decision_log)
# * Calibration tracking (eval.calibration)
# * Blast-radius gate (blast_radius)
# * Kill switch (autoheal-disabled)
# * Idempotency via content-hashed branch key (ci-heal/<short_sha>)
# * Telegram structured payload alert (telegram)
#
# All Python state lives under .sdd/ which is gitignored. The workflow
# never commits state - only patches that pass the cordon and self-test.
#
# Safety note (zizmor dangerous-triggers): workflow_run is intentional
# so we can react to CI failures that land on main. The canonical-repo
# gate, the branches: [main] filter, the bot-author filter, and the
# fix(ci-heal-v2): / fix(ci-heal): commit-prefix recursion guards
# prevent self-triggers.
on: # zizmor: ignore[dangerous-triggers]
  # React to completed runs of the "CI" workflow whose head branch is main.
  workflow_run:
    workflows:
      - "CI"
    types:
      - completed
    branches:
      - main
# One concurrency group per failing commit; a newer run for the same
# SHA cancels the one still in progress.
concurrency:
  group: ci-heal-v2-${{ github.event.workflow_run.head_sha }}
  cancel-in-progress: true
# No workflow-wide token scopes: each job declares its own permissions.
permissions: {}
env:
  # cost_guard reads this; the hard cap counts every LLM round-trip
  # this workflow may make today. Set to 0 to disable LLM paths entirely.
  BERNSTEIN_AUTOHEAL_BUDGET_USD: "1.00"
jobs:
triage:
  name: Triage and classify
  # Gate: only failed CI runs of main in this repository, excluding
  # heal commits (both v1 and v2 title prefixes) and runs started by
  # github-actions[bot].
  if: >
    github.event.workflow_run.conclusion == 'failure' &&
    github.event.workflow_run.head_branch == 'main' &&
    github.event.workflow_run.head_repository.full_name == github.repository &&
    !startsWith(github.event.workflow_run.display_title, 'fix(ci-heal-v2):') &&
    !startsWith(github.event.workflow_run.display_title, 'fix(ci-heal):') &&
    github.event.workflow_run.actor.login != 'github-actions[bot]'
  runs-on: ubuntu-latest
  timeout-minutes: 8
  # Read-only token: this job inspects runs and PRs but never writes.
  permissions:
    actions: read
    contents: read
    pull-requests: read
  # Values handed to the heal job.
  outputs:
    head_sha: ${{ steps.meta.outputs.head_sha }}
    short_sha: ${{ steps.meta.outputs.short_sha }}
    run_id: ${{ steps.meta.outputs.run_id }}
    should_heal: ${{ steps.bucket.outputs.should_heal }}
    safe_jobs: ${{ steps.bucket.outputs.safe_jobs }}
    heuristic_jobs: ${{ steps.bucket.outputs.heuristic_jobs }}
    risky_jobs: ${{ steps.bucket.outputs.risky_jobs }}
    strategy: ${{ steps.strategy.outputs.strategy }}
    idempotent_skip: ${{ steps.idempotency.outputs.skip }}
  steps:
- name: Harden runner (audit mode)
  # SHA-pinned; the egress policy is set to audit.
  uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
  with:
    egress-policy: "audit"
# Scorecard dangerous-workflow note: the checkout below uses the trusted
# `main` ref, never the event-supplied head_sha. The job-level filters
# already require `head_branch == 'main'` and
# `head_repository.full_name == github.repository`, so the failing
# commit is expected to be reachable from origin/main. The next step
# verifies that reachability explicitly before running `git checkout`
# on the SHA, which keeps the entrypoint free of attacker-controlled
# input while still operating on the failing commit.
- name: Checkout main (trusted ref)
  uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
  with:
    ref: main
    # Full history so the ancestry check in the next step can run.
    fetch-depth: 0
    persist-credentials: false
- name: Verify and pin to failing commit
  env:
    HEAD_SHA: ${{ github.event.workflow_run.head_sha }}
  run: |
    set -euo pipefail
    # Guard rails: accept nothing but a full 40-hex SHA, so the env var
    # cannot smuggle a shell expression.
    if ! grep -qE '^[0-9a-f]{40}$' <<<"$HEAD_SHA"; then
      echo "::error::head_sha '$HEAD_SHA' is not a 40-hex SHA"
      exit 1
    fi
    # The SHA must be an ancestor of origin/main; this rejects any
    # fork-controlled SHA that slipped past the event-filter guards.
    git merge-base --is-ancestor "$HEAD_SHA" origin/main || {
      echo "::error::head_sha '$HEAD_SHA' is not reachable from origin/main"
      exit 1
    }
    # Detach onto the verified failing commit.
    git -c advice.detachedHead=false checkout "$HEAD_SHA"
- name: Install uv (provides Python via .python-version)
  # The action is SHA-pinned and vendors a pinned uv release, which
  # keeps the toolchain reproducible without an unpinned `pip install`
  # step (Scorecard pinned-dependencies).
  # NOTE(review): the step name mentions .python-version, yet an
  # explicit python-version is also passed here -- confirm which one
  # is meant to win.
  uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
  with:
    python-version: "3.13"
    enable-cache: true
- name: Install package (editable, no deps)
  # Editable install of the repository itself into the system
  # interpreter, without resolving its dependencies.
  run: |
    uv pip install --system --no-deps -e .
- name: Extract failure metadata
  id: meta
  env:
    RUN_ID: ${{ github.event.workflow_run.id }}
    HEAD_SHA: ${{ github.event.workflow_run.head_sha }}
  run: |
    # Publish the failing commit (full and 12-character short form) and
    # the id of the failed CI run as step outputs.
    printf 'head_sha=%s\nshort_sha=%s\nrun_id=%s\n' \
      "${HEAD_SHA}" "${HEAD_SHA:0:12}" "${RUN_ID}" >> "$GITHUB_OUTPUT"
- name: Kill-switch check (autoheal-disabled)
  id: killswitch
  run: |
    # Capability 23: one-button kill switch, consulted before any other
    # heal logic. The helper inspects .sdd/autoheal-disabled; a non-zero
    # exit is treated as "engaged" and every later triage step is
    # skipped via the `skip` output.
    mkdir -p .sdd
    if python scripts/auto_heal_v2_run.py check-kill-switch; then
      echo "skip=false" >> "$GITHUB_OUTPUT"
    else
      echo "::warning::auto-heal kill switch is engaged - skipping"
      echo "skip=true" >> "$GITHUB_OUTPUT"
    fi
- name: Idempotency check (existing heal PR for SHA)
  id: idempotency
  if: steps.killswitch.outputs.skip != 'true'
  env:
    SHORT_SHA: ${{ steps.meta.outputs.short_sha }}
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    # Capability 24: idempotent dedupe. The branch name doubles as the
    # content-hash key -- an open heal PR for the same failing SHA
    # short-circuits this run. Patch-level dedupe also runs in the
    # heal job via core/autoheal/idempotency.
    HEAL_BRANCH="ci-heal/${SHORT_SHA}"
    # A failed lookup is treated the same as "no PR found".
    OPEN_PR=$(gh pr list \
      --repo "${GITHUB_REPOSITORY}" \
      --head "$HEAL_BRANCH" \
      --state open \
      --json number \
      --jq '.[0].number // ""' 2>/dev/null || echo "")
    if [ -z "$OPEN_PR" ]; then
      echo "skip=false" >> "$GITHUB_OUTPUT"
    else
      echo "::notice::Heal PR #$OPEN_PR already open for $HEAL_BRANCH"
      echo "skip=true" >> "$GITHUB_OUTPUT"
    fi
- name: Categorise failing jobs (bucketize)
  id: bucket
  if: steps.killswitch.outputs.skip != 'true' && steps.idempotency.outputs.skip != 'true'
  env:
    RUN_ID: ${{ steps.meta.outputs.run_id }}
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    set -euo pipefail
    # Names of the jobs that concluded with "failure" in the CI run; a
    # failed lookup degrades to an empty list.
    FAILED=$(gh run view "$RUN_ID" \
      --repo "${GITHUB_REPOSITORY}" \
      --json jobs \
      --jq '.jobs[] | select(.conclusion == "failure") | .name' 2>/dev/null || echo "")
    if [ -z "$FAILED" ]; then
      echo "::notice::No failing jobs returned -- nothing to heal"
      printf '%s\n' \
        "should_heal=false" \
        "safe_jobs=" \
        "heuristic_jobs=" \
        "risky_jobs=" >> "$GITHUB_OUTPUT"
      exit 0
    fi
    # Capability 4: failure clustering via bucketize -- jobs that
    # share a class are batched into the same heal attempt. The triage
    # helper reads job names on stdin and prints a JSON document.
    OUT=$(printf '%s\n' "$FAILED" | python scripts/auto_heal_v2_run.py triage)
    # Print one list-valued field of the triage JSON, one entry per line.
    bucket_lines() {
      printf '%s\n' "$OUT" \
        | python -c 'import json,sys;print("\n".join(json.load(sys.stdin)[sys.argv[1]]))' "$1"
    }
    SAFE=$(bucket_lines safe)
    HEUR=$(bucket_lines heuristic)
    RISKY=$(bucket_lines risky)
    SHOULD=$(printf '%s\n' "$OUT" \
      | python -c 'import json,sys;print(str(json.load(sys.stdin)["should_heal"]).lower())')
    # Multi-line outputs use the <<EOF delimiter syntax.
    {
      printf 'safe_jobs<<EOF\n%s\nEOF\n' "$SAFE"
      printf 'heuristic_jobs<<EOF\n%s\nEOF\n' "$HEUR"
      printf 'risky_jobs<<EOF\n%s\nEOF\n' "$RISKY"
      echo "should_heal=$SHOULD"
    } >> "$GITHUB_OUTPUT"
- name: Bandit strategy selection (Thompson sampling)
  id: strategy
  if: steps.bucket.outputs.should_heal == 'true'
  env:
    SAFE_JOBS: ${{ steps.bucket.outputs.safe_jobs }}
    HEUR_JOBS: ${{ steps.bucket.outputs.heuristic_jobs }}
  run: |
    # Capability 8: multi-arm-bandit. The candidate set depends on which
    # bucket is non-empty (safe takes precedence over heuristic); the
    # helper then picks one arm from that set.
    # Bandit state lives at .sdd/autoheal-bandit.json (gitignored).
    # The bayesian state file (.sdd/autoheal-bayes.json) is updated
    # only by the final record-outcome step of the heal job.
    CANDIDATES="noop"
    if [ -n "${SAFE_JOBS}" ]; then
      CANDIDATES="ruff-format,agents-md-sync"
    elif [ -n "${HEUR_JOBS}" ]; then
      CANDIDATES="typos-allowlist"
    fi
    CHOSEN=$(python scripts/auto_heal_v2_run.py select-strategy --candidates "$CANDIDATES")
    echo "strategy=$CHOSEN" >> "$GITHUB_OUTPUT"
    echo "::notice::bandit chose strategy=$CHOSEN from $CANDIDATES"
heal:
  name: Apply chosen strategy
  needs: triage
  # Run only when triage found something healable and no heal PR is
  # already open for this failing commit.
  if: >
    needs.triage.outputs.should_heal == 'true' &&
    needs.triage.outputs.idempotent_skip != 'true'
  runs-on: ubuntu-latest
  timeout-minutes: 25
  permissions:
    # Needed by the "Trigger CI on heal PR branch" step: creating a
    # workflow_dispatch event with GITHUB_TOKEN requires Actions write
    # access. Without it `gh workflow run ci.yml` is rejected and the
    # heal PR never receives a CI signal.
    actions: write
    contents: write # push the ci-heal/<short_sha> branch
    pull-requests: write # open the heal PR
    attestations: write # store the build-provenance attestation
    id-token: write # OIDC token used when signing the attestation
  # Values reported by this job.
  outputs:
    pr_url: ${{ steps.open_pr.outputs.pr_url }}
    pr_number: ${{ steps.open_pr.outputs.pr_number }}
    outcome: ${{ steps.outcome.outputs.outcome }}
  steps:
- name: Harden runner (audit mode)
  # SHA-pinned; the egress policy is set to audit.
  uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
  with:
    egress-policy: "audit"
# Scorecard dangerous-workflow note: check out the trusted `main` ref
# first; the following step pins to the failing SHA only after
# confirming it is reachable from origin/main. This avoids the
# "untrusted-checkout via event context" pattern while preserving the
# failing-SHA semantic.
- name: Checkout main (trusted ref)
  uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 # zizmor: ignore[artipacked]
  with:
    ref: main
    # Full history so the ancestry check in the next step can run.
    fetch-depth: 0
    persist-credentials: false
- name: Verify and pin to failing commit
  env:
    HEAD_SHA: ${{ needs.triage.outputs.head_sha }}
  run: |
    set -euo pipefail
    # Accept nothing but a full 40-hex SHA.
    if ! grep -qE '^[0-9a-f]{40}$' <<<"$HEAD_SHA"; then
      echo "::error::head_sha '$HEAD_SHA' is not a 40-hex SHA"
      exit 1
    fi
    # The SHA must be an ancestor of the freshly fetched origin/main.
    git merge-base --is-ancestor "$HEAD_SHA" origin/main || {
      echo "::error::head_sha '$HEAD_SHA' is not reachable from origin/main"
      exit 1
    }
    # Detach onto the verified failing commit.
    git -c advice.detachedHead=false checkout "$HEAD_SHA"
- name: Install uv (provides Python via .python-version)
  # SHA-pinned setup action with its cache enabled; no explicit
  # python-version is passed in this job.
  uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
  with:
    enable-cache: true
- name: Sync project (dev group) for ruff + bernstein CLI
  # Mirrors the toolchain of CI's Lint job so the heal applies the same
  # ruff version and config the failing job used, and vends the
  # `bernstein` CLI that the agents-md-sync strategy calls.
  # History: the legacy v2 step pip-installed a typos package that does
  # not exist on PyPI (the binary ships via cargo / GH releases), so the
  # heal job died at install before reaching the apply step. v1 used
  # `uv sync --group dev` for the same reason; v2 now matches.
  run: |
    uv sync --group dev
- name: Install typos binary (heuristic-class only)
  # Runs only when the bandit picked the typos-allowlist arm.
  # `crate-ci/typos` is the upstream GH action and mirrors the tool
  # used by the spelling job in ci.yml.
  # NOTE(review): the apply step assumes this action leaves a `typos`
  # binary on $PATH -- confirm; the apply step already degrades to a
  # warning when the binary is missing.
  if: needs.triage.outputs.strategy == 'typos-allowlist'
  uses: crate-ci/typos@aca895bf05aec0cb7dffa6f94495e923224d9f17 # v1
  # A non-zero exit here (e.g. typos were found) must not stop the heal.
  continue-on-error: true
  with:
    # Scan the whole checkout without rewriting any file; the heal
    # itself is applied by the later apply step.
    files: "."
    write_changes: false
- name: Capability 13 - cost-guard preflight
  run: |
    # Cost circuit-breaker: no LLM-grounded path may be entered once
    # BERNSTEIN_AUTOHEAL_BUDGET_USD is exhausted. v2 wave one ships
    # deterministic-only repair, so today this is informational; the
    # gate is already in place for the LLM-grounded synthesis planned
    # for v2 wave two.
    uv run python scripts/auto_heal_v2_imports.py cost_guard
- name: Apply chosen strategy
  id: apply
  env:
    STRATEGY: ${{ needs.triage.outputs.strategy }}
    SAFE_JOBS: ${{ needs.triage.outputs.safe_jobs }}
    HEUR_JOBS: ${{ needs.triage.outputs.heuristic_jobs }}
  run: |
    set -euo pipefail
    # Formatter scope mirrors ci.yml's Lint job exactly: src/ tests/
    # scripts/. `ruff format .` would also touch vendored paths the
    # cordon rejects (sdk/, packages/, ...) and produce a noisy heal
    # diff. The scope matches the v1 strategy and the cordon's
    # WHITESPACE_OK_GLOBS.
    format_python() {
      uv run ruff format src/ tests/ scripts/
    }
    case "$STRATEGY" in
      ruff-format)
        # Format only. `ruff check --fix` is deliberately NOT run: real
        # lint findings (unused imports, sort-imports, ...) are code
        # changes the cordon must reject, and format-only keeps the
        # change boundary unambiguous (v1 comment).
        format_python
        ;;
      agents-md-sync)
        # A feature merge that adds Python modules makes the agents-md
        # regen produce a fresh module map. Order matters: sync FIRST,
        # then ruff format on the result, as documented by the
        # operator. Both are cordon-allowlisted outputs and both are
        # best-effort.
        uv run bernstein agents-md sync || true
        format_python || true
        ;;
      typos-allowlist)
        # Reuses the v1 helpers: collect findings as JSON, extract
        # vendor-shaped tokens, then append them to typos.toml
        # idempotently. Every stage is best-effort.
        if ! command -v typos >/dev/null 2>&1; then
          echo "::warning::typos binary unavailable -- skipping allowlist heal"
        else
          typos --format json > /tmp/typos.json 2>&1 || true
          uv run python scripts/auto_heal_typos.py < /tmp/typos.json > /tmp/tokens.txt || true
          uv run python scripts/auto_heal_apply_typos.py /tmp/tokens.txt typos.toml || true
        fi
        ;;
      *)
        echo "::warning::unknown strategy '$STRATEGY' -- nothing applied"
        ;;
    esac
- name: Cordon enforcement (Capability 12, 25)
  id: cordon
  env:
    STRATEGY: ${{ needs.triage.outputs.strategy }}
  run: |
    # Reject any non-cordoned change. The Python module is the single
    # source of truth shared with the pre-commit hook the heal worktree
    # carries.
    #
    # Strategy-aware whitespace gate: `ruff format` and the composed
    # `agents-md-sync` + ruff strategy produce deterministic structural
    # diffs (quote style, line wrapping, ...). Those are NOT
    # pure-whitespace under `git diff -w`, but they are bounded by the
    # formatter. For those two strategies any modified path matching
    # the src/ tests/ scripts/ Python globs below is treated as
    # whitespace-equivalent; the cordon evaluator still applies its own
    # glob check, so paths outside that set remain blocked. The legacy
    # v2 logic blocked every structural reformat and therefore never
    # produced a successful heal PR.
    set -euo pipefail
    # Tracked modifications plus untracked, non-ignored files: together
    # this is the set `git add -A` stages in the "Open heal PR" step.
    # Checking only `git diff --name-only` would let newly created
    # files bypass the cordon and would misreport a heal that only
    # creates files as a no-op.
    TRACKED=$(git -c core.quotePath=false diff --name-only)
    UNTRACKED=$(git -c core.quotePath=false ls-files --others --exclude-standard)
    if [ -z "$TRACKED" ] && [ -z "$UNTRACKED" ]; then
      echo "::notice::no diff produced -- strategy was a no-op"
      echo "noop=true" >> "$GITHUB_OUTPUT"
      exit 0
    fi
    echo "noop=false" >> "$GITHUB_OUTPUT"
    ALL_OK=1
    # evaluate PATH KIND -- run the cordon evaluator for one file.
    # KIND is "tracked" (modified file) or "untracked" (new file). Only
    # tracked files can qualify for --whitespace-only; a new file has
    # no previous content to compare against, so it is always judged
    # as a real change.
    evaluate() {
      local path="$1" kind="$2"
      local args=("$path")
      if [ "$kind" = "tracked" ]; then
        if git diff -w --quiet -- "$path"; then
          args+=(--whitespace-only)
        elif [ "$STRATEGY" = "ruff-format" ] || [ "$STRATEGY" = "agents-md-sync" ]; then
          # Structural reformat under a deterministic formatter:
          # accepted only under the ws-glob set, never for arbitrary
          # paths.
          case "$path" in
            src/bernstein/*.py|src/bernstein/**/*.py|tests/*.py|tests/**/*.py|scripts/*.py|scripts/**/*.py)
              args+=(--whitespace-only)
              ;;
          esac
        fi
      fi
      if uv run python scripts/auto_heal_v2_cordon.py "${args[@]}"; then
        echo " $path -> OK"
      else
        echo " $path -> BLOCK"
        ALL_OK=0
      fi
    }
    # Read one path per line (safe for names containing spaces). The
    # lists arrive on fd 3 so the evaluator cannot consume them through
    # its stdin.
    while IFS= read -r f <&3; do
      if [ -n "$f" ]; then
        evaluate "$f" tracked
      fi
    done 3<<<"$TRACKED"
    while IFS= read -r f <&3; do
      if [ -n "$f" ]; then
        evaluate "$f" untracked
      fi
    done 3<<<"$UNTRACKED"
    if [ "$ALL_OK" -ne 1 ]; then
      echo "::error::cordon rejected at least one file -- aborting heal"
      exit 1
    fi
- name: Diff-aware self-test (Capability 11)
  if: steps.cordon.outputs.noop != 'true'
  env:
    STRATEGY: ${{ needs.triage.outputs.strategy }}
  run: |
    # v2 wave one: run only the failing job's local equivalent. v2 wave
    # two will wire core/quality/blast_radius to pick a more targeted
    # set.
    #
    # Previously every check ended in `|| true`, so this step could
    # never fail. The formatter check is now a hard gate for the two
    # formatter strategies, because the cordon step accepts their
    # structural diffs on the premise that `ruff format --check`
    # re-validates them. All other checks stay non-blocking but surface
    # a warning annotation instead of being swallowed silently.
    set -uo pipefail
    # Lint findings may predate the heal, so they are informational.
    uv run ruff check src/ \
      || echo "::warning::ruff check still reports findings (informational)"
    if ! uv run ruff format --check src/ tests/ scripts/; then
      case "$STRATEGY" in
        ruff-format|agents-md-sync)
          echo "::error::ruff format --check fails after strategy '${STRATEGY}' -- refusing to open a heal PR"
          exit 1
          ;;
        *)
          echo "::warning::ruff format --check fails (informational for strategy '${STRATEGY}')"
          ;;
      esac
    fi
    # typos is only present when the typos-allowlist arm installed it.
    if command -v typos >/dev/null 2>&1; then
      typos || echo "::warning::typos still reports findings (informational)"
    fi
- name: Blast-radius gate (Capability 15)
  if: steps.cordon.outputs.noop != 'true'
  run: |
    # The autoheal patch has to score below the blast-radius threshold.
    # Anything above it needs human review, so the workflow escalates
    # instead of merging.
    uv run python scripts/auto_heal_v2_imports.py blast_radius
- name: Open heal PR
  id: open_pr
  if: steps.cordon.outputs.noop != 'true'
  env:
    STRATEGY: ${{ needs.triage.outputs.strategy }}
    SHORT_SHA: ${{ needs.triage.outputs.short_sha }}
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    set -euo pipefail
    HEAL_BRANCH="ci-heal/${SHORT_SHA}"
    # Commit everything in the working tree on a fresh heal branch.
    git config user.name "bernstein-autoheal[bot]"
    git config user.email "bernstein-autoheal@users.noreply.github.com"
    git checkout -b "$HEAL_BRANCH"
    git add -A
    git commit -m "fix(ci-heal-v2): apply ${STRATEGY} for ${SHORT_SHA}"
    # The checkout ran with persist-credentials: false, so the push
    # carries the token explicitly in an HTTPS remote URL.
    git push \
      "https://x-access-token:${GH_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" \
      "$HEAL_BRANCH:$HEAL_BRANCH"
    PR_BODY=$(cat <<EOF
    Auto-heal v2 applied strategy \`${STRATEGY}\` for failing
    commit \`${SHORT_SHA}\`.
    See docs/operations/auto-heal.md for the v2 architecture.
    EOF
    )
    # `gh pr create` prints the URL of the new PR on stdout.
    PR_URL=$(gh pr create \
      --base main \
      --head "$HEAL_BRANCH" \
      --title "fix(ci-heal-v2): ${STRATEGY} for ${SHORT_SHA}" \
      --body "$PR_BODY" \
      --label auto-heal \
      --label auto-heal-v2)
    # The PR number is the last path segment of that URL.
    PR_NUM=$(rev <<<"$PR_URL" | cut -d/ -f1 | rev)
    {
      echo "pr_url=$PR_URL"
      echo "pr_number=$PR_NUM"
    } >> "$GITHUB_OUTPUT"
- name: Trigger CI on heal PR branch
  # A PR opened with secrets.GITHUB_TOKEN does not emit `pull_request`
  # events for downstream workflows, so CI would never start on the
  # freshly pushed heal branch. ci.yml is therefore dispatched
  # explicitly, giving the heal PR a green/red signal without a human
  # re-trigger. Best-effort: a failed dispatch only logs a warning and
  # never blocks the heal PR.
  # NOTE(review): dispatching a workflow requires the job token to
  # carry `actions: write` -- confirm the job permissions grant it.
  if: steps.cordon.outputs.noop != 'true' && steps.open_pr.outputs.pr_url != ''
  env:
    HEAL_BRANCH: ci-heal/${{ needs.triage.outputs.short_sha }}
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    if ! gh workflow run ci.yml --ref "${HEAL_BRANCH}"; then
      echo "::warning::ci.yml dispatch failed for ${HEAL_BRANCH} -- PR review will trigger"
    fi
- name: Compute heal patch digest (attestation subject)
  # The attestation subject must be a real SHA-256 digest. The previous
  # value was "sha256:" followed by the 40-hex git commit SHA, which is
  # a SHA-1 and not a valid 64-hex SHA-256, so the attest action
  # rejected it and continue-on-error hid the failure. The digest is
  # now taken over the heal commit's patch (HEAD is the commit created
  # by the "Open heal PR" step; HEAD~1 is the failing commit).
  id: patch_digest
  if: steps.cordon.outputs.noop != 'true'
  run: |
    set -euo pipefail
    DIGEST=$(git diff --binary HEAD~1 HEAD | sha256sum | cut -d' ' -f1)
    echo "sha256=${DIGEST}" >> "$GITHUB_OUTPUT"
  # Best-effort, like the attestation itself.
  continue-on-error: true
- name: Capability 19 - SLSA build provenance attestation
  # Skipped when no digest could be computed.
  if: steps.cordon.outputs.noop != 'true' && steps.patch_digest.outputs.sha256 != ''
  uses: actions/attest-build-provenance@a2bbfa25375fe432b6a289bc6b6cd05ecd0c4c32 # v4.1.0
  with:
    subject-name: "autoheal-${{ needs.triage.outputs.short_sha }}"
    subject-digest: "sha256:${{ steps.patch_digest.outputs.sha256 }}"
  # An attestation failure must not fail the heal.
  continue-on-error: true
- name: Record outcome (bandit + bayesian)
  id: outcome
  # Runs even when an earlier step failed so failures are recorded too.
  if: always()
  env:
    STRATEGY: ${{ needs.triage.outputs.strategy }}
    PR_URL: ${{ steps.open_pr.outputs.pr_url }}
    CORDON_NOOP: ${{ steps.cordon.outputs.noop }}
  run: |
    # Capability 8 + 1: feed the result back into the bandit and the
    # Bayesian per-class state files (.sdd/autoheal-bandit.json and
    # .sdd/autoheal-bayes.json). Both are gitignored.
    mkdir -p .sdd
    # A PR URL means the heal was applied; a cordon no-op means there
    # was nothing to change; everything else counts as a failed
    # validation.
    OUTCOME="failed_validation"
    if [ -n "${PR_URL}" ]; then
      OUTCOME="applied"
    elif [ "${CORDON_NOOP}" = "true" ]; then
      OUTCOME="skipped_no_jobs"
    fi
    # NOTE(review): --cls is hard-coded to "safe" even when the chosen
    # strategy is typos-allowlist, which triage only offers for the
    # heuristic bucket -- confirm this is intended.
    uv run python scripts/auto_heal_v2_run.py record-outcome \
      --strategy "$STRATEGY" \
      --cls safe \
      --job "auto-heal-v2" \
      --outcome "$OUTCOME"
    echo "outcome=$OUTCOME" >> "$GITHUB_OUTPUT"
- name: Capability 16, 17, 18, 21 - audit + decision + calibration + lineage write
  # Runs even when an earlier step failed so failures are logged too.
  if: always()
  env:
    RUN_ID: ${{ needs.triage.outputs.run_id }}
    HEAD_SHA: ${{ needs.triage.outputs.head_sha }}
    STRATEGY: ${{ needs.triage.outputs.strategy }}
    OUTCOME: ${{ steps.outcome.outputs.outcome }}
  run: |
    # Single integration seam. The `log` subcommand receives one JSON
    # record on stdin and writes autoheal-history.jsonl (operator
    # ledger), one decision-log row of kind autoheal_strategy (so
    # `bernstein decisions tail` sees this heal), and one calibration
    # row (so the weekly Brier report includes autoheal). All three
    # share a common decision_id for cross-store joins.
    # cls, confidence, cost_usd, llm_calls and patch_sha are fixed
    # placeholder values in this record.
    uv run python scripts/auto_heal_v2_run.py log <<JSON
    {
      "run_id": "${RUN_ID}",
      "head_sha": "${HEAD_SHA}",
      "strategy": "${STRATEGY}",
      "cls": "safe",
      "confidence": 0.5,
      "outcome": "${OUTCOME}",
      "cost_usd": 0.0,
      "llm_calls": 0,
      "patch_sha": "",
      "rationale": "auto-heal v2 ${STRATEGY}",
      "candidates": ["ruff-format", "agents-md-sync", "typos-allowlist"]
    }
    JSON
- name: Capability 20 - structured Telegram alert
  # Runs even when an earlier step failed so failures are reported too.
  if: always()
  env:
    TG_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
    TG_CHAT: ${{ secrets.TELEGRAM_CHAT_ID }}
    STRATEGY: ${{ needs.triage.outputs.strategy }}
    OUTCOME: ${{ steps.outcome.outputs.outcome }}
    PR_URL: ${{ steps.open_pr.outputs.pr_url }}
  run: |
    # Best-effort alert: absent secrets are not a failure condition and
    # a failed HTTP call is ignored. The payload is a fixed key=value
    # layout so a downstream bot can parse it deterministically.
    if [ -n "${TG_TOKEN}" ] && [ -n "${TG_CHAT}" ]; then
      MSG=$(printf 'autoheal-v2\nstrategy=%s\noutcome=%s\npr=%s\n' \
        "${STRATEGY}" "${OUTCOME}" "${PR_URL}")
      curl -sS --max-time 10 \
        -X POST "https://api.telegram.org/bot${TG_TOKEN}/sendMessage" \
        -d "chat_id=${TG_CHAT}" \
        --data-urlencode "text=${MSG}" || true
    else
      echo "::notice::telegram secrets absent -- skipping alert"
    fi