diff --git a/.github/workflows/auto-rerun-allocation-failure.yml b/.github/workflows/auto-rerun-allocation-failure.yml
new file mode 100644
index 0000000..2d78524
--- /dev/null
+++ b/.github/workflows/auto-rerun-allocation-failure.yml
@@ -0,0 +1,68 @@
+# Reruns Daily Trading Pipeline when GitHub's hosted runner pool fails to
+# acquire a runner (annotation: "not acquired by Runner of type hosted...").
+# Other failure types are left alone. Guarded to first attempt only to
+# prevent rerun loops.
+name: Auto-rerun on Runner Allocation Failure
+
+on:
+  workflow_run:
+    workflows: ["Daily Trading Pipeline"]
+    types: [completed]
+
+# Scopes not listed under `permissions` are set to `none` for GITHUB_TOKEN.
+permissions:
+  # Lists the failed run's jobs and triggers the rerun.
+  actions: write
+  # Reads check-run annotations; without this scope the annotations call is
+  # rejected on private repositories and detection can never succeed.
+  checks: read
+
+jobs:
+  rerun:
+    # Act only on a failed first attempt so a rerun that fails again is not
+    # retried, which prevents rerun loops.
+    if: ${{ github.event.workflow_run.conclusion == 'failure' && github.event.workflow_run.run_attempt == 1 }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Detect runner allocation failure
+        id: detect
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          REPO: ${{ github.repository }}
+          RUN_ID: ${{ github.event.workflow_run.id }}
+        run: |
+          set -euo pipefail
+          allocation_failure=false
+          # --paginate fetches every page of the jobs listing.
+          job_ids=$(gh api --paginate "/repos/$REPO/actions/runs/$RUN_ID/jobs" --jq '.jobs[].id')
+          for job_id in $job_ids; do
+            # The job id doubles as the check-run id here. The lookup stays
+            # best-effort, but a failed call is reported, not swallowed.
+            if ! annotations=$(gh api "/repos/$REPO/check-runs/$job_id/annotations"); then
+              echo "::warning::Could not read annotations for job $job_id; skipping it"
+              continue
+            fi
+            # `// ""` stops an annotation without a message from aborting jq.
+            if echo "$annotations" | jq -e 'any(.[]; (.message // "") | test("not acquired by Runner"))' >/dev/null; then
+              allocation_failure=true
+              echo "Runner allocation failure detected on job $job_id"
+              break
+            fi
+          done
+          # Expose the verdict to the following steps.
+          echo "allocation_failure=$allocation_failure" >> "$GITHUB_OUTPUT"
+
+      - name: Rerun failed jobs
+        if: steps.detect.outputs.allocation_failure == 'true'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          REPO: ${{ github.repository }}
+          RUN_ID: ${{ github.event.workflow_run.id }}
+        run: |
+          # --failed reruns only the failed jobs of the run.
+          gh run rerun --repo "$REPO" "$RUN_ID" --failed
+          echo "Triggered rerun of run $RUN_ID"
+
+      - name: Skip (not a runner allocation failure)
+        if: steps.detect.outputs.allocation_failure != 'true'
+        run: echo "Failure not caused by runner allocation; no rerun triggered."