# Retry workflow run with errors #102459

# Manually dispatched workflow that waits for another workflow run to
# complete, optionally checks its annotations and job logs for known error
# messages, and then re-runs it (failed jobs only, or the whole workflow).
name: Retry workflow run with errors
on:
  workflow_dispatch:
    inputs:
      dry-run:
        description: |
          If enabled, only checks for errors in the workflow run and does not retry.
        default: false
        type: boolean
      # JSON array encoded as a string; the literal '[]' disables the error check.
      error-messages:
        description: |
          Error messages to look for in annotations and logs.
          The workflow run is only retried if any of the error messages are found.
          If this parameter is an empty array ('[]'), the workflow will be retried without checking for specific error messages.
        default: '[]'
      run-id:
        description: Id of the workflow run to retry
        required: true
      attempt:
        description: |
          Attempt number of the workflow run to retry.
          If specified, the retry is only performed when the provided attempt number
          matches the current latest attempt of the workflow run.
          This prevents rare cases where a retry for an older attempt is still pending
          while a newer attempt has already started, preventing outdated retries from
          interfering with the latest attempt.
        required: false
      owner:
        description: Owner of the GitHub repository of the workflow run to retry
        required: true
      repository:
        description: GitHub repository of the workflow run to retry
        required: true
      notify-back-on-error:
        # Calling workflow must implement the `notify_back_error_message` parameter and forward the error.
        description: |
          If enabled, notifies back (i.e. trigger a new run) with an error message when specified error messages are not found.
        default: false
        type: boolean
      notify-back-git-ref:
        description: Git reference of the calling workflow to notifying back
        default: main
      # Named with an underscore (unlike the other inputs); kept as-is because
      # callers dispatch this workflow with that exact input name.
      distinct_id:
        description: |
          Random distinct Id to easily find the dispatched workflow run
          Needed by https://github.com/Codex-/return-dispatch as workaround for https://github.com/cli/cli/issues/4001
        required: false
        default: "not-set"
      rerun-whole-workflow:
        description: |
          If set to true, the workflow will be re-triggered in its entirety.
          This is useful when you want to retry the entire workflow instead of
          just failed jobs.
        required: false
        type: boolean
        default: false
jobs:
  # Single job: wait for the target run, optionally scan it for known error
  # messages, then either re-run it or notify the calling workflow.
  # NOTE(review): no `permissions:` block is declared; every gh call below uses
  # the GitHub App token, so a least-privilege block may be possible — confirm
  # what the local check-annotations-and-logs action needs first.
  retry:
    name: Retry workflow run
    runs-on: ubuntu-latest
    # Also bounds the polling loop in the "Wait for workflow" step.
    timeout-minutes: 30
    steps:
      # The distinct id is deliberately part of the step name (see the
      # `distinct_id` input description). Inside the script it is read from the
      # environment rather than interpolated with `${{ }}`, so its value can
      # never be interpreted as shell code.
      - name: Echo distinct id '${{ inputs.distinct_id }}' to enable codex-/return-dispatch to find this workflow run
        env:
          DISTINCT_ID: ${{ inputs.distinct_id }}
        run: |
          echo "${DISTINCT_ID}"
        shell: bash
      # Needed for the local action ./rerun-failed-run/check-annotations-and-logs.
      - name: Checkout
        uses: actions/checkout@v6
      # Fetches the GitHub App credentials (id + private key) from Vault.
      - name: Import Secrets
        id: secrets
        uses: hashicorp/vault-action@v4.0.0
        with:
          url: ${{ secrets.VAULT_ADDR }}
          method: approle
          roleId: ${{ secrets.VAULT_ROLE_ID }}
          secretId: ${{ secrets.VAULT_SECRET_ID }}
          secrets: |
            secret/data/products/infra/ci/retrigger-gha-workflow RETRIGGER_APP_KEY;
            secret/data/products/infra/ci/retrigger-gha-workflow RETRIGGER_APP_ID;
      # Token is requested for the owner/repository of the run to retry.
      - name: Generate a GitHub Access Token from Github App
        id: github-token
        uses: actions/create-github-app-token@v3
        with:
          app-id: ${{ steps.secrets.outputs.RETRIGGER_APP_ID }}
          private-key: ${{ steps.secrets.outputs.RETRIGGER_APP_KEY }}
          owner: ${{ inputs.owner }}
          repositories: ${{ inputs.repository }}
      - name: Wait for workflow run-id=${{ inputs.run-id }} to complete, repo=${{ inputs.owner }}/${{ inputs.repository }}
        env:
          ATTEMPT: ${{ inputs.attempt }}
          GH_REPO: ${{ inputs.owner }}/${{ inputs.repository }}
          GH_TOKEN: ${{ steps.github-token.outputs.token }}
          RUN_ID: ${{ inputs.run-id }}
        run: |
          # Wait until the workflow run is completed
          echo "Waiting for the workflow run to complete ..."
          # Only pass --attempt when an attempt number was provided; an array
          # keeps the flag and its value as separate, safely quoted words.
          attempt_args=()
          if [ -n "${ATTEMPT}" ]; then
            attempt_args=(--attempt "${ATTEMPT}")
          fi
          while true; do
            status=$(gh run view "${RUN_ID}" "${attempt_args[@]}" --json status --jq '.status')
            echo "Current status: ${status}"
            if [ "${status}" = "completed" ]; then
              break
            fi
            sleep 3
          done
        shell: bash
      # Skipped when error-messages is '[]' (retry unconditionally).
      - name: Check annotations and job logs for errors
        id: errors
        if: inputs.error-messages != '[]'
        uses: ./rerun-failed-run/check-annotations-and-logs
        with:
          error-messages: ${{ inputs.error-messages }}
          github-token: ${{ steps.github-token.outputs.token }}
          owner: ${{ inputs.owner }}
          repository: ${{ inputs.repository }}
          run-id: ${{ inputs.run-id }}
          attempt: ${{ inputs.attempt }}
      # Runs when a known error was found, or when no error check was requested.
      - name: Retry workflow run for repo=${{ inputs.owner }}/${{ inputs.repository }} and run-id=${{ inputs.run-id }}
        if: steps.errors.outputs.found == 'true' || inputs.error-messages == '[]'
        env:
          ATTEMPT: ${{ inputs.attempt }}
          DRY_RUN: ${{ inputs.dry-run }}
          GH_REPO: ${{ inputs.owner }}/${{ inputs.repository }}
          GH_TOKEN: ${{ steps.github-token.outputs.token }}
          RUN_ID: ${{ inputs.run-id }}
          RERUN_WHOLE_WORKFLOW: ${{ inputs.rerun-whole-workflow }}
        run: |
          # trigger a new run retrying failed jobs or the whole workflow
          last_attempt=$(gh run view "${RUN_ID}" --json attempt --jq '.attempt')
          if [ "${DRY_RUN}" = "true" ]; then
            echo "Skipping retry since this is a dry-run ..."
          elif [ -n "${ATTEMPT}" ] && [ "${last_attempt}" -ne "$ATTEMPT" ]; then
            # A newer attempt already exists, so this retry request is stale.
            echo "The specified attempt number to retry '$ATTEMPT' does not match the latest attempt number ${last_attempt}, meaning the workflow has been retried already for this attempt. Aborting ..."
          else
            echo "Triggering a new attempt of run-id=${RUN_ID} ..."
            if [ "${RERUN_WHOLE_WORKFLOW}" = "true" ]; then
              echo "Rerunning the whole workflow ..."
              gh run rerun "${RUN_ID}"
            else
              echo "Rerunning only failed jobs ..."
              gh run rerun "${RUN_ID}" --failed
            fi
          fi
        shell: bash
      # Only runs when the error check executed and found nothing, and the
      # caller asked to be notified.
      - name: Notify back error for repo=${{ inputs.owner }}/${{ inputs.repository }} and run-id=${{ inputs.run-id }}
        if: >
          steps.errors.outputs.found == 'false' &&
          inputs.notify-back-on-error == true
        env:
          DRY_RUN: ${{ inputs.dry-run }}
          GH_REPO: ${{ inputs.owner }}/${{ inputs.repository }}
          GH_TOKEN: ${{ steps.github-token.outputs.token }}
          NOTIFY_BACK_ERROR_MESSAGE: >
            This workflow attempted a retry but there was no match on the log message, so it was not retried.
            Please check: ${{ github.server_url }}/${{ inputs.owner }}/${{ inputs.repository }}/actions/runs/${{ inputs.run-id }}
          # Passed via the environment instead of `${{ }}` inside the script so
          # the ref value cannot be interpreted as shell code.
          NOTIFY_BACK_GIT_REF: ${{ inputs.notify-back-git-ref }}
          RUN_ID: ${{ inputs.run-id }}
        run: |
          # trigger a new run with error notification
          # Resolve the workflow file of the original run from its `path` field.
          path=$(gh api "/repos/${GH_REPO}/actions/runs/${RUN_ID}" \
            --jq '.path'
          )
          workflow_name=$(basename "$path")
          if [ "${DRY_RUN}" = "true" ]; then
            echo "Skipping notify back since this is a dry-run ..."
          else
            echo "notifying back to workflow=${workflow_name} ..."
            gh workflow run "${workflow_name}" \
              --field notify_back_error_message="${NOTIFY_BACK_ERROR_MESSAGE}" \
              --ref="${NOTIFY_BACK_GIT_REF}"
          fi
        shell: bash