# Retry workflow run with errors #102459
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: Retry workflow run with errors
# Manually/API dispatched only: every parameter of the retry is supplied as a
# workflow_dispatch input by the caller.
on:
  workflow_dispatch:
    inputs:
      dry-run:
        description: |
          If enabled, only checks for errors in the workflow run and does not retry.
        default: false
        type: boolean
      error-messages:
        description: |
          Error messages to look for in annotations and logs.
          The workflow run is only retried if any of the error messages are found.
          If this parameter is an empty array ('[]'), the workflow will be retried without checking for specific error messages.
        # Kept as a quoted string: the job compares this input against the
        # literal text '[]' to decide whether the error check is skipped.
        default: '[]'
      run-id:
        description: Id of the workflow run to retry
        required: true
      attempt:
        description: |
          Attempt number of the workflow run to retry.
          If specified, the retry is only performed when the provided attempt number
          matches the current latest attempt of the workflow run.
          This prevents rare cases where a retry for an older attempt is still pending
          while a newer attempt has already started, preventing outdated retries from
          interfering with the latest attempt.
        required: false
      owner:
        description: Owner of the GitHub repository of the workflow run to retry
        required: true
      repository:
        description: GitHub repository of the workflow run to retry
        required: true
      notify-back-on-error:
        # Calling workflow must implement the `notify_back_error_message` parameter and forward the error.
        description: |
          If enabled, notifies back (i.e. trigger a new run) with an error message when specified error messages are not found.
        default: false
        type: boolean
      notify-back-git-ref:
        description: Git reference of the calling workflow to notify back
        default: main
      # NOTE(review): snake_case unlike the other inputs; presumably the name is
      # expected by the dispatching tooling - confirm with callers before renaming.
      distinct_id:
        description: |
          Random distinct Id to easily find the dispatched workflow run
          Needed by https://github.com/Codex-/return-dispatch as workaround for https://github.com/cli/cli/issues/4001
        required: false
        default: "not-set"
      rerun-whole-workflow:
        description: |
          If set to true, the workflow will be re-triggered in its entirety.
          This is useful when you want to retry the entire workflow instead of
          just failed jobs.
        required: false
        type: boolean
        default: false
jobs:
  # Single job: wait for the target run to complete, optionally scan it for the
  # requested error messages, then either re-run it or notify the caller back.
  retry:
    name: Retry workflow run
    runs-on: ubuntu-latest
    # Also bounds the polling loop below, whose only exit is status "completed".
    timeout-minutes: 30
    steps:
      - name: Echo distinct id '${{ inputs.distinct_id }}' to enable codex-/return-dispatch to find this workflow run
        env:
          # Passed via the environment rather than expanded into the script
          # text, so the input value cannot be interpreted as shell code.
          DISTINCT_ID: ${{ inputs.distinct_id }}
        run: |
          echo "${DISTINCT_ID}"
        shell: bash
      # Required because the error check below uses an action by local path.
      - name: Checkout
        uses: actions/checkout@v6
      - name: Import Secrets
        id: secrets
        uses: hashicorp/vault-action@v4.0.0
        with:
          url: ${{ secrets.VAULT_ADDR }}
          method: approle
          roleId: ${{ secrets.VAULT_ROLE_ID }}
          secretId: ${{ secrets.VAULT_SECRET_ID }}
          secrets: |
            secret/data/products/infra/ci/retrigger-gha-workflow RETRIGGER_APP_KEY;
            secret/data/products/infra/ci/retrigger-gha-workflow RETRIGGER_APP_ID;
      # The token is requested for the owner/repository of the run to retry and
      # is used by every later step that talks to GitHub.
      - name: Generate a GitHub Access Token from Github App
        id: github-token
        uses: actions/create-github-app-token@v3
        with:
          app-id: ${{ steps.secrets.outputs.RETRIGGER_APP_ID }}
          private-key: ${{ steps.secrets.outputs.RETRIGGER_APP_KEY }}
          owner: ${{ inputs.owner }}
          repositories: ${{ inputs.repository }}
      - name: Wait for workflow run-id=${{ inputs.run-id }} to complete, repo=${{ inputs.owner }}/${{ inputs.repository }}
        env:
          ATTEMPT: ${{ inputs.attempt }}
          GH_REPO: ${{ inputs.owner }}/${{ inputs.repository }}
          GH_TOKEN: ${{ steps.github-token.outputs.token }}
          RUN_ID: ${{ inputs.run-id }}
        run: |
          # Wait until the workflow run is completed
          echo "Waiting for the workflow run to complete ..."
          # Expands to "--attempt <n>" only when an attempt number was provided.
          ATTEMPT_GH_CLI_FLAG="${ATTEMPT:+--attempt $ATTEMPT}"
          while true; do
            # Left unquoted on purpose so the flag and its value split into two words.
            # shellcheck disable=SC2086
            status=$(gh run view "${RUN_ID}" ${ATTEMPT_GH_CLI_FLAG} --json status --jq '.status')
            echo "Current status: ${status}"
            if [ "${status}" = "completed" ]; then
              break
            fi
            sleep 3
          done
        shell: bash
      # Skipped when no error messages were requested; its `found` output
      # drives the retry and notify-back steps below.
      - name: Check annotations and job logs for errors
        id: errors
        if: inputs.error-messages != '[]'
        uses: ./rerun-failed-run/check-annotations-and-logs
        with:
          error-messages: ${{ inputs.error-messages }}
          github-token: ${{ steps.github-token.outputs.token }}
          owner: ${{ inputs.owner }}
          repository: ${{ inputs.repository }}
          run-id: ${{ inputs.run-id }}
          attempt: ${{ inputs.attempt }}
      # Runs when a requested error message was found, or unconditionally when
      # the caller passed an empty error-messages array.
      - name: Retry workflow run for repo=${{ inputs.owner }}/${{ inputs.repository }} and run-id=${{ inputs.run-id }}
        if: steps.errors.outputs.found == 'true' || inputs.error-messages == '[]'
        env:
          ATTEMPT: ${{ inputs.attempt }}
          DRY_RUN: ${{ inputs.dry-run }}
          GH_REPO: ${{ inputs.owner }}/${{ inputs.repository }}
          GH_TOKEN: ${{ steps.github-token.outputs.token }}
          RUN_ID: ${{ inputs.run-id }}
          RERUN_WHOLE_WORKFLOW: ${{ inputs.rerun-whole-workflow }}
        run: |
          # trigger a new run retrying failed jobs or the whole workflow
          last_attempt=$(gh run view "${RUN_ID}" --json attempt --jq '.attempt')
          if [ "${DRY_RUN}" = "true" ]; then
            echo "Skipping retry since this is a dry-run ..."
          elif [ -n "${ATTEMPT}" ] && [ "${last_attempt}" -ne "$ATTEMPT" ]; then
            # A newer attempt already exists, so this retry request is stale.
            echo "The specified attempt number to retry '$ATTEMPT' does not match the latest attempt number ${last_attempt}, meaning the workflow has been retried already for this attempt. Aborting ..."
          else
            echo "Triggering a new attempt of run-id=${RUN_ID} ..."
            if [ "${RERUN_WHOLE_WORKFLOW}" = "true" ]; then
              echo "Rerunning the whole workflow ..."
              gh run rerun "${RUN_ID}"
            else
              echo "Rerunning only failed jobs ..."
              gh run rerun "${RUN_ID}" --failed
            fi
          fi
        shell: bash
      # Runs only when the error check executed and reported no match, and the
      # caller asked to be notified back.
      - name: Notify back error for repo=${{ inputs.owner }}/${{ inputs.repository }} and run-id=${{ inputs.run-id }}
        if: >
          steps.errors.outputs.found == 'false' &&
          inputs.notify-back-on-error == true
        env:
          DRY_RUN: ${{ inputs.dry-run }}
          GH_REPO: ${{ inputs.owner }}/${{ inputs.repository }}
          GH_TOKEN: ${{ steps.github-token.outputs.token }}
          NOTIFY_BACK_ERROR_MESSAGE: >
            This workflow attempted a retry but there was no match on the log message, so it was not retried.
            Please check: ${{ github.server_url }}/${{ inputs.owner }}/${{ inputs.repository }}/actions/runs/${{ inputs.run-id }}
          # Passed via the environment rather than expanded into the script
          # text, so the input value cannot be interpreted as shell code.
          NOTIFY_BACK_GIT_REF: ${{ inputs.notify-back-git-ref }}
          RUN_ID: ${{ inputs.run-id }}
        run: |
          # trigger a new run with error notification
          # Resolve the workflow file of the original run from its `path` field.
          path=$(gh api "/repos/${GH_REPO}/actions/runs/${RUN_ID}" \
            --jq '.path'
          )
          workflow_name=$(basename "$path")
          if [ "${DRY_RUN}" = "true" ]; then
            echo "Skipping notify back since this is a dry-run ..."
          else
            echo "notifying back to workflow=${workflow_name} ..."
            gh workflow run "${workflow_name}" \
              --field notify_back_error_message="${NOTIFY_BACK_ERROR_MESSAGE}" \
              --ref="${NOTIFY_BACK_GIT_REF}"
          fi
        shell: bash