AMD CI Job Monitor #5
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Workflow that reports on AMD CI jobs by running scripts/ci/utils/query_job_status.py. | |
| name: AMD CI Job Monitor | |
| on: | |
| schedule: | |
| - cron: '0 0 * * *' # Daily at midnight UTC | |
| # Also run on pull requests that touch this workflow file or the script it calls. | |
| pull_request: | |
| paths: | |
| - '.github/workflows/amd-ci-job-monitor.yml' | |
| - 'scripts/ci/utils/query_job_status.py' | |
| # Manual runs. The jobs below read these values as `inputs.hours` and `inputs.job_filter`. | |
| workflow_dispatch: | |
| inputs: | |
| # Steps use `inputs.hours || '24'`, so an empty value also resolves to 24. | |
| hours: | |
| description: 'Time window in hours' | |
| required: false | |
| default: '24' | |
| type: string | |
| # Non-empty: only fetch-actions-data and custom-report run. | |
| # Empty: the parse-workflows, matrix report and runner-fleet jobs run instead. | |
| job_filter: | |
| description: 'Job name filter (leave empty for all AMD jobs)' | |
| required: false | |
| type: string | |
| jobs: | |
| # Runs query_job_status.py once with --dump-data-file and uploads the resulting | |
| # actions-job-snapshot.json as the `actions-job-snapshot` artifact, which the | |
| # report jobs download instead of querying again. | |
| fetch-actions-data: | |
| name: Fetch Actions Snapshot | |
| runs-on: ubuntu-latest | |
| env: | |
| GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Set up Python | |
| uses: actions/setup-python@v5 | |
| with: | |
| python-version: '3.10' | |
| - name: Install dependencies | |
| run: pip install tabulate | |
| - name: Select workflows for snapshot | |
| id: select-workflows | |
| env: | |
| # Hand the dispatch input to the script through the environment instead of | |
| # expanding it into the script text, so it can never be parsed as shell code. | |
| JOB_FILTER: ${{ inputs.job_filter }} | |
| run: | | |
| # With a job filter only custom-report runs, and it reads pr-test-amd.yml only. | |
| if [[ -n "$JOB_FILTER" ]]; then | |
| echo "workflows=pr-test-amd.yml" >> "$GITHUB_OUTPUT" | |
| else | |
| echo "workflows=pr-test-amd.yml,nightly-test-amd.yml,pr-test-amd-rocm720.yml,nightly-test-amd-rocm720.yml" >> "$GITHUB_OUTPUT" | |
| fi | |
| - name: Fetch Actions data snapshot | |
| timeout-minutes: 30 | |
| env: | |
| # `inputs.hours` is empty on schedule and pull_request runs, hence the fallback. | |
| HOURS: ${{ inputs.hours || '24' }} | |
| run: | | |
| python scripts/ci/utils/query_job_status.py \ | |
| --repo ${{ github.repository }} \ | |
| --workflow "${{ steps.select-workflows.outputs.workflows }}" \ | |
| --hours "$HOURS" \ | |
| --dump-data-file actions-job-snapshot.json | |
| - name: Upload Actions data snapshot | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: actions-job-snapshot | |
| path: actions-job-snapshot.json | |
| # Fail the job rather than publish nothing: every report job needs this file. | |
| if-no-files-found: error | |
| # Single job filter mode: runs only when the `job_filter` input is non-empty and | |
| # reports on that one job of pr-test-amd.yml from the downloaded snapshot. | |
| custom-report: | |
| name: Custom Job Report | |
| if: ${{ inputs.job_filter }} | |
| needs: fetch-actions-data | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Set up Python | |
| uses: actions/setup-python@v5 | |
| with: | |
| python-version: '3.10' | |
| - name: Install dependencies | |
| run: pip install tabulate | |
| - name: Download Actions data snapshot | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: actions-job-snapshot | |
| path: ci-data | |
| - name: Generate Custom Job Report | |
| timeout-minutes: 30 | |
| env: | |
| # Free-text dispatch inputs reach the script as environment variables, not as | |
| # script text, so quotes or `$(...)` in them are treated as plain data. | |
| JOB_FILTER: ${{ inputs.job_filter }} | |
| HOURS: ${{ inputs.hours || '24' }} | |
| run: | | |
| python scripts/ci/utils/query_job_status.py \ | |
| --repo ${{ github.repository }} \ | |
| --job "$JOB_FILTER" \ | |
| --workflow "pr-test-amd.yml" \ | |
| --hours "$HOURS" \ | |
| --input-data-file ci-data/actions-job-snapshot.json \ | |
| --summary | |
| # Parse workflow files to get job names dynamically. | |
| # Each output is a compact JSON array of job ids that a report job feeds to fromJson() | |
| # to build its matrix. Skipped when a job filter is given. | |
| parse-workflows: | |
| name: Parse Workflow Jobs | |
| if: ${{ !inputs.job_filter }} | |
| runs-on: ubuntu-latest | |
| outputs: | |
| pr_jobs: ${{ steps.parse.outputs.pr_jobs }} | |
| nightly_jobs: ${{ steps.parse.outputs.nightly_jobs }} | |
| pr_rocm720_jobs: ${{ steps.parse.outputs.pr_rocm720_jobs }} | |
| nightly_rocm720_jobs: ${{ steps.parse.outputs.nightly_rocm720_jobs }} | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Parse workflow files | |
| id: parse | |
| run: | | |
| # Print the job ids of workflow file $1 as a compact JSON array, | |
| # dropping every id that matches the extended regex $2. | |
| list_jobs() { | |
| yq -r '.jobs | keys | .[]' "$1" | \ | |
| grep -v -E "$2" | \ | |
| jq -R -s -c 'split("\n") | map(select(length > 0))' | |
| } | |
| # Utility jobs excluded from the PR workflows: | |
| # call-gate, check-changes, pr-test-amd-finish, cancel, check-all-jobs | |
| pr_exclude='^(call-gate|check-changes|pr-test-amd-finish|cancel|check-all-jobs)$' | |
| # Utility jobs excluded from the nightly workflows: check-all-jobs | |
| nightly_exclude='^(check-all-jobs)$' | |
| # pr-test-amd.yml | |
| pr_jobs=$(list_jobs .github/workflows/pr-test-amd.yml "$pr_exclude") | |
| echo "pr_jobs=$pr_jobs" >> "$GITHUB_OUTPUT" | |
| echo "PR jobs: $pr_jobs" | |
| # nightly-test-amd.yml | |
| nightly_jobs=$(list_jobs .github/workflows/nightly-test-amd.yml "$nightly_exclude") | |
| echo "nightly_jobs=$nightly_jobs" >> "$GITHUB_OUTPUT" | |
| echo "Nightly jobs: $nightly_jobs" | |
| # pr-test-amd-rocm720.yml | |
| pr_rocm720_jobs=$(list_jobs .github/workflows/pr-test-amd-rocm720.yml "$pr_exclude") | |
| echo "pr_rocm720_jobs=$pr_rocm720_jobs" >> "$GITHUB_OUTPUT" | |
| echo "PR ROCm 7.2 jobs: $pr_rocm720_jobs" | |
| # nightly-test-amd-rocm720.yml | |
| nightly_rocm720_jobs=$(list_jobs .github/workflows/nightly-test-amd-rocm720.yml "$nightly_exclude") | |
| echo "nightly_rocm720_jobs=$nightly_rocm720_jobs" >> "$GITHUB_OUTPUT" | |
| echo "Nightly ROCm 7.2 jobs: $nightly_rocm720_jobs" | |
| # PR CI reports using dynamic matrix: one report job per job id that | |
| # parse-workflows emitted for pr-test-amd.yml. | |
| pr-ci-reports: | |
| name: PR - ${{ matrix.job_name }} | |
| needs: [parse-workflows, fetch-actions-data] | |
| if: ${{ !inputs.job_filter }} | |
| runs-on: ubuntu-latest | |
| strategy: | |
| # Let the remaining matrix entries finish when one report fails. | |
| fail-fast: false | |
| matrix: | |
| job_name: ${{ fromJson(needs.parse-workflows.outputs.pr_jobs) }} | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Set up Python | |
| uses: actions/setup-python@v5 | |
| with: | |
| python-version: '3.10' | |
| - name: Install dependencies | |
| run: pip install tabulate | |
| - name: Download Actions data snapshot | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: actions-job-snapshot | |
| path: ci-data | |
| - name: Generate Report | |
| timeout-minutes: 15 | |
| env: | |
| # Passed through the environment rather than expanded into the script text, | |
| # so neither value can be interpreted as shell syntax. | |
| JOB_NAME: ${{ matrix.job_name }} | |
| HOURS: ${{ inputs.hours || '24' }} | |
| run: | | |
| python scripts/ci/utils/query_job_status.py \ | |
| --repo ${{ github.repository }} \ | |
| --job "$JOB_NAME" \ | |
| --workflow "pr-test-amd.yml" \ | |
| --hours "$HOURS" \ | |
| --input-data-file ci-data/actions-job-snapshot.json \ | |
| --summary | |
| # Nightly AMD test reports using dynamic matrix: one report job per job id that | |
| # parse-workflows emitted for nightly-test-amd.yml. | |
| nightly-reports: | |
| name: Nightly - ${{ matrix.job_name }} | |
| needs: [parse-workflows, fetch-actions-data] | |
| if: ${{ !inputs.job_filter }} | |
| runs-on: ubuntu-latest | |
| strategy: | |
| # Let the remaining matrix entries finish when one report fails. | |
| fail-fast: false | |
| matrix: | |
| job_name: ${{ fromJson(needs.parse-workflows.outputs.nightly_jobs) }} | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Set up Python | |
| uses: actions/setup-python@v5 | |
| with: | |
| python-version: '3.10' | |
| - name: Install dependencies | |
| run: pip install tabulate | |
| - name: Download Actions data snapshot | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: actions-job-snapshot | |
| path: ci-data | |
| - name: Generate Nightly Report | |
| timeout-minutes: 15 | |
| env: | |
| # Passed through the environment rather than expanded into the script text, | |
| # so neither value can be interpreted as shell syntax. | |
| JOB_NAME: ${{ matrix.job_name }} | |
| HOURS: ${{ inputs.hours || '24' }} | |
| run: | | |
| python scripts/ci/utils/query_job_status.py \ | |
| --repo ${{ github.repository }} \ | |
| --job "$JOB_NAME" \ | |
| --workflow "nightly-test-amd.yml" \ | |
| --hours "$HOURS" \ | |
| --input-data-file ci-data/actions-job-snapshot.json \ | |
| --summary | |
| # PR ROCm 7.2 CI reports using dynamic matrix: one report job per job id that | |
| # parse-workflows emitted for pr-test-amd-rocm720.yml. | |
| pr-rocm720-ci-reports: | |
| name: PR ROCm720 - ${{ matrix.job_name }} | |
| needs: [parse-workflows, fetch-actions-data] | |
| if: ${{ !inputs.job_filter }} | |
| runs-on: ubuntu-latest | |
| strategy: | |
| # Let the remaining matrix entries finish when one report fails. | |
| fail-fast: false | |
| matrix: | |
| job_name: ${{ fromJson(needs.parse-workflows.outputs.pr_rocm720_jobs) }} | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Set up Python | |
| uses: actions/setup-python@v5 | |
| with: | |
| python-version: '3.10' | |
| - name: Install dependencies | |
| run: pip install tabulate | |
| - name: Download Actions data snapshot | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: actions-job-snapshot | |
| path: ci-data | |
| - name: Generate PR ROCm 7.2 Report | |
| timeout-minutes: 15 | |
| env: | |
| # Passed through the environment rather than expanded into the script text, | |
| # so neither value can be interpreted as shell syntax. | |
| JOB_NAME: ${{ matrix.job_name }} | |
| HOURS: ${{ inputs.hours || '24' }} | |
| run: | | |
| python scripts/ci/utils/query_job_status.py \ | |
| --repo ${{ github.repository }} \ | |
| --job "$JOB_NAME" \ | |
| --workflow "pr-test-amd-rocm720.yml" \ | |
| --hours "$HOURS" \ | |
| --input-data-file ci-data/actions-job-snapshot.json \ | |
| --summary | |
| # Nightly ROCm 7.2 reports using dynamic matrix: one report job per job id that | |
| # parse-workflows emitted for nightly-test-amd-rocm720.yml. | |
| nightly-rocm720-reports: | |
| name: Nightly ROCm720 - ${{ matrix.job_name }} | |
| needs: [parse-workflows, fetch-actions-data] | |
| if: ${{ !inputs.job_filter }} | |
| runs-on: ubuntu-latest | |
| strategy: | |
| # Let the remaining matrix entries finish when one report fails. | |
| fail-fast: false | |
| matrix: | |
| job_name: ${{ fromJson(needs.parse-workflows.outputs.nightly_rocm720_jobs) }} | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Set up Python | |
| uses: actions/setup-python@v5 | |
| with: | |
| python-version: '3.10' | |
| - name: Install dependencies | |
| run: pip install tabulate | |
| - name: Download Actions data snapshot | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: actions-job-snapshot | |
| path: ci-data | |
| - name: Generate Nightly ROCm 7.2 Report | |
| timeout-minutes: 15 | |
| env: | |
| # Passed through the environment rather than expanded into the script text, | |
| # so neither value can be interpreted as shell syntax. | |
| JOB_NAME: ${{ matrix.job_name }} | |
| HOURS: ${{ inputs.hours || '24' }} | |
| run: | | |
| python scripts/ci/utils/query_job_status.py \ | |
| --repo ${{ github.repository }} \ | |
| --job "$JOB_NAME" \ | |
| --workflow "nightly-test-amd-rocm720.yml" \ | |
| --hours "$HOURS" \ | |
| --input-data-file ci-data/actions-job-snapshot.json \ | |
| --summary | |
| # Runner fleet report - cross-workflow runner analytics in a single pass. | |
| # Runs query_job_status.py with --runner-report over all four AMD workflows, | |
| # reading the downloaded snapshot. Skipped when a job filter is given. | |
| runner-fleet-report: | |
| name: Runner Fleet Report | |
| if: ${{ !inputs.job_filter }} | |
| needs: fetch-actions-data | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Set up Python | |
| uses: actions/setup-python@v5 | |
| with: | |
| python-version: '3.10' | |
| - name: Install dependencies | |
| run: pip install tabulate | |
| - name: Download Actions data snapshot | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: actions-job-snapshot | |
| path: ci-data | |
| - name: Generate Runner Fleet Report | |
| timeout-minutes: 30 | |
| env: | |
| # Passed through the environment rather than expanded into the script text, | |
| # so the dispatch input cannot be interpreted as shell syntax. | |
| HOURS: ${{ inputs.hours || '24' }} | |
| run: | | |
| python scripts/ci/utils/query_job_status.py \ | |
| --repo ${{ github.repository }} \ | |
| --runner-report \ | |
| --workflow "pr-test-amd.yml,nightly-test-amd.yml,pr-test-amd-rocm720.yml,nightly-test-amd-rocm720.yml" \ | |
| --hours "$HOURS" \ | |
| --input-data-file ci-data/actions-job-snapshot.json \ | |
| --summary | |