# CI Failure Monitor #73
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: CI Failure Monitor

# Triggers: a twice-daily schedule plus manual runs from the Actions UI.
on:
  schedule:
    - cron: '0 */12 * * *'  # Every 12 hours (minute 0 of hours 0 and 12)
  workflow_dispatch:

# At most one run per ref; a newly started run cancels the one in progress.
concurrency:
  group: ci-failure-monitor-${{ github.ref }}
  cancel-in-progress: true

# Read-only access for the default workflow token.
permissions:
  contents: read
  actions: read
jobs:
  # Runs scripts/ci_monitor/ci_failures_analysis.py, uploads the JSON report it
  # writes as a build artifact, then hands the newest report to
  # scripts/ci_monitor/post_ci_failures_to_slack.py.
  failure-analysis:
    # Restricts the job to the upstream repository.
    # NOTE(review): this workflow is only triggered by `schedule` and
    # `workflow_dispatch`, so the `pull_request` clause looks unreachable —
    # confirm whether it is still needed.
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is never parsed as a float.
          python-version: '3.14'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install requests slack_sdk

      - name: Run Failure Analysis
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
          GH_PAT_FOR_RUNNER_ADMIN: ${{ secrets.GH_PAT_FOR_RUNNER_ADMIN }}
          # Environment values are strings; quote so YAML does not type it as an int.
          PYTHONUNBUFFERED: '1'
          PYTHONIOENCODING: utf-8
        run: |
          cd scripts/ci_monitor
          # Quote the expansions so the values are never word-split or globbed.
          python ci_failures_analysis.py \
            --token "$GITHUB_TOKEN" \
            --limit 100 \
            --output "ci_failure_analysis_$(date +%Y%m%d_%H%M%S).json"

      - name: Upload Analysis Results
        uses: actions/upload-artifact@v4
        with:
          name: ci-failure-analysis-${{ github.run_number }}
          path: |
            scripts/ci_monitor/ci_failure_analysis_*.json
          retention-days: 7

      - name: Send Slack Notification
        # Runs even when an earlier step failed.
        if: always()
        env:
          SGLANG_DIFFUSION_SLACK_TOKEN: ${{ secrets.SGLANG_DIFFUSION_SLACK_TOKEN }}
        run: |
          cd scripts/ci_monitor
          # Pick the most recently modified report. When no report exists the
          # glob does not match; silence ls and force a zero status so the
          # variable is simply empty and the check below handles it.
          LATEST_REPORT=$(ls -t ci_failure_analysis_*.json 2>/dev/null | head -1 || true)
          if [ ! -f "$LATEST_REPORT" ]; then
            echo "No report found, so skipping Slack notification"
            exit 0
          fi
          if [ -n "$SGLANG_DIFFUSION_SLACK_TOKEN" ]; then
            python3 post_ci_failures_to_slack.py --report-file "$LATEST_REPORT"
          else
            echo "SGLANG_DIFFUSION_SLACK_TOKEN not configured, skipping notification"
          fi