feat: BF16 GEMM using cuDNN backend #426
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PR Test: CI pipeline that runs on AWS self-hosted runners.
# Triggered by pushes and pull requests targeting main; runs the AOT build
# tests and the GPU unit tests.
# Containers are launched through ci/bash.sh (same as Jenkins).
#
# Permission control:
#   - Push to main: always runs.
#   - PR from org members (ci-users team): runs automatically.
#   - PR from external contributors: requires the 'run-ci' label, which is
#     added via the `@flashinfer-bot run` command from an authorized user.
name: PR Test

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
    # 'labeled' is included so that adding the 'run-ci' label starts a run.
    types:
      - opened
      - synchronize
      - reopened
      - labeled
  workflow_dispatch:
    inputs:
      skip_aot:
        description: "Skip AOT build tests"
        type: boolean
        default: false
      skip_gpu:
        description: "Skip GPU tests"
        type: boolean
        default: false

# One active run per ref: a newer run cancels the one still in progress.
concurrency:
  group: pr-test-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read
  pull-requests: write

env:
  EXECUTOR_NUMBER: "0"

jobs:
# ---------------------------------------------------------------------------
# Gate - Check if PR is authorized to run CI
#
# Output `authorized` is 'true' when any of the following holds:
#   1. the event is not a pull_request (push to main, workflow_dispatch);
#   2. the PR carries the 'run-ci' label;
#   3. the PR author is listed in the org's ci-users team (or, when no API
#      token is available, has OWNER/MEMBER/COLLABORATOR association).
# Otherwise it is 'false'. The step itself always exits 0.
# ---------------------------------------------------------------------------
  gate:
    name: Permission Check
    runs-on: ubuntu-latest
    outputs:
      authorized: ${{ steps.check.outputs.authorized }}
    steps:
      - name: Check authorization
        id: check
        # Event data is handed to the script through environment variables
        # instead of being spliced into the script text, so its content can
        # never be interpreted as shell syntax.
        env:
          GH_TOKEN: ${{ secrets.FLASHINFER_GITHUB_TOKEN }}
          EVENT_NAME: ${{ github.event_name }}
          HAS_RUN_CI_LABEL: ${{ contains(github.event.pull_request.labels.*.name, 'run-ci') }}
          AUTHOR: ${{ github.event.pull_request.user.login }}
          ASSOC: ${{ github.event.pull_request.author_association }}
          ORG: ${{ github.repository_owner }}
          TEAM: ci-users
        run: |
          # Always allow push to main and workflow_dispatch
          if [[ "$EVENT_NAME" != "pull_request" ]]; then
            echo "authorized=true" >> "$GITHUB_OUTPUT"
            echo "Not a PR, authorized"
            exit 0
          fi

          # Check if PR has run-ci label
          if [[ "$HAS_RUN_CI_LABEL" == "true" ]]; then
            echo "authorized=true" >> "$GITHUB_OUTPUT"
            echo "PR has run-ci label, authorized"
            exit 0
          fi

          # Check if PR author is a member of ci-users team
          echo "Checking if $AUTHOR is a member of $ORG/$TEAM..."

          if [[ -z "$GH_TOKEN" ]]; then
            echo "::warning::FLASHINFER_GITHUB_TOKEN not set, falling back to association check"
            # Fallback: check if author has write access
            if [[ "$ASSOC" =~ ^(OWNER|MEMBER|COLLABORATOR)$ ]]; then
              echo "authorized=true" >> "$GITHUB_OUTPUT"
              echo "PR author has $ASSOC access, authorized"
            else
              echo "authorized=false" >> "$GITHUB_OUTPUT"
              echo "PR author is $ASSOC, not authorized"
            fi
            exit 0
          fi

          # Check team membership. stderr is captured too so that a failed
          # call can be reported in the warning below.
          MEMBERS=$(gh api \
            -H "Accept: application/vnd.github+json" \
            -H "X-GitHub-Api-Version: 2022-11-28" \
            --paginate \
            "/orgs/${ORG}/teams/${TEAM}/members" \
            --jq '.[].login' 2>&1) || {
            echo "::warning::Failed to get team members: $MEMBERS"
            echo "authorized=false" >> "$GITHUB_OUTPUT"
            exit 0
          }

          # -F: compare the login literally (a login such as "name[bot]" must
          # not be read as a regex); -x: the whole line has to match.
          if grep -Fqx -- "$AUTHOR" <<< "$MEMBERS"; then
            echo "authorized=true" >> "$GITHUB_OUTPUT"
            echo "$AUTHOR is a member of $TEAM, authorized"
          else
            echo "authorized=false" >> "$GITHUB_OUTPUT"
            echo "$AUTHOR is not a member of $TEAM, not authorized"
          fi
# ---------------------------------------------------------------------------
# Setup - Read docker tag and check if build should be skipped
#
# Outputs:
#   docker_tag - tag read from ci/docker-tags.yml for the cu129 CI image
#   skip_build - 'true' when a PR only touches files matched by SKIP_PATTERNS
# ---------------------------------------------------------------------------
  setup:
    name: Setup
    needs: gate
    if: needs.gate.outputs.authorized == 'true'
    runs-on: ubuntu-latest
    outputs:
      docker_tag: ${{ steps.get-tag.outputs.tag }}
      skip_build: ${{ steps.check.outputs.skip }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Get Docker Tag
        id: get-tag
        run: |
          TAG=$(grep 'flashinfer/flashinfer-ci-cu129:' ci/docker-tags.yml | cut -d':' -f2 | tr -d ' ')
          if [ -z "$TAG" ]; then
            echo "::error::Failed to extract Docker tag from ci/docker-tags.yml"
            exit 1
          fi
          echo "tag=$TAG" >> "$GITHUB_OUTPUT"
          echo "Docker tag: $TAG"
      - name: Check Skip Conditions
        id: check
        # Event data is passed through the environment rather than being
        # interpolated into the script text.
        env:
          EVENT_NAME: ${{ github.event_name }}
          BASE_SHA: ${{ github.event.pull_request.base.sha }}
          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        run: |
          # Pushes and manual dispatches never skip the build.
          if [ "$EVENT_NAME" != "pull_request" ]; then
            echo "skip=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          # Use PR event SHAs for reliable diff (avoids issues with origin refs)
          CHANGED=$(git diff --name-only "$BASE_SHA...$HEAD_SHA")

          # TODO (yongwww): Add back ^\.github/ before merging to main
          # README.md is matched as a complete file name in any directory; the
          # dot is escaped so look-alike paths (e.g. README_md.py) still build.
          SKIP_PATTERNS='(^|/)README\.md$|^docs/|^docker/|^licenses/|^LICENSE$|^NOTICE$|^version\.txt$'

          # Skip only if every changed file matches one of the patterns.
          SKIP=true
          while IFS= read -r file; do
            if [ -n "$file" ] && ! echo "$file" | grep -qE "$SKIP_PATTERNS"; then
              SKIP=false
              break
            fi
          done <<< "$CHANGED"

          echo "skip=$SKIP" >> "$GITHUB_OUTPUT"
          if [ "$SKIP" == "true" ]; then
            echo "::notice::Skipping build - only docs/config files changed"
          fi
# ---------------------------------------------------------------------------
# AOT Build Import Tests - x86_64 and aarch64 (multiple CUDA versions)
# Uses ci/bash.sh with --no-gpu (same as Jenkins)
#
# Runs only when the gate authorized the run, setup did not flag the change
# as skippable, and the manual 'skip_aot' input is not set.
# ---------------------------------------------------------------------------
  aot-build-import:
    name: AOT Build Import (${{ matrix.arch }}, ${{ matrix.cuda }})
    needs: [gate, setup]
    if: |
      needs.gate.outputs.authorized == 'true' &&
      needs.setup.outputs.skip_build != 'true' &&
      github.event.inputs.skip_aot != 'true'
    runs-on:
      - self-hosted
      - Linux
      - ${{ matrix.arch }}
      - cpu
    timeout-minutes: 360
    strategy:
      fail-fast: false
      matrix:
        arch: [X64, ARM64]
        cuda: [cu126, cu128, cu129, cu130]
    env:
      DOCKER_IMAGE: flashinfer/flashinfer-ci-${{ matrix.cuda }}:${{ needs.setup.outputs.docker_tag }}
    steps:
      - name: Cleanup
        run: |
          # Stop all Docker containers to free memory
          docker stop $(docker ps -q) 2>/dev/null || true
          docker rm $(docker ps -aq) 2>/dev/null || true
          # Clean workspace and caches.
          # The path is quoted, and ':?' aborts the step if the variable is
          # ever empty instead of letting 'rm -rf' expand to '/*'.
          sudo rm -rf "${GITHUB_WORKSPACE:?}"/* || true
          sudo rm -rf "${GITHUB_WORKSPACE:?}"/.[!.]* || true
          rm -rf ~/.cache/flashinfer_jit || true
          docker system prune -f || true
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: flashinfer
          password: ${{ secrets.DOCKERHUB_TOKEN }}
        continue-on-error: true  # Don't fail if secret is unavailable (e.g., fork PRs)
      - name: Show Node Info
        run: ./scripts/task_show_node_info.sh
        env:
          NODE_NAME: ${{ runner.name }}
          WORKSPACE: ${{ github.workspace }}
          BUILD_NUMBER: ${{ github.run_number }}
      - name: Test JIT Cache Package Build and Import
        run: bash ci/bash.sh "${DOCKER_IMAGE}" --no-gpu ./scripts/task_test_jit_cache_package_build_import.sh
# ---------------------------------------------------------------------------
# GPU JIT Tests - SM86 (A10G) - 5 Shards
# Uses ci/bash.sh with GPU (same as Jenkins)
#
# Each matrix shard N runs scripts/task_jit_run_tests_partN.sh. Runs only
# when the gate authorized the run, setup did not flag the change as
# skippable, and the manual 'skip_gpu' input is not set.
# ---------------------------------------------------------------------------
  gpu-tests-a10g:
    name: JIT Unittest ${{ matrix.shard }} (A10G)
    needs: [gate, setup]
    if: |
      needs.gate.outputs.authorized == 'true' &&
      needs.setup.outputs.skip_build != 'true' &&
      github.event.inputs.skip_gpu != 'true'
    runs-on: [self-hosted, Linux, X64, gpu, sm86]
    timeout-minutes: 360
    strategy:
      fail-fast: false
      matrix:
        shard: [1, 2, 3, 4, 5]
    env:
      DOCKER_IMAGE: flashinfer/flashinfer-ci-cu129:${{ needs.setup.outputs.docker_tag }}
    steps:
      - name: Cleanup
        run: |
          # Stop all Docker containers to free GPU memory
          docker stop $(docker ps -q) 2>/dev/null || true
          docker rm $(docker ps -aq) 2>/dev/null || true
          # Clean workspace and caches.
          # The path is quoted, and ':?' aborts the step if the variable is
          # ever empty instead of letting 'rm -rf' expand to '/*'.
          sudo rm -rf "${GITHUB_WORKSPACE:?}"/* || true
          sudo rm -rf "${GITHUB_WORKSPACE:?}"/.[!.]* || true
          rm -rf ~/.cache/flashinfer_jit || true
          docker system prune -f || true
          nvidia-smi || true
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: flashinfer
          password: ${{ secrets.DOCKERHUB_TOKEN }}
        continue-on-error: true  # Don't fail if secret is unavailable (e.g., fork PRs)
      - name: Show Node Info
        run: ./scripts/task_show_node_info.sh
        env:
          NODE_NAME: ${{ runner.name }}
          WORKSPACE: ${{ github.workspace }}
          BUILD_NUMBER: ${{ github.run_number }}
      - name: Run JIT Unittest Part ${{ matrix.shard }}
        run: bash ci/bash.sh "${DOCKER_IMAGE}" ./scripts/task_jit_run_tests_part${{ matrix.shard }}.sh
# ---------------------------------------------------------------------------
# GPU JIT Tests - SM75 (T4) - sampling tests only
# Uses ci/bash.sh with GPU (same as Jenkins)
#
# Runs scripts/task_jit_run_tests_part3.sh on an sm75 runner. Runs only when
# the gate authorized the run, setup did not flag the change as skippable,
# and the manual 'skip_gpu' input is not set.
# ---------------------------------------------------------------------------
  gpu-tests-t4:
    name: JIT Unittest (T4)
    needs: [gate, setup]
    if: |
      needs.gate.outputs.authorized == 'true' &&
      needs.setup.outputs.skip_build != 'true' &&
      github.event.inputs.skip_gpu != 'true'
    runs-on: [self-hosted, Linux, X64, gpu, sm75]
    timeout-minutes: 360
    env:
      DOCKER_IMAGE: flashinfer/flashinfer-ci-cu129:${{ needs.setup.outputs.docker_tag }}
    steps:
      - name: Cleanup
        run: |
          # Stop all Docker containers to free GPU memory
          docker stop $(docker ps -q) 2>/dev/null || true
          docker rm $(docker ps -aq) 2>/dev/null || true
          # Clean workspace and caches.
          # The path is quoted, and ':?' aborts the step if the variable is
          # ever empty instead of letting 'rm -rf' expand to '/*'.
          sudo rm -rf "${GITHUB_WORKSPACE:?}"/* || true
          sudo rm -rf "${GITHUB_WORKSPACE:?}"/.[!.]* || true
          rm -rf ~/.cache/flashinfer_jit || true
          docker system prune -f || true
          nvidia-smi || true
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: flashinfer
          password: ${{ secrets.DOCKERHUB_TOKEN }}
        continue-on-error: true  # Don't fail if secret is unavailable (e.g., fork PRs)
      - name: Show Node Info
        run: ./scripts/task_show_node_info.sh
        env:
          NODE_NAME: ${{ runner.name }}
          WORKSPACE: ${{ github.workspace }}
          BUILD_NUMBER: ${{ github.run_number }}
      - name: Run JIT Unittest Part 3 (T4)
        run: bash ci/bash.sh "${DOCKER_IMAGE}" ./scripts/task_jit_run_tests_part3.sh
# ---------------------------------------------------------------------------
# Test Results Summary
#
# Always runs. Writes a summary to the step summary and fails when:
#   - the gate job itself did not succeed, or
#   - a test job did not succeed and was not explicitly skipped via the
#     workflow_dispatch inputs.
# Exits 0 when the PR is not authorized or when setup flagged the build as
# skippable.
# ---------------------------------------------------------------------------
  test-results-summary:
    name: Test Results Summary
    if: always()
    needs: [gate, setup, aot-build-import, gpu-tests-a10g, gpu-tests-t4]
    runs-on: ubuntu-latest
    steps:
      - name: Check Results
        # Job results and inputs are passed through the environment rather
        # than being interpolated into the script text.
        env:
          GATE_RESULT: ${{ needs.gate.result }}
          AUTHORIZED: ${{ needs.gate.outputs.authorized }}
          SKIP_BUILD: ${{ needs.setup.outputs.skip_build }}
          AOT: ${{ needs.aot-build-import.result }}
          A10G: ${{ needs.gpu-tests-a10g.result }}
          T4: ${{ needs.gpu-tests-t4.result }}
          SKIP_AOT: ${{ github.event.inputs.skip_aot }}
          SKIP_GPU: ${{ github.event.inputs.skip_gpu }}
        run: |
          echo "## Test Results Summary" >> "$GITHUB_STEP_SUMMARY"

          # A gate job that failed or was cancelled leaves 'authorized' empty.
          # Report that as a failure instead of "pending authorization".
          if [ "$GATE_RESULT" != "success" ]; then
            echo "Permission check did not complete (result: $GATE_RESULT)" >> "$GITHUB_STEP_SUMMARY"
            exit 1
          fi

          # Check if CI was skipped due to permissions
          if [ "$AUTHORIZED" != "true" ]; then
            echo "CI skipped (pending authorization)" >> "$GITHUB_STEP_SUMMARY"
            echo "A contributor in @flashinfer-ai/ci-users can comment \`@flashinfer-bot run\` to approve." >> "$GITHUB_STEP_SUMMARY"
            exit 0
          fi

          if [ "$SKIP_BUILD" == "true" ]; then
            echo "Build skipped (docs/config only changes)" >> "$GITHUB_STEP_SUMMARY"
            exit 0
          fi

          echo "AOT Build Import: $AOT" >> "$GITHUB_STEP_SUMMARY"
          echo "GPU Tests (A10G): $A10G" >> "$GITHUB_STEP_SUMMARY"
          echo "GPU Tests (T4): $T4" >> "$GITHUB_STEP_SUMMARY"

          # Fail if any required job is not success (unless explicitly skipped)
          if { [ "$AOT" != "success" ] && [ "$SKIP_AOT" != "true" ]; } || \
             { [ "$A10G" != "success" ] && [ "$SKIP_GPU" != "true" ]; } || \
             { [ "$T4" != "success" ] && [ "$SKIP_GPU" != "true" ]; }; then
            echo "**Tests Failed**" >> "$GITHUB_STEP_SUMMARY"
            exit 1
          fi

          echo "**Tests Passed**" >> "$GITHUB_STEP_SUMMARY"