# Skip to content

# feat: BF16 GEMM using cuDNN backend #426

# feat: BF16 GEMM using cuDNN backend

# feat: BF16 GEMM using cuDNN backend #426

# Workflow file for this run

# CI workflow using AWS self-hosted runners.
# Runs AOT build tests and GPU unit tests on push/PR to main.
# Uses ci/bash.sh for Docker execution (same as Jenkins).
#
# Permission Control:
# - Push to main: Always runs
# - PR from org members (ci-users team): Runs automatically
# - PR from external contributors: Requires 'run-ci' label
# (added via @flashinfer-bot run command from authorized user)
name: PR Test

# Triggers: pushes to main, pull requests targeting main, and manual runs.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
    # "labeled" makes the workflow run again when a label is added to the PR
    # (the gate job looks for the 'run-ci' label).
    types:
      - opened
      - synchronize
      - reopened
      - labeled
  workflow_dispatch:
    # Manual runs may opt out of either test family.
    inputs:
      skip_aot:
        description: "Skip AOT build tests"
        type: boolean
        default: false
      skip_gpu:
        description: "Skip GPU tests"
        type: boolean
        default: false

# One concurrency group per ref; a newly queued run cancels the one in progress.
concurrency:
  group: pr-test-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read
  pull-requests: write

env:
  # Kept as a string. NOTE(review): presumably read by ci/bash.sh - confirm.
  EXECUTOR_NUMBER: '0'
jobs:
# ---------------------------------------------------------------------------
# Gate - Check if PR is authorized to run CI
#
# Publishes the job output `authorized` ("true" or "false").
# Decision order:
#   1. Any event other than pull_request       -> authorized
#   2. PR carries the 'run-ci' label           -> authorized
#   3. No API token available                  -> decide on author_association
#   4. Otherwise                               -> ci-users team membership
# ---------------------------------------------------------------------------
  gate:
    name: Permission Check
    runs-on: ubuntu-latest
    outputs:
      authorized: ${{ steps.check.outputs.authorized }}
    steps:
      - name: Check authorization
        id: check
        # Event-derived values are passed as environment variables instead of
        # being expanded into the script text, so their contents are never
        # parsed as shell syntax.
        env:
          GH_TOKEN: ${{ secrets.FLASHINFER_GITHUB_TOKEN }}
          EVENT_NAME: ${{ github.event_name }}
          HAS_RUN_CI_LABEL: ${{ contains(github.event.pull_request.labels.*.name, 'run-ci') }}
          AUTHOR: ${{ github.event.pull_request.user.login }}
          ASSOC: ${{ github.event.pull_request.author_association }}
          ORG: ${{ github.repository_owner }}
          TEAM: ci-users
        run: |
          # Always allow push to main and workflow_dispatch
          if [[ "$EVENT_NAME" != "pull_request" ]]; then
            echo "authorized=true" >> "$GITHUB_OUTPUT"
            echo "Not a PR, authorized"
            exit 0
          fi

          # Check if PR has run-ci label
          if [[ "$HAS_RUN_CI_LABEL" == "true" ]]; then
            echo "authorized=true" >> "$GITHUB_OUTPUT"
            echo "PR has run-ci label, authorized"
            exit 0
          fi

          # Check if PR author is a member of ci-users team
          echo "Checking if $AUTHOR is a member of $ORG/$TEAM..."

          if [[ -z "$GH_TOKEN" ]]; then
            echo "::warning::FLASHINFER_GITHUB_TOKEN not set, falling back to association check"
            # Fallback: decide on the author's association with the repository
            if [[ "$ASSOC" =~ ^(OWNER|MEMBER|COLLABORATOR)$ ]]; then
              echo "authorized=true" >> "$GITHUB_OUTPUT"
              echo "PR author has $ASSOC access, authorized"
            else
              echo "authorized=false" >> "$GITHUB_OUTPUT"
              echo "PR author is $ASSOC, not authorized"
            fi
            exit 0
          fi

          # Check team membership. stderr is captured as well so that a failed
          # API call can be reported in the warning below; a failure denies
          # authorization without failing the job.
          MEMBERS=$(gh api \
            -H "Accept: application/vnd.github+json" \
            -H "X-GitHub-Api-Version: 2022-11-28" \
            --paginate \
            "/orgs/${ORG}/teams/${TEAM}/members" \
            --jq '.[].login' 2>&1) || {
            echo "::warning::Failed to get team members: $MEMBERS"
            echo "authorized=false" >> "$GITHUB_OUTPUT"
            exit 0
          }

          # -F: compare the login as a fixed string, not a regular expression
          #     (logins such as "name[bot]" contain regex metacharacters).
          # -x: the whole line must match.
          if grep -Fqx -- "$AUTHOR" <<< "$MEMBERS"; then
            echo "authorized=true" >> "$GITHUB_OUTPUT"
            echo "$AUTHOR is a member of $TEAM, authorized"
          else
            echo "authorized=false" >> "$GITHUB_OUTPUT"
            echo "$AUTHOR is not a member of $TEAM, not authorized"
          fi
# ---------------------------------------------------------------------------
# Setup - Read docker tag and check if build should be skipped
#
# Outputs:
#   docker_tag - tag read from ci/docker-tags.yml (cu129 entry)
#   skip_build - "true" when every file changed by the PR matches SKIP_PATTERNS
# ---------------------------------------------------------------------------
  setup:
    name: Setup
    needs: gate
    if: needs.gate.outputs.authorized == 'true'
    runs-on: ubuntu-latest
    outputs:
      docker_tag: ${{ steps.get-tag.outputs.tag }}
      skip_build: ${{ steps.check.outputs.skip }}
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history; the diff in "Check Skip Conditions" needs both PR commits.
          fetch-depth: 0

      - name: Get Docker Tag
        id: get-tag
        run: |
          # Take the text after the first ':' on the cu129 line, spaces removed.
          TAG=$(grep 'flashinfer/flashinfer-ci-cu129:' ci/docker-tags.yml | cut -d':' -f2 | tr -d ' ')
          if [ -z "$TAG" ]; then
            echo "::error::Failed to extract Docker tag from ci/docker-tags.yml"
            exit 1
          fi
          echo "tag=$TAG" >> "$GITHUB_OUTPUT"
          echo "Docker tag: $TAG"

      - name: Check Skip Conditions
        id: check
        run: |
          # Only pull requests can be skipped; every other event always builds.
          if [ "${{ github.event_name }}" != "pull_request" ]; then
            echo "skip=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          # Use PR event SHAs for reliable diff (avoids issues with origin refs)
          BASE_SHA="${{ github.event.pull_request.base.sha }}"
          HEAD_SHA="${{ github.event.pull_request.head.sha }}"
          CHANGED=$(git diff --name-only "$BASE_SHA...$HEAD_SHA")

          # TODO (yongwww): Add back ^\.github/ before merging to main
          # The '.' in README.md is escaped so it only matches a literal dot.
          SKIP_PATTERNS="README\.md|^docs/|^docker/|^licenses/|^LICENSE$|^NOTICE$|^version\.txt$"

          # Skip only if no changed file falls outside the patterns above.
          SKIP=true
          while IFS= read -r file; do
            if [ -n "$file" ] && ! echo "$file" | grep -qE "$SKIP_PATTERNS"; then
              SKIP=false
              break
            fi
          done <<< "$CHANGED"

          echo "skip=$SKIP" >> "$GITHUB_OUTPUT"
          if [ "$SKIP" == "true" ]; then
            echo "::notice::Skipping build - only docs/config files changed"
          fi
# ---------------------------------------------------------------------------
# AOT Build Import Tests - x86_64 and aarch64 (multiple CUDA versions)
# Runs through ci/bash.sh with --no-gpu (same as Jenkins)
# ---------------------------------------------------------------------------
  aot-build-import:
    name: AOT Build Import (${{ matrix.arch }}, ${{ matrix.cuda }})
    needs:
      - gate
      - setup
    # Run only when authorized, the build is not skipped, and the manual
    # skip_aot input was not set.
    if: >-
      needs.gate.outputs.authorized == 'true' &&
      needs.setup.outputs.skip_build != 'true' &&
      github.event.inputs.skip_aot != 'true'
    strategy:
      fail-fast: false
      matrix:
        arch:
          - X64
          - ARM64
        cuda:
          - cu126
          - cu128
          - cu129
          - cu130
    runs-on:
      - self-hosted
      - Linux
      - ${{ matrix.arch }}
      - cpu
    timeout-minutes: 360
    env:
      DOCKER_IMAGE: flashinfer/flashinfer-ci-${{ matrix.cuda }}:${{ needs.setup.outputs.docker_tag }}
    steps:
      # Every command below is best-effort (`|| true`): a failed cleanup
      # never fails the job.
      - name: Cleanup
        run: |
          # Stop and remove all Docker containers to free memory
          docker stop $(docker ps -q) 2>/dev/null || true
          docker rm $(docker ps -aq) 2>/dev/null || true
          # Empty the workspace (including dotfiles) and drop the JIT cache
          sudo rm -rf ${{ github.workspace }}/* || true
          sudo rm -rf ${{ github.workspace }}/.[!.]* || true
          rm -rf ~/.cache/flashinfer_jit || true
          docker system prune -f || true

      - uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Login to Docker Hub
        # Don't fail if secret is unavailable (e.g., fork PRs)
        continue-on-error: true
        uses: docker/login-action@v3
        with:
          username: flashinfer
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Show Node Info
        env:
          NODE_NAME: ${{ runner.name }}
          WORKSPACE: ${{ github.workspace }}
          BUILD_NUMBER: ${{ github.run_number }}
        run: ./scripts/task_show_node_info.sh

      - name: Test JIT Cache Package Build and Import
        run: bash ci/bash.sh ${DOCKER_IMAGE} --no-gpu ./scripts/task_test_jit_cache_package_build_import.sh
# ---------------------------------------------------------------------------
# GPU JIT Tests - SM86 (A10G) - 5 Shards
# Runs through ci/bash.sh with GPU (same as Jenkins)
# ---------------------------------------------------------------------------
  gpu-tests-a10g:
    name: JIT Unittest ${{ matrix.shard }} (A10G)
    needs:
      - gate
      - setup
    # Run only when authorized, the build is not skipped, and the manual
    # skip_gpu input was not set.
    if: >-
      needs.gate.outputs.authorized == 'true' &&
      needs.setup.outputs.skip_build != 'true' &&
      github.event.inputs.skip_gpu != 'true'
    strategy:
      fail-fast: false
      matrix:
        # Each shard N runs scripts/task_jit_run_tests_partN.sh.
        shard:
          - 1
          - 2
          - 3
          - 4
          - 5
    runs-on:
      - self-hosted
      - Linux
      - X64
      - gpu
      - sm86
    timeout-minutes: 360
    env:
      DOCKER_IMAGE: flashinfer/flashinfer-ci-cu129:${{ needs.setup.outputs.docker_tag }}
    steps:
      # Every command below is best-effort (`|| true`): a failed cleanup
      # never fails the job.
      - name: Cleanup
        run: |
          # Stop and remove all Docker containers to free GPU memory
          docker stop $(docker ps -q) 2>/dev/null || true
          docker rm $(docker ps -aq) 2>/dev/null || true
          # Empty the workspace (including dotfiles) and drop the JIT cache
          sudo rm -rf ${{ github.workspace }}/* || true
          sudo rm -rf ${{ github.workspace }}/.[!.]* || true
          rm -rf ~/.cache/flashinfer_jit || true
          docker system prune -f || true
          nvidia-smi || true

      - uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Login to Docker Hub
        # Don't fail if secret is unavailable (e.g., fork PRs)
        continue-on-error: true
        uses: docker/login-action@v3
        with:
          username: flashinfer
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Show Node Info
        env:
          NODE_NAME: ${{ runner.name }}
          WORKSPACE: ${{ github.workspace }}
          BUILD_NUMBER: ${{ github.run_number }}
        run: ./scripts/task_show_node_info.sh

      - name: Run JIT Unittest Part ${{ matrix.shard }}
        run: bash ci/bash.sh ${DOCKER_IMAGE} ./scripts/task_jit_run_tests_part${{ matrix.shard }}.sh
# ---------------------------------------------------------------------------
# GPU JIT Tests - SM75 (T4) - sampling tests only
# Runs through ci/bash.sh with GPU (same as Jenkins)
# ---------------------------------------------------------------------------
  gpu-tests-t4:
    name: JIT Unittest (T4)
    needs:
      - gate
      - setup
    # Run only when authorized, the build is not skipped, and the manual
    # skip_gpu input was not set.
    if: >-
      needs.gate.outputs.authorized == 'true' &&
      needs.setup.outputs.skip_build != 'true' &&
      github.event.inputs.skip_gpu != 'true'
    runs-on:
      - self-hosted
      - Linux
      - X64
      - gpu
      - sm75
    timeout-minutes: 360
    env:
      DOCKER_IMAGE: flashinfer/flashinfer-ci-cu129:${{ needs.setup.outputs.docker_tag }}
    steps:
      # Every command below is best-effort (`|| true`): a failed cleanup
      # never fails the job.
      - name: Cleanup
        run: |
          # Stop and remove all Docker containers to free GPU memory
          docker stop $(docker ps -q) 2>/dev/null || true
          docker rm $(docker ps -aq) 2>/dev/null || true
          # Empty the workspace (including dotfiles) and drop the JIT cache
          sudo rm -rf ${{ github.workspace }}/* || true
          sudo rm -rf ${{ github.workspace }}/.[!.]* || true
          rm -rf ~/.cache/flashinfer_jit || true
          docker system prune -f || true
          nvidia-smi || true

      - uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Login to Docker Hub
        # Don't fail if secret is unavailable (e.g., fork PRs)
        continue-on-error: true
        uses: docker/login-action@v3
        with:
          username: flashinfer
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Show Node Info
        env:
          NODE_NAME: ${{ runner.name }}
          WORKSPACE: ${{ github.workspace }}
          BUILD_NUMBER: ${{ github.run_number }}
        run: ./scripts/task_show_node_info.sh

      # Only the part-3 test script is run on this runner.
      - name: Run JIT Unittest Part 3 (T4)
        run: bash ci/bash.sh ${DOCKER_IMAGE} ./scripts/task_jit_run_tests_part3.sh
# ---------------------------------------------------------------------------
# Test Results Summary
#
# Always runs (even when upstream jobs fail or are skipped) and folds their
# results into one pass/fail status plus a step summary.
# ---------------------------------------------------------------------------
  test-results-summary:
    name: Test Results Summary
    if: always()
    needs: [gate, setup, aot-build-import, gpu-tests-a10g, gpu-tests-t4]
    runs-on: ubuntu-latest
    steps:
      - name: Check Results
        run: |
          echo "## Test Results Summary" >> "$GITHUB_STEP_SUMMARY"

          # A gate job that failed or was cancelled never sets `authorized`.
          # Report that as a failure instead of mistaking it for a PR that is
          # merely waiting for authorization.
          GATE="${{ needs.gate.result }}"
          if [ "$GATE" != "success" ]; then
            echo "Permission check did not succeed (result: $GATE)" >> "$GITHUB_STEP_SUMMARY"
            exit 1
          fi

          # Check if CI was skipped due to permissions
          if [ "${{ needs.gate.outputs.authorized }}" != "true" ]; then
            echo "CI skipped (pending authorization)" >> "$GITHUB_STEP_SUMMARY"
            echo "A contributor in @flashinfer-ai/ci-users can comment \`@flashinfer-bot run\` to approve." >> "$GITHUB_STEP_SUMMARY"
            exit 0
          fi

          # Docs/config-only PRs legitimately skip every test job.
          if [ "${{ needs.setup.outputs.skip_build }}" == "true" ]; then
            echo "Build skipped (docs/config only changes)" >> "$GITHUB_STEP_SUMMARY"
            exit 0
          fi

          AOT="${{ needs.aot-build-import.result }}"
          A10G="${{ needs.gpu-tests-a10g.result }}"
          T4="${{ needs.gpu-tests-t4.result }}"
          # Empty unless the run was started through workflow_dispatch.
          SKIP_AOT="${{ github.event.inputs.skip_aot }}"
          SKIP_GPU="${{ github.event.inputs.skip_gpu }}"

          echo "AOT Build Import: $AOT" >> "$GITHUB_STEP_SUMMARY"
          echo "GPU Tests (A10G): $A10G" >> "$GITHUB_STEP_SUMMARY"
          echo "GPU Tests (T4): $T4" >> "$GITHUB_STEP_SUMMARY"

          # Fail if any required job is not success (unless explicitly skipped)
          if { [ "$AOT" != "success" ] && [ "$SKIP_AOT" != "true" ]; } || \
             { [ "$A10G" != "success" ] && [ "$SKIP_GPU" != "true" ]; } || \
             { [ "$T4" != "success" ] && [ "$SKIP_GPU" != "true" ]; }; then
            echo "**Tests Failed**" >> "$GITHUB_STEP_SUMMARY"
            exit 1
          fi

          echo "**Tests Passed**" >> "$GITHUB_STEP_SUMMARY"