Skip to content

[Sync] Merge mainstream TileLang TVM-FFI features into TileScale #222

[Sync] Merge mainstream TileLang TVM-FFI features into TileScale

[Sync] Merge mainstream TileLang TVM-FFI features into TileScale #222

Workflow file for this run

# CI workflow: runs a quick lint job, then the test job.
name: CI

on:
  pull_request:
    types: [labeled, unlabeled, opened, synchronize, reopened]
  # Allow to trigger the workflow manually
  workflow_dispatch:

permissions:
  contents: read

concurrency:
  group: '${{ github.workflow }}-${{ github.ref }}'
  # Only pull_request events cancel an in-flight run that shares the group.
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}

env:
  # Base CMake flags for clang-tidy; the CUDA environment step appends to this value.
  CLANG_TIDY_CMAKE_OPTIONS: '-DCMAKE_EXPORT_COMPILE_COMMANDS=ON'
  PYTHONDEVMODE: '1'
  PYTHONUNBUFFERED: '1'
  # Explicit cleanup: both are set to the empty string.
  PYTHONPATH: ''
  PIP_USER: ''
  COLUMNS: '100'
  FORCE_COLOR: '1'
  CLICOLOR_FORCE: '1'
  UV_INDEX_STRATEGY: 'unsafe-best-match'
  UV_HTTP_TIMEOUT: '600'
  # Cache locations inside the workspace; the self-hosted "Set environment" step
  # overwrites all four of them.
  XDG_CACHE_HOME: '${{ github.workspace }}/.cache'
  PIP_CACHE_DIR: '${{ github.workspace }}/.cache/pip'
  UV_CACHE_DIR: '${{ github.workspace }}/.cache/uv'
  PRE_COMMIT_HOME: '${{ github.workspace }}/.cache/pip/.pre-commit'
jobs:
  # Fast static checks; the tests job lists this job in its `needs`.
  lint:
    name: Quick Lint
    timeout-minutes: 30
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          submodules: recursive
          fetch-depth: 0

      - name: Setup Python 3.8
        id: setup-pylowest
        uses: actions/setup-python@v6
        with:
          # Use lowest supported version for linting. The interpreter is reached only
          # through this step's `python-path` output in the next step.
          python-version: '3.8'
          update-environment: false

      - name: Check AST with Python 3.8
        run: |
          "${{ steps.setup-pylowest.outputs.python-path }}" -m compileall -q -f tilelang

      - name: Setup Python 3.9
        uses: actions/setup-python@v6
        with:
          python-version: '3.9'
          update-environment: true
          cache: pip
          # Files whose content feeds the pip cache key.
          cache-dependency-path: |
            pyproject.toml
            requirements*.txt
            .pre-commit-config.yaml

      - name: Pre-commit Lint
        run: |
          # On any hook failure, emit an error annotation and fail the step.
          pipx run pre-commit run --all-files --color=always --show-diff-on-failure || {
            echo "::error::Pre-commit checks failed. Please run 'pre-commit install' and 'pre-commit run --all-files' locally to see the issues."
            exit 1
          }
tests:
  name: Test for Python ${{ matrix.python-version }} with ${{ matrix.runner.toolkit }} (on ${{ matrix.runner.name }})
  needs: lint
  # Run only for the tile-ai owner, and never for draft pull requests.
  if: >-
    github.repository_owner == 'tile-ai' &&
    (github.event_name != 'pull_request' || !github.event.pull_request.draft)
  timeout-minutes: 120
  runs-on: ${{ matrix.runner.tags }}
  strategy:
    fail-fast: false
    matrix:
      python-version: ['3.12']
      runner:
        - name: self-hosted-nvidia
          tags:
            - self-hosted
            - tilescale
          # Format: [Nightly-]CUDA-<major>.<minor>[.<patch>]. E.g., "CUDA-12.8" or "Nightly-CUDA-13.0".
          # Use "Nightly-" prefix to use torch nightly builds.
          toolkit: CUDA-12.8
  steps:
# Full-history checkout, including every nested submodule.
- name: Checkout repository
  uses: actions/checkout@v6
  with:
    submodules: recursive
    fetch-depth: 0
- name: Set environment (self-hosted runners)
  if: startsWith(matrix.runner.name, 'self-hosted')
  env:
    # Hand the secret to the script through the environment instead of pasting it into
    # the script text, so quotes, `$` or backticks in the value cannot alter the shell code.
    SECRET_PATH_PREFIXES: ${{ secrets.SECRET_PATH_PREFIXES }}
  run: |
    # Hide sensitive data in logs for self-hosted runners
    if [[ -n "${SECRET_PATH_PREFIXES}" ]]; then
      echo "::add-mask::${SECRET_PATH_PREFIXES}"
      # Colon separated list of secrets to mask. Entries are read one per line so that a
      # prefix containing spaces or glob characters is masked verbatim rather than being
      # word-split or expanded by the shell.
      while IFS= read -r prefix; do
        [[ -n "${prefix}" ]] || continue
        echo "::add-mask::${prefix}"
      done < <(tr ':' '\n' <<< "${SECRET_PATH_PREFIXES}")
    fi
    # Use runner tool_cache as cache root for self-hosted runners to avoid internet connection
    # issues and to share cache between jobs.
    export XDG_CACHE_HOME="${{ runner.tool_cache }}/.ci-cache-${{ github.workflow }}"
    # Persist the cache locations for all later steps (tee also prints them to the log).
    echo "XDG_CACHE_HOME=${XDG_CACHE_HOME}" | tee -a "${GITHUB_ENV}"
    echo "PIP_CACHE_DIR=${XDG_CACHE_HOME}/pip" | tee -a "${GITHUB_ENV}"
    echo "UV_CACHE_DIR=${XDG_CACHE_HOME}/uv" | tee -a "${GITHUB_ENV}"
    echo "PRE_COMMIT_HOME=${XDG_CACHE_HOME}/pip/.pre-commit" | tee -a "${GITHUB_ENV}"
# Do not use ccache on self-hosted runners, as it will download/upload caches which is slow.
# Self-hosted runners usually have more CPU power to compile without ccache.
- name: Setup ccache (GitHub-hosted runners)
  id: setup-ccache
  if: ${{ !startsWith(matrix.runner.name, 'self-hosted') }}
  uses: hendrikmuhs/ccache-action@v1
  with:
    # Key is built from OS, architecture, toolkit and a hash of all *.cc files;
    # the restore keys fall back to progressively shorter prefixes of it.
    key: ${{ runner.os }}-${{ runner.arch }}-${{ matrix.runner.toolkit }}-${{ hashFiles('**/*.cc') }}
    restore-keys: |
      ${{ runner.os }}-${{ runner.arch }}-${{ matrix.runner.toolkit }}-${{ hashFiles('**/*.cc') }}
      ${{ runner.os }}-${{ runner.arch }}-${{ matrix.runner.toolkit }}
      ${{ runner.os }}-${{ runner.arch }}
    append-timestamp: false
    evict-old-files: '7d'
    create-symlink: true
- name: Set environment (CUDA)
  if: contains(matrix.runner.toolkit, 'CUDA')
  run: |
    TOOLKIT="${{ matrix.runner.toolkit }}"
    # The version is whatever follows the last dash of the toolkit name.
    CUDA_VERSION="${TOOLKIT##*-}"
    CUDA_VERSION_MAJMIN="$(cut -d '.' -f-2 <<< "${CUDA_VERSION}")"
    CUDA_VERSION_MAJMIN_NODOT="${CUDA_VERSION_MAJMIN//./}"

    # Pick the torch wheel index: stable by default, nightly for "Nightly-" toolkits.
    if [[ "${TOOLKIT}" != "Nightly-"* ]]; then
      export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cu${CUDA_VERSION_MAJMIN_NODOT}"
    else
      # Use torch nightly builds
      export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/nightly/cu${CUDA_VERSION_MAJMIN_NODOT}"
    fi
    export UV_INDEX="${PIP_EXTRA_INDEX_URL}"
    export CLANG_TIDY_CMAKE_OPTIONS="${CLANG_TIDY_CMAKE_OPTIONS} -DUSE_CUDA=ON"

    # Persist every derived value for later steps; tee also prints each one to the log.
    for assignment in \
      "USE_CUDA=ON" \
      "CUDA_VERSION=${CUDA_VERSION}" \
      "CUDA_VERSION_MAJMIN=${CUDA_VERSION_MAJMIN}" \
      "CUDA_VERSION_MAJMIN_NODOT=${CUDA_VERSION_MAJMIN_NODOT}" \
      "PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}" \
      "UV_INDEX=${UV_INDEX}" \
      "CLANG_TIDY_CMAKE_OPTIONS=${CLANG_TIDY_CMAKE_OPTIONS}"
    do
      echo "${assignment}" | tee -a "${GITHUB_ENV}"
    done

    # When nvcc is not on PATH, fall back to the /usr/local/cuda install location.
    if [[ ! -x "$(command -v nvcc)" ]]; then
      export PATH="/usr/local/cuda/bin:${PATH}"
      export LD_LIBRARY_PATH="/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
      echo "PATH=${PATH}" | tee -a "${GITHUB_ENV}"
      echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" | tee -a "${GITHUB_ENV}"
    fi

    # Report the compiler version, or warn (without failing) if it is still missing.
    if [[ ! -x "$(command -v nvcc)" ]]; then
      echo "::warning::nvcc not found in PATH!"
    else
      echo "\$ $(command -v nvcc) --version" && nvcc --version
    fi
- name: Setup Python and uv with caching
  id: setup-uv
  uses: astral-sh/setup-uv@v7
  with:
    python-version: ${{ matrix.python-version }}
    activate-environment: true
    # Use runner tool_cache for self-hosted runners
    cache-local-path: ${{ env.UV_CACHE_DIR }}
    # Do not use cache for self-hosted runners, as it will download/upload caches which is slow.
    enable-cache: ${{ !startsWith(matrix.runner.name, 'self-hosted') }}
    prune-cache: ${{ !startsWith(matrix.runner.name, 'self-hosted') }}
    ignore-nothing-to-cache: true
    # Extra cache key to upload/download caches on GitHub-hosted runners
    cache-suffix: uv-${{ runner.os }}-${{ runner.arch }}-${{ matrix.python-version }}-${{ matrix.runner.name }}-${{ matrix.runner.toolkit }}
    # Files whose content feeds the cache key.
    cache-dependency-glob: |
      pyproject.toml
      requirements*.txt
      .pre-commit-config.yaml
- name: Setup venv
  id: setup-venv
  run: |
    set -o pipefail
    uv pip install --upgrade pip setuptools wheel

    # With a nightly index configured, install a pre-release torch before the requirements.
    case "${UV_INDEX}" in
      *"/nightly/"*)
        uv pip install --prerelease=allow -v torch
        ;;
    esac

    uv pip install -v -r requirements-test.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
    uv run --no-project --script - <<< "import torch; print(f'torch: {torch.__version__}')"

    # Toolkit-specific requirements; any toolkit without a branch here fails the step.
    case "${{ matrix.runner.toolkit }}" in
      *"CUDA"*)
        uv pip install --no-build-isolation-package=flash-attn -v -r requirements-test-cuda.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
        uv run --no-project --script - <<< "import flash_attn; print(f'flash_attn: {flash_attn.__version__}')"
        ;;
      # *"ROCm"*)
      #   uv pip install -v -r requirements-test-rocm.txt
      #   ;;
      # *"Metal"*)
      #   uv pip install -v -r requirements-test-metal.txt
      #   ;;
      *)
        echo "::error::Unknown toolkit: ${{ matrix.runner.toolkit }}"
        exit 1
        ;;
    esac

    # Fold the torch environment report into a collapsible log group.
    echo "::group::torch.utils.collect_env"
    uv run --no-project -m -- torch.utils.collect_env
    echo "::endgroup::"
# Runs only on self-hosted runners, after the setup-uv or setup-venv step has failed.
- name: Clear uv cache for self-hosted runners (if setup failed)
  if: >-
    failure() &&
    startsWith(matrix.runner.name, 'self-hosted') &&
    (steps.setup-uv.conclusion == 'failure' || steps.setup-venv.conclusion == 'failure')
  run: |
    echo "Clearing uv cache at ${UV_CACHE_DIR} due to failure."
    uv cache clean
- name: Enable core dump generation (Linux / GitHub-hosted runners)
  if: runner.os == 'Linux' && !startsWith(matrix.runner.name, 'self-hosted')
  run: |
    # Core file name carries the Python version and toolkit of this matrix entry.
    core_pattern="core.${{ matrix.python-version }}.${{ matrix.runner.toolkit }}.%P"
    sudo sysctl -w kernel.core_pattern="${core_pattern}"
    sudo sysctl -w kernel.core_uses_pid=0
    sudo sysctl -w fs.suid_dumpable=1
    # Print the resulting settings to the log.
    sysctl kernel.core_pattern kernel.core_uses_pid fs.suid_dumpable
- name: Enable core dump generation (macOS / GitHub-hosted runners)
  if: runner.os == 'macOS' && !startsWith(matrix.runner.name, 'self-hosted')
  run: |
    # Core file name carries the Python version and toolkit of this matrix entry.
    corefile="core.${{ matrix.python-version }}.${{ matrix.runner.toolkit }}.%P"
    sudo sysctl -w kern.corefile="${corefile}"
    sudo sysctl -w kern.coredump=1
    sudo sysctl -w kern.sugid_coredump=1
    # Print the resulting settings to the log.
    sysctl kern.corefile kern.coredump kern.sugid_coredump
- name: Install project (wheel form)
  run: |
    uv pip install -v .
    # Install DeepEP for testing purpose
    bash tilelang/distributed/install_deepep.sh
    # Our CI machine's IB is incomplete, disable it to avoid unnecessary error msgs.
    # Each step runs in its own shell, so a plain `export` here is gone before the
    # example/test steps start; appending to GITHUB_ENV makes it visible to them.
    echo "NCCL_IB_DISABLE=1" | tee -a "${GITHUB_ENV}"
# - name: Run clang-tidy
# id: clang-tidy
# if: runner.os == 'Linux'
# run: |
# echo "\$ $(command -v clang-tidy) --version" && clang-tidy --version
# # Download run-clang-tidy script
# RCT_URL=https://raw.githubusercontent.com/llvm/llvm-project/refs/heads/release/21.x/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py
# echo "Downloading run-clang-tidy script from ${RCT_URL}"
# echo "import urllib.request; url = '${RCT_URL}'.rstrip('/'); urllib.request.urlretrieve(url, url.split('/')[-1])" | uv run --no-project --script -
# RUN_CLANG_TIDY=(uv run --no-project --script -- run-clang-tidy.py)
# if [[ -x "$(command -v clang-apply-replacements)" ]]; then
# echo "Using clang-apply-replacements from $(command -v clang-apply-replacements)"
# RUN_CLANG_TIDY+=(-fix -clang-apply-replacements-binary="$(command -v clang-apply-replacements)")
# else
# echo "::warning::clang-apply-replacements not found in PATH, automatic fixing disabled."
# fi
# # Run cmake to create the build directory with compile_commands.json
# cmake -S . -B cmake-build --fresh ${CLANG_TIDY_CMAKE_OPTIONS} # no quotes here
# echo "::group::compile_commands.json"
# ls -alh cmake-build/compile_commands.json
# uv run --no-project -m -- json.tool --no-ensure-ascii cmake-build/compile_commands.json
# echo "::endgroup::"
# CXX_FILES=$(find src -type f -iname "*.[ch]pp" -o -iname "*.cc" -o -iname "*.c" -o -iname "*.h")
# rc=0
# echo "::group::run-clang-tidy"
# "${RUN_CLANG_TIDY[@]}" -clang-tidy-binary="$(command -v clang-tidy)" \
# -exclude-header-filter='^(3rdparty|tvm)/.*$' \
# -p="cmake-build" ${CXX_FILES} || rc="$?"
# echo "::endgroup::"
# rm -rf cmake-build run-clang-tidy.py
# if (( rc != 0 )); then
# echo "::error::clang-tidy found issues (exit code: ${rc}). Please run 'clang-tidy --fix' locally to fix them."
# git diff --color=always || true
# exit "${rc}"
# fi
- name: Run examples with Python ${{ matrix.python-version }} (${{ matrix.runner.toolkit }})
  if: contains(matrix.runner.toolkit, 'CUDA')
  run: |
    cd examples
    unset PYTHONPATH
    # Shared pytest invocation; per-run options and the file list are appended below.
    pytest_cmd=(
      uv run --no-project -m --
      pytest --verbose --color=yes --durations=0 --showlocals --cache-clear -r fE
    )

    # Find and run distributed tests with TILELANG_USE_DISTRIBUTED=1
    # DeepEP tests requires fullmesh nvl or internode environment, we disable for now
    mapfile -t dist_tests < <(find . -type f -path '*/distributed/*' -name 'test*.py' ! -path '*deepep*' 2>/dev/null || true)
    if (( ${#dist_tests[@]} == 0 )); then
      echo "No distributed examples found."
    else
      echo "Running distributed examples with TILELANG_USE_DISTRIBUTED=1:"
      printf '%s\n' "${dist_tests[@]}"
      TILELANG_USE_DISTRIBUTED=1 "${pytest_cmd[@]}" --maxfail=3 --numprocesses=1 "${dist_tests[@]}"
    fi

    # Run remaining example tests (non-distributed)
    # Temporarily disable problematic tests: sink, vs_sparse
    mapfile -t other_tests < <(find . -type f -name 'test*.py' ! -path '*/distributed/*' | grep -vE 'sink|vs_sparse' 2>/dev/null || true)
    if (( ${#other_tests[@]} == 0 )); then
      echo "No non-distributed example tests found."
    else
      echo "Running non-distributed examples:"
      printf '%s\n' "${other_tests[@]}"
      "${pytest_cmd[@]}" --maxfail=3 --numprocesses=2 "${other_tests[@]}"
    fi
# NVIDIA CUDA tests
- name: Run CUDA tests with Python ${{ matrix.python-version }} (${{ matrix.runner.toolkit }})
  id: cuda-tests
  if: contains(matrix.runner.toolkit, 'CUDA')
  run: |
    cd testing/python
    unset PYTHONPATH
    # Shared pytest invocation; per-run options and the file list are appended below.
    pytest_cmd=(
      uv run --no-project -m --
      pytest --verbose --color=yes --durations=0 --showlocals --cache-clear -r fE
    )

    # Run distributed tests first with env var
    mapfile -t dist_tests < <(find . -type f -path '*/distributed/*' -name 'test*.py' 2>/dev/null || true)
    if (( ${#dist_tests[@]} == 0 )); then
      echo "No distributed tests found under testing/python."
    else
      echo "Running distributed tests with TILELANG_USE_DISTRIBUTED=1:"
      printf '%s\n' "${dist_tests[@]}"
      TILELANG_USE_DISTRIBUTED=1 "${pytest_cmd[@]}" --maxfail=3 --numprocesses=1 "${dist_tests[@]}"
    fi

    # Run remaining tests
    # Temporarily disable problematic tests: tilelibrary_gemm, jit_gemm_ctypes
    mapfile -t other_tests < <(find . -type f -name 'test*.py' ! -path '*/distributed/*' | grep -vE 'tilelibrary_gemm|jit_gemm_ctypes' 2>/dev/null || true)
    if (( ${#other_tests[@]} == 0 )); then
      echo "No non-distributed tests found under testing/python."
    else
      echo "Running non-distributed tests:"
      printf '%s\n' "${other_tests[@]}"
      "${pytest_cmd[@]}" --maxfail=3 --numprocesses=2 "${other_tests[@]}"
    fi
- name: List generated files
  if: ${{ !cancelled() }}
  run: |
    # Remove Python bytecode first so it does not show up in the listing.
    find . -type f -name '*.py[co]' -delete
    find . -depth -type d -name "__pycache__" -exec rm -r "{}" +
    # Status entries that do not end in "/" (i.e. files, not directories).
    generated="$(git status --ignored --porcelain | grep -vE '/$' || true)"
    if [[ -n "${generated}" ]]; then
      # The last whitespace-delimited token of each entry is passed to ls.
      ls -alh $(grep -oE '\S+$' <<< "${generated}")
    fi