# Build vLLM + ROCm — GitHub Actions workflow
# (page chrome from web export removed; run number "#12" was UI text, not config)
---

# Workflow: portable vLLM + ROCm builds from AMD's official wheel indexes.
name: Build vLLM + ROCm

on:
  # Manual runs: choose GPU targets and whether to publish a release.
  workflow_dispatch:
    inputs:
      gfx_target:
        description: 'AMD GPU targets (comma-separated)'
        required: false
        default: 'gfx1151,gfx1150,gfx120X'
      create_release:
        description: 'Create a GitHub release after successful build'
        required: false
        default: true
        type: boolean
  # Build-only validation on PRs (release job is gated off for PRs below).
  pull_request:
    types: [opened, synchronize, reopened]
  # Weekly refresh: Sundays 15:00 UTC.
  schedule:
    - cron: '0 15 * * 0'

env:
  # Falls back to the default target list for schedule/PR events,
  # where workflow_dispatch inputs are empty.
  GFX_TARGETS: ${{ github.event.inputs.gfx_target || 'gfx1151,gfx1150,gfx120X' }}
jobs:
  # Turns the comma-separated GFX_TARGETS env var into a JSON build matrix,
  # e.g. 'a,b' -> {"gfx_target":["a","b"]}.
  prepare-matrix:
    runs-on: ubuntu-22.04
    outputs:
      ubuntu_matrix: ${{ steps.set-matrix.outputs.ubuntu_matrix }}
    steps:
      - name: Set matrix
        id: set-matrix
        run: |
          targets="${{ env.GFX_TARGETS }}"
          # Split on commas, trim whitespace, then assemble a compact JSON
          # object with jq (-R: raw input, -s: slurp into an array, -c: compact).
          matrix_targets=$(echo "$targets" \
            | tr ',' '\n' \
            | sed 's/^ *//;s/ *$//' \
            | jq -R . \
            | jq -s '{gfx_target: .}' \
            | jq -c)
          echo "ubuntu_matrix=$matrix_targets" >> "$GITHUB_OUTPUT"
          echo "Generated matrix: $matrix_targets"
build-ubuntu:
runs-on: ubuntu-22.04
needs: prepare-matrix
strategy:
matrix: ${{fromJson(needs.prepare-matrix.outputs.ubuntu_matrix)}}
fail-fast: false
outputs:
vllm_version: ${{ steps.set-outputs.outputs.vllm_version }}
torch_version: ${{ steps.set-outputs.outputs.torch_version }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install Python 3.12
run: |
sudo add-apt-repository ppa:deadsnakes/ppa -y
sudo apt update
sudo apt install -y python3.12 python3.12-venv python3.12-dev
python3.12 --version
- name: Map GPU target to AMD wheel URLs
id: wheel-urls
run: |
target="${{ matrix.gfx_target }}"
# Map targets to AMD wheel URL suffixes
# See: https://rocm.docs.amd.com/en/latest/rocm-for-ai/vllm.html
case "$target" in
gfx1151) suffix="gfx1151" ;;
gfx1150) suffix="gfx1150" ;;
gfx120X) suffix="gfx120X-all" ;;
*)
echo "ERROR: No AMD pre-built wheels for target: $target"
exit 1
;;
esac
echo "torch_index=https://repo.amd.com/rocm/whl/${suffix}/" >> $GITHUB_OUTPUT
echo "vllm_index=https://rocm.frameworks.amd.com/whl/${suffix}/" >> $GITHUB_OUTPUT
echo "Using PyTorch index: https://repo.amd.com/rocm/whl/${suffix}/"
echo "Using vLLM index: https://rocm.frameworks.amd.com/whl/${suffix}/"
- name: Create Python virtual environment
run: |
python3.12 -m venv --copies /opt/vllm
/opt/vllm/bin/python3 -m pip install --upgrade pip
echo "Python: $(/opt/vllm/bin/python3 --version)"
- name: Install PyTorch ROCm from AMD
run: |
echo "Installing PyTorch from AMD ROCm wheel index..."
/opt/vllm/bin/pip install \
--index-url "${{ steps.wheel-urls.outputs.torch_index }}" \
torch torchvision
/opt/vllm/bin/python3 -c "
import torch
print(f'PyTorch {torch.__version__}')
print(f'ROCm built-in: {torch.version.hip is not None}')
"
- name: Install vLLM ROCm from AMD
run: |
echo "Installing vLLM from AMD ROCm wheel index..."
/opt/vllm/bin/pip install \
--extra-index-url "${{ steps.wheel-urls.outputs.vllm_index }}" \
vllm
/opt/vllm/bin/python3 -c "import vllm; print(f'vLLM {vllm.__version__}')"
- name: Create launcher script
run: |
cat > /opt/vllm/bin/vllm-server << 'LAUNCHER_EOF'
#!/bin/bash
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
VENV_DIR="$(dirname "$SCRIPT_DIR")"
SP="$VENV_DIR/lib/python3.12/site-packages"
ROCM_LIB="$SP/_rocm_sdk_core/lib"
if [ -d "$ROCM_LIB" ]; then
export LD_LIBRARY_PATH="$ROCM_LIB:${LD_LIBRARY_PATH:-}"
fi
export PYTHONPATH="$SP/_rocm_sdk_core/share/amd_smi:${PYTHONPATH:-}"
export FLASH_ATTENTION_TRITON_AMD_ENABLE=TRUE
exec "$SCRIPT_DIR/python3" -m vllm.entrypoints.openai.api_server "$@"
LAUNCHER_EOF
# Remove YAML indentation from heredoc
sed -i 's/^ //' /opt/vllm/bin/vllm-server
chmod +x /opt/vllm/bin/vllm-server
echo "Launcher script:"
cat /opt/vllm/bin/vllm-server
- name: Strip unnecessary files to reduce size
run: |
cd /opt/vllm
echo "=== Size before cleanup ==="
du -sh .
SP="lib/python3.12/site-packages"
# Remove pip/setuptools/wheel
rm -rf $SP/pip* $SP/setuptools* $SP/wheel* $SP/pkg_resources* 2>/dev/null || true
rm -f $SP/distutils-precedence.pth 2>/dev/null || true
rm -rf $SP/_distutils_hack 2>/dev/null || true
# Remove __pycache__ and .pyc
find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
find . -name "*.pyc" -delete 2>/dev/null || true
# Remove test/benchmark dirs (but NOT torch.testing — it's imported at runtime)
rm -rf $SP/torch/test 2>/dev/null || true
rm -rf $SP/torch/benchmarks 2>/dev/null || true
# Remove .dist-info except vllm
find $SP -maxdepth 1 -type d -name "*.dist-info" ! -name "vllm*" -exec rm -rf {} + 2>/dev/null || true
# Remove Python stdlib we don't need
rm -rf lib/python3.12/test lib/python3.12/tkinter lib/python3.12/idlelib 2>/dev/null || true
rm -rf lib/python3.12/turtledemo lib/python3.12/ensurepip 2>/dev/null || true
rm -rf include/ 2>/dev/null || true
# NOTE: Do NOT strip .so files — AMD ROCm wheels use special ELF
# alignment that strip corrupts, and numpy/scipy also break.
echo "=== Size after cleanup ==="
du -sh .
echo ""
echo "Top consumers:"
du -sh $SP/torch/ 2>/dev/null || true
du -sh $SP/vllm/ 2>/dev/null || true
du -sh $SP/_rocm_sdk_core/ 2>/dev/null || true
- name: Verify bundled environment works
run: |
SP="/opt/vllm/lib/python3.12/site-packages"
ROCM_LIB="$SP/_rocm_sdk_core/lib"
export LD_LIBRARY_PATH="$ROCM_LIB:${LD_LIBRARY_PATH:-}"
/opt/vllm/bin/python3 -c "import vllm; print(f'vLLM {vllm.__version__} OK')"
/opt/vllm/bin/python3 -c "import torch; print(f'PyTorch {torch.__version__} OK')"
bash -n /opt/vllm/bin/vllm-server
echo "All sanity checks passed"
- name: Report final size
run: |
echo "=== Final artifact ==="
du -sh /opt/vllm/
echo ""
du -sh /opt/vllm/*/ 2>/dev/null
- name: Upload build artifacts
uses: actions/upload-artifact@v4
with:
name: vllm-ubuntu-rocm-${{ matrix.gfx_target }}-x64
path: /opt/vllm/
retention-days: 30
compression-level: 6
- name: Set job outputs
id: set-outputs
run: |
SP="/opt/vllm/lib/python3.12/site-packages"
ROCM_LIB="$SP/_rocm_sdk_core/lib"
export LD_LIBRARY_PATH="$ROCM_LIB:${LD_LIBRARY_PATH:-}"
vllm_ver=$(/opt/vllm/bin/python3 -c "import vllm; print(vllm.__version__)")
torch_ver=$(/opt/vllm/bin/python3 -c "import torch; print(torch.__version__)")
echo "vllm_version=$vllm_ver" >> $GITHUB_OUTPUT
echo "torch_version=$torch_ver" >> $GITHUB_OUTPUT
- name: Clean up
if: always()
run: |
[ -d "/opt/vllm" ] && sudo rm -rf /opt/vllm
create-release:
needs: [prepare-matrix, build-ubuntu]
runs-on: ubuntu-22.04
permissions:
contents: write
pull-requests: write
if: |
always() &&
needs.build-ubuntu.result == 'success' &&
github.event_name != 'pull_request' &&
(github.event_name == 'workflow_dispatch' &&
(github.event.inputs.create_release == 'true' || github.event.inputs.create_release == null) ||
github.event_name == 'schedule')
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Download all build artifacts
uses: actions/download-artifact@v4
with:
path: ./all-artifacts
- name: Generate release tag
id: generate-tag
env:
GH_TOKEN: ${{ github.token }}
run: |
existing_tags=$(gh release list --limit 1000 --json tagName --jq '.[].tagName' | grep -E '^b[0-9]{4}$' | sort -V || echo "")
if [ -z "$existing_tags" ]; then
next_number=1000
else
highest_tag=$(echo "$existing_tags" | tail -n 1)
highest_number=$(echo "$highest_tag" | sed 's/^b//')
next_number=$((highest_number + 1))
fi
TAG=$(printf "b%04d" $next_number)
echo "tag=${TAG}" >> $GITHUB_OUTPUT
echo "Generated release tag: ${TAG}"
- name: Create archives (split if >1.9 GB for GitHub release limit)
run: |
targets="${{ env.GFX_TARGETS }}"
TAG="${{ steps.generate-tag.outputs.tag }}"
MAX_SIZE=1900 # MB — GitHub limit is 2 GB per asset
IFS=',' read -ra TARGET_ARRAY <<< "$targets"
for target in "${TARGET_ARRAY[@]}"; do
target=$(echo "$target" | xargs)
artifact_dir="./all-artifacts/vllm-ubuntu-rocm-${target}-x64"
base="vllm-${TAG}-ubuntu-rocm-${target}-x64"
if [ -d "$artifact_dir" ]; then
echo "Creating: ${base}.tar.gz"
tar -czf "${base}.tar.gz" -C "$artifact_dir" .
size_mb=$(du -m "${base}.tar.gz" | cut -f1)
echo "Archive size: ${size_mb} MB"
if [ "$size_mb" -gt "$MAX_SIZE" ]; then
echo "Splitting into ${MAX_SIZE}MB parts..."
split -b ${MAX_SIZE}M -d --additional-suffix=.tar.gz \
"${base}.tar.gz" "${base}.part"
rm "${base}.tar.gz"
echo "Parts created:"
ls -la ${base}.part*
fi
else
echo "Warning: $artifact_dir not found"
fi
done
echo "=== Release assets ==="
ls -la *.tar.gz 2>/dev/null || echo "No archives"
- name: Create Release
env:
GH_TOKEN: ${{ github.token }}
run: |
TAG="${{ steps.generate-tag.outputs.tag }}"
VLLM_VERSION="${{ needs.build-ubuntu.outputs.vllm_version }}"
TORCH_VERSION="${{ needs.build-ubuntu.outputs.torch_version }}"
targets="${{ env.GFX_TARGETS }}"
# Collect all .tar.gz files (may be split parts)
upload_files=$(ls -1 vllm-*.tar.gz 2>/dev/null | tr '\n' ' ')
echo "Files to upload: $upload_files"
gh release create "$TAG" \
--title "$TAG" \
--notes "**Build Number**: $TAG
**GPU Target(s)**: $targets
**vLLM Version**: $VLLM_VERSION
**PyTorch Version**: $TORCH_VERSION
**Build Date**: $(date -u '+%Y-%m-%d %H:%M:%S UTC')
Portable vLLM builds using AMD's official ROCm wheels. Includes bundled Python, PyTorch ROCm, and ROCm runtime. No separate installation required." \
$upload_files