Build vLLM + ROCm #12
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds portable vLLM + ROCm bundles from AMD's pre-built wheels.
name: Build vLLM + ROCm

on:
  # Manual run: optionally override the GPU targets and skip the release.
  workflow_dispatch:
    inputs:
      gfx_target:
        description: 'AMD GPU targets (comma-separated)'
        required: false
        default: 'gfx1151,gfx1150,gfx120X'
      create_release:
        description: 'Create a GitHub release after successful build'
        required: false
        default: true
        type: boolean
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
  schedule:
    # Weekly, Sunday at 15:00 (GitHub evaluates cron schedules in UTC).
    - cron: '0 15 * * 0'

env:
  # Only workflow_dispatch carries inputs; every other trigger falls back to
  # the default list, which must be kept in sync with the input default above.
  GFX_TARGETS: ${{ github.event.inputs.gfx_target || 'gfx1151,gfx1150,gfx120X' }}

jobs:
# Turns the comma-separated GFX_TARGETS list into a JSON build matrix of the
# form {"gfx_target":["gfx1151", ...]} for the build job.
prepare-matrix:
  runs-on: ubuntu-22.04
  outputs:
    ubuntu_matrix: ${{ steps.set-matrix.outputs.ubuntu_matrix }}
  steps:
    - name: Set matrix
      id: set-matrix
      run: |
        # GFX_TARGETS comes from the workflow-level env block. Read it as a
        # shell variable instead of pasting the expression into the script,
        # so a crafted workflow_dispatch input cannot inject shell code.
        #
        # Split on commas, trim surrounding spaces, drop empty entries
        # (e.g. from a trailing comma) and emit one compact JSON object.
        matrix_targets=$(printf '%s\n' "$GFX_TARGETS" \
          | tr ',' '\n' \
          | sed 's/^ *//;s/ *$//' \
          | jq -Rnc '{gfx_target: [inputs | select(length > 0)]}')
        echo "ubuntu_matrix=$matrix_targets" >> "$GITHUB_OUTPUT"
        echo "Generated matrix: $matrix_targets"
# Builds one bundle per GPU target listed in the prepared matrix.
build-ubuntu:
  needs: prepare-matrix
  runs-on: ubuntu-22.04
  strategy:
    # Let the remaining targets finish even if one of them fails.
    fail-fast: false
    matrix: ${{fromJson(needs.prepare-matrix.outputs.ubuntu_matrix)}}
  outputs:
    # NOTE(review): every matrix leg writes these same two outputs, so the
    # release job sees the values of only one leg — confirm that all targets
    # are expected to resolve to the same vLLM/PyTorch versions.
    vllm_version: ${{ steps.set-outputs.outputs.vllm_version }}
    torch_version: ${{ steps.set-outputs.outputs.torch_version }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
# Installs Python 3.12 (interpreter, venv module and headers) from the
# deadsnakes PPA.
- name: Install Python 3.12
  run: |
    sudo add-apt-repository ppa:deadsnakes/ppa -y
    # apt-get rather than apt: apt is the interactive front end and warns
    # that its command-line interface is not stable for use in scripts.
    sudo apt-get update
    sudo apt-get install -y python3.12 python3.12-venv python3.12-dev
    python3.12 --version
# Resolves the matrix GPU target to the AMD wheel index URLs and exposes them
# as the step outputs torch_index and vllm_index. Fails for unknown targets.
- name: Map GPU target to AMD wheel URLs
  id: wheel-urls
  env:
    # Passed through the environment rather than interpolated into the
    # script: the value originates from a user-supplied dispatch input.
    GFX_TARGET: ${{ matrix.gfx_target }}
  run: |
    # Map targets to AMD wheel URL suffixes
    # See: https://rocm.docs.amd.com/en/latest/rocm-for-ai/vllm.html
    case "$GFX_TARGET" in
      gfx1151) suffix="gfx1151" ;;
      gfx1150) suffix="gfx1150" ;;
      gfx120X) suffix="gfx120X-all" ;;
      *)
        echo "ERROR: No AMD pre-built wheels for target: $GFX_TARGET" >&2
        exit 1
        ;;
    esac
    torch_index="https://repo.amd.com/rocm/whl/${suffix}/"
    vllm_index="https://rocm.frameworks.amd.com/whl/${suffix}/"
    {
      echo "torch_index=$torch_index"
      echo "vllm_index=$vllm_index"
    } >> "$GITHUB_OUTPUT"
    echo "Using PyTorch index: $torch_index"
    echo "Using vLLM index: $vllm_index"
# Creates the virtual environment at /opt/vllm that becomes the build artifact.
- name: Create Python virtual environment
  run: |
    venv_python=/opt/vllm/bin/python3
    # --copies: copy the interpreter into the venv instead of symlinking it.
    python3.12 -m venv --copies /opt/vllm
    "$venv_python" -m pip install --upgrade pip
    echo "Python: $("$venv_python" --version)"
# Installs torch and torchvision from the AMD index chosen for this target,
# then prints the installed version and whether it is a ROCm (HIP) build.
- name: Install PyTorch ROCm from AMD
  run: |
    echo "Installing PyTorch from AMD ROCm wheel index..."
    torch_index="${{ steps.wheel-urls.outputs.torch_index }}"
    /opt/vllm/bin/pip install --index-url "$torch_index" torch torchvision
    /opt/vllm/bin/python3 - <<'PY'
    import torch
    print(f'PyTorch {torch.__version__}')
    print(f'ROCm built-in: {torch.version.hip is not None}')
    PY
# Installs vLLM with the AMD index added as an extra package source, then
# prints the installed version.
- name: Install vLLM ROCm from AMD
  run: |
    echo "Installing vLLM from AMD ROCm wheel index..."
    vllm_index="${{ steps.wheel-urls.outputs.vllm_index }}"
    # NOTE(review): with --extra-index-url the default index is still
    # consulted, so pip may pick vllm (or replace torch) from there —
    # confirm the AMD builds are the ones that actually get installed.
    /opt/vllm/bin/pip install --extra-index-url "$vllm_index" vllm
    /opt/vllm/bin/python3 -c "import vllm; print(f'vLLM {vllm.__version__}')"
# Writes bin/vllm-server, a relocatable wrapper that points the dynamic loader
# and Python at the ROCm files bundled in site-packages and then starts the
# vLLM OpenAI-compatible API server with the caller's arguments.
- name: Create launcher script
  run: |
    # The heredoc delimiter is quoted, so nothing inside is expanded at
    # build time. The body sits at the same indentation as the rest of this
    # script; YAML removes that common indent from the block, so the file
    # is written without leading whitespace and needs no sed post-processing.
    #
    # ${VAR:+:$VAR} appends the previous value only when it is non-empty.
    # A plain "$dir:$VAR" leaves a trailing ':' when VAR is unset, and the
    # loader treats that empty entry as the current working directory.
    cat > /opt/vllm/bin/vllm-server << 'LAUNCHER_EOF'
    #!/bin/bash
    SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
    VENV_DIR="$(dirname "$SCRIPT_DIR")"
    SP="$VENV_DIR/lib/python3.12/site-packages"
    ROCM_LIB="$SP/_rocm_sdk_core/lib"
    if [ -d "$ROCM_LIB" ]; then
      export LD_LIBRARY_PATH="$ROCM_LIB${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
    fi
    export PYTHONPATH="$SP/_rocm_sdk_core/share/amd_smi${PYTHONPATH:+:$PYTHONPATH}"
    export FLASH_ATTENTION_TRITON_AMD_ENABLE=TRUE
    exec "$SCRIPT_DIR/python3" -m vllm.entrypoints.openai.api_server "$@"
    LAUNCHER_EOF
    chmod +x /opt/vllm/bin/vllm-server
    echo "Launcher script:"
    cat /opt/vllm/bin/vllm-server
# Deletes packaging tools, bytecode caches, test/benchmark trees and package
# metadata from the venv, printing its size before and after. Every removal is
# best-effort: errors are silenced and never fail the step.
- name: Strip unnecessary files to reduce size
  run: |
    cd /opt/vllm
    echo "=== Size before cleanup ==="
    du -sh .

    site_pkgs="lib/python3.12/site-packages"
    stdlib="lib/python3.12"

    # Remove pip/setuptools/wheel
    for prefix in pip setuptools wheel pkg_resources; do
      rm -rf "$site_pkgs/$prefix"* 2>/dev/null || true
    done
    rm -f "$site_pkgs/distutils-precedence.pth" 2>/dev/null || true
    rm -rf "$site_pkgs/_distutils_hack" 2>/dev/null || true

    # Remove __pycache__ and .pyc
    find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
    find . -name "*.pyc" -delete 2>/dev/null || true

    # Remove test/benchmark dirs (but NOT torch.testing — it's imported at runtime)
    for subdir in test benchmarks; do
      rm -rf "$site_pkgs/torch/$subdir" 2>/dev/null || true
    done

    # Remove .dist-info except vllm
    # NOTE(review): code that looks up installed versions through
    # importlib.metadata needs these directories — confirm nothing in the
    # bundle does so for the packages whose metadata is deleted here.
    find "$site_pkgs" -maxdepth 1 -type d -name "*.dist-info" ! -name "vllm*" -exec rm -rf {} + 2>/dev/null || true

    # Remove Python stdlib we don't need
    # NOTE(review): a venv normally does not contain the standard library,
    # so these paths may not exist at all — verify.
    for module in test tkinter idlelib turtledemo ensurepip; do
      rm -rf "$stdlib/$module" 2>/dev/null || true
    done
    rm -rf include/ 2>/dev/null || true

    # NOTE: Do NOT strip .so files — AMD ROCm wheels use special ELF
    # alignment that strip corrupts, and numpy/scipy also break.

    echo "=== Size after cleanup ==="
    du -sh .
    echo ""
    echo "Top consumers:"
    for pkg in torch vllm _rocm_sdk_core; do
      du -sh "$site_pkgs/$pkg/" 2>/dev/null || true
    done
# Smoke test after the cleanup: vllm and torch must still import with the
# bundled ROCm libraries on the loader path, and the launcher must parse.
- name: Verify bundled environment works
  run: |
    SP="/opt/vllm/lib/python3.12/site-packages"
    ROCM_LIB="$SP/_rocm_sdk_core/lib"
    # Append the previous value only when it is non-empty; a trailing ':'
    # would add the current directory to the library search path.
    export LD_LIBRARY_PATH="$ROCM_LIB${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
    /opt/vllm/bin/python3 -c "import vllm; print(f'vLLM {vllm.__version__} OK')"
    /opt/vllm/bin/python3 -c "import torch; print(f'PyTorch {torch.__version__} OK')"
    # Syntax check only — the launcher itself is not executed here.
    bash -n /opt/vllm/bin/vllm-server
    echo "All sanity checks passed"
# Prints the total size of the bundle and of each top-level directory in it.
- name: Report final size
  run: |
    bundle=/opt/vllm
    echo "=== Final artifact ==="
    du -sh "$bundle/"
    echo ""
    du -sh "$bundle"/*/ 2>/dev/null
# Publishes the whole venv as a per-target artifact; the release job looks the
# artifacts up by this exact name pattern.
# NOTE(review): upload-artifact documents that file permissions are not
# preserved, so executables in the bundle may lose their executable bit by the
# time the release job downloads them — verify.
- name: Upload build artifacts
  uses: actions/upload-artifact@v4
  with:
    name: vllm-ubuntu-rocm-${{ matrix.gfx_target }}-x64
    path: /opt/vllm/
    compression-level: 6
    retention-days: 30
# Reads the installed vLLM and PyTorch versions from the bundle and publishes
# them as the step outputs vllm_version and torch_version.
- name: Set job outputs
  id: set-outputs
  run: |
    SP="/opt/vllm/lib/python3.12/site-packages"
    ROCM_LIB="$SP/_rocm_sdk_core/lib"
    # Append the previous value only when it is non-empty; a trailing ':'
    # would add the current directory to the library search path.
    export LD_LIBRARY_PATH="$ROCM_LIB${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
    vllm_ver=$(/opt/vllm/bin/python3 -c "import vllm; print(vllm.__version__)")
    torch_ver=$(/opt/vllm/bin/python3 -c "import torch; print(torch.__version__)")
    {
      echo "vllm_version=$vllm_ver"
      echo "torch_version=$torch_ver"
    } >> "$GITHUB_OUTPUT"
# Removes the build directory whether or not the earlier steps succeeded.
- name: Clean up
  if: always()
  run: |
    # Use a full if-statement: with `[ -d dir ] && rm ...` as the last
    # command, a missing directory leaves exit status 1 and marks this
    # step as failed even though there was simply nothing to clean up.
    if [ -d "/opt/vllm" ]; then
      sudo rm -rf /opt/vllm
    fi
# Packages the build artifacts and publishes them as a GitHub release.
create-release:
  runs-on: ubuntu-22.04
  needs:
    - prepare-matrix
    - build-ubuntu
  # Runs only after a fully successful build, never for pull requests, and
  # only for scheduled runs or manual runs that did not disable the release.
  if: |
    always() &&
    needs.build-ubuntu.result == 'success' &&
    github.event_name != 'pull_request' &&
    (github.event_name == 'workflow_dispatch' &&
    (github.event.inputs.create_release == 'true' || github.event.inputs.create_release == null) ||
    github.event_name == 'schedule')
  permissions:
    contents: write
    # NOTE(review): no step visible in this job touches pull requests —
    # confirm this permission is still required.
    pull-requests: write
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
    # No artifact name is given; the archive step expects one sub-directory
    # per artifact under ./all-artifacts.
    - name: Download all build artifacts
      uses: actions/download-artifact@v4
      with:
        path: ./all-artifacts
# Computes the next sequential release tag (b1000, b1001, ...) from the
# existing releases and exposes it as the step output "tag".
- name: Generate release tag
  id: generate-tag
  env:
    GH_TOKEN: ${{ github.token }}
  run: |
    # Query on its own line so that a failing gh call (auth, network) stops
    # the step instead of being mistaken for "no releases yet".
    all_tags=$(gh release list --limit 1000 --json tagName --jq '.[].tagName')
    # Keep only build tags. {4,} still matches once the counter grows past
    # b9999. grep exits non-zero when nothing matches, hence the "|| true".
    existing_tags=$(printf '%s\n' "$all_tags" | grep -E '^b[0-9]{4,}$' | sort -V || true)
    if [ -z "$existing_tags" ]; then
      next_number=1000
    else
      highest_tag=$(printf '%s\n' "$existing_tags" | tail -n 1)
      highest_number=${highest_tag#b}
      # 10# forces base 10; a tag such as b0999 would otherwise be parsed
      # as an invalid octal number.
      next_number=$((10#$highest_number + 1))
    fi
    TAG=$(printf "b%04d" "$next_number")
    echo "tag=${TAG}" >> "$GITHUB_OUTPUT"
    echo "Generated release tag: ${TAG}"
# Creates one .tar.gz per GPU target from the downloaded artifacts and splits
# any archive larger than MAX_SIZE MB into numbered parts.
- name: Create archives (split if >1.9 GB for GitHub release limit)
  run: |
    TAG="${{ steps.generate-tag.outputs.tag }}"
    MAX_SIZE=1900  # MB — GitHub limit is 2 GB per asset
    # GFX_TARGETS is read from the environment (workflow-level env) rather
    # than interpolated into the script, since it may carry user input.
    IFS=',' read -ra TARGET_ARRAY <<< "$GFX_TARGETS"
    for target in "${TARGET_ARRAY[@]}"; do
      # Trim surrounding whitespace and skip empty list entries.
      target="${target#"${target%%[![:space:]]*}"}"
      target="${target%"${target##*[![:space:]]}"}"
      [ -n "$target" ] || continue

      artifact_dir="./all-artifacts/vllm-ubuntu-rocm-${target}-x64"
      base="vllm-${TAG}-ubuntu-rocm-${target}-x64"
      if [ -d "$artifact_dir" ]; then
        # upload-artifact does not preserve file permissions, so restore
        # the executable bit on the interpreter and launcher in bin/.
        # NOTE(review): executables elsewhere in the bundle may need the
        # same treatment — verify.
        if [ -d "$artifact_dir/bin" ]; then
          find "$artifact_dir/bin" -maxdepth 1 -type f -exec chmod +x {} +
        fi

        echo "Creating: ${base}.tar.gz"
        tar -czf "${base}.tar.gz" -C "$artifact_dir" .
        size_mb=$(du -m "${base}.tar.gz" | cut -f1)
        echo "Archive size: ${size_mb} MB"
        if [ "$size_mb" -gt "$MAX_SIZE" ]; then
          echo "Splitting into ${MAX_SIZE}MB parts..."
          # Produces ${base}.part00.tar.gz, ${base}.part01.tar.gz, ...
          split -b "${MAX_SIZE}M" -d --additional-suffix=.tar.gz \
            "${base}.tar.gz" "${base}.part"
          rm "${base}.tar.gz"
          echo "Parts created:"
          ls -la "${base}".part*
        fi
      else
        echo "Warning: $artifact_dir not found"
      fi
    done
    echo "=== Release assets ==="
    ls -la *.tar.gz 2>/dev/null || echo "No archives"
# Creates the GitHub release for the generated tag and attaches every archive
# (or archive part) produced by the previous step.
- name: Create Release
  env:
    GH_TOKEN: ${{ github.token }}
    # Passed through the environment instead of being interpolated into the
    # script text.
    VLLM_VERSION: ${{ needs.build-ubuntu.outputs.vllm_version }}
    TORCH_VERSION: ${{ needs.build-ubuntu.outputs.torch_version }}
  run: |
    TAG="${{ steps.generate-tag.outputs.tag }}"
    # Collect all .tar.gz files (may be split parts). A glob into an array
    # replaces parsing ls output; nullglob yields an empty array when there
    # are no archives, in which case the release is created without assets.
    shopt -s nullglob
    upload_files=(vllm-*.tar.gz)
    shopt -u nullglob
    echo "Files to upload: ${upload_files[*]}"
    # The continuation lines of --notes stay at the script's base indent so
    # the release text carries no leading whitespace.
    gh release create "$TAG" \
      --title "$TAG" \
      --notes "**Build Number**: $TAG
    **GPU Target(s)**: $GFX_TARGETS
    **vLLM Version**: $VLLM_VERSION
    **PyTorch Version**: $TORCH_VERSION
    **Build Date**: $(date -u '+%Y-%m-%d %H:%M:%S UTC')
    Portable vLLM builds using AMD's official ROCm wheels. Includes bundled Python, PyTorch ROCm, and ROCm runtime. No separate installation required." \
      "${upload_files[@]}"