# Build vLLM + ROCm — GitHub Actions workflow
# (page chrome from web export removed; run number "#12" was UI text, not config)
---

# Workflow: portable vLLM + ROCm builds from AMD's official wheel indexes.
name: Build vLLM + ROCm

on:
  # Manual runs: choose GPU targets and whether to publish a release.
  workflow_dispatch:
    inputs:
      gfx_target:
        description: 'AMD GPU targets (comma-separated)'
        required: false
        default: 'gfx1151,gfx1150,gfx120X'
      create_release:
        description: 'Create a GitHub release after successful build'
        required: false
        default: true
        type: boolean
  # Build-only validation on PRs (release job is gated off for PRs below).
  pull_request:
    types: [opened, synchronize, reopened]
  # Weekly refresh: Sundays 15:00 UTC.
  schedule:
    - cron: '0 15 * * 0'

env:
  # Falls back to the default target list for schedule/PR events,
  # where workflow_dispatch inputs are empty.
  GFX_TARGETS: ${{ github.event.inputs.gfx_target || 'gfx1151,gfx1150,gfx120X' }}
jobs:
  # Turns the comma-separated GFX_TARGETS env var into a JSON build matrix,
  # e.g. 'a,b' -> {"gfx_target":["a","b"]}.
  prepare-matrix:
    runs-on: ubuntu-22.04
    outputs:
      ubuntu_matrix: ${{ steps.set-matrix.outputs.ubuntu_matrix }}
    steps:
      - name: Set matrix
        id: set-matrix
        run: |
          targets="${{ env.GFX_TARGETS }}"
          # Split on commas, trim whitespace, then assemble a compact JSON
          # object with jq (-R: raw input, -s: slurp into an array, -c: compact).
          matrix_targets=$(echo "$targets" \
            | tr ',' '\n' \
            | sed 's/^ *//;s/ *$//' \
            | jq -R . \
            | jq -s '{gfx_target: .}' \
            | jq -c)
          echo "ubuntu_matrix=$matrix_targets" >> "$GITHUB_OUTPUT"
          echo "Generated matrix: $matrix_targets"
build-ubuntu:
runs-on: ubuntu-22.04
needs: prepare-matrix
strategy:
matrix: ${{fromJson(needs.prepare-matrix.outputs.ubuntu_matrix)}}
fail-fast: false
outputs:
vllm_version: ${{ steps.set-outputs.outputs.vllm_version }}
torch_version: ${{ steps.set-outputs.outputs.torch_version }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install Python 3.12
run: |
sudo add-apt-repository ppa:deadsnakes/ppa -y
sudo apt update
sudo apt install -y python3.12 python3.12-venv python3.12-dev
python3.12 --version
- name: Map GPU target to AMD wheel URLs
id: wheel-urls
run: |
target="${{ matrix.gfx_target }}"
# Map targets to AMD wheel URL suffixes
# See: https://rocm.docs.amd.com/en/latest/rocm-for-ai/vllm.html
case "$target" in
gfx1151) suffix="gfx1151" ;;
gfx1150) suffix="gfx1150" ;;
gfx120X) suffix="gfx120X-all" ;;
*)
echo "ERROR: No AMD pre-built wheels for target: $target"
exit 1
;;
esac
echo "torch_index=https://repo.amd.com/rocm/whl/${suffix}/" >> $GITHUB_OUTPUT
echo "vllm_index=https://rocm.frameworks.amd.com/whl/${suffix}/" >> $GITHUB_OUTPUT
echo "Using PyTorch index: https://repo.amd.com/rocm/whl/${suffix}/"
echo "Using vLLM index: https://rocm.frameworks.amd.com/whl/${suffix}/"
- name: Create Python virtual environment
run: |
python3.12 -m venv --copies /opt/vllm
/opt/vllm/bin/python3 -m pip install --upgrade pip
echo "Python: $(/opt/vllm/bin/python3 --version)"
- name: Install PyTorch ROCm from AMD
run: |
echo "Installing PyTorch from AMD ROCm wheel index..."
/opt/vllm/bin/pip install \
--index-url "${{ steps.wheel-urls.outputs.torch_index }}" \
torch torchvision
/opt/vllm/bin/python3 -c "
import torch
print(f'PyTorch {torch.__version__}')
print(f'ROCm built-in: {torch.version.hip is not None}')
"
- name: Install vLLM ROCm from AMD
run: |
echo "Installing vLLM from AMD ROCm wheel index..."
/opt/vllm/bin/pip install \
--extra-index-url "${{ steps.wheel-urls.outputs.vllm_index }}" \
vllm
/opt/vllm/bin/python3 -c "import vllm; print(f'vLLM {vllm.__version__}')"
- name: Create launcher script
run: |
cat > /opt/vllm/bin/vllm-server << 'LAUNCHER_EOF'
#!/bin/bash
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
VENV_DIR="$(dirname "$SCRIPT_DIR")"
SP="$VENV_DIR/lib/python3.12/site-packages"
ROCM_LIB="$SP/_rocm_sdk_core/lib"
if [ -d "$ROCM_LIB" ]; then
export LD_LIBRARY_PATH="$ROCM_LIB:${LD_LIBRARY_PATH:-}"
fi
export PYTHONPATH="$SP/_rocm_sdk_core/share/amd_smi:${PYTHONPATH:-}"
export FLASH_ATTENTION_TRITON_AMD_ENABLE=TRUE
exec "$SCRIPT_DIR/python3" -m vllm.entrypoints.openai.api_server "$@"
LAUNCHER_EOF
# Remove YAML indentation from heredoc
sed -i 's/^ //' /opt/vllm/bin/vllm-server
chmod +x /opt/vllm/bin/vllm-server
echo "Launcher script:"
cat /opt/vllm/bin/vllm-server
- name: Strip unnecessary files to reduce size
run: |
cd /opt/vllm
echo "=== Size before cleanup ==="
du -sh .
SP="lib/python3.12/site-packages"
# Remove pip/setuptools/wheel
rm -rf $SP/pip* $SP/setuptools* $SP/wheel* $SP/pkg_resources* 2>/dev/null || true
rm -f $SP/distutils-precedence.pth 2>/dev/null || true
rm -rf $SP/_distutils_hack 2>/dev/null || true
# Remove __pycache__ and .pyc
find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
find . -name "*.pyc" -delete 2>/dev/null || true
# Remove test/benchmark dirs (but NOT torch.testing — it's imported at runtime)
rm -rf $SP/torch/test 2>/dev/null || true
rm -rf $SP/torch/benchmarks 2>/dev/null || true
# Remove .dist-info except vllm
find $SP -maxdepth 1 -type d -name "*.dist-info" ! -name "vllm*" -exec rm -rf {} + 2>/dev/null || true
# Remove Python stdlib we don't need
rm -rf lib/python3.12/test lib/python3.12/tkinter lib/python3.12/idlelib 2>/dev/null || true
rm -rf lib/python3.12/turtledemo lib/python3.12/ensurepip 2>/dev/null || true
rm -rf include/ 2>/dev/null || true
# NOTE: Do NOT strip .so files — AMD ROCm wheels use special ELF
# alignment that strip corrupts, and numpy/scipy also break.
echo "=== Size after cleanup ==="
du -sh .
echo ""
echo "Top consumers:"
du -sh $SP/torch/ 2>/dev/null || true
du -sh $SP/vllm/ 2>/dev/null || true
du -sh $SP/_rocm_sdk_core/ 2>/dev/null || true
- name: Verify bundled environment works
run: |
SP="/opt/vllm/lib/python3.12/site-packages"
ROCM_LIB="$SP/_rocm_sdk_core/lib"
export LD_LIBRARY_PATH="$ROCM_LIB:${LD_LIBRARY_PATH:-}"
/opt/vllm/bin/python3 -c "import vllm; print(f'vLLM {vllm.__version__} OK')"
/opt/vllm/bin/python3 -c "import torch; print(f'PyTorch {torch.__version__} OK')"
bash -n /opt/vllm/bin/vllm-server
echo "All sanity checks passed"
- name: Report final size
run: |
echo "=== Final artifact ==="
du -sh /opt/vllm/
echo ""
du -sh /opt/vllm/*/ 2>/dev/null
- name: Upload build artifacts
uses: actions/upload-artifact@v4
with:
name: vllm-ubuntu-rocm-${{ matrix.gfx_target }}-x64
path: /opt/vllm/
retention-days: 30
compression-level: 6
- name: Set job outputs
id: set-outputs
run: |
SP="/opt/vllm/lib/python3.12/site-packages"
ROCM_LIB="$SP/_rocm_sdk_core/lib"
export LD_LIBRARY_PATH="$ROCM_LIB:${LD_LIBRARY_PATH:-}"
vllm_ver=$(/opt/vllm/bin/python3 -c "import vllm; print(vllm.__version__)")
torch_ver=$(/opt/vllm/bin/python3 -c "import torch; print(torch.__version__)")
echo "vllm_version=$vllm_ver" >> $GITHUB_OUTPUT
echo "torch_version=$torch_ver" >> $GITHUB_OUTPUT
- name: Clean up
if: always()
run: |
[ -d "/opt/vllm" ] && sudo rm -rf /opt/vllm
create-release:
needs: [prepare-matrix, build-ubuntu]
runs-on: ubuntu-22.04
permissions:
contents: write
pull-requests: write
if: |
always() &&
needs.build-ubuntu.result == 'success' &&
github.event_name != 'pull_request' &&
(github.event_name == 'workflow_dispatch' &&
(github.event.inputs.create_release == 'true' || github.event.inputs.create_release == null) ||
github.event_name == 'schedule')
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Download all build artifacts
uses: actions/download-artifact@v4
with:
path: ./all-artifacts
- name: Generate release tag
id: generate-tag
env:
GH_TOKEN: ${{ github.token }}
run: |
existing_tags=$(gh release list --limit 1000 --json tagName --jq '.[].tagName' | grep -E '^b[0-9]{4}$' | sort -V || echo "")
if [ -z "$existing_tags" ]; then
next_number=1000
else
highest_tag=$(echo "$existing_tags" | tail -n 1)
highest_number=$(echo "$highest_tag" | sed 's/^b//')
next_number=$((highest_number + 1))
fi
TAG=$(printf "b%04d" $next_number)
echo "tag=${TAG}" >> $GITHUB_OUTPUT
echo "Generated release tag: ${TAG}"
- name: Create archives (split if >1.9 GB for GitHub release limit)
run: |
targets="${{ env.GFX_TARGETS }}"
TAG="${{ steps.generate-tag.outputs.tag }}"
MAX_SIZE=1900 # MB — GitHub limit is 2 GB per asset
IFS=',' read -ra TARGET_ARRAY <<< "$targets"
for target in "${TARGET_ARRAY[@]}"; do
target=$(echo "$target" | xargs)
artifact_dir="./all-artifacts/vllm-ubuntu-rocm-${target}-x64"
base="vllm-${TAG}-ubuntu-rocm-${target}-x64"
if [ -d "$artifact_dir" ]; then
echo "Creating: ${base}.tar.gz"
tar -czf "${base}.tar.gz" -C "$artifact_dir" .
size_mb=$(du -m "${base}.tar.gz" | cut -f1)
echo "Archive size: ${size_mb} MB"
if [ "$size_mb" -gt "$MAX_SIZE" ]; then
echo "Splitting into ${MAX_SIZE}MB parts..."
split -b ${MAX_SIZE}M -d --additional-suffix=.tar.gz \
"${base}.tar.gz" "${base}.part"
rm "${base}.tar.gz"
echo "Parts created:"
ls -la ${base}.part*
fi
else
echo "Warning: $artifact_dir not found"
fi
done
echo "=== Release assets ==="
ls -la *.tar.gz 2>/dev/null || echo "No archives"
- name: Create Release
env:
GH_TOKEN: ${{ github.token }}
run: |
TAG="${{ steps.generate-tag.outputs.tag }}"
VLLM_VERSION="${{ needs.build-ubuntu.outputs.vllm_version }}"
TORCH_VERSION="${{ needs.build-ubuntu.outputs.torch_version }}"
targets="${{ env.GFX_TARGETS }}"
# Collect all .tar.gz files (may be split parts)
upload_files=$(ls -1 vllm-*.tar.gz 2>/dev/null | tr '\n' ' ')
echo "Files to upload: $upload_files"
gh release create "$TAG" \
--title "$TAG" \
--notes "**Build Number**: $TAG
**GPU Target(s)**: $targets
**vLLM Version**: $VLLM_VERSION
**PyTorch Version**: $TORCH_VERSION
**Build Date**: $(date -u '+%Y-%m-%d %H:%M:%S UTC')
Portable vLLM builds using AMD's official ROCm wheels. Includes bundled Python, PyTorch ROCm, and ROCm runtime. No separate installation required." \
$upload_files