# NOTE(review): the four lines below were page chrome accidentally pasted from
# the GitHub web UI run page (nav link + run title). They are not valid YAML;
# they are preserved here as comments only so the file parses.
# Skip to content
# vLLM Build Test Push #1
# vLLM Build Test Push
# vLLM Build Test Push #1
# vLLM Build, Test, and Push Workflow
# Place this file in .github/workflows/ to use
# Uses Dockerfile from: deep-learning-containers/vllm/x86_64/gpu/Dockerfile
# - Dockerfile changes trigger build + test
# - Other changes only run test against existing ECR image
name: vLLM Build Test Push

# Triggers: manual dispatch (optional model override + force-build flag), and
# pushes/PRs touching the Dockerfile or its supporting build artifacts.
on:
  workflow_dispatch:
    inputs:
      model:
        description: 'Model for inference test'
        required: false
        default: 'facebook/opt-125m'
      force_build:
        description: 'Force build even without Dockerfile changes'
        required: false
        default: false
        type: boolean
  push:
    paths:
      - 'vllm/x86_64/gpu/Dockerfile'
      - 'vllm/build_artifacts/**'
      - 'src/deep_learning_container.py'
      - 'miscellaneous_scripts/bash_telemetry.sh'
      - 'scripts/install_efa.sh'
  pull_request:
    paths:
      - 'vllm/x86_64/gpu/Dockerfile'
      - 'vllm/build_artifacts/**'
# Image coordinates shared by every job below.
env:
  ECR_REGISTRY: 897880167187.dkr.ecr.us-west-2.amazonaws.com
  ECR_REPOSITORY: jkottu-dev
  IMAGE_TAG: vllm-ec2-latest
  AWS_REGION: us-west-2
jobs:
  # Decide whether a rebuild is needed. Emits dockerfile_changed=true when the
  # Dockerfile or its supporting artifacts changed in the last commit, or when
  # a manual run requested force_build.
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      dockerfile_changed: ${{ steps.check.outputs.dockerfile_changed }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Need the parent commit available so `git diff HEAD~1 HEAD` works.
          fetch-depth: 2
      - name: Check for Dockerfile changes
        id: check
        run: |
          if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
            if [ "${{ inputs.force_build }}" == "true" ]; then
              echo "dockerfile_changed=true" >> $GITHUB_OUTPUT
            else
              echo "dockerfile_changed=false" >> $GITHUB_OUTPUT
            fi
          else
            # Check if Dockerfile or build artifacts changed
            CHANGED=$(git diff --name-only HEAD~1 HEAD | grep -E '^(vllm/x86_64/gpu/Dockerfile|vllm/build_artifacts/|src/deep_learning_container.py|miscellaneous_scripts/bash_telemetry.sh|scripts/install_efa.sh)' || true)
            if [ -n "$CHANGED" ]; then
              echo "dockerfile_changed=true" >> $GITHUB_OUTPUT
              echo "Changed files: $CHANGED"
            else
              echo "dockerfile_changed=false" >> $GITHUB_OUTPUT
            fi
          fi
build:
needs: [check-changes]
if: needs.check-changes.outputs.dockerfile_changed == 'true'
runs-on: gpu-2gpu-runner
steps:
- name: Checkout
uses: actions/checkout@v4
- name: AWS credentials check
run: aws sts get-caller-identity
- name: Login to ECR
run: |
aws ecr get-login-password --region ${{ env.AWS_REGION }} | \
docker login --username AWS --password-stdin ${{ env.ECR_REGISTRY }}
- name: Prepare build context
run: |
echo "=== Preparing build context ==="
mkdir -p build_context
# Copy supporting files required by Dockerfile
cp src/deep_learning_container.py build_context/
cp miscellaneous_scripts/bash_telemetry.sh build_context/
cp scripts/install_efa.sh build_context/
cp vllm/build_artifacts/dockerd_entrypoint.sh build_context/
cp vllm/build_artifacts/sagemaker_entrypoint.sh build_context/ || true
cp vllm/x86_64/gpu/Dockerfile build_context/
ls -la build_context/
- name: Build vLLM image
run: |
cd build_context
docker build \
--target vllm-ec2 \
-t ${{ env.ECR_REGISTRY }}/${{ env.ECR_REPOSITORY }}:${{ env.IMAGE_TAG }} \
.
- name: Push to ECR
run: |
docker push ${{ env.ECR_REGISTRY }}/${{ env.ECR_REPOSITORY }}:${{ env.IMAGE_TAG }}
echo "✅ Pushed ${{ env.ECR_REGISTRY }}/${{ env.ECR_REPOSITORY }}:${{ env.IMAGE_TAG }}"
test:
needs: [build]
if: always() && (needs.build.result == 'success' || inputs.skip_build)
runs-on: gpu-spot-runner
steps:
- name: Login to ECR
run: |
aws ecr get-login-password --region ${{ env.AWS_REGION }} | \
docker login --username AWS --password-stdin ${{ env.ECR_REGISTRY }}
- name: Pull image
run: docker pull ${{ env.ECR_REGISTRY }}/${{ env.ECR_REPOSITORY }}:${{ env.IMAGE_TAG }}
- name: GPU check
run: docker run --rm --gpus all nvidia/cuda:12.2.0-runtime-ubuntu22.04 nvidia-smi
- name: Start vLLM server
run: |
docker run -d --name vllm-server --gpus all -p 8000:8000 \
${{ env.ECR_REGISTRY }}/${{ env.ECR_REPOSITORY }}:${{ env.IMAGE_TAG }} \
--model ${{ inputs.model || 'facebook/opt-125m' }} \
--host 0.0.0.0 --port 8000
echo "Waiting for server..."
for i in {1..60}; do
curl -s http://localhost:8000/health && break || sleep 5
done
- name: Run inference
run: |
curl -X POST http://localhost:8000/v1/completions \
-H "Content-Type: application/json" \
-d '{"model": "${{ inputs.model || 'facebook/opt-125m' }}", "prompt": "Hello, my name is", "max_tokens": 50}' | jq .
- name: Cleanup
if: always()
run: |
docker stop vllm-server || true
docker rm vllm-server || true