322 changes: 322 additions & 0 deletions .github/workflows/pr-vllm.yml
name: PR - vLLM

on:
  pull_request:
    branches:
      - main
    paths:
      - "docker/**"

permissions:
  contents: read

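# Keep at most one active run per PR: a new push cancels the in-flight run.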
concurrency:
  group: pr-vllm-${{ github.event.pull_request.number }}
  cancel-in-progress: true

jobs:
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      vllm-ec2: ${{ steps.changes.outputs.vllm-ec2 }}
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@v6
        with:
          python-version: "3.12"
      - uses: pre-commit/action@v3.0.1
        with:
          extra_args: --all-files
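      # Gate the expensive GPU jobs: vllm-ec2 is true only when the vLLM Dockerfile itself changed.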
      - name: Detect file changes
        id: changes
        uses: dorny/paths-filter@v3
        with:
          filters: |
            vllm-ec2:
              - "docker/vllm/Dockerfile"

  build-image:
    needs: [check-changes]
    if: needs.check-changes.outputs.vllm-ec2 == 'true'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
      - fleet:x86-build-runner
    steps:
      - uses: actions/checkout@v5
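      # Repo-local setup scripts: prepare the CodeBuild runner and start the buildkitd daemon used by the buildx build below.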
      - run: .github/scripts/runner_setup.sh
      - run: .github/scripts/buildkitd.sh
      - name: ECR login
        run: |
          aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com

      - name: Resolve image URI for build
        run: |
          IMAGE_URI=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.11.0-gpu-py312-cu128-ubuntu22.04-ec2-pr-${{ github.event.pull_request.number }}
          echo "Image URI to build: $IMAGE_URI"
          echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV

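      # --cache-to=type=inline embeds layer-cache metadata in the pushed image, so the next PR build can pull cache from the same tag via --cache-from.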
      - name: Build image
        run: |
          docker buildx build --progress plain \
            --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \
            --cache-to=type=inline \
            --cache-from=type=registry,ref=$IMAGE_URI \
            --tag $IMAGE_URI \
            --target vllm-ec2 \
            -f docker/vllm/Dockerfile .

      - name: Docker Push and save image URI artifact
        run: |
          docker push $IMAGE_URI
          docker rmi $IMAGE_URI
          echo $IMAGE_URI > image_uri.txt

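      # The test jobs run on separate runners, so hand them the image URI via a workflow artifact.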
      - name: Upload image URI artifact
        uses: actions/upload-artifact@v4
        with:
          name: vllm-ec2-image-uri
          path: image_uri.txt

  regression-test:
    needs: [build-image]
    if: needs.build-image.result == 'success'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
      - fleet:x86-g6xl-runner
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5

      - name: ECR login
        run: |
          aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com

      - name: Download image URI artifact
        uses: actions/download-artifact@v4
        with:
          name: vllm-ec2-image-uri

      - name: Resolve image URI for test
        run: |
          IMAGE_URI=$(cat image_uri.txt)
          echo "Resolved image URI: $IMAGE_URI"
          echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV

      - name: Pull image
        run: |
          docker pull $IMAGE_URI

      - name: Checkout vLLM Tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v0.11.0
          path: vllm_source

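      # Mount the host's Hugging Face and vLLM caches to avoid re-downloading models; the detached container is reused by the exec steps below.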
      - name: Start container
        run: |
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ./vllm_source:/workdir --workdir /workdir \
            -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${IMAGE_URI})
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV

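      # Install the test requirements, then move the vllm checkout into src/ so tests import the vllm wheel baked into the image rather than the source tree.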
      - name: Setup for vLLM Test
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --system pytest pytest-asyncio
            uv pip install --system -e tests/vllm_test_utils
            uv pip install --system hf_transfer
            mkdir src
            mv vllm src/vllm
          '

      - name: Run vLLM Tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            nvidia-smi

            # Regression Test # 7min
            cd /workdir/tests
            uv pip install --system modelscope
            pytest -v -s test_regression.py
          '

      - name: Cleanup container and images
        if: always()
        run: |
          docker rm -f ${CONTAINER_ID} || true
          docker image prune -a --force --filter "until=24h"
          docker system df

  cuda-test:
    needs: [build-image]
    if: needs.build-image.result == 'success'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
      - fleet:x86-g6xl-runner
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5

      - name: ECR login
        run: |
          aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com

      - name: Download image URI artifact
        uses: actions/download-artifact@v4
        with:
          name: vllm-ec2-image-uri

      - name: Resolve image URI for test
        run: |
          IMAGE_URI=$(cat image_uri.txt)
          echo "Resolved image URI: $IMAGE_URI"
          echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV

      - name: Pull image
        run: |
          docker pull $IMAGE_URI

      - name: Checkout vLLM Tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v0.11.0
          path: vllm_source

      - name: Start container
        run: |
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ./vllm_source:/workdir --workdir /workdir \
            -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${IMAGE_URI})
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV

      - name: Setup for vLLM Test
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --system pytest pytest-asyncio
            uv pip install --system -e tests/vllm_test_utils
            uv pip install --system hf_transfer
            mkdir src
            mv vllm src/vllm
          '

      - name: Run vLLM Tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            nvidia-smi

            # Platform Tests (CUDA) # 4min
            cd /workdir/tests
            pytest -v -s cuda/test_cuda_context.py
          '

      - name: Cleanup container and images
        if: always()
        run: |
          docker rm -f ${CONTAINER_ID} || true
          docker image prune -a --force --filter "until=24h"
          docker system df

  example-test:
    needs: [build-image]
    if: needs.build-image.result == 'success'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
      - fleet:x86-g6xl-runner
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5

      - name: ECR login
        run: |
          aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com

      - name: Download image URI artifact
        uses: actions/download-artifact@v4
        with:
          name: vllm-ec2-image-uri

      - name: Resolve image URI for test
        run: |
          IMAGE_URI=$(cat image_uri.txt)
          echo "Resolved image URI: $IMAGE_URI"
          echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV

      - name: Pull image
        run: |
          docker pull $IMAGE_URI

      - name: Checkout vLLM Tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v0.11.0
          path: vllm_source

      - name: Start container
        run: |
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ./vllm_source:/workdir --workdir /workdir \
            -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${IMAGE_URI})
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV

      - name: Setup for vLLM Test
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --system pytest pytest-asyncio
            uv pip install --system -e tests/vllm_test_utils
            uv pip install --system hf_transfer
            mkdir src
            mv vllm src/vllm
          '

      - name: Run vLLM Tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            nvidia-smi

            # Examples Test # 30min
            cd /workdir/examples
            pip install tensorizer # for tensorizer test
            python3 offline_inference/basic/generate.py --model facebook/opt-125m
            # python3 offline_inference/basic/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10
            python3 offline_inference/basic/chat.py
            python3 offline_inference/prefix_caching.py
            python3 offline_inference/llm_engine_example.py
            python3 offline_inference/audio_language.py --seed 0
            python3 offline_inference/vision_language.py --seed 0
            python3 offline_inference/vision_language_pooling.py --seed 0
            python3 offline_inference/vision_language_multi_image.py --seed 0
            VLLM_USE_V1=0 python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
            python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
            python3 offline_inference/basic/classify.py
            python3 offline_inference/basic/embed.py
            python3 offline_inference/basic/score.py
            python3 offline_inference/simple_profiling.py
          '

      - name: Cleanup container and images
        if: always()
        run: |
          docker rm -f ${CONTAINER_ID} || true
          docker image prune -a --force --filter "until=24h"
          docker system df
68 changes: 68 additions & 0 deletions docker/vllm/Dockerfile
FROM docker.io/vllm/vllm-openai:v0.11.0 AS base
ARG PYTHON="python3"
LABEL maintainer="Amazon AI"
ARG EFA_VERSION="1.43.3"
LABEL dlc_major_version="1"
ENV DEBIAN_FRONTEND=noninteractive \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8 \
    DLC_CONTAINER_TYPE=base \
    # Python won't try to write .pyc or .pyo files on the import of source modules
    # Force stdin, stdout and stderr to be totally unbuffered. Good for logging
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONIOENCODING=UTF-8 \
    LD_LIBRARY_PATH="/usr/local/lib:/opt/amazon/ofi-nccl/lib/x86_64-linux-gnu:/opt/amazon/openmpi/lib:/opt/amazon/efa/lib:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" \
    PATH="/opt/amazon/openmpi/bin:/opt/amazon/efa/bin:/usr/local/cuda/bin:${PATH}"

WORKDIR /

COPY ./scripts/telemetry/deep_learning_container.py /usr/local/bin/deep_learning_container.py
COPY ./scripts/telemetry/bash_telemetry.sh /usr/local/bin/bash_telemetry.sh
COPY ./scripts/setup_oss_compliance.sh setup_oss_compliance.sh

RUN chmod +x /usr/local/bin/deep_learning_container.py \
    && chmod +x /usr/local/bin/bash_telemetry.sh \
    && echo 'source /usr/local/bin/bash_telemetry.sh' >>/etc/bash.bashrc \
    && bash setup_oss_compliance.sh ${PYTHON} && rm setup_oss_compliance.sh \
    # create symlink for python
    && ln -s /usr/bin/python3 /usr/bin/python \
    # clean up
    && rm -rf ${HOME_DIR}/oss_compliance* \
    && rm -rf /tmp/tmp* \
    && rm -rf /tmp/uv* \
    && rm -rf /var/lib/apt/lists/* \
    && rm -rf /root/.cache || true

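# Install EFA for high-bandwidth inter-node networking, then replace the base
# image's libnvjpeg with the 12.4 redistributable build.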
COPY ./scripts/install_efa.sh install_efa.sh
RUN bash install_efa.sh ${EFA_VERSION} \
    && rm install_efa.sh \
    && mkdir -p /tmp/nvjpeg \
    && cd /tmp/nvjpeg \
    && wget https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \
    && tar -xvf libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \
    && rm -rf /usr/local/cuda/targets/x86_64-linux/lib/libnvjpeg* \
    && rm -rf /usr/local/cuda/targets/x86_64-linux/include/nvjpeg.h \
    && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/lib/libnvjpeg* /usr/local/cuda/targets/x86_64-linux/lib/ \
    && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/include/* /usr/local/cuda/targets/x86_64-linux/include/ \
    && rm -rf /tmp/nvjpeg \
    # remove cuobjdump and nvdisasm
    && rm -rf /usr/local/cuda/bin/cuobjdump* \
    && rm -rf /usr/local/cuda/bin/nvdisasm*

# ====================== ec2 =========================================
FROM base AS vllm-ec2

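# CI passes CACHE_REFRESH as the current date, so the upgrade layer below is rebuilt at most once per day.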
ARG CACHE_REFRESH=0
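# Hold every CUDA/NVIDIA package so the blanket apt-get upgrade cannot replace the pinned GPU stack.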
RUN dpkg -l | grep -E "cuda|nvidia|libnv" | awk '{print $2}' | xargs apt-mark hold \
    && apt-get update \
    && apt-get upgrade -y \
    && apt-get clean

COPY ./scripts/dockerd_entrypoint.sh /usr/local/bin/dockerd_entrypoint.sh
RUN chmod +x /usr/local/bin/dockerd_entrypoint.sh

ENTRYPOINT ["/usr/local/bin/dockerd_entrypoint.sh"]

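# A minimal local smoke-test sketch (the tag is hypothetical, and this assumes
# dockerd_entrypoint.sh forwards its arguments to the vLLM OpenAI-compatible
# server, as the upstream base image's entrypoint does):
#   docker buildx build --target vllm-ec2 -t vllm-ec2:dev -f docker/vllm/Dockerfile .
#   docker run --rm --gpus=all -p 8000:8000 vllm-ec2:dev --model facebook/opt-125m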
# TODO: add later
# ====================== sagemaker =========================================