Migrate vLLM Container #5475

Status: Merged

Changes from all commits (11 commits):
All 11 commits are by jinyan-li1:

- 77eddd8  copy vllm dockerfile
- 360f80f  modify dockerfile
- 6e4fc33  test file change detection
- df229aa  formatting
- 94e1bc7  formatting
- 261be6e  change vllm concurrency group
- c693242  add build job
- 83ef5e7  add regression test
- d9e7cdb  add cuda test
- 0a51c5e  Add example test
- f096993  change profiling test since not available in v0.11.0
New GitHub Actions workflow (new file, 322 lines added):

```yaml
name: PR - vLLM

on:
  pull_request:
    branches:
      - main
    paths:
      - "docker/**"

permissions:
  contents: read

concurrency:
  group: pr-vllm-${{ github.event.pull_request.number }}
  cancel-in-progress: true

jobs:
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      vllm-ec2: ${{ steps.changes.outputs.vllm-ec2 }}
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@v6
        with:
          python-version: "3.12"
      - uses: pre-commit/action@v3.0.1
        with:
          extra_args: --all-files
      - name: Detect file changes
        id: changes
        uses: dorny/paths-filter@v3
        with:
          filters: |
            vllm-ec2:
              - "docker/vllm/Dockerfile"

  build-image:
    needs: [check-changes]
    if: needs.check-changes.outputs.vllm-ec2 == 'true'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:x86-build-runner
    steps:
      - uses: actions/checkout@v5
      - run: .github/scripts/runner_setup.sh
      - run: .github/scripts/buildkitd.sh
      - name: ECR login
        run: |
          aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com

      - name: Resolve image URI for build
        run: |
          IMAGE_URI=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.11.0-gpu-py312-cu128-ubuntu22.04-ec2-pr-${{ github.event.pull_request.number }}
          echo "Image URI to build: $IMAGE_URI"
          echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV

      - name: Build image
        run: |
          docker buildx build --progress plain \
            --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \
            --cache-to=type=inline \
            --cache-from=type=registry,ref=$IMAGE_URI \
            --tag $IMAGE_URI \
            --target vllm-ec2 \
            -f docker/vllm/Dockerfile .

      - name: Docker Push and save image URI artifact
        run: |
          docker push $IMAGE_URI
          docker rmi $IMAGE_URI
          echo $IMAGE_URI > image_uri.txt

      - name: Upload image URI artifact
        uses: actions/upload-artifact@v4
        with:
          name: vllm-ec2-image-uri
          path: image_uri.txt

  regression-test:
    needs: [build-image]
    if: needs.build-image.result == 'success'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:x86-g6xl-runner
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5

      - name: ECR login
        run: |
          aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com

      - name: Download image URI artifact
        uses: actions/download-artifact@v4
        with:
          name: vllm-ec2-image-uri

      - name: Resolve image URI for test
        run: |
          IMAGE_URI=$(cat image_uri.txt)
          echo "Resolved image URI: $IMAGE_URI"
          echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV

      - name: Pull image
        run: |
          docker pull $IMAGE_URI

      - name: Checkout vLLM Tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v0.11.0
          path: vllm_source

      - name: Start container
        run: |
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ./vllm_source:/workdir --workdir /workdir \
            -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${IMAGE_URI})
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV

      - name: Setup for vLLM Test
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --system pytest pytest-asyncio
            uv pip install --system -e tests/vllm_test_utils
            uv pip install --system hf_transfer
            mkdir src
            mv vllm src/vllm
          '

      - name: Run vLLM Tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            nvidia-smi

            # Regression Test # 7min
            cd /workdir/tests
            uv pip install --system modelscope
            pytest -v -s test_regression.py
          '

      - name: Cleanup container and images
        if: always()
        run: |
          docker rm -f ${CONTAINER_ID} || true
          docker image prune -a --force --filter "until=24h"
          docker system df

  cuda-test:
    needs: [build-image]
    if: needs.build-image.result == 'success'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:x86-g6xl-runner
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5

      - name: ECR login
        run: |
          aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com

      - name: Download image URI artifact
        uses: actions/download-artifact@v4
        with:
          name: vllm-ec2-image-uri

      - name: Resolve image URI for test
        run: |
          IMAGE_URI=$(cat image_uri.txt)
          echo "Resolved image URI: $IMAGE_URI"
          echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV

      - name: Pull image
        run: |
          docker pull $IMAGE_URI

      - name: Checkout vLLM Tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v0.11.0
          path: vllm_source

      - name: Start container
        run: |
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ./vllm_source:/workdir --workdir /workdir \
            -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${IMAGE_URI})
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV

      - name: Setup for vLLM Test
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --system pytest pytest-asyncio
            uv pip install --system -e tests/vllm_test_utils
            uv pip install --system hf_transfer
            mkdir src
            mv vllm src/vllm
          '

      - name: Run vLLM Tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            nvidia-smi

            # Platform Tests (CUDA) # 4min
            cd /workdir/tests
            pytest -v -s cuda/test_cuda_context.py
          '

      - name: Cleanup container and images
        if: always()
        run: |
          docker rm -f ${CONTAINER_ID} || true
          docker image prune -a --force --filter "until=24h"
          docker system df

  example-test:
    needs: [build-image]
    if: needs.build-image.result == 'success'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:x86-g6xl-runner
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5

      - name: ECR login
        run: |
          aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com

      - name: Download image URI artifact
        uses: actions/download-artifact@v4
        with:
          name: vllm-ec2-image-uri

      - name: Resolve image URI for test
        run: |
          IMAGE_URI=$(cat image_uri.txt)
          echo "Resolved image URI: $IMAGE_URI"
          echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV

      - name: Pull image
        run: |
          docker pull $IMAGE_URI

      - name: Checkout vLLM Tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v0.11.0
          path: vllm_source

      - name: Start container
        run: |
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ./vllm_source:/workdir --workdir /workdir \
            -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${IMAGE_URI})
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV

      - name: Setup for vLLM Test
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --system pytest pytest-asyncio
            uv pip install --system -e tests/vllm_test_utils
            uv pip install --system hf_transfer
            mkdir src
            mv vllm src/vllm
          '

      - name: Run vLLM Tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            nvidia-smi

            # Examples Test # 30min
            cd /workdir/examples
            pip install tensorizer # for tensorizer test
            python3 offline_inference/basic/generate.py --model facebook/opt-125m
            # python3 offline_inference/basic/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10
            python3 offline_inference/basic/chat.py
            python3 offline_inference/prefix_caching.py
            python3 offline_inference/llm_engine_example.py
            python3 offline_inference/audio_language.py --seed 0
            python3 offline_inference/vision_language.py --seed 0
            python3 offline_inference/vision_language_pooling.py --seed 0
            python3 offline_inference/vision_language_multi_image.py --seed 0
            VLLM_USE_V1=0 python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
            python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
            python3 offline_inference/basic/classify.py
            python3 offline_inference/basic/embed.py
            python3 offline_inference/basic/score.py
            python3 offline_inference/simple_profiling.py
          '

      - name: Cleanup container and images
        if: always()
        run: |
          docker rm -f ${CONTAINER_ID} || true
          docker image prune -a --force --filter "until=24h"
          docker system df
```
docker/vllm/Dockerfile (new file, 68 lines added):

```dockerfile
FROM docker.io/vllm/vllm-openai:v0.11.0 as base
ARG PYTHON="python3"
LABEL maintainer="Amazon AI"
ARG EFA_VERSION="1.43.3"
LABEL dlc_major_version="1"
ENV DEBIAN_FRONTEND=noninteractive \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8 \
    DLC_CONTAINER_TYPE=base \
    # Python won't try to write .pyc or .pyo files on the import of source modules
    # Force stdin, stdout and stderr to be totally unbuffered. Good for logging
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONIOENCODING=UTF-8 \
    LD_LIBRARY_PATH="/usr/local/lib:/opt/amazon/ofi-nccl/lib/x86_64-linux-gnu:/opt/amazon/openmpi/lib:/opt/amazon/efa/lib:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" \
    PATH="/opt/amazon/openmpi/bin:/opt/amazon/efa/bin:/usr/local/cuda/bin:${PATH}"

WORKDIR /

COPY ./scripts/telemetry/deep_learning_container.py /usr/local/bin/deep_learning_container.py
COPY ./scripts/telemetry/bash_telemetry.sh /usr/local/bin/bash_telemetry.sh
COPY ./scripts/setup_oss_compliance.sh setup_oss_compliance.sh

RUN chmod +x /usr/local/bin/deep_learning_container.py \
    && chmod +x /usr/local/bin/bash_telemetry.sh \
    && echo 'source /usr/local/bin/bash_telemetry.sh' >>/etc/bash.bashrc \
    && bash setup_oss_compliance.sh ${PYTHON} && rm setup_oss_compliance.sh \
    # create symlink for python
    && ln -s /usr/bin/python3 /usr/bin/python \
    # clean up
    && rm -rf ${HOME_DIR}/oss_compliance* \
    && rm -rf /tmp/tmp* \
    && rm -rf /tmp/uv* \
    && rm -rf /var/lib/apt/lists/* \
    && rm -rf /root/.cache | true

COPY ./scripts/install_efa.sh install_efa.sh
RUN bash install_efa.sh ${EFA_VERSION} \
    && rm install_efa.sh \
    && mkdir -p /tmp/nvjpeg \
    && cd /tmp/nvjpeg \
    && wget https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \
    && tar -xvf libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \
    && rm -rf /usr/local/cuda/targets/x86_64-linux/lib/libnvjpeg* \
    && rm -rf /usr/local/cuda/targets/x86_64-linux/include/nvjpeg.h \
    && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/lib/libnvjpeg* /usr/local/cuda/targets/x86_64-linux/lib/ \
    && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/include/* /usr/local/cuda/targets/x86_64-linux/include/ \
    && rm -rf /tmp/nvjpeg \
    # remove cuobjdump and nvdisasm
    && rm -rf /usr/local/cuda/bin/cuobjdump* \
    && rm -rf /usr/local/cuda/bin/nvdisasm*

# ====================== ec2 =========================================
FROM base AS vllm-ec2

ARG CACHE_REFRESH=0
RUN dpkg -l | grep -E "cuda|nvidia|libnv" | awk '{print $2}' | xargs apt-mark hold \
    && apt-get update \
    && apt-get upgrade -y \
    && apt-get clean

COPY ./scripts/dockerd_entrypoint.sh /usr/local/bin/dockerd_entrypoint.sh
RUN chmod +x /usr/local/bin/dockerd_entrypoint.sh

ENTRYPOINT ["/usr/local/bin/dockerd_entrypoint.sh"]

# TODO: add later
# ====================== sagemaker =========================================
```
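To build the `vllm-ec2` stage from this Dockerfile outside of CI, a sketch along these lines should work from the repository root; the local tag `vllm-ec2:local` is an arbitrary placeholder, and the registry cache flags the workflow passes (`--cache-to`/`--cache-from`) are omitted since they only matter with ECR access.

```shell
# Sketch: local build of the vllm-ec2 target (tag is a placeholder assumption).
# CACHE_REFRESH busts the apt-upgrade layer cache once per day, as in CI.
docker buildx build --progress plain \
  --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \
  --tag vllm-ec2:local \
  --target vllm-ec2 \
  -f docker/vllm/Dockerfile .
```

Stopping at `--target vllm-ec2` builds only the `base` and `vllm-ec2` stages; the commented-out `sagemaker` stage is marked TODO in the Dockerfile.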