diff --git a/.github/workflows/pr-vllm.yml b/.github/workflows/pr-vllm.yml new file mode 100644 index 000000000000..fd3b407a934e --- /dev/null +++ b/.github/workflows/pr-vllm.yml @@ -0,0 +1,322 @@ +name: PR - vLLM + +on: + pull_request: + branches: + - main + paths: + - "docker/**" + +permissions: + contents: read + +concurrency: + group: pr-vllm-${{ github.event.pull_request.number }} + cancel-in-progress: true + +jobs: + check-changes: + runs-on: ubuntu-latest + outputs: + vllm-ec2: ${{ steps.changes.outputs.vllm-ec2 }} + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-python@v6 + with: + python-version: "3.12" + - uses: pre-commit/action@v3.0.1 + with: + extra_args: --all-files + - name: Detect file changes + id: changes + uses: dorny/paths-filter@v3 + with: + filters: | + vllm-ec2: + - "docker/vllm/Dockerfile" + + build-image: + needs: [check-changes] + if: needs.check-changes.outputs.vllm-ec2 == 'true' + runs-on: + - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} + fleet:x86-build-runner + steps: + - uses: actions/checkout@v5 + - run: .github/scripts/runner_setup.sh + - run: .github/scripts/buildkitd.sh + - name: ECR login + run: | + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + + - name: Resolve image URI for build + run: | + IMAGE_URI=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.11.0-gpu-py312-cu128-ubuntu22.04-ec2-pr-${{ github.event.pull_request.number }} + echo "Image URI to build: $IMAGE_URI" + echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV + + - name: Build image + run: | + docker buildx build --progress plain \ + --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \ + --cache-to=type=inline \ + --cache-from=type=registry,ref=$IMAGE_URI \ + --tag $IMAGE_URI \ + --target vllm-ec2 \ + -f docker/vllm/Dockerfile . 
+ + - name: Docker Push and save image URI artifact + run: | + docker push $IMAGE_URI + docker rmi $IMAGE_URI + echo $IMAGE_URI > image_uri.txt + + - name: Upload image URI artifact + uses: actions/upload-artifact@v4 + with: + name: vllm-ec2-image-uri + path: image_uri.txt + + regression-test: + needs: [build-image] + if: needs.build-image.result == 'success' + runs-on: + - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} + fleet:x86-g6xl-runner + steps: + - name: Checkout DLC source + uses: actions/checkout@v5 + + - name: ECR login + run: | + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + + - name: Download image URI artifact + uses: actions/download-artifact@v4 + with: + name: vllm-ec2-image-uri + + - name: Resolve image URI for test + run: | + IMAGE_URI=$(cat image_uri.txt) + echo "Resolved image URI: $IMAGE_URI" + echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV + + - name: Pull image + run: | + docker pull $IMAGE_URI + + - name: Checkout vLLM Tests + uses: actions/checkout@v5 + with: + repository: vllm-project/vllm + ref: v0.11.0 + path: vllm_source + + - name: Start container + run: | + CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ + -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ + -v ${HOME}/.cache/vllm:/root/.cache/vllm \ + -v ./vllm_source:/workdir --workdir /workdir \ + -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \ + ${IMAGE_URI}) + echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV + + - name: Setup for vLLM Test + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto + uv pip install --system pytest pytest-asyncio + uv pip install --system -e tests/vllm_test_utils + uv pip install --system hf_transfer + mkdir src + mv vllm src/vllm + ' + + - name: Run 
vLLM Tests + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + nvidia-smi + + # Regression Test # 7min + cd /workdir/tests + uv pip install --system modelscope + pytest -v -s test_regression.py + ' + + - name: Cleanup container and images + if: always() + run: | + docker rm -f ${CONTAINER_ID} || true + docker image prune -a --force --filter "until=24h" + docker system df + + cuda-test: + needs: [build-image] + if: needs.build-image.result == 'success' + runs-on: + - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} + fleet:x86-g6xl-runner + steps: + - name: Checkout DLC source + uses: actions/checkout@v5 + + - name: ECR login + run: | + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + + - name: Download image URI artifact + uses: actions/download-artifact@v4 + with: + name: vllm-ec2-image-uri + + - name: Resolve image URI for test + run: | + IMAGE_URI=$(cat image_uri.txt) + echo "Resolved image URI: $IMAGE_URI" + echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV + + - name: Pull image + run: | + docker pull $IMAGE_URI + + - name: Checkout vLLM Tests + uses: actions/checkout@v5 + with: + repository: vllm-project/vllm + ref: v0.11.0 + path: vllm_source + + - name: Start container + run: | + CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ + -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ + -v ${HOME}/.cache/vllm:/root/.cache/vllm \ + -v ./vllm_source:/workdir --workdir /workdir \ + -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \ + ${IMAGE_URI}) + echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV + + - name: Setup for vLLM Test + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto + uv pip install --system pytest pytest-asyncio + uv pip install --system -e 
tests/vllm_test_utils + uv pip install --system hf_transfer + mkdir src + mv vllm src/vllm + ' + + - name: Run vLLM Tests + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + nvidia-smi + + # Platform Tests (CUDA) # 4min + cd /workdir/tests + pytest -v -s cuda/test_cuda_context.py + ' + + - name: Cleanup container and images + if: always() + run: | + docker rm -f ${CONTAINER_ID} || true + docker image prune -a --force --filter "until=24h" + docker system df + + example-test: + needs: [build-image] + if: needs.build-image.result == 'success' + runs-on: + - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} + fleet:x86-g6xl-runner + steps: + - name: Checkout DLC source + uses: actions/checkout@v5 + + - name: ECR login + run: | + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + + - name: Download image URI artifact + uses: actions/download-artifact@v4 + with: + name: vllm-ec2-image-uri + + - name: Resolve image URI for test + run: | + IMAGE_URI=$(cat image_uri.txt) + echo "Resolved image URI: $IMAGE_URI" + echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV + + - name: Pull image + run: | + docker pull $IMAGE_URI + + - name: Checkout vLLM Tests + uses: actions/checkout@v5 + with: + repository: vllm-project/vllm + ref: v0.11.0 + path: vllm_source + + - name: Start container + run: | + CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ + -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ + -v ${HOME}/.cache/vllm:/root/.cache/vllm \ + -v ./vllm_source:/workdir --workdir /workdir \ + -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \ + ${IMAGE_URI}) + echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV + + - name: Setup for vLLM Test + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + uv pip install --system -r requirements/common.txt -r requirements/dev.txt 
--torch-backend=auto + uv pip install --system pytest pytest-asyncio + uv pip install --system -e tests/vllm_test_utils + uv pip install --system hf_transfer + mkdir src + mv vllm src/vllm + ' + + - name: Run vLLM Tests + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + nvidia-smi + + # Examples Test # 30min + cd /workdir/examples + pip install tensorizer # for tensorizer test + python3 offline_inference/basic/generate.py --model facebook/opt-125m + # python3 offline_inference/basic/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10 + python3 offline_inference/basic/chat.py + python3 offline_inference/prefix_caching.py + python3 offline_inference/llm_engine_example.py + python3 offline_inference/audio_language.py --seed 0 + python3 offline_inference/vision_language.py --seed 0 + python3 offline_inference/vision_language_pooling.py --seed 0 + python3 offline_inference/vision_language_multi_image.py --seed 0 + VLLM_USE_V1=0 python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors + python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0 + python3 offline_inference/basic/classify.py + python3 offline_inference/basic/embed.py + python3 offline_inference/basic/score.py + python3 offline_inference/simple_profiling.py + ' + + - name: Cleanup container and images + if: always() + run: | + docker rm -f ${CONTAINER_ID} || true + docker image prune -a --force --filter "until=24h" + docker system df diff --git a/docker/vllm/Dockerfile b/docker/vllm/Dockerfile new file mode 100644 index 000000000000..566cb2accaa8 --- /dev/null +++ b/docker/vllm/Dockerfile @@ -0,0 +1,68 @@ +FROM docker.io/vllm/vllm-openai:v0.11.0 as base +ARG PYTHON="python3" +LABEL maintainer="Amazon AI" +ARG EFA_VERSION="1.43.3" +LABEL 
dlc_major_version="1" +ENV DEBIAN_FRONTEND=noninteractive \ + LANG=C.UTF-8 \ + LC_ALL=C.UTF-8 \ + DLC_CONTAINER_TYPE=base \ + # Python won't try to write .pyc or .pyo files on the import of source modules + # Force stdin, stdout and stderr to be totally unbuffered. Good for logging + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONIOENCODING=UTF-8 \ + LD_LIBRARY_PATH="/usr/local/lib:/opt/amazon/ofi-nccl/lib/x86_64-linux-gnu:/opt/amazon/openmpi/lib:/opt/amazon/efa/lib:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" \ + PATH="/opt/amazon/openmpi/bin:/opt/amazon/efa/bin:/usr/local/cuda/bin:${PATH}" + +WORKDIR / + +COPY ./scripts/telemetry/deep_learning_container.py /usr/local/bin/deep_learning_container.py +COPY ./scripts/telemetry/bash_telemetry.sh /usr/local/bin/bash_telemetry.sh +COPY ./scripts/setup_oss_compliance.sh setup_oss_compliance.sh + +RUN chmod +x /usr/local/bin/deep_learning_container.py \ + && chmod +x /usr/local/bin/bash_telemetry.sh \ + && echo 'source /usr/local/bin/bash_telemetry.sh' >>/etc/bash.bashrc \ + && bash setup_oss_compliance.sh ${PYTHON} && rm setup_oss_compliance.sh \ + # create symlink for python + && ln -s /usr/bin/python3 /usr/bin/python \ + # clean up + && rm -rf ${HOME_DIR}/oss_compliance* \ + && rm -rf /tmp/tmp* \ + && rm -rf /tmp/uv* \ + && rm -rf /var/lib/apt/lists/* \ + && rm -rf /root/.cache || true + +COPY ./scripts/install_efa.sh install_efa.sh +RUN bash install_efa.sh ${EFA_VERSION} \ + && rm install_efa.sh \ + && mkdir -p /tmp/nvjpeg \ + && cd /tmp/nvjpeg \ + && wget https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ + && tar -xvf libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ + && rm -rf /usr/local/cuda/targets/x86_64-linux/lib/libnvjpeg* \ + && rm -rf /usr/local/cuda/targets/x86_64-linux/include/nvjpeg.h \ + && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/lib/libnvjpeg* /usr/local/cuda/targets/x86_64-linux/lib/ \ + && cp 
libnvjpeg-linux-x86_64-12.4.0.76-archive/include/* /usr/local/cuda/targets/x86_64-linux/include/ \ + && rm -rf /tmp/nvjpeg \ + # remove cuobjdump and nvdisasm + && rm -rf /usr/local/cuda/bin/cuobjdump* \ + && rm -rf /usr/local/cuda/bin/nvdisasm* + +# ====================== ec2 ========================================= +FROM base AS vllm-ec2 + +ARG CACHE_REFRESH=0 +RUN echo "CACHE_REFRESH=${CACHE_REFRESH}" && dpkg -l | grep -E "cuda|nvidia|libnv" | awk '{print $2}' | xargs apt-mark hold \ + && apt-get update \ + && apt-get upgrade -y \ + && apt-get clean + +COPY ./scripts/dockerd_entrypoint.sh /usr/local/bin/dockerd_entrypoint.sh +RUN chmod +x /usr/local/bin/dockerd_entrypoint.sh + +ENTRYPOINT ["/usr/local/bin/dockerd_entrypoint.sh"] + +# TODO: add later +# ====================== sagemaker ========================================= \ No newline at end of file