diff --git a/.github/workflows/contributor.yml b/.github/workflows/contributor.yml
new file mode 100644
index 00000000..606e1b20
--- /dev/null
+++ b/.github/workflows/contributor.yml
@@ -0,0 +1,22 @@
+# Posts a reminder comment on every newly opened pull request.
+name: Welcome message
+on:
+  pull_request_target:
+    types: [opened]
+
+jobs:
+  pr_reminder:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Add first comment
+        uses: actions/github-script@v6
+        with:
+          script: |
+            github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              body: '👋 Hi! \nThank you for contributing to the project.\n Just a reminder: PRs will trigger full CI run by default. We will add verified labels on the PR once build and tests steps are successful.\n🚀'
+            })
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml
index 531cc291..ab4bd951 100644
--- a/.github/workflows/pre-commit.yaml
+++ b/.github/workflows/pre-commit.yaml
@@ -36,4 +36,3 @@ jobs:
     - uses: actions/checkout@v3
     - uses: actions/setup-python@v3
     - uses: pre-commit/action@v3.0.0
-
diff --git a/.github/workflows/triton_update_ci.yml b/.github/workflows/triton_update_ci.yml
new file mode 100644
index 00000000..8c7886d5
--- /dev/null
+++ b/.github/workflows/triton_update_ci.yml
@@ -0,0 +1,45 @@
+name: Validate Triton Pull request by running our change on the latest version of vLLM
+on:
+  pull_request:
+jobs:
+  mirror_repo:
+    environment: GITLAB
+    runs-on: self-hosted
+    steps:
+      - name: Sync Mirror Repository
+        run: |
+          #!/bin/bash
+          curl --fail --request POST --header "PRIVATE-TOKEN:${{ secrets.TOKEN }}" "${{ secrets.MIRROR_URL }}"
+  trigger-ci:
+    environment: GITLAB
+    needs: mirror_repo
+    runs-on: self-hosted
+    steps:
+      - name: Trigger Pipeline
+        run: |
+          #!/bin/bash
+          # Get latest Triton RELEASED VERSION from https://github.com/triton-inference-server/vllm_backend/releases
+          TAG=$(curl https://api.github.com/repos/triton-inference-server/vllm_backend/releases/latest | grep -i "tag_name" | awk -F '"' '{print $4}')
+          export TRITON_CONTAINER_VERSION=${TAG#v} # example: 24.10
+          if [ -z "$TRITON_CONTAINER_VERSION" ]
+          then
+            echo "\$TRITON_CONTAINER_VERSION is NULL, setting it to 24.10"
+            TRITON_CONTAINER_VERSION=24.10
+          else
+            echo "\$TRITON_CONTAINER_VERSION is NOT NULL"
+          fi
+          echo "TRITON_CONTAINER_VERSION = ${TRITON_CONTAINER_VERSION}"
+
+          # Get latest VLLM RELEASED VERSION from https://github.com/vllm-project/vllm/releases
+          TAG=$(curl https://api.github.com/repos/vllm-project/vllm/releases/latest | grep -i "tag_name" | awk -F '"' '{print $4}')
+          export VLLM_VERSION=${TAG#v} # example: 0.5.5
+          if [ -z "$VLLM_VERSION" ]
+          then
+            echo "\$VLLM_VERSION is NULL, setting it to 0.5.5"
+            VLLM_VERSION=0.5.5
+          else
+            echo "\$VLLM_VERSION is NOT NULL"
+          fi
+          echo "VLLM_VERSION = ${VLLM_VERSION}"
+
+          curl --fail --request POST --form "token=${{ secrets.PIPELINE_TOKEN }}" -F "ref=${GITHUB_HEAD_REF}" -F "variables[BUILD_OPTION]=BUILD_SOURCE" -F "variables[TRITON_CONTAINER_VERSION]=${TRITON_CONTAINER_VERSION}" -F "variables[VLLM_VERSION]=${VLLM_VERSION}" -F "variables[TEST_OPTION]=ALL_TESTS" "${{ secrets.PIPELINE_URL }}"
diff --git a/.github/workflows/vllm_update_ci.yml b/.github/workflows/vllm_update_ci.yml
new file mode 100644
index 00000000..903b9df8
--- /dev/null
+++ b/.github/workflows/vllm_update_ci.yml
@@ -0,0 +1,38 @@
+name: Validate latest vLLM release from https://github.com/vllm-project/vllm/releases against latest Triton release https://github.com/triton-inference-server/vllm_backend/releases
+on:
+  schedule:
+    - cron: "30 09 */3 * *"
+jobs:
+  mirror_repo:
+    environment: GITLAB
+    runs-on: self-hosted
+    steps:
+      - name: Sync Mirror Repository
+        run: |
+          #!/bin/bash
+          curl --fail --request POST --header "PRIVATE-TOKEN:${{ secrets.TOKEN }}" "${{ secrets.MIRROR_URL }}"
+  trigger-ci:
+    environment: GITLAB
+    needs: mirror_repo
+    runs-on: self-hosted
+    steps:
+      - name: Trigger Pipeline
+        run: |
+          #!/bin/bash
+          # Get latest Triton RELEASED VERSION from https://github.com/triton-inference-server/vllm_backend/releases
+          TAG=$(curl https://api.github.com/repos/triton-inference-server/vllm_backend/releases/latest | grep -i "tag_name" | awk -F '"' '{print $4}')
+          export TRITON_CONTAINER_VERSION=${TAG#v} # example: 24.08
+          # Get latest VLLM RELEASED VERSION from https://github.com/vllm-project/vllm/releases
+          TAG=$(curl https://api.github.com/repos/vllm-project/vllm/releases/latest | grep -i "tag_name" | awk -F '"' '{print $4}')
+          export VLLM_VERSION=${TAG#v} # example: 0.5.5
+          echo "VLLM_VERSION = ${VLLM_VERSION}"
+          if [ -z "$TRITON_CONTAINER_VERSION" ] || [ -z "$VLLM_VERSION" ]
+          then
+            echo "Can't find latest Triton or vllm version.. Skipping CI run"
+          else
+            echo "TRITON_CONTAINER_VERSION = ${TRITON_CONTAINER_VERSION}"
+            echo "VLLM_VERSION = ${VLLM_VERSION}"
+            # GITHUB_HEAD_REF is only set for pull request events; scheduled runs fall back to GITHUB_REF_NAME.
+            # NOTE(review): variables[TEST_OPTION] is sent twice (ALL_HARDWARE, then ALL_TESTS) - confirm which value is intended.
+            curl --fail --request POST --form "token=${{ secrets.PIPELINE_TOKEN }}" -F "ref=${GITHUB_HEAD_REF:-${GITHUB_REF_NAME}}" -F "variables[BUILD_OPTION]=PULL_DOCKER" -F "variables[TRITON_CONTAINER_VERSION]=${TRITON_CONTAINER_VERSION}" -F "variables[TEST_OPTION]=ALL_HARDWARE" -F "variables[VLLM_VERSION]=${VLLM_VERSION}" -F "variables[TEST_OPTION]=ALL_TESTS" "${{ secrets.PIPELINE_URL }}"
+          fi
diff --git a/README.md b/README.md
index 8a993d99..a4a9d3fb 100644
--- a/README.md
+++ b/README.md
@@ -27,6 +27,9 @@
 -->
 
 [![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)
+![Static Badge](https://img.shields.io/badge/Triton-24.10-8A2BE2)
+![Static Badge](https://img.shields.io/badge/vLLM-0.5.5-blue)
+![Static Badge](https://img.shields.io/badge/CI_Passing-V100%2CA100%2CH100-Green)
 
 # vLLM Backend
 
@@ -82,7 +85,18 @@ latest YY.MM (year.month) of [Triton release](https://github.com/triton-inferenc
 
 ```
 # YY.MM is the version of Triton.
-export TRITON_CONTAINER_VERSION=
+# Get latest Triton RELEASED VERSION from https://github.com/triton-inference-server/vllm_backend/releases
+TAG=$(curl https://api.github.com/repos/triton-inference-server/vllm_backend/releases/latest | grep -i "tag_name" | awk -F '"' '{print $4}')
+export TRITON_CONTAINER_VERSION=${TAG#v} # example: 24.06
+echo "TRITON_CONTAINER_VERSION = ${TRITON_CONTAINER_VERSION}"
+
+# Get latest VLLM RELEASED VERSION from https://github.com/vllm-project/vllm/releases
+TAG=$(curl https://api.github.com/repos/vllm-project/vllm/releases/latest | grep -i "tag_name" | awk -F '"' '{print $4}')
+export VLLM_VERSION=${TAG#v} # example: 0.5.3.post1
+echo "VLLM_VERSION = ${VLLM_VERSION}"
+
+git clone -b r${TRITON_CONTAINER_VERSION} https://github.com/triton-inference-server/server.git
+cd server
 ./build.py -v  --enable-logging
                 --enable-stats
                 --enable-tracing
@@ -101,6 +115,11 @@ export TRITON_CONTAINER_VERSION=
                 --backend=python:r${TRITON_CONTAINER_VERSION}
                 --backend=vllm:r${TRITON_CONTAINER_VERSION}
                 --backend=ensemble
+                --vllm-version=${VLLM_VERSION}
+# Build Triton Server
+cd build
+bash -x ./docker_build
+
 ```
 
 ### Option 3. Add the vLLM Backend to the Default Triton Container
diff --git a/ci/build/build_docker.sh b/ci/build/build_docker.sh
new file mode 100755
index 00000000..46d1a426
--- /dev/null
+++ b/ci/build/build_docker.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Usage: build_docker.sh -t <container image to pull>
+while getopts t: flag
+do
+    case "${flag}" in
+        # The case label must match the "t:" optstring above.
+        t) PROD_CONTAINER=${OPTARG};;
+    esac
+done
+
+echo "Pulling container image ${PROD_CONTAINER}"
+docker pull "${PROD_CONTAINER}"
diff --git a/ci/build/build_source.sh b/ci/build/build_source.sh
new file mode 100755
index 00000000..e40cf471
--- /dev/null
+++ b/ci/build/build_source.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Usage: build_source.sh -t <Triton container version> -v <vLLM version>
+while getopts t:v: flag
+do
+    case "${flag}" in
+        # The case labels must match the "t:v:" optstring above.
+        t) TRITON_CONTAINER_VERSION=${OPTARG};;
+        v) VLLM_VERSION=${OPTARG};;
+    esac
+done
+
+echo "Triton version is ${TRITON_CONTAINER_VERSION} and vllm version is ${VLLM_VERSION}"
+# This change will start working for r24.12 release
+#git clone -b r${TRITON_CONTAINER_VERSION} https://github.com/triton-inference-server/server.git
+git clone https://github.com/triton-inference-server/server.git
+set -x && python3 server/build.py -v \
+    --enable-logging \
+    --enable-stats \
+    --enable-tracing \
+    --enable-metrics \
+    --enable-gpu-metrics \
+    --enable-cpu-metrics \
+    --enable-gpu \
+    --no-container-interactive \
+    --container-prebuild-command="docker login -u gitlab-ci-token -p ${CI_JOB_TOKEN} ${CI_REGISTRY}" \
+    --filesystem=gcs \
+    --filesystem=s3 \
+    --filesystem=azure_storage \
+    --endpoint=http \
+    --endpoint=grpc \
+    --endpoint=sagemaker \
+    --endpoint=vertex-ai \
+    --upstream-container-version=${TRITON_CONTAINER_VERSION} \
+    --backend=python:r${TRITON_CONTAINER_VERSION} \
+    --backend=vllm:r${TRITON_CONTAINER_VERSION} \
+    --vllm-version=${VLLM_VERSION} 2>&1
+# Build Triton Server
+cd server/build || exit 1
+bash -x ./docker_build