Skip to content

Commit e120860

Browse files
committed
Add files for CI
1 parent 807aca6 commit e120860

File tree

3 files changed

+373
-0
lines changed

3 files changed

+373
-0
lines changed

.github/workflows/rbln_arc_ci.yaml

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
# Workflow identity and triggers.
name: vLLM-RBLN ARC CI

on:
  # Pull-request activity that should (re)run the pipeline.
  pull_request:
    types:
      - opened
      - synchronize
      - reopened

  # Manual runs. Every input is optional.
  workflow_dispatch:
    inputs:
      # Git ref handed to the checkout steps; they fall back to the
      # triggering ref when this is empty.
      ref:
        type: string
        required: false
        description: 'ref to checkout'
      # Forwarded to the image build as the PYTHON_VERSION build argument.
      python_version:
        type: string
        required: false
        default: '3.10.12'
        description: 'Python version to use'
      # When empty, the test job derives the tag from requirements.txt.
      vllm_upstream_tag:
        type: string
        required: false
        description: 'vLLM upstream tag/version to use for tests (e.g., v0.9.1)'
21+
22+
jobs:
23+
# Builds the CI environment image, or reuses it when an image with the same
# dependency hash is already in the registry, and exposes the full image
# reference to downstream jobs through the `image_name` output.
build:
  name: Build Container Image
  runs-on: vllm-rbln-runner-atom
  # Least privilege for GITHUB_TOKEN: read the repository, push/pull packages.
  permissions:
    contents: read
    packages: write
  env:
    # Repository part of the image reference, defined once for every step.
    IMAGE_REPO: ghcr.io/${{ github.repository_owner }}/${{ github.event.repository.name }}
    # Python version that is actually handed to the image build.
    CI_PYTHON_VERSION: ${{ inputs.python_version || '3.10.12' }}
  outputs:
    image_name: ${{ steps.image_tag.outputs.image }}
  steps:
    - name: Checkout Repository with Submodules
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
        token: ${{ secrets.GIT_PAT }}
        ref: ${{ inputs.ref || github.ref }}
        submodules: recursive

    - name: Calculate dependency hash
      id: dep_hash
      run: |
        # Hash the pinned dependency ARG lines of the Dockerfile. PYTHON_VERSION
        # is taken from the value passed to the build rather than from the
        # Dockerfile default, so a run with a different Python version gets its
        # own tag instead of silently reusing an image built for another one.
        # With the default (3.10.12) the hashed line equals the Dockerfile's
        # own ARG line, so existing tags stay valid.
        HASH=$(
          {
            grep -E '^ARG (ATOM_DRIVER_VERSION|RBLN_CCL_VERSION|BCM_DRIVER_VERSION|BCM_LIBBNXT_RE_VERSION|LLVM_VERSION|OPTIMUM_RBLN_VERSION|RAY_VERSION|REBEL_COMPILER_VERSION|TORCHVISION_VERSION|TORCH_VERSION|TRITON_VERSION)=' Dockerfile.ubi.ci
            echo "ARG PYTHON_VERSION=${CI_PYTHON_VERSION}"
          } | sort | sha256sum | cut -c1-8
        )
        echo "hash=$HASH" >> "$GITHUB_OUTPUT"
        echo "Dependency hash: $HASH"

    - name: Set image tag
      id: image_tag
      run: |
        TAG="env-${{ steps.dep_hash.outputs.hash }}"
        echo "tag=$TAG" >> "$GITHUB_OUTPUT"
        # Full reference, reused by the following steps and by the job output.
        echo "image=${IMAGE_REPO}:${TAG}" >> "$GITHUB_OUTPUT"
        echo "Image tag: $TAG"

    - name: Log in to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    - name: Check if image exists
      id: check_image
      env:
        IMAGE: ${{ steps.image_tag.outputs.image }}
      run: |
        # A readable manifest means the image was already pushed.
        if docker manifest inspect "$IMAGE" > /dev/null 2>&1; then
          echo "exists=true" >> "$GITHUB_OUTPUT"
          echo "Image already exists: $IMAGE"
        else
          echo "exists=false" >> "$GITHUB_OUTPUT"
          echo "Image does not exist, will build: $IMAGE"
        fi

    - name: Set up Docker Buildx
      if: steps.check_image.outputs.exists == 'false'
      uses: docker/setup-buildx-action@v3
      with:
        driver: kubernetes

    - name: Build and push Container image
      if: steps.check_image.outputs.exists == 'false'
      id: docker_build
      uses: docker/build-push-action@v5
      with:
        context: .
        file: ./Dockerfile.ubi.ci
        push: true
        tags: ${{ steps.image_tag.outputs.image }}
        cache-from: type=registry,ref=${{ env.IMAGE_REPO }}:buildcache
        cache-to: type=registry,ref=${{ env.IMAGE_REPO }}:buildcache,mode=max
        build-args: |
          PYTHON_VERSION=${{ env.CI_PYTHON_VERSION }}
        secrets: |
          E2E_REPOSITORY_URL=${{ secrets.E2E_REPOSITORY_URL }}
          E2E_REPOSITORY_ID=${{ secrets.E2E_REPOSITORY_ID }}
          E2E_REPOSITORY_PWD=${{ secrets.E2E_REPOSITORY_PWD }}

    - name: Image info
      run: |
        echo "Using image: ${{ steps.image_tag.outputs.image }}"
        echo "Image exists: ${{ steps.check_image.outputs.exists }}"
        echo "Build skipped: ${{ steps.check_image.outputs.exists }}"
99+
100+
# Runs the test suite inside the image produced by the `build` job.
test:
  if: github.repository_owner == 'rebellions-sw'
  name: Run Tests
  runs-on: vllm-rbln-runner-atom
  needs: build
  timeout-minutes: 1440
  container:
    image: ${{ needs.build.outputs.image_name }}
    credentials:
      username: ${{ github.actor }}
      password: ${{ secrets.GITHUB_TOKEN }}
  steps:
    - name: Checkout Repository
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
        token: ${{ secrets.GIT_PAT }}
        ref: ${{ inputs.ref || github.ref }}
        submodules: recursive

    - name: Determine vLLM upstream version
      id: vllm_version
      env:
        # Passed through the environment so the value is never spliced into
        # the script text itself.
        VLLM_UPSTREAM_TAG: ${{ inputs.vllm_upstream_tag }}
      run: |
        if [ -n "$VLLM_UPSTREAM_TAG" ]; then
          VERSION="$VLLM_UPSTREAM_TAG"
          echo "Using vLLM version from workflow input: $VERSION"
        else
          # Turn the "vllm==X.Y.Z" pin into the upstream tag name "vX.Y.Z".
          VERSION=$(grep '^vllm==' requirements.txt | sed 's/vllm==\(.*\)/v\1/')
          echo "Using vLLM version from requirements.txt: $VERSION"
        fi
        # Stop here with a clear message instead of letting the clone step
        # fail on an empty branch name.
        if [ -z "$VERSION" ]; then
          echo "::error::Could not determine the vLLM upstream version (no workflow input and no 'vllm==' pin in requirements.txt)"
          exit 1
        fi
        echo "version=$VERSION" >> "$GITHUB_OUTPUT"
        echo "Final vLLM version to use: $VERSION"

    - name: Clone vLLM upstream for tests
      env:
        UPSTREAM_VERSION: ${{ steps.vllm_version.outputs.version }}
      run: |
        echo "Cloning vLLM upstream version: $UPSTREAM_VERSION"
        git clone --depth 1 --branch "$UPSTREAM_VERSION" \
          https://github.com/vllm-project/vllm.git /tmp/vllm-upstream
        echo "Clone completed successfully"

    - name: Merge upstream tests with vllm-rbln tests
      run: |
        echo "Copying upstream tests to vllm-rbln..."
        # Upstream files are copied over the local tests/ tree.
        cp -r /tmp/vllm-upstream/tests/* tests/

    - name: Install vLLM-RBLN from PR source
      run: |
        source /workspace/.venv/bin/activate
        pip install -e .
        pip list | grep vllm

    - name: Run Python Tests
      run: |
        source /workspace/.venv/bin/activate
        pytest tests/basic_correctness/test_basic_correctness.py -vv --durations 0

Dockerfile.ubi.ci

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
# Global build arguments, declared before the first FROM.
ARG BASE_UBI_IMAGE_TAG=9.5
ARG PYTHON_VERSION=3.10.12

## Base Layer ##################################################################
FROM registry.access.redhat.com/ubi9/ubi:${BASE_UBI_IMAGE_TAG} AS base

ARG WORKDIR=/workspace
WORKDIR "$WORKDIR"

# Pinned component versions. Keep one `ARG NAME=value` per line and leave the
# text of each line untouched: the CI workflow greps and hashes these lines
# to derive the image tag.
ARG ATOM_DRIVER_VERSION=2.0.1-2
ARG BCM_DRIVER_VERSION=233.1.135.7
ARG BCM_LIBBNXT_RE_VERSION=233.0.152.2
ARG LLVM_VERSION=19.1.7-rbln
ARG OPTIMUM_RBLN_VERSION=0.7.5rc3.dev3+g0e82798
ARG RAY_VERSION=2.46.0
ARG RBLN_CCL_VERSION="2.0.0"
ARG REBEL_COMPILER_VERSION=0.9.1
ARG TORCHVISION_VERSION=0.21.0+cpu
ARG TORCH_VERSION=2.6.0+cpu
ARG TRITON_VERSION=3.2.0+rbln.git47ee49fe
ARG VLLM_RBLN_VERSION=0.8.0rc0
ARG VLLM_VERSION=0.9.1

# Record the pinned versions as image metadata.
LABEL ATOM_DRIVER_VERSION="$ATOM_DRIVER_VERSION" \
      BCM_DRIVER_VERSION="$BCM_DRIVER_VERSION" \
      BCM_LIBBNXT_RE_VERSION="$BCM_LIBBNXT_RE_VERSION" \
      LLVM_VERSION="$LLVM_VERSION" \
      OPTIMUM_RBLN_VERSION="$OPTIMUM_RBLN_VERSION" \
      RAY_VERSION="$RAY_VERSION" \
      RBLN_CCL_VERSION="$RBLN_CCL_VERSION" \
      REBEL_COMPILER_VERSION="$REBEL_COMPILER_VERSION" \
      TORCHVISION_VERSION="$TORCHVISION_VERSION" \
      TORCH_VERSION="$TORCH_VERSION" \
      VLLM_RBLN_VERSION="$VLLM_RBLN_VERSION"
35+
36+
################################################################################
37+
# Stage "atom": downloads the ATOM driver RPM from the private repository and
# unpacks it into the scratch root /atom. Later stages copy files out of /atom.
FROM base AS atom

RUN dnf install -y rpm && \
    dnf clean all && \
    rm -rf /var/cache/dnf

# Target directory for the downloaded package (matches ATOM_DRIVER_PACKAGE).
RUN mkdir -p /atom/atom-driver

ARG ATOM_DRIVER_PACKAGE="atom-driver/atom_internal_release_${ATOM_DRIVER_VERSION}.x86_64.rpm"

# Repository host and credentials arrive as build secrets exposed as
# environment variables for this RUN only.
# `--fail` makes curl exit non-zero on an HTTP error (bad credentials, missing
# package) instead of saving the error page as the .rpm and leaving rpm to
# fail on it with a misleading message.
# The RPM is unpacked relative to /atom without dependency checks, signature
# checks or scriptlets, then the downloaded file is removed.
# NOTE(review): the download uses plain http with basic auth — confirm that
# the repository is only reachable over a trusted network.
RUN --mount=type=secret,id=E2E_REPOSITORY_URL,env=E2E_REPOSITORY_URL \
    --mount=type=secret,id=E2E_REPOSITORY_ID,env=E2E_REPOSITORY_ID \
    --mount=type=secret,id=E2E_REPOSITORY_PWD,env=E2E_REPOSITORY_PWD \
    curl --fail -u "${E2E_REPOSITORY_ID}:${E2E_REPOSITORY_PWD}" \
        -o "/atom/${ATOM_DRIVER_PACKAGE}" \
        "http://${E2E_REPOSITORY_URL}/repository/yum/${ATOM_DRIVER_PACKAGE}" \
    && rpm -ivh --nodeps --nosignature --noscripts \
        --root /atom \
        "/atom/${ATOM_DRIVER_PACKAGE}" \
    && rm -f "/atom/${ATOM_DRIVER_PACKAGE}"
57+
58+
################################################################################
59+
# Stage "bnxt": compiles libbnxt_re from the Broadcom driver bundle. Only the
# resulting shared library is copied out of this stage later.
FROM base AS bnxt

# Build toolchain, RDMA development headers and download tooling.
RUN PYTHONUNBUFFERED=1 dnf install -y \
        autoconf \
        automake \
        gcc \
        gcc-c++ \
        make \
        iproute \
        kmod \
        libibverbs-devel \
        librdmacm-devel \
        librdmacm \
        libtool \
        pciutils \
        python3 \
        systemd \
        wget \
    && dnf clean all \
    && rm -rf /var/cache/dnf

# Fetch the bundle into the working directory, extract the nested libbnxt_re
# source tarball to /tmp, then run the autotools build and install.
RUN wget "https://docs.broadcom.com/docs-and-downloads/ethernet-network-adapters/NXE/BRCM_$BCM_DRIVER_VERSION/bcm_$BCM_DRIVER_VERSION.tar.gz" \
    && tar -xvzf "bcm_$BCM_DRIVER_VERSION.tar.gz" \
    && tar -C /tmp -xf "$WORKDIR/bcm_$BCM_DRIVER_VERSION/drivers_linux/bnxt_rocelib/libbnxt_re-$BCM_LIBBNXT_RE_VERSION.tar.gz" \
    && cd "/tmp/libbnxt_re-$BCM_LIBBNXT_RE_VERSION" \
    && sh autogen.sh \
    && ./configure \
    && make clean \
    && make \
    && make install
79+
80+
################################################################################
81+
# Stage "driver": runtime RDMA user-space libraries, the libbnxt_re provider
# built in the "bnxt" stage, and the ATOM driver files unpacked in "atom".
FROM base AS driver

RUN dnf install -y wget rpm \
    && dnf clean all

# Add Oracle Linux RDMA repo to install rdma-core and libibverbs-utils
# NOTE(review): the repo is plain http with gpgcheck=0, so its packages are
# installed unverified — confirm this is acceptable for a CI-only image.
RUN echo -e "[ol9-rdma]\n\
name=Oracle Linux \$releasever RDMA Packages\n\
baseurl=http://yum.oracle.com/repo/OracleLinux/OL9/RDMA/x86_64/\n\
enabled=1\n\
gpgcheck=0" > /etc/yum.repos.d/oracle_rdma.repo

# Install the runtime libraries, then expose libbz2 under the additional
# soname libbz2.so.1.0.
RUN dnf install -y dnf-plugins-core && \
    dnf clean all && \
    dnf makecache && \
    dnf install -y \
        bzip2-libs \
        libdrm \
        libibverbs \
        librdmacm \
        rdma-core \
        libibverbs-utils \
        kmod \
        pciutils && \
    dnf clean all && \
    rm -rf /var/cache/dnf && \
    ln -sf /usr/lib64/libbz2.so.1 /usr/lib64/libbz2.so.1.0 && \
    ldconfig

# Use the freshly built provider library; a distribution-supplied copy, if
# present, is renamed out of the way.
COPY --from=bnxt /usr/local/lib/libbnxt_re-rdmav34.so /usr/local/lib/libbnxt_re-rdmav34.so
RUN if [ -f /usr/lib64/libibverbs/libbnxt_re-rdmav34.so ]; then \
        mv /usr/lib64/libibverbs/libbnxt_re-rdmav34.so /usr/lib64/libibverbs/libbnxt_re-rdmav34.so.inbox; \
    fi && \
    ln -s /usr/local/lib/libbnxt_re-rdmav34.so /usr/local/lib/libbnxt_re.so && \
    ldconfig

# Overlay the unpacked ATOM driver tree onto the image, directory by directory.
COPY --from=atom /atom/etc/ /etc/
COPY --from=atom /atom/opt/ /opt/
COPY --from=atom /atom/usr/ /usr/
# The contents of the package's /lib tree belong under /usr/lib/. The previous
# destination (/usr/) placed them directly below /usr, one level too high.
# NOTE(review): assumes the usual RHEL 9 layout where /lib is a symlink to
# /usr/lib — confirm against the base image.
COPY --from=atom /atom/lib/ /usr/lib/
121+
122+
################################################################################
123+
# Stage "python": builds CPython ${PYTHON_VERSION} from source and makes it
# the default `python` / `python3` / `pip` / `pip3` under /usr/local/bin.
FROM driver AS python

# Compiler toolchain and the development headers used by the CPython build.
RUN PYTHONUNBUFFERED=1 dnf install -y \
        gcc \
        gcc-c++ \
        make \
        openssl-devel \
        bzip2-devel \
        libffi-devel \
        zlib-devel \
        xz-devel \
        sqlite-devel \
        wget \
        tar \
    && dnf clean all

# Re-declare the global ARG so it is visible inside this stage.
ARG PYTHON_VERSION
RUN cd /tmp && \
    wget https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz && \
    tar -xzf Python-${PYTHON_VERSION}.tgz && \
    cd Python-${PYTHON_VERSION} && \
    ./configure --enable-optimizations --with-ensurepip=install && \
    make -j $(nproc) && \
    make altinstall && \
    cd / && \
    rm -rf /tmp/Python-${PYTHON_VERSION}*

# Create symbolic links for versioned and unversioned commands.
# `make altinstall` only installs pythonX.Y / pipX.Y, so X.Y is derived from
# PYTHON_VERSION (expected form X.Y.Z) instead of being hard-coded to 3.10;
# the links therefore stay valid when another version is requested.
RUN PY_MM="${PYTHON_VERSION%.*}" && \
    ln -sf "/usr/local/bin/python${PY_MM}" /usr/local/bin/python3 && \
    ln -sf "/usr/local/bin/python${PY_MM}" /usr/local/bin/python && \
    ln -sf "/usr/local/bin/pip${PY_MM}" /usr/local/bin/pip3 && \
    ln -sf "/usr/local/bin/pip${PY_MM}" /usr/local/bin/pip

# Upgrade pip and install setuptools, wheel without version pinning
# Python's bundled pip already knows which versions are compatible
RUN "python${PYTHON_VERSION%.*}" -m pip install --no-cache-dir --upgrade pip setuptools wheel

# Verify installation
RUN python --version && \
    python3 --version && \
    pip --version && \
    python -m pip list
165+
166+
################################################################################
167+
# Stage "vllm-rbln-env": final CI image with a virtual environment holding the
# pinned dependencies. vllm-rbln itself is not installed here.
FROM python AS vllm-rbln-env

# Update the OS packages and add the tools needed at test time.
RUN PYTHONUNBUFFERED=1 dnf -y update \
    && dnf install -y \
        wget \
        rsync \
        openssh-server \
        openssh-clients \
        git \
        ca-certificates \
    && dnf clean all \
    && rm -rf /var/cache/dnf

# Create venv and install fixed dependencies (without vllm-rbln)
# The private index URL carries the credentials from the build secrets; both
# index URLs are held in shell variables for the duration of this RUN only.
RUN --mount=type=secret,id=E2E_REPOSITORY_URL,env=E2E_REPOSITORY_URL \
    --mount=type=secret,id=E2E_REPOSITORY_ID,env=E2E_REPOSITORY_ID \
    --mount=type=secret,id=E2E_REPOSITORY_PWD,env=E2E_REPOSITORY_PWD \
    RBLN_INDEX_URL="http://${E2E_REPOSITORY_ID}:${E2E_REPOSITORY_PWD}@${E2E_REPOSITORY_URL}/repository/pypi/simple" \
    && TORCH_INDEX_URL=https://download.pytorch.org/whl/cpu \
    && python3 -m venv $WORKDIR/.venv \
    && . $WORKDIR/.venv/bin/activate \
    && pip install --no-cache --upgrade pip setuptools wheel \
    && pip install --no-cache rebel_compiler=="$REBEL_COMPILER_VERSION" \
        --extra-index-url "$RBLN_INDEX_URL" --trusted-host ${E2E_REPOSITORY_URL} \
    && pip install --no-cache triton=="$TRITON_VERSION" \
        --extra-index-url "$RBLN_INDEX_URL" --trusted-host ${E2E_REPOSITORY_URL} \
    && pip install --no-cache torch=="$TORCH_VERSION" --index-url "$TORCH_INDEX_URL" \
    && pip install --no-cache torchvision=="$TORCHVISION_VERSION" --index-url "$TORCH_INDEX_URL" \
    && pip install --no-cache "numpy<2" \
    && pip install --no-cache-dir pytest pytest-asyncio pytest-xdist

# REBEL SDK ENV
# Makes the virtual environment the default interpreter environment.
ENV VIRTUAL_ENV="$WORKDIR/.venv" \
    PATH="$WORKDIR/.venv/bin:$PATH" \
    PYTHONPATH="$WORKDIR/.venv/lib/python3.10/site-packages"

# vLLM-RBLN ENV
ENV RBLN_KERNEL_MODE="triton"
ENV VLLM_USE_V1="0"
ENV USE_VLLM_MODEL="1"
ENV VLLM_DISABLE_COMPILE_CACHE="1"

# The entrypoint runs /entrypoint.sh, ignores its exit status, and then execs
# the container command.
COPY ./entrypoint.sh /entrypoint.sh
RUN chmod o+x /entrypoint.sh
ENTRYPOINT [ "/bin/bash", "-c", "/entrypoint.sh || true; exec \"$@\"", "--" ]

CMD ["/bin/bash"]

entrypoint.sh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/bin/bash
set -e

# Run the given command with stderr discarded; a failure is ignored so that
# `set -e` does not abort the script.
best_effort() {
    "$@" 2>/dev/null || true
}

# Prepare host modules and udev triggers (each step is optional).
best_effort depmod -a $(uname -r)
best_effort udevadm control --reload
best_effort udevadm trigger

# Replace this shell with the command passed as arguments.
exec "$@"
11+

0 commit comments

Comments
 (0)