Skip to content

Commit 3c8d497

Browse files
authored
Merge branch 'main' into refactor-vllm-sm
2 parents c59c40f + 00224ac commit 3c8d497

File tree

5 files changed

+127
-111
lines changed

5 files changed

+127
-111
lines changed

.github/actions/container-cleanup/action.yml

Lines changed: 0 additions & 12 deletions
This file was deleted.
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
version: 0.2
2+
3+
phases:
4+
install:
  commands:
    - echo "Setting up CodeBuild fleet runner infrastructure ..."

    # Install common dependencies across all runner fleets.
    - echo "Installing uv ..."
    # Install uv only when it is not already on PATH.
    # The existence check uses the portable `> /dev/null 2>&1` redirection:
    # the bash-only `&>` form is parsed by a POSIX sh (e.g. dash) as a
    # backgrounded command followed by a separate redirect, so the result of
    # the check would no longer reflect whether uv is actually installed.
    - |
      if ! command -v uv > /dev/null 2>&1; then
        curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR="/usr/local/bin" sh
        uv self update
      fi
15+
16+
pre_build:
  commands:
    - echo "Build started on $(date)"

    # Clean up previous CodeBuild runtime environments.
    # Each runner task is expected to initialize with only one runtime
    # environment; every runtime environment other than the current one is
    # considered dangling and is removed.
    - echo "Cleaning up dangling runtime environments ..."
    - |
      RUNTIME_ROOT_VOLUME="/tmp"
      RUNTIME_PATTERN="codebuild-[^/]*"

      # The listings are informational only: `|| true` keeps a "no match"
      # exit status from grep from failing the phase. The pattern is quoted
      # so the shell cannot glob-expand it before grep sees it.
      echo "Listing all runtime environments before clean up ..."
      ls -la "${RUNTIME_ROOT_VOLUME}" | grep "${RUNTIME_PATTERN}" || true

      # Derive the current runtime directory from CODEBUILD_SRC_DIR.
      CURRENT_CODEBUILD_DIR=$(echo "${CODEBUILD_SRC_DIR}" | grep -o "${RUNTIME_ROOT_VOLUME}/${RUNTIME_PATTERN}" || true)

      # Guard: with an empty CURRENT_CODEBUILD_DIR the `! -path ""` test would
      # exclude nothing and every codebuild-* directory would be deleted,
      # including the one this build may be running from.
      if [ -n "${CURRENT_CODEBUILD_DIR}" ]; then
        find "${RUNTIME_ROOT_VOLUME}" -maxdepth 1 -type d -name 'codebuild-*' ! -path "${CURRENT_CODEBUILD_DIR}" -exec rm -rf {} +
      else
        echo "Could not determine the current runtime environment from CODEBUILD_SRC_DIR; skipping clean up."
      fi

      echo "Listing all runtime environments after clean up ..."
      ls -la "${RUNTIME_ROOT_VOLUME}" | grep "${RUNTIME_PATTERN}" || true
35+
36+
build:
  commands:
    # Placeholder step: this phase only prints a notice.
    - 'echo "BuildSpec will be overloaded for GHA self-hosted runner builds."'
39+
40+
post_build:
  commands:
    # Clean up docker containers and images.
    # Each runner task is expected to stop and remove all of its docker
    # containers; any container still present at this stage (running or
    # stopped, since `docker ps -a` lists both) is considered dangling.
    - echo "Cleaning up dangling docker containers and unused docker images ..."
    - |
      # Only call `docker rm` when there is something to remove; invoking it
      # with no arguments prints a usage error into the build log.
      DANGLING_CONTAINERS=$(docker ps -aq)
      if [ -n "${DANGLING_CONTAINERS}" ]; then
        # Intentionally unquoted: the value is a whitespace-separated ID list.
        # Removal stays best-effort, as before.
        docker rm -f ${DANGLING_CONTAINERS} || true
      fi
      # Remove unused images older than 24 hours, then report disk usage.
      docker image prune -a --force --filter "until=24h"
      docker system df

    - echo "Build completed on $(date)"

.github/scripts/runner_setup.sh

Lines changed: 0 additions & 8 deletions
This file was deleted.

.github/workflows/pr-sglang.yml

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,16 @@ permissions:
1111
contents: read
1212

1313
env:
14+
# CI Image configuration
1415
SGLANG_VERSION: "0.5.5"
1516
PYTHON_VERSION: "py312"
1617
CUDA_VERSION: "cu129"
1718
OS_VERSION: "ubuntu22.04"
19+
# Prod Image configuration
1820
PROD_SAGEMAKER_IMAGE: sglang:0.5-gpu-py312
21+
# CI environment configuration
1922
FORCE_COLOR: "1"
23+
TEST_ARTIFACTS_DIRECTORY: "/test_artifacts/sglang"
2024

2125
jobs:
2226
check-changes:
@@ -64,14 +68,14 @@ jobs:
6468
runs-on:
6569
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
6670
fleet:x86-build-runner
71+
buildspec-override:true
6772
concurrency:
6873
group: ${{ github.workflow }}-build-sglang-image-${{ github.event.pull_request.number }}
6974
cancel-in-progress: true
7075
outputs:
7176
ci-image: ${{ steps.image-uri-build.outputs.CI_IMAGE_URI }}
7277
steps:
7378
- uses: actions/checkout@v5
74-
- run: .github/scripts/runner_setup.sh
7579
- run: .github/scripts/buildkitd.sh
7680

7781
- name: ECR login
@@ -142,6 +146,7 @@ jobs:
142146
runs-on:
143147
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
144148
fleet:x86-g6xl-runner
149+
buildspec-override:true
145150
concurrency:
146151
group: ${{ github.workflow }}-sglang-local-benchmark-test-${{ github.event.pull_request.number }}
147152
cancel-in-progress: true
@@ -158,10 +163,10 @@ jobs:
158163

159164
- name: Setup for SGLang datasets
160165
run: |
161-
mkdir -p /tmp/sglang/dataset
162-
if [ ! -f /tmp/sglang/dataset/ShareGPT_V3_unfiltered_cleaned_split.json ]; then
166+
mkdir -p ${TEST_ARTIFACTS_DIRECTORY}/dataset
167+
if [ ! -f ${TEST_ARTIFACTS_DIRECTORY}/dataset/ShareGPT_V3_unfiltered_cleaned_split.json ]; then
163168
echo "Downloading ShareGPT dataset..."
164-
wget -P /tmp/sglang/dataset https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
169+
wget -P ${TEST_ARTIFACTS_DIRECTORY}/dataset https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
165170
else
166171
echo "ShareGPT dataset already exists. Skipping download."
167172
fi
@@ -170,7 +175,7 @@ jobs:
170175
run: |
171176
CONTAINER_ID=$(docker run -d -it --rm --gpus=all \
172177
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
173-
-v /tmp/sglang/dataset:/dataset \
178+
-v ${TEST_ARTIFACTS_DIRECTORY}/dataset:/dataset \
174179
-p 30000:30000 \
175180
-e SM_SGLANG_MODEL_PATH=Qwen/Qwen3-0.6B \
176181
-e SM_SGLANG_REASONING_PARSER=qwen3 \
@@ -180,7 +185,7 @@ jobs:
180185
${{ needs.set-test-environment.outputs.image-uri }})
181186
echo "CONTAINER_ID=${CONTAINER_ID}" >> ${GITHUB_ENV}
182187
echo "Waiting for serving endpoint startup ..."
183-
sleep 60s
188+
sleep 120s
184189
docker logs ${CONTAINER_ID}
185190
186191
- name: Run SGLang tests
@@ -193,16 +198,13 @@ jobs:
193198
--dataset-name sharegpt \
194199
--dataset-path /dataset/ShareGPT_V3_unfiltered_cleaned_split.json
195200
196-
- name: Cleanup container and images
197-
if: always()
198-
uses: ./.github/actions/container-cleanup
199-
200201
sglang-frontend-test:
201202
needs: [build-sglang-image, set-test-environment]
202203
if: success()
203204
runs-on:
204205
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
205206
fleet:x86-g6exl-runner
207+
buildspec-override:true
206208
concurrency:
207209
group: ${{ github.workflow }}-sglang-frontend-test-${{ github.event.pull_request.number }}
208210
cancel-in-progress: true
@@ -252,10 +254,6 @@ jobs:
252254
python3 run_suite.py --suite per-commit
253255
'
254256
255-
- name: Cleanup container and images
256-
if: always()
257-
uses: ./.github/actions/container-cleanup
258-
259257
sglang-sagemaker-endpoint-test:
260258
needs: [set-test-environment]
261259
if: |
@@ -264,19 +262,18 @@ jobs:
264262
runs-on:
265263
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
266264
fleet:default-runner
265+
buildspec-override:true
267266
concurrency:
268267
group: ${{ github.workflow }}-sglang-sagemaker-endpoint-test-${{ github.event.pull_request.number }}
269268
cancel-in-progress: false
270269
steps:
271270
- name: Checkout DLC source
272271
uses: actions/checkout@v5
273272

274-
- run: .github/scripts/runner_setup.sh
275273
- name: Install test dependencies
276274
run: |
277275
uv venv
278276
source .venv/bin/activate
279-
280277
uv pip install -r test/requirements.txt
281278
uv pip install -r test/sglang/sagemaker/requirements.txt
282279

0 commit comments

Comments
 (0)