Add label to build #265

Workflow file for this run

.github/workflows/pr-sglang.yml at 67b58bd

	# Pull-request workflow for the SGLang images. It runs for PRs targeting
	# main and declares the shared environment used by the jobs below.
	name: PR - SGLang

	on:
	  pull_request:
	    branches: [main]
	    types: [opened, reopened, synchronize]
	    paths:
	      # Match any changed path that contains "sglang". A bare "sglang"
	      # pattern has no wildcard and would only match a file with exactly
	      # that name at the repository root.
	      - "**sglang**"
	      # Documentation-only changes are excluded.
	      - "!docs/**"

	# Read-only token: the jobs only read repository contents and PR metadata.
	permissions:
	  contents: read
	  pull-requests: read

	env:
	  # CI environment configuration
	  FORCE_COLOR: "1"
	  TEST_ARTIFACTS_DIRECTORY: "/test_artifacts/sglang"

	  # Config file paths
	  EC2_CONFIG: ".github/config/sglang-ec2.yml"
	  SAGEMAKER_CONFIG: ".github/config/sglang-sagemaker.yml"

	jobs:
	gatekeeper:
	  # First job in the chain: load-config (and therefore everything after it)
	  # depends on this job succeeding.
	  concurrency:
	    # A newer run for the same PR cancels an in-flight gate.
	    cancel-in-progress: true
	    group: ${{ github.workflow }}-gate-${{ github.event.pull_request.number }}
	  runs-on: ubuntu-latest
	  steps:
	    # Check out the PR's base commit, not its head, so the local gate
	    # action executed below is the copy from the base branch.
	    - name: Checkout base branch (safe)
	      uses: actions/checkout@v5
	      with:
	        fetch-depth: 1
	        ref: ${{ github.event.pull_request.base.sha }}

	    - name: Run permission gate (from base)
	      uses: ./.github/actions/pr-permission-gate

	load-config:
	  # Runs the repository's load-config action on the two config files named
	  # in the workflow env and republishes its outputs for downstream jobs.
	  runs-on: ubuntu-latest
	  needs:
	    - gatekeeper
	  if: success()
	  outputs:
	    ec2-config: ${{ steps.load.outputs.ec2-config }}
	    sagemaker-config: ${{ steps.load.outputs.sagemaker-config }}
	  steps:
	    - name: Checkout code
	      uses: actions/checkout@v5

	    # Both config paths are passed as one comma-separated input.
	    - name: Load configurations
	      id: load
	      uses: ./.github/actions/load-config
	      with:
	        config-files: ${{ env.EC2_CONFIG }},${{ env.SAGEMAKER_CONFIG }}

	check-changes:
	  # Runs pre-commit over the whole tree, then classifies the PR's changed
	  # files into "build-change" and "test-change" so later jobs can decide
	  # whether to build images and/or run tests.
	  needs: [load-config]
	  if: success()
	  runs-on: ubuntu-latest
	  concurrency:
	    group: ${{ github.workflow }}-check-changes-${{ github.event.pull_request.number }}
	    cancel-in-progress: true
	  outputs:
	    build-change: ${{ steps.changes.outputs.build-change }}
	    test-change: ${{ steps.changes.outputs.test-change }}
	  steps:
	    - name: Checkout DLC source
	      uses: actions/checkout@v5

	    - name: Setup python
	      uses: actions/setup-python@v6
	      with:
	        # Quoted so the version stays a string.
	        python-version: "3.12"

	    - name: Run pre-commit
	      uses: pre-commit/action@v3.0.1
	      with:
	        extra_args: --all-files

	    - name: Detect file changes
	      id: changes
	      uses: dorny/paths-filter@v3
	      with:
	        # Literal block scalar: the filter definitions are handed to the
	        # action as a nested YAML document.
	        filters: |
	          build-change:
	            - "docker/sglang/**"
	            - "scripts/sglang/**"
	            - "scripts/common/**"
	            - "scripts/telemetry/**"
	            - ".github/workflows/pr-sglang*"
	          test-change:
	            - "test/sglang/**"

	# ======================================================
	# =============== SGLang EC2 jobs ======================
	# ======================================================
	build-sglang-ec2-image:
	  # Builds the sglang-ec2 CI image for this PR. Runs only when
	  # check-changes reported a build-relevant change.
	  needs: [check-changes, load-config]
	  if: needs.check-changes.outputs.build-change == 'true'
	  runs-on:
	    # The two continuation lines are not mapping keys (no space after the
	    # colon); they fold into the same runner label as the first line.
	    - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
	      fleet:x86-build-runner
	      buildspec-override:true
	  concurrency:
	    group: ${{ github.workflow }}-build-sglang-ec2-image-${{ github.event.pull_request.number }}
	    cancel-in-progress: true
	  outputs:
	    ci-image: ${{ steps.build.outputs.image-uri }}
	  steps:
	    - uses: actions/checkout@v5

	    # Extract the build parameters from the EC2 config JSON into step outputs.
	    - name: Parse EC2 config
	      id: config
	      env:
	        # Passed through the environment rather than pasted into the script,
	        # so quotes or shell metacharacters in the JSON cannot break or
	        # alter the command.
	        CONFIG_JSON: ${{ needs.load-config.outputs.ec2-config }}
	      run: |
	        printf '%s\n' "${CONFIG_JSON}" > config.json
	        {
	          echo "framework=$(jq -r '.common.framework' config.json)"
	          echo "framework-version=$(jq -r '.common.framework_version' config.json)"
	          echo "container-type=$(jq -r '.common.job_type' config.json)"
	          echo "python-version=$(jq -r '.common.python_version' config.json)"
	          echo "cuda-version=$(jq -r '.common.cuda_version' config.json)"
	          echo "os-version=$(jq -r '.common.os_version' config.json)"
	          # The remaining keys are optional; `//` supplies the fallback.
	          echo "device-type=$(jq -r '.common.device_type // "gpu"' config.json)"
	          echo "arch-type=$(jq -r '.common.arch_type // "x86"' config.json)"
	          echo "contributor=$(jq -r '.common.contributor // "None"' config.json)"
	          echo "customer-type=$(jq -r '.common.customer_type // ""' config.json)"
	        } >> "${GITHUB_OUTPUT}"

	    - name: Build image
	      id: build
	      uses: ./.github/actions/build-image
	      with:
	        framework: ${{ steps.config.outputs.framework }}
	        target: sglang-ec2
	        base-image: lmsysorg/sglang:v${{ steps.config.outputs.framework-version }}-${{ steps.config.outputs.cuda-version }}-amd64
	        framework-version: ${{ steps.config.outputs.framework-version }}
	        container-type: ${{ steps.config.outputs.container-type }}
	        aws-account-id: ${{ vars.CI_AWS_ACCOUNT_ID }}
	        aws-region: ${{ vars.AWS_REGION }}
	        tag-pr: ${{ steps.config.outputs.framework }}-${{ steps.config.outputs.framework-version }}-gpu-${{ steps.config.outputs.python-version }}-${{ steps.config.outputs.cuda-version }}-${{ steps.config.outputs.os-version }}-ec2-pr-${{ github.event.pull_request.number }}
	        dockerfile-path: docker/${{ steps.config.outputs.framework }}/Dockerfile
	        arch-type: ${{ steps.config.outputs.arch-type }}
	        device-type: ${{ steps.config.outputs.device-type }}
	        cuda-version: ${{ steps.config.outputs.cuda-version }}
	        python-version: ${{ steps.config.outputs.python-version }}
	        os-version: ${{ steps.config.outputs.os-version }}
	        contributor: ${{ steps.config.outputs.contributor }}
	        customer-type: ${{ steps.config.outputs.customer-type }}

	set-ec2-test-environment:
	  # Chooses which image the EC2 tests should use: the image built for this
	  # PR when the build job succeeded, otherwise the production image named
	  # in the EC2 config.
	  needs: [check-changes, build-sglang-ec2-image, load-config]
	  # always() lets this job run even when the build job was skipped;
	  # !failure() and !cancelled() still stop it after a failed or cancelled
	  # dependency.
	  if: |
	    always() && !failure() && !cancelled() &&
	    (needs.check-changes.outputs.build-change == 'true' || needs.check-changes.outputs.test-change == 'true')
	  runs-on: ubuntu-latest
	  concurrency:
	    group: ${{ github.workflow }}-set-ec2-test-environment-${{ github.event.pull_request.number }}
	    cancel-in-progress: true
	  outputs:
	    aws-account-id: ${{ steps.set-env.outputs.AWS_ACCOUNT_ID }}
	    image-uri: ${{ steps.set-env.outputs.IMAGE_URI }}
	    framework-version: ${{ steps.config.outputs.framework-version }}
	  steps:
	    # Same checkout version as every other job in this workflow.
	    - name: Checkout code
	      uses: actions/checkout@v5

	    - name: Parse EC2 config
	      id: config
	      env:
	        # Passed through the environment rather than pasted into the script,
	        # so quotes or shell metacharacters in the JSON cannot break or
	        # alter the command.
	        CONFIG_JSON: ${{ needs.load-config.outputs.ec2-config }}
	      run: |
	        printf '%s\n' "${CONFIG_JSON}" > config.json
	        echo "framework-version=$(jq -r '.common.framework_version' config.json)" >> "${GITHUB_OUTPUT}"
	        echo "prod-image=$(jq -r '.common.prod_image' config.json)" >> "${GITHUB_OUTPUT}"

	    - name: Set test environment
	      id: set-env
	      run: |
	        if [[ "${{ needs.build-sglang-ec2-image.result }}" == "success" ]]; then
	          # Fresh PR image from the CI account.
	          AWS_ACCOUNT_ID=${{ vars.CI_AWS_ACCOUNT_ID }}
	          IMAGE_URI=${{ needs.build-sglang-ec2-image.outputs.ci-image }}
	        else
	          # No PR image was built: fall back to the production image.
	          AWS_ACCOUNT_ID=${{ vars.PROD_AWS_ACCOUNT_ID }}
	          IMAGE_URI=${{ vars.PROD_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/${{ steps.config.outputs.prod-image }}
	        fi

	        echo "Image URI to test: ${IMAGE_URI}"
	        echo "AWS_ACCOUNT_ID=${AWS_ACCOUNT_ID}" >> "${GITHUB_OUTPUT}"
	        echo "IMAGE_URI=${IMAGE_URI}" >> "${GITHUB_OUTPUT}"

	sglang-ec2-local-benchmark-test:
	  # Starts the selected EC2 image as a local SGLang server on a GPU runner
	  # and drives it with sglang.bench_serving on the ShareGPT dataset.
	  needs: [build-sglang-ec2-image, set-ec2-test-environment]
	  # NOTE(review): with `success()`, this job is presumably skipped whenever
	  # build-sglang-ec2-image is skipped (test-only changes), so the
	  # production-image fallback would never be exercised here - confirm
	  # that this is intended.
	  if: success()
	  runs-on:
	    # The two continuation lines are not mapping keys (no space after the
	    # colon); they fold into the same runner label as the first line.
	    - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
	      fleet:x86-g6xl-runner
	      buildspec-override:true
	  concurrency:
	    group: ${{ github.workflow }}-sglang-ec2-local-benchmark-test-${{ github.event.pull_request.number }}
	    cancel-in-progress: true
	  steps:
	    - name: Checkout DLC source
	      uses: actions/checkout@v5

	    - name: Container pull
	      uses: ./.github/actions/ecr-authenticate
	      with:
	        aws-account-id: ${{ needs.set-ec2-test-environment.outputs.aws-account-id }}
	        aws-region: ${{ vars.AWS_REGION }}
	        image-uri: ${{ needs.set-ec2-test-environment.outputs.image-uri }}

	    # Download the dataset only when it is not already in the artifacts
	    # directory.
	    - name: Setup for SGLang datasets
	      run: |
	        mkdir -p ${TEST_ARTIFACTS_DIRECTORY}/dataset
	        if [ ! -f ${TEST_ARTIFACTS_DIRECTORY}/dataset/ShareGPT_V3_unfiltered_cleaned_split.json ]; then
	          echo "Downloading ShareGPT dataset..."
	          wget -P ${TEST_ARTIFACTS_DIRECTORY}/dataset https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
	        else
	          echo "ShareGPT dataset already exists. Skipping download."
	        fi

	    - name: Start container
	      env:
	        # The token reaches docker through the step environment
	        # (`-e HF_TOKEN` forwards the variable of the same name) instead of
	        # being expanded into the script text.
	        HF_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
	      run: |
	        CONTAINER_ID=$(docker run -d -it --rm --gpus=all \
	          -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
	          -v ${TEST_ARTIFACTS_DIRECTORY}/dataset:/dataset \
	          -p 30000:30000 \
	          -e HF_TOKEN \
	          ${{ needs.set-ec2-test-environment.outputs.image-uri }} \
	          --model-path Qwen/Qwen3-0.6B \
	          --reasoning-parser qwen3 \
	          --host 127.0.0.1 \
	          --port 30000)
	        # Export the container id for the following step.
	        echo "CONTAINER_ID=${CONTAINER_ID}" >> "${GITHUB_ENV}"
	        echo "Waiting for serving endpoint startup ..."
	        # Fixed wait; the logs are printed afterwards to show startup state.
	        sleep 120s
	        docker logs ${CONTAINER_ID}

	    # The benchmark runs inside the container, against the server on the
	    # container's own loopback address.
	    - name: Run SGLang tests
	      run: |
	        docker exec ${CONTAINER_ID} python3 -m sglang.bench_serving \
	          --backend sglang \
	          --host 127.0.0.1 --port 30000 \
	          --num-prompts 1000 \
	          --model Qwen/Qwen3-0.6B \
	          --dataset-name sharegpt \
	          --dataset-path /dataset/ShareGPT_V3_unfiltered_cleaned_split.json

	sglang-ec2-upstream-test:
	  # Runs a suite from the upstream sgl-project/sglang repository inside the
	  # selected EC2 image on a GPU runner.
	  needs: [build-sglang-ec2-image, set-ec2-test-environment]
	  # NOTE(review): with `success()`, this job is presumably skipped whenever
	  # build-sglang-ec2-image is skipped (test-only changes), so the
	  # production-image fallback would never be exercised here - confirm
	  # that this is intended.
	  if: success()
	  runs-on:
	    # The two continuation lines are not mapping keys (no space after the
	    # colon); they fold into the same runner label as the first line.
	    - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
	      fleet:x86-g6exl-runner
	      buildspec-override:true
	  concurrency:
	    group: ${{ github.workflow }}-sglang-ec2-upstream-test-${{ github.event.pull_request.number }}
	    cancel-in-progress: true
	  steps:
	    - name: Checkout DLC source
	      uses: actions/checkout@v5

	    - name: Container pull
	      uses: ./.github/actions/ecr-authenticate
	      with:
	        aws-account-id: ${{ needs.set-ec2-test-environment.outputs.aws-account-id }}
	        aws-region: ${{ vars.AWS_REGION }}
	        image-uri: ${{ needs.set-ec2-test-environment.outputs.image-uri }}

	    # Upstream sources at the tag matching the configured framework
	    # version, checked out next to the DLC sources.
	    - name: Checkout SGLang tests
	      uses: actions/checkout@v5
	      with:
	        repository: sgl-project/sglang
	        ref: v${{ needs.set-ec2-test-environment.outputs.framework-version }}
	        path: sglang_source

	    - name: Start container
	      env:
	        # The token reaches docker through the step environment
	        # (`-e HF_TOKEN` forwards the variable of the same name) instead of
	        # being expanded into the script text.
	        HF_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
	      run: |
	        # The entrypoint is overridden with bash so the container stays up
	        # for the `docker exec` calls in the later steps.
	        CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
	          -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
	          -v ./sglang_source:/workdir --workdir /workdir \
	          -e HF_TOKEN \
	          ${{ needs.set-ec2-test-environment.outputs.image-uri }})
	        echo "CONTAINER_ID=${CONTAINER_ID}" >> "${GITHUB_ENV}"

	    - name: Setup for SGLang tests
	      run: |
	        docker exec ${CONTAINER_ID} sh -c '
	          set -eux
	          # https://github.com/sgl-project/sglang/blob/v0.5.7/scripts/ci/ci_install_dependency.sh#L78C8-L80
	          # our CI suffers the same issue
	          export IS_BLACKWELL=1

	          bash scripts/ci/ci_install_dependency.sh
	        '

	    - name: Run SGLang tests
	      run: |
	        docker exec ${CONTAINER_ID} sh -c '
	          set -eux
	          nvidia-smi

	          # SRT backend Test
	          cd /workdir/test
	          python3 run_suite.py --hw cuda --suite stage-a-test-1
	        '

	# ======================================================
	# =============== SGLang SageMaker jobs ================
	# ======================================================
	build-sglang-sagemaker-image:
	  # Builds the sglang-sagemaker CI image for this PR. Runs only when
	  # check-changes reported a build-relevant change.
	  needs: [check-changes, load-config]
	  if: needs.check-changes.outputs.build-change == 'true'
	  runs-on:
	    # The two continuation lines are not mapping keys (no space after the
	    # colon); they fold into the same runner label as the first line.
	    - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
	      fleet:x86-build-runner
	      buildspec-override:true
	  concurrency:
	    group: ${{ github.workflow }}-build-sglang-sagemaker-image-${{ github.event.pull_request.number }}
	    cancel-in-progress: true
	  outputs:
	    ci-image: ${{ steps.build.outputs.image-uri }}
	  steps:
	    - uses: actions/checkout@v5

	    # Extract the build parameters from the SageMaker config JSON into step
	    # outputs.
	    - name: Parse SageMaker config
	      id: config
	      env:
	        # Passed through the environment rather than pasted into the script,
	        # so quotes or shell metacharacters in the JSON cannot break or
	        # alter the command.
	        CONFIG_JSON: ${{ needs.load-config.outputs.sagemaker-config }}
	      run: |
	        printf '%s\n' "${CONFIG_JSON}" > config.json
	        {
	          echo "framework=$(jq -r '.common.framework' config.json)"
	          echo "framework-version=$(jq -r '.common.framework_version' config.json)"
	          echo "container-type=$(jq -r '.common.job_type' config.json)"
	          echo "python-version=$(jq -r '.common.python_version' config.json)"
	          echo "cuda-version=$(jq -r '.common.cuda_version' config.json)"
	          echo "os-version=$(jq -r '.common.os_version' config.json)"
	          # The remaining keys are optional; `//` supplies the fallback.
	          echo "device-type=$(jq -r '.common.device_type // "gpu"' config.json)"
	          echo "arch-type=$(jq -r '.common.arch_type // "x86"' config.json)"
	          echo "contributor=$(jq -r '.common.contributor // "None"' config.json)"
	          echo "customer-type=$(jq -r '.common.customer_type // ""' config.json)"
	        } >> "${GITHUB_OUTPUT}"

	    - name: Build image
	      id: build
	      uses: ./.github/actions/build-image
	      with:
	        framework: ${{ steps.config.outputs.framework }}
	        target: sglang-sagemaker
	        base-image: lmsysorg/sglang:v${{ steps.config.outputs.framework-version }}-${{ steps.config.outputs.cuda-version }}-amd64
	        framework-version: ${{ steps.config.outputs.framework-version }}
	        container-type: ${{ steps.config.outputs.container-type }}
	        aws-account-id: ${{ vars.CI_AWS_ACCOUNT_ID }}
	        aws-region: ${{ vars.AWS_REGION }}
	        tag-pr: ${{ steps.config.outputs.framework }}-${{ steps.config.outputs.framework-version }}-gpu-${{ steps.config.outputs.python-version }}-${{ steps.config.outputs.cuda-version }}-${{ steps.config.outputs.os-version }}-sagemaker-pr-${{ github.event.pull_request.number }}
	        dockerfile-path: docker/${{ steps.config.outputs.framework }}/Dockerfile
	        arch-type: ${{ steps.config.outputs.arch-type }}
	        device-type: ${{ steps.config.outputs.device-type }}
	        cuda-version: ${{ steps.config.outputs.cuda-version }}
	        python-version: ${{ steps.config.outputs.python-version }}
	        os-version: ${{ steps.config.outputs.os-version }}
	        contributor: ${{ steps.config.outputs.contributor }}
	        customer-type: ${{ steps.config.outputs.customer-type }}

	set-sagemaker-test-environment:
	  # Chooses which image the SageMaker tests should use: the image built
	  # for this PR when the build job succeeded, otherwise the production
	  # image named in the SageMaker config.
	  needs: [check-changes, build-sglang-sagemaker-image, load-config]
	  # always() lets this job run even when the build job was skipped;
	  # !failure() and !cancelled() still stop it after a failed or cancelled
	  # dependency.
	  if: |
	    always() && !failure() && !cancelled() &&
	    (needs.check-changes.outputs.build-change == 'true' || needs.check-changes.outputs.test-change == 'true')
	  runs-on: ubuntu-latest
	  concurrency:
	    group: ${{ github.workflow }}-set-sagemaker-test-environment-${{ github.event.pull_request.number }}
	    cancel-in-progress: true
	  outputs:
	    aws-account-id: ${{ steps.set-env.outputs.AWS_ACCOUNT_ID }}
	    image-uri: ${{ steps.set-env.outputs.IMAGE_URI }}
	    framework-version: ${{ steps.config.outputs.framework-version }}
	  steps:
	    # Same checkout version as every other job in this workflow.
	    - name: Checkout code
	      uses: actions/checkout@v5

	    - name: Parse SageMaker config
	      id: config
	      env:
	        # Passed through the environment rather than pasted into the script,
	        # so quotes or shell metacharacters in the JSON cannot break or
	        # alter the command.
	        CONFIG_JSON: ${{ needs.load-config.outputs.sagemaker-config }}
	      run: |
	        printf '%s\n' "${CONFIG_JSON}" > config.json
	        echo "framework-version=$(jq -r '.common.framework_version' config.json)" >> "${GITHUB_OUTPUT}"
	        echo "prod-image=$(jq -r '.common.prod_image' config.json)" >> "${GITHUB_OUTPUT}"

	    - name: Set test environment
	      id: set-env
	      run: |
	        if [[ "${{ needs.build-sglang-sagemaker-image.result }}" == "success" ]]; then
	          # Fresh PR image from the CI account.
	          AWS_ACCOUNT_ID=${{ vars.CI_AWS_ACCOUNT_ID }}
	          IMAGE_URI=${{ needs.build-sglang-sagemaker-image.outputs.ci-image }}
	        else
	          # No PR image was built: fall back to the production image.
	          AWS_ACCOUNT_ID=${{ vars.PROD_AWS_ACCOUNT_ID }}
	          IMAGE_URI=${{ vars.PROD_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/${{ steps.config.outputs.prod-image }}
	        fi

	        echo "Image URI to test: ${IMAGE_URI}"
	        echo "AWS_ACCOUNT_ID=${AWS_ACCOUNT_ID}" >> "${GITHUB_OUTPUT}"
	        echo "IMAGE_URI=${IMAGE_URI}" >> "${GITHUB_OUTPUT}"

	sglang-sagemaker-local-benchmark-test:
	  # Starts the selected SageMaker image as a local SGLang server on a GPU
	  # runner (configured through SM_SGLANG_* variables) and drives it with
	  # sglang.bench_serving on the ShareGPT dataset.
	  needs: [build-sglang-sagemaker-image, set-sagemaker-test-environment]
	  # NOTE(review): with `success()`, this job is presumably skipped whenever
	  # build-sglang-sagemaker-image is skipped (test-only changes), so the
	  # production-image fallback would never be exercised here - confirm
	  # that this is intended.
	  if: success()
	  runs-on:
	    # The two continuation lines are not mapping keys (no space after the
	    # colon); they fold into the same runner label as the first line.
	    - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
	      fleet:x86-g6xl-runner
	      buildspec-override:true
	  concurrency:
	    group: ${{ github.workflow }}-sglang-sagemaker-local-benchmark-test-${{ github.event.pull_request.number }}
	    cancel-in-progress: true
	  steps:
	    - name: Checkout DLC source
	      uses: actions/checkout@v5

	    - name: Container pull
	      uses: ./.github/actions/ecr-authenticate
	      with:
	        aws-account-id: ${{ needs.set-sagemaker-test-environment.outputs.aws-account-id }}
	        aws-region: ${{ vars.AWS_REGION }}
	        image-uri: ${{ needs.set-sagemaker-test-environment.outputs.image-uri }}

	    # Download the dataset only when it is not already in the artifacts
	    # directory.
	    - name: Setup for SGLang datasets
	      run: |
	        mkdir -p ${TEST_ARTIFACTS_DIRECTORY}/dataset
	        if [ ! -f ${TEST_ARTIFACTS_DIRECTORY}/dataset/ShareGPT_V3_unfiltered_cleaned_split.json ]; then
	          echo "Downloading ShareGPT dataset..."
	          wget -P ${TEST_ARTIFACTS_DIRECTORY}/dataset https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
	        else
	          echo "ShareGPT dataset already exists. Skipping download."
	        fi

	    - name: Start container
	      env:
	        # The token reaches docker through the step environment
	        # (`-e HF_TOKEN` forwards the variable of the same name) instead of
	        # being expanded into the script text.
	        HF_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
	      run: |
	        # Server options are supplied as SM_SGLANG_* environment variables
	        # rather than as command-line arguments.
	        CONTAINER_ID=$(docker run -d -it --rm --gpus=all \
	          -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
	          -v ${TEST_ARTIFACTS_DIRECTORY}/dataset:/dataset \
	          -p 30000:30000 \
	          -e SM_SGLANG_MODEL_PATH=Qwen/Qwen3-0.6B \
	          -e SM_SGLANG_REASONING_PARSER=qwen3 \
	          -e SM_SGLANG_HOST=127.0.0.1 \
	          -e SM_SGLANG_PORT=30000 \
	          -e HF_TOKEN \
	          ${{ needs.set-sagemaker-test-environment.outputs.image-uri }})
	        # Export the container id for the following step.
	        echo "CONTAINER_ID=${CONTAINER_ID}" >> "${GITHUB_ENV}"
	        echo "Waiting for serving endpoint startup ..."
	        # Fixed wait; the logs are printed afterwards to show startup state.
	        sleep 120s
	        docker logs ${CONTAINER_ID}

	    # The benchmark runs inside the container, against the server on the
	    # container's own loopback address.
	    - name: Run SGLang tests
	      run: |
	        docker exec ${CONTAINER_ID} python3 -m sglang.bench_serving \
	          --backend sglang \
	          --host 127.0.0.1 --port 30000 \
	          --num-prompts 1000 \
	          --model Qwen/Qwen3-0.6B \
	          --dataset-name sharegpt \
	          --dataset-path /dataset/ShareGPT_V3_unfiltered_cleaned_split.json

	sglang-sagemaker-upstream-test:
	  # Runs a suite from the upstream sgl-project/sglang repository inside the
	  # selected SageMaker image on a GPU runner.
	  needs: [build-sglang-sagemaker-image, set-sagemaker-test-environment]
	  # NOTE(review): with `success()`, this job is presumably skipped whenever
	  # build-sglang-sagemaker-image is skipped (test-only changes), so the
	  # production-image fallback would never be exercised here - confirm
	  # that this is intended.
	  if: success()
	  runs-on:
	    # The two continuation lines are not mapping keys (no space after the
	    # colon); they fold into the same runner label as the first line.
	    - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
	      fleet:x86-g6exl-runner
	      buildspec-override:true
	  concurrency:
	    # The group carries this job's own name, like every other job in the
	    # workflow.
	    group: ${{ github.workflow }}-sglang-sagemaker-upstream-test-${{ github.event.pull_request.number }}
	    cancel-in-progress: true
	  steps:
	    - name: Checkout DLC source
	      uses: actions/checkout@v5

	    - name: Container pull
	      uses: ./.github/actions/ecr-authenticate
	      with:
	        aws-account-id: ${{ needs.set-sagemaker-test-environment.outputs.aws-account-id }}
	        aws-region: ${{ vars.AWS_REGION }}
	        image-uri: ${{ needs.set-sagemaker-test-environment.outputs.image-uri }}

	    # Upstream sources at the tag matching the configured framework
	    # version, checked out next to the DLC sources.
	    - name: Checkout SGLang tests
	      uses: actions/checkout@v5
	      with:
	        repository: sgl-project/sglang
	        ref: v${{ needs.set-sagemaker-test-environment.outputs.framework-version }}
	        path: sglang_source

	    - name: Start container
	      env:
	        # The token reaches docker through the step environment
	        # (`-e HF_TOKEN` forwards the variable of the same name) instead of
	        # being expanded into the script text.
	        HF_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
	      run: |
	        # The entrypoint is overridden with bash so the container stays up
	        # for the `docker exec` calls in the later steps.
	        CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
	          -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
	          -v ./sglang_source:/workdir --workdir /workdir \
	          -e HF_TOKEN \
	          ${{ needs.set-sagemaker-test-environment.outputs.image-uri }})
	        echo "CONTAINER_ID=${CONTAINER_ID}" >> "${GITHUB_ENV}"

	    - name: Setup for SGLang tests
	      run: |
	        docker exec ${CONTAINER_ID} sh -c '
	          set -eux
	          # https://github.com/sgl-project/sglang/blob/v0.5.7/scripts/ci/ci_install_dependency.sh#L78C8-L80
	          # our CI suffers the same issue
	          export IS_BLACKWELL=1

	          bash scripts/ci/ci_install_dependency.sh
	        '

	    - name: Run SGLang tests
	      run: |
	        docker exec ${CONTAINER_ID} sh -c '
	          set -eux
	          nvidia-smi

	          # SRT backend Test
	          cd /workdir/test
	          python3 run_suite.py --hw cuda --suite stage-a-test-1
	        '

	sglang-sagemaker-endpoint-test:
	  # Runs the pytest suite under test/sglang/sagemaker against the selected
	  # SageMaker image.
	  needs: [set-sagemaker-test-environment]
	  # Depends only on the set-environment job and checks its result
	  # explicitly.
	  if: |
	    always() && !failure() && !cancelled() &&
	    needs.set-sagemaker-test-environment.result == 'success'
	  runs-on:
	    # The two continuation lines are not mapping keys (no space after the
	    # colon); they fold into the same runner label as the first line.
	    - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
	      fleet:default-runner
	      buildspec-override:true
	  concurrency:
	    group: ${{ github.workflow }}-sglang-sagemaker-endpoint-test-${{ github.event.pull_request.number }}
	    # Unlike the other jobs, an in-flight run is not cancelled by a newer one.
	    cancel-in-progress: false
	  steps:
	    - name: Checkout DLC source
	      uses: actions/checkout@v5

	    # Create a Python 3.12 virtualenv with uv and install both the shared
	    # and the SageMaker-specific test requirements.
	    - name: Install test dependencies
	      run: |
	        uv venv --python 3.12
	        source .venv/bin/activate
	        uv pip install -r test/requirements.txt
	        uv pip install -r test/sglang/sagemaker/requirements.txt

	    - name: Run sagemaker tests
	      run: |
	        source .venv/bin/activate
	        cd test/
	        python3 -m pytest -vs -rA --image-uri ${{ needs.set-sagemaker-test-environment.outputs.image-uri }} sglang/sagemaker

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add label to build #265

Workflow file

Add label to build #265

Uh oh!

Workflow file for this run