Skip to content

Commit c9fa7ea

Browse files
authored
Merge branch 'main' into llama4hybridCache
2 parents d053027 + 7a5e6ce commit c9fa7ea

File tree

149 files changed

+10598
-1937
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

149 files changed

+10598
-1937
lines changed

.github/workflows/lint.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name: Lint
22

3-
on: [pull_request]
3+
on: [ pull_request ]
44

55
jobs:
66
lint:
@@ -19,4 +19,4 @@ jobs:
1919
pre-commit install
2020
2121
- name: Linting
22-
run: pre-commit run --all-files
22+
run: pre-commit run --all-files --show-diff-on-failure

.github/workflows/pr-test-amd.yml

Lines changed: 19 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ jobs:
4444
run: bash scripts/amd_ci_install_dependency.sh
4545

4646
- name: Evaluate Accuracy
47-
timeout-minutes: 20
47+
timeout-minutes: 30
4848
run: |
4949
bash scripts/amd_ci_exec.sh python3 test_eval_accuracy_large.py
5050
bash scripts/amd_ci_exec.sh python3 test_eval_fp8_accuracy.py
@@ -70,7 +70,7 @@ jobs:
7070
run: bash scripts/amd_ci_install_dependency.sh
7171

7272
- name: Evaluate accuracy (TP=2)
73-
timeout-minutes: 20
73+
timeout-minutes: 30
7474
run: |
7575
bash scripts/amd_ci_exec.sh python3 test_moe_eval_accuracy_large.py
7676
@@ -94,7 +94,7 @@ jobs:
9494
run: bash scripts/amd_ci_install_dependency.sh
9595

9696
- name: MLA TEST
97-
timeout-minutes: 20
97+
timeout-minutes: 30
9898
run: |
9999
bash scripts/amd_ci_exec.sh python3 test_mla.py
100100
@@ -118,31 +118,26 @@ jobs:
118118
run: bash scripts/amd_ci_install_dependency.sh
119119

120120
- name: Benchmark single latency
121-
timeout-minutes: 10
121+
timeout-minutes: 20
122122
run: |
123123
bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_bs1_small
124124
bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_bs1_default
125125
126126
- name: Benchmark online latency
127-
timeout-minutes: 10
127+
timeout-minutes: 15
128128
run: |
129129
bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_online_latency_default
130130
131131
- name: Benchmark offline throughput
132-
timeout-minutes: 10
132+
timeout-minutes: 15
133133
run: |
134134
bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default
135135
136136
- name: Benchmark offline throughput (Non-streaming, small batch size)
137-
timeout-minutes: 10
137+
timeout-minutes: 15
138138
run: |
139139
bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_non_stream_small_batch_size
140140
141-
- name: Benchmark online latency (EAGLE)
142-
timeout-minutes: 10
143-
run: |
144-
bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_online_latency_eagle
145-
146141
performance-test-1-gpu-part-2-amd:
147142
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
148143
github.event.pull_request.draft == false
@@ -163,17 +158,17 @@ jobs:
163158
run: bash scripts/amd_ci_install_dependency.sh
164159

165160
- name: Benchmark offline throughput (w/o RadixAttention)
166-
timeout-minutes: 10
161+
timeout-minutes: 15
167162
run: |
168163
bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_without_radix_cache
169164
170165
- name: Benchmark offline throughput (w/ Triton)
171-
timeout-minutes: 10
166+
timeout-minutes: 15
172167
run: |
173168
bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_with_triton_attention_backend
174169
175170
- name: Benchmark offline throughput (w/ FP8)
176-
timeout-minutes: 10
171+
timeout-minutes: 15
177172
run: |
178173
bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default_fp8
179174
@@ -197,27 +192,27 @@ jobs:
197192
run: bash scripts/amd_ci_install_dependency.sh
198193

199194
- name: Benchmark dummy grok (TP=2)
200-
timeout-minutes: 20
195+
timeout-minutes: 30
201196
run: |
202197
bash scripts/amd_ci_exec.sh python3 models/test_dummy_grok_models.py
203198
204199
- name: Benchmark single latency (TP=2)
205-
timeout-minutes: 20
200+
timeout-minutes: 25
206201
run: |
207202
bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
208203
209204
- name: Benchmark single latency + torch.compile (TP=2)
210-
timeout-minutes: 20
205+
timeout-minutes: 25
211206
run: |
212207
bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_torch_compile_tp2_bs1
213208
214209
- name: Benchmark offline throughput (TP=2)
215-
timeout-minutes: 20
210+
timeout-minutes: 25
216211
run: |
217212
bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_default
218213
219214
- name: Benchmark offline throughput (w/o RadixAttention) (TP=2)
220-
timeout-minutes: 20
215+
timeout-minutes: 25
221216
run: |
222217
bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_without_radix_cache
223218
@@ -241,7 +236,7 @@ jobs:
241236
run: bash scripts/amd_ci_install_dependency.sh
242237

243238
- name: Run test
244-
timeout-minutes: 30
239+
timeout-minutes: 40
245240
run: |
246241
bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd
247242
@@ -265,7 +260,7 @@ jobs:
265260
run: bash scripts/amd_ci_install_dependency.sh
266261

267262
- name: Run test
268-
timeout-minutes: 30
263+
timeout-minutes: 40
269264
run: |
270265
bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd
271266
@@ -274,7 +269,7 @@ jobs:
274269
github.event.pull_request.draft == false
275270
strategy:
276271
matrix:
277-
runner: [linux-mi300-gpu-8, linux-mi325-gpu-8]
272+
runner: [linux-mi300-gpu-8]
278273
runs-on: ${{matrix.runner}}
279274
steps:
280275
- name: Checkout code
@@ -289,7 +284,7 @@ jobs:
289284
run: bash scripts/amd_ci_install_dependency.sh
290285

291286
- name: Run test
292-
timeout-minutes: 30
287+
timeout-minutes: 40
293288
run: |
294289
bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd
295290

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ repos:
2323
hooks:
2424
- id: isort
2525
- repo: https://github.com/astral-sh/ruff-pre-commit
26-
rev: v0.11.2
26+
rev: v0.11.7
2727
hooks:
2828
- id: ruff
2929
args: [--select=F401, --fixable=F401]

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
[![license](https://img.shields.io/github/license/sgl-project/sglang.svg)](https://github.com/sgl-project/sglang/tree/main/LICENSE)
77
[![issue resolution](https://img.shields.io/github/issues-closed-raw/sgl-project/sglang)](https://github.com/sgl-project/sglang/issues)
88
[![open issues](https://img.shields.io/github/issues-raw/sgl-project/sglang)](https://github.com/sgl-project/sglang/issues)
9-
[![](https://img.shields.io/badge/Gurubase-(experimental)-006BFF)](https://gurubase.io/g/sglang)
9+
[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/sgl-project/sglang)
1010

1111
</div>
1212

benchmark/deepseek_v3/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ Add [performance optimization options](#performance-optimization-options) as nee
3333

3434
```bash
3535
# Installation
36-
pip install "sglang[all]>=0.4.6.post4"
36+
pip install "sglang[all]>=0.4.6.post5"
3737

3838
# Launch
3939
python3 -m sglang.launch_server --model deepseek-ai/DeepSeek-V3 --tp 8 --trust-remote-code

docker/Dockerfile.blackwell

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ WORKDIR /sgl-workspace
66

77
RUN pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128
88

9-
RUN pip3 install https://github.com/sgl-project/whl/releases/download/v0.1.3/sgl_kernel-0.1.3+cu128-cp39-abi3-manylinux2014_x86_64.whl \
9+
RUN pip3 install https://github.com/sgl-project/whl/releases/download/v0.1.4/sgl_kernel-0.1.4+cu128-cp39-abi3-manylinux2014_x86_64.whl \
1010
&& pip3 install setuptools==75.0.0 wheel==0.41.0 scikit-build-core
1111

1212
RUN git clone --depth=1 https://github.com/sgl-project/sglang.git \

docker/Dockerfile.rocm

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Usage (to build SGLang ROCm docker image):
2-
# docker build --build-arg SGL_BRANCH=v0.4.6.post4 -t v0.4.6.post4-rocm630 -f Dockerfile.rocm .
2+
# docker build --build-arg SGL_BRANCH=v0.4.6.post5 -t v0.4.6.post5-rocm630 -f Dockerfile.rocm .
33

44
# default base image
55
ARG BASE_IMAGE="rocm/sgl-dev:vllm20250114"
@@ -18,7 +18,7 @@ ARG TRITON_COMMIT="improve_fa_decode_3.0.0"
1818

1919

2020
ARG AITER_REPO="https://github.com/ROCm/aiter.git"
21-
ARG AITER_COMMIT="v0.1.1"
21+
ARG AITER_COMMIT="v0.1.2"
2222

2323
RUN git clone ${SGL_REPO} \
2424
&& cd sglang \

0 commit comments

Comments
 (0)