sgl-project
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
Lines changed: 2 additions & 2 deletions
diff --git a/.github/workflows/pr-test-amd.yml b/.github/workflows/pr-test-amd.yml
Lines changed: 19 additions & 24 deletions
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/README.md b/README.md
Lines changed: 1 addition & 1 deletion
diff --git a/benchmark/deepseek_v3/README.md b/benchmark/deepseek_v3/README.md
Lines changed: 1 addition & 1 deletion
diff --git a/docker/Dockerfile.blackwell b/docker/Dockerfile.blackwell
Lines changed: 1 addition & 1 deletion
diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm
Lines changed: 2 additions & 2 deletions
@@ -1,6 +1,6 @@
 name: Lint
 
-on: [pull_request]
+on: [ pull_request ]
 
 jobs:
   lint:
@@ -19,4 +19,4 @@ jobs:
           pre-commit install
 
       - name: Linting
-        run: pre-commit run --all-files
+        run: pre-commit run --all-files --show-diff-on-failure
@@ -44,7 +44,7 @@ jobs:
         run: bash scripts/amd_ci_install_dependency.sh
 
       - name: Evaluate Accuracy
-        timeout-minutes: 20
+        timeout-minutes: 30
         run: |
           bash scripts/amd_ci_exec.sh python3 test_eval_accuracy_large.py
           bash scripts/amd_ci_exec.sh python3 test_eval_fp8_accuracy.py
@@ -70,7 +70,7 @@ jobs:
         run: bash scripts/amd_ci_install_dependency.sh
 
       - name: Evaluate accuracy (TP=2)
-        timeout-minutes: 20
+        timeout-minutes: 30
         run: |
           bash scripts/amd_ci_exec.sh python3 test_moe_eval_accuracy_large.py
 
@@ -94,7 +94,7 @@ jobs:
         run: bash scripts/amd_ci_install_dependency.sh
 
       - name: MLA TEST
-        timeout-minutes: 20
+        timeout-minutes: 30
         run: |
           bash scripts/amd_ci_exec.sh python3 test_mla.py
 
@@ -118,31 +118,26 @@ jobs:
         run: bash scripts/amd_ci_install_dependency.sh
 
       - name: Benchmark single latency
-        timeout-minutes: 10
+        timeout-minutes: 20
         run: |
           bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_bs1_small
           bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_bs1_default
 
       - name: Benchmark online latency
-        timeout-minutes: 10
+        timeout-minutes: 15
         run: |
           bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_online_latency_default
 
       - name: Benchmark offline throughput
-        timeout-minutes: 10
+        timeout-minutes: 15
         run: |
           bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default
 
       - name: Benchmark offline throughput (Non-streaming, small batch size)
-        timeout-minutes: 10
+        timeout-minutes: 15
         run: |
           bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_non_stream_small_batch_size
 
-      - name: Benchmark online latency (EAGLE)
-        timeout-minutes: 10
-        run: |
-          bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_online_latency_eagle
-
   performance-test-1-gpu-part-2-amd:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
         github.event.pull_request.draft == false
@@ -163,17 +158,17 @@ jobs:
         run: bash scripts/amd_ci_install_dependency.sh
 
       - name: Benchmark offline throughput (w/o RadixAttention)
-        timeout-minutes: 10
+        timeout-minutes: 15
         run: |
           bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_without_radix_cache
 
       - name: Benchmark offline throughput (w/ Triton)
-        timeout-minutes: 10
+        timeout-minutes: 15
         run: |
           bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_with_triton_attention_backend
 
       - name: Benchmark offline throughput (w/ FP8)
-        timeout-minutes: 10
+        timeout-minutes: 15
         run: |
           bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default_fp8
 
@@ -197,27 +192,27 @@ jobs:
         run: bash scripts/amd_ci_install_dependency.sh
 
       - name: Benchmark dummy grok (TP=2)
-        timeout-minutes: 20
+        timeout-minutes: 30
         run: |
           bash scripts/amd_ci_exec.sh python3 models/test_dummy_grok_models.py
 
       - name: Benchmark single latency (TP=2)
-        timeout-minutes: 20
+        timeout-minutes: 25
         run: |
           bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
 
       - name: Benchmark single latency + torch.compile (TP=2)
-        timeout-minutes: 20
+        timeout-minutes: 25
         run: |
           bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_torch_compile_tp2_bs1
 
       - name: Benchmark offline throughput (TP=2)
-        timeout-minutes: 20
+        timeout-minutes: 25
         run: |
           bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_default
 
       - name: Benchmark offline throughput (w/o RadixAttention) (TP=2)
-        timeout-minutes: 20
+        timeout-minutes: 25
         run: |
           bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_without_radix_cache
 
@@ -241,7 +236,7 @@ jobs:
         run: bash scripts/amd_ci_install_dependency.sh
 
       - name: Run test
-        timeout-minutes: 30
+        timeout-minutes: 40
         run: |
           bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd
 
@@ -265,7 +260,7 @@ jobs:
         run: bash scripts/amd_ci_install_dependency.sh
 
       - name: Run test
-        timeout-minutes: 30
+        timeout-minutes: 40
         run: |
           bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd
 
@@ -274,7 +269,7 @@ jobs:
       github.event.pull_request.draft == false
     strategy:
       matrix:
-        runner: [linux-mi300-gpu-8, linux-mi325-gpu-8]
+        runner: [linux-mi300-gpu-8]
     runs-on: ${{matrix.runner}}
     steps:
       - name: Checkout code
@@ -289,7 +284,7 @@ jobs:
         run: bash scripts/amd_ci_install_dependency.sh
 
       - name: Run test
-        timeout-minutes: 30
+        timeout-minutes: 40
         run: |
           bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd
 
 
@@ -23,7 +23,7 @@ repos:
     hooks:
       - id: isort
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.11.2
+    rev: v0.11.7
     hooks:
       - id: ruff
         args: [--select=F401, --fixable=F401]
 
@@ -6,7 +6,7 @@
 [![license](https://img.shields.io/github/license/sgl-project/sglang.svg)](https://github.com/sgl-project/sglang/tree/main/LICENSE)
 [![issue resolution](https://img.shields.io/github/issues-closed-raw/sgl-project/sglang)](https://github.com/sgl-project/sglang/issues)
 [![open issues](https://img.shields.io/github/issues-raw/sgl-project/sglang)](https://github.com/sgl-project/sglang/issues)
-[![](https://img.shields.io/badge/Gurubase-(experimental)-006BFF)](https://gurubase.io/g/sglang)
+[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/sgl-project/sglang)
 
 </div>
 
 
@@ -33,7 +33,7 @@ Add [performance optimization options](#performance-optimization-options) as nee
 
 ```bash
 # Installation
-pip install "sglang[all]>=0.4.6.post4"
+pip install "sglang[all]>=0.4.6.post5"
 
 # Launch
 python3 -m sglang.launch_server --model deepseek-ai/DeepSeek-V3 --tp 8 --trust-remote-code
 
@@ -6,7 +6,7 @@ WORKDIR /sgl-workspace
 
 RUN pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128
 
-RUN pip3 install https://github.com/sgl-project/whl/releases/download/v0.1.3/sgl_kernel-0.1.3+cu128-cp39-abi3-manylinux2014_x86_64.whl \
+RUN pip3 install https://github.com/sgl-project/whl/releases/download/v0.1.4/sgl_kernel-0.1.4+cu128-cp39-abi3-manylinux2014_x86_64.whl \
     && pip3 install setuptools==75.0.0 wheel==0.41.0 scikit-build-core
 
 RUN git clone --depth=1 https://github.com/sgl-project/sglang.git \
 
@@ -1,5 +1,5 @@
 # Usage (to build SGLang ROCm docker image):
-#   docker build --build-arg SGL_BRANCH=v0.4.6.post4 -t v0.4.6.post4-rocm630 -f Dockerfile.rocm .
+#   docker build --build-arg SGL_BRANCH=v0.4.6.post5 -t v0.4.6.post5-rocm630 -f Dockerfile.rocm .
 
 # default base image
 ARG BASE_IMAGE="rocm/sgl-dev:vllm20250114"
@@ -18,7 +18,7 @@ ARG TRITON_COMMIT="improve_fa_decode_3.0.0"
 
 
 ARG AITER_REPO="https://github.com/ROCm/aiter.git"
-ARG AITER_COMMIT="v0.1.1"
+ARG AITER_COMMIT="v0.1.2"
 
 RUN git clone ${SGL_REPO} \
     && cd sglang \