[sglang] refactor: Unify async rollout under SGLangRollout, and support sglang==0.4.6.post5 (#1717)
### Checklist Before Starting
- [x] Search for similar PR(s).
### What does this PR do?
- Unify the functionality of SGLangRollout and AsyncSGLangRollout:
remove the original SGLangRollout and rename AsyncSGLangRollout to
SGLangRollout.
- Make minor changes required by sglang==0.4.6.post5.
### High-Level Design
> Demonstrate the high-level design if this PR is complex.
### Specific Changes
> List the specific changes.
### API
> Demonstrate how the API changes if any.
### Usage Example
> Provide usage example(s) for easier usage.
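A minimal sketch of what the unification means at call sites; the import path is assumed from verl's module layout, and constructor arguments are elided, so treat this as illustrative rather than the PR's verified API:

```python
# Assumed module path: previously this package exposed both SGLangRollout
# and AsyncSGLangRollout; after this PR only the unified class remains.
from verl.workers.rollout.sglang_rollout import SGLangRollout

# Call sites that used AsyncSGLangRollout switch to SGLangRollout
# (constructor arguments elided; see the class definition for the signature).
# rollout = SGLangRollout(...)
```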
### Test
> For changes that cannot be tested by CI (e.g., algorithm
implementation, new model support), validate by experiment(s) and show
results like training curve plots, evaluation results, etc.
### Additional Info.
- **Issue Number**: Fixes issue # or discussion # if any.
- **Training**: [Note which backend this PR will affect: FSDP, Megatron,
both, or none]
- **Inference**: [Note which backend this PR will affect: vLLM, SGLang,
both, or none]
### Checklist Before Submitting
- [ ] Read the [Contribute
Guide](https://github.com/volcengine/verl?tab=readme-ov-file#contribution-guide).
- [ ] Apply [pre-commit
checks](https://github.com/volcengine/verl?tab=readme-ov-file#code-linting-and-formatting).
- [ ] Add `[BREAKING]` to the PR title if it breaks any API.
- [ ] Update the documentation about your changes in the
[docs](https://github.com/volcengine/verl/tree/main/docs).
- [ ] Add CI test(s) if necessary.
---------
Co-authored-by: zyzshishui <@qq.com>
Co-authored-by: Xiang Long <[email protected]>
Co-authored-by: ocss884 <[email protected]>
Co-authored-by: zhaochenyang20 <[email protected]>
Co-authored-by: H <[email protected]>
1. ``verl`` initializes a ``SGLangRollout`` module during rollout, which is used to evaluate/generate samples.
2. ``SGLangRollout`` will initialize ``Engine``, and further initialize a ``torch.distributed.DeviceMesh``, used to support Tensor Parallel (TP).
3. ``DeviceMesh.init()`` internally checks the free GPU memory of all participating devices. If the difference is too large (more than ~10%), it directly reports an error to avoid initialization failures or deadlocks (see the sketch after this list).
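As a hedged illustration of steps 2-3, the sketch below builds a TP ``DeviceMesh`` with PyTorch's public ``init_device_mesh`` and mimics the free-memory balance check. The helper name, the ~10% threshold, and the use of ``torch.cuda.mem_get_info`` are assumptions for illustration, not verl's or PyTorch's actual internals:

```python
import torch
import torch.distributed as dist
from torch.distributed.device_mesh import init_device_mesh

def init_tp_mesh_with_memory_guard(tp_size: int, tolerance: float = 0.10):
    """Sketch (assumed name and threshold): fail fast if free GPU memory is
    unbalanced across ranks before building the TP DeviceMesh. Assumes the
    default process group is already initialized."""
    free_bytes, _total = torch.cuda.mem_get_info()  # free memory on this rank's GPU
    gathered = [0] * dist.get_world_size()
    dist.all_gather_object(gathered, free_bytes)
    lo, hi = min(gathered), max(gathered)
    if hi and (hi - lo) / hi > tolerance:
        # Mirrors the ~10% guard described in step 3: erroring out here avoids
        # initialization failures or deadlocks later in TP setup.
        raise RuntimeError(
            f"Unbalanced free GPU memory across ranks: min={lo}, max={hi}"
        )
    # Step 2: a 1-D mesh over the TP group (mesh shape and dim name illustrative).
    return init_device_mesh("cuda", (tp_size,), mesh_dim_names=("tp",))
```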
Early workers already use up GPU memory → late workers still have empty memory.
**3. SGLang's TP init uses "all-device broadcast", but there's no uniform release timing**

Although ``SGLangRollout`` may involve only a subset of the GPUs, its ``Engine`` initialization calls ``torch.distributed.init_process_group()`` and broadcasts weights, so:
- Non-rollout GPUs also join the communication.
- Later on, ``DeviceMesh`` init will fail due to "inconsistent memory" (see the sketch below).
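To make that failure mode concrete, here is a minimal sketch (the function and tensor names are hypothetical) of why even non-rollout ranks get pulled in: ``init_process_group`` and a broadcast over the default group are collective across every launched rank.

```python
import torch
import torch.distributed as dist

def engine_style_init() -> None:
    # Collective over *all* launched ranks: GPUs that will never serve
    # rollout requests must still enter the process group.
    dist.init_process_group(backend="nccl")
    torch.cuda.set_device(dist.get_rank() % torch.cuda.device_count())
    weights = torch.empty(1 << 20, device="cuda")  # stand-in for model weights
    # Broadcasting over the default group drags non-rollout ranks into the
    # communication, and they allocate memory at rank-dependent times --
    # which is what later trips DeviceMesh's free-memory consistency check.
    dist.broadcast(weights, src=0)
```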