[docker] fix: new images for sgl056 and vllm012 have compatibility issues (#4714)

Begunner · Begunner · web-flow · commit f3a023396d5c · 2025-12-29T19:19:05.000+08:00
### What does this PR do?

> TransformerEngine v2.8 leads to unexpected crashes; update it to v2.10.
> Fix other resultant compatibility issues.

---------

Co-authored-by: Begunner <went@bytedance.com>
diff --git a/docker/Dockerfile.stable.sglang b/docker/Dockerfile.stable.sglang
@@ -8,7 +8,7 @@ RUN pip install nvidia-mathdx
 
 RUN MAX_JOBS=128 pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" git+https://github.com/NVIDIA/apex.git
 
-RUN export NVTE_FRAMEWORK=pytorch && MAX_JOBS=128 NVTE_BUILD_THREADS_PER_JOB=4 pip3 install --resume-retries 999 --no-cache-dir --no-build-isolation git+https://github.com/NVIDIA/TransformerEngine.git@release_v2.8
+RUN export NVTE_FRAMEWORK=pytorch && MAX_JOBS=128 NVTE_BUILD_THREADS_PER_JOB=4 pip3 install --resume-retries 999 --no-cache-dir --no-build-isolation git+https://github.com/NVIDIA/TransformerEngine.git@release_v2.10
 
 RUN pip install --upgrade --no-cache-dir transformers tokenizers
 
diff --git a/docker/Dockerfile.stable.vllm b/docker/Dockerfile.stable.vllm
@@ -42,7 +42,7 @@ RUN pip install nvidia-mathdx
 
 RUN MAX_JOBS=128 pip install -v --disable-pip-version-check --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" git+https://github.com/NVIDIA/apex.git
 
-RUN export NVTE_FRAMEWORK=pytorch && MAX_JOBS=128 NVTE_BUILD_THREADS_PER_JOB=4 pip3 install --resume-retries 999 --no-build-isolation git+https://github.com/NVIDIA/TransformerEngine.git@release_v2.8
+RUN export NVTE_FRAMEWORK=pytorch && MAX_JOBS=128 NVTE_BUILD_THREADS_PER_JOB=4 pip3 install --resume-retries 999 --no-build-isolation git+https://github.com/NVIDIA/TransformerEngine.git@release_v2.10
 
 RUN pip install --upgrade transformers tokenizers
 
diff --git a/verl/__init__.py b/verl/__init__.py
@@ -49,6 +49,7 @@
 
     patch_hub()
 
+
 if is_npu_available:
     # Workaround for torch-npu's lack of support for creating nested tensors from NPU tensors.
     #

Original file line number	Diff line number	Diff line change
`@@ -49,6 +49,7 @@`
`49`	`49`
`50`	`50`	`patch_hub()`
`51`	`51`
	`52`	`+`
`52`	`53`	`if is_npu_available:`
`53`	`54`	`# Workaround for torch-npu's lack of support for creating nested tensors from NPU tensors.`
`54`	`55`	`#`