Commit f89f8c7

Fix bugs (#4708)

* Fix bugs
* Optimize

1 parent ddacf07 commit f89f8c7

3 files changed, 27 insertions(+), 20 deletions(-)

deploy/genai_vllm_server_docker/Dockerfile

Lines changed: 2 additions & 2 deletions

@@ -13,9 +13,9 @@ RUN python -m pip install "paddlex${PADDLEX_VERSION}"
 
 ARG BUILD_FOR_SM120=false
 RUN if [ "${BUILD_FOR_SM120}" = 'true' ]; then \
-        python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.3+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \
+        python -m pip install torch==2.8.0 https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.3+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \
     else \
-        python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \
+        python -m pip install torch==2.8.0 https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \
     fi \
     && paddlex --install genai-vllm-server
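For context, a minimal sketch of how the updated image might be built for a compute capability 12.0 GPU; the image tag is hypothetical, while --build-arg and the BUILD_FOR_SM120 argument come straight from the Dockerfile:

    # SM 12.0 build: pins torch 2.8.0 and picks the prebuilt
    # flash-attn 2.8.3 wheel instead of 2.8.2.
    docker build \
        --build-arg BUILD_FOR_SM120=true \
        -t paddlex-genai-vllm-server \
        deploy/genai_vllm_server_docker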

deploy/genai_vllm_server_docker/build.sh

Lines changed: 2 additions & 2 deletions

@@ -21,8 +21,8 @@ while [[ $# -gt 0 ]]; do
             shift
             ;;
         *)
-            echo "Unknown option: $1"
-            exit 1
+            echo "Unknown option: $1" >&2
+            exit 2
             ;;
     esac
 done
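A quick way to exercise the revised error path (the flag name is a deliberate placeholder, and the script is assumed to be run from the repository root):

    # Unknown options now go to stderr, and the script exits with
    # status 2 instead of 1.
    bash deploy/genai_vllm_server_docker/build.sh --no-such-flag
    echo "exit status: $?"    # prints: exit status: 2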

paddlex/paddlex_cli.py

Lines changed: 23 additions & 16 deletions

@@ -36,7 +36,11 @@
     is_dep_available,
     is_paddle2onnx_plugin_available,
 )
-from .utils.env import get_gpu_compute_capability, get_paddle_cuda_version
+from .utils.env import (
+    get_gpu_compute_capability,
+    get_paddle_cuda_version,
+    is_cuda_available,
+)
 from .utils.install import install_packages, uninstall_packages
 from .utils.interactive_get_pipeline import interactive_get_pipeline
 from .utils.pipeline_arguments import PIPELINE_ARGUMENTS

@@ -365,21 +369,24 @@ def _install_genai_deps(plugin_types):
 
     for plugin_type in plugin_types:
         if "vllm" in plugin_type or "sglang" in plugin_type:
-            try:
-                install_packages(["wheel"], constraints="required")
-                cap = get_gpu_compute_capability()
-                if cap >= (12, 0):
-                    install_packages(
-                        ["xformers", "flash-attn == 2.8.3"], constraints="required"
-                    )
-                else:
-                    install_packages(
-                        ["xformers", "flash-attn == 2.8.2"], constraints="required"
-                    )
-            except Exception:
-                logging.error("Installation failed", exc_info=True)
-                sys.exit(1)
-            break
+            install_packages(["xformers"], constraints="required")
+            if is_cuda_available():
+                try:
+                    install_packages(["wheel"], constraints="required")
+                    cap = get_gpu_compute_capability()
+                    assert cap is not None
+                    if cap >= (12, 0):
+                        install_packages(
+                            ["flash-attn == 2.8.3"], constraints="required"
+                        )
+                    else:
+                        install_packages(
+                            ["flash-attn == 2.8.2"], constraints="required"
+                        )
+                except Exception:
+                    logging.error("Installation failed", exc_info=True)
+                    sys.exit(1)
+            break
 
     logging.info(
         "Successfully installed the generative AI plugin"
