Commit f89f8c7

Fix bugs (#4708)

* Fix bugs
* Optimize

1 parent ddacf07 commit f89f8c7

3 files changed, 27 insertions(+), 20 deletions(-)

deploy/genai_vllm_server_docker/Dockerfile

Lines changed: 2 additions & 2 deletions

@@ -13,9 +13,9 @@ RUN python -m pip install "paddlex${PADDLEX_VERSION}"
 
 ARG BUILD_FOR_SM120=false
 RUN if [ "${BUILD_FOR_SM120}" = 'true' ]; then \
-        python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.3+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \
+        python -m pip install torch==2.8.0 https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.3+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \
     else \
-        python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \
+        python -m pip install torch==2.8.0 https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \
     fi \
     && paddlex --install genai-vllm-server
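For context, a minimal sketch of how the updated image might be built for a compute capability 12.0 GPU; the image tag is hypothetical, while --build-arg and the BUILD_FOR_SM120 argument come straight from the Dockerfile:

    # SM 12.0 build: pins torch 2.8.0 and picks the prebuilt
    # flash-attn 2.8.3 wheel instead of 2.8.2.
    docker build \
        --build-arg BUILD_FOR_SM120=true \
        -t paddlex-genai-vllm-server \
        deploy/genai_vllm_server_docker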

deploy/genai_vllm_server_docker/build.sh

Lines changed: 2 additions & 2 deletions

@@ -21,8 +21,8 @@ while [[ $# -gt 0 ]]; do
             shift
             ;;
         *)
-            echo "Unknown option: $1"
-            exit 1
+            echo "Unknown option: $1" >&2
+            exit 2
             ;;
     esac
 done
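A quick way to exercise the revised error path (the flag name is a deliberate placeholder, and the script is assumed to be run from the repository root):

    # Unknown options now go to stderr, and the script exits with
    # status 2 instead of 1.
    bash deploy/genai_vllm_server_docker/build.sh --no-such-flag
    echo "exit status: $?"    # prints: exit status: 2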

paddlex/paddlex_cli.py

Lines changed: 23 additions & 16 deletions

@@ -36,7 +36,11 @@
     is_dep_available,
     is_paddle2onnx_plugin_available,
 )
-from .utils.env import get_gpu_compute_capability, get_paddle_cuda_version
+from .utils.env import (
+    get_gpu_compute_capability,
+    get_paddle_cuda_version,
+    is_cuda_available,
+)
 from .utils.install import install_packages, uninstall_packages
 from .utils.interactive_get_pipeline import interactive_get_pipeline
 from .utils.pipeline_arguments import PIPELINE_ARGUMENTS

@@ -365,21 +369,24 @@ def _install_genai_deps(plugin_types):
 
     for plugin_type in plugin_types:
         if "vllm" in plugin_type or "sglang" in plugin_type:
-            try:
-                install_packages(["wheel"], constraints="required")
-                cap = get_gpu_compute_capability()
-                if cap >= (12, 0):
-                    install_packages(
-                        ["xformers", "flash-attn == 2.8.3"], constraints="required"
-                    )
-                else:
-                    install_packages(
-                        ["xformers", "flash-attn == 2.8.2"], constraints="required"
-                    )
-            except Exception:
-                logging.error("Installation failed", exc_info=True)
-                sys.exit(1)
-            break
+            install_packages(["xformers"], constraints="required")
+            if is_cuda_available():
+                try:
+                    install_packages(["wheel"], constraints="required")
+                    cap = get_gpu_compute_capability()
+                    assert cap is not None
+                    if cap >= (12, 0):
+                        install_packages(
+                            ["flash-attn == 2.8.3"], constraints="required"
+                        )
+                    else:
+                        install_packages(
+                            ["flash-attn == 2.8.2"], constraints="required"
+                        )
+                except Exception:
+                    logging.error("Installation failed", exc_info=True)
+                    sys.exit(1)
+            break
 
     logging.info(
         "Successfully installed the generative AI plugin"
