Skip to content

Commit 54baddb

Browse files
committed
[Feat] Support vLLM deployment on DCUs (#4710)
* Support vLLM deployment on DCUs
* Fix
* Fix DCU check
1 parent a88b267 commit 54baddb

File tree

2 files changed

+19
-2
lines changed

2 files changed

+19
-2
lines changed

paddlex/inference/genai/backends/vllm.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
from ....utils import logging
1516
from ....utils.deps import is_genai_engine_plugin_available, require_genai_engine_plugin
1617
from ..configs.utils import (
1718
backend_config_to_args,
@@ -61,6 +62,16 @@ def run_vllm_server(host, port, model_name, model_dir, config, chat_template_pat
6162
},
6263
)
6364

65+
import torch
66+
67+
if torch.version.hip is not None and torch.version.cuda is None:
68+
# For DCU
69+
if "api-server-count" in config:
70+
logging.warning(
71+
"Key 'api-server-count' will be popped as it is not supported"
72+
)
73+
config.pop("api-server-count")
74+
6475
args = backend_config_to_args(config)
6576
args = parser.parse_args(args)
6677
validate_parsed_serve_args(args)

paddlex/utils/env.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -65,13 +65,18 @@ def is_cuda_available():
6565
import paddle.device
6666

6767
# TODO: Check runtime availability
68-
return paddle.device.is_compiled_with_cuda()
68+
return (
69+
paddle.device.is_compiled_with_cuda() and not paddle.is_compiled_with_rocm()
70+
)
6971
else:
7072
# If Paddle is unavailable, check GPU availability using PyTorch API.
7173
require_deps("torch")
74+
7275
import torch.cuda
76+
import torch.version
7377

74-
return torch.cuda.is_available()
78+
# Distinguish GPUs and DCUs by checking `torch.version.cuda`
79+
return torch.cuda.is_available() and torch.version.cuda
7580

7681

7782
def get_gpu_compute_capability():
@@ -85,6 +90,7 @@ def get_gpu_compute_capability():
8590
else:
8691
# If Paddle is unavailable, retrieve GPU compute capability from PyTorch instead.
8792
require_deps("torch")
93+
8894
import torch.cuda
8995

9096
cap = torch.cuda.get_device_capability()

0 commit comments

Comments (0)