Merge branch 'uv' of https://github.com/huggingface/optimum-benchmark into uv

IlyasMoutawwakil · IlyasMoutawwakil · commit 7f4856621e7b · 2025-08-19T20:38:25.000+02:00
diff --git a/.github/workflows/test_cli_cuda_vllm.yaml b/.github/workflows/test_cli_cuda_vllm.yaml
@@ -38,10 +38,6 @@ jobs:
     runs-on:
       group: aws-g5-4xlarge-plus
 
-    container:
-      image: vllm/vllm-openai:latest
-      options: --ipc host --gpus all --entrypoint /bin/bash
-
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -71,10 +67,6 @@ jobs:
     runs-on:
       group: aws-g5-12xlarge-plus
 
-    container:
-      image: vllm/vllm-openai:latest
-      options: --ipc host --gpus all --entrypoint /bin/bash
-
     steps:
       - name: Checkout
         uses: actions/checkout@v4
diff --git a/Makefile b/Makefile
@@ -292,21 +292,19 @@ test-cli-cuda-py-txi:
 	uv sync --dev --extra py-txi
 	FORCE_SEQUENTIAL=1 uv run pytest tests/test_cli.py -s -k "cli and cuda and (tgi or tei or txi)"
 
+test-cli-cuda-vllm-single:
+	uv sync --dev --extra vllm
+	FORCE_SEQUENTIAL=1 uv run pytest tests/test_cli.py -s -k "cli and cuda and vllm and not (tp or pp)"
+
+test-cli-cuda-vllm-multi:
+	uv sync --dev --extra vllm
+	FORCE_SEQUENTIAL=1 uv run pytest tests/test_cli.py -s -k "cli and cuda and vllm and (tp or pp)"
+
 test-cli-cuda-onnxruntime:
 	uv sync --dev --extra onnxruntime-gpu
 	uv run pytest tests/test_cli.py -s -k "cli and cuda and onnxruntime"
 
 #### non-uv compatible
-test-cli-cuda-vllm-single:
-	pip install uv --upgrade
-	UV_SYSTEM_PYTHON=1 uv pip install -e .[dev,vllm]
-	FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -s -k "cli and cuda and vllm and not (tp or pp)"
-
-test-cli-cuda-vllm-multi:
-	pip install uv --upgrade
-	UV_SYSTEM_PYTHON=1 uv pip install -e .[dev,vllm]
-	FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -s -k "cli and cuda and vllm and (tp or pp)"
-
 test-cli-cuda-tensorrt-llm-single:
 	UV_SYSTEM_PYTHON=1 uv pip install -e .[dev,tensorrt-llm]
 	FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -s -k "cli and cuda and tensorrt_llm and not (tp or pp)"
@@ -328,21 +326,19 @@ test-cli-cuda-py-txi-examples:
 	uv sync --dev --extra py-txi
 	FORCE_SEQUENTIAL=1 uv run pytest tests/test_examples.py -s -k "cli and cuda and (tgi or tei or txi)"
 
+test-cli-cuda-vllm-single-examples:
+	uv sync --dev --extra vllm
+	FORCE_SEQUENTIAL=1 uv run pytest tests/test_examples.py -s -k "cli and cuda and vllm and not (tp or pp)"
+
+test-cli-cuda-vllm-multi-examples:
+	uv sync --dev --extra vllm
+	FORCE_SEQUENTIAL=1 uv run pytest tests/test_examples.py -s -k "cli and cuda and vllm and (tp or pp)"
+
 test-cli-cuda-onnxruntime-examples:
 	uv sync --dev --extra onnxruntime-gpu
 	uv run pytest tests/test_examples.py -s -k "cli and cuda and onnxruntime"
 
 #### non-uv compatible
-test-cli-cuda-vllm-single-examples:
-	pip install uv --upgrade
-	UV_SYSTEM_PYTHON=1 uv pip install -e .[dev,vllm]
-	FORCE_SEQUENTIAL=1 pytest tests/test_examples.py -s -k "cli and cuda and vllm and not (tp or pp)"
-
-test-cli-cuda-vllm-multi-examples:
-	pip install uv --upgrade
-	UV_SYSTEM_PYTHON=1 uv pip install -e .[dev,vllm]
-	FORCE_SEQUENTIAL=1 pytest tests/test_examples.py -s -k "cli and cuda and vllm and (tp or pp)"
-
 test-cli-cuda-tensorrt-llm-single-examples:
 	pip install uv --upgrade
 	UV_SYSTEM_PYTHON=1 uv pip install -e .[dev,tensorrt-llm]
diff --git a/pyproject.toml b/pyproject.toml
@@ -100,11 +100,9 @@ onnxruntime-gpu = ["optimum[onnxruntime-gpu]>=1.27.0"]
 tensorrt-llm = [
     "huggingface-hub<0.26.0; extra != 'openvino' and extra != 'onnxruntime' and extra != 'onnxruntime-gpu' and extra != 'ipex' and extra != 'vllm' and extra != 'llama-cpp' and extra != 'py-txi'",
 ]
-vllm = [
-    "vllm; extra != 'openvino' and extra != 'onnxruntime' and extra != 'onnxruntime-gpu' and extra != 'ipex' and extra != 'tensorrt-llm' and extra != 'llama-cpp' and extra != 'py-txi'",
-]
 llama-cpp = ["llama-cpp-python"]
 py-txi = ["py-txi"]
+vllm = ["vllm"]
 # optional dependencies
 sentence-transformers = ["sentence-transformers"]
 gptqmodel = ["gptqmodel", "optimum"]
diff --git a/uv.lock b/uv.lock