Skip to content

Commit af60150

Browse files
authored
Added vllm-gpu markers for Nvidia and AMD GPU testcases (#1039)
1 parent 2283713 commit af60150

14 files changed

Lines changed: 55 additions & 13 deletions

pytest.ini

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@ markers =
     kueue: Mark tests which are testing Kueue
     model_server_gpu: Mark tests which are testing model server with GPU resources
     gpu: Mark tests which require GPU resources
-    multigpu: Mark tests which require multiple GPU resources
+    vllm_nvidia_single_gpu: Mark tests which require GPU resources for VLLM NVIDIA deployment
+    vllm_nvidia_multi_gpu: Mark tests which require multiple GPU resources for VLLM NVIDIA deployment
+    vllm_amd_gpu: Mark tests which require GPU resources for VLLM AMD deployment
     multinode: Mark tests which require multiple nodes
     keda: Mark tests which are testing KEDA scaling
     llmd_cpu: Mark tests which are testing LLMD (LLM Deployment) with CPU resources

tests/model_serving/model_runtime/vllm/basic_model_deployment/test_elyza_japanese_llama_2_7b_instruct.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
 pytestmark = pytest.mark.usefixtures("skip_if_no_supported_accelerator_type", "valid_aws_config")
 
 
+@pytest.mark.vllm_nvidia_single_gpu
+@pytest.mark.vllm_amd_gpu
 @pytest.mark.parametrize(
     "model_namespace, s3_models_storage_uri, serving_runtime, vllm_inference_service",
     [
@@ -79,7 +81,8 @@ def test_elyza_raw_simple_tgis_model_inference(
 )
 
 
-@pytest.mark.multigpu
+@pytest.mark.vllm_nvidia_multi_gpu
+@pytest.mark.vllm_amd_gpu
 @pytest.mark.parametrize(
     "model_namespace, s3_models_storage_uri, serving_runtime, vllm_inference_service",
     [

tests/model_serving/model_runtime/vllm/basic_model_deployment/test_granite_2b_instruct_preview_4k_r240917a.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
 pytestmark = pytest.mark.usefixtures("skip_if_no_supported_accelerator_type", "valid_aws_config")
 
 
+@pytest.mark.vllm_nvidia_single_gpu
+@pytest.mark.vllm_amd_gpu
 @pytest.mark.parametrize(
     "model_namespace, s3_models_storage_uri, serving_runtime, vllm_inference_service",
     [
@@ -46,7 +48,8 @@ def test_deploy_model_inference(self, vllm_inference_service, vllm_pod_resource,
         assert completion_responses == response_snapshot
 
 
-@pytest.mark.multigpu
+@pytest.mark.vllm_nvidia_multi_gpu
+@pytest.mark.vllm_amd_gpu
 @pytest.mark.parametrize(
     "model_namespace, s3_models_storage_uri, serving_runtime, vllm_inference_service",
     [

tests/model_serving/model_runtime/vllm/basic_model_deployment/test_granite_7b_redhat_lab.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
 pytestmark = pytest.mark.usefixtures("skip_if_no_supported_accelerator_type", "valid_aws_config")
 
 
+@pytest.mark.vllm_nvidia_single_gpu
+@pytest.mark.vllm_amd_gpu
 @pytest.mark.parametrize(
     "model_namespace, s3_models_storage_uri, serving_runtime, vllm_inference_service",
     [
@@ -79,7 +81,8 @@ def test_granite_lab_raw_simple_tgis_model_inference(
 )
 
 
-@pytest.mark.multigpu
+@pytest.mark.vllm_nvidia_multi_gpu
+@pytest.mark.vllm_amd_gpu
 @pytest.mark.parametrize(
     "model_namespace, s3_models_storage_uri, serving_runtime, vllm_inference_service",
     [

tests/model_serving/model_runtime/vllm/basic_model_deployment/test_granite_7b_starter.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
 pytestmark = pytest.mark.usefixtures("skip_if_no_supported_accelerator_type", "valid_aws_config")
 
 
+@pytest.mark.vllm_nvidia_single_gpu
+@pytest.mark.vllm_amd_gpu
 @pytest.mark.parametrize(
     "model_namespace, s3_models_storage_uri, serving_runtime, vllm_inference_service",
     [
@@ -79,7 +81,8 @@ def test_granite_starter_raw_simple_tgis_model_inference(
 )
 
 
-@pytest.mark.multigpu
+@pytest.mark.vllm_nvidia_multi_gpu
+@pytest.mark.vllm_amd_gpu
 @pytest.mark.parametrize(
     "model_namespace, s3_models_storage_uri, serving_runtime, vllm_inference_service",
     [

tests/model_serving/model_runtime/vllm/basic_model_deployment/test_llama31_8B_instruct.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
 pytestmark = pytest.mark.usefixtures("skip_if_no_supported_accelerator_type", "valid_aws_config")
 
 
+@pytest.mark.vllm_nvidia_single_gpu
+@pytest.mark.vllm_amd_gpu
 @pytest.mark.parametrize(
     "model_namespace, s3_models_storage_uri, serving_runtime, vllm_inference_service",
     [
@@ -79,7 +81,8 @@ def test_llama31_instruct_8b_raw_simple_tgis_model_inference(
 )
 
 
-@pytest.mark.multigpu
+@pytest.mark.vllm_nvidia_multi_gpu
+@pytest.mark.vllm_amd_gpu
 @pytest.mark.parametrize(
     "model_namespace, s3_models_storage_uri, serving_runtime, vllm_inference_service",
     [

tests/model_serving/model_runtime/vllm/basic_model_deployment/test_llama3_8B_instruct.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
 pytestmark = pytest.mark.usefixtures("skip_if_no_supported_accelerator_type", "valid_aws_config")
 
 
+@pytest.mark.vllm_nvidia_single_gpu
+@pytest.mark.vllm_amd_gpu
 @pytest.mark.parametrize(
     "model_namespace, s3_models_storage_uri, serving_runtime, vllm_inference_service",
     [
@@ -79,7 +81,8 @@ def test_llama3_instruct_8b_raw_simple_tgis_model_inference(
 )
 
 
-@pytest.mark.multigpu
+@pytest.mark.vllm_nvidia_multi_gpu
+@pytest.mark.vllm_amd_gpu
 @pytest.mark.parametrize(
     "model_namespace, s3_models_storage_uri, serving_runtime, vllm_inference_service",
     [

tests/model_serving/model_runtime/vllm/basic_model_deployment/test_llama_2_13b_chat.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
 pytestmark = pytest.mark.usefixtures("skip_if_no_supported_accelerator_type", "valid_aws_config")
 
 
+@pytest.mark.vllm_nvidia_single_gpu
+@pytest.mark.vllm_amd_gpu
 @pytest.mark.parametrize(
     "model_namespace, s3_models_storage_uri, serving_runtime, vllm_inference_service",
     [
@@ -78,7 +80,8 @@ def test_llamachat_raw_simple_tgis_model_inference(
 )
 
 
-@pytest.mark.multigpu
+@pytest.mark.vllm_nvidia_multi_gpu
+@pytest.mark.vllm_amd_gpu
 @pytest.mark.parametrize(
     "model_namespace, s3_models_storage_uri, serving_runtime, vllm_inference_service",
     [

tests/model_serving/model_runtime/vllm/basic_model_deployment/test_merlinite_7b_lab.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
 pytestmark = pytest.mark.usefixtures("skip_if_no_supported_accelerator_type", "valid_aws_config")
 
 
+@pytest.mark.vllm_nvidia_single_gpu
+@pytest.mark.vllm_amd_gpu
 @pytest.mark.parametrize(
     "model_namespace, s3_models_storage_uri, serving_runtime, vllm_inference_service",
     [
@@ -79,7 +81,8 @@ def test_merlinite_lab_7b_raw_simple_tgis_model_inference(
 )
 
 
-@pytest.mark.multigpu
+@pytest.mark.vllm_nvidia_multi_gpu
+@pytest.mark.vllm_amd_gpu
 @pytest.mark.parametrize(
     "model_namespace, s3_models_storage_uri, serving_runtime, vllm_inference_service",
     [

tests/model_serving/model_runtime/vllm/multimodal/test_granite_31_2b_vision.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
 pytestmark = pytest.mark.usefixtures("skip_if_no_supported_accelerator_type", "valid_aws_config")
 
 
+@pytest.mark.vllm_nvidia_single_gpu
+@pytest.mark.vllm_amd_gpu
 @pytest.mark.parametrize(
     "model_namespace, s3_models_storage_uri, serving_runtime, vllm_inference_service",
     [
@@ -72,7 +74,8 @@ def test_multi_image_query_inference(
     validate_inference_output(model_info, chat_responses, completion_responses, response_snapshot=response_snapshot)
 
 
-@pytest.mark.multigpu
+@pytest.mark.vllm_nvidia_multi_gpu
+@pytest.mark.vllm_amd_gpu
 @pytest.mark.parametrize(
     "model_namespace, s3_models_storage_uri, serving_runtime, vllm_inference_service",
     [

0 commit comments

Comments (0)