From c4905ec7b6ead6775df0a3baa1db89becb448f7f Mon Sep 17 00:00:00 2001
From: Ruodi Lu
Date: Wed, 11 Mar 2026 04:43:01 +0000
Subject: [PATCH 1/2] fix perf test cases issue

Signed-off-by: Ruodi Lu
---
 .../integration/test_lists/qa/llm_perf_core.yml | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/tests/integration/test_lists/qa/llm_perf_core.yml b/tests/integration/test_lists/qa/llm_perf_core.yml
index 2095ccb919e..6230d235bfa 100644
--- a/tests/integration/test_lists/qa/llm_perf_core.yml
+++ b/tests/integration/test_lists/qa/llm_perf_core.yml
@@ -5,7 +5,8 @@ llm_perf_core:
 # ===============================================================================
 # 1: All GPUs common tests(L20, L40S, H100, H20, H200, GB200, B200, B300, GB300, RTX-6000D, RTX-6000-Server test cases)
 # 2: L20, L40S, H100, H20, H200
-# 3: L40S, H100, H20, H200
+# 3: L40S, H100, H20, H200 (4 GPUs)
+# 3b: H100, H20, H200 (8 GPUs)
 # 4: H100, H20, H200 test cases
 # 5: H100, H20, H200, GB200, B200, B300, GB300, RTX6000-D, RTX6000-Server test cases
 # 6: GB200, B200, B300, GB300, RTX6000-Server test cases
@@ -136,6 +137,17 @@ llm_perf_core:
   - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:2000,500-gpus:4]
   - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-gpus:4]
   - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-gpus:4] #llama_v3.3_70b_instruct_fp8
+
+
+# 3b: L40S, H100, H20, H200 (8 GPUs)
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 8
+      compute_capability:
+        gt: 8.0
+        lte: 9.0
+  tests:
   - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-streaming-float8-input_output_len:512,32-gpus:8]
   - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-gpus:8]
   - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:2000,200-gpus:8]
@@ -185,7 +197,6 @@ llm_perf_core:
   - perf/test_perf.py::test_perf[phi_4_multimodal_instruct_audio-bench-pytorch-bfloat16-input_output_len:1000,1000-loras:1-con:250]
   - perf/test_perf.py::test_perf[phi_4_multimodal_instruct_image-bench-pytorch-bfloat16-input_output_len:1000,1000-loras:1-con:250]
   - perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-float8-input_output_len:128,128]
-  - perf/test_perf.py::test_perf[qwen3_235b_a22b_fp8-bench-pytorch-float8-input_output_len:1000,2000-con:256-ep:8-gpus:8]
 
 
 # 6: GB200, B200, B300, GB300, RTX6000-Server test cases
@@ -381,6 +392,8 @@
         lte: 12.0
   tests:
   - perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-ep:8-tp:8-gpus:8]
+  # qwen3_235b_a22b_fp8
+  - perf/test_perf.py::test_perf[qwen3_235b_a22b_fp8-bench-pytorch-float8-input_output_len:1000,2000-con:256-ep:8-gpus:8]
 
 
 # 12: RTX-6000D, RTX-6000 Server test cases

From d3be4bbd0478116cc11bebf01dc58a0ee08d0775 Mon Sep 17 00:00:00 2001
From: ruodil <200874449+ruodil@users.noreply.github.com>
Date: Thu, 12 Mar 2026 10:35:06 +0800
Subject: [PATCH 2/2] Update tests/integration/test_lists/qa/llm_perf_core.yml

Co-authored-by: yufeiwu-nv <230315618+yufeiwu-nv@users.noreply.github.com>
Signed-off-by: ruodil <200874449+ruodil@users.noreply.github.com>
---
 tests/integration/test_lists/qa/llm_perf_core.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_lists/qa/llm_perf_core.yml b/tests/integration/test_lists/qa/llm_perf_core.yml
index 6230d235bfa..c1387eeb864 100644
--- a/tests/integration/test_lists/qa/llm_perf_core.yml
+++ b/tests/integration/test_lists/qa/llm_perf_core.yml
@@ -139,7 +139,7 @@ llm_perf_core:
   - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-gpus:4] #llama_v3.3_70b_instruct_fp8
 
 
-# 3b: L40S, H100, H20, H200 (8 GPUs)
+# 3b: H100, H20, H200 (8 GPUs)
 - condition:
     ranges:
       system_gpu_count: