@@ -75,10 +75,10 @@ jobs:
           PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
         if: ${{ inputs.type == 'light' }}
         run: |
-          # pytest -sv tests/e2e/singlecard/test_aclgraph_accuracy.py
-          # pytest -sv tests/e2e/singlecard/test_quantization.py
-          pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
-          pytest -sv tests/e2e/singlecard/pooling/test_classification.py::test_classify_correctness
+          # pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_accuracy.py
+          # pytest -sv --durations=0 tests/e2e/singlecard/test_quantization.py
+          pytest -sv --durations=0 tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
+          pytest -sv --durations=0 tests/e2e/singlecard/pooling/test_classification.py::test_classify_correctness

       - name: Run e2e test
         env:
@@ -90,25 +90,25 @@ jobs:
           # We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run
           # the test separately.

-          pytest -sv tests/e2e/singlecard/test_completion_with_prompt_embeds.py
-          pytest -sv tests/e2e/singlecard/test_aclgraph_accuracy.py
-          pytest -sv tests/e2e/singlecard/test_aclgraph_mem.py
-          pytest -sv tests/e2e/singlecard/test_async_scheduling.py
-          pytest -sv tests/e2e/singlecard/test_camem.py
-          pytest -sv tests/e2e/singlecard/test_guided_decoding.py
+          pytest -sv --durations=0 tests/e2e/singlecard/test_completion_with_prompt_embeds.py
+          pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_accuracy.py
+          pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_mem.py
+          pytest -sv --durations=0 tests/e2e/singlecard/test_async_scheduling.py
+          pytest -sv --durations=0 tests/e2e/singlecard/test_camem.py
+          pytest -sv --durations=0 tests/e2e/singlecard/test_guided_decoding.py
           # torch 2.8 doesn't work with lora, fix me
-          #pytest -sv tests/e2e/singlecard/test_ilama_lora.py
-          pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
-          pytest -sv tests/e2e/singlecard/test_quantization.py
-          pytest -sv tests/e2e/singlecard/test_sampler.py
-          pytest -sv tests/e2e/singlecard/test_vlm.py
-          pytest -sv tests/e2e/singlecard/test_xlite.py
-          pytest -sv tests/e2e/singlecard/pooling/
-          pytest -sv tests/e2e/singlecard/compile/test_norm_quant_fusion.py
+          #pytest -sv --durations=0 tests/e2e/singlecard/test_ilama_lora.py
+          pytest -sv --durations=0 tests/e2e/singlecard/test_profile_execute_duration.py
+          pytest -sv --durations=0 tests/e2e/singlecard/test_quantization.py
+          pytest -sv --durations=0 tests/e2e/singlecard/test_sampler.py
+          pytest -sv --durations=0 tests/e2e/singlecard/test_vlm.py
+          pytest -sv --durations=0 tests/e2e/singlecard/test_xlite.py
+          pytest -sv --durations=0 tests/e2e/singlecard/pooling/
+          pytest -sv --durations=0 tests/e2e/singlecard/compile/test_norm_quant_fusion.py

           # ------------------------------------ v1 spec decode test ------------------------------------ #
-          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
-          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
+          pytest -sv --durations=0 tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
+          pytest -sv --durations=0 tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py

   e2e-2-cards:
     name: multicard-2
@@ -170,38 +170,38 @@ jobs:
           VLLM_USE_MODELSCOPE: True
         if: ${{ inputs.type == 'light' }}
         run: |
-          pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_qwen3_moe_distributed_mp_tp2_ep
+          pytest -sv --durations=0 tests/e2e/multicard/test_qwen3_moe.py::test_qwen3_moe_distributed_mp_tp2_ep

       - name: Run vllm-project/vllm-ascend test (full)
         env:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
           VLLM_USE_MODELSCOPE: True
         if: ${{ inputs.type == 'full' }}
         run: |
-          pytest -sv tests/e2e/multicard/test_quantization.py
-          pytest -sv tests/e2e/multicard/test_aclgraph_capture_replay.py
-          pytest -sv tests/e2e/multicard/test_full_graph_mode.py
-          pytest -sv tests/e2e/multicard/test_data_parallel.py
-          pytest -sv tests/e2e/multicard/test_expert_parallel.py
-          pytest -sv tests/e2e/multicard/test_external_launcher.py
-          pytest -sv tests/e2e/multicard/test_single_request_aclgraph.py
-          pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
+          pytest -sv --durations=0 tests/e2e/multicard/test_quantization.py
+          pytest -sv --durations=0 tests/e2e/multicard/test_aclgraph_capture_replay.py
+          pytest -sv --durations=0 tests/e2e/multicard/test_full_graph_mode.py
+          pytest -sv --durations=0 tests/e2e/multicard/test_data_parallel.py
+          pytest -sv --durations=0 tests/e2e/multicard/test_expert_parallel.py
+          pytest -sv --durations=0 tests/e2e/multicard/test_external_launcher.py
+          pytest -sv --durations=0 tests/e2e/multicard/test_single_request_aclgraph.py
+          pytest -sv --durations=0 tests/e2e/multicard/test_fused_moe_allgather_ep.py
           # torch 2.8 doesn't work with lora, fix me
-          #pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
+          #pytest -sv --durations=0 tests/e2e/multicard/test_ilama_lora_tp2.py

           # To avoid oom, we need to run the test in a single process.
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_fc2_for_qwen3_moe
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_flashcomm_v1
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight
-
-          pytest -sv tests/e2e/multicard/test_prefix_caching.py
-          pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
-          pytest -sv tests/e2e/multicard/test_qwen3_moe.py
-          pytest -sv tests/e2e/multicard/test_offline_weight_load.py
+          pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
+          pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
+          pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
+          pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
+          pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_fc2_for_qwen3_moe
+          pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_flashcomm_v1
+          pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight
+
+          pytest -sv --durations=0 tests/e2e/multicard/test_prefix_caching.py
+          pytest -sv --durations=0 tests/e2e/multicard/test_pipeline_parallel.py
+          pytest -sv --durations=0 tests/e2e/multicard/test_qwen3_moe.py
+          pytest -sv --durations=0 tests/e2e/multicard/test_offline_weight_load.py

   e2e-4-cards:
     name: multicard-4
@@ -264,10 +264,10 @@ jobs:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
           VLLM_USE_MODELSCOPE: True
         run: |
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Kimi_K2_Thinking_W4A16
-          pytest -sv tests/e2e/multicard/test_data_parallel_tp2.py
+          pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
+          pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
+          pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Kimi_K2_Thinking_W4A16
+          pytest -sv --durations=0 tests/e2e/multicard/test_data_parallel_tp2.py

       - name: Install Ascend toolkit & triton_ascend (for Qwen3-Next-80B-A3B-Instruct)
         shell: bash -l {0}
@@ -283,4 +283,4 @@ jobs:
           VLLM_USE_MODELSCOPE: True
         run: |
           . /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
-          pytest -sv tests/e2e/multicard/test_qwen3_next.py
+          pytest -sv --durations=0 tests/e2e/multicard/test_qwen3_next.py
0 commit comments