Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 47 additions & 47 deletions .github/workflows/_e2e_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,10 @@ jobs:
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
if: ${{ inputs.type == 'light' }}
run: |
# pytest -sv tests/e2e/singlecard/test_aclgraph_accuracy.py
# pytest -sv tests/e2e/singlecard/test_quantization.py
pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
pytest -sv tests/e2e/singlecard/pooling/test_classification.py::test_classify_correctness
# pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_accuracy.py
# pytest -sv --durations=0 tests/e2e/singlecard/test_quantization.py
pytest -sv --durations=0 tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
pytest -sv --durations=0 tests/e2e/singlecard/pooling/test_classification.py::test_classify_correctness

- name: Run e2e test
env:
Expand All @@ -90,25 +90,25 @@ jobs:
# We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run
# the test separately.

pytest -sv tests/e2e/singlecard/test_completion_with_prompt_embeds.py
pytest -sv tests/e2e/singlecard/test_aclgraph_accuracy.py
pytest -sv tests/e2e/singlecard/test_aclgraph_mem.py
pytest -sv tests/e2e/singlecard/test_async_scheduling.py
pytest -sv tests/e2e/singlecard/test_camem.py
pytest -sv tests/e2e/singlecard/test_guided_decoding.py
pytest -sv --durations=0 tests/e2e/singlecard/test_completion_with_prompt_embeds.py
pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_accuracy.py
pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_mem.py
pytest -sv --durations=0 tests/e2e/singlecard/test_async_scheduling.py
pytest -sv --durations=0 tests/e2e/singlecard/test_camem.py
pytest -sv --durations=0 tests/e2e/singlecard/test_guided_decoding.py
# torch 2.8 doesn't work with lora, fix me
#pytest -sv tests/e2e/singlecard/test_ilama_lora.py
pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
pytest -sv tests/e2e/singlecard/test_quantization.py
pytest -sv tests/e2e/singlecard/test_sampler.py
pytest -sv tests/e2e/singlecard/test_vlm.py
pytest -sv tests/e2e/singlecard/test_xlite.py
pytest -sv tests/e2e/singlecard/pooling/
pytest -sv tests/e2e/singlecard/compile/test_norm_quant_fusion.py
#pytest -sv --durations=0 tests/e2e/singlecard/test_ilama_lora.py
pytest -sv --durations=0 tests/e2e/singlecard/test_profile_execute_duration.py
pytest -sv --durations=0 tests/e2e/singlecard/test_quantization.py
pytest -sv --durations=0 tests/e2e/singlecard/test_sampler.py
pytest -sv --durations=0 tests/e2e/singlecard/test_vlm.py
pytest -sv --durations=0 tests/e2e/singlecard/test_xlite.py
pytest -sv --durations=0 tests/e2e/singlecard/pooling/
pytest -sv --durations=0 tests/e2e/singlecard/compile/test_norm_quant_fusion.py

# ------------------------------------ v1 spec decode test ------------------------------------ #
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
pytest -sv --durations=0 tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
pytest -sv --durations=0 tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py

e2e-2-cards:
name: multicard-2
Expand Down Expand Up @@ -170,38 +170,38 @@ jobs:
VLLM_USE_MODELSCOPE: True
if: ${{ inputs.type == 'light' }}
run: |
pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_qwen3_moe_distributed_mp_tp2_ep
pytest -sv --durations=0 tests/e2e/multicard/test_qwen3_moe.py::test_qwen3_moe_distributed_mp_tp2_ep

- name: Run vllm-project/vllm-ascend test (full)
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
VLLM_USE_MODELSCOPE: True
if: ${{ inputs.type == 'full' }}
run: |
pytest -sv tests/e2e/multicard/test_quantization.py
pytest -sv tests/e2e/multicard/test_aclgraph_capture_replay.py
pytest -sv tests/e2e/multicard/test_full_graph_mode.py
pytest -sv tests/e2e/multicard/test_data_parallel.py
pytest -sv tests/e2e/multicard/test_expert_parallel.py
pytest -sv tests/e2e/multicard/test_external_launcher.py
pytest -sv tests/e2e/multicard/test_single_request_aclgraph.py
pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
pytest -sv --durations=0 tests/e2e/multicard/test_quantization.py
pytest -sv --durations=0 tests/e2e/multicard/test_aclgraph_capture_replay.py
pytest -sv --durations=0 tests/e2e/multicard/test_full_graph_mode.py
pytest -sv --durations=0 tests/e2e/multicard/test_data_parallel.py
pytest -sv --durations=0 tests/e2e/multicard/test_expert_parallel.py
pytest -sv --durations=0 tests/e2e/multicard/test_external_launcher.py
pytest -sv --durations=0 tests/e2e/multicard/test_single_request_aclgraph.py
pytest -sv --durations=0 tests/e2e/multicard/test_fused_moe_allgather_ep.py
# torch 2.8 doesn't work with lora, fix me
#pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
#pytest -sv --durations=0 tests/e2e/multicard/test_ilama_lora_tp2.py

# To avoid oom, we need to run the test in a single process.
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_fc2_for_qwen3_moe
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_flashcomm_v1
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight

pytest -sv tests/e2e/multicard/test_prefix_caching.py
pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
pytest -sv tests/e2e/multicard/test_qwen3_moe.py
pytest -sv tests/e2e/multicard/test_offline_weight_load.py
pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_fc2_for_qwen3_moe
pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_flashcomm_v1
pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight

pytest -sv --durations=0 tests/e2e/multicard/test_prefix_caching.py
pytest -sv --durations=0 tests/e2e/multicard/test_pipeline_parallel.py
pytest -sv --durations=0 tests/e2e/multicard/test_qwen3_moe.py
pytest -sv --durations=0 tests/e2e/multicard/test_offline_weight_load.py

e2e-4-cards:
name: multicard-4
Expand Down Expand Up @@ -264,10 +264,10 @@ jobs:
VLLM_WORKER_MULTIPROC_METHOD: spawn
VLLM_USE_MODELSCOPE: True
run: |
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Kimi_K2_Thinking_W4A16
pytest -sv tests/e2e/multicard/test_data_parallel_tp2.py
pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
pytest -sv --durations=0 tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Kimi_K2_Thinking_W4A16
pytest -sv --durations=0 tests/e2e/multicard/test_data_parallel_tp2.py

- name: Install Ascend toolkit & triton_ascend (for Qwen3-Next-80B-A3B-Instruct)
shell: bash -l {0}
Expand All @@ -283,4 +283,4 @@ jobs:
VLLM_USE_MODELSCOPE: True
run: |
. /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
pytest -sv tests/e2e/multicard/test_qwen3_next.py
pytest -sv --durations=0 tests/e2e/multicard/test_qwen3_next.py
Loading