77
77
- vllm/
78
78
- tests/basic_correctness/test_chunked_prefill
79
79
commands :
80
- - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py
81
- - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py
80
+ - VLLM_ATTENTION_BACKEND=XFORMERS VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest -v -s basic_correctness/test_chunked_prefill.py
81
+ - VLLM_ATTENTION_BACKEND=FLASH_ATTN VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest -v -s basic_correctness/test_chunked_prefill.py
82
82
83
83
- label : Core Test # 10min
84
84
mirror_hardwares : [amd]
@@ -88,7 +88,11 @@ steps:
88
88
- vllm/distributed
89
89
- tests/core
90
90
commands :
91
- - pytest -v -s core
91
+ - VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest -v -s core/test_scheduler.py
92
+ - VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest -v -s core/test_chunked_prefill_scheduler.py
93
+ - VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest -v -s core/block/e2e/test_correctness.py
94
+ - VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest -v -s core/block/e2e/test_correctness_sliding_window.py
95
+ - pytest -v -s core --ignore=core/test_scheduler.py --ignore=core/test_chunked_prefill_scheduler.py --ignore=core/block/e2e/test_correctness.py --ignore=core/block/e2e/test_correctness_sliding_window.py
92
96
93
97
- label : Entrypoints Test # 40min
94
98
working_dir: "/vllm-workspace/tests"
@@ -185,7 +189,8 @@ steps:
185
189
- vllm/
186
190
- tests/prefix_caching
187
191
commands :
188
- - pytest -v -s prefix_caching
192
+ - VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest -v -s prefix_caching/test_prefix_caching.py
193
+ - pytest -v -s prefix_caching --ignore=prefix_caching/test_prefix_caching.py
189
194
190
195
- label : Samplers Test # 36min
191
196
source_file_dependencies :
@@ -209,7 +214,8 @@ steps:
209
214
- tests/spec_decode
210
215
commands :
211
216
- pytest -v -s spec_decode/e2e/test_multistep_correctness.py
212
- - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s spec_decode --ignore=spec_decode/e2e/test_multistep_correctness.py
217
+ - VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest -v -s spec_decode/e2e/test_compatibility.py
218
+ - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s spec_decode --ignore=spec_decode/e2e/test_multistep_correctness.py --ignore=spec_decode/e2e/test_compatibility.py
213
219
214
220
- label : LoRA Test %N # 15min each
215
221
mirror_hardwares : [amd]
@@ -391,7 +397,7 @@ steps:
391
397
- pytest -v -s ./compile/test_full_graph_multi_gpu.py
392
398
- pytest -v -s ./compile/test_wrapper.py
393
399
- VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep -q 'Same node test passed'
394
- - TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m distributed_2_gpus
400
+ - TARGET_TEST_SUITE=L4 VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest basic_correctness/ -v -s -m distributed_2_gpus
395
401
# Avoid importing model tests that cause CUDA reinitialization error
396
402
- pytest models/encoder_decoder/language/test_bart.py -v -s -m distributed_2_gpus
397
403
- pytest models/encoder_decoder/vision_language/test_broadcast.py -v -s -m distributed_2_gpus
0 commit comments