2222sys .path .insert (0 , str (Path (__file__ ).parent ))
2323
2424import pytest
25- import torch
26- from vllm .config import VllmConfig , set_current_vllm_config
2725
2826
2927def pytest_addoption (parser ):
@@ -35,20 +33,38 @@ def pytest_addoption(parser):
3533 parser .addoption ("--obj-ca_bundle" , default = None )
3634
3735
38- @pytest .fixture (scope = "session" , autouse = True )
39- def require_cuda ():
def pytest_configure(config):
    """Register the custom ``no_cuda_required`` marker with pytest.

    Declaring the marker through the ``markers`` ini-value keeps pytest from
    reporting it as unknown when tests are decorated with
    ``@pytest.mark.no_cuda_required``.

    Args:
        config: The pytest config object passed to this hook.
    """
    config.addinivalue_line(
        "markers",
        "no_cuda_required: mark a test as not requiring CUDA setup/teardown",
    )
41+
42+
@pytest.fixture(autouse=True)
def require_cuda(request):
    """Skip the current test when CUDA is not available.

    Tests marked with ``no_cuda_required`` are exempt: for those the fixture
    returns immediately and never imports ``torch``.

    Args:
        request: The pytest fixture request for the test being set up.
    """
    if request.node.get_closest_marker("no_cuda_required"):
        return

    # Imported lazily so marked tests do not pay for (or depend on) torch.
    import torch

    if not torch.cuda.is_available():
        pytest.skip("CUDA not available")
4353
4454
@pytest.fixture(autouse=True)
def cuda_teardown(request):
    """Ensure CUDA and C++ thread-pool resources from one test are fully
    released before the next test starts. Without this, async destructors
    can cause 'cudaErrorUnknown' or stale file-open errors in subsequent tests.

    Tests marked with ``no_cuda_required`` get no teardown work at all, and
    ``torch`` is never imported for them.

    Args:
        request: The pytest fixture request for the test being torn down.
    """
    cuda_exempt = request.node.get_closest_marker("no_cuda_required")

    yield

    if cuda_exempt:
        return

    # Imported lazily so marked tests do not pay for (or depend on) torch.
    import torch

    gc.collect()  # force Python GC to call C++ destructors immediately
    if not torch.cuda.is_available():
        # NOTE(review): without a usable CUDA runtime the calls below are
        # expected to raise, which would turn a skipped test into a teardown
        # error -- confirm against the torch version in use.
        return
    torch.cuda.synchronize()  # surface any async CUDA errors in the right test
    torch.cuda.empty_cache()  # free cached allocations so next test starts clean
@@ -61,6 +77,8 @@ def default_vllm_config():
6177 that use get_current_vllm_config() outside of a full engine context.
6278 This matches vLLM's internal test fixture pattern.
6379 """
80+ from vllm .config import VllmConfig , set_current_vllm_config
81+
6482 # Use empty VllmConfig() which provides sensible defaults
6583 with set_current_vllm_config (VllmConfig ()):
6684 yield
0 commit comments