 import gc
 import os
+import re
 import sys
 from enum import Enum
 
@@ -30,11 +31,23 @@ def pytest_make_parametrize_id(config, val, argname):
 
 @pytest.hookimpl(tryfirst=True)
 def pytest_cmdline_main(config: pytest.Config) -> None:
-    # Force disabling distributed framework if benchmarks are selected
+    # Make sure that benchmarks are running on GPU and that the number of workers is valid
     expr = Expression.compile(config.option.markexpr)
     is_benchmarks = expr.evaluate(MarkMatcher.from_markers((pytest.mark.benchmarks,)))
     if is_benchmarks:
-        config.option.numprocesses = 0
+        # Make sure that the GPU backend is enforced
+        backend = config.getoption("--backend")
+        if backend == "cpu":
+            raise ValueError("Running benchmarks on CPU is not supported.")
+        config.option.backend = "gpu"
+
+        # Make sure that the number of workers is not too large
+        if isinstance(config.option.numprocesses, int):
+            max_workers = max(pytest_xdist_auto_num_workers(config), 1)
+            if config.option.numprocesses > max_workers:
+                raise ValueError(
+                    f"The number of workers for running benchmarks cannot exceed '{max_workers}' on this machine."
+                )
 
     # Force disabling forked for non-linux systems
     if not sys.platform.startswith("linux"):
@@ -45,21 +58,40 @@ def pytest_cmdline_main(config: pytest.Config) -> None:
     if show_viewer:
         config.option.numprocesses = 0
 
-    # Disable low-level parallelization if distributed framework is enabled
-    # if config.option.numprocesses > 0:
-    os.environ["OMP_NUM_THREADS"] = "1"
-    os.environ["OPENBLAS_NUM_THREADS"] = "1"
-    os.environ["MKL_NUM_THREADS"] = "1"
-    os.environ["VECLIB_MAXIMUM_THREADS"] = "1"
-    os.environ["NUMEXPR_NUM_THREADS"] = "1"
-    os.environ["NUMBA_NUM_THREADS"] = "1"
+    # Disable low-level parallelization if the distributed framework is enabled.
+    # FIXME: It should be set to `max(int(physical_core_count / num_workers), 1)`, but 'num_workers' may be unknown.
+    if not is_benchmarks and config.option.numprocesses != 0:
+        os.environ["OMP_NUM_THREADS"] = "1"
+        os.environ["OPENBLAS_NUM_THREADS"] = "1"
+        os.environ["MKL_NUM_THREADS"] = "1"
+        os.environ["VECLIB_MAXIMUM_THREADS"] = "1"
+        os.environ["NUMEXPR_NUM_THREADS"] = "1"
+        os.environ["NUMBA_NUM_THREADS"] = "1"
+
+
+def _get_gpu_indices():
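+    # Prefer an explicit CUDA_VISIBLE_DEVICES restriction if one has been set by the user.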
+    nvidia_gpu_indices = os.environ.get("CUDA_VISIBLE_DEVICES")
+    if nvidia_gpu_indices is not None:
+        return tuple(sorted(map(int, nvidia_gpu_indices.split(","))))
+
+    if sys.platform == "linux":
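+        # Fall back to enumerating NVIDIA GPUs through the driver's procfs interface.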
+        nvidia_gpu_indices = []
+        nvidia_gpu_interface_path = "/proc/driver/nvidia/gpus/"
+        if os.path.exists(nvidia_gpu_interface_path):
+            for device_path in os.listdir(nvidia_gpu_interface_path):
+                with open(os.path.join(nvidia_gpu_interface_path, device_path, "information"), "r") as f:
+                    gpu_id = int(re.search(r"Device Minor:\s+(\d+)", f.read()).group(1))
+                    nvidia_gpu_indices.append(gpu_id)
+        return tuple(sorted(nvidia_gpu_indices))
+
+    return (0,)
 
 
 def pytest_xdist_auto_num_workers(config):
-    # Get available memory (RAM & VRAM) and number of physical cores.
     import psutil
     import genesis as gs
 
+    # Get available memory (RAM & VRAM) and number of physical cores.
     physical_core_count = psutil.cpu_count(logical=False)
     _, _, ram_memory, _ = gs.utils.get_device(gs.cpu)
     _, _, vram_memory, _ = gs.utils.get_device(gs.gpu)
@@ -76,15 +108,39 @@ def pytest_xdist_auto_num_workers(config):
     else:
         ram_memory_per_worker = 7.5
         vram_memory_per_worker = 1.6
-    return min(
-        int(ram_memory / ram_memory_per_worker),
-        int(vram_memory / vram_memory_per_worker),
+    num_workers = min(
         physical_core_count,
+        max(int(ram_memory / ram_memory_per_worker), 1),
+        max(int(vram_memory / vram_memory_per_worker), 1),
     )
 
+    # Special treatment for benchmarks
+    expr = Expression.compile(config.option.markexpr)
+    is_benchmarks = expr.evaluate(MarkMatcher.from_markers((pytest.mark.benchmarks,)))
+    if is_benchmarks:
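+        # Cap benchmark workers at one per GPU, with a few physical cores reserved for each.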
+        num_cpu_per_gpu = 4
+        num_workers = min(
+            num_workers,
+            len(_get_gpu_indices()),
+            max(int(physical_core_count / num_cpu_per_gpu), 1),
+        )
+
+    return num_workers
+
+
+def pytest_runtest_setup(item):
+    # Enforce GPU affinity when the distributed framework is enabled
+    worker_id = os.environ.get("PYTEST_XDIST_WORKER")
+    if worker_id and worker_id.startswith("gw"):
+        worker_num = int(worker_id[2:])
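+        # Assign xdist workers to the available GPUs in round-robin order.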
+        gpu_indices = _get_gpu_indices()
+        gpu_num = worker_num % len(gpu_indices)
+        os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_indices[gpu_num])
+        os.environ["TI_VISIBLE_DEVICE"] = str(gpu_indices[gpu_num])
+
 
 def pytest_addoption(parser):
-    parser.addoption("--backend", action="store", default="cpu", help="Default simulation backend.")
+    parser.addoption("--backend", action="store", default=None, help="Default simulation backend.")
     parser.addoption("--vis", action="store_true", default=False, help="Enable interactive viewer.")
 
 
@@ -97,7 +153,7 @@ def show_viewer(pytestconfig):
 def backend(pytestconfig):
     import genesis as gs
 
-    backend = pytestconfig.getoption("--backend", "cpu")
+    backend = pytestconfig.getoption("--backend") or gs.cpu
     if isinstance(backend, str):
         return getattr(gs.constants.backend, backend)
     return backend
@@ -220,6 +276,7 @@ def initialize_genesis(request, backend, taichi_offline_cache):
     else:
         precision = "32"
         debug = False
+
     try:
         if not taichi_offline_cache:
             os.environ["TI_OFFLINE_CACHE"] = "0"