1818import sys
1919from typing import Dict , Optional , Tuple
2020
21+ import cuda .bench as bench
2122import cuda .cccl .headers as headers
2223import cuda .core .experimental as core
23- import cuda .nvbench as nvbench
2424
2525
def as_core_Stream(cs: bench.CudaStream) -> core.Stream:
    """Convert a benchmark-provided CUDA stream into a ``cuda.core`` stream.

    Args:
        cs: Stream object handed out by the benchmark framework.

    Returns:
        A ``core.Stream`` built from the value returned by ``cs.addressof()``.
    """
    stream_handle = cs.addressof()
    return core.Stream.from_handle(stream_handle)
2828
2929
@@ -58,34 +58,34 @@ def make_sleep_kernel():
5858 return mod .get_kernel ("sleep_kernel" )
5959
6060
def simple(state: bench.State):
    """Benchmark with no axes: launch the sleep kernel with a fixed duration.

    Args:
        state: Benchmark state; used to request a minimum sample count and
            to execute the launcher.
    """
    state.set_min_samples(1000)

    kernel = make_sleep_kernel()
    config = core.LaunchConfig(grid=1, block=1, shmem_size=0)
    # Fixed sleep duration passed to the kernel (presumably seconds -- confirm
    # against the kernel source).
    duration = 1e-3

    def launcher(launch: bench.Launch):
        # Launch on the stream the benchmark framework supplies.
        stream = as_core_Stream(launch.get_stream())
        core.launch(stream, config, kernel, duration)

    state.exec(launcher)
7272
7373
def single_float64_axis(state: bench.State):
    """Launch the sleep kernel with a duration taken from the "Duration" axis.

    If the benchmark was registered without a "Duration" axis, a built-in
    default duration is used instead.

    Args:
        state: Benchmark state; queried for the axis value and used to
            execute the launcher.
    """
    fallback_duration = 3.14e-4
    # Axis value when present, otherwise the fallback above.
    duration = state.get_float64_or_default("Duration", fallback_duration)

    kernel = make_sleep_kernel()
    config = core.LaunchConfig(grid=1, block=1, shmem_size=0)

    def launcher(launch: bench.Launch):
        # Launch on the stream the benchmark framework supplies.
        stream = as_core_Stream(launch.get_stream())
        core.launch(stream, config, kernel, duration)

    state.exec(launcher)
8686
8787
def default_value(state: bench.State):
    """Run ``single_float64_axis`` as its own benchmark.

    Args:
        state: Benchmark state, forwarded unchanged.
    """
    # Pure delegation: the body of the benchmark lives in single_float64_axis.
    single_float64_axis(state)
9090
9191
@@ -120,7 +120,7 @@ def make_copy_kernel(in_type: Optional[str] = None, out_type: Optional[str] = No
120120 return mod .get_kernel (instance_name )
121121
122122
123- def copy_sweep_grid_shape (state : nvbench .State ):
123+ def copy_sweep_grid_shape (state : bench .State ):
124124 block_size = state .get_int64 ("BlockSize" )
125125 num_blocks = state .get_int64 ("NumBlocks" )
126126
@@ -140,14 +140,14 @@ def copy_sweep_grid_shape(state: nvbench.State):
140140 krn = make_copy_kernel ()
141141 launch_config = core .LaunchConfig (grid = num_blocks , block = block_size , shmem_size = 0 )
142142
143- def launcher (launch : nvbench .Launch ):
143+ def launcher (launch : bench .Launch ):
144144 s = as_core_Stream (launch .get_stream ())
145145 core .launch (s , launch_config , krn , input_buf , output_buf , num_values )
146146
147147 state .exec (launcher )
148148
149149
150- def copy_type_sweep (state : nvbench .State ):
150+ def copy_type_sweep (state : bench .State ):
151151 type_id = state .get_int64 ("TypeID" )
152152
153153 types_map : Dict [int , Tuple [type , str ]] = {
@@ -178,7 +178,7 @@ def copy_type_sweep(state: nvbench.State):
178178 krn = make_copy_kernel (value_cuda_t , value_cuda_t )
179179 launch_config = core .LaunchConfig (grid = 256 , block = 256 , shmem_size = 0 )
180180
181- def launcher (launch : nvbench .Launch ):
181+ def launcher (launch : bench .Launch ):
182182 s = as_core_Stream (launch .get_stream ())
183183 core .launch (s , launch_config , krn , input_buf , output_buf , num_values )
184184
@@ -187,20 +187,20 @@ def launcher(launch: nvbench.Launch):
187187
if __name__ == "__main__":
    # Benchmark without axes
    bench.register(simple)

    # Benchmark with no axes; single_float64_axis falls back to its default
    # duration because no "Duration" axis is registered here.
    bench.register(default_value)

    # Sweep the sleep duration (values presumably in seconds -- confirm).
    # The axis name must match the key read by single_float64_axis
    # ("Duration"); with a different name the lookup falls back to the
    # default and the sweep values are never used.
    bench.register(single_float64_axis).add_float64_axis(
        "Duration", [7e-5, 1e-4, 5e-4]
    )

    # Copy kernel swept over block size and grid size (64 and 256 for each).
    copy1_bench = bench.register(copy_sweep_grid_shape)
    copy1_bench.add_int64_axis("BlockSize", [2**x for x in range(6, 10, 2)])
    copy1_bench.add_int64_axis("NumBlocks", [2**x for x in range(6, 10, 2)])

    # Copy kernel swept over the integer type ids 0..5.
    copy2_bench = bench.register(copy_type_sweep)
    copy2_bench.add_int64_axis("TypeID", range(0, 6))

    # Hand the command line to the framework and run everything registered.
    bench.run_all_benchmarks(sys.argv)
0 commit comments