@@ -43,26 +43,27 @@ def make_sleep_kernel():
4343
4444
def simple(state: nvbench.State):
    """Benchmark the kernel from ``make_sleep_kernel()`` with a fixed argument.

    Requests a minimum of 1000 samples on ``state``, then passes
    ``state.exec`` a launcher that starts the kernel with a 1x1 launch
    configuration (one block, one thread, no shared memory) on the stream
    supplied for each launch.
    """
    state.set_min_samples(1000)
    sleep_dur = 1e-3  # presumably seconds -- TODO confirm against the kernel
    krn = make_sleep_kernel()
    launch_config = core.LaunchConfig(grid=1, block=1, shmem_size=0)

    def launcher(launch: nvbench.Launch):
        # Convert the stream handed out for this launch before using it
        # with core.launch.
        s = as_core_Stream(launch.get_stream())
        core.launch(s, launch_config, krn, sleep_dur)

    state.exec(launcher)
5656
5757
def single_float64_axis(state: nvbench.State):
    """Benchmark the sleep kernel with its argument read from a float64 axis.

    The value is read from the ``"Duration"`` axis of ``state``;
    ``3.14e-4`` is passed as the fallback, so the benchmark can also be
    registered without that axis. The kernel from ``make_sleep_kernel()``
    is launched with a 1x1 launch configuration and no shared memory.
    """
    # get axis value, or default
    default_sleep_dur = 3.14e-4
    sleep_dur = state.get_float64("Duration", default_sleep_dur)
    krn = make_sleep_kernel()
    launch_config = core.LaunchConfig(grid=1, block=1, shmem_size=0)

    def launcher(launch: nvbench.Launch):
        # Convert the stream handed out for this launch before using it
        # with core.launch.
        s = as_core_Stream(launch.get_stream())
        core.launch(s, launch_config, krn, sleep_dur)

    state.exec(launcher)
@@ -104,40 +105,40 @@ def make_copy_kernel(in_type: Optional[str] = None, out_type: Optional[str] = No
104105
105106
def copy_sweep_grid_shape(state: nvbench.State):
    """Benchmark a copy kernel across launch-grid shapes.

    The block size and number of blocks come from the ``"BlockSize"`` and
    ``"NumBlocks"`` int64 axes of ``state``. Each run copies a 256 MiB
    buffer of int32-sized elements and reports the element count plus the
    bytes read and written to ``state``.
    """
    block_size = state.get_int64("BlockSize")
    num_blocks = state.get_int64("NumBlocks")

    # Number of int32 elements in 256MiB
    nbytes = 256 * 1024 * 1024
    # sizeof accepts the ctypes type directly; no instance is needed.
    num_values = nbytes // ctypes.sizeof(ctypes.c_int32)

    state.add_element_count(num_values)
    state.add_global_memory_reads(nbytes)
    state.add_global_memory_writes(nbytes)

    # Allocate both buffers on the benchmark's device, using its stream.
    dev_id = state.get_device()
    alloc_s = as_core_Stream(state.get_stream())
    input_buf = core.DeviceMemoryResource(dev_id).allocate(nbytes, alloc_s)
    output_buf = core.DeviceMemoryResource(dev_id).allocate(nbytes, alloc_s)

    # No type arguments: make_copy_kernel falls back to its defaults.
    krn = make_copy_kernel()
    launch_config = core.LaunchConfig(grid=num_blocks, block=block_size, shmem_size=0)

    def launcher(launch: nvbench.Launch):
        # Convert the stream handed out for this launch before using it
        # with core.launch.
        s = as_core_Stream(launch.get_stream())
        core.launch(s, launch_config, krn, input_buf, output_buf, num_values)

    state.exec(launcher)
131132
132133
133134def copy_type_sweep (state : nvbench .State ):
134- type_id = state .getInt64 ("TypeID" )
135+ type_id = state .get_int64 ("TypeID" )
135136
136137 types_map = {
137- 0 : (ctypes .c_uint8 , ":: cuda::std::uint8_t" ),
138- 1 : (ctypes .c_uint16 , ":: cuda::std::uint16_t" ),
139- 2 : (ctypes .c_uint32 , ":: cuda::std::uint32_t" ),
140- 3 : (ctypes .c_uint64 , ":: cuda::std::uint64_t" ),
138+ 0 : (ctypes .c_uint8 , "cuda::std::uint8_t" ),
139+ 1 : (ctypes .c_uint16 , "cuda::std::uint16_t" ),
140+ 2 : (ctypes .c_uint32 , "cuda::std::uint32_t" ),
141+ 3 : (ctypes .c_uint64 , "cuda::std::uint64_t" ),
141142 4 : (ctypes .c_float , "float" ),
142143 5 : (ctypes .c_double , "double" ),
143144 }
@@ -149,20 +150,20 @@ def copy_type_sweep(state: nvbench.State):
149150 nbytes = 256 * 1024 * 1024
150151 num_values = nbytes // ctypes .sizeof (value_ctype (0 ))
151152
152- state .addElementCount (num_values )
153- state .addGlobalMemoryReads (nbytes )
154- state .addGlobalMemoryWrites (nbytes )
153+ state .add_element_count (num_values )
154+ state .add_global_memory_reads (nbytes )
155+ state .add_global_memory_writes (nbytes )
155156
156- dev_id = state .getDevice ()
157- alloc_s = as_core_Stream (state .getStream ())
157+ dev_id = state .get_device ()
158+ alloc_s = as_core_Stream (state .get_stream ())
158159 input_buf = core .DeviceMemoryResource (dev_id ).allocate (nbytes , alloc_s )
159160 output_buf = core .DeviceMemoryResource (dev_id ).allocate (nbytes , alloc_s )
160161
161162 krn = make_copy_kernel (value_cuda_t , value_cuda_t )
162163 launch_config = core .LaunchConfig (grid = 256 , block = 256 , shmem_size = 0 )
163164
164165 def launcher (launch : nvbench .Launch ):
165- s = as_core_Stream (launch .getStream ())
166+ s = as_core_Stream (launch .get_stream ())
166167 core .launch (s , launch_config , krn , input_buf , output_buf , num_values )
167168
168169 state .exec (launcher )
@@ -175,13 +176,15 @@ def launcher(launch: nvbench.Launch):
175176 # benchmark with no axes, that uses default value
176177 nvbench .register (default_value )
177178 # specify axis
178- nvbench .register (single_float64_axis ).addFloat64Axis ("Duration" , [7e-5 , 1e-4 , 5e-4 ])
179+ nvbench .register (single_float64_axis ).add_float64_axis (
180+ "Duration" , [7e-5 , 1e-4 , 5e-4 ]
181+ )
179182
180183 copy1_bench = nvbench .register (copy_sweep_grid_shape )
181- copy1_bench .addInt64Axis ("BlockSize" , [2 ** x for x in range (6 , 10 , 2 )])
182- copy1_bench .addInt64Axis ("NumBlocks" , [2 ** x for x in range (6 , 10 , 2 )])
184+ copy1_bench .add_int64_axis ("BlockSize" , [2 ** x for x in range (6 , 10 , 2 )])
185+ copy1_bench .add_int64_axis ("NumBlocks" , [2 ** x for x in range (6 , 10 , 2 )])
183186
184187 copy2_bench = nvbench .register (copy_type_sweep )
185- copy2_bench .addInt64Axis ("TypeID" , range (0 , 6 ))
188+ copy2_bench .add_int64_axis ("TypeID" , range (0 , 6 ))
186189
187190 nvbench .run_all_benchmarks (sys .argv )
0 commit comments