77import cuda .nvbench as nvbench
88
99
10+ def as_core_Stream (cs : nvbench .CudaStream ) -> core .Stream :
11+ return core .Stream .from_handle (cs .addressof ())
12+
13+
1014def make_sleep_kernel ():
1115 """JITs sleep_kernel(seconds)"""
1216 src = r"""
@@ -45,10 +49,7 @@ def simple(state: nvbench.State):
4549 launch_config = core .LaunchConfig (grid = 1 , block = 1 , shmem_size = 0 )
4650
4751 def launcher (launch : nvbench .Launch ):
48- dev = core .Device ()
49- dev .set_current ()
50- s = dev .create_stream (launch .getStream ())
51-
52+ s = as_core_Stream (launch .getStream ())
5253 core .launch (s , launch_config , krn , sleep_dur )
5354
5455 state .exec (launcher )
@@ -61,10 +62,7 @@ def single_float64_axis(state: nvbench.State):
6162 launch_config = core .LaunchConfig (grid = 1 , block = 1 , shmem_size = 0 )
6263
6364 def launcher (launch : nvbench .Launch ):
64- dev = core .Device ()
65- dev .set_current ()
66- s = dev .create_stream (launch .getStream ())
67-
65+ s = as_core_Stream (launch .getStream ())
6866 core .launch (s , launch_config , krn , sleep_dur )
6967
7068 state .exec (launcher )
@@ -117,21 +115,16 @@ def copy_sweep_grid_shape(state: nvbench.State):
117115 state .addGlobalMemoryReads (nbytes )
118116 state .addGlobalMemoryWrites (nbytes )
119117
120- dev = core .Device (state .getDevice ())
121- dev .set_current ()
122-
123- alloc_stream = dev .create_stream (state .getStream ())
124- input_buf = core .DeviceMemoryResource (dev .device_id ).allocate (nbytes , alloc_stream )
125- output_buf = core .DeviceMemoryResource (dev .device_id ).allocate (nbytes , alloc_stream )
118+ dev_id = state .getDevice ()
119+ alloc_s = as_core_Stream (state .getStream ())
120+ input_buf = core .DeviceMemoryResource (dev_id ).allocate (nbytes , alloc_s )
121+ output_buf = core .DeviceMemoryResource (dev_id ).allocate (nbytes , alloc_s )
126122
127123 krn = make_copy_kernel ()
128124 launch_config = core .LaunchConfig (grid = num_blocks , block = block_size , shmem_size = 0 )
129125
130126 def launcher (launch : nvbench .Launch ):
131- dev = core .Device ()
132- dev .set_current ()
133- s = dev .create_stream (launch .getStream ())
134-
127+ s = as_core_Stream (launch .getStream ())
135128 core .launch (s , launch_config , krn , input_buf , output_buf , num_values )
136129
137130 state .exec (launcher )
@@ -160,21 +153,16 @@ def copy_type_sweep(state: nvbench.State):
160153 state .addGlobalMemoryReads (nbytes )
161154 state .addGlobalMemoryWrites (nbytes )
162155
163- dev = core .Device (state .getDevice ())
164- dev .set_current ()
165-
166- alloc_stream = dev .create_stream (state .getStream ())
167- input_buf = core .DeviceMemoryResource (dev .device_id ).allocate (nbytes , alloc_stream )
168- output_buf = core .DeviceMemoryResource (dev .device_id ).allocate (nbytes , alloc_stream )
156+ dev_id = state .getDevice ()
157+ alloc_s = as_core_Stream (state .getStream ())
158+ input_buf = core .DeviceMemoryResource (dev_id ).allocate (nbytes , alloc_s )
159+ output_buf = core .DeviceMemoryResource (dev_id ).allocate (nbytes , alloc_s )
169160
170161 krn = make_copy_kernel (value_cuda_t , value_cuda_t )
171162 launch_config = core .LaunchConfig (grid = 256 , block = 256 , shmem_size = 0 )
172163
173164 def launcher (launch : nvbench .Launch ):
174- dev = core .Device ()
175- dev .set_current ()
176- s = dev .create_stream (launch .getStream ())
177-
165+ s = as_core_Stream (launch .getStream ())
178166 core .launch (s , launch_config , krn , input_buf , output_buf , num_values )
179167
180168 state .exec (launcher )
0 commit comments