Skip to content

Commit b610543

Browse files
Use core.Stream.from_handle to create core.Stream from nvbench.CudaStream
1 parent 5f52fc5 commit b610543

File tree

4 files changed: 34 additions (+34) and 49 deletions (-49)

python/examples/auto_throughput.py

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -63,13 +63,9 @@ def launcher(launch: nvbench.Launch):
6363
state.exec(launcher)
6464

6565

66-
(
67-
nvbench.register(throughput_bench)
68-
.addInt64Axis("Stride", [1, 4])
69-
.addInt64Axis("ItemsPerThread", [1, 2, 3, 4])
70-
)
71-
72-
7366
if __name__ == "__main__":
74-
print(nvbench.__version__)
67+
b = nvbench.register(throughput_bench)
68+
b.addInt64Axis("Stride", [1, 2, 4])
69+
b.addInt64Axis("ItemsPerThread", [1, 2, 3, 4])
70+
7571
nvbench.run_all_benchmarks(sys.argv)

python/examples/axes.py

Lines changed: 16 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,10 @@
77
import cuda.nvbench as nvbench
88

99

10+
def as_core_Stream(cs: nvbench.CudaStream) -> core.Stream:
11+
return core.Stream.from_handle(cs.addressof())
12+
13+
1014
def make_sleep_kernel():
1115
"""JITs sleep_kernel(seconds)"""
1216
src = r"""
@@ -45,10 +49,7 @@ def simple(state: nvbench.State):
4549
launch_config = core.LaunchConfig(grid=1, block=1, shmem_size=0)
4650

4751
def launcher(launch: nvbench.Launch):
48-
dev = core.Device()
49-
dev.set_current()
50-
s = dev.create_stream(launch.getStream())
51-
52+
s = as_core_Stream(launch.getStream())
5253
core.launch(s, launch_config, krn, sleep_dur)
5354

5455
state.exec(launcher)
@@ -61,10 +62,7 @@ def single_float64_axis(state: nvbench.State):
6162
launch_config = core.LaunchConfig(grid=1, block=1, shmem_size=0)
6263

6364
def launcher(launch: nvbench.Launch):
64-
dev = core.Device()
65-
dev.set_current()
66-
s = dev.create_stream(launch.getStream())
67-
65+
s = as_core_Stream(launch.getStream())
6866
core.launch(s, launch_config, krn, sleep_dur)
6967

7068
state.exec(launcher)
@@ -117,21 +115,16 @@ def copy_sweep_grid_shape(state: nvbench.State):
117115
state.addGlobalMemoryReads(nbytes)
118116
state.addGlobalMemoryWrites(nbytes)
119117

120-
dev = core.Device(state.getDevice())
121-
dev.set_current()
122-
123-
alloc_stream = dev.create_stream(state.getStream())
124-
input_buf = core.DeviceMemoryResource(dev.device_id).allocate(nbytes, alloc_stream)
125-
output_buf = core.DeviceMemoryResource(dev.device_id).allocate(nbytes, alloc_stream)
118+
dev_id = state.getDevice()
119+
alloc_s = as_core_Stream(state.getStream())
120+
input_buf = core.DeviceMemoryResource(dev_id).allocate(nbytes, alloc_s)
121+
output_buf = core.DeviceMemoryResource(dev_id).allocate(nbytes, alloc_s)
126122

127123
krn = make_copy_kernel()
128124
launch_config = core.LaunchConfig(grid=num_blocks, block=block_size, shmem_size=0)
129125

130126
def launcher(launch: nvbench.Launch):
131-
dev = core.Device()
132-
dev.set_current()
133-
s = dev.create_stream(launch.getStream())
134-
127+
s = as_core_Stream(launch.getStream())
135128
core.launch(s, launch_config, krn, input_buf, output_buf, num_values)
136129

137130
state.exec(launcher)
@@ -160,21 +153,16 @@ def copy_type_sweep(state: nvbench.State):
160153
state.addGlobalMemoryReads(nbytes)
161154
state.addGlobalMemoryWrites(nbytes)
162155

163-
dev = core.Device(state.getDevice())
164-
dev.set_current()
165-
166-
alloc_stream = dev.create_stream(state.getStream())
167-
input_buf = core.DeviceMemoryResource(dev.device_id).allocate(nbytes, alloc_stream)
168-
output_buf = core.DeviceMemoryResource(dev.device_id).allocate(nbytes, alloc_stream)
156+
dev_id = state.getDevice()
157+
alloc_s = as_core_Stream(state.getStream())
158+
input_buf = core.DeviceMemoryResource(dev_id).allocate(nbytes, alloc_s)
159+
output_buf = core.DeviceMemoryResource(dev_id).allocate(nbytes, alloc_s)
169160

170161
krn = make_copy_kernel(value_cuda_t, value_cuda_t)
171162
launch_config = core.LaunchConfig(grid=256, block=256, shmem_size=0)
172163

173164
def launcher(launch: nvbench.Launch):
174-
dev = core.Device()
175-
dev.set_current()
176-
s = dev.create_stream(launch.getStream())
177-
165+
s = as_core_Stream(launch.getStream())
178166
core.launch(s, launch_config, krn, input_buf, output_buf, num_values)
179167

180168
state.exec(launcher)

python/examples/exec_tag_sync.py

Lines changed: 8 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,11 @@
77
import cuda.nvbench as nvbench
88

99

10+
def as_core_Stream(cs: nvbench.CudaStream) -> core.Stream:
11+
"Create view of native stream used by NVBench"
12+
return core.Stream.from_handle(cs.addressof())
13+
14+
1015
def make_fill_kernel(data_type: Optional[str] = None):
1116
src = r"""
1217
#include <cuda/std/cstdint>
@@ -40,11 +45,8 @@ def synchronizing_bench(state: nvbench.State):
4045
n_values = 64 * 1024 * 1024
4146
n_bytes = n_values * ctypes.sizeof(ctypes.c_int32(0))
4247

43-
dev = core.Device(state.getDevice())
44-
dev.set_current()
45-
46-
alloc_stream = dev.create_stream(state.getStream())
47-
buffer = core.DeviceMemoryResource(dev).allocate(n_bytes, alloc_stream)
48+
alloc_s = as_core_Stream(state.getStream())
49+
buffer = core.DeviceMemoryResource(state.getDevice()).allocate(n_bytes, alloc_s)
4850

4951
state.addElementCount(n_values, "Items")
5052
state.addGlobalMemoryWrites(n_bytes, "Size")
@@ -53,10 +55,7 @@ def synchronizing_bench(state: nvbench.State):
5355
launch_config = core.LaunchConfig(grid=256, block=256, shmem_size=0)
5456

5557
def launcher(launch: nvbench.Launch):
56-
dev = core.Device()
57-
dev.set_current()
58-
59-
s = dev.create_stream(launch.getStream())
58+
s = as_core_Stream(launch.getStream())
6059
core.launch(s, launch_config, krn, buffer, 0, n_values)
6160
s.sync()
6261

python/examples/skip.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,11 @@
55
import cuda.nvbench as nvbench
66

77

8+
def as_core_Stream(cs: nvbench.CudaStream) -> core.Stream:
9+
"Create view into native stream provided by NVBench"
10+
return core.Stream.from_handle(cs.addressof())
11+
12+
813
def make_sleep_kernel():
914
"""JITs sleep_kernel(seconds)"""
1015
src = r"""
@@ -54,10 +59,7 @@ def runtime_skip(state: nvbench.State):
5459
launch_cfg = core.LaunchConfig(grid=1, block=1, shmem_size=0)
5560

5661
def launcher(launch: nvbench.Launch):
57-
dev = core.Device()
58-
dev.set_current()
59-
60-
s = dev.create_stream(launch.getStream())
62+
s = as_core_Stream(launch.getStream())
6163
core.launch(s, launch_cfg, krn, duration)
6264

6365
state.exec(launcher)

0 commit comments

Comments
 (0)