Skip to content

Commit 5568b3f

Browse files
Change tests and examples from camelCase to snake_case method names to match the updated implementation
1 parent 9baf88b commit 5568b3f

File tree

9 files changed

+101
-105
lines changed

9 files changed

+101
-105
lines changed

python/examples/auto_throughput.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,18 +41,18 @@ def kernel(stride: np.uintp, elements: np.uintp, in_arr, out_arr):
4141

4242

4343
def throughput_bench(state: nvbench.State) -> None:
44-
stride = state.getInt64("Stride")
45-
ipt = state.getInt64("ItemsPerThread")
44+
stride = state.get_int64("Stride")
45+
ipt = state.get_int64("ItemsPerThread")
4646

4747
nbytes = 128 * 1024 * 1024
4848
elements = nbytes // np.dtype(np.int32).itemsize
4949

50-
alloc_stream = as_cuda_Stream(state.getStream())
50+
alloc_stream = as_cuda_Stream(state.get_stream())
5151
inp_arr = cuda.device_array(elements, dtype=np.int32, stream=alloc_stream)
5252
out_arr = cuda.device_array(elements * ipt, dtype=np.int32, stream=alloc_stream)
5353

54-
state.addElementCount(elements, column_name="Elements")
55-
state.collectCUPTIMetrics()
54+
state.add_element_count(elements, column_name="Elements")
55+
state.collect_cupti_metrics()
5656

5757
threads_per_block = 256
5858
blocks_in_grid = (elements + threads_per_block - 1) // threads_per_block
@@ -66,7 +66,7 @@ def throughput_bench(state: nvbench.State) -> None:
6666
)
6767

6868
def launcher(launch: nvbench.Launch):
69-
exec_stream = as_cuda_Stream(launch.getStream())
69+
exec_stream = as_cuda_Stream(launch.get_stream())
7070
krn[blocks_in_grid, threads_per_block, exec_stream, 0](
7171
stride, elements, inp_arr, out_arr
7272
)
@@ -76,7 +76,7 @@ def launcher(launch: nvbench.Launch):
7676

7777
if __name__ == "__main__":
7878
b = nvbench.register(throughput_bench)
79-
b.addInt64Axis("Stride", [1, 2, 4])
80-
b.addInt64Axis("ItemsPerThread", [1, 2, 3, 4])
79+
b.add_int64_axis("Stride", [1, 2, 4])
80+
b.add_int64_axis("ItemsPerThread", [1, 2, 3, 4])
8181

8282
nvbench.run_all_benchmarks(sys.argv)

python/examples/axes.py

Lines changed: 30 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -43,26 +43,27 @@ def make_sleep_kernel():
4343

4444

4545
def simple(state: nvbench.State):
46-
state.setMinSamples(1000)
46+
state.set_min_samples(1000)
4747
sleep_dur = 1e-3
4848
krn = make_sleep_kernel()
4949
launch_config = core.LaunchConfig(grid=1, block=1, shmem_size=0)
5050

5151
def launcher(launch: nvbench.Launch):
52-
s = as_core_Stream(launch.getStream())
52+
s = as_core_Stream(launch.get_stream())
5353
core.launch(s, launch_config, krn, sleep_dur)
5454

5555
state.exec(launcher)
5656

5757

5858
def single_float64_axis(state: nvbench.State):
5959
# get axis value, or default
60-
sleep_dur = state.getFloat64("Duration", 3.14e-4)
60+
default_sleep_dur = 3.14e-4
61+
sleep_dur = state.get_float64("Duration", default_sleep_dur)
6162
krn = make_sleep_kernel()
6263
launch_config = core.LaunchConfig(grid=1, block=1, shmem_size=0)
6364

6465
def launcher(launch: nvbench.Launch):
65-
s = as_core_Stream(launch.getStream())
66+
s = as_core_Stream(launch.get_stream())
6667
core.launch(s, launch_config, krn, sleep_dur)
6768

6869
state.exec(launcher)
@@ -104,40 +105,40 @@ def make_copy_kernel(in_type: Optional[str] = None, out_type: Optional[str] = No
104105

105106

106107
def copy_sweep_grid_shape(state: nvbench.State):
107-
block_size = state.getInt64("BlockSize")
108-
num_blocks = state.getInt64("NumBlocks")
108+
block_size = state.get_int64("BlockSize")
109+
num_blocks = state.get_int64("NumBlocks")
109110

110111
# Number of int32 elements in 256MiB
111112
nbytes = 256 * 1024 * 1024
112113
num_values = nbytes // ctypes.sizeof(ctypes.c_int32(0))
113114

114-
state.addElementCount(num_values)
115-
state.addGlobalMemoryReads(nbytes)
116-
state.addGlobalMemoryWrites(nbytes)
115+
state.add_element_count(num_values)
116+
state.add_global_memory_reads(nbytes)
117+
state.add_global_memory_writes(nbytes)
117118

118-
dev_id = state.getDevice()
119-
alloc_s = as_core_Stream(state.getStream())
119+
dev_id = state.get_device()
120+
alloc_s = as_core_Stream(state.get_stream())
120121
input_buf = core.DeviceMemoryResource(dev_id).allocate(nbytes, alloc_s)
121122
output_buf = core.DeviceMemoryResource(dev_id).allocate(nbytes, alloc_s)
122123

123124
krn = make_copy_kernel()
124125
launch_config = core.LaunchConfig(grid=num_blocks, block=block_size, shmem_size=0)
125126

126127
def launcher(launch: nvbench.Launch):
127-
s = as_core_Stream(launch.getStream())
128+
s = as_core_Stream(launch.get_stream())
128129
core.launch(s, launch_config, krn, input_buf, output_buf, num_values)
129130

130131
state.exec(launcher)
131132

132133

133134
def copy_type_sweep(state: nvbench.State):
134-
type_id = state.getInt64("TypeID")
135+
type_id = state.get_int64("TypeID")
135136

136137
types_map = {
137-
0: (ctypes.c_uint8, "::cuda::std::uint8_t"),
138-
1: (ctypes.c_uint16, "::cuda::std::uint16_t"),
139-
2: (ctypes.c_uint32, "::cuda::std::uint32_t"),
140-
3: (ctypes.c_uint64, "::cuda::std::uint64_t"),
138+
0: (ctypes.c_uint8, "cuda::std::uint8_t"),
139+
1: (ctypes.c_uint16, "cuda::std::uint16_t"),
140+
2: (ctypes.c_uint32, "cuda::std::uint32_t"),
141+
3: (ctypes.c_uint64, "cuda::std::uint64_t"),
141142
4: (ctypes.c_float, "float"),
142143
5: (ctypes.c_double, "double"),
143144
}
@@ -149,20 +150,20 @@ def copy_type_sweep(state: nvbench.State):
149150
nbytes = 256 * 1024 * 1024
150151
num_values = nbytes // ctypes.sizeof(value_ctype(0))
151152

152-
state.addElementCount(num_values)
153-
state.addGlobalMemoryReads(nbytes)
154-
state.addGlobalMemoryWrites(nbytes)
153+
state.add_element_count(num_values)
154+
state.add_global_memory_reads(nbytes)
155+
state.add_global_memory_writes(nbytes)
155156

156-
dev_id = state.getDevice()
157-
alloc_s = as_core_Stream(state.getStream())
157+
dev_id = state.get_device()
158+
alloc_s = as_core_Stream(state.get_stream())
158159
input_buf = core.DeviceMemoryResource(dev_id).allocate(nbytes, alloc_s)
159160
output_buf = core.DeviceMemoryResource(dev_id).allocate(nbytes, alloc_s)
160161

161162
krn = make_copy_kernel(value_cuda_t, value_cuda_t)
162163
launch_config = core.LaunchConfig(grid=256, block=256, shmem_size=0)
163164

164165
def launcher(launch: nvbench.Launch):
165-
s = as_core_Stream(launch.getStream())
166+
s = as_core_Stream(launch.get_stream())
166167
core.launch(s, launch_config, krn, input_buf, output_buf, num_values)
167168

168169
state.exec(launcher)
@@ -175,13 +176,15 @@ def launcher(launch: nvbench.Launch):
175176
# benchmark with no axes, that uses default value
176177
nvbench.register(default_value)
177178
# specify axis
178-
nvbench.register(single_float64_axis).addFloat64Axis("Duration", [7e-5, 1e-4, 5e-4])
179+
nvbench.register(single_float64_axis).add_float64_axis(
180+
"Duration", [7e-5, 1e-4, 5e-4]
181+
)
179182

180183
copy1_bench = nvbench.register(copy_sweep_grid_shape)
181-
copy1_bench.addInt64Axis("BlockSize", [2**x for x in range(6, 10, 2)])
182-
copy1_bench.addInt64Axis("NumBlocks", [2**x for x in range(6, 10, 2)])
184+
copy1_bench.add_int64_axis("BlockSize", [2**x for x in range(6, 10, 2)])
185+
copy1_bench.add_int64_axis("NumBlocks", [2**x for x in range(6, 10, 2)])
183186

184187
copy2_bench = nvbench.register(copy_type_sweep)
185-
copy2_bench.addInt64Axis("TypeID", range(0, 6))
188+
copy2_bench.add_int64_axis("TypeID", range(0, 6))
186189

187190
nvbench.run_all_benchmarks(sys.argv)

python/examples/cccl_parallel_segmented_reduce.py

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -35,15 +35,15 @@ def as_cp_ExternalStream(
3535

3636
def segmented_reduce(state: nvbench.State):
3737
"Benchmark segmented_reduce example"
38-
n_elems = state.getInt64("numElems")
39-
n_cols = state.getInt64("numCols")
38+
n_elems = state.get_int64("numElems")
39+
n_cols = state.get_int64("numCols")
4040
n_rows = n_elems // n_cols
4141

4242
state.add_summary("numRows", n_rows)
43-
state.collectCUPTIMetrics()
43+
state.collect_cupti_metrics()
4444

45-
dev_id = state.getDevice()
46-
cp_stream = as_cp_ExternalStream(state.getStream(), dev_id)
45+
dev_id = state.get_device()
46+
cp_stream = as_cp_ExternalStream(state.get_stream(), dev_id)
4747

4848
with cp_stream:
4949
rng = cp.random.default_rng()
@@ -75,20 +75,19 @@ def scale(row_id):
7575
d_input, d_output, start_offsets, end_offsets, add_op, h_init
7676
)
7777

78-
# print(1)
79-
cccl_stream = as_cccl_Stream(state.getStream())
80-
# print(2, core_stream, core_stream.__cuda_stream__())
78+
cccl_stream = as_cccl_Stream(state.get_stream())
79+
8180
# query size of temporary storage and allocate
8281
temp_nbytes = alg(
8382
None, d_input, d_output, n_rows, start_offsets, end_offsets, h_init, cccl_stream
8483
)
8584
h_init = np.zeros(tuple(), dtype=np.int32)
86-
# print(3)
85+
8786
with cp_stream:
8887
temp_storage = cp.empty(temp_nbytes, dtype=cp.uint8)
8988

9089
def launcher(launch: nvbench.Launch):
91-
s = as_cccl_Stream(launch.getStream())
90+
s = as_cccl_Stream(launch.get_stream())
9291
alg(
9392
temp_storage,
9493
d_input,
@@ -105,7 +104,7 @@ def launcher(launch: nvbench.Launch):
105104

106105
if __name__ == "__main__":
107106
b = nvbench.register(segmented_reduce)
108-
b.addInt64Axis("numElems", [2**20, 2**22, 2**24])
109-
b.addInt64Axis("numCols", [1024, 2048, 4096, 8192])
107+
b.add_int64_axis("numElems", [2**20, 2**22, 2**24])
108+
b.add_int64_axis("numCols", [1024, 2048, 4096, 8192])
110109

111110
nvbench.run_all_benchmarks(sys.argv)

python/examples/cpu_only.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,6 @@ def launcher(launch: nvbench.Launch):
1313

1414
if __name__ == "__main__":
1515
b = nvbench.register(throughput_bench)
16-
b.setIsCPUOnly(True)
16+
b.set_is_cpu_only(True)
1717

1818
nvbench.run_all_benchmarks(sys.argv)

python/examples/cupy_extract.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,34 +12,34 @@ def as_cp_ExternalStream(
1212

1313

1414
def cupy_extract_by_mask(state: nvbench.State):
15-
n_cols = state.getInt64("numCols")
16-
n_rows = state.getInt64("numRows")
15+
n_cols = state.get_int64("numCols")
16+
n_rows = state.get_int64("numRows")
1717

18-
dev_id = state.getDevice()
19-
cp_s = as_cp_ExternalStream(state.getStream(), dev_id)
18+
dev_id = state.get_device()
19+
cp_s = as_cp_ExternalStream(state.get_stream(), dev_id)
2020

21-
state.collectCUPTIMetrics()
22-
state.addElementCount(n_rows * n_cols, "# Elements")
23-
state.addGlobalMemoryReads(
21+
state.collect_cupti_metrics()
22+
state.add_element_count(n_rows * n_cols, "# Elements")
23+
state.add_global_memory_reads(
2424
n_rows * n_cols * (cp.dtype(cp.int32).itemsize + cp.dtype("?").itemsize)
2525
)
26-
state.addGlobalMemoryWrites(n_rows * n_cols * (cp.dtype(cp.int32).itemsize))
26+
state.add_global_memory_writes(n_rows * n_cols * (cp.dtype(cp.int32).itemsize))
2727

2828
with cp_s:
2929
X = cp.full((n_cols, n_rows), fill_value=3, dtype=cp.int32)
3030
mask = cp.ones((n_cols, n_rows), dtype="?")
3131
_ = X[mask]
3232

3333
def launcher(launch: nvbench.Launch):
34-
with as_cp_ExternalStream(launch.getStream(), dev_id):
34+
with as_cp_ExternalStream(launch.get_stream(), dev_id):
3535
_ = X[mask]
3636

3737
state.exec(launcher, sync=True)
3838

3939

4040
if __name__ == "__main__":
4141
b = nvbench.register(cupy_extract_by_mask)
42-
b.addInt64Axis("numCols", [1024, 2048, 4096, 2 * 4096])
43-
b.addInt64Axis("numRows", [1024, 2048, 4096, 2 * 4096])
42+
b.add_int64_axis("numCols", [1024, 2048, 4096, 2 * 4096])
43+
b.add_int64_axis("numRows", [1024, 2048, 4096, 2 * 4096])
4444

4545
nvbench.run_all_benchmarks(sys.argv)

python/examples/exec_tag_sync.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,17 +45,17 @@ def synchronizing_bench(state: nvbench.State):
4545
n_values = 64 * 1024 * 1024
4646
n_bytes = n_values * ctypes.sizeof(ctypes.c_int32(0))
4747

48-
alloc_s = as_core_Stream(state.getStream())
49-
buffer = core.DeviceMemoryResource(state.getDevice()).allocate(n_bytes, alloc_s)
48+
alloc_s = as_core_Stream(state.get_stream())
49+
buffer = core.DeviceMemoryResource(state.get_device()).allocate(n_bytes, alloc_s)
5050

51-
state.addElementCount(n_values, "Items")
52-
state.addGlobalMemoryWrites(n_bytes, "Size")
51+
state.add_element_count(n_values, "Items")
52+
state.add_global_memory_writes(n_bytes, "Size")
5353

5454
krn = make_fill_kernel()
5555
launch_config = core.LaunchConfig(grid=256, block=256, shmem_size=0)
5656

5757
def launcher(launch: nvbench.Launch):
58-
s = as_core_Stream(launch.getStream())
58+
s = as_core_Stream(launch.get_stream())
5959
core.launch(s, launch_config, krn, buffer, 0, n_values)
6060
s.sync()
6161

python/examples/skip.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ def make_sleep_kernel():
4242

4343

4444
def runtime_skip(state: nvbench.State):
45-
duration = state.getFloat64("Duration")
46-
kramble = state.getString("Kramble")
45+
duration = state.get_float64("Duration")
46+
kramble = state.get_string("Kramble")
4747

4848
# Skip Baz benchmarks whose duration is below 0.8 ms
4949
if kramble == "Baz" and duration < 0.8e-3:
@@ -59,15 +59,15 @@ def runtime_skip(state: nvbench.State):
5959
launch_cfg = core.LaunchConfig(grid=1, block=1, shmem_size=0)
6060

6161
def launcher(launch: nvbench.Launch):
62-
s = as_core_Stream(launch.getStream())
62+
s = as_core_Stream(launch.get_stream())
6363
core.launch(s, launch_cfg, krn, duration)
6464

6565
state.exec(launcher)
6666

6767

6868
if __name__ == "__main__":
6969
b = nvbench.register(runtime_skip)
70-
b.addFloat64Axis("Duration", [1e-4 + k * 0.25e-3 for k in range(5)])
71-
b.addStringAxis("Kramble", ["Foo", "Bar", "Baz"])
70+
b.add_float64_axis("Duration", [1e-4 + k * 0.25e-3 for k in range(5)])
71+
b.add_string_axis("Kramble", ["Foo", "Bar", "Baz"])
7272

7373
nvbench.run_all_benchmarks(sys.argv)

python/examples/throughput.py

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -41,33 +41,27 @@ def kernel(stride: np.uintp, elements: np.uintp, in_arr, out_arr):
4141

4242

4343
def throughput_bench(state: nvbench.State) -> None:
44-
stride = state.getInt64("Stride")
45-
ipt = state.getInt64("ItemsPerThread")
44+
stride = state.get_int64("Stride")
45+
ipt = state.get_int64("ItemsPerThread")
4646

4747
nbytes = 128 * 1024 * 1024
4848
elements = nbytes // np.dtype(np.int32).itemsize
4949

50-
alloc_stream = as_cuda_Stream(state.getStream())
50+
alloc_stream = as_cuda_Stream(state.get_stream())
5151
inp_arr = cuda.device_array(elements, dtype=np.int32, stream=alloc_stream)
5252
out_arr = cuda.device_array(elements * ipt, dtype=np.int32, stream=alloc_stream)
5353

54-
state.addElementCount(elements, column_name="Elements")
55-
state.addGlobalMemoryReads(inp_arr.nbytes, column_name="Datasize")
56-
state.addGlobalMemoryWrites(inp_arr.nbytes)
54+
state.add_element_count(elements, column_name="Elements")
55+
state.add_global_memory_reads(inp_arr.nbytes, column_name="Datasize")
56+
state.add_global_memory_writes(inp_arr.nbytes)
5757

5858
threads_per_block = 256
5959
blocks_in_grid = (elements + threads_per_block - 1) // threads_per_block
6060

6161
krn = make_kernel(ipt)
6262

63-
# warm-up call ensures that kernel is loaded into context
64-
# before blocking kernel is launched
65-
krn[blocks_in_grid, threads_per_block, alloc_stream, 0](
66-
stride, elements, inp_arr, out_arr
67-
)
68-
6963
def launcher(launch: nvbench.Launch):
70-
exec_stream = as_cuda_Stream(launch.getStream())
64+
exec_stream = as_cuda_Stream(launch.get_stream())
7165
krn[blocks_in_grid, threads_per_block, exec_stream, 0](
7266
stride, elements, inp_arr, out_arr
7367
)
@@ -77,7 +71,7 @@ def launcher(launch: nvbench.Launch):
7771

7872
if __name__ == "__main__":
7973
b = nvbench.register(throughput_bench)
80-
b.addInt64Axis("Stride", [1, 2, 4])
81-
b.addInt64Axis("ItemsPerThread", [1, 2, 3, 4])
74+
b.add_int64_axis("Stride", [1, 2, 4])
75+
b.add_int64_axis("ItemsPerThread", [1, 2, 3, 4])
8276

8377
nvbench.run_all_benchmarks(sys.argv)

0 commit comments

Comments
 (0)