Skip to content

Commit ae6728f

Browse files
Add example for benchmarking CuPy function
1 parent 4772e9b commit ae6728f

File tree

1 file changed

+45
-0
lines changed

1 file changed

+45
-0
lines changed

python/examples/cupy_extract.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import sys
2+
3+
import cuda.nvbench as nvbench
4+
import cupy as cp
5+
6+
7+
def as_cp_ExternalStream(
    cs: nvbench.CudaStream, dev_id: int = -1
) -> cp.cuda.ExternalStream:
    """Wrap an nvbench CUDA stream in a CuPy ``ExternalStream``.

    Args:
        cs: nvbench stream object; the value of ``cs.addressof()`` is
            handed to CuPy as the stream pointer.
        dev_id: Device id forwarded unchanged to
            ``cp.cuda.ExternalStream``. Defaults to ``-1``.

    Returns:
        A ``cp.cuda.ExternalStream`` built from the stream handle and
        ``dev_id``.
    """
    return cp.cuda.ExternalStream(cs.addressof(), dev_id)
12+
13+
14+
def cupy_extract_by_mask(state: nvbench.State):
    """Benchmark CuPy boolean-mask extraction ``X[mask]``.

    Reads the ``numCols`` and ``numRows`` axis values from ``state``,
    declares the element count and global-memory traffic for the run,
    builds an int32 array and an all-true boolean mask of shape
    ``(numCols, numRows)``, and hands the extraction to ``state.exec``.

    Args:
        state: nvbench state for the current benchmark configuration.
    """
    num_cols = state.getInt64("numCols")
    num_rows = state.getInt64("numRows")

    device = state.getDevice()
    setup_stream = as_cp_ExternalStream(state.getStream(), device)

    total = num_rows * num_cols
    value_bytes = cp.dtype(cp.int32).itemsize
    flag_bytes = cp.dtype("?").itemsize

    state.collectCUPTIMetrics()
    state.addElementCount(total, "# Elements")
    # Every element is read together with its mask flag ...
    state.addGlobalMemoryReads(total * (value_bytes + flag_bytes))
    # ... and, because the mask is all-true, every element is written out.
    state.addGlobalMemoryWrites(total * value_bytes)

    shape = (num_cols, num_rows)
    with setup_stream:
        data = cp.full(shape, fill_value=3, dtype=cp.int32)
        selector = cp.ones(shape, dtype="?")
        # One extraction outside the measured launcher
        # (presumably a warm-up -- TODO confirm).
        _ = data[selector]

    def launcher(launch: nvbench.Launch):
        # Run the extraction on the stream nvbench supplies for this launch.
        launch_stream = as_cp_ExternalStream(launch.getStream(), device)
        with launch_stream:
            _ = data[selector]

    # NOTE(review): sync=True presumably marks the launcher as synchronizing;
    # confirm against the nvbench Python API.
    state.exec(launcher, sync=True)
38+
39+
40+
if __name__ == "__main__":
    # Register the benchmark and sweep both axes over 1024, 2048, 4096, 8192.
    bench = nvbench.register(cupy_extract_by_mask)
    for axis_name in ("numCols", "numRows"):
        bench.addInt64Axis(axis_name, [1024 << shift for shift in range(4)])

    nvbench.run_all_benchmarks(sys.argv)

0 commit comments

Comments
 (0)