Skip to content

Commit 7ea9e59

Browse files
cpu_only -> cpu_activity
Change the example to illustrate timing CPU work. The first example does only CPU work (sleeps) and uses the CPU-only timer. The second example does both CPU and GPU work (sleeping in either case) and uses the cold-run timer, with and without the sync tag, to measure both CPU and GPU times.
1 parent d020fac commit 7ea9e59

File tree

2 files changed

+81
-34
lines changed

2 files changed

+81
-34
lines changed

python/examples/cpu_activity.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
import sys
2+
import time
3+
4+
import cuda.cccl.headers as headers
5+
import cuda.core.experimental as core
6+
import cuda.nvbench as nvbench
7+
8+
# Length of the host-side sleep, in seconds; the launchers in this file pass
# it straight to time.sleep().
host_sleep_duration = 0.1
9+
10+
11+
def cpu_only_sleep_bench(state: nvbench.State) -> None:
    """Benchmark whose timed callable performs host-side work only.

    The callable handed to ``state.exec`` sleeps on the host for
    ``host_sleep_duration`` seconds and never uses its ``launch`` argument.
    """

    def sleep_on_host(launch: nvbench.Launch):
        # Stand-in for CPU work: block the calling thread.
        time.sleep(host_sleep_duration)

    state.exec(sleep_on_host)
16+
17+
18+
def as_core_Stream(cs: nvbench.CudaStream) -> core.Stream:
    """Build a ``core.Stream`` from the value returned by ``cs.addressof()``."""
    handle = cs.addressof()
    return core.Stream.from_handle(handle)
20+
21+
22+
def make_sleep_kernel():
    """Compile the CUDA C++ ``sleep_kernel(double seconds)`` and return it.

    The kernel source is built into a cubin with the libcudacxx include
    directory on the include path, and the ``sleep_kernel`` entry is looked
    up in the resulting module.
    """
    kernel_source = r"""
#include <cuda/std/cstdint>
#include <cuda/std/chrono>

// Each launched thread just sleeps for `seconds`.
__global__ void sleep_kernel(double seconds) {
  namespace chrono = ::cuda::std::chrono;
  using hr_clock = chrono::high_resolution_clock;

  auto duration = static_cast<cuda::std::int64_t>(seconds * 1e9);
  const auto ns = chrono::nanoseconds(duration);

  const auto start = hr_clock::now();
  const auto finish = start + ns;

  auto now = hr_clock::now();
  while (now < finish)
  {
    now = hr_clock::now();
  }
}
"""
    # The kernel includes <cuda/std/...> headers, so libcudacxx must be
    # visible to the compiler.
    include_dirs = headers.get_include_paths()
    options = core.ProgramOptions(include_path=str(include_dirs.libcudacxx))
    program = core.Program(kernel_source, code_type="c++", options=options)
    module = program.compile("cubin", name_expressions=("sleep_kernel",))
    return module.get_kernel("sleep_kernel")
51+
52+
53+
def mixed_sleep_bench(state: nvbench.State) -> None:
    """Benchmark whose timed callable sleeps on the host, then on the GPU.

    The ``"Sync"`` string axis is read from ``state``; ``state.exec`` is
    called with ``sync=True`` only when the axis value is exactly
    ``"Do sync"``.
    """
    do_sync = state.get_string("Sync") == "Do sync"

    device_sleep_seconds = 225e-3
    kernel = make_sleep_kernel()
    # One block containing one thread, no dynamic shared memory.
    config = core.LaunchConfig(grid=1, block=1, shmem_size=0)

    def host_then_device(launch: nvbench.Launch):
        # Host-side overhead comes first ...
        time.sleep(host_sleep_duration)
        # ... followed by the GPU sleep kernel on the stream from `launch`.
        stream = as_core_Stream(launch.get_stream())
        core.launch(stream, config, kernel, device_sleep_seconds)

    state.exec(host_then_device, sync=do_sync)
69+
70+
71+
if __name__ == "__main__":
    # Host-only benchmark: the function just sleeps on the CPU, so it is
    # registered and then flagged as CPU-only.
    cpu_bench = nvbench.register(cpu_only_sleep_bench)
    cpu_bench.set_is_cpu_only(True)

    # Mixed benchmark: works on both CPU and GPU; the "Sync" axis runs it
    # once per listed value.
    mixed_bench = nvbench.register(mixed_sleep_bench)
    mixed_bench.add_string_axis("Sync", ["Do not sync", "Do sync"])

    nvbench.run_all_benchmarks(sys.argv)

python/examples/cpu_only.py

Lines changed: 0 additions & 34 deletions
This file was deleted.

0 commit comments

Comments
 (0)