Skip to content

Commit 3de9dc9

Browse files
Merge pull request #250 from oleksandr-pavlyk/measure-cold-with-blocking-kernel-to-start-cpu-timer-in-kernel-timer-start
Include host work of benched fn in CPU time when using blocking kernel
2 parents e5a04c8 + 2ab76a8 commit 3de9dc9

File tree

1 file changed

+3
-5
lines changed

1 file changed

+3
-5
lines changed

nvbench/detail/measure_cold.cuh

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -153,18 +153,16 @@ struct measure_cold_base::kernel_launch_timer
153 153   m_measure.gpu_frequency_start();
154 154   }
155 155   m_measure.m_cuda_timer.start(m_measure.m_launch.get_stream());
156     - if (m_disable_blocking_kernel)
157     - {
158     - m_measure.m_cpu_timer.start();
159     - }
    156 + // start CPU timer irrespective of use of blocking kernel
    157 + // Ref: https://github.com/NVIDIA/nvbench/issues/249
    158 + m_measure.m_cpu_timer.start();
160 159   }
161 160
162 161   __forceinline__ void stop()
163 162   {
164 163   m_measure.m_cuda_timer.stop(m_measure.m_launch.get_stream());
165 164   if (!m_disable_blocking_kernel)
166 165   {
167     - m_measure.m_cpu_timer.start();
168 166   m_measure.unblock_stream();
169 167   }
170 168   if (m_measure.m_check_throttling)

0 commit comments

Comments (0)