3030#include < nvbench/detail/kernel_launcher_timer_wrapper.cuh>
3131#include < nvbench/detail/l2flush.cuh>
3232#include < nvbench/detail/statistics.cuh>
33+ #include < nvbench/detail/gpu_frequency.cuh>
3334
3435#include < cuda_runtime.h>
3536
@@ -64,6 +65,8 @@ protected:
6465 bool is_finished ();
6566 void run_trials_epilogue ();
6667 void generate_summaries ();
// Forwards to m_gpu_frequency.start(), passing the stream returned by
// m_launch.get_stream().
// NOTE(review): presumably begins an SM clock-rate measurement on the launch
// stream; confirm against nvbench/detail/gpu_frequency.cuh.
68+ void gpu_frequency_start () { m_gpu_frequency.start (m_launch.get_stream ()); }
// Forwards to m_gpu_frequency.stop(), passing the stream returned by
// m_launch.get_stream().
// NOTE(review): presumably ends the SM clock-rate measurement on the launch
// stream; confirm against nvbench/detail/gpu_frequency.cuh.
69+ void gpu_frequency_stop () { m_gpu_frequency.stop (m_launch.get_stream ()); }
6770
6871 void check_skip_time (nvbench::float64_t warmup_time);
6972
@@ -88,6 +91,7 @@ protected:
8891
8992 nvbench::criterion_params m_criterion_params;
9093 nvbench::stopping_criterion_base& m_stopping_criterion;
94+ nvbench::detail::gpu_frequency m_gpu_frequency;
9195
9296 bool m_disable_blocking_kernel{false };
9397 bool m_run_once{false };
@@ -97,6 +101,10 @@ protected:
97101 nvbench::float64_t m_skip_time{};
98102 nvbench::float64_t m_timeout{};
99103
// Throttle threshold, expressed relative to the peak SM clock rate.
// NOTE(review): the default of 0.75 reads as a ratio (i.e. 75%), not as a
// percent value of 0.75; confirm at the point where it is compared.
104+ nvbench::float32_t m_throttle_threshold{0 .75f }; // [fraction of peak SM clock rate; 0.75 == 75%, TODO confirm]
// Throttle recovery delay in seconds; defaults to zero.
// NOTE(review): how and when the delay is applied is not visible here.
105+ nvbench::float32_t m_throttle_recovery_delay{0 .0f }; // [seconds]
// Disabled by default.
// NOTE(review): presumably causes samples taken while throttled to be
// dropped; verify against the sampling loop.
106+ bool m_discard_on_throttle{false };
107+
100108 nvbench::int64_t m_total_samples{};
101109
102110 nvbench::float64_t m_min_cuda_time{};
@@ -128,6 +136,10 @@ struct measure_cold_base::kernel_launch_timer
128136 {
129137 m_measure.block_stream ();
130138 }
139+ if (!m_measure.m_run_once )
140+ {
141+ m_measure.gpu_frequency_start ();
142+ }
131143 m_measure.m_cuda_timer .start (m_measure.m_launch .get_stream ());
132144 if (m_disable_blocking_kernel)
133145 {
@@ -143,6 +155,10 @@ struct measure_cold_base::kernel_launch_timer
143155 m_measure.m_cpu_timer .start ();
144156 m_measure.unblock_stream ();
145157 }
158+ if (!m_measure.m_run_once )
159+ {
160+ m_measure.gpu_frequency_stop ();
161+ }
146162 m_measure.sync_stream ();
147163 m_measure.m_cpu_timer .stop ();
148164 }
0 commit comments