@@ -81,6 +81,9 @@ void measure_cold_base::initialize()
8181 m_total_samples = 0 ;
8282 m_max_time_exceeded = false ;
8383
84+ m_dynamic_throttle_recovery_delay = m_throttle_recovery_delay;
85+ m_throttle_discard_count = 0 ;
86+
8487 m_cuda_times.clear ();
8588 m_cpu_times.clear ();
8689
@@ -99,29 +102,41 @@ void measure_cold_base::record_measurements()
99102
100103 if (m_gpu_frequency.has_throttled (default_clock_rate, m_throttle_threshold))
101104 {
105+ if (m_throttle_discard_count > 2 )
106+ {
107+ // Throttling detected in multiple consecutive trials. The delay is not sufficient to
108+ // recover. Increase the delay by no more than half of a second:
109+ m_dynamic_throttle_recovery_delay += std::min (m_dynamic_throttle_recovery_delay * 3 .f / 2 .f ,
110+ 0 .5f );
111+ }
112+
102113 if (auto printer_opt_ref = m_state.get_benchmark ().get_printer (); printer_opt_ref.has_value ())
103114 {
104115 auto &printer = printer_opt_ref.value ().get ();
105116 printer.log (nvbench::log_level::warn,
106117 fmt::format (" GPU throttled below threshold ({:0.2f} MHz / {:0.2f} MHz) "
107- " ({:0.0f}% < {:0.0f}%) on sample {}. Discarding previous sample "
108- " and pausing for {}s." ,
118+ " ({:0.0f}% < {:0.0f}%) on sample {}. Discarding previous trial "
119+ " and pausing for {:0.3f }s." ,
109120 current_clock_rate / 1000000 .0f ,
110121 default_clock_rate / 1000000 .0f ,
111122 100 .0f * (current_clock_rate / default_clock_rate),
112123 100 .0f * m_throttle_threshold,
113124 m_total_samples,
114- m_throttle_recovery_delay ));
125+ m_dynamic_throttle_recovery_delay ));
115126 }
116127
117- if (m_throttle_recovery_delay > 0 .0f )
128+ if (m_dynamic_throttle_recovery_delay > 0 .0f )
118129 { // let the GPU cool down
119- std::this_thread::sleep_for (std::chrono::duration<float >(m_throttle_recovery_delay));
130+ std::this_thread::sleep_for (
131+ std::chrono::duration<float >(m_dynamic_throttle_recovery_delay));
120132 }
121133
134+ m_throttle_discard_count += 1 ;
135+
122136 // ignore this measurement
123137 return ;
124138 }
139+ m_throttle_discard_count = 0 ;
125140
126141 m_sm_clock_rate_accumulator += current_clock_rate;
127142 }
0 commit comments