@@ -53,7 +53,6 @@ measure_cold_base::measure_cold_base(state &exec_state)
5353 {
5454 m_cuda_times.reserve (static_cast <std::size_t >(m_min_samples));
5555 m_cpu_times.reserve (static_cast <std::size_t >(m_min_samples));
56- m_sm_clock_rates.reserve (static_cast <std::size_t >(m_min_samples));
5756 }
5857}
5958
@@ -72,18 +71,18 @@ void measure_cold_base::check()
7271
// Resets the measurement state held by measure_cold_base: timing extrema and
// totals, the sample counter, the max-time flag, the recorded time containers,
// and the stopping criterion.
// NOTE(review): this span is a rendered diff hunk, not plain source. The digits
// fused to the start of each line are old/new line numbers; lines marked "-"
// are the pre-change text and lines marked "+" are the post-change text.
7372void measure_cold_base::initialize ()
7473{
// Pre-change assignments (removed lines). Min fields start at the float64
// maximum and max fields at the float64 lowest value -- presumably so the
// first recorded sample replaces them; confirm against the recording code.
75- m_min_cuda_time = std::numeric_limits<nvbench::float64_t >::max ();
76- m_max_cuda_time = std::numeric_limits<nvbench::float64_t >::lowest ();
77- m_total_cuda_time = 0 .;
78- m_min_cpu_time = std::numeric_limits<nvbench::float64_t >::max ();
79- m_max_cpu_time = std::numeric_limits<nvbench::float64_t >::lowest ();
80- m_total_cpu_time = 0 .;
81- m_total_samples = 0 ;
82- m_max_time_exceeded = false ;
// Post-change assignments (added lines): the same resets as above, plus
// m_sm_clock_rate_accumulator is zeroed.
74+ m_min_cuda_time = std::numeric_limits<nvbench::float64_t >::max ();
75+ m_max_cuda_time = std::numeric_limits<nvbench::float64_t >::lowest ();
76+ m_total_cuda_time = 0 .;
77+ m_min_cpu_time = std::numeric_limits<nvbench::float64_t >::max ();
78+ m_max_cpu_time = std::numeric_limits<nvbench::float64_t >::lowest ();
79+ m_total_cpu_time = 0 .;
80+ m_sm_clock_rate_accumulator = 0 .;
81+ m_total_samples = 0 ;
82+ m_max_time_exceeded = false ;
8383
// Empty the recorded CUDA and CPU time containers.
8484 m_cuda_times.clear ();
8585 m_cpu_times.clear ();
// Removed by this change: m_sm_clock_rates is no longer cleared here.
86- m_sm_clock_rates.clear ();
8786
// Re-initialize the stopping criterion from m_criterion_params.
8887 m_stopping_criterion.initialize (m_criterion_params);
8988}
@@ -94,21 +93,22 @@ void measure_cold_base::record_measurements()
9493{
9594 if (!m_run_once)
9695 {
97- auto peak_clock_rate = static_cast <float >(m_state.get_device ()->get_sm_default_clock_rate ());
96+ const auto current_clock_rate = m_gpu_frequency.get_clock_frequency ();
97+ const auto default_clock_rate =
98+ static_cast <float >(m_state.get_device ()->get_sm_default_clock_rate ());
9899
99- if (m_gpu_frequency.has_throttled (peak_clock_rate , m_throttle_threshold))
100+ if (m_gpu_frequency.has_throttled (default_clock_rate , m_throttle_threshold))
100101 {
101102 if (auto printer_opt_ref = m_state.get_benchmark ().get_printer (); printer_opt_ref.has_value ())
102103 {
103- auto current_clock_rate = m_gpu_frequency.get_clock_frequency ();
104- auto &printer = printer_opt_ref.value ().get ();
104+ auto &printer = printer_opt_ref.value ().get ();
105105 printer.log (nvbench::log_level::warn,
106106 fmt::format (" GPU throttled below threshold ({:0.2f} MHz / {:0.2f} MHz) "
107107 " ({:0.0f}% < {:0.0f}%) on sample {}. Discarding previous sample "
108108 " and pausing for {}s." ,
109109 current_clock_rate / 1000000 .0f ,
110- peak_clock_rate / 1000000 .0f ,
111- 100 .0f * (current_clock_rate / peak_clock_rate ),
110+ default_clock_rate / 1000000 .0f ,
111+ 100 .0f * (current_clock_rate / default_clock_rate ),
112112 100 .0f * m_throttle_threshold,
113113 m_total_samples,
114114 m_throttle_recovery_delay));
@@ -123,7 +123,7 @@ void measure_cold_base::record_measurements()
123123 return ;
124124 }
125125
126- m_sm_clock_rates. push_back (peak_clock_rate) ;
126+ m_sm_clock_rate_accumulator += current_clock_rate ;
127127 }
128128
129129 // Update and record timers and counters:
@@ -338,16 +338,30 @@ void measure_cold_base::generate_summaries()
338338 summ.set_string (" hide" , " Hidden by default." );
339339 }
340340
341- if (!m_sm_clock_rates. empty () )
341+ if (m_sm_clock_rate_accumulator != 0 . )
342342 {
343- auto &summ = m_state.add_summary (" nv/cold/sm_clock_rate/mean" );
344- summ.set_string (" name" , " Clock Rate" );
345- summ.set_string (" hint" , " frequency" );
346- summ.set_string (" description" , " Mean SM clock rate" );
347- summ.set_string (" hide" , " Hidden by default." );
348- summ.set_float64 (" value" ,
349- nvbench::detail::statistics::compute_mean (m_sm_clock_rates.cbegin (),
350- m_sm_clock_rates.cend ()));
343+ const auto clock_mean = m_sm_clock_rate_accumulator / d_samples;
344+
345+ {
346+ auto &summ = m_state.add_summary (" nv/cold/sm_clock_rate/mean" );
347+ summ.set_string (" name" , " Clock Rate" );
348+ summ.set_string (" hint" , " frequency" );
349+ summ.set_string (" description" , " Mean SM clock rate" );
350+ summ.set_string (" hide" , " Hidden by default." );
351+ summ.set_float64 (" value" , clock_mean);
352+ }
353+
354+ {
355+ const auto default_clock_rate =
356+ static_cast <nvbench::float64_t >(m_state.get_device ()->get_sm_default_clock_rate ());
357+
358+ auto &summ = m_state.add_summary (" nv/cold/sm_clock_rate/scaling/percent" );
359+ summ.set_string (" name" , " Clock Scaling" );
360+ summ.set_string (" hint" , " percentage" );
361+ summ.set_string (" description" , " Mean SM clock rate as a percentage of default clock rate." );
362+ summ.set_string (" hide" , " Hidden by default." );
363+ summ.set_float64 (" value" , clock_mean / default_clock_rate);
364+ }
351365 }
352366
353367 // Log if a printer exists:
0 commit comments