Skip to content

Commit 2ba2d11

Browse files
committed
Report mean SM clock rate
1 parent f29f7ac commit 2ba2d11

File tree

4 files changed

+42
-0
lines changed

4 files changed

+42
-0
lines changed

nvbench/detail/measure_cold.cu

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ measure_cold_base::measure_cold_base(state &exec_state)
5454
{
5555
m_cuda_times.reserve(static_cast<std::size_t>(m_min_samples));
5656
m_cpu_times.reserve(static_cast<std::size_t>(m_min_samples));
57+
m_sm_clock_rates.reserve(static_cast<std::size_t>(m_min_samples));
5758
}
5859
}
5960

@@ -83,6 +84,7 @@ void measure_cold_base::initialize()
8384

8485
m_cuda_times.clear();
8586
m_cpu_times.clear();
87+
m_sm_clock_rates.clear();
8688

8789
m_stopping_criterion.initialize(m_criterion_params);
8890
}
@@ -94,6 +96,7 @@ void measure_cold_base::record_measurements()
9496
if (!m_run_once)
9597
{
9698
auto peak_clock_rate = static_cast<float>(m_state.get_device()->get_sm_default_clock_rate());
99+
m_sm_clock_rates.push_back(peak_clock_rate);
97100

98101
if (m_gpu_frequency.has_throttled(peak_clock_rate, m_throttle_threshold))
99102
{
@@ -338,6 +341,17 @@ void measure_cold_base::generate_summaries()
338341
summ.set_string("hide", "Hidden by default.");
339342
}
340343

344+
if (!m_sm_clock_rates.empty())
345+
{
346+
auto &summ = m_state.add_summary("nv/cold/sm_clock_rate/mean");
347+
summ.set_string("name", "Clock Rate");
348+
summ.set_string("hint", "frequency");
349+
summ.set_string("description", "Mean SM clock rate");
350+
summ.set_string("hide", "Hidden by default.");
351+
summ.set_float64("value", nvbench::detail::statistics::compute_mean(m_sm_clock_rates.cbegin(),
352+
m_sm_clock_rates.cend()));
353+
}
354+
341355
// Log if a printer exists:
342356
if (auto printer_opt_ref = m_state.get_benchmark().get_printer(); printer_opt_ref.has_value())
343357
{

nvbench/detail/measure_cold.cuh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636

3737
#include <utility>
3838
#include <vector>
39+
#include "nvbench/types.cuh"
3940

4041
namespace nvbench
4142
{
@@ -117,6 +118,7 @@ protected:
117118

118119
std::vector<nvbench::float64_t> m_cuda_times;
119120
std::vector<nvbench::float64_t> m_cpu_times;
121+
std::vector<nvbench::float32_t> m_sm_clock_rates;
120122

121123
bool m_max_time_exceeded{};
122124
};

nvbench/markdown_printer.cu

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,10 @@ void markdown_printer::do_print_benchmark_results(const printer_base::benchmark_
294294
{
295295
table.add_cell(row, tag, header, this->do_format_item_rate(summ));
296296
}
297+
else if (hint == "frequency")
298+
{
299+
table.add_cell(row, tag, header, this->do_format_frequency(summ));
300+
}
297301
else if (hint == "bytes")
298302
{
299303
table.add_cell(row, tag, header, this->do_format_bytes(summ));
@@ -399,6 +403,27 @@ std::string markdown_printer::do_format_item_rate(const summary &data)
399403
}
400404
}
401405

406+
// Renders the summary's float64 "value" entry, read as a frequency in Hz, as
// a human-readable string with three decimal places.
//
// The value is scaled to the largest unit (GHz, MHz, KHz) whose threshold it
// reaches; anything below 1e3 is printed unscaled with a plain "Hz" suffix.
std::string markdown_printer::do_format_frequency(const nvbench::summary &data)
{
  const auto hz = data.get_float64("value");

  // Check thresholds from largest to smallest so the first match wins.
  if (hz >= 1e9)
  {
    return fmt::format("{:0.3f} GHz", hz * 1e-9);
  }

  if (hz >= 1e6)
  {
    return fmt::format("{:0.3f} MHz", hz * 1e-6);
  }

  if (hz >= 1e3)
  {
    return fmt::format("{:0.3f} KHz", hz * 1e-3);
  }

  // No threshold reached: report the raw value.
  return fmt::format("{:0.3f} Hz", hz);
}
426+
402427
std::string markdown_printer::do_format_bytes(const summary &data)
403428
{
404429
const auto bytes = static_cast<nvbench::float64_t>(data.get_int64("value"));

nvbench/markdown_printer.cuh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ protected:
6464
virtual std::string do_format_default(const nvbench::summary &data);
6565
virtual std::string do_format_duration(const nvbench::summary &seconds);
6666
virtual std::string do_format_item_rate(const nvbench::summary &items_per_sec);
67+
// Formats the summary's float64 "value" entry as a frequency string with
// three decimal places, scaled to Hz, KHz, MHz, or GHz by magnitude.
virtual std::string do_format_frequency(const nvbench::summary &frequency_hz);
6768
virtual std::string do_format_bytes(const nvbench::summary &bytes);
6869
virtual std::string do_format_byte_rate(const nvbench::summary &bytes_per_sec);
6970
virtual std::string do_format_sample_size(const nvbench::summary &count);

0 commit comments

Comments
 (0)