Skip to content

Commit e405757

Browse files
committed
Disable throttling when sync exec tag is used.
1 parent 0573ffa commit e405757

File tree

4 files changed

+12
-5
lines changed

4 files changed

+12
-5
lines changed

docs/cli_help.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,13 +135,16 @@
135135

136136
* `--throttle-threshold <value>`
137137
* Set the GPU throttle threshold as a percentage of the device's default clock rate.
138-
* Default is 75%.
138+
* Default is 75.
139+
* Set to 0 to disable throttle detection entirely.
140+
* Note that throttle detection is disabled when `nvbench::exec_tag::sync` is used.
139141
* Applies to the most recent `--benchmark`, or all benchmarks if specified
140142
before any `--benchmark` arguments.
141143

142144
* `--throttle-recovery-delay <value>`
143145
* Set the GPU throttle recovery delay in seconds.
144146
* Default is 0.05 seconds.
147+
* Note that throttle detection is disabled when `nvbench::exec_tag::sync` is used.
145148
* Applies to the most recent `--benchmark`, or all benchmarks if specified
146149
before any `--benchmark` arguments.
147150

nvbench/detail/measure_cold.cu

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ measure_cold_base::measure_cold_base(state &exec_state)
4343
exec_state.get_stopping_criterion())}
4444
, m_disable_blocking_kernel{exec_state.get_disable_blocking_kernel()}
4545
, m_run_once{exec_state.get_run_once()}
46+
, m_check_throttling(!exec_state.get_run_once() && exec_state.get_throttle_threshold() > 0.f)
4647
, m_min_samples{exec_state.get_min_samples()}
4748
, m_skip_time{exec_state.get_skip_time()}
4849
, m_timeout{exec_state.get_timeout()}
@@ -94,7 +95,7 @@ void measure_cold_base::run_trials_prologue() { m_walltime_timer.start(); }
9495

9596
void measure_cold_base::record_measurements()
9697
{
97-
if (!m_run_once)
98+
if (m_check_throttling)
9899
{
99100
const auto current_clock_rate = m_gpu_frequency.get_clock_frequency();
100101
const auto default_clock_rate =

nvbench/detail/measure_cold.cuh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ protected:
9595

9696
bool m_disable_blocking_kernel{false};
9797
bool m_run_once{false};
98+
bool m_check_throttling;
9899

99100
nvbench::int64_t m_min_samples{};
100101

@@ -142,7 +143,7 @@ struct measure_cold_base::kernel_launch_timer
142143
{
143144
m_measure.block_stream();
144145
}
145-
if (!m_measure.m_run_once)
146+
if (m_measure.m_check_throttling)
146147
{
147148
m_measure.gpu_frequency_start();
148149
}
@@ -161,7 +162,7 @@ struct measure_cold_base::kernel_launch_timer
161162
m_measure.m_cpu_timer.start();
162163
m_measure.unblock_stream();
163164
}
164-
if (!m_measure.m_run_once)
165+
if (m_measure.m_check_throttling)
165166
{
166167
m_measure.gpu_frequency_stop();
167168
}

nvbench/detail/state_exec.cuh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,10 +101,12 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
101101
"`set_is_cpu_only(true)` is NOT called when defining the benchmark.");
102102
}
103103

104-
// Syncing will cause the blocking kernel pattern to deadlock:
105104
if constexpr (modifier_tags & sync)
106105
{
106+
// Syncing will cause the blocking kernel pattern to deadlock:
107107
this->set_disable_blocking_kernel(true);
108+
// Syncing will cause the throttle frequency measurements to be skewed heavily:
109+
this->set_throttle_threshold(0.f);
108110
}
109111

110112
if (this->is_skipped())

0 commit comments

Comments
 (0)