Skip to content

Commit 3536061

Browse files
committed
Remove run_once exec_tag.
Similar to `no_block`, this is a runtime variable that doesn't need to be encoded statically. It was not exposed publicly and existed solely as an implementation detail of `state::exec`, introducing unnecessary complexity there.
1 parent 851d7aa commit 3536061

File tree

2 files changed

+31
-49
lines changed

2 files changed

+31
-49
lines changed

nvbench/detail/state_exec.cuh

Lines changed: 28 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
4545
{
4646
using KL = typename std::remove_reference<KernelLauncher>::type;
4747
using namespace nvbench::exec_tag::impl;
48+
4849
static_assert(is_exec_tag_v<ExecTags>,
4950
"`ExecTags` argument must be a member (or combination of members) from "
5051
"`nvbench::exec_tag`.");
@@ -55,30 +56,6 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
5556
constexpr auto modifier_tags = tags & modifier_mask;
5657
constexpr auto measure_tags = tags & measure_mask;
5758

58-
if ((modifier_tags & no_gpu) && !this->get_is_cpu_only())
59-
{
60-
throw std::runtime_error("The `nvbench::exec_tag::no_gpu` tag requires that "
61-
"`set_is_cpu_only(true)` is called when defining the benchmark.");
62-
}
63-
64-
if ((modifier_tags & gpu) && this->get_is_cpu_only())
65-
{
66-
throw std::runtime_error("The `nvbench::exec_tag::gpu` tag requires that "
67-
"`set_is_cpu_only(true)` is NOT called when defining the benchmark.");
68-
}
69-
70-
// "run once" should disable batch measurements:
71-
// TODO This should just be a runtime branch in measure_cold. Currently this causes two versions
72-
// of measure_cold to be compiled. We don't expose the `run_once` tag to users, it should be
73-
// removed.
74-
// TODO CPU measurements should support run_once as well.
75-
if (!(modifier_tags & run_once) && this->get_run_once())
76-
{
77-
constexpr auto run_once_tags = modifier_tags | run_once | (measure_tags & ~hot);
78-
this->exec(run_once_tags, std::forward<KernelLauncher>(kernel_launcher));
79-
return;
80-
}
81-
8259
// If no measurements selected, pick some defaults based on the modifiers:
8360
if constexpr (!measure_tags)
8461
{
@@ -112,6 +89,18 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
11289
return;
11390
}
11491

92+
if ((modifier_tags & no_gpu) && !this->get_is_cpu_only())
93+
{
94+
throw std::runtime_error("The `nvbench::exec_tag::no_gpu` tag requires that "
95+
"`set_is_cpu_only(true)` is called when defining the benchmark.");
96+
}
97+
98+
if ((modifier_tags & gpu) && this->get_is_cpu_only())
99+
{
100+
throw std::runtime_error("The `nvbench::exec_tag::gpu` tag requires that "
101+
"`set_is_cpu_only(true)` is NOT called when defining the benchmark.");
102+
}
103+
115104
// Syncing will cause the blocking kernel pattern to deadlock:
116105
if constexpr (modifier_tags & sync)
117106
{
@@ -154,16 +143,12 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
154143

155144
if constexpr (tags & timer)
156145
{
157-
// Estimate bandwidth here
158146
#ifdef NVBENCH_HAS_CUPTI
159-
if constexpr (!(modifier_tags & run_once))
147+
if (this->is_cupti_required() && !this->get_run_once())
160148
{
161-
if (this->is_cupti_required())
162-
{
163-
using measure_t = nvbench::detail::measure_cupti<KL>;
164-
measure_t measure{*this, kernel_launcher};
165-
measure();
166-
}
149+
using measure_t = nvbench::detail::measure_cupti<KL>;
150+
measure_t measure{*this, kernel_launcher};
151+
measure();
167152
}
168153
#endif
169154

@@ -176,16 +161,12 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
176161
using wrapper_t = nvbench::detail::kernel_launch_timer_wrapper<KL>;
177162
wrapper_t wrapper{kernel_launcher};
178163

179-
// Estimate bandwidth here
180164
#ifdef NVBENCH_HAS_CUPTI
181-
if constexpr (!(modifier_tags & run_once))
165+
if (this->is_cupti_required() && !this->get_run_once())
182166
{
183-
if (this->is_cupti_required())
184-
{
185-
using measure_t = nvbench::detail::measure_cupti<wrapper_t>;
186-
measure_t measure{*this, wrapper};
187-
measure();
188-
}
167+
using measure_t = nvbench::detail::measure_cupti<wrapper_t>;
168+
measure_t measure{*this, wrapper};
169+
measure();
189170
}
190171
#endif
191172

@@ -201,9 +182,13 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
201182
static_assert(!(tags & timer), "Hot measurement doesn't support the `timer` exec_tag.");
202183
static_assert(!(tags & no_batch), "Hot measurement doesn't support the `no_batch` exec_tag.");
203184
static_assert(!(tags & no_gpu), "Hot measurement doesn't support the `no_gpu` exec_tag.");
204-
using measure_t = nvbench::detail::measure_hot<KL>;
205-
measure_t measure{*this, kernel_launcher};
206-
measure();
185+
186+
if (!this->get_run_once())
187+
{
188+
using measure_t = nvbench::detail::measure_hot<KL>;
189+
measure_t measure{*this, kernel_launcher};
190+
measure();
191+
}
207192
}
208193
}
209194
}

nvbench/exec_tag.cuh

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,9 @@ enum class exec_flag
3333
// Modifiers:
3434
timer = 0x01, // KernelLauncher uses manual timing
3535
sync = 0x02, // KernelLauncher has indicated that it will sync
36-
run_once = 0x04, // Only run the benchmark once (for profiling).
37-
gpu = 0x08, // Don't instantiate `measure_cpu_only`.
38-
no_gpu = 0x10, // No GPU measurements should be instantiated.
39-
no_batch = 0x20, // `measure_hot` will not be used.
36+
gpu = 0x04, // Don't instantiate `measure_cpu_only`.
37+
no_gpu = 0x08, // No GPU measurements should be instantiated.
38+
no_batch = 0x10, // `measure_hot` will not be used.
4039
modifier_mask = 0xFF,
4140

4241
// Measurement types to instantiate. Derived from modifiers.
@@ -97,7 +96,6 @@ struct tag
9796
using none_t = tag<nvbench::detail::exec_flag::none>;
9897
using timer_t = tag<nvbench::detail::exec_flag::timer>;
9998
using sync_t = tag<nvbench::detail::exec_flag::sync>;
100-
using run_once_t = tag<nvbench::detail::exec_flag::run_once>;
10199
using gpu_t = tag<nvbench::detail::exec_flag::gpu>;
102100
using no_gpu_t = tag<nvbench::detail::exec_flag::no_gpu>;
103101
using no_batch_t = tag<nvbench::detail::exec_flag::no_batch>;
@@ -111,7 +109,6 @@ using measure_mask_t = tag<nvbench::detail::exec_flag::measure_mask>;
111109
constexpr inline none_t none;
112110
constexpr inline timer_t timer;
113111
constexpr inline sync_t sync;
114-
constexpr inline run_once_t run_once;
115112
constexpr inline gpu_t gpu;
116113
constexpr inline no_gpu_t no_gpu;
117114
constexpr inline no_batch_t no_batch;

0 commit comments

Comments
 (0)