@@ -45,6 +45,7 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
4545{
4646 using KL = typename std::remove_reference<KernelLauncher>::type;
4747 using namespace nvbench ::exec_tag::impl;
48+
4849 static_assert (is_exec_tag_v<ExecTags>,
4950 " `ExecTags` argument must be a member (or combination of members) from "
5051 " `nvbench::exec_tag`." );
@@ -55,30 +56,6 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
5556 constexpr auto modifier_tags = tags & modifier_mask;
5657 constexpr auto measure_tags = tags & measure_mask;
5758
58- if ((modifier_tags & no_gpu) && !this ->get_is_cpu_only ())
59- {
60- throw std::runtime_error (" The `nvbench::exec_tag::no_gpu` tag requires that "
61- " `set_is_cpu_only(true)` is called when defining the benchmark." );
62- }
63-
64- if ((modifier_tags & gpu) && this ->get_is_cpu_only ())
65- {
66- throw std::runtime_error (" The `nvbench::exec_tag::gpu` tag requires that "
67- " `set_is_cpu_only(true)` is NOT called when defining the benchmark." );
68- }
69-
70- // "run once" should disable batch measurements:
71- // TODO This should just be a runtime branch in measure_cold. Currently this causes two versions
72- // of measure_cold to be compiled. We don't expose the `run_once` tag to users, it should be
73- // removed.
74- // TODO CPU measurements should support run_once as well.
75- if (!(modifier_tags & run_once) && this ->get_run_once ())
76- {
77- constexpr auto run_once_tags = modifier_tags | run_once | (measure_tags & ~hot);
78- this ->exec (run_once_tags, std::forward<KernelLauncher>(kernel_launcher));
79- return ;
80- }
81-
8259 // If no measurements selected, pick some defaults based on the modifiers:
8360 if constexpr (!measure_tags)
8461 {
@@ -112,6 +89,18 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
11289 return ;
11390 }
11491
92+ if ((modifier_tags & no_gpu) && !this ->get_is_cpu_only ())
93+ {
94+ throw std::runtime_error (" The `nvbench::exec_tag::no_gpu` tag requires that "
95+ " `set_is_cpu_only(true)` is called when defining the benchmark." );
96+ }
97+
98+ if ((modifier_tags & gpu) && this ->get_is_cpu_only ())
99+ {
100+ throw std::runtime_error (" The `nvbench::exec_tag::gpu` tag requires that "
101+ " `set_is_cpu_only(true)` is NOT called when defining the benchmark." );
102+ }
103+
115104 // Syncing will cause the blocking kernel pattern to deadlock:
116105 if constexpr (modifier_tags & sync)
117106 {
@@ -154,16 +143,12 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
154143
155144 if constexpr (tags & timer)
156145 {
157- // Estimate bandwidth here
158146#ifdef NVBENCH_HAS_CUPTI
159- if constexpr (!(modifier_tags & run_once ))
147+ if ( this -> is_cupti_required () && ! this -> get_run_once ( ))
160148 {
161- if (this ->is_cupti_required ())
162- {
163- using measure_t = nvbench::detail::measure_cupti<KL>;
164- measure_t measure{*this , kernel_launcher};
165- measure ();
166- }
149+ using measure_t = nvbench::detail::measure_cupti<KL>;
150+ measure_t measure{*this , kernel_launcher};
151+ measure ();
167152 }
168153#endif
169154
@@ -176,16 +161,12 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
176161 using wrapper_t = nvbench::detail::kernel_launch_timer_wrapper<KL>;
177162 wrapper_t wrapper{kernel_launcher};
178163
179- // Estimate bandwidth here
180164#ifdef NVBENCH_HAS_CUPTI
181- if constexpr (!(modifier_tags & run_once ))
165+ if ( this -> is_cupti_required () && ! this -> get_run_once ( ))
182166 {
183- if (this ->is_cupti_required ())
184- {
185- using measure_t = nvbench::detail::measure_cupti<wrapper_t >;
186- measure_t measure{*this , wrapper};
187- measure ();
188- }
167+ using measure_t = nvbench::detail::measure_cupti<wrapper_t >;
168+ measure_t measure{*this , wrapper};
169+ measure ();
189170 }
190171#endif
191172
@@ -201,9 +182,13 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
201182 static_assert (!(tags & timer), " Hot measurement doesn't support the `timer` exec_tag." );
202183 static_assert (!(tags & no_batch), " Hot measurement doesn't support the `no_batch` exec_tag." );
203184 static_assert (!(tags & no_gpu), " Hot measurement doesn't support the `no_gpu` exec_tag." );
204- using measure_t = nvbench::detail::measure_hot<KL>;
205- measure_t measure{*this , kernel_launcher};
206- measure ();
185+
186+ if (!this ->get_run_once ())
187+ {
188+ using measure_t = nvbench::detail::measure_hot<KL>;
189+ measure_t measure{*this , kernel_launcher};
190+ measure ();
191+ }
207192 }
208193 }
209194}
0 commit comments