@@ -45,6 +45,7 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
4545{
4646 using KL = typename std::remove_reference<KernelLauncher>::type;
4747 using namespace nvbench ::exec_tag::impl;
48+
4849 static_assert (is_exec_tag_v<ExecTags>,
4950 " `ExecTags` argument must be a member (or combination of members) from "
5051 " `nvbench::exec_tag`." );
@@ -55,41 +56,6 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
5556 constexpr auto modifier_tags = tags & modifier_mask;
5657 constexpr auto measure_tags = tags & measure_mask;
5758
58- if ((modifier_tags & no_gpu) && !this ->get_is_cpu_only ())
59- {
60- throw std::runtime_error (" The `nvbench::exec_tag::no_gpu` tag requires that "
61- " `set_is_cpu_only(true)` is called when defining the benchmark." );
62- }
63-
64- if ((modifier_tags & gpu) && this ->get_is_cpu_only ())
65- {
66- throw std::runtime_error (" The `nvbench::exec_tag::gpu` tag requires that "
67- " `set_is_cpu_only(true)` is NOT called when defining the benchmark." );
68- }
69-
70- // "run once" should disable batch measurements:
71- // TODO This should just be a runtime branch in measure_cold. Currently this causes two versions
72- // of measure_cold to be compiled. We don't expose the `run_once` tag to users, it should be
73- // removed.
74- // TODO CPU measurements should support run_once as well.
75- if (!(modifier_tags & run_once) && this ->get_run_once ())
76- {
77- constexpr auto run_once_tags = modifier_tags | run_once | (measure_tags & ~hot);
78- this ->exec (run_once_tags, std::forward<KernelLauncher>(kernel_launcher));
79- return ;
80- }
81-
82- // TODO The `no_block` tag should be removed and replaced with a runtime branch in measure_cold
83- // and measure_hot. Currently this causes unnecessary codegen. Note that the `sync` exec_tag
84- // implies `no_block` when refactoring.
85- if (!(measure_tags & cpu_only) && !(modifier_tags & no_block) &&
86- this ->get_disable_blocking_kernel ())
87- {
88- constexpr auto no_block_tags = tags | no_block;
89- this ->exec (no_block_tags, std::forward<KernelLauncher>(kernel_launcher));
90- return ;
91- }
92-
9359 // If no measurements selected, pick some defaults based on the modifiers:
9460 if constexpr (!measure_tags)
9561 {
@@ -123,6 +89,24 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
12389 return ;
12490 }
12591
92+ if ((modifier_tags & no_gpu) && !this ->get_is_cpu_only ())
93+ {
94+ throw std::runtime_error (" The `nvbench::exec_tag::no_gpu` tag requires that "
95+ " `set_is_cpu_only(true)` is called when defining the benchmark." );
96+ }
97+
98+ if ((modifier_tags & gpu) && this ->get_is_cpu_only ())
99+ {
100+ throw std::runtime_error (" The `nvbench::exec_tag::gpu` tag requires that "
101+ " `set_is_cpu_only(true)` is NOT called when defining the benchmark." );
102+ }
103+
104+ // Syncing will cause the blocking kernel pattern to deadlock:
105+ if constexpr (modifier_tags & sync)
106+ {
107+ this ->set_disable_blocking_kernel (true );
108+ }
109+
126110 if (this ->is_skipped ())
127111 {
128112 return ;
@@ -157,23 +141,18 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
157141 {
158142 static_assert (!(tags & no_gpu), " Cold measurement doesn't support the `no_gpu` exec_tag." );
159143
160- constexpr bool use_blocking_kernel = !(tags & no_block);
161144 if constexpr (tags & timer)
162145 {
163- // Estimate bandwidth here
164146#ifdef NVBENCH_HAS_CUPTI
165- if constexpr (!(modifier_tags & run_once ))
147+ if ( this -> is_cupti_required () && ! this -> get_run_once ( ))
166148 {
167- if (this ->is_cupti_required ())
168- {
169- using measure_t = nvbench::detail::measure_cupti<KL>;
170- measure_t measure{*this , kernel_launcher};
171- measure ();
172- }
149+ using measure_t = nvbench::detail::measure_cupti<KL>;
150+ measure_t measure{*this , kernel_launcher};
151+ measure ();
173152 }
174153#endif
175154
176- using measure_t = nvbench::detail::measure_cold<KL, use_blocking_kernel >;
155+ using measure_t = nvbench::detail::measure_cold<KL>;
177156 measure_t measure{*this , kernel_launcher};
178157 measure ();
179158 }
@@ -182,20 +161,16 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
182161 using wrapper_t = nvbench::detail::kernel_launch_timer_wrapper<KL>;
183162 wrapper_t wrapper{kernel_launcher};
184163
185- // Estimate bandwidth here
186164#ifdef NVBENCH_HAS_CUPTI
187- if constexpr (!(modifier_tags & run_once ))
165+ if ( this -> is_cupti_required () && ! this -> get_run_once ( ))
188166 {
189- if (this ->is_cupti_required ())
190- {
191- using measure_t = nvbench::detail::measure_cupti<wrapper_t >;
192- measure_t measure{*this , wrapper};
193- measure ();
194- }
167+ using measure_t = nvbench::detail::measure_cupti<wrapper_t >;
168+ measure_t measure{*this , wrapper};
169+ measure ();
195170 }
196171#endif
197172
198- using measure_t = nvbench::detail::measure_cold<wrapper_t , use_blocking_kernel >;
173+ using measure_t = nvbench::detail::measure_cold<wrapper_t >;
199174 measure_t measure (*this , wrapper);
200175 measure ();
201176 }
@@ -207,10 +182,13 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
207182 static_assert (!(tags & timer), " Hot measurement doesn't support the `timer` exec_tag." );
208183 static_assert (!(tags & no_batch), " Hot measurement doesn't support the `no_batch` exec_tag." );
209184 static_assert (!(tags & no_gpu), " Hot measurement doesn't support the `no_gpu` exec_tag." );
210- constexpr bool use_blocking_kernel = !(tags & no_block);
211- using measure_t = nvbench::detail::measure_hot<KL, use_blocking_kernel>;
212- measure_t measure{*this , kernel_launcher};
213- measure ();
185+
186+ if (!this ->get_run_once ())
187+ {
188+ using measure_t = nvbench::detail::measure_hot<KL>;
189+ measure_t measure{*this , kernel_launcher};
190+ measure ();
191+ }
214192 }
215193 }
216194}
0 commit comments