@@ -171,3 +171,35 @@ using ctcs_types = nvbench::type_list<nvbench::int8_t,
171171 nvbench::float64_t >;
172172NVBENCH_BENCH_TYPES (copy_type_conversion_sweep, NVBENCH_TYPE_AXES(ctcs_types, ctcs_types))
173173 .set_type_axes_names({" In" , " Out" });
174+
175+ // ==================================================================================
// Combining a `type_list` of typenames with an `enum_type_list` of integral
// constants to benchmark the cartesian product of types and constants.
178+
// Benchmarks `nvbench::copy_kernel` for one (value type, block size) pair.
//
// Each of the input and output buffers is sized to hold 256 MiB worth of
// `ValueT` elements, and the kernel is launched with `BLOCK_DIM` threads per
// block on the stream supplied by NVBench.
//
// Template parameters:
//   ValueT    - element type of the input and output buffers.
//   BLOCK_DIM - threads per block; arrives as a compile-time constant through
//               the `nvbench::enum_type<BLOCK_DIM>` entry of the tag argument.
//               Declared as `unsigned int` rather than the non-standard `uint`
//               alias, which not every toolchain provides.
//
// Parameters:
//   state     - benchmark state; receives the element count and the global
//               memory read/write sizes, and executes the kernel launch.
//   (unnamed) - `type_list` tag selecting this instantiation; carries no data.
template <typename ValueT, unsigned int BLOCK_DIM>
void copy_type_and_block_size_sweep(nvbench::state &state,
                                    nvbench::type_list<ValueT, nvbench::enum_type<BLOCK_DIM>>)
{
  // The grid size below divides by BLOCK_DIM, so reject zero at compile time.
  static_assert(BLOCK_DIM > 0, "BLOCK_DIM must be a positive thread count");

  const std::size_t nelems = 256 * 1024 * 1024 / sizeof(ValueT);
  thrust::device_vector<ValueT> inp(nelems);
  thrust::device_vector<ValueT> out(nelems);

  // Ceiling division: enough blocks that every element is covered even when
  // nelems is not a multiple of BLOCK_DIM. The launch configuration takes a
  // 32-bit grid dimension, so the narrowing from std::size_t is made explicit.
  const unsigned int gridSize = static_cast<unsigned int>((nelems + BLOCK_DIM - 1) / BLOCK_DIM);

  const ValueT *inp_p = thrust::raw_pointer_cast(inp.data());
  ValueT *out_p       = thrust::raw_pointer_cast(out.data());

  // Report the amount of work per launch to NVBench.
  state.add_element_count(nelems, " ElementCount");
  state.add_global_memory_reads<ValueT>(nelems, " Input");
  state.add_global_memory_writes<ValueT>(nelems, " Output");

  state.exec([&](nvbench::launch &launch) {
    nvbench::copy_kernel<<<gridSize, BLOCK_DIM, 0, launch.get_stream()>>>(inp_p, out_p, nelems);
  });
}
200+
// Value types swept along the first type axis.
using types_list =
  nvbench::type_list<nvbench::int8_t, nvbench::int16_t, nvbench::int32_t, nvbench::int64_t>;

// Threads-per-block values swept along the second type axis. Every entry is a
// multiple of 64 (and therefore of the 32-thread warp size), so no
// configuration launches blocks that end in a partially filled warp.
using block_sizes = nvbench::enum_type_list<64u, 128u, 192u, 256u, 320u, 512u>;

// Instantiate the benchmark for every (types_list, block_sizes) combination.
NVBENCH_BENCH_TYPES(copy_type_and_block_size_sweep, NVBENCH_TYPE_AXES(types_list, block_sizes))
  .set_type_axes_names({" Type", " BlockSize"});
0 commit comments