Skip to content

Commit 5c327b4

Browse files
Add a benchmark example parametrized by typename and integral constant.
Add a variation of the copy_type_sweep benchmark in which the block size is controlled via an integral constant passed as a template parameter.
1 parent 935bb0b commit 5c327b4

File tree

1 file changed

+32
-0
lines changed

1 file changed

+32
-0
lines changed

examples/axes.cu

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,3 +171,35 @@ using ctcs_types = nvbench::type_list<nvbench::int8_t,
171171
nvbench::float64_t>;
172172
NVBENCH_BENCH_TYPES(copy_type_conversion_sweep, NVBENCH_TYPE_AXES(ctcs_types, ctcs_types))
173173
.set_type_axes_names({"In", "Out"});
174+
175+
// ==================================================================================
176+
// Passing `type_list` of typenames, and `enum_type_list` to build cartesian product
177+
// of typenames and integral constants
178+
179+
template <typename ValueT, uint BLOCK_DIM>
180+
void copy_type_and_block_size_sweep(nvbench::state &state,
181+
nvbench::type_list<ValueT, nvbench::enum_type<BLOCK_DIM>>)
182+
{
183+
const std::size_t nelems = 256 * 1024 * 1024 / sizeof(ValueT);
184+
thrust::device_vector<ValueT> inp(nelems);
185+
thrust::device_vector<ValueT> out(nelems);
186+
187+
const uint gridSize = (nelems + BLOCK_DIM - 1) / BLOCK_DIM;
188+
189+
const ValueT *inp_p = thrust::raw_pointer_cast(inp.data());
190+
ValueT *out_p = thrust::raw_pointer_cast(out.data());
191+
192+
state.add_element_count(nelems, "ElementCount");
193+
state.add_global_memory_reads<ValueT>(nelems, "Input");
194+
state.add_global_memory_writes<ValueT>(nelems, "Output");
195+
196+
state.exec([&](nvbench::launch &launch) {
197+
nvbench::copy_kernel<<<gridSize, BLOCK_DIM, 0, launch.get_stream()>>>(inp_p, out_p, nelems);
198+
});
199+
}
200+
201+
using types_list =
202+
nvbench::type_list<nvbench::int8_t, nvbench::int16_t, nvbench::int32_t, nvbench::int64_t>;
203+
using block_sizes = nvbench::enum_type_list<64u, 128u, 196u, 256u, 320u, 512u>;
204+
NVBENCH_BENCH_TYPES(copy_type_and_block_size_sweep, NVBENCH_TYPE_AXES(types_list, block_sizes))
205+
.set_type_axes_names({"Type", "BlockSize"});

0 commit comments

Comments
 (0)