Skip to content

Commit f8fa5cd

Browse files
authored
Merge branch 'main' into pre-commit-ci-update-config
2 parents 0bfa2b1 + e7cc1e3 commit f8fa5cd

File tree

1 file changed

+41
-2
lines changed

1 file changed

+41
-2
lines changed

examples/axes.cu

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ void copy_sweep_grid_shape(nvbench::state &state)
6868
state.add_global_memory_writes<nvbench::int32_t>(num_values);
6969

7070
// Allocate device memory:
71-
thrust::device_vector<nvbench::int32_t> in(num_values, 0);
71+
thrust::device_vector<nvbench::int32_t> in(num_values, 1);
7272
thrust::device_vector<nvbench::int32_t> out(num_values, 0);
7373

7474
state.exec([block_size,
@@ -102,7 +102,7 @@ void copy_type_sweep(nvbench::state &state, nvbench::type_list<ValueType>)
102102
state.add_global_memory_writes<ValueType>(num_values);
103103

104104
// Allocate device memory:
105-
thrust::device_vector<ValueType> in(num_values, 0);
105+
thrust::device_vector<ValueType> in(num_values, ValueType{17});
106106
thrust::device_vector<ValueType> out(num_values, 0);
107107

108108
state.exec([num_values,
@@ -171,3 +171,42 @@ using ctcs_types = nvbench::type_list<nvbench::int8_t,
171171
nvbench::float64_t>;
172172
NVBENCH_BENCH_TYPES(copy_type_conversion_sweep, NVBENCH_TYPE_AXES(ctcs_types, ctcs_types))
173173
.set_type_axes_names({"In", "Out"});
174+
175+
// ==================================================================================
176+
// Passing a list of typenames and an `enum_type_list` to build the Cartesian product
177+
// of typenames and integral constants
178+
179+
// Define a constant-wrapper helper type.
180+
// cw_t<V> wraps the compile-time value V in a std::integral_constant so that a
// value can be passed where a type is expected. The value type T defaults to
// decltype(V), e.g. cw_t<64u> is std::integral_constant<unsigned, 64u>.
template <auto V, typename T = decltype(V)>
181+
using cw_t = std::integral_constant<T, V>;
182+
183+
// Benchmark body that launches nvbench::copy_kernel for one combination of
// element type and block size.
//
// Template parameters (both deduced from the type_list argument):
//   ValueT    - element type of the input and output device buffers.
//   BLOCK_DIM - threads per block used for the kernel launch; it arrives as
//               cw_t<BLOCK_DIM>, i.e. std::integral_constant<unsigned, BLOCK_DIM>.
// Parameters:
//   state     - nvbench state that receives the element / memory counts and
//               executes the launch lambda through state.exec().
//   (unnamed) - tag argument; only its type is used, to carry ValueT and BLOCK_DIM.
template <typename ValueT, unsigned BLOCK_DIM>
184+
void copy_type_and_block_size_sweep(nvbench::state &state,
185+
nvbench::type_list<ValueT, cw_t<BLOCK_DIM>>)
186+
{
187+
// Number of ValueT elements that fit in 256 MiB: the buffer size in bytes is
// held fixed while the element count scales with 1 / sizeof(ValueT).
const std::size_t nelems = 256 * 1024 * 1024 / sizeof(ValueT);
188+
ValueT fill_value{42};
189+
// Input buffer holds nelems copies of fill_value.
thrust::device_vector<ValueT> inp(nelems, fill_value);
190+
// Output buffer starts out value-initialized (ValueT{}).
thrust::device_vector<ValueT> out(nelems, ValueT{});
191+
192+
// use cuda::ceil_div(nelems, BLOCK_DIM) with CCCL 2.8 and newer
193+
// Ceiling division: the smallest block count with gridSize * BLOCK_DIM >= nelems.
const auto gridSize = (nelems + BLOCK_DIM - 1) / BLOCK_DIM;
194+
195+
// Raw device pointers for the kernel arguments; the input is read-only.
const ValueT *inp_p = thrust::raw_pointer_cast(inp.data());
196+
ValueT *out_p = thrust::raw_pointer_cast(out.data());
197+
198+
// Register the per-launch work with the state: nelems elements processed,
// nelems ValueT reads and nelems ValueT writes, labelled "ElementCount",
// "Input" and "Output".
// NOTE(review): presumably these feed nvbench's throughput reporting - confirm.
state.add_element_count(nelems, "ElementCount");
199+
state.add_global_memory_reads<ValueT>(nelems, "Input");
200+
state.add_global_memory_writes<ValueT>(nelems, "Output");
201+
202+
// The lambda captures the locals above by reference and launches the copy
// kernel on the stream supplied by nvbench, with no dynamic shared memory.
// NOTE(review): assumes state.exec() is done invoking the lambda before this
// function returns and the device vectors are destroyed - confirm.
state.exec([&](nvbench::launch &launch) {
203+
nvbench::copy_kernel<<<gridSize, BLOCK_DIM, 0, launch.get_stream()>>>(inp_p, out_p, nelems);
204+
});
205+
}
206+
207+
// cw_list<V...> is an nvbench::type_list containing one cw_t<V> for each value
// in the pack, preserving the order in which the values are written.
template <auto... V>
208+
using cw_list = nvbench::type_list<cw_t<V>...>;
209+
210+
// Threads-per-block values swept by the benchmark registered below.
// The `u` suffix matters: copy_type_and_block_size_sweep deduces
// `unsigned BLOCK_DIM` from cw_t<BLOCK_DIM>, so every value must have type
// unsigned. Each size is a multiple of the 32-thread warp so that no launch
// carries a partially filled warp.
using block_sizes = cw_list<64u, 128u, 192u, 256u, 320u, 512u>;
211+
// Register copy_type_and_block_size_sweep with two type axes, ctcs_types and
// block_sizes, and name those axes "Type" and "BlockSize" respectively.
NVBENCH_BENCH_TYPES(copy_type_and_block_size_sweep, NVBENCH_TYPE_AXES(ctcs_types, block_sizes))
212+
.set_type_axes_names({"Type", "BlockSize"});

0 commit comments

Comments
 (0)