-
Notifications
You must be signed in to change notification settings - Fork 359
Expand file tree
/
Copy pathbasic.cu
More file actions
44 lines (35 loc) · 1.64 KB
/
basic.cu
File metadata and controls
44 lines (35 loc) · 1.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
//===----------------------------------------------------------------------===//
//
// Part of CUDA Experimental in CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//
#include <thrust/device_vector.h>
#include <cuda/memory_pool>
#include <cuda/std/complex>
#include <cuda/std/execution>
#include <cuda/stream>
#include "nvbench_helper.cuh"
// Benchmark body for cuda::std::remove_copy with a CUDA execution policy.
//
// The input holds `Elements` values of type T produced by `generate` with
// bounds T{0} and T{42}; every element equal to T{42} is dropped while the
// rest are copied into an equally sized, uninitialized output vector.
//
// Reported throughput figures:
//   * element count  - the number of input elements
//   * global reads   - one T per input element
//   * global writes  - one T per element that is not equal to T{42}
template <typename T>
static void basic(nvbench::state& state, nvbench::type_list<T>)
{
  const T removed_value{42};
  const auto num_items = static_cast<std::size_t>(state.get_int64("Elements"));

  thrust::device_vector<T> input = generate(num_items, bit_entropy::_1_000, T{0}, removed_value);
  thrust::device_vector<T> output(num_items, thrust::no_init);

  // Determine up front how many elements are going to be filtered out, so the
  // amount of data written by the algorithm can be reported accurately.
  const auto num_removed = cuda::std::count(cuda::execution::gpu, input.begin(), input.end(), removed_value);

  state.add_element_count(num_items);
  state.add_global_memory_reads<T>(num_items);
  state.add_global_memory_writes<T>(num_items - num_removed);

  // Allocator handed to the execution policy used by the measured call.
  caching_allocator_t alloc{};

  auto run_remove_copy = [&](nvbench::launch& launch) {
    auto policy = cuda_policy(alloc, launch);
    do_not_optimize(cuda::std::remove_copy(policy, input.begin(), input.end(), output.begin(), removed_value));
  };

  state.exec(nvbench::exec_tag::gpu | nvbench::exec_tag::no_batch | nvbench::exec_tag::sync, run_remove_copy);
}
// Registers `basic` with nvbench, instantiated over the `fundamental_types`
// type axis. The benchmark is named "base", its type axis is labelled "T{ct}",
// and the "Elements" axis is a power-of-two axis built from
// nvbench::range(16, 28, 4) (exponents 16 through 28 in steps of 4, assuming
// nvbench's inclusive range semantics).
NVBENCH_BENCH_TYPES(basic, NVBENCH_TYPE_AXES(fundamental_types))
.set_name("base")
.set_type_axes_names({"T{ct}"})
.add_int64_power_of_two_axis("Elements", nvbench::range(16, 28, 4));