|
| 1 | +/* |
| 2 | + * Copyright 2021-2025 NVIDIA Corporation |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 with the LLVM exception |
| 5 | + * (the "License"); you may not use this file except in compliance with |
| 6 | + * the License. |
| 7 | + * |
| 8 | + * You may obtain a copy of the License at |
| 9 | + * |
| 10 | + * http://llvm.org/foundation/relicensing/LICENSE.txt |
| 11 | + * |
| 12 | + * Unless required by applicable law or agreed to in writing, software |
| 13 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | + * See the License for the specific language governing permissions and |
| 16 | + * limitations under the License. |
| 17 | + */ |
| 18 | + |
| 19 | +#pragma once |
| 20 | + |
| 21 | +#include <nvbench/cpu_timer.cuh> |
| 22 | +#include <nvbench/exec_tag.cuh> |
| 23 | +#include <nvbench/launch.cuh> |
| 24 | +#include <nvbench/stopping_criterion.cuh> |
| 25 | + |
| 26 | +#include <nvbench/detail/kernel_launcher_timer_wrapper.cuh> |
| 27 | +#include <nvbench/detail/statistics.cuh> |
| 28 | + |
| 29 | +#include <utility> |
| 30 | +#include <vector> |
| 31 | + |
| 32 | +namespace nvbench |
| 33 | +{ |
| 34 | + |
| 35 | +struct state; |
| 36 | + |
| 37 | +namespace detail |
| 38 | +{ |
| 39 | + |
| 40 | +// non-templated code goes here: |
| 41 | +struct measure_cpu_only_base |
| 42 | +{ |
| 43 | + explicit measure_cpu_only_base(nvbench::state &exec_state); |
| 44 | + measure_cpu_only_base(const measure_cpu_only_base &) = delete; |
| 45 | + measure_cpu_only_base(measure_cpu_only_base &&) = delete; |
| 46 | + measure_cpu_only_base &operator=(const measure_cpu_only_base &) = delete; |
| 47 | + measure_cpu_only_base &operator=(measure_cpu_only_base &&) = delete; |
| 48 | + |
| 49 | +protected: |
| 50 | + void check(); |
| 51 | + void initialize(); |
| 52 | + void run_trials_prologue(); |
| 53 | + void record_measurements(); |
| 54 | + bool is_finished(); |
| 55 | + void run_trials_epilogue(); |
| 56 | + void generate_summaries(); |
| 57 | + |
| 58 | + void check_skip_time(nvbench::float64_t warmup_time); |
| 59 | + |
| 60 | + nvbench::state &m_state; |
| 61 | + |
| 62 | + // Required to satisfy the KernelLauncher interface: |
| 63 | + nvbench::launch m_launch; |
| 64 | + |
| 65 | + nvbench::cpu_timer m_cpu_timer; |
| 66 | + nvbench::cpu_timer m_walltime_timer; |
| 67 | + |
| 68 | + nvbench::criterion_params m_criterion_params; |
| 69 | + nvbench::stopping_criterion_base& m_stopping_criterion; |
| 70 | + |
| 71 | + bool m_run_once{false}; |
| 72 | + |
| 73 | + nvbench::int64_t m_min_samples{}; |
| 74 | + |
| 75 | + nvbench::float64_t m_skip_time{}; |
| 76 | + nvbench::float64_t m_timeout{}; |
| 77 | + |
| 78 | + nvbench::int64_t m_total_samples{}; |
| 79 | + nvbench::float64_t m_total_cpu_time{}; |
| 80 | + nvbench::float64_t m_cpu_noise{}; // rel stdev |
| 81 | + |
| 82 | + std::vector<nvbench::float64_t> m_cpu_times; |
| 83 | + |
| 84 | + bool m_max_time_exceeded{}; |
| 85 | +}; |
| 86 | + |
| 87 | +template <typename KernelLauncher> |
| 88 | +struct measure_cpu_only : public measure_cpu_only_base |
| 89 | +{ |
| 90 | + measure_cpu_only(nvbench::state &state, KernelLauncher &kernel_launcher) |
| 91 | + : measure_cpu_only_base(state) |
| 92 | + , m_kernel_launcher{kernel_launcher} |
| 93 | + {} |
| 94 | + |
| 95 | + void operator()() |
| 96 | + { |
| 97 | + this->check(); |
| 98 | + this->initialize(); |
| 99 | + this->run_warmup(); |
| 100 | + |
| 101 | + this->run_trials_prologue(); |
| 102 | + this->run_trials(); |
| 103 | + this->run_trials_epilogue(); |
| 104 | + |
| 105 | + this->generate_summaries(); |
| 106 | + } |
| 107 | + |
| 108 | +private: |
| 109 | + // Run the kernel once, measuring the CPU time. If under skip_time, skip the |
| 110 | + // measurement. |
| 111 | + void run_warmup() |
| 112 | + { |
| 113 | + if (m_run_once) |
| 114 | + { // Skip warmups |
| 115 | + return; |
| 116 | + } |
| 117 | + |
| 118 | + this->launch_kernel(m_cpu_timer); |
| 119 | + this->check_skip_time(m_cpu_timer.get_duration()); |
| 120 | + } |
| 121 | + |
| 122 | + void run_trials() |
| 123 | + { |
| 124 | + do |
| 125 | + { |
| 126 | + this->launch_kernel(m_cpu_timer); |
| 127 | + this->record_measurements(); |
| 128 | + } while (!this->is_finished()); |
| 129 | + } |
| 130 | + |
| 131 | + template <typename TimerT> |
| 132 | + __forceinline__ void launch_kernel(TimerT &timer) |
| 133 | + { |
| 134 | + m_kernel_launcher(m_launch, timer); |
| 135 | + } |
| 136 | + |
| 137 | + KernelLauncher &m_kernel_launcher; |
| 138 | +}; |
| 139 | + |
| 140 | +} // namespace detail |
| 141 | +} // namespace nvbench |
0 commit comments