File tree Expand file tree Collapse file tree 2 files changed +16
-6
lines changed Expand file tree Collapse file tree 2 files changed +16
-6
lines changed Original file line number Diff line number Diff line change @@ -44,6 +44,16 @@ device_info::device_info(int id)
4444 , m_nvml_device(nullptr )
4545{
4646 NVBENCH_CUDA_CALL (cudaGetDeviceProperties (&m_prop, m_id));
47+
48+ int val{};
49+ NVBENCH_CUDA_CALL (cudaDeviceGetAttribute (&val, cudaDevAttrClockRate, m_id));
50+ // kHz -> Hz
51+ m_sm_default_clock_rate = static_cast <std::size_t >(val) * 1000 ;
52+
53+ NVBENCH_CUDA_CALL (cudaDeviceGetAttribute (&val, cudaDevAttrMemoryClockRate, m_id));
54+ // kHz -> Hz
55+ m_global_memory_bus_peak_clock_rate = static_cast <std::size_t >(val) * 1000 ;
56+
4757 // NVML's lifetime should extend for the entirety of the process, so store in a
4858 // global.
4959 [[maybe_unused]] static auto nvml_lifetime = nvbench::nvml::NVMLLifetimeManager ();
Original file line number Diff line number Diff line change @@ -106,10 +106,7 @@ struct device_info
106106 }
107107
108108 /// @return The default clock rate of the SM in Hz.
109- [[nodiscard]] std::size_t get_sm_default_clock_rate () const
110- { // kHz -> Hz
111- return static_cast <std::size_t >(m_prop.clockRate ) * 1000 ;
112- }
109+ [[nodiscard]] std::size_t get_sm_default_clock_rate () const { return m_sm_default_clock_rate; }
113110
114111 /// @return The number of physical streaming multiprocessors on this device, as reported in the stored cudaDeviceProp (m_prop.multiProcessorCount).
115112 [[nodiscard]] int get_number_of_sms () const { return m_prop.multiProcessorCount ; }
@@ -143,8 +140,8 @@ struct device_info
143140
144141 /// @return The peak clock rate of the global memory bus in Hz.
145142 [[nodiscard]] std::size_t get_global_memory_bus_peak_clock_rate () const
146- { // kHz -> Hz
147- return static_cast <std:: size_t >(m_prop. memoryClockRate ) * 1000 ;
143+ {
144+ return m_global_memory_bus_peak_clock_rate ;
148145 }
149146
150147 /// @return The width of the global memory bus in bits.
@@ -200,6 +197,9 @@ private:
200197 int m_id;
201198 cudaDeviceProp m_prop;
202199 nvmlDevice_st *m_nvml_device;
200+
201+ std::size_t m_sm_default_clock_rate;
202+ std::size_t m_global_memory_bus_peak_clock_rate;
203203};
204204
205205// get_ptx_version implementation; this needs to stay in the header so it will
You can’t perform that action at this time.
0 commit comments