Skip to content

Commit 0c56311

Browse files
committed
Fetch clock rates using cudaDeviceGetAttribute.
1 parent 33fc77a commit 0c56311

File tree

2 files changed

+16
-6
lines changed

2 files changed

+16
-6
lines changed

nvbench/device_info.cu

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,16 @@ device_info::device_info(int id)
4444
, m_nvml_device(nullptr)
4545
{
4646
NVBENCH_CUDA_CALL(cudaGetDeviceProperties(&m_prop, m_id));
47+
48+
int val{};
49+
NVBENCH_CUDA_CALL(cudaDeviceGetAttribute(&val, cudaDevAttrClockRate, m_id));
50+
// kHz -> Hz
51+
m_sm_default_clock_rate = static_cast<std::size_t>(val) * 1000;
52+
53+
NVBENCH_CUDA_CALL(cudaDeviceGetAttribute(&val, cudaDevAttrMemoryClockRate, m_id));
54+
// kHz -> Hz
55+
m_global_memory_bus_peak_clock_rate = static_cast<std::size_t>(val) * 1000;
56+
4757
// NVML's lifetime should extend for the entirety of the process, so store in a
4858
// global.
4959
[[maybe_unused]] static auto nvml_lifetime = nvbench::nvml::NVMLLifetimeManager();

nvbench/device_info.cuh

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -106,10 +106,7 @@ struct device_info
106106
}
107107

108108
/// @return The default clock rate of the SM in Hz.
109-
[[nodiscard]] std::size_t get_sm_default_clock_rate() const
110-
{ // kHz -> Hz
111-
return static_cast<std::size_t>(m_prop.clockRate) * 1000;
112-
}
109+
[[nodiscard]] std::size_t get_sm_default_clock_rate() const { return m_sm_default_clock_rate; }
113110

114111
/// @return The number of physical streaming multiprocessors on this device.
115112
[[nodiscard]] int get_number_of_sms() const { return m_prop.multiProcessorCount; }
@@ -143,8 +140,8 @@ struct device_info
143140

144141
/// @return The peak clock rate of the global memory bus in Hz.
145142
[[nodiscard]] std::size_t get_global_memory_bus_peak_clock_rate() const
146-
{ // kHz -> Hz
147-
return static_cast<std::size_t>(m_prop.memoryClockRate) * 1000;
143+
{
144+
return m_global_memory_bus_peak_clock_rate;
148145
}
149146

150147
/// @return The width of the global memory bus in bits.
@@ -200,6 +197,9 @@ private:
200197
int m_id;
201198
cudaDeviceProp m_prop;
202199
nvmlDevice_st *m_nvml_device;
200+
201+
std::size_t m_sm_default_clock_rate;
202+
std::size_t m_global_memory_bus_peak_clock_rate;
203203
};
204204

205205
// get_ptx_version implementation; this needs to stay in the header so it will

0 commit comments

Comments
 (0)