Merge pull request #211 from alliepiper/clock_api

alliepiper · web-flow · commit eadb9133222a · 2025-04-14T17:12:42.000-04:00
Fetch clock rates using cudaDeviceGetAttribute.
diff --git a/nvbench/device_info.cu b/nvbench/device_info.cu
@@ -44,6 +44,16 @@ device_info::device_info(int id)
     , m_nvml_device(nullptr)
 {
   NVBENCH_CUDA_CALL(cudaGetDeviceProperties(&m_prop, m_id));
+
+  int val{};
+  NVBENCH_CUDA_CALL(cudaDeviceGetAttribute(&val, cudaDevAttrClockRate, m_id));
+  // cudaDevAttrClockRate is reported in kHz; convert to Hz.
+  m_sm_default_clock_rate = static_cast<std::size_t>(val) * 1000;
+
+  NVBENCH_CUDA_CALL(cudaDeviceGetAttribute(&val, cudaDevAttrMemoryClockRate, m_id));
+  // cudaDevAttrMemoryClockRate is reported in kHz; convert to Hz.
+  m_global_memory_bus_peak_clock_rate = static_cast<std::size_t>(val) * 1000;
+
   // NVML's lifetime should extend for the entirety of the process, so store in a
   // global.
   [[maybe_unused]] static auto nvml_lifetime = nvbench::nvml::NVMLLifetimeManager();
diff --git a/nvbench/device_info.cuh b/nvbench/device_info.cuh
@@ -106,10 +106,7 @@ struct device_info
   }
 
   /// @return The default clock rate of the SM in Hz.
-  [[nodiscard]] std::size_t get_sm_default_clock_rate() const
-  { // kHz -> Hz
-    return static_cast<std::size_t>(m_prop.clockRate) * 1000;
-  }
+  [[nodiscard]] std::size_t get_sm_default_clock_rate() const { return m_sm_default_clock_rate; }
 
   /// @return The number of physical streaming multiprocessors on this device.
   [[nodiscard]] int get_number_of_sms() const { return m_prop.multiProcessorCount; }
@@ -143,8 +140,8 @@ struct device_info
 
   /// @return The peak clock rate of the global memory bus in Hz.
   [[nodiscard]] std::size_t get_global_memory_bus_peak_clock_rate() const
-  { // kHz -> Hz
-    return static_cast<std::size_t>(m_prop.memoryClockRate) * 1000;
+  {
+    return m_global_memory_bus_peak_clock_rate;
   }
 
   /// @return The width of the global memory bus in bits.
@@ -200,6 +197,9 @@ private:
   int m_id;
   cudaDeviceProp m_prop;
   nvmlDevice_st *m_nvml_device;
+
+  std::size_t m_sm_default_clock_rate;
+  std::size_t m_global_memory_bus_peak_clock_rate;
 };
 
 // get_ptx_version implementation; this needs to stay in the header so it will