Skip to content

Commit e502eb7

Browse files
committed
Add NVML Provider and GPU Energy Monitoring Tools
This commit adds comprehensive NVML-based GPU energy monitoring:

NVML Provider:
- NVMLProvider class for GPU power monitoring (provider_nvml.cpp/hpp)
- Device discovery and power usage APIs
- Comprehensive error handling and status checking
- Support for multiple GPUs with individual device queries

Kokkos Tools:
- kp_nvml_power: Background power monitoring with daemon
- kp_nvml_direct_power: Direct power measurement tool
- kp_nvml_energy_consumption: Energy consumption tracking

Testing:
- Unit tests for NVML provider functionality
- Integration tests with daemon system
- Fast sampling tests (20ms intervals)

Features:
- Configurable sampling intervals
- CSV export of power data with timestamps
- Thread-safe data collection
- Comprehensive power statistics (min/max/avg/total energy)

Depends on: feature/energy-profiler-daemon
1 parent 10c87de commit e502eb7

File tree

11 files changed

+2208
-6
lines changed

11 files changed

+2208
-6
lines changed

profiling/energy-profiler/CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1-
# Energy Profiler Infrastructure with Daemon Support
2-
# This provides the foundational infrastructure for energy profiling tools:
1+
# Energy Profiler Infrastructure with NVML Provider
2+
# Complete infrastructure for NVML-based GPU energy profiling:
33
# - Timer system for kernel and region tracking
44
# - Common utilities (filename generation, error handling)
55
# - Tool interface definitions
66
# - Basic kernel timer tool
77
# - Daemon system for background monitoring tasks
8+
# - NVML provider for NVIDIA GPU power monitoring
9+
# - Multiple NVML-based profiling tools
810

911
# User-facing switch (default OFF) for verbose output in the energy profiler
# tools.
option(
  KOKKOS_TOOLS_ENABLE_ENERGY_PROFILER_VERBOSE
  "Enable verbose output for energy profiler tools"
  OFF
)
1012

profiling/energy-profiler/kokkos-tools/CMakeLists.txt

Lines changed: 60 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1-
# Energy Profiler Infrastructure - Kokkos Tools
2-
# Basic kernel timer tool using the timing infrastructure
1+
# Energy Profiler Infrastructure - Kokkos Tools with NVML Support
2+
# Basic kernel timer tool and NVML-based energy monitoring tools
3+
4+
# Find Threads package for pthread support (needed by std::thread in daemon.cpp)
5+
# REQUIRED makes configuration fail if no thread library is found. The
# Threads::Threads imported target this defines is linked by the daemon-based
# NVML tools (kp_nvml_power, kp_nvml_direct_power) further down in this file.
find_package(Threads REQUIRED)
36

47
# Basic kernel timer tool (no energy monitoring dependencies)
58
kp_add_library(kp_energy_kernel_timer kp_energy_kernel_timer.cpp
@@ -12,6 +15,59 @@ target_include_directories(kp_energy_kernel_timer PRIVATE
1215
)
1316

1417
# Add verbose output option
15-
if(KOKKOS_TOOLS_ENABLE_ENERGY_PROFILER_VERBOSE)
16-
target_compile_definitions(kp_energy_kernel_timer PRIVATE KOKKOS_TOOLS_ENABLE_VERBOSE_OUTPUT)
18+
# Test the option under the name it is declared with in the parent
# energy-profiler CMakeLists.txt (KOKKOS_TOOLS_ENABLE_ENERGY_PROFILER_VERBOSE).
# The unprefixed ENABLE_ENERGY_PROFILER_VERBOSE is not defined by any option(),
# so checking it would leave verbose output permanently disabled.
if(KOKKOS_TOOLS_ENABLE_ENERGY_PROFILER_VERBOSE)
  # NOTE(review): the macro was previously KOKKOS_TOOLS_ENABLE_VERBOSE_OUTPUT;
  # confirm the tool sources test ENABLE_VERBOSE_OUTPUT before relying on it.
  target_compile_definitions(kp_energy_kernel_timer PRIVATE ENABLE_VERBOSE_OUTPUT)
endif()
21+
22+
# NVML-based tools
#
# The tools are built only when the CUDA toolkit is found and it provides the
# CUDA::nvml imported target. CUDA::nvml is a target created by
# find_package(CUDAToolkit), not a package of its own, so its availability is
# checked with if(TARGET ...) instead of a second find_package() call.
# Both lookups are optional: when either is missing, a status message is
# printed and the NVML tools are skipped without failing the configure step.
find_package(CUDAToolkit QUIET)

if(CUDAToolkit_FOUND)
  if(TARGET CUDA::nvml)
    message(STATUS "Found CUDA NVML, making NVML power profiler available.")

    # NVML Power Tool (with daemon)
    kp_add_library(kp_nvml_power kp_nvml_power.cpp
      ../common/daemon.cpp
      ../common/filename_prefix.cpp
      ../common/timer_system.cpp
      ../provider/provider_nvml.cpp
    )
    # daemon.cpp is compiled into this tool, hence the Threads::Threads link.
    target_link_libraries(kp_nvml_power PRIVATE CUDA::nvml Threads::Threads)

    target_include_directories(kp_nvml_power PRIVATE
      ${CMAKE_CURRENT_SOURCE_DIR}
    )

    # NVML Direct Power Tool (with daemon)
    kp_add_library(kp_nvml_direct_power kp_nvml_direct_power.cpp
      ../common/daemon.cpp
      ../common/filename_prefix.cpp
      ../common/timer_system.cpp
      ../provider/provider_nvml.cpp
    )
    target_link_libraries(kp_nvml_direct_power PRIVATE CUDA::nvml Threads::Threads)

    target_include_directories(kp_nvml_direct_power PRIVATE
      ${CMAKE_CURRENT_SOURCE_DIR}
    )

    # NVML Energy Consumption Tool (no daemon needed)
    kp_add_library(kp_nvml_energy_consumption kp_nvml_energy_consumption.cpp
      ../common/filename_prefix.cpp
      ../common/timer_system.cpp
      ../provider/provider_nvml.cpp
    )
    # No daemon.cpp in this tool, so only NVML is linked.
    target_link_libraries(kp_nvml_energy_consumption PRIVATE CUDA::nvml)

    target_include_directories(kp_nvml_energy_consumption PRIVATE
      ${CMAKE_CURRENT_SOURCE_DIR}
    )
  else()
    message(STATUS "CUDA::nvml target not found, skipping NVML power profiler.")
  endif()
else()
  message(STATUS "CUDAToolkit not found, skipping NVML power profiler.")
endif()

0 commit comments

Comments
 (0)