From 97c884b7e7d1a5b09319e9df5f3d4146261a62f7 Mon Sep 17 00:00:00 2001 From: Ethan Puyaubreau Date: Sat, 12 Jul 2025 00:34:04 +0000 Subject: [PATCH 01/11] energy-profiler: add basic structure and documentation --- profiling/energy-profiler/CMakeLists.txt | 4 ++++ profiling/energy-profiler/readme.md | 17 +++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 profiling/energy-profiler/CMakeLists.txt create mode 100644 profiling/energy-profiler/readme.md diff --git a/profiling/energy-profiler/CMakeLists.txt b/profiling/energy-profiler/CMakeLists.txt new file mode 100644 index 000000000..a2ace2132 --- /dev/null +++ b/profiling/energy-profiler/CMakeLists.txt @@ -0,0 +1,4 @@ +cmake_minimum_required(VERSION 3.16) + +add_subdirectory(variorum) +add_subdirectory(nvml) diff --git a/profiling/energy-profiler/readme.md b/profiling/energy-profiler/readme.md new file mode 100644 index 000000000..124be31d6 --- /dev/null +++ b/profiling/energy-profiler/readme.md @@ -0,0 +1,17 @@ +# Energy Profiling Tools for Kokkos + +Collection of tools for energy profiling in Kokkos applications. + +## Available Tools + +- **Variorum**: Multi-vendor support (NVIDIA, AMD, Intel) +- **NVML**: NVIDIA GPU specific + +Each tool directory contains: +- Source code +- Build/usage instructions +- Documentation on output format + +## Daemon Mechanism + +A "daemon" mechanism is used to collect power data during Kokkos application execution. This allows for continuous power monitoring with minimal overhead or, more generally, for data sampling at a specified interval.
\ No newline at end of file From 5931cab1fbd7bfdf966907ee500bd6227fa812a4 Mon Sep 17 00:00:00 2001 From: Ethan Puyaubreau Date: Sat, 12 Jul 2025 00:34:20 +0000 Subject: [PATCH 02/11] energy-profiler: add NVML support for GPU energy monitoring --- profiling/energy-profiler/nvml/CMakeLists.txt | 17 + .../nvml/kp_nvml_power_profiler.cpp | 127 +++++++ .../nvml/kp_nvml_power_profiler.hpp | 70 ++++ .../energy-profiler/nvml/kp_power_nvml.cpp | 355 ++++++++++++++++++ profiling/energy-profiler/nvml/readme.md | 23 ++ 5 files changed, 592 insertions(+) create mode 100644 profiling/energy-profiler/nvml/CMakeLists.txt create mode 100644 profiling/energy-profiler/nvml/kp_nvml_power_profiler.cpp create mode 100644 profiling/energy-profiler/nvml/kp_nvml_power_profiler.hpp create mode 100644 profiling/energy-profiler/nvml/kp_power_nvml.cpp create mode 100644 profiling/energy-profiler/nvml/readme.md diff --git a/profiling/energy-profiler/nvml/CMakeLists.txt b/profiling/energy-profiler/nvml/CMakeLists.txt new file mode 100644 index 000000000..72a92f9b2 --- /dev/null +++ b/profiling/energy-profiler/nvml/CMakeLists.txt @@ -0,0 +1,17 @@ +find_package(CUDAToolkit QUIET) + +if (CUDAToolkit_FOUND) + find_package(CUDA::nvml QUIET) + message(STATUS "Found CUDA NVML, making NVML power profiler available.") +else() + message(STATUS "CUDAToolkit not found, skipping NVML power profiler.") + return() +endif() + +kp_add_library(kp_power_nvml kp_power_nvml.cpp kp_nvml_power_profiler.cpp) + +target_link_libraries(kp_power_nvml PRIVATE CUDA::nvml pthread) + +target_include_directories(kp_power_nvml PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} +) \ No newline at end of file diff --git a/profiling/energy-profiler/nvml/kp_nvml_power_profiler.cpp b/profiling/energy-profiler/nvml/kp_nvml_power_profiler.cpp new file mode 100644 index 000000000..6a9a15de6 --- /dev/null +++ b/profiling/energy-profiler/nvml/kp_nvml_power_profiler.cpp @@ -0,0 +1,127 @@ +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include "kp_nvml_power_profiler.hpp" +#include +#include +#include + +namespace KokkosTools { +namespace NVMLPowerProfiler { + +void DataManager::add_power_data_point(int64_t timestamp, double power) { + power_data_points.push_back({timestamp, power}); +} + +void DataManager::start_region(const std::string& name, RegionType type) { + TimingInfo region; + region.name = name; + region.type = type; + region.start_time = std::chrono::high_resolution_clock::now(); + active_regions.push_back(region); +} + +void DataManager::end_region() { + if (!active_regions.empty()) { + auto& region = active_regions.back(); + region.end_time = std::chrono::high_resolution_clock::now(); + region.duration = std::chrono::duration_cast( + region.end_time - region.start_time); + + if (region.type == RegionType::UserRegion) { + completed_regions.push_back(region); + } else { + completed_kernels.push_back(region); + } + active_regions.pop_back(); + } +} + +const char* DataManager::region_type_to_string(RegionType type) const { + switch (type) { + case RegionType::ParallelFor: return "parallel_for"; + case RegionType::ParallelReduce: return "parallel_reduce"; + case RegionType::ParallelScan: return "parallel_scan"; + case RegionType::UserRegion: return "user_region"; + default: return "unknown"; + } +} + +void DataManager::write_power_data(const std::string& filename) const { + FILE* csv_file = fopen(filename.c_str(), "w"); + if (csv_file) { + fprintf(csv_file, 
"timestamp_epoch_ns,power_watts\n"); + for (const auto& point : power_data_points) { + fprintf(csv_file, "%" PRId64 ",%.6f\n", point.timestamp_ns, point.power_watts); + } + fclose(csv_file); + char cwd[256]; + getcwd(cwd, 256); + printf("KokkosP NVML Power: Power CSV data written to %s/%s (%" PRIu64 " data points)\n", + cwd, filename.c_str(), static_cast(power_data_points.size())); + } +} + +void DataManager::write_kernel_data(const std::string& filename) const { + if (completed_kernels.empty()) return; + + FILE* regions_file = fopen(filename.c_str(), "w"); + if (regions_file) { + fprintf(regions_file, "name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns\n"); + for (const auto& region : completed_kernels) { + auto start_ns = std::chrono::duration_cast( + region.start_time.time_since_epoch()) + .count(); + auto end_ns = std::chrono::duration_cast( + region.end_time.time_since_epoch()) + .count(); + fprintf(regions_file, "%s,%s,%" PRId64 ",%" PRId64 ",%" PRId64 "\n", + region.name.c_str(), region_type_to_string(region.type), start_ns, + end_ns, (int64_t)region.duration.count()); + } + fclose(regions_file); + char cwd[256]; + getcwd(cwd, 256); + printf("KokkosP NVML Power: Kernel timing CSV written to %s/%s\n", cwd, filename.c_str()); + } +} + +void DataManager::write_region_data(const std::string& filename) const { + if (completed_regions.empty()) return; + + FILE* regions_file = fopen(filename.c_str(), "w"); + if (regions_file) { + fprintf(regions_file, "name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns\n"); + for (const auto& region : completed_regions) { + auto start_ns = std::chrono::duration_cast( + region.start_time.time_since_epoch()) + .count(); + auto end_ns = std::chrono::duration_cast( + region.end_time.time_since_epoch()) + .count(); + fprintf(regions_file, "%s,%s,%" PRId64 ",%" PRId64 ",%" PRId64 "\n", + region.name.c_str(), region_type_to_string(region.type), start_ns, + end_ns, (int64_t)region.duration.count()); + } + 
fclose(regions_file); + char cwd[256]; + getcwd(cwd, 256); + printf("KokkosP NVML Power: Region timing CSV written to %s/%s\n", cwd, filename.c_str()); + } +} + +} // namespace NVMLPowerProfiler +} // namespace KokkosTools diff --git a/profiling/energy-profiler/nvml/kp_nvml_power_profiler.hpp b/profiling/energy-profiler/nvml/kp_nvml_power_profiler.hpp new file mode 100644 index 000000000..2f03423c7 --- /dev/null +++ b/profiling/energy-profiler/nvml/kp_nvml_power_profiler.hpp @@ -0,0 +1,70 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KP_POWER_PROFILER_HPP +#define KP_POWER_PROFILER_HPP + +#include +#include +#include +#include + +namespace KokkosTools { +namespace NVMLPowerProfiler { + +enum class RegionType { + Unknown, + ParallelFor, + ParallelReduce, + ParallelScan, + UserRegion +}; + +struct PowerDataPoint { + int64_t timestamp_ns; + double power_watts; +}; + +struct TimingInfo { + std::string name; + RegionType type; + std::chrono::high_resolution_clock::time_point start_time; + std::chrono::high_resolution_clock::time_point end_time; + std::chrono::nanoseconds duration; +}; + +class DataManager { +public: + void add_power_data_point(int64_t timestamp, double power); + void start_region(const std::string& name, RegionType type); + void end_region(); + void write_power_data(const std::string& filename) const; + void write_kernel_data(const std::string& filename) const; + void write_region_data(const std::string& filename) const; + +private: + const char* 
region_type_to_string(RegionType type) const; + + std::deque power_data_points; + std::deque completed_kernels; + std::deque completed_regions; + std::deque active_regions; +}; + +} // namespace NVMLPowerProfiler +} // namespace KokkosTools + +#endif // KP_POWER_PROFILER_HPP diff --git a/profiling/energy-profiler/nvml/kp_power_nvml.cpp b/profiling/energy-profiler/nvml/kp_power_nvml.cpp new file mode 100644 index 000000000..900839272 --- /dev/null +++ b/profiling/energy-profiler/nvml/kp_power_nvml.cpp @@ -0,0 +1,355 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +/** + * Kokkos NVML Power Profiler + * Simple Kokkos profiling tool that monitors GPU power consumption using NVML + * Polls nvmlDeviceGetPowerUsage() every 20ms in a background thread + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "kp_core.hpp" +#include "kp_nvml_power_profiler.hpp" + +namespace KokkosTools { +namespace NVMLPowerProfiler { + +// State variables +std::atomic g_stop_requested(false); +std::deque g_nvml_devices; +std::unique_ptr g_monitoring_thread; +std::condition_variable g_sleep_cv; +std::mutex g_sleep_mutex; +DataManager g_data_manager; +std::chrono::high_resolution_clock::time_point g_start_time; + +// Get current time in nanoseconds since epoch +int64_t get_current_epoch_ns() { + return std::chrono::duration_cast( + 
std::chrono::high_resolution_clock::now().time_since_epoch()) + .count(); +} + +void nvml_power_monitoring_thread_func(std::chrono::milliseconds interval) { + auto start_time = std::chrono::high_resolution_clock::now(); + int64_t interval_count = 0; + int64_t delayed_intervals = 0; + + while (!g_stop_requested.load()) { + auto next_check_time = start_time + ((interval_count + 1) * interval); + + { + std::unique_lock sleep_lock(g_sleep_mutex); + if (g_sleep_cv.wait_until(sleep_lock, next_check_time, + [] { return g_stop_requested.load(); })) { + break; + } + } + + auto current_time = std::chrono::high_resolution_clock::now(); + interval_count++; + + auto expected_time = start_time + (interval_count * interval); + auto delay = current_time - expected_time; + + if (delay > interval / 2) { + delayed_intervals++; + } + + double current_power_sum_W = 0.0; + + for (size_t i = 0; i < g_nvml_devices.size(); ++i) { + if (g_nvml_devices[i] == nullptr) continue; + + unsigned int power_mW; + nvmlReturn_t result = + nvmlDeviceGetPowerUsage(g_nvml_devices[i], &power_mW); + + if (NVML_SUCCESS == result) { + double current_power_W = static_cast(power_mW) / 1000.0; + current_power_sum_W += current_power_W; + } + } + + int64_t timestamp_ns = get_current_epoch_ns(); + g_data_manager.add_power_data_point(timestamp_ns, current_power_sum_W); + + if (interval_count % 100 == 0 && delayed_intervals > 0) { + printf("KokkosP NVML Power: Timing info - %" PRId64 " intervals, %" PRId64 + " delayed (%.1f%%)\n", + interval_count, delayed_intervals, + (100.0 * delayed_intervals) / interval_count); + } + } + + if (interval_count > 0) { + auto total_duration = + std::chrono::high_resolution_clock::now() - start_time; + auto actual_avg_interval = total_duration / interval_count; + printf( + "KokkosP NVML Power: Monitoring completed - %" PRId64 + " intervals, avg interval: %.1f ms (expected: %" PRId64 " ms)\n", + interval_count, + std::chrono::duration(actual_avg_interval).count(), + 
static_cast(interval.count())); + } +} + +bool initialize_nvml() { + nvmlReturn_t result = nvmlInit(); + if (NVML_SUCCESS != result) { + std::cerr << "KokkosP NVML Power: Failed to initialize NVML: " + << nvmlErrorString(result) << "\n"; + return false; + } + + unsigned int device_count; + result = nvmlDeviceGetCount(&device_count); + if (NVML_SUCCESS != result) { + std::cerr << "KokkosP NVML Power: Failed to get device count: " + << nvmlErrorString(result) << "\n"; + nvmlShutdown(); + return false; + } + + if (device_count == 0) { + std::cerr << "KokkosP NVML Power: No NVIDIA devices found\n"; + nvmlShutdown(); + return false; + } + + g_nvml_devices.resize(device_count); + + printf("KokkosP NVML Power: Found %u NVIDIA device(s)\n", device_count); + + for (unsigned int i = 0; i < device_count; ++i) { + result = nvmlDeviceGetHandleByIndex(i, &g_nvml_devices[i]); + if (NVML_SUCCESS != result) { + std::cerr << "KokkosP NVML Power: Failed to get handle for device " << i + << "\n"; + g_nvml_devices[i] = nullptr; + continue; + } + + char device_name[NVML_DEVICE_NAME_BUFFER_SIZE]; + result = nvmlDeviceGetName(g_nvml_devices[i], device_name, + NVML_DEVICE_NAME_BUFFER_SIZE); + if (NVML_SUCCESS == result) { + printf("KokkosP NVML Power: Device %u: %s\n", i, device_name); + } + + nvmlEnableState_t pmmode; + result = nvmlDeviceGetPowerManagementMode(g_nvml_devices[i], &pmmode); + if (NVML_SUCCESS == result && pmmode == NVML_FEATURE_ENABLED) { + printf("KokkosP NVML Power: Device %u: Power management enabled\n", i); + } else { + printf( + "KokkosP NVML Power: Device %u: Power management disabled or not " + "supported\n", + i); + } + } + + return true; +} + +void finalize_nvml() { + if (!g_nvml_devices.empty()) { + nvmlShutdown(); + } + g_nvml_devices.clear(); +} + +// Kokkos profiler interface functions +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { + printf( + 
"======================================================================" + "\n"); + printf("KokkosP: NVML Power Profiler Initialized\n"); + printf("KokkosP: Sequence: %d, Interface Version: %llu, Devices: %u\n", + loadSeq, (unsigned long long)interfaceVer, devInfoCount); + printf( + "======================================================================" + "\n"); + + g_start_time = std::chrono::high_resolution_clock::now(); + + if (!initialize_nvml()) { + printf( + "KokkosP NVML Power: Failed to initialize NVML, power monitoring " + "disabled\n"); + return; + } + + int interval_ms = 20; + if (const char* interval_env = + std::getenv("KOKKOS_NVML_POWER_INTERVAL")) { + try { + interval_ms = std::stoi(interval_env); + if (interval_ms <= 0) { + interval_ms = 20; + throw std::invalid_argument("Interval must be positive"); + } + printf("KokkosP NVML Power: Using custom interval: %d ms\n", interval_ms); + } catch (const std::exception& e) { + printf("KokkosP NVML Power: Invalid interval value, using default 20ms\n"); + } + } else { + printf("KokkosP NVML Power: Using default interval: 20 ms\n"); + } + + g_stop_requested.store(false); + + g_monitoring_thread = std::make_unique( + nvml_power_monitoring_thread_func, + std::chrono::milliseconds(interval_ms)); + + printf("KokkosP NVML Power: Power monitoring started\n"); +} + +void kokkosp_finalize_library() { + auto end_time = std::chrono::high_resolution_clock::now(); + + printf( + "======================================================================" + "\n"); + printf("KokkosP: NVML Power Profiler Finalization\n"); + + if (g_monitoring_thread) { + g_stop_requested.store(true); + g_sleep_cv.notify_all(); + g_monitoring_thread->join(); + g_monitoring_thread.reset(); + } + + auto duration = std::chrono::duration_cast( + end_time - g_start_time); + double elapsed_seconds = duration.count() / 1000.0; + + printf("KokkosP NVML Power: Total execution time: %.3f seconds\n", + elapsed_seconds); + + char hostname[256]; + 
gethostname(hostname, 256); + int pid = (int)getpid(); + + char power_filename[512]; + snprintf(power_filename, 512, "%s-%d-nvml-power.csv", hostname, pid); + g_data_manager.write_power_data(power_filename); + + char kernels_filename[512]; + snprintf(kernels_filename, 512, "%s-%d-nvml-kernels.csv", hostname, pid); + g_data_manager.write_kernel_data(kernels_filename); + + char regions_filename[512]; + snprintf(regions_filename, 512, "%s-%d-nvml-regions.csv", hostname, pid); + g_data_manager.write_region_data(regions_filename); + + finalize_nvml(); + printf( + "======================================================================" + "\n"); +} + +void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, + uint64_t* kID) { + g_data_manager.start_region(name, RegionType::ParallelFor); +} + +void kokkosp_end_parallel_for(const uint64_t kID) { + g_data_manager.end_region(); +} + +void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, + uint64_t* kID) { + g_data_manager.start_region(name, RegionType::ParallelScan); +} + +void kokkosp_end_parallel_scan(const uint64_t kID) { + g_data_manager.end_region(); +} + +void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, + uint64_t* kID) { + g_data_manager.start_region(name, RegionType::ParallelReduce); +} + +void kokkosp_end_parallel_reduce(const uint64_t kID) { + g_data_manager.end_region(); +} + +void kokkosp_push_profile_region(char const* regionName) { + g_data_manager.start_region(regionName, RegionType::UserRegion); +} + +void kokkosp_pop_profile_region() { + g_data_manager.end_region(); +} + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, sizeof(my_event_set)); + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = 
kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + my_event_set.push_region = kokkosp_push_profile_region; + my_event_set.pop_region = kokkosp_pop_profile_region; + return my_event_set; +} + +} // namespace NVMLPowerProfiler +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::NVMLPowerProfiler; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) +EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) +EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) + +} // extern "C" \ No newline at end of file diff --git a/profiling/energy-profiler/nvml/readme.md b/profiling/energy-profiler/nvml/readme.md new file mode 100644 index 000000000..fd7511f4b --- /dev/null +++ b/profiling/energy-profiler/nvml/readme.md @@ -0,0 +1,23 @@ +# NVML Power Profiler + +A Kokkos profiling tool that uses NVML to collect power data from NVIDIA GPUs. + +## Setup + +Requires CUDA toolkit with NVML. +1. Install the CUDA toolkit that includes NVML. +2. Compile this module with the main CMake build. 
+ +## Output Files + +- `hostname-pid-nvml-power.csv`: Power measurements (summed over all detected GPUs) + - Format: `timestamp_epoch_ns,power_watts` +- `hostname-pid-nvml-regions.csv`, `hostname-pid-nvml-kernels.csv`: User-region and kernel timings (each written only if at least one entry was recorded) + - Format: `name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns` + +## Usage + +```bash +export KOKKOS_PROFILE_LIBRARY=/path/to/kp_power_nvml.so +./your_kokkos_application +``` \ No newline at end of file From 7d9565afa383795d21b1378b17adefc152971288 Mon Sep 17 00:00:00 2001 From: Ethan Puyaubreau Date: Sat, 12 Jul 2025 00:36:28 +0000 Subject: [PATCH 03/11] energy-profiler: add variorum support for gpu energy monitoring --- .../energy-profiler/variorum/CMakeLists.txt | 13 + profiling/energy-profiler/variorum/Makefile | 27 ++ .../variorum/kp_power_variorum.cpp | 175 +++++++++ profiling/energy-profiler/variorum/readme.md | 38 ++ .../variorum/variorum_power_profiler.cpp | 369 ++++++++++++++++++ .../variorum/variorum_power_profiler.hpp | 133 +++++++ 6 files changed, 755 insertions(+) create mode 100644 profiling/energy-profiler/variorum/CMakeLists.txt create mode 100644 profiling/energy-profiler/variorum/Makefile create mode 100644 profiling/energy-profiler/variorum/kp_power_variorum.cpp create mode 100644 profiling/energy-profiler/variorum/readme.md create mode 100644 profiling/energy-profiler/variorum/variorum_power_profiler.cpp create mode 100644 profiling/energy-profiler/variorum/variorum_power_profiler.hpp diff --git a/profiling/energy-profiler/variorum/CMakeLists.txt b/profiling/energy-profiler/variorum/CMakeLists.txt new file mode 100644 index 000000000..ca923766b --- /dev/null +++ b/profiling/energy-profiler/variorum/CMakeLists.txt @@ -0,0 +1,13 @@ +kp_add_library(kp_power_variorum kp_power_variorum.cpp + variorum_power_profiler.cpp +) + +target_include_directories(kp_power_variorum PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} +) + +target_link_libraries(kp_power_variorum PRIVATE variorum::variorum) + +if(USE_MPI) + target_link_libraries(kp_power_variorum PRIVATE
MPI::MPI_CXX) +endif() \ No newline at end of file diff --git a/profiling/energy-profiler/variorum/Makefile b/profiling/energy-profiler/variorum/Makefile new file mode 100644 index 000000000..a2ca30e58 --- /dev/null +++ b/profiling/energy-profiler/variorum/Makefile @@ -0,0 +1,27 @@ +#Variorum Dependency +VAR_LIB=-L$(VARIORUM_ROOT)/lib +VAR_INC=-I$(VARIORUM_ROOT)/include +LINK_FLAG=-lvariorum + +CXX=mpicxx +CXXFLAGS=-O3 -std=c++20 -g +SHARED_CXXFLAGS=-shared -fPIC +all: build/kp_power_variorum.so + +MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) + +CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}/../all + +SOURCES = kp_power_variorum.cpp \ + variorum_power_profiler.cpp + +HEADERS = variorum_power_profiler.hpp + +build/kp_power_variorum.so: $(SOURCES) $(HEADERS) | build + $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) $(VAR_LIB) $(VAR_INC) -o $@ $(SOURCES) $(LINK_FLAG) + +build: + mkdir -p build + +clean: + rm -rf build/*.so diff --git a/profiling/energy-profiler/variorum/kp_power_variorum.cpp b/profiling/energy-profiler/variorum/kp_power_variorum.cpp new file mode 100644 index 000000000..c90722a44 --- /dev/null +++ b/profiling/energy-profiler/variorum/kp_power_variorum.cpp @@ -0,0 +1,175 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +/** + * Kokkos Power Profiler - Specialized for Variorum + * Simplified version focused on Variorum energy monitoring with integrated + * timing + */ + +#include +#include + +#include "kp_core.hpp" +#include "variorum_power_profiler.hpp" + +namespace KokkosTools { +namespace PowerProfiler { + +// --- Core Initialization --- +VariorumPowerProfiler power_profiler; + +// --- Library Initialization/Finalization --- + +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { + printf("-----------------------------------------------------------\n"); + printf( + "KokkosP: Power Profiler (sequence is %d, version: %lu, devices: %u)\n", + loadSeq, interfaceVer, devInfoCount); + printf("-----------------------------------------------------------\n"); + power_profiler.initialize(); +} + +void kokkosp_finalize_library() { + if (power_profiler.is_initialized()) { + power_profiler.finalize(); + } else { + std::cerr + << "PowerProfiler: Core not initialized, skipping finalization.\n"; + } + printf("-----------------------------------------------------------\n"); + printf("KokkosP: Finalization of Power Profiler. 
Complete.\n"); + printf("-----------------------------------------------------------\n"); +} + +// --- Kernels Launch/End --- + +void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, + uint64_t* kID) { + if (power_profiler.is_initialized()) { + power_profiler.begin_kernel(*kID, std::string(name), KernelType::FOR); + } else { + std::cerr + << "PowerProfiler: Core not initialized, cannot begin parallel for.\n"; + } +} + +void kokkosp_end_parallel_for(const uint64_t kID) { + if (power_profiler.is_initialized()) { + power_profiler.end_kernel(kID); + } else { + std::cerr + << "PowerProfiler: Core not initialized, cannot end parallel for.\n"; + } +} + +void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, + uint64_t* kID) { + if (power_profiler.is_initialized() && kID) { + power_profiler.begin_kernel(*kID, std::string(name), KernelType::SCAN); + } else { + std::cerr << "PowerProfiler: Core not initialized or kID is null, " + "cannot begin parallel scan.\n"; + } +} + +void kokkosp_end_parallel_scan(const uint64_t kID) { + if (power_profiler.is_initialized()) { + power_profiler.end_kernel(kID); + } else { + std::cerr + << "PowerProfiler: Core not initialized, cannot end parallel scan.\n"; + } +} + +void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, + uint64_t* kID) { + if (power_profiler.is_initialized() && kID) { + power_profiler.begin_kernel(*kID, std::string(name), KernelType::REDUCE); + } else { + std::cerr << "PowerProfiler: Core not initialized or kID is null, " + "cannot begin parallel reduce.\n"; + } +} + +void kokkosp_end_parallel_reduce(const uint64_t kID) { + if (power_profiler.is_initialized()) { + power_profiler.end_kernel(kID); + } else { + std::cerr + << "PowerProfiler: Core not initialized, cannot end parallel reduce.\n"; + } +} + +void kokkosp_push_profile_region(char const* regionName) { + if (power_profiler.is_initialized()) { + power_profiler.push_region(std::string(regionName)); + // 
printf("KokkosP: Entering profiling region: %s\n", regionName); + // Commented out to avoid excessive output + } else { + std::cerr + << "PowerProfiler: Core not initialized, cannot push profile region.\n"; + } +} + +void kokkosp_pop_profile_region() { + if (power_profiler.is_initialized()) { + power_profiler.pop_region(); + } else { + std::cerr + << "PowerProfiler: Core not initialized, cannot pop profile region.\n"; + } +} + +// --- Event Set Configuration --- + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, + sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + my_event_set.push_region = kokkosp_push_profile_region; + my_event_set.pop_region = kokkosp_pop_profile_region; + return my_event_set; +} + +} // namespace PowerProfiler +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::PowerProfiler; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) +EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) +EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) +} diff --git 
a/profiling/energy-profiler/variorum/readme.md b/profiling/energy-profiler/variorum/readme.md new file mode 100644 index 000000000..6413c8b44 --- /dev/null +++ b/profiling/energy-profiler/variorum/readme.md @@ -0,0 +1,38 @@ +# Variorum Energy Profiler + +A Kokkos profiling tool that uses Variorum to collect power measurements from supported hardware. + +## Setup + +1. Install Variorum library +2. Compile this module with Variorum using the main CMake build system. + +## Configuration + +Environment variables: +- `KOKKOS_TOOLS_POWER_MONITOR_INTERVAL`: Sampling interval in microseconds (default: 20000) +- `KOKKOS_TOOLS_POWER_OUTPUT_PATH`: Base path for output files (optional) + +## Output Files + +The profiler generates three CSV files: +- `hostname-pid-power.csv`: Raw power readings with absolute epoch timestamps + - Format: `timestamp_epoch_ns,device_id,power_watts` +- `hostname-pid-regions.csv`: Timing for user-defined regions + - Format: `name,type,start_timestamp_epoch_ns,end_timestamp_epoch_ns,duration_ns` +- `hostname-pid-kernels.csv`: Timing for Kokkos kernels + - Format: `name,type,start_timestamp_epoch_ns,end_timestamp_epoch_ns,duration_ns,kernel_id` + +Power readings are in watts and timestamps are in nanoseconds since the epoch. 
+ +## Usage + +```bash +export KOKKOS_PROFILE_LIBRARY=/path/to/kp_power_variorum.so +./your_kokkos_application +``` + +> Note: You might need to set `LD_LIBRARY_PATH` to include the Variorum library path if it's not in a standard location: +> ```bash +> export LD_LIBRARY_PATH=/path/to/variorum/lib:$LD_LIBRARY_PATH +> ``` \ No newline at end of file diff --git a/profiling/energy-profiler/variorum/variorum_power_profiler.cpp b/profiling/energy-profiler/variorum/variorum_power_profiler.cpp new file mode 100644 index 000000000..4ab445917 --- /dev/null +++ b/profiling/energy-profiler/variorum/variorum_power_profiler.cpp @@ -0,0 +1,369 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include "variorum_power_profiler.hpp" +#include +#include +#include +#include +#include +#include +#include +#include + +namespace KokkosTools { +namespace PowerProfiler { + +std::string kernel_type_to_string(KernelType type) { + switch (type) { + case KernelType::FOR: return "FOR"; + case KernelType::SCAN: return "SCAN"; + case KernelType::REDUCE: return "REDUCE"; + default: return "UNKNOWN"; + } +} + +VariorumPowerProfiler::VariorumPowerProfiler() { + if (const char* interval = + std::getenv("KOKKOS_TOOLS_POWER_MONITOR_INTERVAL")) { + try { + auto interval_us = std::stoul(interval); + monitor_interval_ = std::chrono::microseconds(interval_us); + } catch (const std::exception& e) { + std::cerr + << "PowerProfiler: Invalid monitor interval, using default 20ms\n"; + } + } + + if (const char* output_path = std::getenv("KOKKOS_TOOLS_POWER_OUTPUT_PATH")) { + output_file_path_ = output_path; + } +} + +VariorumPowerProfiler::~VariorumPowerProfiler() { + if (initialized_) { + finalize(); + } +} + +bool VariorumPowerProfiler::initialize() { + if (initialized_) { + return true; + } + + if (!initialize_variorum()) { + std::cerr << "PowerProfiler: Failed to initialize Variorum\n"; + return false; + } + + available_devices_ = get_available_devices(); + if (available_devices_.empty()) { + std::cerr << "PowerProfiler: No energy monitoring devices found\n"; + return false; + } + + start_monitoring(); + initialized_ = true; + + std::cout << "PowerProfiler: Initialized with " << available_devices_.size() + << " devices, monitoring interval: " << monitor_interval_.count() + << "μs\n"; + + return true; +} + +void VariorumPowerProfiler::finalize() { + if (!initialized_) { + return; + } + + stop_monitoring(); + generate_outputs(); + initialized_ = false; +} + +bool VariorumPowerProfiler::initialize_variorum() { return true; } + +VariorumPowerProfiler::unique_json_ptr 
+VariorumPowerProfiler::get_variorum_json_data() const { + char* json_string_c_raw = nullptr; + int variorum_error = variorum_get_power_json(&json_string_c_raw); + + if (variorum_error != 0) { + std::cerr << "PowerProfiler: variorum_get_power_json() failed. Error code: " + << variorum_error << "\n"; + return unique_json_ptr(nullptr); + } + + unique_cstring json_string_c(json_string_c_raw); + + if (!json_string_c) { + std::cerr << "PowerProfiler: variorum_get_power_json() returned success " + "but a null pointer.\n"; + return unique_json_ptr(nullptr); + } + + json_error_t error; + json_t* root_ptr = json_loads(json_string_c.get(), 0, &error); + + if (!root_ptr) { + std::cerr << "PowerProfiler: Failed to parse JSON: " << error.text << "\n"; + return unique_json_ptr(nullptr); + } + + return unique_json_ptr(root_ptr); +} + +std::deque VariorumPowerProfiler::get_available_devices() const { + std::set found_device_ids; + unique_json_ptr root = get_variorum_json_data(); + + if (!root) { + return {}; + } + + json_t* host_obj = json_object_iter_value(json_object_iter(root.get())); + if (!host_obj) { + return {}; + } + + json_t* socket_0 = json_object_get(host_obj, "socket_0"); + if (socket_0 && json_is_object(socket_0)) { + json_t* power_gpu_watts = json_object_get(socket_0, "power_gpu_watts"); + if (power_gpu_watts && json_is_object(power_gpu_watts)) { + const char* key; + json_t* value; + json_object_foreach(power_gpu_watts, key, value) { + std::string s_key(key); + if (s_key.length() > 4 && s_key.substr(0, 4) == "GPU_") { + try { + uint32_t device_id = std::stoul(s_key.substr(4)); + found_device_ids.insert(device_id); + } catch (const std::invalid_argument& e) { + std::cerr << "PowerProfiler: Could not parse GPU ID from key: " + << s_key << " (" << e.what() << ")\n"; + } catch (const std::out_of_range& e) { + std::cerr << "PowerProfiler: GPU ID out of range from key: " + << s_key << " (" << e.what() << ")\n"; + } + } + } + } + } + + return 
std::deque(found_device_ids.begin(), found_device_ids.end()); +} + +EnergyReading VariorumPowerProfiler::get_current_energy_reading() const { + EnergyReading reading; + reading.timestamp = get_current_time(); + reading.epoch_timestamp = get_current_epoch_time(); + + unique_json_ptr root = get_variorum_json_data(); + if (!root) { + return reading; + } + + json_t* host_obj = json_object_iter_value(json_object_iter(root.get())); + if (!host_obj) { + return reading; + } + + json_t* socket_0 = json_object_get(host_obj, "socket_0"); + if (socket_0 && json_is_object(socket_0)) { + json_t* power_gpu_watts = json_object_get(socket_0, "power_gpu_watts"); + if (power_gpu_watts && json_is_object(power_gpu_watts)) { + for (uint32_t device_id : available_devices_) { + std::string gpu_key = "GPU_" + std::to_string(device_id); + json_t* power_value = json_object_get(power_gpu_watts, gpu_key.c_str()); + + if (json_is_number(power_value)) { + reading.gpu_power_watts[device_id] = json_number_value(power_value); + } + } + } + } + + return reading; +} + +void VariorumPowerProfiler::start_monitoring() { + monitoring_active_ = true; + monitoring_thread_ = std::make_unique( + &VariorumPowerProfiler::monitoring_thread_function, this); +} + +void VariorumPowerProfiler::stop_monitoring() { + monitoring_active_ = false; + if (monitoring_thread_ && monitoring_thread_->joinable()) { + monitoring_thread_->join(); + } +} + +void VariorumPowerProfiler::monitoring_thread_function() { + while (monitoring_active_) { + EnergyReading reading = get_current_energy_reading(); + energy_readings_.push_back(reading); + std::this_thread::sleep_for(monitor_interval_); + } +} + +void VariorumPowerProfiler::begin_kernel(uint64_t kernel_id, + const std::string& name, + KernelType type) { + KernelTiming timing; + timing.kernel_id = kernel_id; + timing.name = name; + timing.type = type; + timing.start_time = get_current_time(); + timing.epoch_start_time = get_current_epoch_time(); + + active_kernels_[kernel_id] = 
timing; +} + +void VariorumPowerProfiler::end_kernel(uint64_t kernel_id) { + auto it = active_kernels_.find(kernel_id); + if (it != active_kernels_.end()) { + it->second.end_time = get_current_time(); + it->second.epoch_end_time = get_current_epoch_time(); + it->second.duration = std::chrono::duration_cast( + it->second.end_time - it->second.start_time); + + completed_kernels_.push_back(it->second); + active_kernels_.erase(it); + } +} + +void VariorumPowerProfiler::push_region(const std::string& name, + const std::string& type) { + RegionTiming region; + region.name = name; + region.type = type.empty() ? "DEFAULT" : type; + region.start_time = get_current_time(); + region.epoch_start_time = get_current_epoch_time(); + + active_regions_.push_back(region); +} + +void VariorumPowerProfiler::pop_region() { + if (!active_regions_.empty()) { + auto& region = active_regions_.back(); + region.end_time = get_current_time(); + region.epoch_end_time = get_current_epoch_time(); + region.duration = std::chrono::duration_cast( + region.end_time - region.start_time); + + completed_regions_.push_back(region); + active_regions_.pop_back(); + } +} + +std::chrono::time_point +VariorumPowerProfiler::get_current_time() const { + return std::chrono::steady_clock::now(); +} + +std::chrono::system_clock::time_point +VariorumPowerProfiler::get_current_epoch_time() const { + return std::chrono::system_clock::now(); +} + +void VariorumPowerProfiler::generate_outputs() { output_to_csv(); } + +void VariorumPowerProfiler::output_to_csv() const { + char hostname[256]; + gethostname(hostname, 256); + int pid = (int)getpid(); + + // Create power data CSV file + char power_filename[512]; + snprintf(power_filename, 512, "%s-%d-power.csv", hostname, pid); + std::ofstream power_csv(power_filename); + if (power_csv.is_open()) { + power_csv << "timestamp_epoch_ns,device_id,power_watts\n"; + for (const auto& reading : energy_readings_) { + auto epoch_ns = std::chrono::duration_cast( + 
reading.epoch_timestamp.time_since_epoch()) + .count(); + + for (const auto& [device_id, power] : reading.gpu_power_watts) { + power_csv << epoch_ns << "," << device_id << "," << power << "\n"; + } + } + power_csv.close(); + std::cout << "Power measurements written to " << power_filename << " (" + << energy_readings_.size() << " readings)\n"; + } + + // Create regions CSV file + char regions_filename[512]; + snprintf(regions_filename, 512, "%s-%d-regions.csv", hostname, pid); + std::ofstream regions_csv(regions_filename); + if (regions_csv.is_open() && !completed_regions_.empty()) { + regions_csv << "name,type,start_timestamp_epoch_ns,end_timestamp_epoch_ns," + "duration_ns\n"; + for (const auto& region : completed_regions_) { + auto start_epoch_ns = + std::chrono::duration_cast( + region.epoch_start_time.time_since_epoch()) + .count(); + auto end_epoch_ns = std::chrono::duration_cast( + region.epoch_end_time.time_since_epoch()) + .count(); + + regions_csv << "\"" << region.name << "\"," << "\"" << region.type + << "\"," << start_epoch_ns << "," << end_epoch_ns << "," + << region.duration.count() << "\n"; + } + regions_csv.close(); + std::cout << "Region timings written to " << regions_filename << " (" + << completed_regions_.size() << " regions)\n"; + } + + // Create kernels CSV file (can be considered part of regions with specific + // type) + if (!completed_kernels_.empty()) { + char kernels_filename[512]; + snprintf(kernels_filename, 512, "%s-%d-kernels.csv", hostname, pid); + std::ofstream kernels_csv(kernels_filename); + if (kernels_csv.is_open()) { + kernels_csv << "name,type,start_timestamp_epoch_ns,end_timestamp_epoch_" + "ns,duration_ns,kernel_id\n"; + for (const auto& kernel : completed_kernels_) { + auto start_epoch_ns = + std::chrono::duration_cast( + kernel.epoch_start_time.time_since_epoch()) + .count(); + auto end_epoch_ns = + std::chrono::duration_cast( + kernel.epoch_end_time.time_since_epoch()) + .count(); + + kernels_csv << "\"" << kernel.name 
<< "\"," << "\"" + << kernel_type_to_string(kernel.type) << "\"," + << start_epoch_ns << "," << end_epoch_ns << "," + << kernel.duration.count() << "," << kernel.kernel_id + << "\n"; + } + kernels_csv.close(); + std::cout << "Kernel timings written to " << kernels_filename << " (" + << completed_kernels_.size() << " kernels)\n"; + } + } +} + +} // namespace PowerProfiler +} // namespace KokkosTools \ No newline at end of file diff --git a/profiling/energy-profiler/variorum/variorum_power_profiler.hpp b/profiling/energy-profiler/variorum/variorum_power_profiler.hpp new file mode 100644 index 000000000..7a04efc9e --- /dev/null +++ b/profiling/energy-profiler/variorum/variorum_power_profiler.hpp @@ -0,0 +1,133 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +extern "C" { +#include +#include +} + +namespace KokkosTools { +namespace PowerProfiler { + +enum class KernelType { FOR, SCAN, REDUCE }; + +struct EnergyReading { + std::chrono::system_clock::time_point epoch_timestamp; + std::chrono::time_point timestamp; + std::map gpu_power_watts; +}; + +struct KernelTiming { + uint64_t kernel_id; + std::string name; + KernelType type; + std::chrono::system_clock::time_point epoch_start_time; + std::chrono::system_clock::time_point epoch_end_time; + std::chrono::time_point start_time; + std::chrono::time_point end_time; + std::chrono::nanoseconds duration; +}; + +struct RegionTiming { + std::string name; + std::string type; + std::chrono::system_clock::time_point epoch_start_time; + std::chrono::system_clock::time_point epoch_end_time; + std::chrono::time_point start_time; + std::chrono::time_point end_time; + std::chrono::nanoseconds duration; +}; + +class VariorumPowerProfiler { + public: + VariorumPowerProfiler(); + ~VariorumPowerProfiler(); + + bool initialize(); + void finalize(); + + void begin_kernel(uint64_t kernel_id, const std::string& name, + KernelType type); + void end_kernel(uint64_t kernel_id); + + void push_region(const std::string& name, const std::string& type = ""); + void pop_region(); + + bool is_initialized() const { return initialized_; } + + private: + struct JsonDeleter { + void operator()(json_t* json) const { + if (json) json_decref(json); + } + }; + using unique_json_ptr = std::unique_ptr; + + struct CFreeDeleter { + void operator()(char* ptr) const { + if (ptr) free(ptr); + } + }; + using unique_cstring = std::unique_ptr; + + bool initialize_variorum(); + unique_json_ptr get_variorum_json_data() const; + EnergyReading get_current_energy_reading() const; + std::deque get_available_devices() const; + + void 
monitoring_thread_function(); + void start_monitoring(); + void stop_monitoring(); + + void generate_outputs(); + void output_to_csv() const; + + std::chrono::time_point get_current_time() const; + std::chrono::system_clock::time_point get_current_epoch_time() const; + + std::chrono::microseconds monitor_interval_{20000}; + std::string output_file_path_{"power_profile_output"}; + + bool initialized_{false}; + std::deque available_devices_; + + std::atomic monitoring_active_{false}; + std::unique_ptr monitoring_thread_; + + std::deque energy_readings_; + std::deque completed_kernels_; + std::deque completed_regions_; + + std::unordered_map active_kernels_; + std::deque active_regions_; +}; + +// Utility function to convert KernelType to string +std::string kernel_type_to_string(KernelType type); + +} // namespace PowerProfiler +} // namespace KokkosTools \ No newline at end of file From 48780b0f30a9905d254281bc96dd95228857f631 Mon Sep 17 00:00:00 2001 From: Ethan Puyaubreau Date: Sat, 12 Jul 2025 00:36:39 +0000 Subject: [PATCH 04/11] energy-profiler: integrate into main build system --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index bc3b1e63b..48fef3053 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -172,6 +172,7 @@ endif() if(KOKKOSTOOLS_HAS_VARIORUM) add_subdirectory(profiling/variorum-connector) + add_subdirectory(profiling/energy-profiler) endif() # GPU profilers From fbf4f7938cf69a6abe651b69f3f5a0339af33d19 Mon Sep 17 00:00:00 2001 From: Ethan Puyaubreau Date: Tue, 15 Jul 2025 15:28:06 -0400 Subject: [PATCH 05/11] energy-profiler: add NVML power tool --- profiling/energy-profiler/CMakeLists.txt | 1 + .../energy-profiler/nvml-power/CMakeLists.txt | 17 ++ .../nvml-power/kp_nvml_energy_connector.cpp | 109 ++++++++++ .../nvml-power/kp_nvml_energy_profiler.cpp | 200 ++++++++++++++++++ .../nvml-power/kp_nvml_energy_profiler.hpp | 77 +++++++ 5 files changed, 404 insertions(+) create mode 100644 
profiling/energy-profiler/nvml-power/CMakeLists.txt create mode 100644 profiling/energy-profiler/nvml-power/kp_nvml_energy_connector.cpp create mode 100644 profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.cpp create mode 100644 profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.hpp diff --git a/profiling/energy-profiler/CMakeLists.txt b/profiling/energy-profiler/CMakeLists.txt index a2ace2132..56891d014 100644 --- a/profiling/energy-profiler/CMakeLists.txt +++ b/profiling/energy-profiler/CMakeLists.txt @@ -2,3 +2,4 @@ cmake_minimum_required(VERSION 3.16) add_subdirectory(variorum) add_subdirectory(nvml) +add_subdirectory(nvml-power) diff --git a/profiling/energy-profiler/nvml-power/CMakeLists.txt b/profiling/energy-profiler/nvml-power/CMakeLists.txt new file mode 100644 index 000000000..6d5f45b74 --- /dev/null +++ b/profiling/energy-profiler/nvml-power/CMakeLists.txt @@ -0,0 +1,17 @@ +find_package(CUDAToolkit QUIET) + +if (CUDAToolkit_FOUND) + find_package(CUDA::nvml QUIET) + message(STATUS "Found CUDA NVML, making NVML power profiler available.") +else() + message(STATUS "CUDAToolkit not found, skipping NVML power profiler.") + return() +endif() + +kp_add_library(kp_energy_nvml kp_nvml_energy_connector.cpp kp_nvml_energy_profiler.cpp) + +target_link_libraries(kp_energy_nvml PRIVATE CUDA::nvml pthread) + +target_include_directories(kp_energy_nvml PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} +) \ No newline at end of file diff --git a/profiling/energy-profiler/nvml-power/kp_nvml_energy_connector.cpp b/profiling/energy-profiler/nvml-power/kp_nvml_energy_connector.cpp new file mode 100644 index 000000000..243ddd1a3 --- /dev/null +++ b/profiling/energy-profiler/nvml-power/kp_nvml_energy_connector.cpp @@ -0,0 +1,109 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). 
+// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include "kp_nvml_energy_profiler.hpp" +#include +#include + +using namespace KokkosTools::NVMLEnergyProfiler; + +extern "C" { + +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount __attribute__((unused)), + void* deviceInfo __attribute__((unused))) { + const char* tool_verbose = getenv("KOKKOS_TOOLS_LIBS_VERBOSE"); + if (tool_verbose != nullptr) { + printf("KokkosP NVML Energy: library loaded (sequence is %d, version: %" PRIu64 ")\n", + loadSeq, interfaceVer); + } + + g_data_manager = new DataManager(); + if (!g_data_manager->initialize()) { + printf("KokkosP NVML Energy: Failed to initialize, profiling disabled\n"); + delete g_data_manager; + g_data_manager = nullptr; + } +} + +void kokkosp_finalize_library() { + const char* tool_verbose = getenv("KOKKOS_TOOLS_LIBS_VERBOSE"); + if (tool_verbose != nullptr) { + printf("KokkosP NVML Energy: finalizing library\n"); + } + + if (g_data_manager) { + // Write output files + g_data_manager->write_kernel_data("kokkos_kernels_energy.csv"); + g_data_manager->write_region_data("kokkos_regions_energy.csv"); + + delete g_data_manager; + g_data_manager = nullptr; + } +} + +void kokkosp_begin_parallel_for(const char* name, uint32_t devid __attribute__((unused)), + uint64_t* kernid __attribute__((unused))) { + if (g_data_manager) { + g_data_manager->start_region(std::string(name), RegionType::ParallelFor); + } +} + +void kokkosp_end_parallel_for(uint64_t kernid __attribute__((unused))) { + if (g_data_manager) { + g_data_manager->end_region(); + } +} + +void kokkosp_begin_parallel_reduce(const char* name, uint32_t devid __attribute__((unused)), 
+ uint64_t* kernid __attribute__((unused))) { + if (g_data_manager) { + g_data_manager->start_region(std::string(name), RegionType::ParallelReduce); + } +} + +void kokkosp_end_parallel_reduce(uint64_t kernid __attribute__((unused))) { + if (g_data_manager) { + g_data_manager->end_region(); + } +} + +void kokkosp_begin_parallel_scan(const char* name, uint32_t devid __attribute__((unused)), + uint64_t* kernid __attribute__((unused))) { + if (g_data_manager) { + g_data_manager->start_region(std::string(name), RegionType::ParallelScan); + } +} + +void kokkosp_end_parallel_scan(uint64_t kernid __attribute__((unused))) { + if (g_data_manager) { + g_data_manager->end_region(); + } +} + +void kokkosp_push_profile_region(const char* name) { + if (g_data_manager) { + g_data_manager->start_region(std::string(name), RegionType::UserRegion); + } +} + +void kokkosp_pop_profile_region() { + if (g_data_manager) { + g_data_manager->end_region(); + } +} + +} // extern "C" diff --git a/profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.cpp b/profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.cpp new file mode 100644 index 000000000..3ead3d95b --- /dev/null +++ b/profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.cpp @@ -0,0 +1,200 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include "kp_nvml_energy_profiler.hpp" +#include +#include +#include + +namespace KokkosTools { +namespace NVMLEnergyProfiler { + +DataManager* g_data_manager = nullptr; + +DataManager::DataManager() : nvml_initialized(false) {} + +DataManager::~DataManager() { + finalize(); +} + +bool DataManager::initialize() { + nvmlReturn_t result = nvmlInit(); + if (result != NVML_SUCCESS) { + printf("KokkosP NVML Energy: Failed to initialize NVML: %s\n", nvmlErrorString(result)); + return false; + } + + // Get the first GPU device + result = nvmlDeviceGetHandleByIndex(0, &device); + if (result != NVML_SUCCESS) { + printf("KokkosP NVML Energy: Failed to get device handle: %s\n", nvmlErrorString(result)); + nvmlShutdown(); + return false; + } + + // Test if energy monitoring is available + unsigned long long energy; + result = nvmlDeviceGetTotalEnergyConsumption(device, &energy); + if (result != NVML_SUCCESS) { + printf("KokkosP NVML Energy: Energy monitoring not available: %s\n", nvmlErrorString(result)); + nvmlShutdown(); + return false; + } + + nvml_initialized = true; + printf("KokkosP NVML Energy: Initialized successfully\n"); + return true; +} + +void DataManager::finalize() { + if (nvml_initialized) { + nvmlShutdown(); + nvml_initialized = false; + } +} + +unsigned long long DataManager::get_current_energy_mj() const { + if (!nvml_initialized) return 0; + + unsigned long long energy; + nvmlReturn_t result = nvmlDeviceGetTotalEnergyConsumption(device, &energy); + if (result != NVML_SUCCESS) { + printf("KokkosP NVML Energy: Failed to get energy consumption: %s\n", nvmlErrorString(result)); + return 0; + } + return energy; +} + +void DataManager::start_region(const std::string& name, RegionType type) { + TimingEnergyInfo region; + region.name = name; + region.type = type; + region.start_time = std::chrono::high_resolution_clock::now(); + region.start_energy_mj = get_current_energy_mj(); + 
active_regions.push_back(region); +} + +void DataManager::end_region() { + if (!active_regions.empty()) { + auto& region = active_regions.back(); + region.end_time = std::chrono::high_resolution_clock::now(); + region.end_energy_mj = get_current_energy_mj(); + region.duration = std::chrono::duration_cast( + region.end_time - region.start_time); + region.delta_energy_mj = region.end_energy_mj - region.start_energy_mj; + + // Power (W) = (delta_energy_mj / 1000) [Joules] / (duration_ns / 1e9) [seconds] + // => Power = (delta_energy_mj * 1e6) / duration_ns + if (region.duration.count() > 0) { + region.average_power_w = (static_cast(region.delta_energy_mj) * 1e6) / + static_cast(region.duration.count()); + } else { + region.average_power_w = 0.0; + } + + if (region.type == RegionType::UserRegion) { + completed_regions.push_back(region); + } else { + completed_kernels.push_back(region); + } + active_regions.pop_back(); + } +} + +const char* DataManager::region_type_to_string(RegionType type) const { + switch (type) { + case RegionType::ParallelFor: return "parallel_for"; + case RegionType::ParallelReduce: return "parallel_reduce"; + case RegionType::ParallelScan: return "parallel_scan"; + case RegionType::UserRegion: return "user_region"; + default: return "unknown"; + } +} + +void DataManager::write_kernel_data(const std::string& filename) const { + if (completed_kernels.empty()) return; + + FILE* kernels_file = fopen(filename.c_str(), "w"); + if (kernels_file) { + fprintf(kernels_file, "name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns,start_energy_mj,end_energy_mj,delta_energy_mj,average_power_w\n"); + for (const auto& kernel : completed_kernels) { + auto start_ns = std::chrono::duration_cast( + kernel.start_time.time_since_epoch()) + .count(); + auto end_ns = std::chrono::duration_cast( + kernel.end_time.time_since_epoch()) + .count(); + fprintf(kernels_file, "%s,%s,%" PRId64 ",%" PRId64 ",%" PRId64 ",%llu,%llu,%llu,%.6f\n", + kernel.name.c_str(), + 
region_type_to_string(kernel.type), + start_ns, + end_ns, + (int64_t)kernel.duration.count(), + kernel.start_energy_mj, + kernel.end_energy_mj, + kernel.delta_energy_mj, + kernel.average_power_w); + } + fclose(kernels_file); + char cwd[256]; + if (getcwd(cwd, 256) != nullptr) { + printf("KokkosP NVML Energy: Kernel energy CSV written to %s/%s (%" PRIu64 " kernels)\n", + cwd, filename.c_str(), static_cast(completed_kernels.size())); + } else { + printf("KokkosP NVML Energy: Kernel energy CSV written to %s (%" PRIu64 " kernels)\n", + filename.c_str(), static_cast(completed_kernels.size())); + } + } +} + +void DataManager::write_region_data(const std::string& filename) const { + if (completed_regions.empty()) return; + + FILE* regions_file = fopen(filename.c_str(), "w"); + if (regions_file) { + fprintf(regions_file, "name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns,start_energy_mj,end_energy_mj,delta_energy_mj,average_power_w\n"); + for (const auto& region : completed_regions) { + auto start_ns = std::chrono::duration_cast( + region.start_time.time_since_epoch()) + .count(); + auto end_ns = std::chrono::duration_cast( + region.end_time.time_since_epoch()) + .count(); + fprintf(regions_file, "%s,%s,%" PRId64 ",%" PRId64 ",%" PRId64 ",%llu,%llu,%llu,%.6f\n", + region.name.c_str(), + region_type_to_string(region.type), + start_ns, + end_ns, + (int64_t)region.duration.count(), + region.start_energy_mj, + region.end_energy_mj, + region.delta_energy_mj, + region.average_power_w); + } + fclose(regions_file); + char cwd[256]; + if (getcwd(cwd, 256) != nullptr) { + printf("KokkosP NVML Energy: Region energy CSV written to %s/%s (%" PRIu64 " regions)\n", + cwd, filename.c_str(), static_cast(completed_regions.size())); + } else { + printf("KokkosP NVML Energy: Region energy CSV written to %s (%" PRIu64 " regions)\n", + filename.c_str(), static_cast(completed_regions.size())); + } + } +} + +} // namespace NVMLEnergyProfiler +} // namespace KokkosTools diff --git 
a/profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.hpp b/profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.hpp new file mode 100644 index 000000000..5500deaf9 --- /dev/null +++ b/profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.hpp @@ -0,0 +1,77 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KP_NVML_ENERGY_PROFILER_HPP +#define KP_NVML_ENERGY_PROFILER_HPP + +#include +#include +#include +#include + +namespace KokkosTools { +namespace NVMLEnergyProfiler { + +enum class RegionType { + ParallelFor, + ParallelReduce, + ParallelScan, + UserRegion +}; + +struct TimingEnergyInfo { + std::string name; + RegionType type; + std::chrono::high_resolution_clock::time_point start_time; + std::chrono::high_resolution_clock::time_point end_time; + std::chrono::nanoseconds duration; + unsigned long long start_energy_mj; // millijoules at start + unsigned long long end_energy_mj; // millijoules at end + unsigned long long delta_energy_mj; // energy consumed during region + double average_power_w; // average power in Watts +}; + +class DataManager { +private: + std::vector completed_kernels; + std::vector completed_regions; + std::vector active_regions; + nvmlDevice_t device; + bool nvml_initialized; + + const char* region_type_to_string(RegionType type) const; + unsigned long long get_current_energy_mj() const; + +public: + DataManager(); + ~DataManager(); + + bool initialize(); + void finalize(); + + void start_region(const std::string& name, 
RegionType type); + void end_region(); + + void write_kernel_data(const std::string& filename) const; + void write_region_data(const std::string& filename) const; +}; + +extern DataManager* g_data_manager; + +} // namespace NVMLEnergyProfiler +} // namespace KokkosTools + +#endif // KP_NVML_ENERGY_PROFILER_HPP From 51e900bdd2d5dd93de083ff62d662cca9e87de08 Mon Sep 17 00:00:00 2001 From: Ethan Puyaubreau Date: Mon, 21 Jul 2025 10:42:32 -0400 Subject: [PATCH 06/11] energy-profiler: refactor NVML power profiler and update output formats --- .../energy-profiler/nvml-power/CMakeLists.txt | 2 +- .../nvml-power/kp_energy_nvml.cpp | 120 +++++++ .../nvml-power/kp_nvml_energy_connector.cpp | 109 ------- .../nvml-power/kp_nvml_energy_profiler.cpp | 294 +++++++++--------- .../nvml-power/kp_nvml_energy_profiler.hpp | 71 ++--- .../energy-profiler/nvml-power/readme.md | 24 ++ .../nvml/kp_nvml_power_profiler.cpp | 159 +++++----- .../nvml/kp_nvml_power_profiler.hpp | 32 +- .../energy-profiler/nvml/kp_power_nvml.cpp | 18 +- profiling/energy-profiler/nvml/readme.md | 12 +- 10 files changed, 443 insertions(+), 398 deletions(-) create mode 100644 profiling/energy-profiler/nvml-power/kp_energy_nvml.cpp delete mode 100644 profiling/energy-profiler/nvml-power/kp_nvml_energy_connector.cpp create mode 100644 profiling/energy-profiler/nvml-power/readme.md diff --git a/profiling/energy-profiler/nvml-power/CMakeLists.txt b/profiling/energy-profiler/nvml-power/CMakeLists.txt index 6d5f45b74..60a679d05 100644 --- a/profiling/energy-profiler/nvml-power/CMakeLists.txt +++ b/profiling/energy-profiler/nvml-power/CMakeLists.txt @@ -8,7 +8,7 @@ else() return() endif() -kp_add_library(kp_energy_nvml kp_nvml_energy_connector.cpp kp_nvml_energy_profiler.cpp) +kp_add_library(kp_energy_nvml kp_energy_nvml.cpp kp_nvml_energy_profiler.cpp) target_link_libraries(kp_energy_nvml PRIVATE CUDA::nvml pthread) diff --git a/profiling/energy-profiler/nvml-power/kp_energy_nvml.cpp 
b/profiling/energy-profiler/nvml-power/kp_energy_nvml.cpp new file mode 100644 index 000000000..b0a4fdbb7 --- /dev/null +++ b/profiling/energy-profiler/nvml-power/kp_energy_nvml.cpp @@ -0,0 +1,120 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include "kp_nvml_energy_profiler.hpp" +#include +#include + +using namespace KokkosTools::NVMLEnergyProfiler; + +extern "C" { + +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount __attribute__((unused)), + void* deviceInfo __attribute__((unused))) { + const char* tool_verbose = getenv("KOKKOS_TOOLS_LIBS_VERBOSE"); + if (tool_verbose != nullptr) { + printf( + "KokkosP NVML Energy: library loaded (sequence is %d, version: %" PRIu64 + ")\n", + loadSeq, interfaceVer); + } + + g_data_manager = new DataManager(); + if (!g_data_manager->initialize()) { + printf("KokkosP NVML Energy: Failed to initialize, profiling disabled\n"); + delete g_data_manager; + g_data_manager = nullptr; + } +} + +void kokkosp_finalize_library() { + const char* tool_verbose = getenv("KOKKOS_TOOLS_LIBS_VERBOSE"); + if (tool_verbose != nullptr) { + printf("KokkosP NVML Energy: finalizing library\n"); + } + + char hostname[256]; + gethostname(hostname, 256); + int pid = (int)getpid(); + + if (g_data_manager) { + // Write output files + g_data_manager->write_kernel_data("%s-%d-nvml-power-kernels.csv", hostname, + pid); + g_data_manager->write_region_data("%s-%d-nvml-power-regions.csv", hostname, + pid); + + delete 
g_data_manager; + g_data_manager = nullptr; + } +} + +void kokkosp_begin_parallel_for(const char* name, + uint32_t devid __attribute__((unused)), + uint64_t* kernid __attribute__((unused))) { + if (g_data_manager) { + g_data_manager->start_region(std::string(name), RegionType::ParallelFor); + } +} + +void kokkosp_end_parallel_for(uint64_t kernid __attribute__((unused))) { + if (g_data_manager) { + g_data_manager->end_region(); + } +} + +void kokkosp_begin_parallel_reduce(const char* name, + uint32_t devid __attribute__((unused)), + uint64_t* kernid __attribute__((unused))) { + if (g_data_manager) { + g_data_manager->start_region(std::string(name), RegionType::ParallelReduce); + } +} + +void kokkosp_end_parallel_reduce(uint64_t kernid __attribute__((unused))) { + if (g_data_manager) { + g_data_manager->end_region(); + } +} + +void kokkosp_begin_parallel_scan(const char* name, + uint32_t devid __attribute__((unused)), + uint64_t* kernid __attribute__((unused))) { + if (g_data_manager) { + g_data_manager->start_region(std::string(name), RegionType::ParallelScan); + } +} + +void kokkosp_end_parallel_scan(uint64_t kernid __attribute__((unused))) { + if (g_data_manager) { + g_data_manager->end_region(); + } +} + +void kokkosp_push_profile_region(const char* name) { + if (g_data_manager) { + g_data_manager->start_region(std::string(name), RegionType::UserRegion); + } +} + +void kokkosp_pop_profile_region() { + if (g_data_manager) { + g_data_manager->end_region(); + } +} + +} // extern "C" diff --git a/profiling/energy-profiler/nvml-power/kp_nvml_energy_connector.cpp b/profiling/energy-profiler/nvml-power/kp_nvml_energy_connector.cpp deleted file mode 100644 index 243ddd1a3..000000000 --- a/profiling/energy-profiler/nvml-power/kp_nvml_energy_connector.cpp +++ /dev/null @@ -1,109 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#include "kp_nvml_energy_profiler.hpp" -#include -#include - -using namespace KokkosTools::NVMLEnergyProfiler; - -extern "C" { - -void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, - const uint32_t devInfoCount __attribute__((unused)), - void* deviceInfo __attribute__((unused))) { - const char* tool_verbose = getenv("KOKKOS_TOOLS_LIBS_VERBOSE"); - if (tool_verbose != nullptr) { - printf("KokkosP NVML Energy: library loaded (sequence is %d, version: %" PRIu64 ")\n", - loadSeq, interfaceVer); - } - - g_data_manager = new DataManager(); - if (!g_data_manager->initialize()) { - printf("KokkosP NVML Energy: Failed to initialize, profiling disabled\n"); - delete g_data_manager; - g_data_manager = nullptr; - } -} - -void kokkosp_finalize_library() { - const char* tool_verbose = getenv("KOKKOS_TOOLS_LIBS_VERBOSE"); - if (tool_verbose != nullptr) { - printf("KokkosP NVML Energy: finalizing library\n"); - } - - if (g_data_manager) { - // Write output files - g_data_manager->write_kernel_data("kokkos_kernels_energy.csv"); - g_data_manager->write_region_data("kokkos_regions_energy.csv"); - - delete g_data_manager; - g_data_manager = nullptr; - } -} - -void kokkosp_begin_parallel_for(const char* name, uint32_t devid __attribute__((unused)), - uint64_t* kernid __attribute__((unused))) { - if (g_data_manager) { - g_data_manager->start_region(std::string(name), RegionType::ParallelFor); - } -} - -void kokkosp_end_parallel_for(uint64_t kernid __attribute__((unused))) { - if (g_data_manager) { - g_data_manager->end_region(); - } -} 
- -void kokkosp_begin_parallel_reduce(const char* name, uint32_t devid __attribute__((unused)), - uint64_t* kernid __attribute__((unused))) { - if (g_data_manager) { - g_data_manager->start_region(std::string(name), RegionType::ParallelReduce); - } -} - -void kokkosp_end_parallel_reduce(uint64_t kernid __attribute__((unused))) { - if (g_data_manager) { - g_data_manager->end_region(); - } -} - -void kokkosp_begin_parallel_scan(const char* name, uint32_t devid __attribute__((unused)), - uint64_t* kernid __attribute__((unused))) { - if (g_data_manager) { - g_data_manager->start_region(std::string(name), RegionType::ParallelScan); - } -} - -void kokkosp_end_parallel_scan(uint64_t kernid __attribute__((unused))) { - if (g_data_manager) { - g_data_manager->end_region(); - } -} - -void kokkosp_push_profile_region(const char* name) { - if (g_data_manager) { - g_data_manager->start_region(std::string(name), RegionType::UserRegion); - } -} - -void kokkosp_pop_profile_region() { - if (g_data_manager) { - g_data_manager->end_region(); - } -} - -} // extern "C" diff --git a/profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.cpp b/profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.cpp index 3ead3d95b..0f2723f6e 100644 --- a/profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.cpp +++ b/profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.cpp @@ -26,175 +26,181 @@ DataManager* g_data_manager = nullptr; DataManager::DataManager() : nvml_initialized(false) {} -DataManager::~DataManager() { - finalize(); -} +DataManager::~DataManager() { finalize(); } bool DataManager::initialize() { - nvmlReturn_t result = nvmlInit(); - if (result != NVML_SUCCESS) { - printf("KokkosP NVML Energy: Failed to initialize NVML: %s\n", nvmlErrorString(result)); - return false; - } - - // Get the first GPU device - result = nvmlDeviceGetHandleByIndex(0, &device); - if (result != NVML_SUCCESS) { - printf("KokkosP NVML Energy: Failed to get device handle: %s\n", 
nvmlErrorString(result)); - nvmlShutdown(); - return false; - } - - // Test if energy monitoring is available - unsigned long long energy; - result = nvmlDeviceGetTotalEnergyConsumption(device, &energy); - if (result != NVML_SUCCESS) { - printf("KokkosP NVML Energy: Energy monitoring not available: %s\n", nvmlErrorString(result)); - nvmlShutdown(); - return false; - } - - nvml_initialized = true; - printf("KokkosP NVML Energy: Initialized successfully\n"); - return true; + nvmlReturn_t result = nvmlInit(); + if (result != NVML_SUCCESS) { + printf("KokkosP NVML Energy: Failed to initialize NVML: %s\n", + nvmlErrorString(result)); + return false; + } + + // Get the first GPU device + result = nvmlDeviceGetHandleByIndex(0, &device); + if (result != NVML_SUCCESS) { + printf("KokkosP NVML Energy: Failed to get device handle: %s\n", + nvmlErrorString(result)); + nvmlShutdown(); + return false; + } + + // Test if energy monitoring is available + unsigned long long energy; + result = nvmlDeviceGetTotalEnergyConsumption(device, &energy); + if (result != NVML_SUCCESS) { + printf("KokkosP NVML Energy: Energy monitoring not available: %s\n", + nvmlErrorString(result)); + nvmlShutdown(); + return false; + } + + nvml_initialized = true; + printf("KokkosP NVML Energy: Initialized successfully\n"); + return true; } void DataManager::finalize() { - if (nvml_initialized) { - nvmlShutdown(); - nvml_initialized = false; - } + if (nvml_initialized) { + nvmlShutdown(); + nvml_initialized = false; + } } unsigned long long DataManager::get_current_energy_mj() const { - if (!nvml_initialized) return 0; - - unsigned long long energy; - nvmlReturn_t result = nvmlDeviceGetTotalEnergyConsumption(device, &energy); - if (result != NVML_SUCCESS) { - printf("KokkosP NVML Energy: Failed to get energy consumption: %s\n", nvmlErrorString(result)); - return 0; - } - return energy; + if (!nvml_initialized) return 0; + + unsigned long long energy; + nvmlReturn_t result = 
nvmlDeviceGetTotalEnergyConsumption(device, &energy); + if (result != NVML_SUCCESS) { + printf("KokkosP NVML Energy: Failed to get energy consumption: %s\n", + nvmlErrorString(result)); + return 0; + } + return energy; } void DataManager::start_region(const std::string& name, RegionType type) { - TimingEnergyInfo region; - region.name = name; - region.type = type; - region.start_time = std::chrono::high_resolution_clock::now(); - region.start_energy_mj = get_current_energy_mj(); - active_regions.push_back(region); + TimingEnergyInfo region; + region.name = name; + region.type = type; + region.start_time = std::chrono::high_resolution_clock::now(); + region.start_energy_mj = get_current_energy_mj(); + active_regions.push_back(region); } void DataManager::end_region() { - if (!active_regions.empty()) { - auto& region = active_regions.back(); - region.end_time = std::chrono::high_resolution_clock::now(); - region.end_energy_mj = get_current_energy_mj(); - region.duration = std::chrono::duration_cast( - region.end_time - region.start_time); - region.delta_energy_mj = region.end_energy_mj - region.start_energy_mj; - - // Power (W) = (delta_energy_mj / 1000) [Joules] / (duration_ns / 1e9) [seconds] - // => Power = (delta_energy_mj * 1e6) / duration_ns - if (region.duration.count() > 0) { - region.average_power_w = (static_cast(region.delta_energy_mj) * 1e6) / - static_cast(region.duration.count()); - } else { - region.average_power_w = 0.0; - } - - if (region.type == RegionType::UserRegion) { - completed_regions.push_back(region); - } else { - completed_kernels.push_back(region); - } - active_regions.pop_back(); + if (!active_regions.empty()) { + auto& region = active_regions.back(); + region.end_time = std::chrono::high_resolution_clock::now(); + region.end_energy_mj = get_current_energy_mj(); + region.duration = std::chrono::duration_cast( + region.end_time - region.start_time); + region.delta_energy_mj = region.end_energy_mj - region.start_energy_mj; + + // Power (W) 
= (delta_energy_mj / 1000) [Joules] / (duration_ns / 1e9) + // [seconds] + // => Power = (delta_energy_mj * 1e6) / duration_ns + if (region.duration.count() > 0) { + region.average_power_w = + (static_cast(region.delta_energy_mj) * 1e6) / + static_cast(region.duration.count()); + } else { + region.average_power_w = 0.0; } + + if (region.type == RegionType::UserRegion) { + completed_regions.push_back(region); + } else { + completed_kernels.push_back(region); + } + active_regions.pop_back(); + } } const char* DataManager::region_type_to_string(RegionType type) const { - switch (type) { - case RegionType::ParallelFor: return "parallel_for"; - case RegionType::ParallelReduce: return "parallel_reduce"; - case RegionType::ParallelScan: return "parallel_scan"; - case RegionType::UserRegion: return "user_region"; - default: return "unknown"; - } + switch (type) { + case RegionType::ParallelFor: return "parallel_for"; + case RegionType::ParallelReduce: return "parallel_reduce"; + case RegionType::ParallelScan: return "parallel_scan"; + case RegionType::UserRegion: return "user_region"; + default: return "unknown"; + } } void DataManager::write_kernel_data(const std::string& filename) const { - if (completed_kernels.empty()) return; - - FILE* kernels_file = fopen(filename.c_str(), "w"); - if (kernels_file) { - fprintf(kernels_file, "name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns,start_energy_mj,end_energy_mj,delta_energy_mj,average_power_w\n"); - for (const auto& kernel : completed_kernels) { - auto start_ns = std::chrono::duration_cast( - kernel.start_time.time_since_epoch()) - .count(); - auto end_ns = std::chrono::duration_cast( - kernel.end_time.time_since_epoch()) - .count(); - fprintf(kernels_file, "%s,%s,%" PRId64 ",%" PRId64 ",%" PRId64 ",%llu,%llu,%llu,%.6f\n", - kernel.name.c_str(), - region_type_to_string(kernel.type), - start_ns, - end_ns, - (int64_t)kernel.duration.count(), - kernel.start_energy_mj, - kernel.end_energy_mj, - kernel.delta_energy_mj, 
- kernel.average_power_w); - } - fclose(kernels_file); - char cwd[256]; - if (getcwd(cwd, 256) != nullptr) { - printf("KokkosP NVML Energy: Kernel energy CSV written to %s/%s (%" PRIu64 " kernels)\n", - cwd, filename.c_str(), static_cast(completed_kernels.size())); - } else { - printf("KokkosP NVML Energy: Kernel energy CSV written to %s (%" PRIu64 " kernels)\n", - filename.c_str(), static_cast(completed_kernels.size())); - } + if (completed_kernels.empty()) return; + + FILE* kernels_file = fopen(filename.c_str(), "w"); + if (kernels_file) { + fprintf(kernels_file, + "name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns,start_" + "energy_mj,end_energy_mj,delta_energy_mj,average_power_w\n"); + for (const auto& kernel : completed_kernels) { + auto start_ns = std::chrono::duration_cast( + kernel.start_time.time_since_epoch()) + .count(); + auto end_ns = std::chrono::duration_cast( + kernel.end_time.time_since_epoch()) + .count(); + fprintf(kernels_file, + "%s,%s,%" PRId64 ",%" PRId64 ",%" PRId64 ",%llu,%llu,%llu,%.6f\n", + kernel.name.c_str(), region_type_to_string(kernel.type), start_ns, + end_ns, (int64_t)kernel.duration.count(), kernel.start_energy_mj, + kernel.end_energy_mj, kernel.delta_energy_mj, + kernel.average_power_w); } + fclose(kernels_file); + char cwd[256]; + if (getcwd(cwd, 256) != nullptr) { + printf("KokkosP NVML Energy: Kernel energy CSV written to %s/%s (%" PRIu64 + " kernels)\n", + cwd, filename.c_str(), + static_cast(completed_kernels.size())); + } else { + printf("KokkosP NVML Energy: Kernel energy CSV written to %s (%" PRIu64 + " kernels)\n", + filename.c_str(), static_cast(completed_kernels.size())); + } + } } void DataManager::write_region_data(const std::string& filename) const { - if (completed_regions.empty()) return; - - FILE* regions_file = fopen(filename.c_str(), "w"); - if (regions_file) { - fprintf(regions_file, 
"name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns,start_energy_mj,end_energy_mj,delta_energy_mj,average_power_w\n"); - for (const auto& region : completed_regions) { - auto start_ns = std::chrono::duration_cast( - region.start_time.time_since_epoch()) - .count(); - auto end_ns = std::chrono::duration_cast( - region.end_time.time_since_epoch()) - .count(); - fprintf(regions_file, "%s,%s,%" PRId64 ",%" PRId64 ",%" PRId64 ",%llu,%llu,%llu,%.6f\n", - region.name.c_str(), - region_type_to_string(region.type), - start_ns, - end_ns, - (int64_t)region.duration.count(), - region.start_energy_mj, - region.end_energy_mj, - region.delta_energy_mj, - region.average_power_w); - } - fclose(regions_file); - char cwd[256]; - if (getcwd(cwd, 256) != nullptr) { - printf("KokkosP NVML Energy: Region energy CSV written to %s/%s (%" PRIu64 " regions)\n", - cwd, filename.c_str(), static_cast(completed_regions.size())); - } else { - printf("KokkosP NVML Energy: Region energy CSV written to %s (%" PRIu64 " regions)\n", - filename.c_str(), static_cast(completed_regions.size())); - } + if (completed_regions.empty()) return; + + FILE* regions_file = fopen(filename.c_str(), "w"); + if (regions_file) { + fprintf(regions_file, + "name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns,start_" + "energy_mj,end_energy_mj,delta_energy_mj,average_power_w\n"); + for (const auto& region : completed_regions) { + auto start_ns = std::chrono::duration_cast( + region.start_time.time_since_epoch()) + .count(); + auto end_ns = std::chrono::duration_cast( + region.end_time.time_since_epoch()) + .count(); + fprintf(regions_file, + "%s,%s,%" PRId64 ",%" PRId64 ",%" PRId64 ",%llu,%llu,%llu,%.6f\n", + region.name.c_str(), region_type_to_string(region.type), start_ns, + end_ns, (int64_t)region.duration.count(), region.start_energy_mj, + region.end_energy_mj, region.delta_energy_mj, + region.average_power_w); + } + fclose(regions_file); + char cwd[256]; + if (getcwd(cwd, 256) != nullptr) { + 
printf("KokkosP NVML Energy: Region energy CSV written to %s/%s (%" PRIu64 + " regions)\n", + cwd, filename.c_str(), + static_cast(completed_regions.size())); + } else { + printf("KokkosP NVML Energy: Region energy CSV written to %s (%" PRIu64 + " regions)\n", + filename.c_str(), static_cast(completed_regions.size())); } + } } -} // namespace NVMLEnergyProfiler -} // namespace KokkosTools +} // namespace NVMLEnergyProfiler +} // namespace KokkosTools diff --git a/profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.hpp b/profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.hpp index 5500deaf9..0430d11f1 100644 --- a/profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.hpp +++ b/profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.hpp @@ -25,53 +25,48 @@ namespace KokkosTools { namespace NVMLEnergyProfiler { -enum class RegionType { - ParallelFor, - ParallelReduce, - ParallelScan, - UserRegion -}; +enum class RegionType { ParallelFor, ParallelReduce, ParallelScan, UserRegion }; struct TimingEnergyInfo { - std::string name; - RegionType type; - std::chrono::high_resolution_clock::time_point start_time; - std::chrono::high_resolution_clock::time_point end_time; - std::chrono::nanoseconds duration; - unsigned long long start_energy_mj; // millijoules at start - unsigned long long end_energy_mj; // millijoules at end - unsigned long long delta_energy_mj; // energy consumed during region - double average_power_w; // average power in Watts + std::string name; + RegionType type; + std::chrono::high_resolution_clock::time_point start_time; + std::chrono::high_resolution_clock::time_point end_time; + std::chrono::nanoseconds duration; + unsigned long long start_energy_mj; // millijoules at start + unsigned long long end_energy_mj; // millijoules at end + unsigned long long delta_energy_mj; // energy consumed during region + double average_power_w; // average power in Watts }; class DataManager { -private: - std::vector completed_kernels; - 
std::vector completed_regions; - std::vector active_regions; - nvmlDevice_t device; - bool nvml_initialized; + private: + std::vector completed_kernels; + std::vector completed_regions; + std::vector active_regions; + nvmlDevice_t device; + bool nvml_initialized; + + const char* region_type_to_string(RegionType type) const; + unsigned long long get_current_energy_mj() const; + + public: + DataManager(); + ~DataManager(); + + bool initialize(); + void finalize(); - const char* region_type_to_string(RegionType type) const; - unsigned long long get_current_energy_mj() const; + void start_region(const std::string& name, RegionType type); + void end_region(); -public: - DataManager(); - ~DataManager(); - - bool initialize(); - void finalize(); - - void start_region(const std::string& name, RegionType type); - void end_region(); - - void write_kernel_data(const std::string& filename) const; - void write_region_data(const std::string& filename) const; + void write_kernel_data(const std::string& filename) const; + void write_region_data(const std::string& filename) const; }; extern DataManager* g_data_manager; -} // namespace NVMLEnergyProfiler -} // namespace KokkosTools +} // namespace NVMLEnergyProfiler +} // namespace KokkosTools -#endif // KP_NVML_ENERGY_PROFILER_HPP +#endif // KP_NVML_ENERGY_PROFILER_HPP diff --git a/profiling/energy-profiler/nvml-power/readme.md b/profiling/energy-profiler/nvml-power/readme.md new file mode 100644 index 000000000..520f4020f --- /dev/null +++ b/profiling/energy-profiler/nvml-power/readme.md @@ -0,0 +1,24 @@ +# NVML Power Profiler + +A Kokkos profiling tool that uses NVML to collect power data from NVIDIA GPUs. +This variant uses `nvmlDeviceGetTotalEnergyConsumption(device, &energy)` to get energy data for kernels and regions. + +## Setup + +Requires CUDA toolkit with NVML. +1. Install the CUDA toolkit that includes NVML. +2. Compile this module with the main CMake build. 
+ +## Output Files + +- `hostname-pid-nvml-power-kernels.csv`: Kernel power measurements + - Format: `name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns,start_energy_mj,end_energy_mj,delta_energy_mj,average_power_w` +- `hostname-pid-nvml-power-regions.csv`: Region timings + - Format: `name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns,start_energy_mj,end_energy_mj,delta_energy_mj,average_power_w` + +## Usage + +```bash +export KOKKOS_PROFILE_LIBRARY=/path/to/kp_energy_nvml.so +./your_kokkos_application +``` \ No newline at end of file diff --git a/profiling/energy-profiler/nvml/kp_nvml_power_profiler.cpp b/profiling/energy-profiler/nvml/kp_nvml_power_profiler.cpp index 6a9a15de6..e585ce443 100644 --- a/profiling/energy-profiler/nvml/kp_nvml_power_profiler.cpp +++ b/profiling/energy-profiler/nvml/kp_nvml_power_profiler.cpp @@ -23,105 +23,112 @@ namespace KokkosTools { namespace NVMLPowerProfiler { void DataManager::add_power_data_point(int64_t timestamp, double power) { - power_data_points.push_back({timestamp, power}); + power_data_points.push_back({timestamp, power}); } void DataManager::start_region(const std::string& name, RegionType type) { - TimingInfo region; - region.name = name; - region.type = type; - region.start_time = std::chrono::high_resolution_clock::now(); - active_regions.push_back(region); + TimingInfo region; + region.name = name; + region.type = type; + region.start_time = std::chrono::high_resolution_clock::now(); + active_regions.push_back(region); } void DataManager::end_region() { - if (!active_regions.empty()) { - auto& region = active_regions.back(); - region.end_time = std::chrono::high_resolution_clock::now(); - region.duration = std::chrono::duration_cast( - region.end_time - region.start_time); - - if (region.type == RegionType::UserRegion) { - completed_regions.push_back(region); - } else { - completed_kernels.push_back(region); - } - active_regions.pop_back(); + if (!active_regions.empty()) { + auto& region = 
active_regions.back(); + region.end_time = std::chrono::high_resolution_clock::now(); + region.duration = std::chrono::duration_cast( + region.end_time - region.start_time); + + if (region.type == RegionType::UserRegion) { + completed_regions.push_back(region); + } else { + completed_kernels.push_back(region); } + active_regions.pop_back(); + } } const char* DataManager::region_type_to_string(RegionType type) const { - switch (type) { - case RegionType::ParallelFor: return "parallel_for"; - case RegionType::ParallelReduce: return "parallel_reduce"; - case RegionType::ParallelScan: return "parallel_scan"; - case RegionType::UserRegion: return "user_region"; - default: return "unknown"; - } + switch (type) { + case RegionType::ParallelFor: return "parallel_for"; + case RegionType::ParallelReduce: return "parallel_reduce"; + case RegionType::ParallelScan: return "parallel_scan"; + case RegionType::UserRegion: return "user_region"; + default: return "unknown"; + } } void DataManager::write_power_data(const std::string& filename) const { - FILE* csv_file = fopen(filename.c_str(), "w"); - if (csv_file) { - fprintf(csv_file, "timestamp_epoch_ns,power_watts\n"); - for (const auto& point : power_data_points) { - fprintf(csv_file, "%" PRId64 ",%.6f\n", point.timestamp_ns, point.power_watts); - } - fclose(csv_file); - char cwd[256]; - getcwd(cwd, 256); - printf("KokkosP NVML Power: Power CSV data written to %s/%s (%" PRIu64 " data points)\n", - cwd, filename.c_str(), static_cast(power_data_points.size())); + FILE* csv_file = fopen(filename.c_str(), "w"); + if (csv_file) { + fprintf(csv_file, "time_epoch_ns,power_w\n"); + for (const auto& point : power_data_points) { + fprintf(csv_file, "%" PRId64 ",%.6f\n", point.timestamp_ns, + point.power_watts); } + fclose(csv_file); + char cwd[256]; + getcwd(cwd, 256); + printf("KokkosP NVML Power: Power CSV data written to %s/%s (%" PRIu64 + " data points)\n", + cwd, filename.c_str(), + static_cast(power_data_points.size())); + } } void 
DataManager::write_kernel_data(const std::string& filename) const { - if (completed_kernels.empty()) return; + if (completed_kernels.empty()) return; - FILE* regions_file = fopen(filename.c_str(), "w"); - if (regions_file) { - fprintf(regions_file, "name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns\n"); - for (const auto& region : completed_kernels) { - auto start_ns = std::chrono::duration_cast( - region.start_time.time_since_epoch()) - .count(); - auto end_ns = std::chrono::duration_cast( - region.end_time.time_since_epoch()) - .count(); - fprintf(regions_file, "%s,%s,%" PRId64 ",%" PRId64 ",%" PRId64 "\n", - region.name.c_str(), region_type_to_string(region.type), start_ns, - end_ns, (int64_t)region.duration.count()); - } - fclose(regions_file); - char cwd[256]; - getcwd(cwd, 256); - printf("KokkosP NVML Power: Kernel timing CSV written to %s/%s\n", cwd, filename.c_str()); + FILE* file = fopen(filename.c_str(), "w"); + if (file) { + fprintf(file, + "name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns\n"); + for (const auto& region : completed_kernels) { + auto start_ns = std::chrono::duration_cast( + region.start_time.time_since_epoch()) + .count(); + auto end_ns = std::chrono::duration_cast( + region.end_time.time_since_epoch()) + .count(); + fprintf(file, "%s,%s,%" PRId64 ",%" PRId64 ",%" PRId64 "\n", + region.name.c_str(), region_type_to_string(region.type), start_ns, + end_ns, (int64_t)region.duration.count()); } + fclose(file); + char cwd[256]; + getcwd(cwd, 256); + printf("KokkosP NVML Power: Kernel timing CSV written to %s/%s\n", cwd, + filename.c_str()); + } } void DataManager::write_region_data(const std::string& filename) const { - if (completed_regions.empty()) return; + if (completed_regions.empty()) return; - FILE* regions_file = fopen(filename.c_str(), "w"); - if (regions_file) { - fprintf(regions_file, "name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns\n"); - for (const auto& region : completed_regions) { - auto 
start_ns = std::chrono::duration_cast( - region.start_time.time_since_epoch()) - .count(); - auto end_ns = std::chrono::duration_cast( - region.end_time.time_since_epoch()) - .count(); - fprintf(regions_file, "%s,%s,%" PRId64 ",%" PRId64 ",%" PRId64 "\n", - region.name.c_str(), region_type_to_string(region.type), start_ns, - end_ns, (int64_t)region.duration.count()); - } - fclose(regions_file); - char cwd[256]; - getcwd(cwd, 256); - printf("KokkosP NVML Power: Region timing CSV written to %s/%s\n", cwd, filename.c_str()); + FILE* file = fopen(filename.c_str(), "w"); + if (file) { + fprintf(file, + "name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns\n"); + for (const auto& region : completed_regions) { + auto start_ns = std::chrono::duration_cast( + region.start_time.time_since_epoch()) + .count(); + auto end_ns = std::chrono::duration_cast( + region.end_time.time_since_epoch()) + .count(); + fprintf(file, "%s,%s,%" PRId64 ",%" PRId64 ",%" PRId64 "\n", + region.name.c_str(), region_type_to_string(region.type), start_ns, + end_ns, (int64_t)region.duration.count()); } + fclose(file); + char cwd[256]; + getcwd(cwd, 256); + printf("KokkosP NVML Power: Region timing CSV written to %s/%s\n", cwd, + filename.c_str()); + } } -} // namespace NVMLPowerProfiler -} // namespace KokkosTools +} // namespace NVMLPowerProfiler +} // namespace KokkosTools diff --git a/profiling/energy-profiler/nvml/kp_nvml_power_profiler.hpp b/profiling/energy-profiler/nvml/kp_nvml_power_profiler.hpp index 2f03423c7..b79624336 100644 --- a/profiling/energy-profiler/nvml/kp_nvml_power_profiler.hpp +++ b/profiling/energy-profiler/nvml/kp_nvml_power_profiler.hpp @@ -47,24 +47,24 @@ struct TimingInfo { }; class DataManager { -public: - void add_power_data_point(int64_t timestamp, double power); - void start_region(const std::string& name, RegionType type); - void end_region(); - void write_power_data(const std::string& filename) const; - void write_kernel_data(const std::string& filename) 
const; - void write_region_data(const std::string& filename) const; + public: + void add_power_data_point(int64_t timestamp, double power); + void start_region(const std::string& name, RegionType type); + void end_region(); + void write_power_data(const std::string& filename) const; + void write_kernel_data(const std::string& filename) const; + void write_region_data(const std::string& filename) const; -private: - const char* region_type_to_string(RegionType type) const; + private: + const char* region_type_to_string(RegionType type) const; - std::deque power_data_points; - std::deque completed_kernels; - std::deque completed_regions; - std::deque active_regions; + std::deque power_data_points; + std::deque completed_kernels; + std::deque completed_regions; + std::deque active_regions; }; -} // namespace NVMLPowerProfiler -} // namespace KokkosTools +} // namespace NVMLPowerProfiler +} // namespace KokkosTools -#endif // KP_POWER_PROFILER_HPP +#endif // KP_POWER_PROFILER_HPP diff --git a/profiling/energy-profiler/nvml/kp_power_nvml.cpp b/profiling/energy-profiler/nvml/kp_power_nvml.cpp index 900839272..0de693ecf 100644 --- a/profiling/energy-profiler/nvml/kp_power_nvml.cpp +++ b/profiling/energy-profiler/nvml/kp_power_nvml.cpp @@ -214,8 +214,7 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, } int interval_ms = 20; - if (const char* interval_env = - std::getenv("KOKKOS_NVML_POWER_INTERVAL")) { + if (const char* interval_env = std::getenv("KOKKOS_NVML_POWER_INTERVAL")) { try { interval_ms = std::stoi(interval_env); if (interval_ms <= 0) { @@ -224,17 +223,18 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, } printf("KokkosP NVML Power: Using custom interval: %d ms\n", interval_ms); } catch (const std::exception& e) { - printf("KokkosP NVML Power: Invalid interval value, using default 20ms\n"); + printf( + "KokkosP NVML Power: Invalid interval value, using default 20ms\n"); } } else { printf("KokkosP NVML Power: 
Using default interval: 20 ms\n"); } g_stop_requested.store(false); - - g_monitoring_thread = std::make_unique( - nvml_power_monitoring_thread_func, - std::chrono::milliseconds(interval_ms)); + + g_monitoring_thread = + std::make_unique(nvml_power_monitoring_thread_func, + std::chrono::milliseconds(interval_ms)); printf("KokkosP NVML Power: Power monitoring started\n"); } @@ -314,9 +314,7 @@ void kokkosp_push_profile_region(char const* regionName) { g_data_manager.start_region(regionName, RegionType::UserRegion); } -void kokkosp_pop_profile_region() { - g_data_manager.end_region(); -} +void kokkosp_pop_profile_region() { g_data_manager.end_region(); } Kokkos::Tools::Experimental::EventSet get_event_set() { Kokkos::Tools::Experimental::EventSet my_event_set; diff --git a/profiling/energy-profiler/nvml/readme.md b/profiling/energy-profiler/nvml/readme.md index fd7511f4b..771d9d2ee 100644 --- a/profiling/energy-profiler/nvml/readme.md +++ b/profiling/energy-profiler/nvml/readme.md @@ -10,14 +10,18 @@ Requires CUDA toolkit with NVML. ## Output Files -- `hostname-pid-nvml-power-raw.csv`: Power measurements - - Format: `timestamp_epoch_ns,device_id,power_watts` +- `hostname-pid-nvml-power.csv`: Power measurements + - Format: `time_epoch_ns,power_w` - `hostname-pid-nvml-regions.csv`: Region timings - - Format: `name,type,start_timestamp_epoch_ns,end_timestamp_epoch_ns,duration_ns` + - Format: `name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns` +- `hostname-pid-nvml-kernels.csv`: Kernel timings + - Format: `name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns` ## Usage ```bash export KOKKOS_PROFILE_LIBRARY=/path/to/kp_power_nvml.so ./your_kokkos_application -``` \ No newline at end of file +``` + +Interval of sampling power data can be controlled via the `KOKKOS_NVML_POWER_INTERVAL` environment variable, which specifies the interval in milliseconds.
\ No newline at end of file From c55fb56f2a64f3d9b7450d0e26079df059d173e7 Mon Sep 17 00:00:00 2001 From: Ethan Puyaubreau Date: Mon, 21 Jul 2025 11:07:46 -0400 Subject: [PATCH 07/11] energy-profiler: fix filename generation for output files in finalize_library --- .../energy-profiler/nvml-power/kp_energy_nvml.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/profiling/energy-profiler/nvml-power/kp_energy_nvml.cpp b/profiling/energy-profiler/nvml-power/kp_energy_nvml.cpp index b0a4fdbb7..4ef00965a 100644 --- a/profiling/energy-profiler/nvml-power/kp_energy_nvml.cpp +++ b/profiling/energy-profiler/nvml-power/kp_energy_nvml.cpp @@ -53,10 +53,12 @@ void kokkosp_finalize_library() { if (g_data_manager) { // Write output files - g_data_manager->write_kernel_data("%s-%d-nvml-power-kernels.csv", hostname, - pid); - g_data_manager->write_region_data("%s-%d-nvml-power-regions.csv", hostname, - pid); + auto kernel_filename = std::string(hostname) + "-" + std::to_string(pid) + + "-nvml-energy-kernels.csv"; + auto region_filename = std::string(hostname) + "-" + std::to_string(pid) + + "-nvml-energy-regions.csv"; + g_data_manager->write_kernel_data(kernel_filename); + g_data_manager->write_region_data(region_filename); delete g_data_manager; g_data_manager = nullptr; From 032538b15f4d2a8db783f396fda1a68e287c8be1 Mon Sep 17 00:00:00 2001 From: Ethan Puyaubreau Date: Mon, 21 Jul 2025 11:22:29 -0400 Subject: [PATCH 08/11] energy-profiler: fix kp_energy and improve cmake --- CMakeLists.txt | 3 ++- profiling/energy-profiler/CMakeLists.txt | 6 +++--- .../energy-profiler/nvml-power/CMakeLists.txt | 4 ++-- .../nvml-power/kp_energy_nvml.cpp | 20 ++++++++++++++++++- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 48fef3053..4ab545e14 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -172,9 +172,10 @@ endif() if(KOKKOSTOOLS_HAS_VARIORUM) add_subdirectory(profiling/variorum-connector) - 
add_subdirectory(profiling/energy-profiler) endif() +add_subdirectory(profiling/energy-profiler) + # GPU profilers if(Kokkos_ENABLE_CUDA) add_subdirectory(profiling/nvtx-connector) diff --git a/profiling/energy-profiler/CMakeLists.txt b/profiling/energy-profiler/CMakeLists.txt index 56891d014..be6647373 100644 --- a/profiling/energy-profiler/CMakeLists.txt +++ b/profiling/energy-profiler/CMakeLists.txt @@ -1,5 +1,5 @@ -cmake_minimum_required(VERSION 3.16) - -add_subdirectory(variorum) +if(KOKKOSTOOLS_HAS_VARIORUM) + add_subdirectory(variorum) +endif() add_subdirectory(nvml) add_subdirectory(nvml-power) diff --git a/profiling/energy-profiler/nvml-power/CMakeLists.txt b/profiling/energy-profiler/nvml-power/CMakeLists.txt index 60a679d05..ea08e4a79 100644 --- a/profiling/energy-profiler/nvml-power/CMakeLists.txt +++ b/profiling/energy-profiler/nvml-power/CMakeLists.txt @@ -2,9 +2,9 @@ find_package(CUDAToolkit QUIET) if (CUDAToolkit_FOUND) find_package(CUDA::nvml QUIET) - message(STATUS "Found CUDA NVML, making NVML power profiler available.") + message(STATUS "Found CUDA NVML, making NVML energy profiler available.") else() - message(STATUS "CUDAToolkit not found, skipping NVML power profiler.") + message(STATUS "CUDAToolkit not found, skipping NVML energy profiler.") return() endif() diff --git a/profiling/energy-profiler/nvml-power/kp_energy_nvml.cpp b/profiling/energy-profiler/nvml-power/kp_energy_nvml.cpp index 4ef00965a..7b9457e0b 100644 --- a/profiling/energy-profiler/nvml-power/kp_energy_nvml.cpp +++ b/profiling/energy-profiler/nvml-power/kp_energy_nvml.cpp @@ -14,9 +14,27 @@ // //@HEADER -#include "kp_nvml_energy_profiler.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include #include +#include + +#include + +#include "kp_core.hpp" +#include "kp_nvml_energy_profiler.hpp" using namespace KokkosTools::NVMLEnergyProfiler; From 1c31665e5f24207c6ed71276e928a397715497e6 Mon Sep 
17 00:00:00 2001 From: Ethan Puyaubreau Date: Wed, 13 Aug 2025 14:23:07 -0400 Subject: [PATCH 09/11] energy-profiler: refactor energy tool --- profiling/energy-profiler/CMakeLists.txt | 7 +- profiling/energy-profiler/common/daemon.cpp | 43 ++ profiling/energy-profiler/common/daemon.hpp | 23 + .../common/filename_prefix.cpp | 8 + .../common/filename_prefix.hpp | 6 + profiling/energy-profiler/common/timer.cpp | 244 ++++++++ profiling/energy-profiler/common/timer.hpp | 69 +++ .../energy-profiler/common/tool_interface.hpp | 31 + .../energy-profiler/kokkos/CMakeLists.txt | 76 +++ .../kokkos/kp_energy_kernel_timer.cpp | 200 +++++++ .../kokkos/kp_nvml_direct_power.cpp | 356 ++++++++++++ .../kokkos/kp_nvml_energy_consumption.cpp | 533 ++++++++++++++++++ .../energy-profiler/kokkos/kp_nvml_power.cpp | 288 ++++++++++ .../kokkos/kp_variorum_power.cpp | 299 ++++++++++ .../energy-profiler/nvml-power/CMakeLists.txt | 17 - .../nvml-power/kp_energy_nvml.cpp | 140 ----- .../nvml-power/kp_nvml_energy_profiler.cpp | 206 ------- .../nvml-power/kp_nvml_energy_profiler.hpp | 72 --- .../energy-profiler/nvml-power/readme.md | 24 - profiling/energy-profiler/nvml/CMakeLists.txt | 17 - .../nvml/kp_nvml_power_profiler.cpp | 134 ----- .../nvml/kp_nvml_power_profiler.hpp | 70 --- .../energy-profiler/nvml/kp_power_nvml.cpp | 353 ------------ profiling/energy-profiler/nvml/readme.md | 27 - .../provider/provider_nvml.cpp | 219 +++++++ .../provider/provider_nvml.hpp | 49 ++ .../provider/provider_variorum.cpp | 230 ++++++++ .../provider/provider_variorum.hpp | 56 ++ profiling/energy-profiler/readme.md | 17 - .../energy-profiler/tests/CMakeLists.txt | 166 ++++++ .../energy-profiler/tests/csv_export_test.cpp | 68 +++ .../tests/daemon_nvml_fast_test.cpp | 226 ++++++++ .../tests/daemon_nvml_integration_test.cpp | 123 ++++ .../energy-profiler/tests/daemon_test.cpp | 221 ++++++++ .../tests/daemon_variorum_fast_test.cpp | 227 ++++++++ .../daemon_variorum_integration_test.cpp | 128 +++++ 
.../tests/nvml_provider_test.cpp | 100 ++++ .../energy-profiler/tests/timer_test.cpp | 343 +++++++++++ .../tests/variorum_provider_test.cpp | 77 +++ .../tools/kernel_timer_tool.cpp | 93 +++ .../tools/kernel_timer_tool.hpp | 56 ++ .../energy-profiler/variorum/CMakeLists.txt | 13 - profiling/energy-profiler/variorum/Makefile | 27 - .../variorum/kp_power_variorum.cpp | 175 ------ profiling/energy-profiler/variorum/readme.md | 38 -- .../variorum/variorum_power_profiler.cpp | 369 ------------ .../variorum/variorum_power_profiler.hpp | 133 ----- 47 files changed, 4560 insertions(+), 1837 deletions(-) create mode 100644 profiling/energy-profiler/common/daemon.cpp create mode 100644 profiling/energy-profiler/common/daemon.hpp create mode 100644 profiling/energy-profiler/common/filename_prefix.cpp create mode 100644 profiling/energy-profiler/common/filename_prefix.hpp create mode 100644 profiling/energy-profiler/common/timer.cpp create mode 100644 profiling/energy-profiler/common/timer.hpp create mode 100644 profiling/energy-profiler/common/tool_interface.hpp create mode 100644 profiling/energy-profiler/kokkos/CMakeLists.txt create mode 100644 profiling/energy-profiler/kokkos/kp_energy_kernel_timer.cpp create mode 100644 profiling/energy-profiler/kokkos/kp_nvml_direct_power.cpp create mode 100644 profiling/energy-profiler/kokkos/kp_nvml_energy_consumption.cpp create mode 100644 profiling/energy-profiler/kokkos/kp_nvml_power.cpp create mode 100644 profiling/energy-profiler/kokkos/kp_variorum_power.cpp delete mode 100644 profiling/energy-profiler/nvml-power/CMakeLists.txt delete mode 100644 profiling/energy-profiler/nvml-power/kp_energy_nvml.cpp delete mode 100644 profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.cpp delete mode 100644 profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.hpp delete mode 100644 profiling/energy-profiler/nvml-power/readme.md delete mode 100644 profiling/energy-profiler/nvml/CMakeLists.txt delete mode 100644 
profiling/energy-profiler/nvml/kp_nvml_power_profiler.cpp delete mode 100644 profiling/energy-profiler/nvml/kp_nvml_power_profiler.hpp delete mode 100644 profiling/energy-profiler/nvml/kp_power_nvml.cpp delete mode 100644 profiling/energy-profiler/nvml/readme.md create mode 100644 profiling/energy-profiler/provider/provider_nvml.cpp create mode 100644 profiling/energy-profiler/provider/provider_nvml.hpp create mode 100644 profiling/energy-profiler/provider/provider_variorum.cpp create mode 100644 profiling/energy-profiler/provider/provider_variorum.hpp delete mode 100644 profiling/energy-profiler/readme.md create mode 100644 profiling/energy-profiler/tests/CMakeLists.txt create mode 100644 profiling/energy-profiler/tests/csv_export_test.cpp create mode 100644 profiling/energy-profiler/tests/daemon_nvml_fast_test.cpp create mode 100644 profiling/energy-profiler/tests/daemon_nvml_integration_test.cpp create mode 100644 profiling/energy-profiler/tests/daemon_test.cpp create mode 100644 profiling/energy-profiler/tests/daemon_variorum_fast_test.cpp create mode 100644 profiling/energy-profiler/tests/daemon_variorum_integration_test.cpp create mode 100644 profiling/energy-profiler/tests/nvml_provider_test.cpp create mode 100644 profiling/energy-profiler/tests/timer_test.cpp create mode 100644 profiling/energy-profiler/tests/variorum_provider_test.cpp create mode 100644 profiling/energy-profiler/tools/kernel_timer_tool.cpp create mode 100644 profiling/energy-profiler/tools/kernel_timer_tool.hpp delete mode 100644 profiling/energy-profiler/variorum/CMakeLists.txt delete mode 100644 profiling/energy-profiler/variorum/Makefile delete mode 100644 profiling/energy-profiler/variorum/kp_power_variorum.cpp delete mode 100644 profiling/energy-profiler/variorum/readme.md delete mode 100644 profiling/energy-profiler/variorum/variorum_power_profiler.cpp delete mode 100644 profiling/energy-profiler/variorum/variorum_power_profiler.hpp diff --git 
a/profiling/energy-profiler/CMakeLists.txt b/profiling/energy-profiler/CMakeLists.txt index be6647373..535cd8e9a 100644 --- a/profiling/energy-profiler/CMakeLists.txt +++ b/profiling/energy-profiler/CMakeLists.txt @@ -1,5 +1,2 @@ -if(KOKKOSTOOLS_HAS_VARIORUM) - add_subdirectory(variorum) -endif() -add_subdirectory(nvml) -add_subdirectory(nvml-power) +add_subdirectory(tests) +add_subdirectory(kokkos) \ No newline at end of file diff --git a/profiling/energy-profiler/common/daemon.cpp b/profiling/energy-profiler/common/daemon.cpp new file mode 100644 index 000000000..b4487eb75 --- /dev/null +++ b/profiling/energy-profiler/common/daemon.cpp @@ -0,0 +1,43 @@ +#include "daemon.hpp" +#include +#include + +void Daemon::start() { + if (!running_) { + running_ = true; + thread_ = std::thread(&Daemon::tick, this); + } else { + throw std::runtime_error("Daemon already started"); + } +} + +void Daemon::tick() { + while (running_) { + std::chrono::high_resolution_clock::time_point start_time = + std::chrono::high_resolution_clock::now(); + + // Execute the function + func_(); + + std::chrono::high_resolution_clock::time_point end_time = + std::chrono::high_resolution_clock::now(); + std::chrono::milliseconds execution_duration = + std::chrono::duration_cast(end_time - + start_time); + + // Calculate how long to sleep to maintain the interval + if (execution_duration < interval_) { + std::chrono::milliseconds sleep_duration = interval_ - execution_duration; + std::this_thread::sleep_for(sleep_duration); + } + } +} + +void Daemon::stop() { + if (running_) { + running_ = false; + thread_.join(); + } else { + throw std::runtime_error("Daemon not started"); + } +} \ No newline at end of file diff --git a/profiling/energy-profiler/common/daemon.hpp b/profiling/energy-profiler/common/daemon.hpp new file mode 100644 index 000000000..c3384b6ad --- /dev/null +++ b/profiling/energy-profiler/common/daemon.hpp @@ -0,0 +1,23 @@ +#pragma once + +#include +#include +#include + +class Daemon { 
+ public: + Daemon(std::function func, int interval_ms) + : func_(func), interval_(interval_ms) {}; + + void start(); + void tick(); + void stop(); + bool is_running() const { return running_; } + std::thread& get_thread() { return thread_; } + + private: + std::chrono::milliseconds interval_; + bool running_{false}; + std::function func_; + std::thread thread_; +}; \ No newline at end of file diff --git a/profiling/energy-profiler/common/filename_prefix.cpp b/profiling/energy-profiler/common/filename_prefix.cpp new file mode 100644 index 000000000..294acb82c --- /dev/null +++ b/profiling/energy-profiler/common/filename_prefix.cpp @@ -0,0 +1,8 @@ +#include "filename_prefix.hpp" + +std::string generate_prefix() { + char hostname[256]; + gethostname(hostname, 256); + int pid = (int)getpid(); + return std::string(hostname) + "-" + std::to_string(pid); +} \ No newline at end of file diff --git a/profiling/energy-profiler/common/filename_prefix.hpp b/profiling/energy-profiler/common/filename_prefix.hpp new file mode 100644 index 000000000..93b02371e --- /dev/null +++ b/profiling/energy-profiler/common/filename_prefix.hpp @@ -0,0 +1,6 @@ +#pragma once + +#include +#include + +std::string generate_prefix(); \ No newline at end of file diff --git a/profiling/energy-profiler/common/timer.cpp b/profiling/energy-profiler/common/timer.cpp new file mode 100644 index 000000000..0b9401440 --- /dev/null +++ b/profiling/energy-profiler/common/timer.cpp @@ -0,0 +1,244 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include "timer.hpp" +#include +#include +#include + +// EnergyTiming implementations +EnergyTiming::EnergyTiming() + : timing_id_(0), name_(""), region_type_(RegionType::Unknown) { + start_time_ = std::chrono::high_resolution_clock::now(); +} + +EnergyTiming::EnergyTiming(uint64_t timing_id, RegionType type, + std::string name) + : timing_id_(timing_id), name_(name), region_type_(type) { + start_time_ = std::chrono::high_resolution_clock::now(); +} + +void EnergyTiming::end() { + end_time_ = std::chrono::high_resolution_clock::now(); +} + +bool EnergyTiming::is_ended() const { + return end_time_ != + std::chrono::time_point{}; +} + +uint64_t EnergyTiming::get_duration_ms() const { + auto duration = std::chrono::duration_cast( + end_time_ - start_time_); + return static_cast(duration.count()); +} + +// EnergyTimer implementations +void EnergyTimer::start_timing(uint64_t timing_id, RegionType type, + std::string name) { + timings_.emplace(timing_id, EnergyTiming(timing_id, type, name)); +} + +void EnergyTimer::end_timing(uint64_t timing_id) { + auto it = timings_.find(timing_id); + if (it != timings_.end()) { + it->second.end(); + } +} + +std::unordered_map& EnergyTimer::get_timings() { + return timings_; +} + +namespace KokkosTools { +namespace Timer { + +void export_kernels_csv(const std::deque& timings, + const std::string& filename) { + if (timings.empty()) return; + + FILE* file = fopen(filename.c_str(), "w"); + if (file) { + fprintf(file, + "name,type,start_time_epoch_ms,end_time_epoch_ms,duration_ms\n"); + for (const auto& timing : timings) { + auto start_ms = std::chrono::duration_cast( + timing.start_time.time_since_epoch()) + .count(); + auto end_ms = std::chrono::duration_cast( + timing.end_time.time_since_epoch()) + .count(); + auto duration_ms = timing.duration.count() / 1000000; + + std::string type; + switch (timing.type) { + case RegionType::ParallelFor: type = 
"parallel_for"; break; + case RegionType::ParallelScan: type = "parallel_scan"; break; + case RegionType::ParallelReduce: type = "parallel_reduce"; break; + default: type = "unknown"; + } + + fprintf(file, "%s,%s,%ld,%ld,%ld\n", timing.name.c_str(), type.c_str(), + start_ms, end_ms, duration_ms); + } + fclose(file); + std::cout << "Timing data exported to " << filename << std::endl; + } else { + std::cerr << "ERROR: Unable to open file " << filename << " for writing.\n"; + } +} + +void export_regions_csv(const std::deque& timings, + const std::string& filename) { + if (timings.empty()) return; + + FILE* file = fopen(filename.c_str(), "w"); + if (file) { + fprintf(file, "name,start_time_epoch_ms,end_time_epoch_ms,duration_ms\n"); + for (const auto& timing : timings) { + auto start_ms = std::chrono::duration_cast( + timing.start_time.time_since_epoch()) + .count(); + auto end_ms = std::chrono::duration_cast( + timing.end_time.time_since_epoch()) + .count(); + auto duration_ms = timing.duration.count() / 1000000; + + fprintf(file, "%s,%ld,%ld,%ld\n", timing.name.c_str(), start_ms, end_ms, + duration_ms); + } + fclose(file); + std::cout << "Region data exported to " << filename << std::endl; + } else { + std::cerr << "ERROR: Unable to open file " << filename << " for writing.\n"; + } +} + +void export_deepcopies_csv(const std::deque& timings, + const std::string& filename) { + if (timings.empty()) return; + + FILE* file = fopen(filename.c_str(), "w"); + if (file) { + fprintf(file, "name,start_time_epoch_ms,end_time_epoch_ms,duration_ms\n"); + for (const auto& timing : timings) { + auto start_ms = std::chrono::duration_cast( + timing.start_time.time_since_epoch()) + .count(); + auto end_ms = std::chrono::duration_cast( + timing.end_time.time_since_epoch()) + .count(); + auto duration_ms = timing.duration.count() / 1000000; + + fprintf(file, "%s,%ld,%ld,%ld\n", timing.name.c_str(), start_ms, end_ms, + duration_ms); + } + fclose(file); + std::cout << "Deep copy data 
exported to " << filename << std::endl; + } else { + std::cerr << "ERROR: Unable to open file " << filename << " for writing.\n"; + } +} + +void print_kernels_summary(const std::deque& kernels) { + std::cout << "\n==== KERNELS ====\n"; + std::cout << "| Name | Type | " + "Start(ms) | End(ms) | Duration (ms) |\n"; + std::cout << "|--------------------------------------|----------------|------" + "-------------|-------------------|---------------|\n"; + for (const auto& info : kernels) { + std::string type; + switch (info.type) { + case RegionType::ParallelFor: type = "parallel_for"; break; + case RegionType::ParallelScan: type = "parallel_scan"; break; + case RegionType::ParallelReduce: type = "parallel_reduce"; break; + default: type = "unknown"; + } + auto start_ms = std::chrono::duration_cast( + info.start_time.time_since_epoch()) + .count(); + auto end_ms = std::chrono::duration_cast( + info.end_time.time_since_epoch()) + .count(); + std::cout + << "| " << info.name + << std::string(38 - std::min(info.name.size(), 38), ' ') << "| " + << type << std::string(16 - type.size(), ' ') << "| " << start_ms + << std::string(19 - std::to_string(start_ms).size(), ' ') << "| " + << end_ms << std::string(19 - std::to_string(end_ms).size(), ' ') + << "| " << (info.duration.count() / 1000000) + << std::string( + 13 - std::to_string(info.duration.count() / 1000000).size(), ' ') + << "|\n"; + } +} + +void print_regions_summary(const std::deque& regions) { + std::cout << "\n==== REGIONS ====\n"; + std::cout << "| Name | Start(ms) | " + "End(ms) | Duration (ms) |\n"; + std::cout << "|--------------------------------------|-------------------|---" + "----------------|---------------|\n"; + for (const auto& info : regions) { + auto start_ms = std::chrono::duration_cast( + info.start_time.time_since_epoch()) + .count(); + auto end_ms = std::chrono::duration_cast( + info.end_time.time_since_epoch()) + .count(); + std::cout << "| " << info.name + << std::string(38 - 
std::min(info.name.size(), 38), ' ') + << "| " << start_ms + << std::string(19 - std::to_string(start_ms).size(), ' ') << "| " + << end_ms << std::string(19 - std::to_string(end_ms).size(), ' ') + << "| " << (info.duration.count() / 1000000) + << std::string( + 13 - + std::to_string(info.duration.count() / 1000000).size(), + ' ') + << "|\n"; + } +} + +void print_deepcopies_summary(const std::deque& deepcopies) { + std::cout << "\n==== DEEP COPIES ====\n"; + std::cout << "| Name | Start(ms) | " + "End(ms) | Duration (ms) |\n"; + std::cout << "|--------------------------------------|-------------------|---" + "----------------|---------------|\n"; + for (const auto& info : deepcopies) { + auto start_ms = std::chrono::duration_cast( + info.start_time.time_since_epoch()) + .count(); + auto end_ms = std::chrono::duration_cast( + info.end_time.time_since_epoch()) + .count(); + std::cout << "| " << info.name + << std::string(38 - std::min(info.name.size(), 38), ' ') + << "| " << start_ms + << std::string(19 - std::to_string(start_ms).size(), ' ') << "| " + << end_ms << std::string(19 - std::to_string(end_ms).size(), ' ') + << "| " << (info.duration.count() / 1000000) + << std::string( + 13 - + std::to_string(info.duration.count() / 1000000).size(), + ' ') + << "|\n"; + } +} + +} // namespace Timer +} // namespace KokkosTools diff --git a/profiling/energy-profiler/common/timer.hpp b/profiling/energy-profiler/common/timer.hpp new file mode 100644 index 000000000..bb4ff8ac3 --- /dev/null +++ b/profiling/energy-profiler/common/timer.hpp @@ -0,0 +1,69 @@ +#pragma once + +#include +#include +#include +#include +#include + +enum class RegionType { + Unknown, + ParallelFor, + ParallelReduce, + ParallelScan, + DeepCopy, + UserRegion +}; + +struct TimingInfo { + std::string name; + RegionType type; + std::chrono::high_resolution_clock::time_point start_time; + std::chrono::high_resolution_clock::time_point end_time; + std::chrono::nanoseconds duration; + uint64_t id = 0; +}; + 
+struct EnergyTiming { + // Default constructor + EnergyTiming(); + + EnergyTiming(uint64_t timing_id, RegionType type, std::string name); + + void end(); + + bool is_ended() const; + + uint64_t get_duration_ms() const; + + uint64_t timing_id_; + std::string name_; + RegionType region_type_; + std::chrono::time_point start_time_; + std::chrono::time_point end_time_; +}; + +struct EnergyTimer { + public: + void start_timing(uint64_t timing_id, RegionType type, std::string name); + void end_timing(uint64_t timing_id); + std::unordered_map& get_timings(); + + private: + std::unordered_map timings_; +}; + +// CSV Export functions for TimingInfo +namespace KokkosTools { +namespace Timer { +void export_kernels_csv(const std::deque& timings, + const std::string& filename); +void export_regions_csv(const std::deque& timings, + const std::string& filename); +void export_deepcopies_csv(const std::deque& timings, + const std::string& filename); +void print_kernels_summary(const std::deque& kernels); +void print_regions_summary(const std::deque& regions); +void print_deepcopies_summary(const std::deque& deepcopies); +} // namespace Timer +} // namespace KokkosTools \ No newline at end of file diff --git a/profiling/energy-profiler/common/tool_interface.hpp b/profiling/energy-profiler/common/tool_interface.hpp new file mode 100644 index 000000000..2aef08b5f --- /dev/null +++ b/profiling/energy-profiler/common/tool_interface.hpp @@ -0,0 +1,31 @@ +#pragma once + +#include +#include "kp_core.hpp" + +class ToolInterface { + public: + ToolInterface() = default; + virtual ~ToolInterface() = default; + virtual void init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) = 0; + virtual void finalize_library() = 0; + virtual void begin_parallel_for(const char* name, const uint32_t devID, + uint64_t kID) = 0; + virtual void end_parallel_for(uint64_t kID) = 0; + virtual void begin_parallel_scan(const 
char* name, const uint32_t devID, + uint64_t* kID) = 0; + virtual void end_parallel_scan(uint64_t kID) = 0; + virtual void begin_parallel_reduce(const char* name, const uint32_t devID, + uint64_t* kID) = 0; + virtual void end_parallel_reduce(uint64_t kID) = 0; + virtual void begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, + const char* dst_name, const void* dst_ptr, + Kokkos::Tools::SpaceHandle src_handle, + const char* src_name, const void* src_ptr, + uint64_t size) = 0; + virtual void end_deep_copy() = 0; + virtual void push_profile_region(const char* region_name) = 0; + virtual void pop_profile_region() = 0; +}; \ No newline at end of file diff --git a/profiling/energy-profiler/kokkos/CMakeLists.txt b/profiling/energy-profiler/kokkos/CMakeLists.txt new file mode 100644 index 000000000..ec0d18bf8 --- /dev/null +++ b/profiling/energy-profiler/kokkos/CMakeLists.txt @@ -0,0 +1,76 @@ +# Find Threads package for pthread support (needed by std::thread in daemon.cpp) +find_package(Threads REQUIRED) + +kp_add_library(kp_energy_kernel_timer kp_energy_kernel_timer.cpp + ../common/timer.cpp + ../common/filename_prefix.cpp + ../tools/kernel_timer_tool.cpp +) + +target_include_directories(kp_energy_kernel_timer PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} +) + +if(KOKKOSTOOLS_HAS_VARIORUM) + kp_add_library(kp_variorum_power kp_variorum_power.cpp + ../common/daemon.cpp + ../common/filename_prefix.cpp + ../common/timer.cpp + ../provider/provider_variorum.cpp + ../tools/kernel_timer_tool.cpp +) + +# Find Threads package for pthread support (needed by std::thread in daemon.cpp) +find_package(Threads REQUIRED) +target_link_libraries(kp_variorum_power PRIVATE variorum::variorum Threads::Threads) +endif() + +find_package(CUDAToolkit QUIET) + +if (CUDAToolkit_FOUND) + find_package(CUDA::nvml QUIET) + message(STATUS "Found CUDA NVML, making NVML power profiler available.") +else() + message(STATUS "CUDAToolkit not found, skipping NVML power profiler.") + return() +endif() + 
+kp_add_library(kp_nvml_power kp_nvml_power.cpp + ../common/daemon.cpp + ../common/filename_prefix.cpp + ../common/timer.cpp + ../provider/provider_nvml.cpp + ../tools/kernel_timer_tool.cpp +) +target_link_libraries(kp_nvml_power PRIVATE CUDA::nvml Threads::Threads) + +target_include_directories(kp_nvml_power PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} +) + +# NVML Direct Power Tool +kp_add_library(kp_nvml_direct_power kp_nvml_direct_power.cpp + ../common/daemon.cpp + ../common/filename_prefix.cpp + ../common/timer.cpp + ../provider/provider_nvml.cpp + ../tools/kernel_timer_tool.cpp +) +target_link_libraries(kp_nvml_direct_power PRIVATE CUDA::nvml Threads::Threads) + +target_include_directories(kp_nvml_direct_power PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} +) + +# NVML Energy Consumption Tool (no daemon needed) +kp_add_library(kp_nvml_energy_consumption kp_nvml_energy_consumption.cpp + ../common/filename_prefix.cpp + ../common/timer.cpp + ../provider/provider_nvml.cpp + ../tools/kernel_timer_tool.cpp +) +target_link_libraries(kp_nvml_energy_consumption PRIVATE CUDA::nvml) + +target_include_directories(kp_nvml_energy_consumption PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} +) \ No newline at end of file diff --git a/profiling/energy-profiler/kokkos/kp_energy_kernel_timer.cpp b/profiling/energy-profiler/kokkos/kp_energy_kernel_timer.cpp new file mode 100644 index 000000000..8be118d20 --- /dev/null +++ b/profiling/energy-profiler/kokkos/kp_energy_kernel_timer.cpp @@ -0,0 +1,200 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +/** + * Kokkos Power Profiler - Specialized for Variorum + * Simplified version focused on Variorum energy monitoring with integrated + * timing + */ + +#include +#include + +#include "kp_core.hpp" +#include "../tools/kernel_timer_tool.hpp" +#include "../common/filename_prefix.hpp" +#include "../common/timer.hpp" + +namespace KokkosTools { +namespace KernelTimer { + +// --- Core Initialization --- +KernelTimerTool timer; + +bool VERBOSE = false; +std::string KOKKOS_PROFILE_LIBRARY_NAME = + "Kokkos Kernel Timer for Energy Profiler"; + +// --- Library Initialization/Finalization --- + +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { + timer.init_library(loadSeq, interfaceVer, devInfoCount, deviceInfo); +} + +void kokkosp_finalize_library() { + std::cout << "Kokkos Power Profiler: Finalizing library\n"; + timer.finalize_library(); + std::cout << "Kokkos Power Profiler: Library finalized\n"; + + std::string prefix = generate_prefix(); + + const auto& kernels = timer.get_kernel_timings(); + KokkosTools::Timer::print_kernels_summary(kernels); + KokkosTools::Timer::export_kernels_csv(kernels, prefix + "_kernels.csv"); + + // Récapitulatif des régions + const auto& regions = timer.get_region_timings(); + KokkosTools::Timer::print_regions_summary(regions); + KokkosTools::Timer::export_regions_csv(regions, prefix + "_regions.csv"); + + // Récapitulatif des deep copies + const auto& deepcopies = timer.get_deep_copy_timings(); + KokkosTools::Timer::print_deepcopies_summary(deepcopies); + KokkosTools::Timer::export_deepcopies_csv(deepcopies, + prefix + "_deepcopies.csv"); +} + +// --- Kernels Launch/End --- + +void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, + uint64_t* kID) { + timer.begin_parallel_for(name, devID, *kID); + if (VERBOSE) { + std::cout << "Kokkos Power 
Profiler: Started parallel_for '" << name + << "' on device " << devID << " with ID " << *kID << "\n"; + } +} + +void kokkosp_end_parallel_for(const uint64_t kID) { + timer.end_parallel_for(kID); + if (VERBOSE) { + std::cout << "Kokkos Power Profiler: Ended parallel_for with ID " << kID + << "\n"; + } +} + +void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, + uint64_t* kID) { + timer.begin_parallel_scan(name, devID, kID); + if (VERBOSE) { + std::cout << "Kokkos Power Profiler: Started parallel_scan '" << name + << "' on device " << devID << " with ID " << *kID << "\n"; + } +} + +void kokkosp_end_parallel_scan(const uint64_t kID) { + timer.end_parallel_scan(kID); + if (VERBOSE) { + std::cout << "Kokkos Power Profiler: Ended parallel_scan with ID " << kID + << "\n"; + } +} + +void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, + uint64_t* kID) { + timer.begin_parallel_reduce(name, devID, kID); + if (VERBOSE) { + std::cout << "Kokkos Power Profiler: Started parallel_reduce '" << name + << "' on device " << devID << " with ID " << *kID << "\n"; + } +} + +void kokkosp_end_parallel_reduce(const uint64_t kID) { + timer.end_parallel_reduce(kID); + if (VERBOSE) { + std::cout << "Kokkos Power Profiler: Ended parallel_reduce with ID " << kID + << "\n"; + } +} + +void kokkosp_push_profile_region(char const* regionName) { + timer.push_profile_region(regionName); + if (VERBOSE) { + std::cout << "Kokkos Power Profiler: Pushed profile region '" << regionName + << "'\n"; + } +} + +void kokkosp_pop_profile_region() { + timer.pop_profile_region(); + if (VERBOSE) { + std::cout << "Kokkos Power Profiler: Popped profile region\n"; + } +} + +void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, + const char* dst_name, const void* dst_ptr, + Kokkos::Tools::SpaceHandle src_handle, + const char* src_name, const void* src_ptr, + uint64_t size) { + timer.begin_deep_copy(dst_handle, dst_name, dst_ptr, src_handle, src_name, + src_ptr, 
size); + if (VERBOSE) { + std::cout << "Kokkos Power Profiler: Started deep copy from '" << src_name + << "' to '" << dst_name << "' of size " << size << " bytes\n"; + } +} + +void kokkosp_end_deep_copy() { + timer.end_deep_copy(); + if (VERBOSE) { + std::cout << "Kokkos Power Profiler: Ended deep copy\n"; + } +} + +// --- Event Set Configuration --- + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, + sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.begin_deep_copy = kokkosp_begin_deep_copy; + my_event_set.end_deep_copy = kokkosp_end_deep_copy; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + my_event_set.push_region = kokkosp_push_profile_region; + my_event_set.pop_region = kokkosp_pop_profile_region; + return my_event_set; +} + +} // namespace KernelTimer +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::KernelTimer; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) +EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) +EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) 
+EXPOSE_BEGIN_DEEP_COPY(impl::kokkosp_begin_deep_copy) +EXPOSE_END_DEEP_COPY(impl::kokkosp_end_deep_copy) +} diff --git a/profiling/energy-profiler/kokkos/kp_nvml_direct_power.cpp b/profiling/energy-profiler/kokkos/kp_nvml_direct_power.cpp new file mode 100644 index 000000000..a2160a3f3 --- /dev/null +++ b/profiling/energy-profiler/kokkos/kp_nvml_direct_power.cpp @@ -0,0 +1,356 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos Direct Power Profiler +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +/** + * @file kp_nvml_direct_power.cpp + * @brief Kokkos Direct Power Profiler Tool using NVML. + * + * This tool leverages a background daemon to periodically sample GPU power + * consumption using the NVML library's direct power measurement API. It starts + * monitoring when the Kokkos library is initialized and prints a detailed power + * profile upon finalization. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kp_core.hpp" +#include "../common/daemon.hpp" +#include "../provider/provider_nvml.hpp" +#include "../common/filename_prefix.hpp" +#include "../common/timer.hpp" +#include "../tools/kernel_timer_tool.hpp" + +namespace KokkosTools { +namespace DirectPower { + +// --- Configuration --- +// The interval in milliseconds for power sampling. +constexpr int SAMPLING_INTERVAL_MS = 20; + +// --- Global State for the Profiler --- +static std::unique_ptr g_power_daemon; +static std::unique_ptr g_nvml_provider; + +// Timer tool for kernel and region timing +static KernelTimerTool g_timer; + +// Structure to store a single power measurement with a timestamp per device. 
+struct DirectPowerSample { + std::chrono::high_resolution_clock::time_point timestamp; + std::vector device_powers_watts; // Power for each device +}; + +// Thread-safe storage for collected power samples. +static std::vector g_power_samples; +static std::mutex g_samples_mutex; +static std::chrono::high_resolution_clock::time_point g_start_time; +static size_t g_device_count = 0; + +/** + * @brief The function executed by the daemon thread to sample power. + * + * This function is called periodically. It fetches the current direct power + * usage from each GPU device using the NVML provider and stores it with a + * timestamp. + */ +void power_monitoring_tick() { + if (!g_nvml_provider || !g_nvml_provider->is_initialized()) { + return; + } + + DirectPowerSample sample; + sample.timestamp = std::chrono::high_resolution_clock::now(); + sample.device_powers_watts.reserve(g_device_count); + + // Collect power for each device + for (size_t i = 0; i < g_device_count; ++i) { + double power = g_nvml_provider->get_device_power_usage_direct(i); + sample.device_powers_watts.push_back(power); + } + + std::lock_guard lock(g_samples_mutex); + g_power_samples.push_back(std::move(sample)); +} + +/** + * @brief Calculates statistics from the collected power samples. + * + * @param samples A constant reference to the vector of power samples. + * @param device_index The device index to analyze. + * @param[out] avg_power Average power consumption. + * @param[out] min_power Minimum power consumption. + * @param[out] max_power Maximum power consumption. + * @param[out] total_energy Total energy consumed in Joules. 
+ */ +void analyze_device_power_data(const std::vector& samples, + size_t device_index, double& avg_power, + double& min_power, double& max_power, + double& total_energy) { + if (samples.empty() || device_index >= g_device_count) { + avg_power = min_power = max_power = total_energy = 0.0; + return; + } + + // Find first valid sample for this device + size_t first_valid = 0; + while (first_valid < samples.size() && + (device_index >= samples[first_valid].device_powers_watts.size() || + samples[first_valid].device_powers_watts[device_index] < 0)) { + first_valid++; + } + + if (first_valid >= samples.size()) { + avg_power = min_power = max_power = total_energy = 0.0; + return; + } + + min_power = samples[first_valid].device_powers_watts[device_index]; + max_power = samples[first_valid].device_powers_watts[device_index]; + double power_sum = 0.0; + size_t valid_samples = 0; + total_energy = 0.0; + + for (size_t i = first_valid; i < samples.size(); ++i) { + if (device_index >= samples[i].device_powers_watts.size()) continue; + + const double power = samples[i].device_powers_watts[device_index]; + if (power < 0) continue; // Skip invalid measurements + + power_sum += power; + valid_samples++; + if (power < min_power) min_power = power; + if (power > max_power) max_power = power; + + // Energy = Power * Time. Time delta is from the previous sample. + if (i > first_valid) { + double time_delta_s = std::chrono::duration( + samples[i].timestamp - samples[i - 1].timestamp) + .count(); + // Use previous sample's power for energy calculation + if (device_index < samples[i - 1].device_powers_watts.size() && + samples[i - 1].device_powers_watts[device_index] >= 0) { + total_energy += + samples[i - 1].device_powers_watts[device_index] * time_delta_s; + } + } + } + + avg_power = valid_samples > 0 ? 
power_sum / valid_samples : 0.0; +} + +void export_direct_power_data_csv(const std::string& filename) { + std::ofstream file(filename); + if (!file.is_open()) { + std::cerr << "ERROR: Unable to open file " << filename << " for writing.\n"; + return; + } + + // Write header + file << "timestamp"; + for (size_t i = 0; i < g_device_count; ++i) { + file << ",device_" << i << "_power_watts"; + } + file << "\n"; + + // Write data + for (const auto& sample : g_power_samples) { + auto timestamp = std::chrono::duration_cast( + sample.timestamp.time_since_epoch()) + .count(); + file << timestamp; + for (size_t i = 0; i < g_device_count; ++i) { + if (i < sample.device_powers_watts.size()) { + file << "," << sample.device_powers_watts[i]; + } else { + file << ",-1"; // Invalid measurement + } + } + file << "\n"; + } + file.close(); + std::cout << "Direct power data exported to " << filename << std::endl; +} + +// --- Kokkos Profiling Hooks --- + +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { + std::cout << "Kokkos Direct Power Profiler: Initializing...\n"; + std::cout << "Sampling Interval: " << SAMPLING_INTERVAL_MS << " ms\n"; + + // Initialize the timer tool + g_timer.init_library(loadSeq, interfaceVer, devInfoCount, deviceInfo); + + g_nvml_provider = std::make_unique(); + if (!g_nvml_provider->initialize()) { + std::cerr << "ERROR: Failed to initialize NVML provider. 
Direct power " + "profiling disabled.\n"; + g_nvml_provider.reset(); // Release the provider + return; + } + + g_device_count = g_nvml_provider->get_device_count(); + std::cout << "SUCCESS: NVML provider initialized with " << g_device_count + << " device(s).\n"; + + // Print device information + for (size_t i = 0; i < g_device_count; ++i) { + std::cout << " Device " << i << ": " << g_nvml_provider->get_device_name(i) + << std::endl; + } + + // Start the monitoring daemon + g_power_daemon = + std::make_unique(power_monitoring_tick, SAMPLING_INTERVAL_MS); + g_start_time = std::chrono::high_resolution_clock::now(); + g_power_daemon->start(); + std::cout << "SUCCESS: Direct power monitoring daemon started.\n"; +} + +void kokkosp_finalize_library() { + std::cout << "\nKokkos Direct Power Profiler: Finalizing...\n"; + + if (g_power_daemon) { + g_power_daemon->stop(); + std::cout << "SUCCESS: Direct power monitoring daemon stopped.\n"; + } + + // Finalize the timer + g_timer.finalize_library(); + + // Make a copy of the samples to avoid holding the lock during analysis + std::vector samples_copy; + { + std::lock_guard lock(g_samples_mutex); + samples_copy = g_power_samples; + } + + if (samples_copy.empty()) { + std::cout << "No direct power samples collected.\n"; + } else { + auto end_time = std::chrono::high_resolution_clock::now(); + auto total_duration_s = + std::chrono::duration(end_time - g_start_time).count(); + + std::cout << "\n==== Direct Power Profile Summary ====\n"; + std::cout << std::fixed << std::setprecision(2); + std::cout << "Total Monitoring Duration: " << total_duration_s << " s\n"; + std::cout << "Samples Collected: " << samples_copy.size() << "\n"; + std::cout << "Number of Devices: " << g_device_count << "\n"; + std::cout << "---------------------------------------\n"; + + // Analyze each device separately + for (size_t dev = 0; dev < g_device_count; ++dev) { + double avg_power, min_power, max_power, total_energy; + 
analyze_device_power_data(samples_copy, dev, avg_power, min_power, + max_power, total_energy); + + std::cout << "Device " << dev << " (" + << g_nvml_provider->get_device_name(dev) << "):\n"; + std::cout << " Average Power: " << avg_power << " W\n"; + std::cout << " Minimum Power: " << min_power << " W\n"; + std::cout << " Maximum Power: " << max_power << " W\n"; + std::cout << " Total Energy Consumed: " << total_energy << " J\n"; + std::cout << "---------------------------------------\n"; + } + + std::string csv_filename = + generate_prefix() + "_nvml_direct_power_samples.csv"; + std::cout << "Exporting direct power data to " << csv_filename << "...\n"; + export_direct_power_data_csv(csv_filename); + } + + std::string prefix = generate_prefix(); + + const auto& kernels = g_timer.get_kernel_timings(); + KokkosTools::Timer::print_kernels_summary(kernels); + KokkosTools::Timer::export_kernels_csv(kernels, prefix + "_kernels.csv"); + + const auto& regions = g_timer.get_region_timings(); + KokkosTools::Timer::print_regions_summary(regions); + KokkosTools::Timer::export_regions_csv(regions, prefix + "_regions.csv"); + + const auto& deepcopies = g_timer.get_deep_copy_timings(); + KokkosTools::Timer::print_deepcopies_summary(deepcopies); + KokkosTools::Timer::export_deepcopies_csv(deepcopies, + prefix + "_deepcopies.csv"); + + if (g_nvml_provider) { + g_nvml_provider->finalize(); + std::cout << "SUCCESS: NVML provider finalized.\n"; + } +} + +// --- Hook Implementations with Timer Integration --- +void kokkosp_begin_parallel_for(const char* name, uint32_t devID, + uint64_t* kID) { + g_timer.begin_parallel_for(name, devID, *kID); +} +void kokkosp_end_parallel_for(uint64_t kID) { g_timer.end_parallel_for(kID); } +void kokkosp_begin_parallel_scan(const char* name, uint32_t devID, + uint64_t* kID) { + g_timer.begin_parallel_scan(name, devID, kID); +} +void kokkosp_end_parallel_scan(uint64_t kID) { g_timer.end_parallel_scan(kID); } +void kokkosp_begin_parallel_reduce(const 
char* name, uint32_t devID, + uint64_t* kID) { + g_timer.begin_parallel_reduce(name, devID, kID); +} +void kokkosp_end_parallel_reduce(uint64_t kID) { + g_timer.end_parallel_reduce(kID); +} +void kokkosp_push_profile_region(const char* regionName) { + g_timer.push_profile_region(regionName); +} +void kokkosp_pop_profile_region() { g_timer.pop_profile_region(); } +void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, + const char* dst_name, const void* dst_ptr, + Kokkos::Tools::SpaceHandle src_handle, + const char* src_name, const void* src_ptr, + uint64_t size) { + g_timer.begin_deep_copy(dst_handle, dst_name, dst_ptr, src_handle, src_name, + src_ptr, size); +} +void kokkosp_end_deep_copy() { g_timer.end_deep_copy(); } + +} // namespace DirectPower +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::DirectPower; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) +EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) +EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) +EXPOSE_BEGIN_DEEP_COPY(impl::kokkosp_begin_deep_copy) +EXPOSE_END_DEEP_COPY(impl::kokkosp_end_deep_copy) + +} // extern "C" diff --git a/profiling/energy-profiler/kokkos/kp_nvml_energy_consumption.cpp b/profiling/energy-profiler/kokkos/kp_nvml_energy_consumption.cpp new file mode 100644 index 000000000..94f8bcf0e --- /dev/null +++ b/profiling/energy-profiler/kokkos/kp_nvml_energy_consumption.cpp @@ -0,0 +1,533 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos Energy Consumption Profiler +// +// Under the 
terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +/** + * @file kp_nvml_energy_consumption.cpp + * @brief Kokkos Energy Consumption Profiler Tool using NVML. + * + * This tool measures energy consumption by tracking the cumulative energy + * values from NVML at the beginning and end of kernels, regions, and deep + * copies. It does not use a background daemon since the energy consumption is a + * cumulative counter that can be read directly when events occur. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kp_core.hpp" +#include "../provider/provider_nvml.hpp" +#include "../common/filename_prefix.hpp" +#include "../common/timer.hpp" +#include "../tools/kernel_timer_tool.hpp" + +namespace KokkosTools { +namespace EnergyConsumption { + +// --- Global State for the Profiler --- +static std::unique_ptr g_nvml_provider; + +// Timer tool for kernel and region timing +static KernelTimerTool g_timer; + +static size_t g_device_count = 0; +static std::chrono::high_resolution_clock::time_point g_start_time; + +// Energy tracking structures +struct EnergySnapshot { + std::chrono::high_resolution_clock::time_point timestamp; + std::vector + device_energies_joules; // Energy for each device in Joules +}; + +struct KernelEnergyRecord { + std::string name; + uint32_t devID; + uint64_t kID; + EnergySnapshot start_energy; + EnergySnapshot end_energy; + double duration_seconds; + std::vector energy_consumed_joules; // Per device +}; + +struct RegionEnergyRecord { + std::string name; + EnergySnapshot start_energy; + EnergySnapshot end_energy; + double duration_seconds; + std::vector energy_consumed_joules; // Per device +}; + +struct 
DeepCopyEnergyRecord { + std::string dst_name; + std::string src_name; + uint64_t size; + EnergySnapshot start_energy; + EnergySnapshot end_energy; + double duration_seconds; + std::vector energy_consumed_joules; // Per device +}; + +// Storage for energy records +static std::vector g_kernel_energy_records; +static std::vector g_region_energy_records; +static std::vector g_deep_copy_energy_records; +static std::mutex g_energy_mutex; + +// Stack for nested regions +static std::stack> g_region_stack; + +// Maps for tracking active kernels/deep copies +static std::unordered_map g_active_kernels; +static std::pair g_active_deep_copy = {false, {}}; + +/** + * @brief Captures a snapshot of current energy consumption for all devices. + */ +EnergySnapshot capture_energy_snapshot() { + EnergySnapshot snapshot; + snapshot.timestamp = std::chrono::high_resolution_clock::now(); + snapshot.device_energies_joules.reserve(g_device_count); + + if (!g_nvml_provider || !g_nvml_provider->is_initialized()) { + // Fill with invalid values + for (size_t i = 0; i < g_device_count; ++i) { + snapshot.device_energies_joules.push_back(-1.0); + } + return snapshot; + } + + // Collect energy for each device + for (size_t i = 0; i < g_device_count; ++i) { + double energy = g_nvml_provider->get_current_energy_consumption(i); + snapshot.device_energies_joules.push_back(energy); + } + + return snapshot; +} + +/** + * @brief Calculates energy consumed between two snapshots. 
+ */
+std::vector<double> calculate_energy_delta(const EnergySnapshot& start,
+                                           const EnergySnapshot& end) {
+  std::vector<double> delta(g_device_count, 0.0);
+
+  for (size_t i = 0; i < g_device_count; ++i) {
+    if (i < start.device_energies_joules.size() &&
+        i < end.device_energies_joules.size() &&
+        start.device_energies_joules[i] >= 0 &&
+        end.device_energies_joules[i] >= 0) {
+      delta[i] =
+          end.device_energies_joules[i] - start.device_energies_joules[i];
+      // Handle potential counter reset (though rare)
+      if (delta[i] < 0) {
+        delta[i] = 0;  // Reset occurred, use 0 as approximation
+      }
+    } else {
+      delta[i] = -1.0;  // Invalid measurement
+    }
+  }
+
+  return delta;
+}
+
+/**
+ * @brief Calculates duration in seconds between two snapshots.
+ *
+ * @param start Snapshot taken when the event began.
+ * @param end   Snapshot taken when the event finished.
+ * @return Elapsed time between the two timestamps, in (fractional) seconds.
+ */
+double calculate_duration_seconds(const EnergySnapshot& start,
+                                  const EnergySnapshot& end) {
+  // duration<double> has a period of one second, so count() is in seconds.
+  return std::chrono::duration<double>(end.timestamp - start.timestamp)
+      .count();
+}
+
+/**
+ * @brief Quotes a text field for CSV output when it needs quoting.
+ *
+ * A field containing a comma, a double quote or a line break is wrapped in
+ * double quotes and every embedded double quote is doubled. Any other field
+ * is returned unchanged, so plain names produce the same bytes as before.
+ *
+ * @param field Raw field text (kernel, region or deep-copy label).
+ * @return The field, quoted only if required.
+ */
+static std::string csv_quote_field(const std::string& field) {
+  if (field.find_first_of(",\"\r\n") == std::string::npos) {
+    return field;
+  }
+
+  std::string quoted;
+  quoted.reserve(field.size() + 2);
+  quoted.push_back('"');
+  for (const char c : field) {
+    if (c == '"') {
+      quoted.push_back('"');  // An embedded quote is escaped by doubling it.
+    }
+    quoted.push_back(c);
+  }
+  quoted.push_back('"');
+  return quoted;
+}
+
+/**
+ * @brief Writes all recorded kernel, region and deep-copy energy records to a
+ *        CSV file.
+ *
+ * The file starts with a header row (`type,name,duration_seconds` followed by
+ * one `device_<i>_energy_joules` column per device) and then holds one row
+ * per record. A device with no stored value for a record is written as -1.
+ * Names are passed through csv_quote_field() because labels may contain
+ * commas, which would otherwise shift every following column.
+ *
+ * @param filename Path of the CSV file to create or overwrite. If it cannot
+ *                 be opened, an error is printed to stderr and nothing is
+ *                 written.
+ */
+void export_energy_consumption_csv(const std::string& filename) {
+  std::ofstream file(filename);
+  if (!file.is_open()) {
+    std::cerr << "ERROR: Unable to open file " << filename << " for writing.\n";
+    return;
+  }
+
+  // One row writer shared by the three record kinds.
+  const auto write_row = [&file](const char* type, const std::string& name,
+                                 double duration_seconds,
+                                 const auto& energies) {
+    file << type << ',' << csv_quote_field(name) << ',' << duration_seconds;
+    for (size_t i = 0; i < g_device_count; ++i) {
+      if (i < energies.size()) {
+        file << ',' << energies[i];
+      } else {
+        file << ",-1";
+      }
+    }
+    file << "\n";
+  };
+
+  // Header row
+  file << "type,name,duration_seconds";
+  for (size_t i = 0; i < g_device_count; ++i) {
+    file << ",device_" << i << "_energy_joules";
+  }
+  file << "\n";
+
+  for (const auto& record : g_kernel_energy_records) {
+    write_row("kernel", record.name, record.duration_seconds,
+              record.energy_consumed_joules);
+  }
+
+  for (const auto& record : g_region_energy_records) {
+    write_row("region", record.name, record.duration_seconds,
+              record.energy_consumed_joules);
+  }
+
+  for (const auto& record : g_deep_copy_energy_records) {
+    const std::string name = record.src_name + "_to_" + record.dst_name +
+                             "_size_" + std::to_string(record.size);
+    write_row("deepcopy", name, record.duration_seconds,
+              record.energy_consumed_joules);
+  }
+
+  file.close();
+  std::cout << "Energy consumption data exported to " << filename << std::endl;
+}
+
+/**
+ * @brief Prints record counts and per-device energy totals to stdout.
+ *
+ * Negative per-device values mark invalid measurements and are left out of
+ * the totals.
+ */
+void print_energy_summary() {
+  std::cout << "\n==== Energy Consumption Profile Summary ====\n";
+  std::cout << std::fixed << std::setprecision(4);
+
+  // Calculate total energy per device
+  std::vector<double> total_kernel_energy(g_device_count, 0.0);
+  std::vector<double> total_region_energy(g_device_count, 0.0);
+  std::vector<double> total_deepcopy_energy(g_device_count, 0.0);
+
+  for (const auto& record : g_kernel_energy_records) {
+    for (size_t i = 0;
+         i < g_device_count && i < record.energy_consumed_joules.size(); ++i) {
+      if (record.energy_consumed_joules[i] >= 0) {
+        total_kernel_energy[i] += record.energy_consumed_joules[i];
+      }
+    }
+  }
+
+  for (const auto& record : g_region_energy_records) {
+    for (size_t i = 0;
+         i < g_device_count && i < record.energy_consumed_joules.size(); ++i) {
+      if (record.energy_consumed_joules[i] >= 0) {
+        total_region_energy[i] += record.energy_consumed_joules[i];
+      }
+    }
+  }
+
+  for (const auto& record : g_deep_copy_energy_records) {
+    for (size_t i = 0;
+         i < g_device_count && i < record.energy_consumed_joules.size(); ++i) {
+      if (record.energy_consumed_joules[i] >= 0) {
+        total_deepcopy_energy[i] += record.energy_consumed_joules[i];
+      }
+    }
+  }
+
+  std::cout << "Number of Kernels: " << g_kernel_energy_records.size()
+            << "\n";
+  std::cout << "Number of Regions: " << g_region_energy_records.size()
+            << "\n";
+  std::cout << "Number of Deep Copies: "
+            << g_deep_copy_energy_records.size() << "\n";
+  std::cout << "Number of Devices: " <<
g_device_count << "\n"; + std::cout << "--------------------------------------------\n"; + + for (size_t dev = 0; dev < g_device_count; ++dev) { + std::cout << "Device " << dev << " (" + << g_nvml_provider->get_device_name(dev) << "):\n"; + std::cout << " Total Kernel Energy: " << total_kernel_energy[dev] + << " J\n"; + std::cout << " Total Region Energy: " << total_region_energy[dev] + << " J\n"; + std::cout << " Total Deep Copy Energy: " << total_deepcopy_energy[dev] + << " J\n"; + std::cout << " Total Energy: " + << (total_kernel_energy[dev] + total_region_energy[dev] + + total_deepcopy_energy[dev]) + << " J\n"; + std::cout << "--------------------------------------------\n"; + } +} + +// --- Kokkos Profiling Hooks --- + +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { + std::cout << "Kokkos Energy Consumption Profiler: Initializing...\n"; + + // Initialize the timer tool + g_timer.init_library(loadSeq, interfaceVer, devInfoCount, deviceInfo); + + g_nvml_provider = std::make_unique(); + if (!g_nvml_provider->initialize()) { + std::cerr << "ERROR: Failed to initialize NVML provider. 
Energy " + "consumption profiling disabled.\n"; + g_nvml_provider.reset(); // Release the provider + return; + } + + g_device_count = g_nvml_provider->get_device_count(); + std::cout << "SUCCESS: NVML provider initialized with " << g_device_count + << " device(s).\n"; + + // Print device information + for (size_t i = 0; i < g_device_count; ++i) { + std::cout << " Device " << i << ": " << g_nvml_provider->get_device_name(i) + << std::endl; + } + + g_start_time = std::chrono::high_resolution_clock::now(); + std::cout << "SUCCESS: Energy consumption monitoring initialized.\n"; +} + +void kokkosp_finalize_library() { + std::cout << "\nKokkos Energy Consumption Profiler: Finalizing...\n"; + + // Finalize the timer + g_timer.finalize_library(); + + auto end_time = std::chrono::high_resolution_clock::now(); + auto total_duration_s = + std::chrono::duration(end_time - g_start_time).count(); + + std::cout << "Total Monitoring Duration: " << total_duration_s << " s\n"; + + print_energy_summary(); + + std::string prefix = generate_prefix(); + + // Export energy data + std::string csv_filename = prefix + "_nvml_energy_consumption.csv"; + std::cout << "Exporting energy consumption data to " << csv_filename + << "...\n"; + export_energy_consumption_csv(csv_filename); + + // Export timing data + const auto& kernels = g_timer.get_kernel_timings(); + KokkosTools::Timer::print_kernels_summary(kernels); + KokkosTools::Timer::export_kernels_csv(kernels, prefix + "_kernels.csv"); + + const auto& regions = g_timer.get_region_timings(); + KokkosTools::Timer::print_regions_summary(regions); + KokkosTools::Timer::export_regions_csv(regions, prefix + "_regions.csv"); + + const auto& deepcopies = g_timer.get_deep_copy_timings(); + KokkosTools::Timer::print_deepcopies_summary(deepcopies); + KokkosTools::Timer::export_deepcopies_csv(deepcopies, + prefix + "_deepcopies.csv"); + + if (g_nvml_provider) { + g_nvml_provider->finalize(); + std::cout << "SUCCESS: NVML provider finalized.\n"; + } +} + 
+// --- Hook Implementations with Timer and Energy Integration ---
+
+/**
+ * @brief Opens an energy record for a kernel that is about to run.
+ *
+ * The energy snapshot is taken before the mutex is acquired, so the lock only
+ * covers the insertion into the active-kernel map.
+ *
+ * @param name  Kernel label; a null pointer is stored as "unknown", matching
+ *              the fallback the deep-copy hook uses.
+ * @param devID Device id reported by Kokkos for this kernel.
+ * @param kID   Kernel id used to pair this record with its end hook.
+ */
+static void begin_kernel_energy_record(const char* name, uint32_t devID,
+                                       uint64_t kID) {
+  KernelEnergyRecord record;
+  record.name         = name ? name : "unknown";
+  record.devID        = devID;
+  record.kID          = kID;
+  record.start_energy = capture_energy_snapshot();
+
+  std::lock_guard<std::mutex> lock(g_energy_mutex);
+  g_active_kernels[kID] = record;
+}
+
+/**
+ * @brief Closes the energy record opened for @p kID and stores it.
+ *
+ * Does nothing if no record is open for @p kID.
+ *
+ * @param kID Kernel id passed to the matching begin hook.
+ */
+static void end_kernel_energy_record(uint64_t kID) {
+  std::lock_guard<std::mutex> lock(g_energy_mutex);
+  auto it = g_active_kernels.find(kID);
+  if (it == g_active_kernels.end()) {
+    return;
+  }
+
+  auto& record      = it->second;
+  record.end_energy = capture_energy_snapshot();
+  record.duration_seconds =
+      calculate_duration_seconds(record.start_energy, record.end_energy);
+  record.energy_consumed_joules =
+      calculate_energy_delta(record.start_energy, record.end_energy);
+
+  g_kernel_energy_records.push_back(record);
+  g_active_kernels.erase(it);
+}
+
+/** @brief parallel_for begin hook: starts the timer, then the energy record. */
+void kokkosp_begin_parallel_for(const char* name, uint32_t devID,
+                                uint64_t* kID) {
+  // NOTE(review): this hook passes *kID to the timer while the scan and reduce
+  // hooks pass the pointer itself. KernelTimerTool is declared elsewhere, so
+  // confirm which form each of its methods expects.
+  g_timer.begin_parallel_for(name, devID, *kID);
+  begin_kernel_energy_record(name, devID, *kID);
+}
+
+/** @brief parallel_for end hook: stops the timer, then closes the record. */
+void kokkosp_end_parallel_for(uint64_t kID) {
+  g_timer.end_parallel_for(kID);
+  end_kernel_energy_record(kID);
+}
+
+/** @brief parallel_scan begin hook: starts the timer, then the energy record. */
+void kokkosp_begin_parallel_scan(const char* name, uint32_t devID,
+                                 uint64_t* kID) {
+  g_timer.begin_parallel_scan(name, devID, kID);
+  begin_kernel_energy_record(name, devID, *kID);
+}
+
+/** @brief parallel_scan end hook: stops the timer, then closes the record. */
+void kokkosp_end_parallel_scan(uint64_t kID) {
+  g_timer.end_parallel_scan(kID);
+  end_kernel_energy_record(kID);
+}
+
+/**
+ * @brief parallel_reduce begin hook: starts the timer, then the energy record.
+ */
+void kokkosp_begin_parallel_reduce(const char* name, uint32_t devID,
+                                   uint64_t* kID) {
+  g_timer.begin_parallel_reduce(name, devID, kID);
+  begin_kernel_energy_record(name, devID, *kID);
+}
+
+/** @brief parallel_reduce end hook: stops the timer, then closes the record. */
+void kokkosp_end_parallel_reduce(uint64_t kID) {
+  g_timer.end_parallel_reduce(kID);
+  end_kernel_energy_record(kID);
+}
+
+/**
+ * @brief Region push hook: starts the timer and stacks a start snapshot.
+ *
+ * @param regionName Region label; a null pointer is stored as "unknown".
+ */
+void kokkosp_push_profile_region(const char* regionName) {
+  g_timer.push_profile_region(regionName);
+
+  EnergySnapshot snapshot = capture_energy_snapshot();
+  // NOTE(review): g_region_stack is accessed without g_energy_mutex, unlike
+  // the kernel and deep-copy state. Presumably region hooks only fire on one
+  // thread; confirm against the caller.
+  g_region_stack.push(
+      {std::string(regionName ? regionName : "unknown"), snapshot});
+}
+
+/**
+ * @brief Region pop hook: stops the timer and stores the finished region.
+ *
+ * Does nothing on the energy side if no region is currently stacked.
+ */
+void kokkosp_pop_profile_region() {
+  g_timer.pop_profile_region();
+
+  if (g_region_stack.empty()) {
+    return;
+  }
+
+  auto [name, start_energy] = g_region_stack.top();
+  g_region_stack.pop();
+
+  RegionEnergyRecord record;
+  record.name         = name;
+  record.start_energy = start_energy;
+  record.end_energy   = capture_energy_snapshot();
+  record.duration_seconds =
+      calculate_duration_seconds(record.start_energy, record.end_energy);
+  record.energy_consumed_joules =
+      calculate_energy_delta(record.start_energy, record.end_energy);
+
+  std::lock_guard<std::mutex> lock(g_energy_mutex);
+  g_region_energy_records.push_back(record);
+}
+
+/**
+ * @brief Deep-copy begin hook: starts the timer and opens the single
+ *        deep-copy energy record.
+ *
+ * Only one deep copy is tracked at a time; a begin that arrives while one is
+ * already open is timed but gets no energy record.
+ *
+ * @param dst_name Destination label; null is stored as "unknown".
+ * @param src_name Source label; null is stored as "unknown".
+ * @param size     Number of bytes copied.
+ */
+void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle,
+                             const char* dst_name, const void* dst_ptr,
+                             Kokkos::Tools::SpaceHandle src_handle,
+                             const char* src_name, const void* src_ptr,
+                             uint64_t size) {
+  g_timer.begin_deep_copy(dst_handle, dst_name, dst_ptr, src_handle, src_name,
+                          src_ptr, size);
+
+  std::lock_guard<std::mutex> lock(g_energy_mutex);
+  if (g_active_deep_copy.first) {
+    return;
+  }
+
+  auto& record        = g_active_deep_copy.second;
+  record.dst_name     = dst_name ? dst_name : "unknown";
+  record.src_name     = src_name ? src_name : "unknown";
+  record.size         = size;
+  record.start_energy = capture_energy_snapshot();
+  g_active_deep_copy.first = true;
+}
+
+/**
+ * @brief Deep-copy end hook: stops the timer and stores the open record.
+ *
+ * Does nothing on the energy side if no deep-copy record is open.
+ */
+void kokkosp_end_deep_copy() {
+  g_timer.end_deep_copy();
+
+  std::lock_guard<std::mutex> lock(g_energy_mutex);
+  if (!g_active_deep_copy.first) {
+    return;
+  }
+
+  auto& record      = g_active_deep_copy.second;
+  record.end_energy = capture_energy_snapshot();
+  record.duration_seconds =
+      calculate_duration_seconds(record.start_energy, record.end_energy);
+  record.energy_consumed_joules =
+      calculate_energy_delta(record.start_energy, record.end_energy);
+
+  g_deep_copy_energy_records.push_back(record);
+  g_active_deep_copy.first = false;
+}
+
+}  // namespace EnergyConsumption
+}  // namespace KokkosTools
+
+extern "C" {
+
+namespace impl = KokkosTools::EnergyConsumption;
+
+EXPOSE_INIT(impl::kokkosp_init_library)
+EXPOSE_FINALIZE(impl::kokkosp_finalize_library)
+EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for)
+EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for)
+EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan)
+EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan)
+EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce)
+EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce)
+EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region)
+EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region)
+EXPOSE_BEGIN_DEEP_COPY(impl::kokkosp_begin_deep_copy)
+EXPOSE_END_DEEP_COPY(impl::kokkosp_end_deep_copy)
+
+}  // extern "C"
diff --git a/profiling/energy-profiler/kokkos/kp_nvml_power.cpp
b/profiling/energy-profiler/kokkos/kp_nvml_power.cpp new file mode 100644 index 000000000..cedbfa157 --- /dev/null +++ b/profiling/energy-profiler/kokkos/kp_nvml_power.cpp @@ -0,0 +1,288 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos Power Profiler +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +/** + * @file kp_nvml_power.cpp + * @brief Kokkos Power Profiler Tool using NVML. + * + * This tool leverages a background daemon to periodically sample GPU power + * consumption using the NVML library. It starts monitoring when the Kokkos + * library is initialized and prints a detailed power profile upon finalization. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kp_core.hpp" +#include "../common/daemon.hpp" +#include "../provider/provider_nvml.hpp" +#include "../common/filename_prefix.hpp" +#include "../common/timer.hpp" +#include "../tools/kernel_timer_tool.hpp" + +namespace KokkosTools { +namespace Power { + +// --- Configuration --- +// The interval in milliseconds for power sampling. +constexpr int SAMPLING_INTERVAL_MS = 20; + +// --- Global State for the Profiler --- +static std::unique_ptr g_power_daemon; +static std::unique_ptr g_nvml_provider; + +// Timer tool for kernel and region timing +static KernelTimerTool g_timer; + +// Structure to store a single power measurement with a timestamp. +struct PowerSample { + std::chrono::high_resolution_clock::time_point timestamp; + double power_watts; +}; + +// Thread-safe storage for collected power samples. 
+static std::vector g_power_samples; +static std::mutex g_samples_mutex; +static std::chrono::high_resolution_clock::time_point g_start_time; + +/** + * @brief The function executed by the daemon thread to sample power. + * + * This function is called periodically. It fetches the current total power + * usage from the NVML provider and stores it with a timestamp. + */ +void power_monitoring_tick() { + if (!g_nvml_provider || !g_nvml_provider->is_initialized()) { + return; + } + + double current_power = g_nvml_provider->get_total_power_usage(); + + std::lock_guard lock(g_samples_mutex); + g_power_samples.push_back( + {std::chrono::high_resolution_clock::now(), current_power}); +} + +/** + * @brief Calculates statistics from the collected power samples. + * + * @param samples A constant reference to the vector of power samples. + * @param[out] avg_power Average power consumption. + * @param[out] min_power Minimum power consumption. + * @param[out] max_power Maximum power consumption. + * @param[out] total_energy Total energy consumed in Joules. + */ +void analyze_power_data(const std::vector& samples, + double& avg_power, double& min_power, double& max_power, + double& total_energy) { + if (samples.empty()) { + avg_power = min_power = max_power = total_energy = 0.0; + return; + } + + min_power = samples[0].power_watts; + max_power = samples[0].power_watts; + double power_sum = 0.0; + total_energy = 0.0; + + for (size_t i = 0; i < samples.size(); ++i) { + const double power = samples[i].power_watts; + power_sum += power; + if (power < min_power) min_power = power; + if (power > max_power) max_power = power; + + // Energy = Power * Time. Time delta is from the previous sample. 
+ if (i > 0) { + double time_delta_s = std::chrono::duration( + samples[i].timestamp - samples[i - 1].timestamp) + .count(); + total_energy += samples[i - 1].power_watts * time_delta_s; + } + } + + avg_power = power_sum / samples.size(); +} + +void export_power_data_csv(const std::string& filename) { + std::ofstream file(filename); + if (!file.is_open()) { + std::cerr << "ERROR: Unable to open file " << filename << " for writing.\n"; + return; + } + file << "timestamp,power_watts\n"; + for (const auto& sample : g_power_samples) { + auto timestamp = std::chrono::duration_cast( + sample.timestamp.time_since_epoch()) + .count(); + file << timestamp << "," << sample.power_watts << "\n"; + } + file.close(); + std::cout << "Power data exported to " << filename << std::endl; +} + +// --- Kokkos Profiling Hooks --- + +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { + std::cout << "Kokkos Power Profiler: Initializing...\n"; + std::cout << "Sampling Interval: " << SAMPLING_INTERVAL_MS << " ms\n"; + + // Initialize the timer tool + g_timer.init_library(loadSeq, interfaceVer, devInfoCount, deviceInfo); + + g_nvml_provider = std::make_unique(); + if (!g_nvml_provider->initialize()) { + std::cerr << "ERROR: Failed to initialize NVML provider. 
Power profiling " + "disabled.\n"; + g_nvml_provider.reset(); // Release the provider + return; + } + + std::cout << "SUCCESS: NVML provider initialized with " + << g_nvml_provider->get_device_count() << " device(s).\n"; + + // Start the monitoring daemon + g_power_daemon = + std::make_unique(power_monitoring_tick, SAMPLING_INTERVAL_MS); + g_start_time = std::chrono::high_resolution_clock::now(); + g_power_daemon->start(); + std::cout << "SUCCESS: Power monitoring daemon started.\n"; +} + +void kokkosp_finalize_library() { + std::cout << "\nKokkos Power Profiler: Finalizing...\n"; + + if (g_power_daemon) { + g_power_daemon->stop(); + std::cout << "SUCCESS: Power monitoring daemon stopped.\n"; + } + + // Finalize the timer + g_timer.finalize_library(); + + // Make a copy of the samples to avoid holding the lock during analysis + std::vector samples_copy; + { + std::lock_guard lock(g_samples_mutex); + samples_copy = g_power_samples; + } + + if (samples_copy.empty()) { + std::cout << "No power samples collected.\n"; + } else { + auto end_time = std::chrono::high_resolution_clock::now(); + auto total_duration_s = + std::chrono::duration(end_time - g_start_time).count(); + + double avg_power, min_power, max_power, total_energy; + analyze_power_data(samples_copy, avg_power, min_power, max_power, + total_energy); + + std::cout << "\n==== Power Profile Summary ====\n"; + std::cout << std::fixed << std::setprecision(2); + std::cout << "Total Monitoring Duration: " << total_duration_s << " s\n"; + std::cout << "Samples Collected: " << samples_copy.size() << "\n"; + std::cout << "---------------------------------\n"; + std::cout << "Average Power: " << avg_power << " W\n"; + std::cout << "Minimum Power: " << min_power << " W\n"; + std::cout << "Maximum Power: " << max_power << " W\n"; + std::cout << "Total Energy Consumed: " << total_energy << " J\n"; + std::cout << "===============================\n"; + + std::string csv_filename = generate_prefix() + 
"_nvml_power_samples.csv"; + std::cout << "Exporting power data to " << csv_filename << "...\n"; + export_power_data_csv(csv_filename); + } + + std::string prefix = generate_prefix(); + + const auto& kernels = g_timer.get_kernel_timings(); + KokkosTools::Timer::print_kernels_summary(kernels); + KokkosTools::Timer::export_kernels_csv(kernels, prefix + "_kernels.csv"); + + const auto& regions = g_timer.get_region_timings(); + KokkosTools::Timer::print_regions_summary(regions); + KokkosTools::Timer::export_regions_csv(regions, prefix + "_regions.csv"); + + const auto& deepcopies = g_timer.get_deep_copy_timings(); + KokkosTools::Timer::print_deepcopies_summary(deepcopies); + KokkosTools::Timer::export_deepcopies_csv(deepcopies, + prefix + "_deepcopies.csv"); + + if (g_nvml_provider) { + g_nvml_provider->finalize(); + std::cout << "SUCCESS: NVML provider finalized.\n"; + } +} + +// --- Hook Implementations with Timer Integration --- +void kokkosp_begin_parallel_for(const char* name, uint32_t devID, + uint64_t* kID) { + g_timer.begin_parallel_for(name, devID, *kID); +} +void kokkosp_end_parallel_for(uint64_t kID) { g_timer.end_parallel_for(kID); } +void kokkosp_begin_parallel_scan(const char* name, uint32_t devID, + uint64_t* kID) { + g_timer.begin_parallel_scan(name, devID, kID); +} +void kokkosp_end_parallel_scan(uint64_t kID) { g_timer.end_parallel_scan(kID); } +void kokkosp_begin_parallel_reduce(const char* name, uint32_t devID, + uint64_t* kID) { + g_timer.begin_parallel_reduce(name, devID, kID); +} +void kokkosp_end_parallel_reduce(uint64_t kID) { + g_timer.end_parallel_reduce(kID); +} +void kokkosp_push_profile_region(const char* regionName) { + g_timer.push_profile_region(regionName); +} +void kokkosp_pop_profile_region() { g_timer.pop_profile_region(); } +void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, + const char* dst_name, const void* dst_ptr, + Kokkos::Tools::SpaceHandle src_handle, + const char* src_name, const void* src_ptr, + uint64_t 
size) { + g_timer.begin_deep_copy(dst_handle, dst_name, dst_ptr, src_handle, src_name, + src_ptr, size); +} +void kokkosp_end_deep_copy() { g_timer.end_deep_copy(); } + +} // namespace Power +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::Power; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) +EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) +EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) +EXPOSE_BEGIN_DEEP_COPY(impl::kokkosp_begin_deep_copy) +EXPOSE_END_DEEP_COPY(impl::kokkosp_end_deep_copy) + +} // extern "C" diff --git a/profiling/energy-profiler/kokkos/kp_variorum_power.cpp b/profiling/energy-profiler/kokkos/kp_variorum_power.cpp new file mode 100644 index 000000000..8e5e47b99 --- /dev/null +++ b/profiling/energy-profiler/kokkos/kp_variorum_power.cpp @@ -0,0 +1,299 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +/** + * @file kp_variorum_power_tool.cpp + * @brief Kokkos Power Profiler Tool using Variorum. 
+ * + * This tool leverages a background daemon to periodically sample GPU power + * consumption using the Variorum library via a provider interface. It starts + * monitoring when the Kokkos library is initialized and writes detailed + * power profiles to CSV files upon finalization. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kp_core.hpp" +#include "../provider/provider_variorum.hpp" +#include "../common/daemon.hpp" +#include "../common/filename_prefix.hpp" +#include "../common/timer.hpp" +#include "../tools/kernel_timer_tool.hpp" + +namespace KokkosTools { +namespace VariorumPower { + +KernelTimerTool timer; + +// --- Data Structures for Self-Contained Management --- + +struct PowerDataPoint { + int64_t timestamp_ns; + double power_watts; +}; + +// --- Global State for the Profiler --- +static std::unique_ptr g_power_daemon; +static std::unique_ptr g_variorum_provider; +static std::mutex g_data_mutex; // Mutex for all data collections +static std::chrono::high_resolution_clock::time_point g_start_time; + +// Data Collections +static std::vector g_power_data; + +// --- Helper Functions --- + +// Get current time in nanoseconds since epoch +int64_t get_current_epoch_ns() { + return std::chrono::duration_cast( + std::chrono::high_resolution_clock::now().time_since_epoch()) + .count(); +} + +void write_power_data_to_csv(const std::string& filename) { + std::ofstream outfile(filename); + if (!outfile.is_open()) { + std::cerr << "KokkosP Variorum Power: Could not open file for writing: " + << filename << "\n"; + return; + } + + outfile << "timestamp_nanoseconds,power_watts\n"; + std::lock_guard lock(g_data_mutex); + for (const auto& point : g_power_data) { + outfile << point.timestamp_ns << "," << std::fixed << std::setprecision(3) + << point.power_watts << "\n"; + } + printf("KokkosP Variorum Power: Wrote power 
data to %s\n", filename.c_str()); +} + +// --- Monitoring Function (for Daemon) --- + +void variorum_power_monitoring_tick() { + if (!g_variorum_provider || !g_variorum_provider->is_initialized()) { + return; + } + + double current_power_W = g_variorum_provider->get_total_power_usage(); + int64_t timestamp_ns = get_current_epoch_ns(); + + std::lock_guard lock(g_data_mutex); + g_power_data.push_back({timestamp_ns, current_power_W}); +} + +// --- Kokkos Profiling Hooks --- + +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { + printf( + "======================================================================" + "\n"); + printf("KokkosP: Variorum Power Profiler Initialized\n"); + printf("KokkosP: Sequence: %d, Interface Version: %llu, Devices: %u\n", + loadSeq, (unsigned long long)interfaceVer, devInfoCount); + printf( + "======================================================================" + "\n"); + + g_start_time = std::chrono::high_resolution_clock::now(); + + g_variorum_provider = std::make_unique(); + if (!g_variorum_provider->initialize()) { + std::cerr << "KokkosP Variorum Power: Failed to initialize Variorum, power " + "monitoring disabled\n"; + g_variorum_provider.reset(); + return; + } + + int interval_ms = 20; + if (const char* interval_env = + std::getenv("KOKKOS_VARIORUM_POWER_INTERVAL")) { + try { + interval_ms = std::stoi(interval_env); + if (interval_ms <= 0) { + interval_ms = 20; + throw std::invalid_argument("Interval must be positive"); + } + printf("KokkosP Variorum Power: Using custom interval: %d ms\n", + interval_ms); + } catch (const std::exception& e) { + printf( + "KokkosP Variorum Power: Invalid interval value, using default " + "20ms\n"); + } + } else { + printf("KokkosP Variorum Power: Using default interval: 20 ms\n"); + } + + g_power_daemon = std::make_unique( + std::function(variorum_power_monitoring_tick), interval_ms); + 
g_power_daemon->start(); + printf("KokkosP Variorum Power: Power monitoring started\n"); + + timer.init_library(loadSeq, interfaceVer, devInfoCount, deviceInfo); +} + +void kokkosp_finalize_library() { + auto end_time = std::chrono::high_resolution_clock::now(); + + printf( + "======================================================================" + "\n"); + printf("KokkosP: Variorum Power Profiler Finalization\n"); + + if (g_power_daemon && g_power_daemon->is_running()) { + g_power_daemon->stop(); + printf("KokkosP Variorum Power: Power monitoring stopped\n"); + } + + auto duration = std::chrono::duration_cast( + end_time - g_start_time); + double elapsed_seconds = duration.count() / 1000.0; + + printf("KokkosP Variorum Power: Total execution time: %.3f seconds\n", + elapsed_seconds); + + auto power_filename = generate_prefix() + "_variorum_power_samples.csv"; + write_power_data_to_csv(power_filename); + + if (g_variorum_provider) { + g_variorum_provider->finalize(); + } + printf( + "======================================================================" + "\n"); + + timer.finalize_library(); + + std::string prefix = generate_prefix(); + + const auto& kernels = timer.get_kernel_timings(); + KokkosTools::Timer::print_kernels_summary(kernels); + KokkosTools::Timer::export_kernels_csv(kernels, prefix + "_kernels.csv"); + + const auto& regions = timer.get_region_timings(); + KokkosTools::Timer::print_regions_summary(regions); + KokkosTools::Timer::export_regions_csv(regions, prefix + "_regions.csv"); + + const auto& deepcopies = timer.get_deep_copy_timings(); + KokkosTools::Timer::print_deepcopies_summary(deepcopies); + KokkosTools::Timer::export_deepcopies_csv(deepcopies, + prefix + "_deepcopies.csv"); +} + +void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, + uint64_t* kID) { + timer.begin_parallel_for(name, devID, *kID); +} + +void kokkosp_end_parallel_for(const uint64_t kID) { + timer.end_parallel_for(kID); +} + +void 
kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, + uint64_t* kID) { + timer.begin_parallel_scan(name, devID, kID); +} + +void kokkosp_end_parallel_scan(const uint64_t kID) { + timer.end_parallel_scan(kID); +} + +void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, + uint64_t* kID) { + timer.begin_parallel_reduce(name, devID, kID); +} + +void kokkosp_end_parallel_reduce(const uint64_t kID) { + timer.end_parallel_reduce(kID); +} + +void kokkosp_push_profile_region(char const* regionName) { + timer.push_profile_region(regionName); +} + +void kokkosp_pop_profile_region() { timer.pop_profile_region(); } + +void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, + const char* dst_name, const void* dst_ptr, + Kokkos::Tools::SpaceHandle src_handle, + const char* src_name, const void* src_ptr, + uint64_t size) { + timer.begin_deep_copy(dst_handle, dst_name, dst_ptr, src_handle, src_name, + src_ptr, size); +} + +void kokkosp_end_deep_copy() { timer.end_deep_copy(); } + +// --- Event Set Configuration --- + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, + sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.begin_deep_copy = kokkosp_begin_deep_copy; + my_event_set.end_deep_copy = kokkosp_end_deep_copy; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + my_event_set.push_region = kokkosp_push_profile_region; + my_event_set.pop_region = kokkosp_pop_profile_region; + return my_event_set; +} + 
+} // namespace VariorumPower +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::VariorumPower; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) +EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) +EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) +EXPOSE_BEGIN_DEEP_COPY(impl::kokkosp_begin_deep_copy) +EXPOSE_END_DEEP_COPY(impl::kokkosp_end_deep_copy) +} \ No newline at end of file diff --git a/profiling/energy-profiler/nvml-power/CMakeLists.txt b/profiling/energy-profiler/nvml-power/CMakeLists.txt deleted file mode 100644 index ea08e4a79..000000000 --- a/profiling/energy-profiler/nvml-power/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -find_package(CUDAToolkit QUIET) - -if (CUDAToolkit_FOUND) - find_package(CUDA::nvml QUIET) - message(STATUS "Found CUDA NVML, making NVML energy profiler available.") -else() - message(STATUS "CUDAToolkit not found, skipping NVML energy profiler.") - return() -endif() - -kp_add_library(kp_energy_nvml kp_energy_nvml.cpp kp_nvml_energy_profiler.cpp) - -target_link_libraries(kp_energy_nvml PRIVATE CUDA::nvml pthread) - -target_include_directories(kp_energy_nvml PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR} -) \ No newline at end of file diff --git a/profiling/energy-profiler/nvml-power/kp_energy_nvml.cpp b/profiling/energy-profiler/nvml-power/kp_energy_nvml.cpp deleted file mode 100644 index 7b9457e0b..000000000 --- a/profiling/energy-profiler/nvml-power/kp_energy_nvml.cpp +++ /dev/null @@ -1,140 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos 
v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "kp_core.hpp" -#include "kp_nvml_energy_profiler.hpp" - -using namespace KokkosTools::NVMLEnergyProfiler; - -extern "C" { - -void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, - const uint32_t devInfoCount __attribute__((unused)), - void* deviceInfo __attribute__((unused))) { - const char* tool_verbose = getenv("KOKKOS_TOOLS_LIBS_VERBOSE"); - if (tool_verbose != nullptr) { - printf( - "KokkosP NVML Energy: library loaded (sequence is %d, version: %" PRIu64 - ")\n", - loadSeq, interfaceVer); - } - - g_data_manager = new DataManager(); - if (!g_data_manager->initialize()) { - printf("KokkosP NVML Energy: Failed to initialize, profiling disabled\n"); - delete g_data_manager; - g_data_manager = nullptr; - } -} - -void kokkosp_finalize_library() { - const char* tool_verbose = getenv("KOKKOS_TOOLS_LIBS_VERBOSE"); - if (tool_verbose != nullptr) { - printf("KokkosP NVML Energy: finalizing library\n"); - } - - char hostname[256]; - gethostname(hostname, 256); - int pid = (int)getpid(); - - if (g_data_manager) { - // Write output files - auto kernel_filename = std::string(hostname) + "-" + std::to_string(pid) + - "-nvml-energy-kernels.csv"; - auto region_filename = std::string(hostname) + "-" + std::to_string(pid) + - "-nvml-energy-regions.csv"; - g_data_manager->write_kernel_data(kernel_filename); - 
g_data_manager->write_region_data(region_filename); - - delete g_data_manager; - g_data_manager = nullptr; - } -} - -void kokkosp_begin_parallel_for(const char* name, - uint32_t devid __attribute__((unused)), - uint64_t* kernid __attribute__((unused))) { - if (g_data_manager) { - g_data_manager->start_region(std::string(name), RegionType::ParallelFor); - } -} - -void kokkosp_end_parallel_for(uint64_t kernid __attribute__((unused))) { - if (g_data_manager) { - g_data_manager->end_region(); - } -} - -void kokkosp_begin_parallel_reduce(const char* name, - uint32_t devid __attribute__((unused)), - uint64_t* kernid __attribute__((unused))) { - if (g_data_manager) { - g_data_manager->start_region(std::string(name), RegionType::ParallelReduce); - } -} - -void kokkosp_end_parallel_reduce(uint64_t kernid __attribute__((unused))) { - if (g_data_manager) { - g_data_manager->end_region(); - } -} - -void kokkosp_begin_parallel_scan(const char* name, - uint32_t devid __attribute__((unused)), - uint64_t* kernid __attribute__((unused))) { - if (g_data_manager) { - g_data_manager->start_region(std::string(name), RegionType::ParallelScan); - } -} - -void kokkosp_end_parallel_scan(uint64_t kernid __attribute__((unused))) { - if (g_data_manager) { - g_data_manager->end_region(); - } -} - -void kokkosp_push_profile_region(const char* name) { - if (g_data_manager) { - g_data_manager->start_region(std::string(name), RegionType::UserRegion); - } -} - -void kokkosp_pop_profile_region() { - if (g_data_manager) { - g_data_manager->end_region(); - } -} - -} // extern "C" diff --git a/profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.cpp b/profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.cpp deleted file mode 100644 index 0f2723f6e..000000000 --- a/profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.cpp +++ /dev/null @@ -1,206 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#include "kp_nvml_energy_profiler.hpp" -#include -#include -#include - -namespace KokkosTools { -namespace NVMLEnergyProfiler { - -DataManager* g_data_manager = nullptr; - -DataManager::DataManager() : nvml_initialized(false) {} - -DataManager::~DataManager() { finalize(); } - -bool DataManager::initialize() { - nvmlReturn_t result = nvmlInit(); - if (result != NVML_SUCCESS) { - printf("KokkosP NVML Energy: Failed to initialize NVML: %s\n", - nvmlErrorString(result)); - return false; - } - - // Get the first GPU device - result = nvmlDeviceGetHandleByIndex(0, &device); - if (result != NVML_SUCCESS) { - printf("KokkosP NVML Energy: Failed to get device handle: %s\n", - nvmlErrorString(result)); - nvmlShutdown(); - return false; - } - - // Test if energy monitoring is available - unsigned long long energy; - result = nvmlDeviceGetTotalEnergyConsumption(device, &energy); - if (result != NVML_SUCCESS) { - printf("KokkosP NVML Energy: Energy monitoring not available: %s\n", - nvmlErrorString(result)); - nvmlShutdown(); - return false; - } - - nvml_initialized = true; - printf("KokkosP NVML Energy: Initialized successfully\n"); - return true; -} - -void DataManager::finalize() { - if (nvml_initialized) { - nvmlShutdown(); - nvml_initialized = false; - } -} - -unsigned long long DataManager::get_current_energy_mj() const { - if (!nvml_initialized) return 0; - - unsigned long long energy; - nvmlReturn_t result = nvmlDeviceGetTotalEnergyConsumption(device, &energy); - if (result != NVML_SUCCESS) { - printf("KokkosP NVML Energy: Failed to get 
energy consumption: %s\n", - nvmlErrorString(result)); - return 0; - } - return energy; -} - -void DataManager::start_region(const std::string& name, RegionType type) { - TimingEnergyInfo region; - region.name = name; - region.type = type; - region.start_time = std::chrono::high_resolution_clock::now(); - region.start_energy_mj = get_current_energy_mj(); - active_regions.push_back(region); -} - -void DataManager::end_region() { - if (!active_regions.empty()) { - auto& region = active_regions.back(); - region.end_time = std::chrono::high_resolution_clock::now(); - region.end_energy_mj = get_current_energy_mj(); - region.duration = std::chrono::duration_cast( - region.end_time - region.start_time); - region.delta_energy_mj = region.end_energy_mj - region.start_energy_mj; - - // Power (W) = (delta_energy_mj / 1000) [Joules] / (duration_ns / 1e9) - // [seconds] - // => Power = (delta_energy_mj * 1e6) / duration_ns - if (region.duration.count() > 0) { - region.average_power_w = - (static_cast(region.delta_energy_mj) * 1e6) / - static_cast(region.duration.count()); - } else { - region.average_power_w = 0.0; - } - - if (region.type == RegionType::UserRegion) { - completed_regions.push_back(region); - } else { - completed_kernels.push_back(region); - } - active_regions.pop_back(); - } -} - -const char* DataManager::region_type_to_string(RegionType type) const { - switch (type) { - case RegionType::ParallelFor: return "parallel_for"; - case RegionType::ParallelReduce: return "parallel_reduce"; - case RegionType::ParallelScan: return "parallel_scan"; - case RegionType::UserRegion: return "user_region"; - default: return "unknown"; - } -} - -void DataManager::write_kernel_data(const std::string& filename) const { - if (completed_kernels.empty()) return; - - FILE* kernels_file = fopen(filename.c_str(), "w"); - if (kernels_file) { - fprintf(kernels_file, - "name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns,start_" - 
"energy_mj,end_energy_mj,delta_energy_mj,average_power_w\n"); - for (const auto& kernel : completed_kernels) { - auto start_ns = std::chrono::duration_cast( - kernel.start_time.time_since_epoch()) - .count(); - auto end_ns = std::chrono::duration_cast( - kernel.end_time.time_since_epoch()) - .count(); - fprintf(kernels_file, - "%s,%s,%" PRId64 ",%" PRId64 ",%" PRId64 ",%llu,%llu,%llu,%.6f\n", - kernel.name.c_str(), region_type_to_string(kernel.type), start_ns, - end_ns, (int64_t)kernel.duration.count(), kernel.start_energy_mj, - kernel.end_energy_mj, kernel.delta_energy_mj, - kernel.average_power_w); - } - fclose(kernels_file); - char cwd[256]; - if (getcwd(cwd, 256) != nullptr) { - printf("KokkosP NVML Energy: Kernel energy CSV written to %s/%s (%" PRIu64 - " kernels)\n", - cwd, filename.c_str(), - static_cast(completed_kernels.size())); - } else { - printf("KokkosP NVML Energy: Kernel energy CSV written to %s (%" PRIu64 - " kernels)\n", - filename.c_str(), static_cast(completed_kernels.size())); - } - } -} - -void DataManager::write_region_data(const std::string& filename) const { - if (completed_regions.empty()) return; - - FILE* regions_file = fopen(filename.c_str(), "w"); - if (regions_file) { - fprintf(regions_file, - "name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns,start_" - "energy_mj,end_energy_mj,delta_energy_mj,average_power_w\n"); - for (const auto& region : completed_regions) { - auto start_ns = std::chrono::duration_cast( - region.start_time.time_since_epoch()) - .count(); - auto end_ns = std::chrono::duration_cast( - region.end_time.time_since_epoch()) - .count(); - fprintf(regions_file, - "%s,%s,%" PRId64 ",%" PRId64 ",%" PRId64 ",%llu,%llu,%llu,%.6f\n", - region.name.c_str(), region_type_to_string(region.type), start_ns, - end_ns, (int64_t)region.duration.count(), region.start_energy_mj, - region.end_energy_mj, region.delta_energy_mj, - region.average_power_w); - } - fclose(regions_file); - char cwd[256]; - if (getcwd(cwd, 256) != 
nullptr) { - printf("KokkosP NVML Energy: Region energy CSV written to %s/%s (%" PRIu64 - " regions)\n", - cwd, filename.c_str(), - static_cast(completed_regions.size())); - } else { - printf("KokkosP NVML Energy: Region energy CSV written to %s (%" PRIu64 - " regions)\n", - filename.c_str(), static_cast(completed_regions.size())); - } - } -} - -} // namespace NVMLEnergyProfiler -} // namespace KokkosTools diff --git a/profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.hpp b/profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.hpp deleted file mode 100644 index 0430d11f1..000000000 --- a/profiling/energy-profiler/nvml-power/kp_nvml_energy_profiler.hpp +++ /dev/null @@ -1,72 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#ifndef KP_NVML_ENERGY_PROFILER_HPP -#define KP_NVML_ENERGY_PROFILER_HPP - -#include -#include -#include -#include - -namespace KokkosTools { -namespace NVMLEnergyProfiler { - -enum class RegionType { ParallelFor, ParallelReduce, ParallelScan, UserRegion }; - -struct TimingEnergyInfo { - std::string name; - RegionType type; - std::chrono::high_resolution_clock::time_point start_time; - std::chrono::high_resolution_clock::time_point end_time; - std::chrono::nanoseconds duration; - unsigned long long start_energy_mj; // millijoules at start - unsigned long long end_energy_mj; // millijoules at end - unsigned long long delta_energy_mj; // energy consumed during region - double average_power_w; // average power in Watts -}; - -class DataManager { - private: - std::vector completed_kernels; - std::vector completed_regions; - std::vector active_regions; - nvmlDevice_t device; - bool nvml_initialized; - - const char* region_type_to_string(RegionType type) const; - unsigned long long get_current_energy_mj() const; - - public: - DataManager(); - ~DataManager(); - - bool initialize(); - void finalize(); - - void start_region(const std::string& name, RegionType type); - void end_region(); - - void write_kernel_data(const std::string& filename) const; - void write_region_data(const std::string& filename) const; -}; - -extern DataManager* g_data_manager; - -} // namespace NVMLEnergyProfiler -} // namespace KokkosTools - -#endif // KP_NVML_ENERGY_PROFILER_HPP diff --git a/profiling/energy-profiler/nvml-power/readme.md b/profiling/energy-profiler/nvml-power/readme.md deleted file mode 100644 index 520f4020f..000000000 --- a/profiling/energy-profiler/nvml-power/readme.md +++ /dev/null @@ -1,24 +0,0 @@ -# NVML Power Profiler - -A Kokkos profiling tool that uses NVML to collect power data from NVIDIA GPUs. 
-This variant uses `nvmlDeviceGetTotalEnergyConsumption(device, &energy)` to get energy data for kernels and regions. - -## Setup - -Requires CUDA toolkit with NVML. -1. Install the CUDA toolkit that includes NVML. -2. Compile this module with the main CMake build. - -## Output Files - -- `hostname-pid-nvml-power-kernels.csv`: Kernel power measurements - - Format: `name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns,start_energy_mj,end_energy_mj,delta_energy_mj,average_power_w` -- `hostname-pid-nvml-power-regions.csv`: Region timings - - Format: `name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns,start_energy_mj,end_energy_mj,delta_energy_mj,average_power_w` - -## Usage - -```bash -export KOKKOS_PROFILE_LIBRARY=/path/to/kp_power_nvml.so -./your_kokkos_application -``` \ No newline at end of file diff --git a/profiling/energy-profiler/nvml/CMakeLists.txt b/profiling/energy-profiler/nvml/CMakeLists.txt deleted file mode 100644 index 72a92f9b2..000000000 --- a/profiling/energy-profiler/nvml/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -find_package(CUDAToolkit QUIET) - -if (CUDAToolkit_FOUND) - find_package(CUDA::nvml QUIET) - message(STATUS "Found CUDA NVML, making NVML power profiler available.") -else() - message(STATUS "CUDAToolkit not found, skipping NVML power profiler.") - return() -endif() - -kp_add_library(kp_power_nvml kp_power_nvml.cpp kp_nvml_power_profiler.cpp) - -target_link_libraries(kp_power_nvml PRIVATE CUDA::nvml pthread) - -target_include_directories(kp_power_nvml PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR} -) \ No newline at end of file diff --git a/profiling/energy-profiler/nvml/kp_nvml_power_profiler.cpp b/profiling/energy-profiler/nvml/kp_nvml_power_profiler.cpp deleted file mode 100644 index e585ce443..000000000 --- a/profiling/energy-profiler/nvml/kp_nvml_power_profiler.cpp +++ /dev/null @@ -1,134 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#include "kp_nvml_power_profiler.hpp" -#include -#include -#include - -namespace KokkosTools { -namespace NVMLPowerProfiler { - -void DataManager::add_power_data_point(int64_t timestamp, double power) { - power_data_points.push_back({timestamp, power}); -} - -void DataManager::start_region(const std::string& name, RegionType type) { - TimingInfo region; - region.name = name; - region.type = type; - region.start_time = std::chrono::high_resolution_clock::now(); - active_regions.push_back(region); -} - -void DataManager::end_region() { - if (!active_regions.empty()) { - auto& region = active_regions.back(); - region.end_time = std::chrono::high_resolution_clock::now(); - region.duration = std::chrono::duration_cast( - region.end_time - region.start_time); - - if (region.type == RegionType::UserRegion) { - completed_regions.push_back(region); - } else { - completed_kernels.push_back(region); - } - active_regions.pop_back(); - } -} - -const char* DataManager::region_type_to_string(RegionType type) const { - switch (type) { - case RegionType::ParallelFor: return "parallel_for"; - case RegionType::ParallelReduce: return "parallel_reduce"; - case RegionType::ParallelScan: return "parallel_scan"; - case RegionType::UserRegion: return "user_region"; - default: return "unknown"; - } -} - -void DataManager::write_power_data(const std::string& filename) const { - FILE* csv_file = fopen(filename.c_str(), "w"); - if (csv_file) { - fprintf(csv_file, "time_epoch_ns,power_w\n"); - for (const auto& point : power_data_points) { - fprintf(csv_file, "%" 
PRId64 ",%.6f\n", point.timestamp_ns, - point.power_watts); - } - fclose(csv_file); - char cwd[256]; - getcwd(cwd, 256); - printf("KokkosP NVML Power: Power CSV data written to %s/%s (%" PRIu64 - " data points)\n", - cwd, filename.c_str(), - static_cast(power_data_points.size())); - } -} - -void DataManager::write_kernel_data(const std::string& filename) const { - if (completed_kernels.empty()) return; - - FILE* file = fopen(filename.c_str(), "w"); - if (file) { - fprintf(file, - "name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns\n"); - for (const auto& region : completed_kernels) { - auto start_ns = std::chrono::duration_cast( - region.start_time.time_since_epoch()) - .count(); - auto end_ns = std::chrono::duration_cast( - region.end_time.time_since_epoch()) - .count(); - fprintf(file, "%s,%s,%" PRId64 ",%" PRId64 ",%" PRId64 "\n", - region.name.c_str(), region_type_to_string(region.type), start_ns, - end_ns, (int64_t)region.duration.count()); - } - fclose(file); - char cwd[256]; - getcwd(cwd, 256); - printf("KokkosP NVML Power: Kernel timing CSV written to %s/%s\n", cwd, - filename.c_str()); - } -} - -void DataManager::write_region_data(const std::string& filename) const { - if (completed_regions.empty()) return; - - FILE* file = fopen(filename.c_str(), "w"); - if (file) { - fprintf(file, - "name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns\n"); - for (const auto& region : completed_regions) { - auto start_ns = std::chrono::duration_cast( - region.start_time.time_since_epoch()) - .count(); - auto end_ns = std::chrono::duration_cast( - region.end_time.time_since_epoch()) - .count(); - fprintf(file, "%s,%s,%" PRId64 ",%" PRId64 ",%" PRId64 "\n", - region.name.c_str(), region_type_to_string(region.type), start_ns, - end_ns, (int64_t)region.duration.count()); - } - fclose(file); - char cwd[256]; - getcwd(cwd, 256); - printf("KokkosP NVML Power: Region timing CSV written to %s/%s\n", cwd, - filename.c_str()); - } -} - -} // namespace 
NVMLPowerProfiler -} // namespace KokkosTools diff --git a/profiling/energy-profiler/nvml/kp_nvml_power_profiler.hpp b/profiling/energy-profiler/nvml/kp_nvml_power_profiler.hpp deleted file mode 100644 index b79624336..000000000 --- a/profiling/energy-profiler/nvml/kp_nvml_power_profiler.hpp +++ /dev/null @@ -1,70 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#ifndef KP_POWER_PROFILER_HPP -#define KP_POWER_PROFILER_HPP - -#include -#include -#include -#include - -namespace KokkosTools { -namespace NVMLPowerProfiler { - -enum class RegionType { - Unknown, - ParallelFor, - ParallelReduce, - ParallelScan, - UserRegion -}; - -struct PowerDataPoint { - int64_t timestamp_ns; - double power_watts; -}; - -struct TimingInfo { - std::string name; - RegionType type; - std::chrono::high_resolution_clock::time_point start_time; - std::chrono::high_resolution_clock::time_point end_time; - std::chrono::nanoseconds duration; -}; - -class DataManager { - public: - void add_power_data_point(int64_t timestamp, double power); - void start_region(const std::string& name, RegionType type); - void end_region(); - void write_power_data(const std::string& filename) const; - void write_kernel_data(const std::string& filename) const; - void write_region_data(const std::string& filename) const; - - private: - const char* region_type_to_string(RegionType type) const; - - std::deque power_data_points; - std::deque completed_kernels; - std::deque completed_regions; - std::deque active_regions; -}; - 
-} // namespace NVMLPowerProfiler -} // namespace KokkosTools - -#endif // KP_POWER_PROFILER_HPP diff --git a/profiling/energy-profiler/nvml/kp_power_nvml.cpp b/profiling/energy-profiler/nvml/kp_power_nvml.cpp deleted file mode 100644 index 0de693ecf..000000000 --- a/profiling/energy-profiler/nvml/kp_power_nvml.cpp +++ /dev/null @@ -1,353 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -/** - * Kokkos NVML Power Profiler - * Simple Kokkos profiling tool that monitors GPU power consumption using NVML - * Polls nvmlDeviceGetPowerUsage() every 20ms in a background thread - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "kp_core.hpp" -#include "kp_nvml_power_profiler.hpp" - -namespace KokkosTools { -namespace NVMLPowerProfiler { - -// State variables -std::atomic g_stop_requested(false); -std::deque g_nvml_devices; -std::unique_ptr g_monitoring_thread; -std::condition_variable g_sleep_cv; -std::mutex g_sleep_mutex; -DataManager g_data_manager; -std::chrono::high_resolution_clock::time_point g_start_time; - -// Get current time in nanoseconds since epoch -int64_t get_current_epoch_ns() { - return std::chrono::duration_cast( - std::chrono::high_resolution_clock::now().time_since_epoch()) - .count(); -} - -void nvml_power_monitoring_thread_func(std::chrono::milliseconds interval) { - auto start_time = std::chrono::high_resolution_clock::now(); - 
int64_t interval_count = 0; - int64_t delayed_intervals = 0; - - while (!g_stop_requested.load()) { - auto next_check_time = start_time + ((interval_count + 1) * interval); - - { - std::unique_lock sleep_lock(g_sleep_mutex); - if (g_sleep_cv.wait_until(sleep_lock, next_check_time, - [] { return g_stop_requested.load(); })) { - break; - } - } - - auto current_time = std::chrono::high_resolution_clock::now(); - interval_count++; - - auto expected_time = start_time + (interval_count * interval); - auto delay = current_time - expected_time; - - if (delay > interval / 2) { - delayed_intervals++; - } - - double current_power_sum_W = 0.0; - - for (size_t i = 0; i < g_nvml_devices.size(); ++i) { - if (g_nvml_devices[i] == nullptr) continue; - - unsigned int power_mW; - nvmlReturn_t result = - nvmlDeviceGetPowerUsage(g_nvml_devices[i], &power_mW); - - if (NVML_SUCCESS == result) { - double current_power_W = static_cast(power_mW) / 1000.0; - current_power_sum_W += current_power_W; - } - } - - int64_t timestamp_ns = get_current_epoch_ns(); - g_data_manager.add_power_data_point(timestamp_ns, current_power_sum_W); - - if (interval_count % 100 == 0 && delayed_intervals > 0) { - printf("KokkosP NVML Power: Timing info - %" PRId64 " intervals, %" PRId64 - " delayed (%.1f%%)\n", - interval_count, delayed_intervals, - (100.0 * delayed_intervals) / interval_count); - } - } - - if (interval_count > 0) { - auto total_duration = - std::chrono::high_resolution_clock::now() - start_time; - auto actual_avg_interval = total_duration / interval_count; - printf( - "KokkosP NVML Power: Monitoring completed - %" PRId64 - " intervals, avg interval: %.1f ms (expected: %" PRId64 " ms)\n", - interval_count, - std::chrono::duration(actual_avg_interval).count(), - static_cast(interval.count())); - } -} - -bool initialize_nvml() { - nvmlReturn_t result = nvmlInit(); - if (NVML_SUCCESS != result) { - std::cerr << "KokkosP NVML Power: Failed to initialize NVML: " - << nvmlErrorString(result) << "\n"; - 
return false; - } - - unsigned int device_count; - result = nvmlDeviceGetCount(&device_count); - if (NVML_SUCCESS != result) { - std::cerr << "KokkosP NVML Power: Failed to get device count: " - << nvmlErrorString(result) << "\n"; - nvmlShutdown(); - return false; - } - - if (device_count == 0) { - std::cerr << "KokkosP NVML Power: No NVIDIA devices found\n"; - nvmlShutdown(); - return false; - } - - g_nvml_devices.resize(device_count); - - printf("KokkosP NVML Power: Found %u NVIDIA device(s)\n", device_count); - - for (unsigned int i = 0; i < device_count; ++i) { - result = nvmlDeviceGetHandleByIndex(i, &g_nvml_devices[i]); - if (NVML_SUCCESS != result) { - std::cerr << "KokkosP NVML Power: Failed to get handle for device " << i - << "\n"; - g_nvml_devices[i] = nullptr; - continue; - } - - char device_name[NVML_DEVICE_NAME_BUFFER_SIZE]; - result = nvmlDeviceGetName(g_nvml_devices[i], device_name, - NVML_DEVICE_NAME_BUFFER_SIZE); - if (NVML_SUCCESS == result) { - printf("KokkosP NVML Power: Device %u: %s\n", i, device_name); - } - - nvmlEnableState_t pmmode; - result = nvmlDeviceGetPowerManagementMode(g_nvml_devices[i], &pmmode); - if (NVML_SUCCESS == result && pmmode == NVML_FEATURE_ENABLED) { - printf("KokkosP NVML Power: Device %u: Power management enabled\n", i); - } else { - printf( - "KokkosP NVML Power: Device %u: Power management disabled or not " - "supported\n", - i); - } - } - - return true; -} - -void finalize_nvml() { - if (!g_nvml_devices.empty()) { - nvmlShutdown(); - } - g_nvml_devices.clear(); -} - -// Kokkos profiler interface functions -void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, - const uint32_t devInfoCount, - Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { - printf( - "======================================================================" - "\n"); - printf("KokkosP: NVML Power Profiler Initialized\n"); - printf("KokkosP: Sequence: %d, Interface Version: %llu, Devices: %u\n", - loadSeq, (unsigned long 
long)interfaceVer, devInfoCount); - printf( - "======================================================================" - "\n"); - - g_start_time = std::chrono::high_resolution_clock::now(); - - if (!initialize_nvml()) { - printf( - "KokkosP NVML Power: Failed to initialize NVML, power monitoring " - "disabled\n"); - return; - } - - int interval_ms = 20; - if (const char* interval_env = std::getenv("KOKKOS_NVML_POWER_INTERVAL")) { - try { - interval_ms = std::stoi(interval_env); - if (interval_ms <= 0) { - interval_ms = 20; - throw std::invalid_argument("Interval must be positive"); - } - printf("KokkosP NVML Power: Using custom interval: %d ms\n", interval_ms); - } catch (const std::exception& e) { - printf( - "KokkosP NVML Power: Invalid interval value, using default 20ms\n"); - } - } else { - printf("KokkosP NVML Power: Using default interval: 20 ms\n"); - } - - g_stop_requested.store(false); - - g_monitoring_thread = - std::make_unique(nvml_power_monitoring_thread_func, - std::chrono::milliseconds(interval_ms)); - - printf("KokkosP NVML Power: Power monitoring started\n"); -} - -void kokkosp_finalize_library() { - auto end_time = std::chrono::high_resolution_clock::now(); - - printf( - "======================================================================" - "\n"); - printf("KokkosP: NVML Power Profiler Finalization\n"); - - if (g_monitoring_thread) { - g_stop_requested.store(true); - g_sleep_cv.notify_all(); - g_monitoring_thread->join(); - g_monitoring_thread.reset(); - } - - auto duration = std::chrono::duration_cast( - end_time - g_start_time); - double elapsed_seconds = duration.count() / 1000.0; - - printf("KokkosP NVML Power: Total execution time: %.3f seconds\n", - elapsed_seconds); - - char hostname[256]; - gethostname(hostname, 256); - int pid = (int)getpid(); - - char power_filename[512]; - snprintf(power_filename, 512, "%s-%d-nvml-power.csv", hostname, pid); - g_data_manager.write_power_data(power_filename); - - char kernels_filename[512]; - 
snprintf(kernels_filename, 512, "%s-%d-nvml-kernels.csv", hostname, pid); - g_data_manager.write_kernel_data(kernels_filename); - - char regions_filename[512]; - snprintf(regions_filename, 512, "%s-%d-nvml-regions.csv", hostname, pid); - g_data_manager.write_region_data(regions_filename); - - finalize_nvml(); - printf( - "======================================================================" - "\n"); -} - -void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, - uint64_t* kID) { - g_data_manager.start_region(name, RegionType::ParallelFor); -} - -void kokkosp_end_parallel_for(const uint64_t kID) { - g_data_manager.end_region(); -} - -void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, - uint64_t* kID) { - g_data_manager.start_region(name, RegionType::ParallelScan); -} - -void kokkosp_end_parallel_scan(const uint64_t kID) { - g_data_manager.end_region(); -} - -void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, - uint64_t* kID) { - g_data_manager.start_region(name, RegionType::ParallelReduce); -} - -void kokkosp_end_parallel_reduce(const uint64_t kID) { - g_data_manager.end_region(); -} - -void kokkosp_push_profile_region(char const* regionName) { - g_data_manager.start_region(regionName, RegionType::UserRegion); -} - -void kokkosp_pop_profile_region() { g_data_manager.end_region(); } - -Kokkos::Tools::Experimental::EventSet get_event_set() { - Kokkos::Tools::Experimental::EventSet my_event_set; - memset(&my_event_set, 0, sizeof(my_event_set)); - my_event_set.init = kokkosp_init_library; - my_event_set.finalize = kokkosp_finalize_library; - my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; - my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; - my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; - my_event_set.end_parallel_for = kokkosp_end_parallel_for; - my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; - my_event_set.end_parallel_scan = 
kokkosp_end_parallel_scan; - my_event_set.push_region = kokkosp_push_profile_region; - my_event_set.pop_region = kokkosp_pop_profile_region; - return my_event_set; -} - -} // namespace NVMLPowerProfiler -} // namespace KokkosTools - -extern "C" { - -namespace impl = KokkosTools::NVMLPowerProfiler; - -EXPOSE_INIT(impl::kokkosp_init_library) -EXPOSE_FINALIZE(impl::kokkosp_finalize_library) -EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) -EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) -EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) -EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) -EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) -EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) -EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) -EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) - -} // extern "C" \ No newline at end of file diff --git a/profiling/energy-profiler/nvml/readme.md b/profiling/energy-profiler/nvml/readme.md deleted file mode 100644 index 771d9d2ee..000000000 --- a/profiling/energy-profiler/nvml/readme.md +++ /dev/null @@ -1,27 +0,0 @@ -# NVML Power Profiler - -A Kokkos profiling tool that uses NVML to collect power data from NVIDIA GPUs. - -## Setup - -Requires CUDA toolkit with NVML. -1. Install the CUDA toolkit that includes NVML. -2. Compile this module with the main CMake build. 
- -## Output Files - -- `hostname-pid-nvml-power.csv`: Power measurements - - Format: `name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns` -- `hostname-pid-nvml-regions.csv`: Region timings - - Format: `name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns` -- `hostname-pid-nvml-kernels.csv`: Kernel timings - - Format: `name,type,start_time_epoch_ns,end_time_epoch_ns,duration_ns` - -## Usage - -```bash -export KOKKOS_PROFILE_LIBRARY=/path/to/kp_power_nvml.so -./your_kokkos_application -``` - -Interval of sampling power data can be controlled via the `KOKKOS_NVML_POWER_INTERVAL` environment variable, which specifies the interval in microseconds. \ No newline at end of file diff --git a/profiling/energy-profiler/provider/provider_nvml.cpp b/profiling/energy-profiler/provider/provider_nvml.cpp new file mode 100644 index 000000000..d5f35e6cd --- /dev/null +++ b/profiling/energy-profiler/provider/provider_nvml.cpp @@ -0,0 +1,219 @@ +#include "provider_nvml.hpp" +#include +#include +#include + +NVMLProvider::NVMLProvider() : initialized_(false) {} + +NVMLProvider::~NVMLProvider() { + if (initialized_) { + finalize(); + } +} + +bool NVMLProvider::initialize() { + if (initialized_) { + return true; + } + + // Initialize NVML + nvmlReturn_t result = nvmlInit(); + if (NVML_SUCCESS != result) { + std::cerr << "NVML Provider: Failed to initialize NVML: " + << nvmlErrorString(result) << std::endl; + return false; + } + + // Discover devices + if (!discover_devices()) { + nvmlShutdown(); + return false; + } + + initialized_ = true; + std::cout << "NVML Provider: Successfully initialized with " + << devices_.size() << " device(s)" << std::endl; + + return true; +} + +void NVMLProvider::finalize() { + if (!initialized_) { + return; + } + + cleanup_devices(); + nvmlShutdown(); + initialized_ = false; + + std::cout << "NVML Provider: Finalized" << std::endl; +} + +double NVMLProvider::get_total_power_usage() { + if (!initialized_) { + return 0.0; + } + + double 
total_power_W = 0.0; + + for (size_t i = 0; i < devices_.size(); ++i) { + double device_power = get_device_power_usage(i); + if (device_power >= 0.0) { + total_power_W += device_power; + } + } + + return total_power_W; +} + +double NVMLProvider::get_device_power_usage(size_t device_index) { + if (!initialized_ || device_index >= devices_.size()) { + return -1.0; + } + + if (devices_[device_index] == nullptr) { + return -1.0; + } + + unsigned int power_mW = 0; + nvmlReturn_t result = + nvmlDeviceGetPowerUsage(devices_[device_index], &power_mW); + + if (result == NVML_SUCCESS) { + // Convert from milliwatts to watts + return static_cast(power_mW) / 1000.0; + } else { + std::cerr << "NVML Provider: Failed to get power usage for device " + << device_index << ": " << nvmlErrorString(result) << std::endl; + return -1.0; + } +} + +double NVMLProvider::get_device_power_usage_direct(size_t device_index) { + if (!initialized_ || device_index >= devices_.size()) { + return -1.0; + } + + if (devices_[device_index] == nullptr) { + return -1.0; + } + + nvmlFieldValue_t powerFieldNow; + powerFieldNow.fieldId = NVML_FI_DEV_POWER_INSTANT; + if (nvmlDeviceGetFieldValues(devices_[device_index], 1, &powerFieldNow) != + NVML_SUCCESS) { + std::cerr << "NVML power read failed — stopping measurement.\n"; + return -1.0; + } + unsigned int pw = static_cast(powerFieldNow.value.uiVal); + // Convert from milliwatts to watts + return static_cast(pw) / 1000.0; +} + +double NVMLProvider::get_current_energy_consumption(size_t device_index) { + if (!initialized_ || device_index >= devices_.size()) { + return -1.0; + } + + if (devices_[device_index] == nullptr) { + return -1.0; + } + + unsigned long long energy_joules = 0; + nvmlReturn_t result = nvmlDeviceGetTotalEnergyConsumption( + devices_[device_index], &energy_joules); + + if (result == NVML_SUCCESS) { + // Convert from millijoules to joules + return static_cast(energy_joules) / 1000.0; + } else { + std::cerr << "NVML Provider: Failed to get 
energy consumption for device " + << device_index << ": " << nvmlErrorString(result) << std::endl; + return -1.0; + } +} + +size_t NVMLProvider::get_device_count() const { return devices_.size(); } + +std::string NVMLProvider::get_device_name(size_t device_index) const { + if (device_index >= device_names_.size()) { + return "Unknown Device"; + } + return device_names_[device_index]; +} + +bool NVMLProvider::discover_devices() { + unsigned int device_count; + nvmlReturn_t result = nvmlDeviceGetCount(&device_count); + + if (NVML_SUCCESS != result) { + std::cerr << "NVML Provider: Failed to get device count: " + << nvmlErrorString(result) << std::endl; + return false; + } + + if (device_count == 0) { + std::cerr << "NVML Provider: No NVIDIA devices found" << std::endl; + return false; + } + + devices_.resize(device_count); + device_names_.resize(device_count); + + std::cout << "NVML Provider: Found " << device_count << " NVIDIA device(s)" + << std::endl; + + for (unsigned int i = 0; i < device_count; ++i) { + result = nvmlDeviceGetHandleByIndex(i, &devices_[i]); + if (NVML_SUCCESS != result) { + std::cerr << "NVML Provider: Failed to get handle for device " << i + << std::endl; + devices_[i] = nullptr; + device_names_[i] = "Failed Device"; + continue; + } + + // Get device name + char device_name[NVML_DEVICE_NAME_BUFFER_SIZE]; + result = nvmlDeviceGetName(devices_[i], device_name, + NVML_DEVICE_NAME_BUFFER_SIZE); + if (NVML_SUCCESS == result) { + device_names_[i] = std::string(device_name); + std::cout << "NVML Provider: Device " << i << ": " << device_name + << std::endl; + } else { + device_names_[i] = "Unknown Device " + std::to_string(i); + } + + // Check power management capability + nvmlEnableState_t pmmode; + result = nvmlDeviceGetPowerManagementMode(devices_[i], &pmmode); + if (NVML_SUCCESS == result && pmmode == NVML_FEATURE_ENABLED) { + std::cout << "NVML Provider: Device " << i << ": Power management enabled" + << std::endl; + } else { + std::cout << "NVML 
Provider: Device " << i + << ": Power management disabled or not supported" << std::endl; + } + + // Test power usage reading + unsigned int test_power_mW = 0; + result = nvmlDeviceGetPowerUsage(devices_[i], &test_power_mW); + if (NVML_SUCCESS == result) { + std::cout << "NVML Provider: Device " << i + << ": Current power usage: " << (test_power_mW / 1000.0) << " W" + << std::endl; + } else { + std::cout << "NVML Provider: Device " << i + << ": Power usage reading failed: " << nvmlErrorString(result) + << std::endl; + } + } + + return true; +} + +void NVMLProvider::cleanup_devices() { + devices_.clear(); + device_names_.clear(); +} \ No newline at end of file diff --git a/profiling/energy-profiler/provider/provider_nvml.hpp b/profiling/energy-profiler/provider/provider_nvml.hpp new file mode 100644 index 000000000..488f6e68e --- /dev/null +++ b/profiling/energy-profiler/provider/provider_nvml.hpp @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include + +/** + * NVML Power Provider + * Simplified power monitoring using nvmlDeviceGetPowerUsage() + */ +class NVMLProvider { + public: + NVMLProvider(); + ~NVMLProvider(); + + // Initialize NVML and discover devices + bool initialize(); + + // Cleanup NVML resources + void finalize(); + + // Get current power consumption in Watts for all devices + double get_total_power_usage(); + + // Get power usage for a specific device + double get_device_power_usage(size_t device_index); // unit: Watts + + double get_device_power_usage_direct(size_t device_index); // unit: Watts + + double get_current_energy_consumption(size_t device_index); // unit: Joules + + // Get number of available devices + size_t get_device_count() const; + + // Get device name + std::string get_device_name(size_t device_index) const; + + // Check if provider is initialized + bool is_initialized() const { return initialized_; } + + private: + bool initialized_; + std::vector devices_; + std::vector device_names_; + + // Helper methods + bool 
discover_devices(); + void cleanup_devices(); +}; \ No newline at end of file diff --git a/profiling/energy-profiler/provider/provider_variorum.cpp b/profiling/energy-profiler/provider/provider_variorum.cpp new file mode 100644 index 000000000..a31f1eed2 --- /dev/null +++ b/profiling/energy-profiler/provider/provider_variorum.cpp @@ -0,0 +1,230 @@ +#include "provider_variorum.hpp" +#include +#include +#include + +VariorumProvider::VariorumProvider() : initialized_(false) {} + +VariorumProvider::~VariorumProvider() { + if (initialized_) { + finalize(); + } +} + +bool VariorumProvider::initialize() { + if (initialized_) { + return true; + } + + // Initialize Variorum (in the original code, this was a no-op) + // For now, we'll assume Variorum is available and working + + // Discover devices + if (!discover_devices()) { + return false; + } + + initialized_ = true; + std::cout << "Variorum Provider: Successfully initialized with " + << device_ids_.size() << " device(s)" << std::endl; + + return true; +} + +void VariorumProvider::finalize() { + if (!initialized_) { + return; + } + + cleanup_devices(); + initialized_ = false; + + std::cout << "Variorum Provider: Finalized" << std::endl; +} + +double VariorumProvider::get_total_power_usage() { + if (!initialized_) { + return 0.0; + } + + double total_power_W = 0.0; + std::map power_readings = get_current_power_readings(); + + for (const auto& [device_id, power] : power_readings) { + if (power >= 0.0) { + total_power_W += power; + } + } + + return total_power_W; +} + +double VariorumProvider::get_device_power_usage(size_t device_index) { + if (!initialized_ || device_index >= device_ids_.size()) { + return -1.0; + } + + uint32_t device_id = device_ids_[device_index]; + std::map power_readings = get_current_power_readings(); + + auto it = power_readings.find(device_id); + if (it != power_readings.end()) { + return it->second; + } + + return -1.0; +} + +size_t VariorumProvider::get_device_count() const { return 
device_ids_.size(); } + +std::string VariorumProvider::get_device_name(size_t device_index) const { + if (device_index >= device_names_.size()) { + return "Unknown Device"; + } + return device_names_[device_index]; +} + +bool VariorumProvider::discover_devices() { + std::set found_device_ids; + unique_json_ptr root = get_variorum_json_data(); + + if (!root) { + std::cerr << "Variorum Provider: Failed to get JSON data from Variorum" + << std::endl; + return false; + } + + // Parse JSON to find GPU devices + json_t* host_obj = json_object_iter_value(json_object_iter(root.get())); + if (!host_obj) { + std::cerr << "Variorum Provider: No host object found in JSON" << std::endl; + return false; + } + + json_t* socket_0 = json_object_get(host_obj, "socket_0"); + if (socket_0 && json_is_object(socket_0)) { + json_t* power_gpu_watts = json_object_get(socket_0, "power_gpu_watts"); + if (power_gpu_watts && json_is_object(power_gpu_watts)) { + const char* key; + json_t* value; + json_object_foreach(power_gpu_watts, key, value) { + std::string s_key(key); + if (s_key.length() > 4 && s_key.substr(0, 4) == "GPU_") { + try { + uint32_t device_id = std::stoul(s_key.substr(4)); + found_device_ids.insert(device_id); + } catch (const std::exception& e) { + std::cerr << "Variorum Provider: Could not parse GPU ID from key: " + << s_key << " (" << e.what() << ")" << std::endl; + } + } + } + } + } + + if (found_device_ids.empty()) { + std::cerr << "Variorum Provider: No GPU devices found" << std::endl; + return false; + } + + // Store device information + device_ids_.clear(); + device_names_.clear(); + + for (uint32_t device_id : found_device_ids) { + device_ids_.push_back(device_id); + device_names_.push_back("GPU_" + std::to_string(device_id)); + + std::cout << "Variorum Provider: Found device " << device_ids_.size() - 1 + << ": GPU_" << device_id << std::endl; + } + + // Test initial power readings + std::cout << "Variorum Provider: Testing initial power readings..." 
+ << std::endl; + std::map test_readings = get_current_power_readings(); + for (size_t i = 0; i < device_ids_.size(); ++i) { + uint32_t device_id = device_ids_[i]; + auto it = test_readings.find(device_id); + if (it != test_readings.end()) { + std::cout << "Variorum Provider: Device " << i + << ": Current power usage: " << it->second << " W" << std::endl; + } else { + std::cout << "Variorum Provider: Device " << i << ": Power reading failed" + << std::endl; + } + } + + return true; +} + +void VariorumProvider::cleanup_devices() { + device_ids_.clear(); + device_names_.clear(); +} + +VariorumProvider::unique_json_ptr VariorumProvider::get_variorum_json_data() + const { + char* json_string_c_raw = nullptr; + int variorum_error = variorum_get_power_json(&json_string_c_raw); + + if (variorum_error != 0) { + std::cerr + << "Variorum Provider: variorum_get_power_json() failed. Error code: " + << variorum_error << std::endl; + return unique_json_ptr(nullptr); + } + + unique_cstring json_string_c(json_string_c_raw); + + if (!json_string_c) { + std::cerr + << "Variorum Provider: variorum_get_power_json() returned success " + "but a null pointer." 
+ << std::endl; + return unique_json_ptr(nullptr); + } + + json_error_t error; + json_t* root_ptr = json_loads(json_string_c.get(), 0, &error); + + if (!root_ptr) { + std::cerr << "Variorum Provider: Failed to parse JSON: " << error.text + << std::endl; + return unique_json_ptr(nullptr); + } + + return unique_json_ptr(root_ptr); +} + +std::map VariorumProvider::get_current_power_readings() + const { + std::map readings; + + unique_json_ptr root = get_variorum_json_data(); + if (!root) { + return readings; + } + + json_t* host_obj = json_object_iter_value(json_object_iter(root.get())); + if (!host_obj) { + return readings; + } + + json_t* socket_0 = json_object_get(host_obj, "socket_0"); + if (socket_0 && json_is_object(socket_0)) { + json_t* power_gpu_watts = json_object_get(socket_0, "power_gpu_watts"); + if (power_gpu_watts && json_is_object(power_gpu_watts)) { + for (uint32_t device_id : device_ids_) { + std::string gpu_key = "GPU_" + std::to_string(device_id); + json_t* power_value = json_object_get(power_gpu_watts, gpu_key.c_str()); + + if (json_is_number(power_value)) { + readings[device_id] = json_number_value(power_value); + } + } + } + } + + return readings; +} \ No newline at end of file diff --git a/profiling/energy-profiler/provider/provider_variorum.hpp b/profiling/energy-profiler/provider/provider_variorum.hpp new file mode 100644 index 000000000..bdf470a07 --- /dev/null +++ b/profiling/energy-profiler/provider/provider_variorum.hpp @@ -0,0 +1,56 @@ +#pragma once + +#include +#include +#include +#include + +extern "C" { +#include +#include +} + +class VariorumProvider { + public: + VariorumProvider(); + ~VariorumProvider(); + + // Core functionality + bool initialize(); + void finalize(); + bool is_initialized() const { return initialized_; } + + // Power monitoring + double get_total_power_usage(); + double get_device_power_usage(size_t device_index); + + // Device information + size_t get_device_count() const; + std::string get_device_name(size_t 
device_index) const; + + private: + struct JsonDeleter { + void operator()(json_t* json) const { + if (json) json_decref(json); + } + }; + using unique_json_ptr = std::unique_ptr; + + struct CFreeDeleter { + void operator()(char* ptr) const { + if (ptr) free(ptr); + } + }; + using unique_cstring = std::unique_ptr; + + // Internal methods + bool discover_devices(); + void cleanup_devices(); + unique_json_ptr get_variorum_json_data() const; + std::map get_current_power_readings() const; + + // Member variables + bool initialized_; + std::vector device_ids_; + std::vector device_names_; +}; \ No newline at end of file diff --git a/profiling/energy-profiler/readme.md b/profiling/energy-profiler/readme.md deleted file mode 100644 index 124be31d6..000000000 --- a/profiling/energy-profiler/readme.md +++ /dev/null @@ -1,17 +0,0 @@ -# Energy Profiling Tools for Kokkos - -Collection of tools for energy profiling in Kokkos applications. - -## Available Tools - -- **Variorum**: Multi-vendor support (NVIDIA, AMD, Intel) -- **NVML**: NVIDIA GPU specific - -Each tool directory contains: -- Source code -- Build/usage instructions -- Documentation on output format - -## Daemon Mechanism - -A "daemon" mechanism is used to collect power data during Kokkos application execution. This allows for continuous power monitoring with minimal overhead or more generally for data sampling at a specified interval. 
\ No newline at end of file diff --git a/profiling/energy-profiler/tests/CMakeLists.txt b/profiling/energy-profiler/tests/CMakeLists.txt new file mode 100644 index 000000000..67b433570 --- /dev/null +++ b/profiling/energy-profiler/tests/CMakeLists.txt @@ -0,0 +1,166 @@ +# Tests for Energy Profiler + +enable_testing() + +# Find Threads package for pthread support (needed by std::thread in daemon.cpp) +find_package(Threads REQUIRED) + +# Daemon test +add_executable(daemon_test + daemon_test.cpp + ../common/daemon.cpp +) + +target_include_directories(daemon_test PRIVATE + ../common +) + +target_link_libraries(daemon_test PRIVATE Threads::Threads) + +add_test(NAME daemon_test COMMAND daemon_test) + +# NVML Provider test +add_executable(nvml_provider_test + nvml_provider_test.cpp + ../provider/provider_nvml.cpp +) + +target_include_directories(nvml_provider_test PRIVATE + ../provider +) + +# Find CUDA Toolkit and NVML for NVML tests +find_package(CUDAToolkit QUIET) +if(CUDAToolkit_FOUND) + find_package(CUDA::nvml QUIET) + if(TARGET CUDA::nvml) + message(STATUS "Found CUDA NVML, making NVML tests available.") + target_link_libraries(nvml_provider_test PRIVATE CUDA::nvml) + add_test(NAME nvml_provider_test COMMAND nvml_provider_test) # Register test if NVML is found + else() + message(WARNING "CUDA::nvml target not found. 
NVML tests may not work properly.") + endif() +else() + message(STATUS "CUDAToolkit not found, NVML tests may not work properly.") +endif() + +# Daemon NVML Integration test +add_executable(daemon_nvml_integration_test + daemon_nvml_integration_test.cpp + ../common/daemon.cpp + ../provider/provider_nvml.cpp +) + +target_include_directories(daemon_nvml_integration_test PRIVATE + ../common + ../provider +) + +if(CUDAToolkit_FOUND AND TARGET CUDA::nvml) + target_link_libraries(daemon_nvml_integration_test PRIVATE CUDA::nvml Threads::Threads) + add_test(NAME daemon_nvml_integration_test COMMAND daemon_nvml_integration_test) # Register test if NVML is found +else() + target_link_libraries(daemon_nvml_integration_test PRIVATE Threads::Threads) +endif() + +# Fast Daemon NVML Integration test (20ms sampling) +add_executable(daemon_nvml_fast_test + daemon_nvml_fast_test.cpp + ../common/daemon.cpp + ../provider/provider_nvml.cpp +) + +target_include_directories(daemon_nvml_fast_test PRIVATE + ../common + ../provider +) + +if(CUDAToolkit_FOUND AND TARGET CUDA::nvml) + target_link_libraries(daemon_nvml_fast_test PRIVATE CUDA::nvml Threads::Threads) + add_test(NAME daemon_nvml_fast_test COMMAND daemon_nvml_fast_test) # Register test if NVML is found +else() + target_link_libraries(daemon_nvml_fast_test PRIVATE Threads::Threads) +endif() + +# Variorum Provider test +add_executable(variorum_provider_test + variorum_provider_test.cpp + ../provider/provider_variorum.cpp +) + +target_include_directories(variorum_provider_test PRIVATE + ../provider +) + +# Use Variorum configuration from root CMake +if(KOKKOSTOOLS_HAS_VARIORUM) + message(STATUS "Using Variorum configuration from root CMake, making Variorum tests available.") + target_link_libraries(variorum_provider_test PRIVATE variorum::variorum) + add_test(NAME variorum_provider_test COMMAND variorum_provider_test) # Register test if Variorum is found +else() + message(STATUS "Variorum not available from root CMake, Variorum tests 
may not work properly.") +endif() + +# Daemon Variorum Integration test +add_executable(daemon_variorum_integration_test + daemon_variorum_integration_test.cpp + ../common/daemon.cpp + ../provider/provider_variorum.cpp +) + +target_include_directories(daemon_variorum_integration_test PRIVATE + ../common + ../provider +) + +if(KOKKOSTOOLS_HAS_VARIORUM) + target_link_libraries(daemon_variorum_integration_test PRIVATE variorum::variorum Threads::Threads) + add_test(NAME daemon_variorum_integration_test COMMAND daemon_variorum_integration_test) # Register test if Variorum is found +else() + target_link_libraries(daemon_variorum_integration_test PRIVATE Threads::Threads) +endif() + +add_executable(daemon_variorum_fast_test + daemon_variorum_fast_test.cpp + ../common/daemon.cpp + ../provider/provider_variorum.cpp +) + +target_include_directories(daemon_variorum_fast_test PRIVATE + ../common + ../provider +) + +if(KOKKOSTOOLS_HAS_VARIORUM) + target_link_libraries(daemon_variorum_fast_test PRIVATE variorum::variorum Threads::Threads) + add_test(NAME daemon_variorum_fast_test COMMAND daemon_variorum_fast_test) # Register test if Variorum is found +else() + target_link_libraries(daemon_variorum_fast_test PRIVATE Threads::Threads) +endif() + +add_executable(timer_test + timer_test.cpp + ../common/timer.cpp +) + +target_include_directories(timer_test PRIVATE + ../common + ../tools +) + +add_test(NAME timer_test COMMAND timer_test) + +# CSV export test +add_executable(csv_export_test + csv_export_test.cpp + ../common/timer.cpp + ../common/filename_prefix.cpp + ../tools/kernel_timer_tool.cpp +) + +target_include_directories(csv_export_test PRIVATE + ../common + ../tools +) + +add_test(NAME csv_export_test COMMAND csv_export_test) \ No newline at end of file diff --git a/profiling/energy-profiler/tests/csv_export_test.cpp b/profiling/energy-profiler/tests/csv_export_test.cpp new file mode 100644 index 000000000..366f33795 --- /dev/null +++ 
b/profiling/energy-profiler/tests/csv_export_test.cpp @@ -0,0 +1,68 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include +#include +#include +#include "../common/timer.hpp" +#include "../tools/kernel_timer_tool.hpp" + +int main() { + std::cout << "Testing CSV export functions..." << std::endl; + + KernelTimerTool timer; + + // Simulate some kernel operations + timer.start_region("test_kernel_1", RegionType::ParallelFor, 1); + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + timer.end_region(); + + timer.start_region("test_kernel_2", RegionType::ParallelReduce, 2); + std::this_thread::sleep_for(std::chrono::milliseconds(5)); + timer.end_region(); + + timer.start_region("test_region", RegionType::UserRegion, 3); + std::this_thread::sleep_for(std::chrono::milliseconds(15)); + timer.end_region(); + + timer.start_region("test_deepcopy", RegionType::DeepCopy, 4); + std::this_thread::sleep_for(std::chrono::milliseconds(8)); + timer.end_region(); + + // Test the CSV export functions + const auto& kernels = timer.get_kernel_timings(); + const auto& regions = timer.get_region_timings(); + const auto& deepcopies = timer.get_deep_copy_timings(); + + std::cout << "Found " << kernels.size() << " kernels" << std::endl; + std::cout << "Found " << regions.size() << " regions" << std::endl; + std::cout << "Found " << deepcopies.size() << " deep copies" << std::endl; + + // Test export functions + KokkosTools::Timer::export_kernels_csv(kernels, "test_kernels.csv"); + 
KokkosTools::Timer::export_regions_csv(regions, "test_regions.csv"); + KokkosTools::Timer::export_deepcopies_csv(deepcopies, "test_deepcopies.csv"); + + // Test print functions + KokkosTools::Timer::print_kernels_summary(kernels); + KokkosTools::Timer::print_regions_summary(regions); + KokkosTools::Timer::print_deepcopies_summary(deepcopies); + + std::cout << "CSV export test completed successfully!" << std::endl; + + return 0; +} diff --git a/profiling/energy-profiler/tests/daemon_nvml_fast_test.cpp b/profiling/energy-profiler/tests/daemon_nvml_fast_test.cpp new file mode 100644 index 000000000..20a62b864 --- /dev/null +++ b/profiling/energy-profiler/tests/daemon_nvml_fast_test.cpp @@ -0,0 +1,226 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../common/daemon.hpp" +#include "../provider/provider_nvml.hpp" + +// Global variables for the monitoring function +static NVMLProvider* g_nvml_provider = nullptr; +static std::atomic g_sample_count{0}; +static std::atomic g_total_energy{0.0}; +static std::atomic g_min_power{std::numeric_limits::max()}; +static std::atomic g_max_power{0.0}; +static std::vector g_power_samples; +static std::mutex g_samples_mutex; + +void fast_power_monitoring_function() { + if (!g_nvml_provider || !g_nvml_provider->is_initialized()) { + return; + } + + double current_power = g_nvml_provider->get_total_power_usage(); + + // Update statistics atomically + g_sample_count.fetch_add(1); + + // Accumulate energy (Power * Time) + // Since we sample every 20ms, energy increment = power * 0.02 seconds + double expected = g_total_energy.load(); + while (!g_total_energy.compare_exchange_weak( + expected, expected + current_power * 0.02)) { + // Loop until successful update + } + + // Update min power + double current_min = g_min_power.load(); + while (current_power < current_min && + !g_min_power.compare_exchange_weak(current_min, current_power)) { + // Loop until successful update + } + 
+ // Update max power + double current_max = g_max_power.load(); + while (current_power > current_max && + !g_max_power.compare_exchange_weak(current_max, current_power)) { + // Loop until successful update + } + + // Store sample for statistical analysis (thread-safe) + { + std::lock_guard lock(g_samples_mutex); + g_power_samples.push_back(current_power); + } +} + +double calculate_standard_deviation(const std::vector& samples, + double mean) { + if (samples.size() <= 1) return 0.0; + + double sum_squared_diff = 0.0; + for (double sample : samples) { + double diff = sample - mean; + sum_squared_diff += diff * diff; + } + + return std::sqrt(sum_squared_diff / (samples.size() - 1)); +} + +bool test_daemon_nvml_fast_integration() { + std::cout << "=== Fast Daemon + NVML Integration Test (20ms sampling) ===" + << std::endl; + + // Reset global counters + g_sample_count = 0; + g_total_energy = 0.0; + g_min_power = std::numeric_limits::max(); + g_max_power = 0.0; + g_power_samples.clear(); + + // Initialize NVML provider + std::cout << "\n1. Initializing NVML provider..." << std::endl; + NVMLProvider nvml_provider; + if (!nvml_provider.initialize()) { + std::cout << "ERROR: Failed to initialize NVML provider" << std::endl; + return false; + } + + g_nvml_provider = &nvml_provider; + std::cout << "SUCCESS: NVML provider initialized with " + << nvml_provider.get_device_count() << " device(s)" << std::endl; + + // Create daemon with 20ms interval + std::cout << "\n2. Creating daemon with 20ms monitoring interval..." + << std::endl; + Daemon power_daemon(fast_power_monitoring_function, 20); + + // Start monitoring + std::cout << "\n3. Starting fast power monitoring..." << std::endl; + power_daemon.start(); + std::cout << "SUCCESS: Fast power monitoring started" << std::endl; + + // Let it run for 2 seconds + std::cout << "\n4. Monitoring for 2 seconds (high frequency sampling)..." 
+ << std::endl; + std::cout << " (No real-time output to avoid saturation)" << std::endl; + + auto start_time = std::chrono::high_resolution_clock::now(); + std::this_thread::sleep_for(std::chrono::seconds(2)); + auto end_time = std::chrono::high_resolution_clock::now(); + + // Stop monitoring + std::cout << "\n5. Stopping power monitoring..." << std::endl; + power_daemon.stop(); + std::cout << "SUCCESS: Power monitoring stopped" << std::endl; + + // Calculate actual monitoring duration + auto actual_duration = std::chrono::duration_cast( + end_time - start_time); + + // Analyze collected data + std::cout << "\n=== Statistical Analysis ===" << std::endl; + + uint32_t total_samples = g_sample_count.load(); + double total_energy = g_total_energy.load(); + double min_power = g_min_power.load(); + double max_power = g_max_power.load(); + + std::cout << "Monitoring duration: " << actual_duration.count() << " ms" + << std::endl; + std::cout << "Total samples collected: " << total_samples << std::endl; + std::cout << "Expected samples (50 Hz): " << (actual_duration.count() / 20) + << std::endl; + std::cout << "Sampling efficiency: " << std::fixed << std::setprecision(1) + << (100.0 * total_samples / (actual_duration.count() / 20.0)) << "%" + << std::endl; + + if (total_samples > 0) { + double avg_power = total_energy / (total_samples * 0.02); + + std::cout << "\n=== Power Statistics ===" << std::endl; + std::cout << "Average power: " << std::fixed << std::setprecision(2) + << avg_power << " W" << std::endl; + std::cout << "Minimum power: " << std::fixed << std::setprecision(2) + << min_power << " W" << std::endl; + std::cout << "Maximum power: " << std::fixed << std::setprecision(2) + << max_power << " W" << std::endl; + std::cout << "Power range: " << std::fixed << std::setprecision(2) + << (max_power - min_power) << " W" << std::endl; + std::cout << "Total energy consumed: " << std::fixed << std::setprecision(3) + << total_energy << " J" << std::endl; + + // Calculate 
additional statistics from stored samples + { + std::lock_guard lock(g_samples_mutex); + if (!g_power_samples.empty()) { + std::sort(g_power_samples.begin(), g_power_samples.end()); + + size_t n = g_power_samples.size(); + double median = + (n % 2 == 0) + ? (g_power_samples[n / 2 - 1] + g_power_samples[n / 2]) / 2.0 + : g_power_samples[n / 2]; + + double q1 = g_power_samples[n / 4]; + double q3 = g_power_samples[3 * n / 4]; + + double std_dev = + calculate_standard_deviation(g_power_samples, avg_power); + + std::cout << "\n=== Extended Statistics ===" << std::endl; + std::cout << "Median power: " << std::fixed << std::setprecision(2) + << median << " W" << std::endl; + std::cout << "Q1 (25th percentile): " << std::fixed + << std::setprecision(2) << q1 << " W" << std::endl; + std::cout << "Q3 (75th percentile): " << std::fixed + << std::setprecision(2) << q3 << " W" << std::endl; + std::cout << "Standard deviation: " << std::fixed + << std::setprecision(2) << std_dev << " W" << std::endl; + std::cout << "Coefficient of variation: " << std::fixed + << std::setprecision(1) << (100.0 * std_dev / avg_power) + << "%" << std::endl; + } + } + + // Show per-device breakdown if multiple devices + size_t device_count = nvml_provider.get_device_count(); + if (device_count > 1) { + std::cout << "\n=== Per-Device Final Readings ===" << std::endl; + for (size_t i = 0; i < device_count; ++i) { + double device_power = nvml_provider.get_device_power_usage(i); + std::string device_name = nvml_provider.get_device_name(i); + std::cout << " " << device_name << ": " << std::fixed + << std::setprecision(2) << device_power << " W" << std::endl; + } + } + } + + // Cleanup + std::cout << "\n6. Cleaning up..." << std::endl; + g_nvml_provider = nullptr; + nvml_provider.finalize(); + std::cout << "SUCCESS: Cleanup completed" << std::endl; + + return true; +} + +int main() { + try { + if (test_daemon_nvml_fast_integration()) { + std::cout << "\nFast integration test PASSED!" 
<< std::endl; + return 0; + } else { + std::cout << "\nFast integration test FAILED!" << std::endl; + return 1; + } + } catch (const std::exception& e) { + std::cerr << "\nTest failed with exception: " << e.what() << std::endl; + return 1; + } +} \ No newline at end of file diff --git a/profiling/energy-profiler/tests/daemon_nvml_integration_test.cpp b/profiling/energy-profiler/tests/daemon_nvml_integration_test.cpp new file mode 100644 index 000000000..9f052c2b4 --- /dev/null +++ b/profiling/energy-profiler/tests/daemon_nvml_integration_test.cpp @@ -0,0 +1,123 @@ +#include +#include +#include +#include +#include +#include "../common/daemon.hpp" +#include "../provider/provider_nvml.hpp" + +// Global variables for the monitoring function +static NVMLProvider* g_nvml_provider = nullptr; +static std::atomic g_sample_count{0}; +static std::atomic g_total_energy{0.0}; +static std::atomic g_last_power{0.0}; + +void power_monitoring_function() { + if (!g_nvml_provider || !g_nvml_provider->is_initialized()) { + std::cout << "ERROR: NVML provider not initialized" << std::endl; + return; + } + + double current_power = g_nvml_provider->get_total_power_usage(); + g_last_power.store(current_power); + + // Accumulate energy (Power * Time) + // Since we sample every 1000ms, energy increment = power * 1.0 seconds + double expected = g_total_energy.load(); + while (!g_total_energy.compare_exchange_weak( + expected, expected + current_power * 1.0)) { + // Loop until successful update + } + + uint32_t sample_num = g_sample_count.fetch_add(1) + 1; + + std::cout << std::fixed << std::setprecision(2) << "Sample #" << sample_num + << " - Power: " << current_power << " W" + << " - Total Energy: " << g_total_energy.load() << " J" + << std::endl; + + // Display individual device power if multiple devices + size_t device_count = g_nvml_provider->get_device_count(); + if (device_count > 1) { + for (size_t i = 0; i < device_count; ++i) { + double device_power = 
g_nvml_provider->get_device_power_usage(i); + if (device_power >= 0.0) { + std::cout << " Device " << i << " (" + << g_nvml_provider->get_device_name(i) + << "): " << device_power << " W" << std::endl; + } + } + } +} + +bool test_daemon_nvml_integration() { + std::cout << "=== Daemon + NVML Integration Test ===" << std::endl; + + // Initialize NVML provider + std::cout << "\n1. Initializing NVML provider..." << std::endl; + NVMLProvider nvml_provider; + if (!nvml_provider.initialize()) { + std::cout << "ERROR: Failed to initialize NVML provider" << std::endl; + return false; + } + + g_nvml_provider = &nvml_provider; + std::cout << "SUCCESS: NVML provider initialized with " + << nvml_provider.get_device_count() << " device(s)" << std::endl; + + // Create daemon with 1-second interval + std::cout << "\n2. Creating daemon with 1-second monitoring interval..." + << std::endl; + Daemon power_daemon(power_monitoring_function, 1000); + + // Start monitoring + std::cout << "\n3. Starting power monitoring..." << std::endl; + power_daemon.start(); + std::cout << "SUCCESS: Power monitoring started" << std::endl; + + // Let it run for 2 seconds + std::cout << "\n4. Monitoring for 2 seconds..." << std::endl; + std::this_thread::sleep_for(std::chrono::seconds(2)); + + // Stop monitoring + std::cout << "\n5. Stopping power monitoring..." 
<< std::endl; + power_daemon.stop(); + std::cout << "SUCCESS: Power monitoring stopped" << std::endl; + + // Display final statistics + std::cout << "\n=== Final Statistics ===" << std::endl; + std::cout << "Total samples: " << g_sample_count.load() << std::endl; + std::cout << "Last power reading: " << std::fixed << std::setprecision(2) + << g_last_power.load() << " W" << std::endl; + std::cout << "Total energy consumed: " << std::fixed << std::setprecision(2) + << g_total_energy.load() << " J" << std::endl; + + if (g_sample_count.load() > 0) { + double avg_power = g_total_energy.load() / g_sample_count.load(); + std::cout << "Average power: " << std::fixed << std::setprecision(2) + << avg_power << " W" << std::endl; + } + + // Cleanup + std::cout << "\n6. Cleaning up..." << std::endl; + g_nvml_provider = nullptr; + nvml_provider.finalize(); + std::cout << "SUCCESS: Cleanup completed" << std::endl; + + return true; +} + +int main() { + try { + if (test_daemon_nvml_integration()) { + std::cout << "\nIntegration test PASSED!" << std::endl; + return 0; + } else { + std::cout << "\nIntegration test FAILED!" 
<< std::endl; + return 1; + } + } catch (const std::exception& e) { + std::cerr << "\nTest failed with exception: " << e.what() << std::endl; + return 1; + } +} \ No newline at end of file diff --git a/profiling/energy-profiler/tests/daemon_test.cpp b/profiling/energy-profiler/tests/daemon_test.cpp new file mode 100644 index 000000000..f33a6da01 --- /dev/null +++ b/profiling/energy-profiler/tests/daemon_test.cpp @@ -0,0 +1,221 @@ +#include +#include +#include +#include +#include +#include +#include "../common/daemon.hpp" + +// Test counters and flags +static std::atomic counter{0}; +static std::atomic fast_counter{0}; +static std::atomic slow_counter{0}; +static std::atomic exception_thrown{false}; + +// Test functions +void hello_world() { + std::cout << "Hello World (execution #" << counter.load() + 1 << ")" + << std::endl; + counter++; +} + +void fast_function() { + fast_counter++; + // Very fast function (< 1ms) +} + +void slow_function() { + slow_counter++; + // Simulate a function that takes longer than interval + std::this_thread::sleep_for(std::chrono::milliseconds(150)); +} + +void exception_function() { + exception_thrown = true; + throw std::runtime_error("Test exception in daemon function"); +} + +// Test utilities +bool test_basic_functionality() { + std::cout << "\n=== Test 1: Basic Functionality ===" << std::endl; + + counter = 0; + Daemon daemon(hello_world, 100); + + // Test initial state + assert(!daemon.is_running()); + + std::cout << "Starting daemon..." << std::endl; + daemon.start(); + + // Test running state + assert(daemon.is_running()); + + // Let it run for ~350ms (should execute ~3-4 times) + std::this_thread::sleep_for(std::chrono::milliseconds(350)); + + daemon.stop(); + + // Test stopped state + assert(!daemon.is_running()); + + uint32_t final_count = counter.load(); + std::cout << "Daemon finished. 
Counter: " << final_count << std::endl; + + // Should have executed 3-4 times (allowing some tolerance for timing) + bool success = (final_count >= 3 && final_count <= 4); + std::cout << "Test 1 " << (success ? "PASSED" : "FAILED") << std::endl; + return success; +} + +bool test_timing_accuracy() { + std::cout << "\n=== Test 2: Timing Accuracy ===" << std::endl; + + fast_counter = 0; + Daemon daemon(fast_function, 50); // 50ms interval + + auto start_time = std::chrono::high_resolution_clock::now(); + daemon.start(); + + // Run for exactly 250ms + std::this_thread::sleep_for(std::chrono::milliseconds(250)); + + daemon.stop(); + auto end_time = std::chrono::high_resolution_clock::now(); + + uint32_t executions = fast_counter.load(); + auto actual_duration = std::chrono::duration_cast( + end_time - start_time); + + std::cout << "Executions: " << executions << std::endl; + std::cout << "Actual duration: " << actual_duration.count() << "ms" + << std::endl; + + // Should execute ~5 times (250ms / 50ms = 5) + bool success = (executions >= 4 && executions <= 6); + std::cout << "Test 2 " << (success ? 
"PASSED" : "FAILED") << std::endl; + return success; +} + +bool test_slow_function_handling() { + std::cout << "\n=== Test 3: Slow Function Handling ===" << std::endl; + + slow_counter = 0; + Daemon daemon(slow_function, + 100); // 100ms interval, but function takes 150ms + + auto start_time = std::chrono::high_resolution_clock::now(); + daemon.start(); + + // Run for 400ms + std::this_thread::sleep_for(std::chrono::milliseconds(400)); + + daemon.stop(); + auto end_time = std::chrono::high_resolution_clock::now(); + + uint32_t executions = slow_counter.load(); + auto actual_duration = std::chrono::duration_cast( + end_time - start_time); + + std::cout << "Executions: " << executions << std::endl; + std::cout << "Actual duration: " << actual_duration.count() << "ms" + << std::endl; + + // Should execute 2-3 times (each execution takes ~150ms, total time ~400ms) + bool success = (executions >= 2 && executions <= 3); + std::cout << "Test 3 " << (success ? "PASSED" : "FAILED") << std::endl; + return success; +} + +bool test_start_stop_edge_cases() { + std::cout << "\n=== Test 4: Start/Stop Edge Cases ===" << std::endl; + + Daemon daemon(hello_world, 1000); + bool success = true; + + // Test double start + try { + daemon.start(); + daemon.start(); // Should throw + success = false; + std::cout << "ERROR: Double start should have thrown exception" + << std::endl; + } catch (const std::runtime_error& e) { + std::cout << "Double start correctly threw: " << e.what() << std::endl; + } + + daemon.stop(); + + // Test double stop + try { + daemon.stop(); // Should throw + success = false; + std::cout << "ERROR: Double stop should have thrown exception" << std::endl; + } catch (const std::runtime_error& e) { + std::cout << "Double stop correctly threw: " << e.what() << std::endl; + } + + // Test stop without start + Daemon daemon2(hello_world, 1000); + try { + daemon2.stop(); // Should throw + success = false; + std::cout << "ERROR: Stop without start should have thrown 
exception" + << std::endl; + } catch (const std::runtime_error& e) { + std::cout << "Stop without start correctly threw: " << e.what() + << std::endl; + } + + std::cout << "Test 4 " << (success ? "PASSED" : "FAILED") << std::endl; + return success; +} + +bool test_thread_safety() { + std::cout << "\n=== Test 5: Thread Safety ===" << std::endl; + + counter = 0; + Daemon daemon(hello_world, 200); // Fast interval + + daemon.start(); + + // Check is_running from main thread while daemon is running + bool running_check1 = daemon.is_running(); + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + bool running_check2 = daemon.is_running(); + + daemon.stop(); + bool running_check3 = daemon.is_running(); + + bool success = running_check1 && running_check2 && !running_check3; + std::cout << "Running state checks: " << running_check1 << ", " + << running_check2 << ", " << running_check3 << std::endl; + std::cout << "Executions during test: " << counter.load() << std::endl; + std::cout << "Test 5 " << (success ? "PASSED" : "FAILED") << std::endl; + return success; +} + +int main() { + std::cout << "=== Daemon Comprehensive Test Suite ===" << std::endl; + + int passed = 0; + int total = 5; + + if (test_basic_functionality()) passed++; + if (test_timing_accuracy()) passed++; + if (test_slow_function_handling()) passed++; + if (test_start_stop_edge_cases()) passed++; + if (test_thread_safety()) passed++; + + std::cout << "\n=== Test Results ===" << std::endl; + std::cout << "Passed: " << passed << "/" << total << std::endl; + + if (passed == total) { + std::cout << "ALL TESTS PASSED! Daemon is working correctly." << std::endl; + return 0; + } else { + std::cout << "Some tests failed. Please check the daemon implementation." 
+ << std::endl; + return 1; + } +} \ No newline at end of file diff --git a/profiling/energy-profiler/tests/daemon_variorum_fast_test.cpp b/profiling/energy-profiler/tests/daemon_variorum_fast_test.cpp new file mode 100644 index 000000000..747fa3368 --- /dev/null +++ b/profiling/energy-profiler/tests/daemon_variorum_fast_test.cpp @@ -0,0 +1,227 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../common/daemon.hpp" +#include "../provider/provider_variorum.hpp" + +// Global variables for the monitoring function +static VariorumProvider* g_variorum_provider = nullptr; +static std::atomic g_sample_count{0}; +static std::atomic g_total_energy{0.0}; +static std::atomic g_min_power{std::numeric_limits::max()}; +static std::atomic g_max_power{0.0}; +static std::vector g_power_samples; +static std::mutex g_samples_mutex; + +void fast_power_monitoring_function() { + if (!g_variorum_provider || !g_variorum_provider->is_initialized()) { + return; + } + + double current_power = g_variorum_provider->get_total_power_usage(); + + // Update statistics atomically + g_sample_count.fetch_add(1); + + // Accumulate energy (Power * Time) + // Since we sample every 20ms, energy increment = power * 0.02 seconds + double expected = g_total_energy.load(); + while (!g_total_energy.compare_exchange_weak( + expected, expected + current_power * 0.02)) { + // Loop until successful update + } + + // Update min power + double current_min = g_min_power.load(); + while (current_power < current_min && + !g_min_power.compare_exchange_weak(current_min, current_power)) { + // Loop until successful update + } + + // Update max power + double current_max = g_max_power.load(); + while (current_power > current_max && + !g_max_power.compare_exchange_weak(current_max, current_power)) { + // Loop until successful update + } + + // Store sample for statistical analysis (thread-safe) + { + std::lock_guard lock(g_samples_mutex); + 
g_power_samples.push_back(current_power); + } +} + +double calculate_standard_deviation(const std::vector& samples, + double mean) { + if (samples.size() <= 1) return 0.0; + + double sum_squared_diff = 0.0; + for (double sample : samples) { + double diff = sample - mean; + sum_squared_diff += diff * diff; + } + + return std::sqrt(sum_squared_diff / (samples.size() - 1)); +} + +bool test_daemon_variorum_fast_integration() { + std::cout << "=== Fast Daemon + Variorum Integration Test (20ms sampling) ===" + << std::endl; + + // Reset global counters + g_sample_count = 0; + g_total_energy = 0.0; + g_min_power = std::numeric_limits::max(); + g_max_power = 0.0; + g_power_samples.clear(); + + // Initialize Variorum provider + std::cout << "\n1. Initializing Variorum provider..." << std::endl; + VariorumProvider variorum_provider; + if (!variorum_provider.initialize()) { + std::cout << "ERROR: Failed to initialize Variorum provider" << std::endl; + return false; + } + + g_variorum_provider = &variorum_provider; + std::cout << "SUCCESS: Variorum provider initialized with " + << variorum_provider.get_device_count() << " device(s)" + << std::endl; + + // Create daemon with 20ms interval + std::cout << "\n2. Creating daemon with 20ms monitoring interval..." + << std::endl; + Daemon power_daemon(fast_power_monitoring_function, 20); + + // Start monitoring + std::cout << "\n3. Starting fast power monitoring..." << std::endl; + power_daemon.start(); + std::cout << "SUCCESS: Fast power monitoring started" << std::endl; + + // Let it run for 2 seconds + std::cout << "\n4. Monitoring for 2 seconds (high frequency sampling)..." + << std::endl; + std::cout << " (No real-time output to avoid saturation)" << std::endl; + + auto start_time = std::chrono::high_resolution_clock::now(); + std::this_thread::sleep_for(std::chrono::seconds(2)); + auto end_time = std::chrono::high_resolution_clock::now(); + + // Stop monitoring + std::cout << "\n5. Stopping power monitoring..." 
<< std::endl; + power_daemon.stop(); + std::cout << "SUCCESS: Power monitoring stopped" << std::endl; + + // Calculate actual monitoring duration + auto actual_duration = std::chrono::duration_cast( + end_time - start_time); + + // Analyze collected data + std::cout << "\n=== Statistical Analysis ===" << std::endl; + + uint32_t total_samples = g_sample_count.load(); + double total_energy = g_total_energy.load(); + double min_power = g_min_power.load(); + double max_power = g_max_power.load(); + + std::cout << "Monitoring duration: " << actual_duration.count() << " ms" + << std::endl; + std::cout << "Total samples collected: " << total_samples << std::endl; + std::cout << "Expected samples (50 Hz): " << (actual_duration.count() / 20) + << std::endl; + std::cout << "Sampling efficiency: " << std::fixed << std::setprecision(1) + << (100.0 * total_samples / (actual_duration.count() / 20.0)) << "%" + << std::endl; + + if (total_samples > 0) { + double avg_power = total_energy / (total_samples * 0.02); + + std::cout << "\n=== Power Statistics ===" << std::endl; + std::cout << "Average power: " << std::fixed << std::setprecision(2) + << avg_power << " W" << std::endl; + std::cout << "Minimum power: " << std::fixed << std::setprecision(2) + << min_power << " W" << std::endl; + std::cout << "Maximum power: " << std::fixed << std::setprecision(2) + << max_power << " W" << std::endl; + std::cout << "Power range: " << std::fixed << std::setprecision(2) + << (max_power - min_power) << " W" << std::endl; + std::cout << "Total energy consumed: " << std::fixed << std::setprecision(3) + << total_energy << " J" << std::endl; + + // Calculate additional statistics from stored samples + { + std::lock_guard lock(g_samples_mutex); + if (!g_power_samples.empty()) { + std::sort(g_power_samples.begin(), g_power_samples.end()); + + size_t n = g_power_samples.size(); + double median = + (n % 2 == 0) + ? 
(g_power_samples[n / 2 - 1] + g_power_samples[n / 2]) / 2.0 + : g_power_samples[n / 2]; + + double q1 = g_power_samples[n / 4]; + double q3 = g_power_samples[3 * n / 4]; + + double std_dev = + calculate_standard_deviation(g_power_samples, avg_power); + + std::cout << "\n=== Extended Statistics ===" << std::endl; + std::cout << "Median power: " << std::fixed << std::setprecision(2) + << median << " W" << std::endl; + std::cout << "Q1 (25th percentile): " << std::fixed + << std::setprecision(2) << q1 << " W" << std::endl; + std::cout << "Q3 (75th percentile): " << std::fixed + << std::setprecision(2) << q3 << " W" << std::endl; + std::cout << "Standard deviation: " << std::fixed + << std::setprecision(2) << std_dev << " W" << std::endl; + std::cout << "Coefficient of variation: " << std::fixed + << std::setprecision(1) << (100.0 * std_dev / avg_power) + << "%" << std::endl; + } + } + + // Show per-device breakdown if multiple devices + size_t device_count = variorum_provider.get_device_count(); + if (device_count > 1) { + std::cout << "\n=== Per-Device Final Readings ===" << std::endl; + for (size_t i = 0; i < device_count; ++i) { + double device_power = variorum_provider.get_device_power_usage(i); + std::string device_name = variorum_provider.get_device_name(i); + std::cout << " " << device_name << ": " << std::fixed + << std::setprecision(2) << device_power << " W" << std::endl; + } + } + } + + // Cleanup + std::cout << "\n6. Cleaning up..." << std::endl; + g_variorum_provider = nullptr; + variorum_provider.finalize(); + std::cout << "SUCCESS: Cleanup completed" << std::endl; + + return true; +} + +int main() { + try { + if (test_daemon_variorum_fast_integration()) { + std::cout << "\nFast integration test PASSED!" << std::endl; + return 0; + } else { + std::cout << "\nFast integration test FAILED!" 
<< std::endl; + return 1; + } + } catch (const std::exception& e) { + std::cerr << "\nTest failed with exception: " << e.what() << std::endl; + return 1; + } +} \ No newline at end of file diff --git a/profiling/energy-profiler/tests/daemon_variorum_integration_test.cpp b/profiling/energy-profiler/tests/daemon_variorum_integration_test.cpp new file mode 100644 index 000000000..fbbb1dfb6 --- /dev/null +++ b/profiling/energy-profiler/tests/daemon_variorum_integration_test.cpp @@ -0,0 +1,128 @@ +#include +#include +#include +#include +#include +#include "../common/daemon.hpp" +#include "../provider/provider_variorum.hpp" + +// Global variables for the monitoring function +static VariorumProvider* g_variorum_provider = nullptr; +static std::atomic g_sample_count{0}; +static std::atomic g_total_energy{0.0}; +static std::atomic g_last_power{0.0}; + +void power_monitoring_function() { + if (!g_variorum_provider || !g_variorum_provider->is_initialized()) { + std::cout << "ERROR: Variorum provider not initialized" << std::endl; + return; + } + + double current_power = g_variorum_provider->get_total_power_usage(); + g_last_power.store(current_power); + + // Accumulate energy (Power * Time) + // Since we sample every 1000ms, energy increment = power * 1.0 seconds + double expected = g_total_energy.load(); + while (!g_total_energy.compare_exchange_weak( + expected, expected + current_power * 1.0)) { + // Loop until successful update + } + + uint32_t sample_num = g_sample_count.fetch_add(1) + 1; + + std::cout << std::fixed << std::setprecision(2) << "Sample " << sample_num + << ": " << current_power + << " W (Total Energy: " << g_total_energy.load() << " J)" + << std::endl; + + // Show individual device power if multiple devices + size_t device_count = g_variorum_provider->get_device_count(); + if (device_count > 1) { + for (size_t i = 0; i < device_count; ++i) { + double device_power = g_variorum_provider->get_device_power_usage(i); + if (device_power >= 0.0) { + std::cout << 
" " << g_variorum_provider->get_device_name(i) << ": " + << device_power << " W" << std::endl; + } + } + } +} + +bool test_daemon_variorum_integration() { + std::cout << "=== Daemon + Variorum Integration Test ===" << std::endl; + + // Reset global counters + g_sample_count = 0; + g_total_energy = 0.0; + g_last_power = 0.0; + + // Initialize Variorum provider + std::cout << "\n1. Initializing Variorum provider..." << std::endl; + VariorumProvider variorum_provider; + if (!variorum_provider.initialize()) { + std::cout << "ERROR: Failed to initialize Variorum provider" << std::endl; + return false; + } + + g_variorum_provider = &variorum_provider; + std::cout << "SUCCESS: Variorum provider initialized with " + << variorum_provider.get_device_count() << " device(s)" + << std::endl; + + // Create daemon with 1-second interval + std::cout << "\n2. Creating daemon with 1-second monitoring interval..." + << std::endl; + Daemon power_daemon(power_monitoring_function, 1000); + + // Start monitoring + std::cout << "\n3. Starting power monitoring..." << std::endl; + power_daemon.start(); + std::cout << "SUCCESS: Power monitoring started" << std::endl; + + // Let it run for 2 seconds + std::cout << "\n4. Monitoring for 2 seconds..." << std::endl; + std::this_thread::sleep_for(std::chrono::seconds(2)); + + // Stop monitoring + std::cout << "\n5. Stopping power monitoring..." 
<< std::endl; + power_daemon.stop(); + std::cout << "SUCCESS: Power monitoring stopped" << std::endl; + + // Display final statistics + std::cout << "\n=== Final Statistics ===" << std::endl; + std::cout << "Total samples: " << g_sample_count.load() << std::endl; + std::cout << "Last power reading: " << std::fixed << std::setprecision(2) + << g_last_power.load() << " W" << std::endl; + std::cout << "Total energy consumed: " << std::fixed << std::setprecision(2) + << g_total_energy.load() << " J" << std::endl; + + if (g_sample_count.load() > 0) { + double avg_power = g_total_energy.load() / (g_sample_count.load() * 1.0); + std::cout << "Average power: " << std::fixed << std::setprecision(2) + << avg_power << " W" << std::endl; + } + + // Cleanup + std::cout << "\n6. Cleaning up..." << std::endl; + g_variorum_provider = nullptr; + variorum_provider.finalize(); + std::cout << "SUCCESS: Cleanup completed" << std::endl; + + return true; +} + +int main() { + try { + if (test_daemon_variorum_integration()) { + std::cout << "\nIntegration test PASSED!" << std::endl; + return 0; + } else { + std::cout << "\nIntegration test FAILED!" << std::endl; + return 1; + } + } catch (const std::exception& e) { + std::cerr << "\nTest failed with exception: " << e.what() << std::endl; + return 1; + } +} \ No newline at end of file diff --git a/profiling/energy-profiler/tests/nvml_provider_test.cpp b/profiling/energy-profiler/tests/nvml_provider_test.cpp new file mode 100644 index 000000000..8f5d015ad --- /dev/null +++ b/profiling/energy-profiler/tests/nvml_provider_test.cpp @@ -0,0 +1,100 @@ +#include +#include +#include +#include "../provider/provider_nvml.hpp" + +void test_nvml_provider() { + std::cout << "=== NVML Provider Test ===" << std::endl; + + NVMLProvider provider; + + // Test initialization + std::cout << "\n1. Testing initialization..." 
<< std::endl; + if (!provider.initialize()) { + std::cout << "ERROR: Failed to initialize NVML provider" << std::endl; + return; + } + std::cout << "SUCCESS: NVML provider initialized successfully" << std::endl; + + // Test device discovery + std::cout << "\n2. Testing device discovery..." << std::endl; + size_t device_count = provider.get_device_count(); + std::cout << "Found " << device_count << " device(s)" << std::endl; + + if (device_count == 0) { + std::cout << "ERROR: No devices found" << std::endl; + return; + } + + // Display device information + std::cout << "\n3. Device information:" << std::endl; + for (size_t i = 0; i < device_count; ++i) { + std::string name = provider.get_device_name(i); + std::cout << " Device " << i << ": " << name << std::endl; + } + + // Test power readings + std::cout << "\n4. Testing power readings..." << std::endl; + for (int sample = 0; sample < 5; ++sample) { + std::cout << "Sample " << (sample + 1) << ":" << std::endl; + + // Individual device power + for (size_t i = 0; i < device_count; ++i) { + double power = provider.get_device_power_usage(i); + if (power >= 0.0) { + std::cout << " Device " << i << ": " << power << " W" << std::endl; + } else { + std::cout << " Device " << i << ": Failed to read power" << std::endl; + } + } + + // Individual device direct power + for (size_t i = 0; i < device_count; ++i) { + double direct_power = provider.get_device_power_usage_direct(i); + if (direct_power >= 0.0) { + std::cout << " Device " << i << " (Direct): " << direct_power << " W" + << std::endl; + } else { + std::cout << " Device " << i + << " (Direct): Failed to read direct power" << std::endl; + } + } + + // Current energy consumption + for (size_t i = 0; i < device_count; ++i) { + double energy = provider.get_current_energy_consumption(i); + if (energy >= 0.0) { + std::cout << " Device " << i << " Energy: " << energy << " J" + << std::endl; + } else { + std::cout << " Device " << i << " Energy: Failed to read energy" + << 
std::endl; + } + } + + // Total power + double total_power = provider.get_total_power_usage(); + std::cout << " Total Power: " << total_power << " W" << std::endl; + + if (sample < 4) { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + } + + // Test finalization + std::cout << "\n5. Testing finalization..." << std::endl; + provider.finalize(); + std::cout << "SUCCESS: NVML provider finalized successfully" << std::endl; + + std::cout << "\n=== Test Completed ===" << std::endl; +} + +int main() { + try { + test_nvml_provider(); + return 0; + } catch (const std::exception& e) { + std::cerr << "ERROR: Test failed with exception: " << e.what() << std::endl; + return 1; + } +} \ No newline at end of file diff --git a/profiling/energy-profiler/tests/timer_test.cpp b/profiling/energy-profiler/tests/timer_test.cpp new file mode 100644 index 000000000..61a9a3cc2 --- /dev/null +++ b/profiling/energy-profiler/tests/timer_test.cpp @@ -0,0 +1,343 @@ +#include +#include +#include +#include +#include "../common/timer.hpp" + +// Test helper function to check if a value is within expected range +bool is_within_range(uint64_t actual, uint64_t expected, uint64_t tolerance) { + return (actual >= expected - tolerance) && (actual <= expected + tolerance); +} + +bool test_basic_timing() { + std::cout << "=== Test Basic Timing ===" << std::endl; + + EnergyTimer timer; + + // Test single timing + timer.start_timing(1, RegionType::ParallelFor, "test_kernel"); + std::this_thread::sleep_for(std::chrono::milliseconds(2)); + timer.end_timing(1); + + auto& timings = timer.get_timings(); + if (timings.size() != 1) { + std::cout << "ERROR: Expected 1 timing, got " << timings.size() + << std::endl; + return false; + } + + auto& timing = timings[1]; + if (!timing.is_ended()) { + std::cout << "ERROR: Timing should be ended" << std::endl; + return false; + } + + uint64_t duration = timing.get_duration_ms(); + if (!is_within_range(duration, 2, 2)) { // 2ms ± 2ms tolerance + std::cout 
<< "ERROR: Duration should be ~2ms, got " << duration << "ms" + << std::endl; + return false; + } + + if (timing.name_ != "test_kernel") { + std::cout << "ERROR: Wrong name, expected 'test_kernel', got '" + << timing.name_ << "'" << std::endl; + return false; + } + + if (timing.region_type_ != RegionType::ParallelFor) { + std::cout << "ERROR: Wrong region type" << std::endl; + return false; + } + + std::cout << "SUCCESS: Basic timing works correctly (duration: " << duration + << "ms)" << std::endl; + return true; +} + +bool test_multiple_timings() { + std::cout << "\n=== Test Multiple Timings ===" << std::endl; + + EnergyTimer timer; + + // Start multiple timings + timer.start_timing(1, RegionType::ParallelFor, "kernel_1"); + timer.start_timing(2, RegionType::ParallelReduce, "kernel_2"); + timer.start_timing(3, RegionType::UserRegion, "region_1"); + + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + timer.end_timing(1); + + std::this_thread::sleep_for(std::chrono::milliseconds(2)); + timer.end_timing(2); + + std::this_thread::sleep_for(std::chrono::milliseconds(2)); + timer.end_timing(3); + + auto& timings = timer.get_timings(); + if (timings.size() != 3) { + std::cout << "ERROR: Expected 3 timings, got " << timings.size() + << std::endl; + return false; + } + + // Check individual durations + uint64_t duration1 = timings[1].get_duration_ms(); + uint64_t duration2 = timings[2].get_duration_ms(); + uint64_t duration3 = timings[3].get_duration_ms(); + + if (!is_within_range(duration1, 1, 1)) { + std::cout << "ERROR: Duration1 should be ~1ms, got " << duration1 << "ms" + << std::endl; + return false; + } + + if (!is_within_range(duration2, 3, 2)) { // 1 + 2 = 3ms + std::cout << "ERROR: Duration2 should be ~3ms, got " << duration2 << "ms" + << std::endl; + return false; + } + + if (!is_within_range(duration3, 5, 2)) { // 1 + 2 + 2 = 5ms + std::cout << "ERROR: Duration3 should be ~5ms, got " << duration3 << "ms" + << std::endl; + return false; + } + + // 
Check that duration2 > duration1 and duration3 > duration2 + if (duration2 <= duration1) { + std::cout << "ERROR: Duration2 should be greater than duration1" + << std::endl; + return false; + } + + if (duration3 <= duration2) { + std::cout << "ERROR: Duration3 should be greater than duration2" + << std::endl; + return false; + } + + std::cout << "SUCCESS: Multiple timings work correctly" << std::endl; + std::cout << " Duration1: " << duration1 << "ms" << std::endl; + std::cout << " Duration2: " << duration2 << "ms" << std::endl; + std::cout << " Duration3: " << duration3 << "ms" << std::endl; + return true; +} + +bool test_region_types() { + std::cout << "\n=== Test Region Types ===" << std::endl; + + EnergyTimer timer; + + // Test all region types + timer.start_timing(1, RegionType::ParallelFor, "parallel_for"); + timer.start_timing(2, RegionType::ParallelScan, "parallel_scan"); + timer.start_timing(3, RegionType::ParallelReduce, "parallel_reduce"); + timer.start_timing(4, RegionType::DeepCopy, "deep_copy"); + timer.start_timing(5, RegionType::UserRegion, "user_region"); + timer.start_timing(6, RegionType::Unknown, "unknown_op"); + + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + + timer.end_timing(1); + timer.end_timing(2); + timer.end_timing(3); + timer.end_timing(4); + timer.end_timing(5); + timer.end_timing(6); + + auto& timings = timer.get_timings(); + if (timings.size() != 6) { + std::cout << "ERROR: Expected 6 timings, got " << timings.size() + << std::endl; + return false; + } + + // Verify region types + if (timings[1].region_type_ != RegionType::ParallelFor || + timings[2].region_type_ != RegionType::ParallelScan || + timings[3].region_type_ != RegionType::ParallelReduce || + timings[4].region_type_ != RegionType::DeepCopy || + timings[5].region_type_ != RegionType::UserRegion || + timings[6].region_type_ != RegionType::Unknown) { + std::cout << "ERROR: Region types not correctly set" << std::endl; + return false; + } + + // Verify names + 
if (timings[1].name_ != "parallel_for" || + timings[2].name_ != "parallel_scan" || + timings[3].name_ != "parallel_reduce" || + timings[4].name_ != "deep_copy" || timings[5].name_ != "user_region" || + timings[6].name_ != "unknown_op") { + std::cout << "ERROR: Names not correctly set" << std::endl; + return false; + } + + std::cout << "SUCCESS: All region types work correctly" << std::endl; + return true; +} + +bool test_error_handling() { + std::cout << "\n=== Test Error Handling ===" << std::endl; + + EnergyTimer timer; + + // Test ending non-existent timing (should not crash) + timer.end_timing(999); // This should not crash + + // Test getting duration before ending + timer.start_timing(1, RegionType::ParallelFor, "test"); + auto& timings = timer.get_timings(); + + if (timings[1].is_ended()) { + std::cout << "ERROR: Timing should not be ended yet" << std::endl; + return false; + } + + // End the timing + timer.end_timing(1); + + if (!timings[1].is_ended()) { + std::cout << "ERROR: Timing should be ended now" << std::endl; + return false; + } + + // Test ending the same timing twice (should not crash) + timer.end_timing(1); + + std::cout << "SUCCESS: Error handling works correctly" << std::endl; + return true; +} + +bool test_precision() { + std::cout << "\n=== Test Precision ===" << std::endl; + + EnergyTimer timer; + + // Test very short timing (should be 0 or 1 ms) + timer.start_timing(1, RegionType::ParallelFor, "short_op"); + // No sleep - immediate end + timer.end_timing(1); + + auto& timings = timer.get_timings(); + uint64_t short_duration = timings[1].get_duration_ms(); + + if (short_duration > 2) { // Should be very small + std::cout << "WARNING: Short duration is " << short_duration + << "ms (expected ≤2ms)" << std::endl; + } + + // Test longer timing for better precision + timer.start_timing(2, RegionType::ParallelFor, "long_op"); + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + timer.end_timing(2); + + uint64_t long_duration = 
timings[2].get_duration_ms(); + + if (!is_within_range(long_duration, 10, 5)) { + std::cout << "ERROR: Long duration should be ~10ms, got " << long_duration + << "ms" << std::endl; + return false; + } + + std::cout << "SUCCESS: Precision test passed" << std::endl; + std::cout << " Short duration: " << short_duration << "ms" << std::endl; + std::cout << " Long duration: " << long_duration << "ms" << std::endl; + return true; +} + +bool test_concurrent_timings() { + std::cout << "\n=== Test Concurrent Timings ===" << std::endl; + + EnergyTimer timer; + + // Start overlapping timings + timer.start_timing(1, RegionType::ParallelFor, "outer"); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + + timer.start_timing(2, RegionType::ParallelReduce, "inner"); + std::this_thread::sleep_for(std::chrono::milliseconds(2)); + timer.end_timing(2); // End inner first + + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + timer.end_timing(1); // End outer last + + auto& timings = timer.get_timings(); + uint64_t outer_duration = timings[1].get_duration_ms(); + uint64_t inner_duration = timings[2].get_duration_ms(); + + // Outer should be longer than inner + if (outer_duration <= inner_duration) { + std::cout << "ERROR: Outer duration (" << outer_duration + << "ms) should be greater than inner duration (" << inner_duration + << "ms)" << std::endl; + return false; + } + + // Check approximate durations + if (!is_within_range(inner_duration, 2, 2)) { + std::cout << "ERROR: Inner duration should be ~2ms, got " << inner_duration + << "ms" << std::endl; + return false; + } + + if (!is_within_range(outer_duration, 4, 2)) { // 1 + 2 + 1 = 4ms + std::cout << "ERROR: Outer duration should be ~4ms, got " << outer_duration + << "ms" << std::endl; + return false; + } + + std::cout << "SUCCESS: Concurrent timings work correctly" << std::endl; + std::cout << " Outer duration: " << outer_duration << "ms" << std::endl; + std::cout << " Inner duration: " << inner_duration << "ms" 
<< std::endl; + return true; +} + +bool very_long_timing() { + std::cout << "\n=== Test Very Long Timing ===" << std::endl; + + EnergyTimer timer; + + timer.start_timing(1, RegionType::ParallelFor, "very_long_op"); + std::this_thread::sleep_for( + std::chrono::milliseconds(50)); // Sleep for 50ms instead of 1 second + timer.end_timing(1); + + auto& timings = timer.get_timings(); + uint64_t duration = timings[1].get_duration_ms(); + + if (!is_within_range(duration, 50, 10)) { // Allow some margin of error + std::cout << "ERROR: Duration should be ~50ms, got " << duration << "ms" + << std::endl; + return false; + } + + std::cout << "SUCCESS: Very long timing works correctly (duration: " + << duration << "ms)" << std::endl; + return true; +} + +int main() { + std::cout << "Running EnergyTimer Tests..." << std::endl; + std::cout << "=============================" << std::endl; + + bool all_passed = true; + + all_passed &= test_basic_timing(); + all_passed &= test_multiple_timings(); + all_passed &= test_region_types(); + all_passed &= test_error_handling(); + all_passed &= test_precision(); + all_passed &= test_concurrent_timings(); + all_passed &= very_long_timing(); + + std::cout << "\n=============================" << std::endl; + if (all_passed) { + std::cout << "ALL TESTS PASSED!" << std::endl; + return 0; + } else { + std::cout << "SOME TESTS FAILED!" << std::endl; + return 1; + } +} \ No newline at end of file diff --git a/profiling/energy-profiler/tests/variorum_provider_test.cpp b/profiling/energy-profiler/tests/variorum_provider_test.cpp new file mode 100644 index 000000000..8dae4a4a5 --- /dev/null +++ b/profiling/energy-profiler/tests/variorum_provider_test.cpp @@ -0,0 +1,77 @@ +#include +#include +#include +#include "../provider/provider_variorum.hpp" + +void test_variorum_provider() { + std::cout << "=== Variorum Provider Test ===" << std::endl; + + VariorumProvider provider; + + // Test initialization + std::cout << "\n1. Testing initialization..." 
<< std::endl; + if (!provider.initialize()) { + std::cout << "ERROR: Failed to initialize Variorum provider" << std::endl; + return; + } + std::cout << "SUCCESS: Variorum provider initialized successfully" + << std::endl; + + // Test device discovery + std::cout << "\n2. Testing device discovery..." << std::endl; + size_t device_count = provider.get_device_count(); + std::cout << "Found " << device_count << " device(s)" << std::endl; + + if (device_count == 0) { + std::cout << "ERROR: No devices found" << std::endl; + return; + } + + // Display device information + std::cout << "\n3. Device information:" << std::endl; + for (size_t i = 0; i < device_count; ++i) { + std::string name = provider.get_device_name(i); + std::cout << " Device " << i << ": " << name << std::endl; + } + + // Test power readings + std::cout << "\n4. Testing power readings..." << std::endl; + for (int sample = 0; sample < 5; ++sample) { + std::cout << "Sample " << (sample + 1) << ":" << std::endl; + + // Individual device power + for (size_t i = 0; i < device_count; ++i) { + double power = provider.get_device_power_usage(i); + if (power >= 0.0) { + std::cout << " Device " << i << ": " << power << " W" << std::endl; + } else { + std::cout << " Device " << i << ": Failed to read power" << std::endl; + } + } + + // Total power + double total_power = provider.get_total_power_usage(); + std::cout << " Total Power: " << total_power << " W" << std::endl; + + if (sample < 4) { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + } + + // Test finalization + std::cout << "\n5. Testing finalization..." 
<< std::endl; + provider.finalize(); + std::cout << "SUCCESS: Variorum provider finalized successfully" << std::endl; + + std::cout << "\n=== Test Completed ===" << std::endl; +} + +int main() { + try { + test_variorum_provider(); + return 0; + } catch (const std::exception& e) { + std::cerr << "ERROR: Test failed with exception: " << e.what() << std::endl; + return 1; + } +} \ No newline at end of file diff --git a/profiling/energy-profiler/tools/kernel_timer_tool.cpp b/profiling/energy-profiler/tools/kernel_timer_tool.cpp new file mode 100644 index 000000000..c39b16087 --- /dev/null +++ b/profiling/energy-profiler/tools/kernel_timer_tool.cpp @@ -0,0 +1,93 @@ +#include "kernel_timer_tool.hpp" +#include +#include + +void KernelTimerTool::init_library( + const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { + std::cout << "Kokkos Power Profiler: Initializing with load sequence " + << loadSeq << " and interface version " << interfaceVer + << std::endl; + std::cout << "Kokkos Power Profiler: Library initialized" << std::endl; +} + +void KernelTimerTool::finalize_library() { + // Implementation is empty +} + +void KernelTimerTool::start_region(const std::string& name, RegionType type, + uint64_t id) { + TimingInfo region; + region.name = name; + region.type = type; + region.start_time = std::chrono::high_resolution_clock::now(); + region.id = id; + active_regions_.push_back(region); +} + +void KernelTimerTool::end_region() { + if (!active_regions_.empty()) { + auto region = active_regions_.back(); + active_regions_.pop_back(); + region.end_time = std::chrono::high_resolution_clock::now(); + region.duration = std::chrono::duration_cast( + region.end_time - region.start_time); + if (region.type == RegionType::UserRegion) + completed_regions_.push_back(region); + else if (region.type == RegionType::DeepCopy) + completed_deepcopies_.push_back(region); + else + completed_kernels_.push_back(region); + 
} +} + +void KernelTimerTool::begin_parallel_for(const char* name, const uint32_t devID, + uint64_t kID) { + start_region(name, RegionType::ParallelFor, kID); +} + +void KernelTimerTool::end_parallel_for(uint64_t kID) { end_region(); } + +void KernelTimerTool::begin_parallel_scan(const char* name, + const uint32_t devID, uint64_t* kID) { + start_region(name, RegionType::ParallelScan, *kID); +} + +void KernelTimerTool::end_parallel_scan(uint64_t kID) { end_region(); } + +void KernelTimerTool::begin_parallel_reduce(const char* name, + const uint32_t devID, + uint64_t* kID) { + start_region(name, RegionType::ParallelReduce, *kID); +} + +void KernelTimerTool::end_parallel_reduce(uint64_t kID) { end_region(); } + +void KernelTimerTool::begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, + const char* dst_name, const void* dst_ptr, + Kokkos::Tools::SpaceHandle src_handle, + const char* src_name, const void* src_ptr, + uint64_t size) { + start_region(dst_name, RegionType::DeepCopy, + reinterpret_cast(dst_ptr)); +} + +void KernelTimerTool::end_deep_copy() { end_region(); } + +void KernelTimerTool::push_profile_region(const char* region_name) { + start_region(region_name, RegionType::UserRegion, next_region_id_++); +} + +void KernelTimerTool::pop_profile_region() { end_region(); } + +const std::deque& KernelTimerTool::get_kernel_timings() const { + return completed_kernels_; +} + +const std::deque& KernelTimerTool::get_region_timings() const { + return completed_regions_; +} + +const std::deque& KernelTimerTool::get_deep_copy_timings() const { + return completed_deepcopies_; +} diff --git a/profiling/energy-profiler/tools/kernel_timer_tool.hpp b/profiling/energy-profiler/tools/kernel_timer_tool.hpp new file mode 100644 index 000000000..1dd1f0963 --- /dev/null +++ b/profiling/energy-profiler/tools/kernel_timer_tool.hpp @@ -0,0 +1,56 @@ +#pragma once + +#include +#include +#include "../common/tool_interface.hpp" +#include "../common/timer.hpp" + +class KernelTimerTool : 
public ToolInterface { + public: + KernelTimerTool() = default; + ~KernelTimerTool() override = default; + + void init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) override; + void finalize_library() override; + + // Stack-based timing for robust region/kernel tracking + void start_region(const std::string& name, RegionType type, uint64_t id = 0); + void end_region(); + + // Kokkos interface + void begin_parallel_for(const char* name, const uint32_t devID, + uint64_t kID) override; + void end_parallel_for(uint64_t kID) override; + + void begin_parallel_scan(const char* name, const uint32_t devID, + uint64_t* kID) override; + void end_parallel_scan(uint64_t kID) override; + + void begin_parallel_reduce(const char* name, const uint32_t devID, + uint64_t* kID) override; + void end_parallel_reduce(uint64_t kID) override; + + void begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, + const char* dst_name, const void* dst_ptr, + Kokkos::Tools::SpaceHandle src_handle, + const char* src_name, const void* src_ptr, + uint64_t size) override; + void end_deep_copy() override; + + void push_profile_region(const char* region_name) override; + void pop_profile_region() override; + + // Getters for summary + const std::deque& get_kernel_timings() const; + const std::deque& get_region_timings() const; + const std::deque& get_deep_copy_timings() const; + + private: + std::deque active_regions_; + std::deque completed_kernels_; + std::deque completed_regions_; + std::deque completed_deepcopies_; + uint64_t next_region_id_ = 1; +}; \ No newline at end of file diff --git a/profiling/energy-profiler/variorum/CMakeLists.txt b/profiling/energy-profiler/variorum/CMakeLists.txt deleted file mode 100644 index ca923766b..000000000 --- a/profiling/energy-profiler/variorum/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -kp_add_library(kp_power_variorum kp_power_variorum.cpp - 
variorum_power_profiler.cpp -) - -target_include_directories(kp_power_variorum PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR} -) - -target_link_libraries(kp_power_variorum PRIVATE variorum::variorum) - -if(USE_MPI) - target_link_libraries(kp_power_variorum PRIVATE MPI::MPI_CXX) -endif() \ No newline at end of file diff --git a/profiling/energy-profiler/variorum/Makefile b/profiling/energy-profiler/variorum/Makefile deleted file mode 100644 index a2ca30e58..000000000 --- a/profiling/energy-profiler/variorum/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -#Variorum Dependancy -VAR_LIB=-L$(VARIORUM_ROOT)/lib -VAR_INC=-I$(VARIORUM_ROOT)/include -LINK_FLAG=-lvariorum - -CXX=mpicxx -CXXFLAGS=-O3 -std=c++20 -g -SHARED_CXXFLAGS=-shared -fPIC -all: build/kp_power_variorum.so - -MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) - -CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}/../all - -SOURCES = kp_power_variorum.cpp \ - variorum_power_profiler.cpp - -HEADERS = variorum_power_profiler.hpp - -build/kp_power_variorum.so: $(SOURCES) $(HEADERS) | build - $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) $(VAR_LIB) $(VAR_INC) -o $@ $(SOURCES) $(LINK_FLAG) - -build: - mkdir -p build - -clean: - rm -rf build/*.so diff --git a/profiling/energy-profiler/variorum/kp_power_variorum.cpp b/profiling/energy-profiler/variorum/kp_power_variorum.cpp deleted file mode 100644 index c90722a44..000000000 --- a/profiling/energy-profiler/variorum/kp_power_variorum.cpp +++ /dev/null @@ -1,175 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -/** - * Kokkos Power Profiler - Specialized for Variorum - * Simplified version focused on Variorum energy monitoring with integrated - * timing - */ - -#include -#include - -#include "kp_core.hpp" -#include "variorum_power_profiler.hpp" - -namespace KokkosTools { -namespace PowerProfiler { - -// --- Core Initialization --- -VariorumPowerProfiler power_profiler; - -// --- Library Initialization/Finalization --- - -void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, - const uint32_t devInfoCount, - Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { - printf("-----------------------------------------------------------\n"); - printf( - "KokkosP: Power Profiler (sequence is %d, version: %lu, devices: %u)\n", - loadSeq, interfaceVer, devInfoCount); - printf("-----------------------------------------------------------\n"); - power_profiler.initialize(); -} - -void kokkosp_finalize_library() { - if (power_profiler.is_initialized()) { - power_profiler.finalize(); - } else { - std::cerr - << "PowerProfiler: Core not initialized, skipping finalization.\n"; - } - printf("-----------------------------------------------------------\n"); - printf("KokkosP: Finalization of Power Profiler. 
Complete.\n"); - printf("-----------------------------------------------------------\n"); -} - -// --- Kernels Launch/End --- - -void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, - uint64_t* kID) { - if (power_profiler.is_initialized()) { - power_profiler.begin_kernel(*kID, std::string(name), KernelType::FOR); - } else { - std::cerr - << "PowerProfiler: Core not initialized, cannot begin parallel for.\n"; - } -} - -void kokkosp_end_parallel_for(const uint64_t kID) { - if (power_profiler.is_initialized()) { - power_profiler.end_kernel(kID); - } else { - std::cerr - << "PowerProfiler: Core not initialized, cannot end parallel for.\n"; - } -} - -void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, - uint64_t* kID) { - if (power_profiler.is_initialized() && kID) { - power_profiler.begin_kernel(*kID, std::string(name), KernelType::SCAN); - } else { - std::cerr << "PowerProfiler: Core not initialized or kID is null, " - "cannot begin parallel scan.\n"; - } -} - -void kokkosp_end_parallel_scan(const uint64_t kID) { - if (power_profiler.is_initialized()) { - power_profiler.end_kernel(kID); - } else { - std::cerr - << "PowerProfiler: Core not initialized, cannot end parallel scan.\n"; - } -} - -void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, - uint64_t* kID) { - if (power_profiler.is_initialized() && kID) { - power_profiler.begin_kernel(*kID, std::string(name), KernelType::REDUCE); - } else { - std::cerr << "PowerProfiler: Core not initialized or kID is null, " - "cannot begin parallel reduce.\n"; - } -} - -void kokkosp_end_parallel_reduce(const uint64_t kID) { - if (power_profiler.is_initialized()) { - power_profiler.end_kernel(kID); - } else { - std::cerr - << "PowerProfiler: Core not initialized, cannot end parallel reduce.\n"; - } -} - -void kokkosp_push_profile_region(char const* regionName) { - if (power_profiler.is_initialized()) { - power_profiler.push_region(std::string(regionName)); - // 
printf("KokkosP: Entering profiling region: %s\n", regionName); - // Commented out to avoid excessive output - } else { - std::cerr - << "PowerProfiler: Core not initialized, cannot push profile region.\n"; - } -} - -void kokkosp_pop_profile_region() { - if (power_profiler.is_initialized()) { - power_profiler.pop_region(); - } else { - std::cerr - << "PowerProfiler: Core not initialized, cannot pop profile region.\n"; - } -} - -// --- Event Set Configuration --- - -Kokkos::Tools::Experimental::EventSet get_event_set() { - Kokkos::Tools::Experimental::EventSet my_event_set; - memset(&my_event_set, 0, - sizeof(my_event_set)); // zero any pointers not set here - my_event_set.init = kokkosp_init_library; - my_event_set.finalize = kokkosp_finalize_library; - my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; - my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; - my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; - my_event_set.end_parallel_for = kokkosp_end_parallel_for; - my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; - my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; - my_event_set.push_region = kokkosp_push_profile_region; - my_event_set.pop_region = kokkosp_pop_profile_region; - return my_event_set; -} - -} // namespace PowerProfiler -} // namespace KokkosTools - -extern "C" { - -namespace impl = KokkosTools::PowerProfiler; - -EXPOSE_INIT(impl::kokkosp_init_library) -EXPOSE_FINALIZE(impl::kokkosp_finalize_library) -EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) -EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) -EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) -EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) -EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) -EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) -EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) -EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) -} diff --git 
a/profiling/energy-profiler/variorum/readme.md b/profiling/energy-profiler/variorum/readme.md deleted file mode 100644 index 6413c8b44..000000000 --- a/profiling/energy-profiler/variorum/readme.md +++ /dev/null @@ -1,38 +0,0 @@ -# Variorum Energy Profiler - -A Kokkos profiling tool that uses Variorum to collect power measurements from supported hardware. - -## Setup - -1. Install Variorum library -2. Compile this module with Variorum using the main CMake build system. - -## Configuration - -Environment variables: -- `KOKKOS_TOOLS_POWER_MONITOR_INTERVAL`: Sampling interval in microseconds (default: 20000) -- `KOKKOS_TOOLS_POWER_OUTPUT_PATH`: Base path for output files (optional) - -## Output Files - -The profiler generates three CSV files: -- `hostname-pid-power.csv`: Raw power readings with absolute epoch timestamps - - Format: `timestamp_epoch_ns,device_id,power_watts` -- `hostname-pid-regions.csv`: Timing for user-defined regions - - Format: `name,type,start_timestamp_epoch_ns,end_timestamp_epoch_ns,duration_ns` -- `hostname-pid-kernels.csv`: Timing for Kokkos kernels - - Format: `name,type,start_timestamp_epoch_ns,end_timestamp_epoch_ns,duration_ns,kernel_id` - -Power readings are in watts and timestamps are in nanoseconds since the epoch. 
- -## Usage - -```bash -export KOKKOS_PROFILE_LIBRARY=/path/to/variorum_energy_profiler.so -./your_kokkos_application -``` - -> Note: You might need to set the `LD_LIBRARY_PATH` to include the Variorum library path if it's not in a standard location: -> ```bash -> export LD_LIBRARY_PATH=/path/to/variorum/lib:$LD_LIBRARY_PATH -> ``` \ No newline at end of file diff --git a/profiling/energy-profiler/variorum/variorum_power_profiler.cpp b/profiling/energy-profiler/variorum/variorum_power_profiler.cpp deleted file mode 100644 index 4ab445917..000000000 --- a/profiling/energy-profiler/variorum/variorum_power_profiler.cpp +++ /dev/null @@ -1,369 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#include "variorum_power_profiler.hpp" -#include -#include -#include -#include -#include -#include -#include -#include - -namespace KokkosTools { -namespace PowerProfiler { - -std::string kernel_type_to_string(KernelType type) { - switch (type) { - case KernelType::FOR: return "FOR"; - case KernelType::SCAN: return "SCAN"; - case KernelType::REDUCE: return "REDUCE"; - default: return "UNKNOWN"; - } -} - -VariorumPowerProfiler::VariorumPowerProfiler() { - if (const char* interval = - std::getenv("KOKKOS_TOOLS_POWER_MONITOR_INTERVAL")) { - try { - auto interval_us = std::stoul(interval); - monitor_interval_ = std::chrono::microseconds(interval_us); - } catch (const std::exception& e) { - std::cerr - << "PowerProfiler: Invalid monitor interval, using default 20ms\n"; - } - } - - if (const char* output_path = std::getenv("KOKKOS_TOOLS_POWER_OUTPUT_PATH")) { - output_file_path_ = output_path; - } -} - -VariorumPowerProfiler::~VariorumPowerProfiler() { - if (initialized_) { - finalize(); - } -} - -bool VariorumPowerProfiler::initialize() { - if (initialized_) { - return true; - } - - if (!initialize_variorum()) { - std::cerr << "PowerProfiler: Failed to initialize Variorum\n"; - return false; - } - - available_devices_ = get_available_devices(); - if (available_devices_.empty()) { - std::cerr << "PowerProfiler: No energy monitoring devices found\n"; - return false; - } - - start_monitoring(); - initialized_ = true; - - std::cout << "PowerProfiler: Initialized with " << available_devices_.size() - << " devices, monitoring interval: " << monitor_interval_.count() - << "μs\n"; - - return true; -} - -void VariorumPowerProfiler::finalize() { - if (!initialized_) { - return; - } - - stop_monitoring(); - generate_outputs(); - initialized_ = false; -} - -bool VariorumPowerProfiler::initialize_variorum() { return true; } - -VariorumPowerProfiler::unique_json_ptr 
-VariorumPowerProfiler::get_variorum_json_data() const { - char* json_string_c_raw = nullptr; - int variorum_error = variorum_get_power_json(&json_string_c_raw); - - if (variorum_error != 0) { - std::cerr << "PowerProfiler: variorum_get_power_json() failed. Error code: " - << variorum_error << "\n"; - return unique_json_ptr(nullptr); - } - - unique_cstring json_string_c(json_string_c_raw); - - if (!json_string_c) { - std::cerr << "PowerProfiler: variorum_get_power_json() returned success " - "but a null pointer.\n"; - return unique_json_ptr(nullptr); - } - - json_error_t error; - json_t* root_ptr = json_loads(json_string_c.get(), 0, &error); - - if (!root_ptr) { - std::cerr << "PowerProfiler: Failed to parse JSON: " << error.text << "\n"; - return unique_json_ptr(nullptr); - } - - return unique_json_ptr(root_ptr); -} - -std::deque VariorumPowerProfiler::get_available_devices() const { - std::set found_device_ids; - unique_json_ptr root = get_variorum_json_data(); - - if (!root) { - return {}; - } - - json_t* host_obj = json_object_iter_value(json_object_iter(root.get())); - if (!host_obj) { - return {}; - } - - json_t* socket_0 = json_object_get(host_obj, "socket_0"); - if (socket_0 && json_is_object(socket_0)) { - json_t* power_gpu_watts = json_object_get(socket_0, "power_gpu_watts"); - if (power_gpu_watts && json_is_object(power_gpu_watts)) { - const char* key; - json_t* value; - json_object_foreach(power_gpu_watts, key, value) { - std::string s_key(key); - if (s_key.length() > 4 && s_key.substr(0, 4) == "GPU_") { - try { - uint32_t device_id = std::stoul(s_key.substr(4)); - found_device_ids.insert(device_id); - } catch (const std::invalid_argument& e) { - std::cerr << "PowerProfiler: Could not parse GPU ID from key: " - << s_key << " (" << e.what() << ")\n"; - } catch (const std::out_of_range& e) { - std::cerr << "PowerProfiler: GPU ID out of range from key: " - << s_key << " (" << e.what() << ")\n"; - } - } - } - } - } - - return 
std::deque(found_device_ids.begin(), found_device_ids.end()); -} - -EnergyReading VariorumPowerProfiler::get_current_energy_reading() const { - EnergyReading reading; - reading.timestamp = get_current_time(); - reading.epoch_timestamp = get_current_epoch_time(); - - unique_json_ptr root = get_variorum_json_data(); - if (!root) { - return reading; - } - - json_t* host_obj = json_object_iter_value(json_object_iter(root.get())); - if (!host_obj) { - return reading; - } - - json_t* socket_0 = json_object_get(host_obj, "socket_0"); - if (socket_0 && json_is_object(socket_0)) { - json_t* power_gpu_watts = json_object_get(socket_0, "power_gpu_watts"); - if (power_gpu_watts && json_is_object(power_gpu_watts)) { - for (uint32_t device_id : available_devices_) { - std::string gpu_key = "GPU_" + std::to_string(device_id); - json_t* power_value = json_object_get(power_gpu_watts, gpu_key.c_str()); - - if (json_is_number(power_value)) { - reading.gpu_power_watts[device_id] = json_number_value(power_value); - } - } - } - } - - return reading; -} - -void VariorumPowerProfiler::start_monitoring() { - monitoring_active_ = true; - monitoring_thread_ = std::make_unique( - &VariorumPowerProfiler::monitoring_thread_function, this); -} - -void VariorumPowerProfiler::stop_monitoring() { - monitoring_active_ = false; - if (monitoring_thread_ && monitoring_thread_->joinable()) { - monitoring_thread_->join(); - } -} - -void VariorumPowerProfiler::monitoring_thread_function() { - while (monitoring_active_) { - EnergyReading reading = get_current_energy_reading(); - energy_readings_.push_back(reading); - std::this_thread::sleep_for(monitor_interval_); - } -} - -void VariorumPowerProfiler::begin_kernel(uint64_t kernel_id, - const std::string& name, - KernelType type) { - KernelTiming timing; - timing.kernel_id = kernel_id; - timing.name = name; - timing.type = type; - timing.start_time = get_current_time(); - timing.epoch_start_time = get_current_epoch_time(); - - active_kernels_[kernel_id] = 
timing; -} - -void VariorumPowerProfiler::end_kernel(uint64_t kernel_id) { - auto it = active_kernels_.find(kernel_id); - if (it != active_kernels_.end()) { - it->second.end_time = get_current_time(); - it->second.epoch_end_time = get_current_epoch_time(); - it->second.duration = std::chrono::duration_cast( - it->second.end_time - it->second.start_time); - - completed_kernels_.push_back(it->second); - active_kernels_.erase(it); - } -} - -void VariorumPowerProfiler::push_region(const std::string& name, - const std::string& type) { - RegionTiming region; - region.name = name; - region.type = type.empty() ? "DEFAULT" : type; - region.start_time = get_current_time(); - region.epoch_start_time = get_current_epoch_time(); - - active_regions_.push_back(region); -} - -void VariorumPowerProfiler::pop_region() { - if (!active_regions_.empty()) { - auto& region = active_regions_.back(); - region.end_time = get_current_time(); - region.epoch_end_time = get_current_epoch_time(); - region.duration = std::chrono::duration_cast( - region.end_time - region.start_time); - - completed_regions_.push_back(region); - active_regions_.pop_back(); - } -} - -std::chrono::time_point -VariorumPowerProfiler::get_current_time() const { - return std::chrono::steady_clock::now(); -} - -std::chrono::system_clock::time_point -VariorumPowerProfiler::get_current_epoch_time() const { - return std::chrono::system_clock::now(); -} - -void VariorumPowerProfiler::generate_outputs() { output_to_csv(); } - -void VariorumPowerProfiler::output_to_csv() const { - char hostname[256]; - gethostname(hostname, 256); - int pid = (int)getpid(); - - // Create power data CSV file - char power_filename[512]; - snprintf(power_filename, 512, "%s-%d-power.csv", hostname, pid); - std::ofstream power_csv(power_filename); - if (power_csv.is_open()) { - power_csv << "timestamp_epoch_ns,device_id,power_watts\n"; - for (const auto& reading : energy_readings_) { - auto epoch_ns = std::chrono::duration_cast( - 
reading.epoch_timestamp.time_since_epoch()) - .count(); - - for (const auto& [device_id, power] : reading.gpu_power_watts) { - power_csv << epoch_ns << "," << device_id << "," << power << "\n"; - } - } - power_csv.close(); - std::cout << "Power measurements written to " << power_filename << " (" - << energy_readings_.size() << " readings)\n"; - } - - // Create regions CSV file - char regions_filename[512]; - snprintf(regions_filename, 512, "%s-%d-regions.csv", hostname, pid); - std::ofstream regions_csv(regions_filename); - if (regions_csv.is_open() && !completed_regions_.empty()) { - regions_csv << "name,type,start_timestamp_epoch_ns,end_timestamp_epoch_ns," - "duration_ns\n"; - for (const auto& region : completed_regions_) { - auto start_epoch_ns = - std::chrono::duration_cast( - region.epoch_start_time.time_since_epoch()) - .count(); - auto end_epoch_ns = std::chrono::duration_cast( - region.epoch_end_time.time_since_epoch()) - .count(); - - regions_csv << "\"" << region.name << "\"," << "\"" << region.type - << "\"," << start_epoch_ns << "," << end_epoch_ns << "," - << region.duration.count() << "\n"; - } - regions_csv.close(); - std::cout << "Region timings written to " << regions_filename << " (" - << completed_regions_.size() << " regions)\n"; - } - - // Create kernels CSV file (can be considered part of regions with specific - // type) - if (!completed_kernels_.empty()) { - char kernels_filename[512]; - snprintf(kernels_filename, 512, "%s-%d-kernels.csv", hostname, pid); - std::ofstream kernels_csv(kernels_filename); - if (kernels_csv.is_open()) { - kernels_csv << "name,type,start_timestamp_epoch_ns,end_timestamp_epoch_" - "ns,duration_ns,kernel_id\n"; - for (const auto& kernel : completed_kernels_) { - auto start_epoch_ns = - std::chrono::duration_cast( - kernel.epoch_start_time.time_since_epoch()) - .count(); - auto end_epoch_ns = - std::chrono::duration_cast( - kernel.epoch_end_time.time_since_epoch()) - .count(); - - kernels_csv << "\"" << kernel.name 
<< "\"," << "\"" - << kernel_type_to_string(kernel.type) << "\"," - << start_epoch_ns << "," << end_epoch_ns << "," - << kernel.duration.count() << "," << kernel.kernel_id - << "\n"; - } - kernels_csv.close(); - std::cout << "Kernel timings written to " << kernels_filename << " (" - << completed_kernels_.size() << " kernels)\n"; - } - } -} - -} // namespace PowerProfiler -} // namespace KokkosTools \ No newline at end of file diff --git a/profiling/energy-profiler/variorum/variorum_power_profiler.hpp b/profiling/energy-profiler/variorum/variorum_power_profiler.hpp deleted file mode 100644 index 7a04efc9e..000000000 --- a/profiling/energy-profiler/variorum/variorum_power_profiler.hpp +++ /dev/null @@ -1,133 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -extern "C" { -#include -#include -} - -namespace KokkosTools { -namespace PowerProfiler { - -enum class KernelType { FOR, SCAN, REDUCE }; - -struct EnergyReading { - std::chrono::system_clock::time_point epoch_timestamp; - std::chrono::time_point timestamp; - std::map gpu_power_watts; -}; - -struct KernelTiming { - uint64_t kernel_id; - std::string name; - KernelType type; - std::chrono::system_clock::time_point epoch_start_time; - std::chrono::system_clock::time_point epoch_end_time; - std::chrono::time_point start_time; - std::chrono::time_point end_time; - std::chrono::nanoseconds duration; -}; - -struct RegionTiming { - std::string name; - std::string type; - std::chrono::system_clock::time_point epoch_start_time; - std::chrono::system_clock::time_point epoch_end_time; - std::chrono::time_point start_time; - std::chrono::time_point end_time; - std::chrono::nanoseconds duration; -}; - -class VariorumPowerProfiler { - public: - VariorumPowerProfiler(); - ~VariorumPowerProfiler(); - - bool initialize(); - void finalize(); - - void begin_kernel(uint64_t kernel_id, const std::string& name, - KernelType type); - void end_kernel(uint64_t kernel_id); - - void push_region(const std::string& name, const std::string& type = ""); - void pop_region(); - - bool is_initialized() const { return initialized_; } - - private: - struct JsonDeleter { - void operator()(json_t* json) const { - if (json) json_decref(json); - } - }; - using unique_json_ptr = std::unique_ptr; - - struct CFreeDeleter { - void operator()(char* ptr) const { - if (ptr) free(ptr); - } - }; - using unique_cstring = std::unique_ptr; - - bool initialize_variorum(); - unique_json_ptr get_variorum_json_data() const; - EnergyReading get_current_energy_reading() const; - std::deque get_available_devices() const; - - void 
monitoring_thread_function(); - void start_monitoring(); - void stop_monitoring(); - - void generate_outputs(); - void output_to_csv() const; - - std::chrono::time_point get_current_time() const; - std::chrono::system_clock::time_point get_current_epoch_time() const; - - std::chrono::microseconds monitor_interval_{20000}; - std::string output_file_path_{"power_profile_output"}; - - bool initialized_{false}; - std::deque available_devices_; - - std::atomic monitoring_active_{false}; - std::unique_ptr monitoring_thread_; - - std::deque energy_readings_; - std::deque completed_kernels_; - std::deque completed_regions_; - - std::unordered_map active_kernels_; - std::deque active_regions_; -}; - -// Utility function to convert KernelType to string -std::string kernel_type_to_string(KernelType type); - -} // namespace PowerProfiler -} // namespace KokkosTools \ No newline at end of file From eadef98d5956948ef7a9260128011808206b8135 Mon Sep 17 00:00:00 2001 From: Ethan Puyaubreau Date: Wed, 13 Aug 2025 14:40:16 -0400 Subject: [PATCH 10/11] energy-profiler: fix warnings --- profiling/energy-profiler/common/daemon.hpp | 2 +- .../energy-profiler/kokkos/CMakeLists.txt | 7 +- .../energy-profiler/tests/CMakeLists.txt | 170 ++++++++---------- 3 files changed, 85 insertions(+), 94 deletions(-) diff --git a/profiling/energy-profiler/common/daemon.hpp b/profiling/energy-profiler/common/daemon.hpp index c3384b6ad..ad9188452 100644 --- a/profiling/energy-profiler/common/daemon.hpp +++ b/profiling/energy-profiler/common/daemon.hpp @@ -7,7 +7,7 @@ class Daemon { public: Daemon(std::function func, int interval_ms) - : func_(func), interval_(interval_ms) {}; + : interval_(interval_ms), func_(func) {}; void start(); void tick(); diff --git a/profiling/energy-profiler/kokkos/CMakeLists.txt b/profiling/energy-profiler/kokkos/CMakeLists.txt index ec0d18bf8..087194a56 100644 --- a/profiling/energy-profiler/kokkos/CMakeLists.txt +++ b/profiling/energy-profiler/kokkos/CMakeLists.txt @@ -29,7 
+29,12 @@ find_package(CUDAToolkit QUIET) if (CUDAToolkit_FOUND) find_package(CUDA::nvml QUIET) - message(STATUS "Found CUDA NVML, making NVML power profiler available.") + if(TARGET CUDA::nvml) + message(STATUS "Found CUDA NVML, making NVML power profiler available.") + else() + message(STATUS "CUDA::nvml target not found, skipping NVML power profiler.") + return() + endif() else() message(STATUS "CUDAToolkit not found, skipping NVML power profiler.") return() diff --git a/profiling/energy-profiler/tests/CMakeLists.txt b/profiling/energy-profiler/tests/CMakeLists.txt index 67b433570..da391e9e9 100644 --- a/profiling/energy-profiler/tests/CMakeLists.txt +++ b/profiling/energy-profiler/tests/CMakeLists.txt @@ -19,123 +19,109 @@ target_link_libraries(daemon_test PRIVATE Threads::Threads) add_test(NAME daemon_test COMMAND daemon_test) -# NVML Provider test -add_executable(nvml_provider_test - nvml_provider_test.cpp - ../provider/provider_nvml.cpp -) - -target_include_directories(nvml_provider_test PRIVATE - ../provider -) - # Find CUDA Toolkit and NVML for NVML tests find_package(CUDAToolkit QUIET) if(CUDAToolkit_FOUND) find_package(CUDA::nvml QUIET) if(TARGET CUDA::nvml) message(STATUS "Found CUDA NVML, making NVML tests available.") + + # NVML Provider test + add_executable(nvml_provider_test + nvml_provider_test.cpp + ../provider/provider_nvml.cpp + ) + + target_include_directories(nvml_provider_test PRIVATE + ../provider + ) + target_link_libraries(nvml_provider_test PRIVATE CUDA::nvml) - add_test(NAME nvml_provider_test COMMAND nvml_provider_test) # Register test if NVML is found + add_test(NAME nvml_provider_test COMMAND nvml_provider_test) + + # Daemon NVML Integration test + add_executable(daemon_nvml_integration_test + daemon_nvml_integration_test.cpp + ../common/daemon.cpp + ../provider/provider_nvml.cpp + ) + + target_include_directories(daemon_nvml_integration_test PRIVATE + ../common + ../provider + ) + + target_link_libraries(daemon_nvml_integration_test 
PRIVATE CUDA::nvml Threads::Threads) + add_test(NAME daemon_nvml_integration_test COMMAND daemon_nvml_integration_test) + + # Fast Daemon NVML Integration test (20ms sampling) + add_executable(daemon_nvml_fast_test + daemon_nvml_fast_test.cpp + ../common/daemon.cpp + ../provider/provider_nvml.cpp + ) + + target_include_directories(daemon_nvml_fast_test PRIVATE + ../common + ../provider + ) + + target_link_libraries(daemon_nvml_fast_test PRIVATE CUDA::nvml Threads::Threads) + add_test(NAME daemon_nvml_fast_test COMMAND daemon_nvml_fast_test) else() - message(WARNING "CUDA::nvml target not found. NVML tests may not work properly.") + message(STATUS "CUDA::nvml target not found. NVML tests will be skipped.") endif() else() - message(STATUS "CUDAToolkit not found, NVML tests may not work properly.") -endif() - -# Daemon NVML Integration test -add_executable(daemon_nvml_integration_test - daemon_nvml_integration_test.cpp - ../common/daemon.cpp - ../provider/provider_nvml.cpp -) - -target_include_directories(daemon_nvml_integration_test PRIVATE - ../common - ../provider -) - -if(CUDAToolkit_FOUND AND TARGET CUDA::nvml) - target_link_libraries(daemon_nvml_integration_test PRIVATE CUDA::nvml Threads::Threads) - add_test(NAME daemon_nvml_integration_test COMMAND daemon_nvml_integration_test) # Register test if NVML is found -else() - target_link_libraries(daemon_nvml_integration_test PRIVATE Threads::Threads) -endif() - -# Fast Daemon NVML Integration test (20ms sampling) -add_executable(daemon_nvml_fast_test - daemon_nvml_fast_test.cpp - ../common/daemon.cpp - ../provider/provider_nvml.cpp -) - -target_include_directories(daemon_nvml_fast_test PRIVATE - ../common - ../provider -) - -if(CUDAToolkit_FOUND AND TARGET CUDA::nvml) - target_link_libraries(daemon_nvml_fast_test PRIVATE CUDA::nvml Threads::Threads) - add_test(NAME daemon_nvml_fast_test COMMAND daemon_nvml_fast_test) # Register test if NVML is found -else() - target_link_libraries(daemon_nvml_fast_test PRIVATE 
Threads::Threads) + message(STATUS "CUDAToolkit not found, NVML tests will be skipped.") endif() # Variorum Provider test -add_executable(variorum_provider_test - variorum_provider_test.cpp - ../provider/provider_variorum.cpp -) - -target_include_directories(variorum_provider_test PRIVATE - ../provider -) - -# Use Variorum configuration from root CMake if(KOKKOSTOOLS_HAS_VARIORUM) message(STATUS "Using Variorum configuration from root CMake, making Variorum tests available.") + + add_executable(variorum_provider_test + variorum_provider_test.cpp + ../provider/provider_variorum.cpp + ) + + target_include_directories(variorum_provider_test PRIVATE + ../provider + ) + target_link_libraries(variorum_provider_test PRIVATE variorum::variorum) - add_test(NAME variorum_provider_test COMMAND variorum_provider_test) # Register test if Variorum is found -else() - message(STATUS "Variorum not available from root CMake, Variorum tests may not work properly.") -endif() + add_test(NAME variorum_provider_test COMMAND variorum_provider_test) -# Daemon Variorum Integration test -add_executable(daemon_variorum_integration_test - daemon_variorum_integration_test.cpp - ../common/daemon.cpp - ../provider/provider_variorum.cpp -) + # Daemon Variorum Integration test + add_executable(daemon_variorum_integration_test + daemon_variorum_integration_test.cpp + ../common/daemon.cpp + ../provider/provider_variorum.cpp + ) -target_include_directories(daemon_variorum_integration_test PRIVATE - ../common - ../provider -) + target_include_directories(daemon_variorum_integration_test PRIVATE + ../common + ../provider + ) -if(KOKKOSTOOLS_HAS_VARIORUM) target_link_libraries(daemon_variorum_integration_test PRIVATE variorum::variorum Threads::Threads) - add_test(NAME daemon_variorum_integration_test COMMAND daemon_variorum_integration_test) # Register test if Variorum is found -else() - target_link_libraries(daemon_variorum_integration_test PRIVATE Threads::Threads) -endif() + add_test(NAME 
daemon_variorum_integration_test COMMAND daemon_variorum_integration_test) -add_executable(daemon_variorum_fast_test - daemon_variorum_fast_test.cpp - ../common/daemon.cpp - ../provider/provider_variorum.cpp -) + # Fast Daemon Variorum Integration test + add_executable(daemon_variorum_fast_test + daemon_variorum_fast_test.cpp + ../common/daemon.cpp + ../provider/provider_variorum.cpp + ) -target_include_directories(daemon_variorum_fast_test PRIVATE - ../common - ../provider -) + target_include_directories(daemon_variorum_fast_test PRIVATE + ../common + ../provider + ) -if(KOKKOSTOOLS_HAS_VARIORUM) target_link_libraries(daemon_variorum_fast_test PRIVATE variorum::variorum Threads::Threads) - add_test(NAME daemon_variorum_fast_test COMMAND daemon_variorum_fast_test) # Register test if Variorum is found + add_test(NAME daemon_variorum_fast_test COMMAND daemon_variorum_fast_test) else() - target_link_libraries(daemon_variorum_fast_test PRIVATE Threads::Threads) + message(STATUS "Variorum not available from root CMake, Variorum tests will be skipped.") endif() add_executable(timer_test From 7d46730551b94f1436ba8ba3981e009d6418cf5a Mon Sep 17 00:00:00 2001 From: Ethan Puyaubreau Date: Wed, 13 Aug 2025 14:56:26 -0400 Subject: [PATCH 11/11] energy-profiler: suppress unused variable warnings in kernel_timer_tool --- .../tools/kernel_timer_tool.cpp | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/profiling/energy-profiler/tools/kernel_timer_tool.cpp b/profiling/energy-profiler/tools/kernel_timer_tool.cpp index c39b16087..ea0895fbc 100644 --- a/profiling/energy-profiler/tools/kernel_timer_tool.cpp +++ b/profiling/energy-profiler/tools/kernel_timer_tool.cpp @@ -5,6 +5,8 @@ void KernelTimerTool::init_library( const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { + (void)devInfoCount; + (void)deviceInfo; std::cout << "Kokkos Power Profiler: Initializing with load 
sequence " << loadSeq << " and interface version " << interfaceVer << std::endl; @@ -43,31 +45,48 @@ void KernelTimerTool::end_region() { void KernelTimerTool::begin_parallel_for(const char* name, const uint32_t devID, uint64_t kID) { + (void)devID; start_region(name, RegionType::ParallelFor, kID); } -void KernelTimerTool::end_parallel_for(uint64_t kID) { end_region(); } +void KernelTimerTool::end_parallel_for(uint64_t kID) { + (void)kID; + end_region(); +} void KernelTimerTool::begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) { + (void)devID; start_region(name, RegionType::ParallelScan, *kID); } -void KernelTimerTool::end_parallel_scan(uint64_t kID) { end_region(); } +void KernelTimerTool::end_parallel_scan(uint64_t kID) { + (void)kID; + end_region(); +} void KernelTimerTool::begin_parallel_reduce(const char* name, const uint32_t devID, uint64_t* kID) { + (void)devID; start_region(name, RegionType::ParallelReduce, *kID); } -void KernelTimerTool::end_parallel_reduce(uint64_t kID) { end_region(); } +void KernelTimerTool::end_parallel_reduce(uint64_t kID) { + (void)kID; + end_region(); +} void KernelTimerTool::begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, const char* dst_name, const void* dst_ptr, Kokkos::Tools::SpaceHandle src_handle, const char* src_name, const void* src_ptr, uint64_t size) { + (void)dst_handle; + (void)src_handle; + (void)src_name; + (void)src_ptr; + (void)size; start_region(dst_name, RegionType::DeepCopy, reinterpret_cast(dst_ptr)); }