diff --git a/CMakeLists.txt b/CMakeLists.txt
index bc3b1e63b..4ab545e14 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -174,6 +174,8 @@ if(KOKKOSTOOLS_HAS_VARIORUM)
   add_subdirectory(profiling/variorum-connector)
 endif()
 
+add_subdirectory(profiling/energy-profiler)
+
 # GPU profilers
 if(Kokkos_ENABLE_CUDA)
   add_subdirectory(profiling/nvtx-connector)
diff --git a/profiling/energy-profiler/CMakeLists.txt b/profiling/energy-profiler/CMakeLists.txt
new file mode 100644
index 000000000..535cd8e9a
--- /dev/null
+++ b/profiling/energy-profiler/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_subdirectory(tests)
+add_subdirectory(kokkos)
\ No newline at end of file
diff --git a/profiling/energy-profiler/common/daemon.cpp b/profiling/energy-profiler/common/daemon.cpp
new file mode 100644
index 000000000..b4487eb75
--- /dev/null
+++ b/profiling/energy-profiler/common/daemon.cpp
@@ -0,0 +1,55 @@
+#include "daemon.hpp"
+#include <chrono>
+#include <stdexcept>
+
+// Launches the worker thread that repeatedly invokes the callback.
+// Throws std::runtime_error if the daemon is already running.
+void Daemon::start() {
+  if (running_) {
+    throw std::runtime_error("Daemon already started");
+  }
+  running_ = true;
+  try {
+    thread_ = std::thread(&Daemon::tick, this);
+  } catch (...) {
+    // Thread creation failed: clear the flag so start() can be retried and
+    // stop() does not try to join a thread that never existed.
+    running_ = false;
+    throw;
+  }
+}
+
+// Worker loop: runs the callback, then sleeps for whatever is left of the
+// configured interval. Runs until stop() clears the running flag.
+void Daemon::tick() {
+  // steady_clock is monotonic, so the measured callback duration cannot go
+  // negative or jump when the wall clock is adjusted.
+  using Clock = std::chrono::steady_clock;
+
+  while (running_) {
+    const Clock::time_point start_time = Clock::now();
+
+    // Execute the function
+    func_();
+
+    const std::chrono::milliseconds execution_duration =
+        std::chrono::duration_cast<std::chrono::milliseconds>(Clock::now() -
+                                                              start_time);
+
+    // Sleep only for the remainder of the interval; a callback that overruns
+    // the interval is re-run immediately.
+    if (execution_duration < interval_) {
+      std::this_thread::sleep_for(interval_ - execution_duration);
+    }
+  }
+}
+
+// Signals the worker loop to exit and waits for the thread to finish.
+// Throws std::runtime_error if the daemon is not running.
+void Daemon::stop() {
+  if (!running_) {
+    throw std::runtime_error("Daemon not started");
+  }
+  running_ = false;
+  thread_.join();
+}
\ No newline at end of file
diff --git a/profiling/energy-profiler/common/daemon.hpp 
b/profiling/energy-profiler/common/daemon.hpp
new file mode 100644
index 000000000..ad9188452
--- /dev/null
+++ b/profiling/energy-profiler/common/daemon.hpp
@@ -0,0 +1,45 @@
+#pragma once
+
+#include <atomic>
+#include <chrono>
+#include <functional>
+#include <thread>
+#include <utility>
+
+// Runs a callback periodically on a background thread.
+//
+// start() launches the thread, stop() ends the loop and joins it. The
+// destructor joins a thread that is still running, so destroying a started
+// Daemon does not call std::terminate.
+class Daemon {
+ public:
+  // func: callback invoked once per interval on the worker thread.
+  // interval_ms: target period between two callback starts, in milliseconds.
+  Daemon(std::function<void()> func, int interval_ms)
+      : interval_(interval_ms), func_(std::move(func)) {}
+
+  ~Daemon() {
+    running_ = false;
+    if (thread_.joinable()) {
+      thread_.join();
+    }
+  }
+
+  // The worker thread holds a pointer to this object, so the object must not
+  // be copied or moved.
+  Daemon(const Daemon&)            = delete;
+  Daemon& operator=(const Daemon&) = delete;
+
+  void start();
+  void tick();
+  void stop();
+  bool is_running() const { return running_; }
+  std::thread& get_thread() { return thread_; }
+
+ private:
+  std::chrono::milliseconds interval_;
+  // Written by start()/stop() and read by the worker loop, hence atomic.
+  std::atomic<bool> running_{false};
+  std::function<void()> func_;
+  std::thread thread_;
+};
\ No newline at end of file
diff --git a/profiling/energy-profiler/common/filename_prefix.cpp b/profiling/energy-profiler/common/filename_prefix.cpp
new file mode 100644
index 000000000..294acb82c
--- /dev/null
+++ b/profiling/energy-profiler/common/filename_prefix.cpp
@@ -0,0 +1,14 @@
+#include "filename_prefix.hpp"
+
+// Builds the "<hostname>-<pid>" prefix used to name the profiler output files.
+// "unknown-host" replaces the host name when gethostname() fails.
+std::string generate_prefix() {
+  char hostname[256] = {};
+  std::string host = "unknown-host";
+  if (gethostname(hostname, sizeof(hostname)) == 0) {
+    // A truncated name is not guaranteed to be null-terminated.
+    hostname[sizeof(hostname) - 1] = '\0';
+    host = hostname;
+  }
+  return host + "-" + std::to_string(static_cast<int>(getpid()));
+}
\ No newline at end of file
diff --git a/profiling/energy-profiler/common/filename_prefix.hpp b/profiling/energy-profiler/common/filename_prefix.hpp
new file mode 100644
index 000000000..93b02371e
--- /dev/null
+++ b/profiling/energy-profiler/common/filename_prefix.hpp
@@ -0,0 +1,7 @@
+#pragma once
+
+#include <string>
+#include <unistd.h>
+
+// Returns "<hostname>-<pid>" for the calling process.
+std::string generate_prefix();
\ No newline at end of file
diff --git a/profiling/energy-profiler/common/timer.cpp b/profiling/energy-profiler/common/timer.cpp
new file mode 100644
index 000000000..0b9401440
--- /dev/null
+++ b/profiling/energy-profiler/common/timer.cpp
@@ -0,0 +1,244 @@
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 4.0
+// Copyright (2022) National Technology & Engineering
+// Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. 
Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+#include "timer.hpp"
+#include <algorithm>
+#include <cstdio>
+#include <iostream>
+#include <utility>
+
+// EnergyTiming implementations
+
+// Creates an unnamed timing whose start time is the moment of construction.
+EnergyTiming::EnergyTiming()
+    : timing_id_(0), name_(""), region_type_(RegionType::Unknown) {
+  start_time_ = std::chrono::high_resolution_clock::now();
+}
+
+// Creates a named timing whose start time is the moment of construction.
+EnergyTiming::EnergyTiming(uint64_t timing_id, RegionType type,
+                           std::string name)
+    : timing_id_(timing_id), name_(std::move(name)), region_type_(type) {
+  start_time_ = std::chrono::high_resolution_clock::now();
+}
+
+// Records the current time as the end of the timing.
+void EnergyTiming::end() {
+  end_time_ = std::chrono::high_resolution_clock::now();
+}
+
+// True when end_time_ differs from a default-constructed time point, i.e.
+// after end() has stored a timestamp.
+bool EnergyTiming::is_ended() const {
+  return end_time_ !=
+         std::chrono::time_point<std::chrono::high_resolution_clock>{};
+}
+
+// Milliseconds between construction and end(). Returns 0 while the timing has
+// not ended, instead of the wrapped-around value that subtracting the start
+// time from an unset end time would produce.
+uint64_t EnergyTiming::get_duration_ms() const {
+  if (!is_ended()) {
+    return 0;
+  }
+  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(
+      end_time_ - start_time_);
+  return static_cast<uint64_t>(duration.count());
+}
+
+// EnergyTimer implementations
+
+// Starts a timing under `timing_id`. An id that is already present keeps its
+// existing entry, because unordered_map::emplace does not overwrite.
+void EnergyTimer::start_timing(uint64_t timing_id, RegionType type,
+                               std::string name) {
+  timings_.emplace(timing_id, EnergyTiming(timing_id, type, std::move(name)));
+}
+
+// Ends the timing registered under `timing_id`; unknown ids are ignored.
+void EnergyTimer::end_timing(uint64_t timing_id) {
+  auto it = timings_.find(timing_id);
+  if (it != timings_.end()) {
+    it->second.end();
+  }
+}
+
+std::unordered_map<uint64_t, EnergyTiming>& EnergyTimer::get_timings() {
+  return timings_;
+}
+
+namespace {
+
+// Milliseconds between the clock epoch and `tp`. Returned as long long so the
+// value can be printed with %lld whatever the clock's tick type is.
+long long epoch_ms(const std::chrono::high_resolution_clock::time_point& tp) {
+  return static_cast<long long>(
+      std::chrono::duration_cast<std::chrono::milliseconds>(
+          tp.time_since_epoch())
+          .count());
+}
+
+// Duration of `timing` in whole milliseconds (the field stores nanoseconds).
+long long duration_ms(const TimingInfo& timing) {
+  return static_cast<long long>(timing.duration.count() / 1000000);
+}
+
+// Text of the "type" column; every non-kernel region type maps to "unknown".
+const char* kernel_type_name(RegionType type) {
+  switch (type) {
+    case RegionType::ParallelFor: return "parallel_for";
+    case RegionType::ParallelScan: return "parallel_scan";
+    case RegionType::ParallelReduce: return "parallel_reduce";
+    default: return "unknown";
+  }
+}
+
+// Spaces needed to fill a `width`-column cell that already holds `used`
+// characters; empty when the content is as wide as the cell or wider.
+std::string padding(std::size_t width, std::size_t used) {
+  return std::string(width - std::min(used, width), ' ');
+}
+
+// Writes `timings` to `filename` as CSV. `header` is the first line,
+// `with_type` adds the kernel type column and `label` starts the success
+// message. Nothing is written when `timings` is empty.
+void write_csv(const std::deque<TimingInfo>& timings,
+               const std::string& filename, const char* header,
+               bool with_type, const char* label) {
+  if (timings.empty()) return;
+
+  FILE* file = fopen(filename.c_str(), "w");
+  if (!file) {
+    std::cerr << "ERROR: Unable to open file " << filename << " for writing.\n";
+    return;
+  }
+
+  fputs(header, file);
+  for (const auto& timing : timings) {
+    fprintf(file, "%s,", timing.name.c_str());
+    if (with_type) {
+      fprintf(file, "%s,", kernel_type_name(timing.type));
+    }
+    fprintf(file, "%lld,%lld,%lld\n", epoch_ms(timing.start_time),
+            epoch_ms(timing.end_time), duration_ms(timing));
+  }
+  fclose(file);
+  std::cout << label << " data exported to " << filename << std::endl;
+}
+
+// Prints one table row per timing. `with_type` adds the kernel type column.
+void print_rows(const std::deque<TimingInfo>& timings, bool with_type) {
+  for (const auto& info : timings) {
+    const std::string start    = std::to_string(epoch_ms(info.start_time));
+    const std::string end      = std::to_string(epoch_ms(info.end_time));
+    const std::string duration = std::to_string(duration_ms(info));
+
+    std::cout << "| " << info.name << padding(38, info.name.size());
+    if (with_type) {
+      const std::string type = kernel_type_name(info.type);
+      std::cout << "| " << type << padding(16, type.size());
+    }
+    std::cout << "| " << start << padding(19, start.size()) << "| " << end
+              << padding(19, end.size()) << "| " << duration
+              << padding(13, duration.size()) << "|\n";
+  }
+}
+
+}  // namespace
+
+namespace KokkosTools {
+namespace Timer {
+
+// NOTE(review): the column padding inside the table header literals below may
+// have been lost in this listing — confirm against the original file.
+
+// Exports kernel timings (with their type column) to `filename`.
+void export_kernels_csv(const std::deque<TimingInfo>& timings,
+                        const std::string& filename) {
+  write_csv(timings, filename,
+            "name,type,start_time_epoch_ms,end_time_epoch_ms,duration_ms\n",
+            true, "Timing");
+}
+
+// Exports profile-region timings to `filename`.
+void export_regions_csv(const std::deque<TimingInfo>& timings,
+                        const std::string& filename) {
+  write_csv(timings, filename,
+            "name,start_time_epoch_ms,end_time_epoch_ms,duration_ms\n", false,
+            "Region");
+}
+
+// Exports deep-copy timings to `filename`.
+void export_deepcopies_csv(const std::deque<TimingInfo>& timings,
+                           const std::string& filename) {
+  write_csv(timings, filename,
+            "name,start_time_epoch_ms,end_time_epoch_ms,duration_ms\n", false,
+            "Deep copy");
+}
+
+// Prints the kernel table to stdout.
+void print_kernels_summary(const std::deque<TimingInfo>& kernels) {
+  std::cout << "\n==== KERNELS ====\n";
+  std::cout << "| Name | Type | "
+               "Start(ms) | End(ms) | Duration (ms) |\n";
+  std::cout << "|--------------------------------------|----------------|------"
+               "-------------|-------------------|---------------|\n";
+  print_rows(kernels, true);
+}
+
+// Prints the profile-region table to stdout.
+void print_regions_summary(const std::deque<TimingInfo>& regions) {
+  std::cout << "\n==== REGIONS ====\n";
+  std::cout << "| Name | Start(ms) | "
+               "End(ms) | Duration (ms) |\n";
+  std::cout << "|--------------------------------------|-------------------|---"
+               "----------------|---------------|\n";
+  print_rows(regions, false);
+}
+
+// Prints the deep-copy table to stdout.
+void print_deepcopies_summary(const std::deque<TimingInfo>& deepcopies) {
+  std::cout << "\n==== DEEP COPIES ====\n";
+  std::cout << "| Name | Start(ms) | "
+               "End(ms) | Duration (ms) |\n";
+  std::cout << "|--------------------------------------|-------------------|---"
+               "----------------|---------------|\n";
+  print_rows(deepcopies, false);
+}
+
+}  // namespace Timer
+}  // namespace KokkosTools
diff --git a/profiling/energy-profiler/common/timer.hpp b/profiling/energy-profiler/common/timer.hpp
new file mode 100644
index 000000000..bb4ff8ac3
--- /dev/null
+++ b/profiling/energy-profiler/common/timer.hpp
@@ -0,0 +1,74 @@
+#pragma once
+
+#include <chrono>
+#include <cstdint>
+#include <deque>
+#include <string>
+#include <unordered_map>
+
+// Kind of Kokkos event a timing belongs to.
+enum class RegionType {
+  Unknown,
+  ParallelFor,
+  ParallelReduce,
+  ParallelScan,
+  DeepCopy,
+  UserRegion
+};
+
+// Timing record consumed by the export and print functions declared at the
+// end of this header. `duration` is stored in nanoseconds.
+struct TimingInfo {
+  std::string name;
+  RegionType type = RegionType::Unknown;
+  std::chrono::high_resolution_clock::time_point start_time;
+  std::chrono::high_resolution_clock::time_point end_time;
+  std::chrono::nanoseconds duration{0};
+  uint64_t id = 0;
+};
+
+// A single timing that starts at construction and is closed by end().
+struct EnergyTiming {
+  // Default constructor
+  EnergyTiming();
+
+  EnergyTiming(uint64_t timing_id, RegionType type, std::string name);
+
+  void end();
+
+  bool is_ended() const;
+
+  uint64_t get_duration_ms() const;
+
+  uint64_t timing_id_;
+  std::string name_;
+  RegionType region_type_;
+  std::chrono::time_point<std::chrono::high_resolution_clock> start_time_;
+  std::chrono::time_point<std::chrono::high_resolution_clock> end_time_;
+};
+
+// Collection of EnergyTiming objects indexed by timing id.
+struct EnergyTimer {
+ public:
+  void start_timing(uint64_t timing_id, RegionType type, std::string name);
+  void end_timing(uint64_t timing_id);
+  std::unordered_map<uint64_t, EnergyTiming>& get_timings();
+
+ private:
+  std::unordered_map<uint64_t, EnergyTiming> timings_;
+};
+
+// CSV Export functions for TimingInfo
+namespace KokkosTools {
+namespace Timer {
+void export_kernels_csv(const std::deque<TimingInfo>& timings,
+                        const std::string& filename);
+void export_regions_csv(const std::deque<TimingInfo>& timings,
+                        const std::string& filename);
+void export_deepcopies_csv(const std::deque<TimingInfo>& timings,
+                           const std::string& filename);
+void print_kernels_summary(const std::deque<TimingInfo>& kernels);
+void print_regions_summary(const std::deque<TimingInfo>& regions);
+void print_deepcopies_summary(const std::deque<TimingInfo>& deepcopies);
+}  // namespace Timer
+}  // namespace KokkosTools
\ No newline at end of file
diff --git a/profiling/energy-profiler/common/tool_interface.hpp b/profiling/energy-profiler/common/tool_interface.hpp
new file mode 100644
index 000000000..2aef08b5f
--- /dev/null
+++ b/profiling/energy-profiler/common/tool_interface.hpp
@@ -0,0 +1,36 @@
+#pragma once
+
+#include <cstdint>
+#include "kp_core.hpp"
+
+// Abstract set of Kokkos profiling callbacks implemented by a tool.
+//
+// NOTE(review): begin_parallel_for takes the kernel id by value while
+// begin_parallel_scan and begin_parallel_reduce take a pointer; confirm
+// that the difference is intended.
+class ToolInterface {
+ public:
+  ToolInterface()          = default;
+  virtual ~ToolInterface() = default;
+  virtual void init_library(const int loadSeq, const uint64_t interfaceVer,
+                            const uint32_t devInfoCount,
+                            Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) = 0;
+  virtual void finalize_library() = 0;
+  virtual void begin_parallel_for(const char* name, const uint32_t devID,
+                                  uint64_t kID) = 0;
+  virtual void end_parallel_for(uint64_t kID) = 0;
+  virtual void begin_parallel_scan(const char* name, const uint32_t devID,
+                                   uint64_t* kID) = 0;
+  virtual void end_parallel_scan(uint64_t kID) = 0;
+  virtual void begin_parallel_reduce(const char* name, const uint32_t devID,
+                                     uint64_t* kID) = 0;
+  virtual void end_parallel_reduce(uint64_t kID) = 0;
+  virtual void begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle,
+                               const char* dst_name, const void* dst_ptr,
+                               Kokkos::Tools::SpaceHandle src_handle,
+                               const char* src_name, const void* src_ptr,
+                               uint64_t size) = 0;
+  virtual void end_deep_copy() = 0;
+  virtual void push_profile_region(const char* region_name) = 0;
+  virtual void pop_profile_region() = 0;
+};
\ No newline at end of file
diff --git a/profiling/energy-profiler/kokkos/CMakeLists.txt b/profiling/energy-profiler/kokkos/CMakeLists.txt
new file mode 100644
index 000000000..087194a56
--- /dev/null
+++ b/profiling/energy-profiler/kokkos/CMakeLists.txt
@@ -0,0 +1,80 @@
+# Threads is needed by the targets that build ../common/daemon.cpp
+# (std::thread).
+find_package(Threads REQUIRED)
+
+kp_add_library(kp_energy_kernel_timer kp_energy_kernel_timer.cpp
+  ../common/timer.cpp
+  ../common/filename_prefix.cpp
+  ../tools/kernel_timer_tool.cpp
+)
+
+target_include_directories(kp_energy_kernel_timer PRIVATE
+  ${CMAKE_CURRENT_SOURCE_DIR}
+)
+
+if(KOKKOSTOOLS_HAS_VARIORUM)
+  kp_add_library(kp_variorum_power kp_variorum_power.cpp
+    ../common/daemon.cpp
+    ../common/filename_prefix.cpp
+    ../common/timer.cpp
+    ../provider/provider_variorum.cpp
+    ../tools/kernel_timer_tool.cpp
+  )
+  target_link_libraries(kp_variorum_power PRIVATE variorum::variorum Threads::Threads)
+endif()
+
+# Every target below links CUDA::nvml, so stop processing this file without it.
+find_package(CUDAToolkit QUIET)
+
+if(NOT CUDAToolkit_FOUND)
+  message(STATUS "CUDAToolkit not found, skipping NVML power profiler.")
+  return()
+endif()
+
+# CUDA::nvml is an imported target defined by find_package(CUDAToolkit), not a
+# package of its own, so only the target's existence is tested.
+if(NOT TARGET CUDA::nvml)
+  message(STATUS "CUDA::nvml target not found, skipping NVML power profiler.")
+  return()
+endif()
+message(STATUS "Found CUDA NVML, making NVML power profiler available.")
+
+kp_add_library(kp_nvml_power kp_nvml_power.cpp
+  ../common/daemon.cpp
+  ../common/filename_prefix.cpp
+  ../common/timer.cpp
+  ../provider/provider_nvml.cpp
+  ../tools/kernel_timer_tool.cpp
+)
+target_link_libraries(kp_nvml_power PRIVATE CUDA::nvml Threads::Threads)
+
+target_include_directories(kp_nvml_power PRIVATE
+  ${CMAKE_CURRENT_SOURCE_DIR}
+)
+
+# NVML Direct Power Tool
+kp_add_library(kp_nvml_direct_power kp_nvml_direct_power.cpp
+  ../common/daemon.cpp
+  ../common/filename_prefix.cpp
+  ../common/timer.cpp
+  ../provider/provider_nvml.cpp
+  ../tools/kernel_timer_tool.cpp
+)
+target_link_libraries(kp_nvml_direct_power PRIVATE CUDA::nvml Threads::Threads)
+
+target_include_directories(kp_nvml_direct_power PRIVATE
+  ${CMAKE_CURRENT_SOURCE_DIR}
+)
+
+# NVML Energy Consumption Tool (no daemon needed)
+kp_add_library(kp_nvml_energy_consumption kp_nvml_energy_consumption.cpp
+  ../common/filename_prefix.cpp
+  ../common/timer.cpp
+  ../provider/provider_nvml.cpp
+  ../tools/kernel_timer_tool.cpp
+)
+target_link_libraries(kp_nvml_energy_consumption PRIVATE CUDA::nvml)
+
+target_include_directories(kp_nvml_energy_consumption PRIVATE
+  ${CMAKE_CURRENT_SOURCE_DIR}
+)
\ No newline at end of file
diff --git a/profiling/energy-profiler/kokkos/kp_energy_kernel_timer.cpp b/profiling/energy-profiler/kokkos/kp_energy_kernel_timer.cpp
new file mode 100644
index 000000000..8be118d20
--- /dev/null
+++ b/profiling/energy-profiler/kokkos/kp_energy_kernel_timer.cpp
@@ -0,0 +1,200 @@
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 
4.0
+// Copyright (2022) National Technology & Engineering
+// Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+/**
+ * Kokkos Kernel Timer for the Energy Profiler
+ * Forwards kernel, profile-region and deep-copy events to a KernelTimerTool
+ * and, at finalization, prints one summary table and exports one CSV file per
+ * category. This file itself performs no power or energy measurement.
+ */
+
+#include <cstring>
+#include <iostream>
+#include <string>
+
+#include "kp_core.hpp"
+#include "../tools/kernel_timer_tool.hpp"
+#include "../common/filename_prefix.hpp"
+#include "../common/timer.hpp"
+
+namespace KokkosTools {
+namespace KernelTimer {
+
+// --- Core Initialization ---
+KernelTimerTool timer;
+
+// When true, every hook also logs the event it received to stdout.
+bool VERBOSE = false;
+std::string KOKKOS_PROFILE_LIBRARY_NAME =
+    "Kokkos Kernel Timer for Energy Profiler";
+
+// --- Library Initialization/Finalization ---
+
+void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
+                          const uint32_t devInfoCount,
+                          Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) {
+  timer.init_library(loadSeq, interfaceVer, devInfoCount, deviceInfo);
+}
+
+// Finalizes the timer, then prints each summary table and writes the matching
+// "<prefix>_kernels.csv", "<prefix>_regions.csv" and "<prefix>_deepcopies.csv".
+void kokkosp_finalize_library() {
+  std::cout << "Kokkos Power Profiler: Finalizing library\n";
+  timer.finalize_library();
+  std::cout << "Kokkos Power Profiler: Library finalized\n";
+
+  std::string prefix = generate_prefix();
+
+  // Kernel summary
+  const auto& kernels = timer.get_kernel_timings();
+  KokkosTools::Timer::print_kernels_summary(kernels);
+  KokkosTools::Timer::export_kernels_csv(kernels, prefix + "_kernels.csv");
+
+  // Region summary
+  const auto& regions = timer.get_region_timings();
+  KokkosTools::Timer::print_regions_summary(regions);
+  KokkosTools::Timer::export_regions_csv(regions, prefix + "_regions.csv");
+
+  // Deep copy summary
+  const auto& deepcopies = timer.get_deep_copy_timings();
+  KokkosTools::Timer::print_deepcopies_summary(deepcopies);
+  KokkosTools::Timer::export_deepcopies_csv(deepcopies,
+                                            prefix + "_deepcopies.csv");
+}
+
+// --- Kernels Launch/End ---
+
+// NOTE(review): the timer receives the current value of *kID rather than the
+// pointer (unlike the scan and reduce hooks below) — confirm that the tool
+// does not need to assign the kernel id here.
+void kokkosp_begin_parallel_for(const char* name, const uint32_t devID,
+                                uint64_t* kID) {
+  timer.begin_parallel_for(name, devID, *kID);
+  if (VERBOSE) {
+    std::cout << "Kokkos Power Profiler: Started parallel_for '" << name
+              << "' on device " << devID << " with ID " << *kID << "\n";
+  }
+}
+
+void kokkosp_end_parallel_for(const uint64_t kID) {
+  timer.end_parallel_for(kID);
+  if (VERBOSE) {
+    std::cout << "Kokkos Power Profiler: Ended parallel_for with ID " << kID
+              << "\n";
+  }
+}
+
+void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID,
+                                 uint64_t* kID) {
+  timer.begin_parallel_scan(name, devID, kID);
+  if (VERBOSE) {
+    std::cout << "Kokkos Power Profiler: Started parallel_scan '" << name
+              << "' on device " << devID << " with ID " << *kID << "\n";
+  }
+}
+
+void kokkosp_end_parallel_scan(const uint64_t kID) {
+  timer.end_parallel_scan(kID);
+  if (VERBOSE) {
+    std::cout << "Kokkos Power Profiler: Ended parallel_scan with ID " << kID
+              << "\n";
+  }
+}
+
+void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID,
+                                   uint64_t* kID) {
+  timer.begin_parallel_reduce(name, devID, kID);
+  if (VERBOSE) {
+    std::cout << "Kokkos Power Profiler: Started parallel_reduce '" << name
+              << "' on device " << devID << " with ID " << *kID << "\n";
+  }
+}
+
+void kokkosp_end_parallel_reduce(const uint64_t kID) {
+  timer.end_parallel_reduce(kID);
+  if (VERBOSE) {
+    std::cout << "Kokkos Power Profiler: Ended parallel_reduce with ID " << kID
+              << "\n";
+  }
+}
+
+void kokkosp_push_profile_region(char const* regionName) {
+  timer.push_profile_region(regionName);
+  if (VERBOSE) {
+    std::cout << "Kokkos Power Profiler: Pushed profile region '" << regionName
+              << "'\n";
+  }
+}
+
+void kokkosp_pop_profile_region() {
+  timer.pop_profile_region();
+  if (VERBOSE) {
+    std::cout << "Kokkos Power Profiler: Popped profile region\n";
+  }
+}
+
+void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle,
+                             const char* dst_name, const void* dst_ptr,
+                             Kokkos::Tools::SpaceHandle src_handle,
+                             const char* src_name, const void* src_ptr,
+                             uint64_t size) {
+  timer.begin_deep_copy(dst_handle, dst_name, dst_ptr, src_handle, src_name,
+                        src_ptr, size);
+  if (VERBOSE) {
+    std::cout << "Kokkos Power Profiler: Started deep copy from '" << src_name
+              << "' to '" << dst_name << "' of size " << size << " bytes\n";
+  }
+}
+
+void kokkosp_end_deep_copy() {
+  timer.end_deep_copy();
+  if (VERBOSE) {
+    std::cout << "Kokkos Power Profiler: Ended deep copy\n";
+  }
+}
+
+// --- Event Set Configuration ---
+
+// Returns an event set holding the hooks defined above; every other callback
+// pointer is left null.
+Kokkos::Tools::Experimental::EventSet get_event_set() {
+  Kokkos::Tools::Experimental::EventSet my_event_set;
+  std::memset(&my_event_set, 0,
+              sizeof(my_event_set));  // zero any pointers not set here
+  my_event_set.init                  = kokkosp_init_library;
+  my_event_set.finalize              = kokkosp_finalize_library;
+  my_event_set.begin_deep_copy       = kokkosp_begin_deep_copy;
+  my_event_set.end_deep_copy         = kokkosp_end_deep_copy;
+  my_event_set.begin_parallel_for    = kokkosp_begin_parallel_for;
+  my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce;
+  my_event_set.begin_parallel_scan   = kokkosp_begin_parallel_scan;
+  my_event_set.end_parallel_for      = kokkosp_end_parallel_for;
+  my_event_set.end_parallel_reduce   = kokkosp_end_parallel_reduce;
+  my_event_set.end_parallel_scan     = kokkosp_end_parallel_scan;
+  my_event_set.push_region           = kokkosp_push_profile_region;
+  my_event_set.pop_region            = kokkosp_pop_profile_region;
+  return my_event_set;
+}
+
+}  // namespace KernelTimer
+}  // namespace KokkosTools
+
+extern "C" {
+
+namespace impl = KokkosTools::KernelTimer;
+
+EXPOSE_INIT(impl::kokkosp_init_library)
+EXPOSE_FINALIZE(impl::kokkosp_finalize_library)
+EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for)
+EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for)
+EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan)
+EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan)
+EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce)
+EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce)
+EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region)
+EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region)
+EXPOSE_BEGIN_DEEP_COPY(impl::kokkosp_begin_deep_copy)
+EXPOSE_END_DEEP_COPY(impl::kokkosp_end_deep_copy)
+}
diff --git a/profiling/energy-profiler/kokkos/kp_nvml_direct_power.cpp b/profiling/energy-profiler/kokkos/kp_nvml_direct_power.cpp
new file mode 100644
index 000000000..a2160a3f3
--- /dev/null
+++ b/profiling/energy-profiler/kokkos/kp_nvml_direct_power.cpp
@@ -0,0 +1,356 @@
+//@HEADER
+// ************************************************************************
+//
+// Kokkos Direct Power Profiler
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+/**
+ * @file kp_nvml_direct_power.cpp
+ * @brief Kokkos Direct Power Profiler Tool using NVML.
+ *
+ * This tool leverages a background daemon to periodically sample GPU power
+ * consumption using the NVML library's direct power measurement API. It starts
+ * monitoring when the Kokkos library is initialized and prints a detailed power
+ * profile upon finalization. 
+ */
+
+// NOTE(review): in this listing the nine #include directives below carry no
+// header name, and several template argument lists are absent (for example
+// after std::unique_ptr, std::vector, std::lock_guard, std::make_unique and
+// std::chrono::duration_cast) — confirm against the original file.
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "kp_core.hpp"
+#include "../common/daemon.hpp"
+#include "../provider/provider_nvml.hpp"
+#include "../common/filename_prefix.hpp"
+#include "../common/timer.hpp"
+#include "../tools/kernel_timer_tool.hpp"
+
+namespace KokkosTools {
+namespace DirectPower {
+
+// --- Configuration ---
+// The interval in milliseconds for power sampling; passed to the Daemon that
+// runs power_monitoring_tick().
+constexpr int SAMPLING_INTERVAL_MS = 20;
+
+// --- Global State for the Profiler ---
+// Both pointers are assigned in kokkosp_init_library(). g_nvml_provider is
+// reset again when NVML initialization fails, in which case no daemon is
+// created.
+static std::unique_ptr g_power_daemon;
+static std::unique_ptr g_nvml_provider;
+
+// Timer tool for kernel and region timing
+static KernelTimerTool g_timer;
+
+// One sampling instant: a timestamp plus one power reading per device.
+struct DirectPowerSample {
+  std::chrono::high_resolution_clock::time_point timestamp;
+  std::vector device_powers_watts;  // Power for each device
+};
+
+// Collected power samples. power_monitoring_tick() appends to the vector and
+// kokkosp_finalize_library() copies it while holding g_samples_mutex.
+static std::vector g_power_samples;
+static std::mutex g_samples_mutex;
+// Set in kokkosp_init_library() right before the daemon is started.
+static std::chrono::high_resolution_clock::time_point g_start_time;
+// Number of devices reported by the provider; stays 0 if NVML init fails.
+static size_t g_device_count = 0;
+
+/**
+ * @brief The function executed by the daemon thread to sample power.
+ *
+ * This function is called periodically. It fetches the current direct power
+ * usage from each GPU device using the NVML provider and stores it with a
+ * timestamp. It returns without sampling when the provider is missing or not
+ * initialized.
+ */
+void power_monitoring_tick() {
+  if (!g_nvml_provider || !g_nvml_provider->is_initialized()) {
+    return;
+  }
+
+  DirectPowerSample sample;
+  sample.timestamp = std::chrono::high_resolution_clock::now();
+  sample.device_powers_watts.reserve(g_device_count);
+
+  // Collect power for each device
+  for (size_t i = 0; i < g_device_count; ++i) {
+    double power = g_nvml_provider->get_device_power_usage_direct(i);
+    sample.device_powers_watts.push_back(power);
+  }
+
+  // The lock is taken only for the append; the device queries above run
+  // without it.
+  std::lock_guard lock(g_samples_mutex);
+  g_power_samples.push_back(std::move(sample));
+}
+
+/**
+ * @brief Calculates statistics from the collected power samples.
+ *
+ * Readings below zero, and samples that hold no entry for @p device_index,
+ * are treated as invalid and skipped. All outputs are set to 0.0 when
+ * @p samples is empty, @p device_index is not below g_device_count, or no
+ * valid reading exists for the device.
+ *
+ * @param samples A constant reference to the vector of power samples.
+ * @param device_index The device index to analyze.
+ * @param[out] avg_power Average of the valid power readings.
+ * @param[out] min_power Minimum valid power reading.
+ * @param[out] max_power Maximum valid power reading.
+ * @param[out] total_energy Sum over sample intervals of the previous sample's
+ *             power times the interval length in seconds.
+ */
+void analyze_device_power_data(const std::vector& samples,
+                               size_t device_index, double& avg_power,
+                               double& min_power, double& max_power,
+                               double& total_energy) {
+  if (samples.empty() || device_index >= g_device_count) {
+    avg_power = min_power = max_power = total_energy = 0.0;
+    return;
+  }
+
+  // Find first valid sample for this device
+  size_t first_valid = 0;
+  while (first_valid < samples.size() &&
+         (device_index >= samples[first_valid].device_powers_watts.size() ||
+          samples[first_valid].device_powers_watts[device_index] < 0)) {
+    first_valid++;
+  }
+
+  if (first_valid >= samples.size()) {
+    avg_power = min_power = max_power = total_energy = 0.0;
+    return;
+  }
+
+  // Seed min/max with the first valid reading so they never start from an
+  // invalid (negative) value.
+  min_power = samples[first_valid].device_powers_watts[device_index];
+  max_power = samples[first_valid].device_powers_watts[device_index];
+  double power_sum = 0.0;
+  size_t valid_samples = 0;
+  total_energy = 0.0;
+
+  for (size_t i = first_valid; i < samples.size(); ++i) {
+    if (device_index >= samples[i].device_powers_watts.size()) continue;
+
+    const double power = samples[i].device_powers_watts[device_index];
+    if (power < 0) continue;  // Skip invalid measurements
+
+    power_sum += power;
+    valid_samples++;
+    if (power < min_power) min_power = power;
+    if (power > max_power) max_power = power;
+
+    // Energy = Power * Time. Time delta is from the previous sample.
+    // Because invalid samples `continue` above, an interval that ends at an
+    // invalid sample contributes no energy.
+    if (i > first_valid) {
+      double time_delta_s = std::chrono::duration(
+                                samples[i].timestamp - samples[i - 1].timestamp)
+                                .count();
+      // Use previous sample's power for energy calculation
+      if (device_index < samples[i - 1].device_powers_watts.size() &&
+          samples[i - 1].device_powers_watts[device_index] >= 0) {
+        total_energy +=
+            samples[i - 1].device_powers_watts[device_index] * time_delta_s;
+      }
+    }
+  }
+
+  avg_power = valid_samples > 0 ? power_sum / valid_samples : 0.0;
+}
+
+// Writes every collected sample to `filename` as CSV: one "timestamp" column
+// followed by one "device_<i>_power_watts" column per device. A device that
+// has no entry in a sample is written as -1.
+// Reads g_power_samples without taking g_samples_mutex; the only call visible
+// here is made from kokkosp_finalize_library() after the daemon was stopped.
+void export_direct_power_data_csv(const std::string& filename) {
+  std::ofstream file(filename);
+  if (!file.is_open()) {
+    std::cerr << "ERROR: Unable to open file " << filename << " for writing.\n";
+    return;
+  }
+
+  // Write header
+  file << "timestamp";
+  for (size_t i = 0; i < g_device_count; ++i) {
+    file << ",device_" << i << "_power_watts";
+  }
+  file << "\n";
+
+  // Write data
+  for (const auto& sample : g_power_samples) {
+    auto timestamp = std::chrono::duration_cast(
+                         sample.timestamp.time_since_epoch())
+                         .count();
+    file << timestamp;
+    for (size_t i = 0; i < g_device_count; ++i) {
+      if (i < sample.device_powers_watts.size()) {
+        file << "," << sample.device_powers_watts[i];
+      } else {
+        file << ",-1";  // Invalid measurement
+      }
+    }
+    file << "\n";
+  }
+  file.close();
+  std::cout << "Direct power data exported to " << filename << std::endl;
+}
+
+// --- Kokkos Profiling Hooks ---
+
+// Initializes the timer tool and the NVML provider, then starts the sampling
+// daemon. When the provider fails to initialize, the function returns early:
+// the timer tool stays active but no power samples are collected.
+void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
+                          const uint32_t devInfoCount,
+                          Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) {
+  std::cout << "Kokkos Direct Power Profiler: Initializing...\n";
+  std::cout << "Sampling Interval: " << SAMPLING_INTERVAL_MS << " ms\n";
+
+  // Initialize the timer tool
+  g_timer.init_library(loadSeq, interfaceVer, devInfoCount, deviceInfo);
+
+  g_nvml_provider = std::make_unique();
+  if (!g_nvml_provider->initialize()) {
+    std::cerr << "ERROR: Failed to initialize NVML provider. Direct power "
+                 "profiling disabled.\n";
+    g_nvml_provider.reset();  // Release the provider
+    return;
+  }
+
+  g_device_count = g_nvml_provider->get_device_count();
+  std::cout << "SUCCESS: NVML provider initialized with " << g_device_count
+            << " device(s).\n";
+
+  // Print device information
+  for (size_t i = 0; i < g_device_count; ++i) {
+    std::cout << " Device " << i << ": " << g_nvml_provider->get_device_name(i)
+              << std::endl;
+  }
+
+  // Start the monitoring daemon
+  g_power_daemon =
+      std::make_unique(power_monitoring_tick, SAMPLING_INTERVAL_MS);
+  g_start_time = std::chrono::high_resolution_clock::now();
+  g_power_daemon->start();
+  std::cout << "SUCCESS: Direct power monitoring daemon started.\n";
+}
+
+// Stops the daemon, prints the per-device power statistics and exports the
+// raw samples (both only when samples exist), prints and exports the
+// kernel/region/deep-copy timings, and finally finalizes the NVML provider.
+void kokkosp_finalize_library() {
+  std::cout << "\nKokkos Direct Power Profiler: Finalizing...\n";
+
+  // The daemon pointer is null when NVML initialization failed.
+  if (g_power_daemon) {
+    g_power_daemon->stop();
+    std::cout << "SUCCESS: Direct power monitoring daemon stopped.\n";
+  }
+
+  // Finalize the timer
+  g_timer.finalize_library();
+
+  // Make a copy of the samples to avoid holding the lock during analysis
+  std::vector samples_copy;
+  {
+    std::lock_guard lock(g_samples_mutex);
+    samples_copy = g_power_samples;
+  }
+
+  if (samples_copy.empty()) {
+    std::cout << "No direct power samples collected.\n";
+  } else {
+    auto end_time = std::chrono::high_resolution_clock::now();
+    auto total_duration_s =
+        std::chrono::duration(end_time - g_start_time).count();
+
+    std::cout << "\n==== Direct Power Profile Summary ====\n";
+    // std::fixed and the precision stay set on std::cout after this block.
+    std::cout << std::fixed << std::setprecision(2);
+    std::cout << "Total Monitoring Duration: " << total_duration_s << " s\n";
+    std::cout << "Samples Collected: " << samples_copy.size() << "\n";
+    std::cout << "Number of Devices: " << g_device_count << "\n";
+    std::cout << "---------------------------------------\n";
+
+    // Analyze each device separately
+    for (size_t dev = 0; dev < g_device_count; ++dev) {
+      double avg_power, min_power, max_power, total_energy;
+      analyze_device_power_data(samples_copy, dev, avg_power, min_power,
+                                max_power, total_energy);
+
+      std::cout << "Device " << dev << " ("
+                << g_nvml_provider->get_device_name(dev) << "):\n";
+      std::cout << " Average Power: " << avg_power << " W\n";
+      std::cout << " Minimum Power: " << min_power << " W\n";
+      std::cout << " Maximum Power: " << max_power << " W\n";
+      std::cout << " Total Energy Consumed: " << total_energy << " J\n";
+      std::cout << "---------------------------------------\n";
+    }
+
+    std::string csv_filename =
+        generate_prefix() + "_nvml_direct_power_samples.csv";
+    std::cout << "Exporting direct power data to " << csv_filename << "...\n";
+    export_direct_power_data_csv(csv_filename);
+  }
+
+  // The timing summaries are produced whether or not power samples exist.
+  std::string prefix = generate_prefix();
+
+  const auto& kernels = g_timer.get_kernel_timings();
+  KokkosTools::Timer::print_kernels_summary(kernels);
+  KokkosTools::Timer::export_kernels_csv(kernels, prefix + "_kernels.csv");
+
+  const auto& regions = g_timer.get_region_timings();
+  KokkosTools::Timer::print_regions_summary(regions);
+  KokkosTools::Timer::export_regions_csv(regions, prefix + "_regions.csv");
+
+  const auto& deepcopies = g_timer.get_deep_copy_timings();
+  KokkosTools::Timer::print_deepcopies_summary(deepcopies);
+  KokkosTools::Timer::export_deepcopies_csv(deepcopies,
+                                            prefix + "_deepcopies.csv");
+
+  // The provider is finalized last, after get_device_name() was used above.
+  if (g_nvml_provider) {
+    g_nvml_provider->finalize();
+    std::cout << "SUCCESS: NVML provider finalized.\n";
+  }
+}
+
+// --- Hook Implementations with Timer Integration ---
+// NOTE(review): the timer receives the current value of *kID, not the
+// pointer — confirm that the tool does not need to assign the kernel id.
+void kokkosp_begin_parallel_for(const char* name, uint32_t devID,
+                                uint64_t* kID) {
+  g_timer.begin_parallel_for(name, devID, *kID);
+}
+void kokkosp_end_parallel_for(uint64_t kID)
{ g_timer.end_parallel_for(kID); } +void kokkosp_begin_parallel_scan(const char* name, uint32_t devID, + uint64_t* kID) { + g_timer.begin_parallel_scan(name, devID, kID); +} +void kokkosp_end_parallel_scan(uint64_t kID) { g_timer.end_parallel_scan(kID); } +void kokkosp_begin_parallel_reduce(const char* name, uint32_t devID, + uint64_t* kID) { + g_timer.begin_parallel_reduce(name, devID, kID); +} +void kokkosp_end_parallel_reduce(uint64_t kID) { + g_timer.end_parallel_reduce(kID); +} +void kokkosp_push_profile_region(const char* regionName) { + g_timer.push_profile_region(regionName); +} +void kokkosp_pop_profile_region() { g_timer.pop_profile_region(); } +void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, + const char* dst_name, const void* dst_ptr, + Kokkos::Tools::SpaceHandle src_handle, + const char* src_name, const void* src_ptr, + uint64_t size) { + g_timer.begin_deep_copy(dst_handle, dst_name, dst_ptr, src_handle, src_name, + src_ptr, size); +} +void kokkosp_end_deep_copy() { g_timer.end_deep_copy(); } + +} // namespace DirectPower +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::DirectPower; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) +EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) +EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) +EXPOSE_BEGIN_DEEP_COPY(impl::kokkosp_begin_deep_copy) +EXPOSE_END_DEEP_COPY(impl::kokkosp_end_deep_copy) + +} // extern "C" diff --git a/profiling/energy-profiler/kokkos/kp_nvml_energy_consumption.cpp b/profiling/energy-profiler/kokkos/kp_nvml_energy_consumption.cpp 
new file mode 100644 index 000000000..94f8bcf0e --- /dev/null +++ b/profiling/energy-profiler/kokkos/kp_nvml_energy_consumption.cpp @@ -0,0 +1,533 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos Energy Consumption Profiler +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +/** + * @file kp_nvml_energy_consumption.cpp + * @brief Kokkos Energy Consumption Profiler Tool using NVML. + * + * This tool measures energy consumption by tracking the cumulative energy + * values from NVML at the beginning and end of kernels, regions, and deep + * copies. It does not use a background daemon since the energy consumption is a + * cumulative counter that can be read directly when events occur. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kp_core.hpp" +#include "../provider/provider_nvml.hpp" +#include "../common/filename_prefix.hpp" +#include "../common/timer.hpp" +#include "../tools/kernel_timer_tool.hpp" + +namespace KokkosTools { +namespace EnergyConsumption { + +// --- Global State for the Profiler --- +static std::unique_ptr g_nvml_provider; + +// Timer tool for kernel and region timing +static KernelTimerTool g_timer; + +static size_t g_device_count = 0; +static std::chrono::high_resolution_clock::time_point g_start_time; + +// Energy tracking structures +struct EnergySnapshot { + std::chrono::high_resolution_clock::time_point timestamp; + std::vector + device_energies_joules; // Energy for each device in Joules +}; + +struct KernelEnergyRecord { + std::string name; + uint32_t devID; + uint64_t kID; + EnergySnapshot start_energy; + EnergySnapshot end_energy; + double duration_seconds; + std::vector energy_consumed_joules; // Per device +}; + +struct RegionEnergyRecord { + std::string name; + EnergySnapshot start_energy; + EnergySnapshot end_energy; + double duration_seconds; + std::vector energy_consumed_joules; // Per device +}; + +struct DeepCopyEnergyRecord { + std::string dst_name; + std::string src_name; + uint64_t size; + EnergySnapshot start_energy; + EnergySnapshot end_energy; + double duration_seconds; + std::vector energy_consumed_joules; // Per device +}; + +// Storage for energy records +static std::vector g_kernel_energy_records; +static std::vector g_region_energy_records; +static std::vector g_deep_copy_energy_records; +static std::mutex g_energy_mutex; + +// Stack for nested regions +static std::stack> g_region_stack; + +// Maps for tracking active kernels/deep copies +static std::unordered_map g_active_kernels; +static std::pair g_active_deep_copy = {false, {}}; + +/** + * @brief Captures a snapshot of current energy consumption for all devices. 
+ */ +EnergySnapshot capture_energy_snapshot() { + EnergySnapshot snapshot; + snapshot.timestamp = std::chrono::high_resolution_clock::now(); + snapshot.device_energies_joules.reserve(g_device_count); + + if (!g_nvml_provider || !g_nvml_provider->is_initialized()) { + // Fill with invalid values + for (size_t i = 0; i < g_device_count; ++i) { + snapshot.device_energies_joules.push_back(-1.0); + } + return snapshot; + } + + // Collect energy for each device + for (size_t i = 0; i < g_device_count; ++i) { + double energy = g_nvml_provider->get_current_energy_consumption(i); + snapshot.device_energies_joules.push_back(energy); + } + + return snapshot; +} + +/** + * @brief Calculates energy consumed between two snapshots. + */ +std::vector calculate_energy_delta(const EnergySnapshot& start, + const EnergySnapshot& end) { + std::vector delta(g_device_count, 0.0); + + for (size_t i = 0; i < g_device_count; ++i) { + if (i < start.device_energies_joules.size() && + i < end.device_energies_joules.size() && + start.device_energies_joules[i] >= 0 && + end.device_energies_joules[i] >= 0) { + delta[i] = + end.device_energies_joules[i] - start.device_energies_joules[i]; + // Handle potential counter reset (though rare) + if (delta[i] < 0) { + delta[i] = 0; // Reset occurred, use 0 as approximation + } + } else { + delta[i] = -1.0; // Invalid measurement + } + } + + return delta; +} + +/** + * @brief Calculates duration in seconds between two snapshots. 
+ */ +double calculate_duration_seconds(const EnergySnapshot& start, + const EnergySnapshot& end) { + return std::chrono::duration(end.timestamp - start.timestamp).count(); +} + +void export_energy_consumption_csv(const std::string& filename) { + std::ofstream file(filename); + if (!file.is_open()) { + std::cerr << "ERROR: Unable to open file " << filename << " for writing.\n"; + return; + } + + // Write kernels + file << "type,name,duration_seconds"; + for (size_t i = 0; i < g_device_count; ++i) { + file << ",device_" << i << "_energy_joules"; + } + file << "\n"; + + for (const auto& record : g_kernel_energy_records) { + file << "kernel," << record.name << "," << record.duration_seconds; + for (size_t i = 0; i < g_device_count; ++i) { + if (i < record.energy_consumed_joules.size()) { + file << "," << record.energy_consumed_joules[i]; + } else { + file << ",-1"; + } + } + file << "\n"; + } + + for (const auto& record : g_region_energy_records) { + file << "region," << record.name << "," << record.duration_seconds; + for (size_t i = 0; i < g_device_count; ++i) { + if (i < record.energy_consumed_joules.size()) { + file << "," << record.energy_consumed_joules[i]; + } else { + file << ",-1"; + } + } + file << "\n"; + } + + for (const auto& record : g_deep_copy_energy_records) { + std::string name = record.src_name + "_to_" + record.dst_name + "_size_" + + std::to_string(record.size); + file << "deepcopy," << name << "," << record.duration_seconds; + for (size_t i = 0; i < g_device_count; ++i) { + if (i < record.energy_consumed_joules.size()) { + file << "," << record.energy_consumed_joules[i]; + } else { + file << ",-1"; + } + } + file << "\n"; + } + + file.close(); + std::cout << "Energy consumption data exported to " << filename << std::endl; +} + +void print_energy_summary() { + std::cout << "\n==== Energy Consumption Profile Summary ====\n"; + std::cout << std::fixed << std::setprecision(4); + + // Calculate total energy per device + std::vector 
total_kernel_energy(g_device_count, 0.0); + std::vector total_region_energy(g_device_count, 0.0); + std::vector total_deepcopy_energy(g_device_count, 0.0); + + for (const auto& record : g_kernel_energy_records) { + for (size_t i = 0; + i < g_device_count && i < record.energy_consumed_joules.size(); ++i) { + if (record.energy_consumed_joules[i] >= 0) { + total_kernel_energy[i] += record.energy_consumed_joules[i]; + } + } + } + + for (const auto& record : g_region_energy_records) { + for (size_t i = 0; + i < g_device_count && i < record.energy_consumed_joules.size(); ++i) { + if (record.energy_consumed_joules[i] >= 0) { + total_region_energy[i] += record.energy_consumed_joules[i]; + } + } + } + + for (const auto& record : g_deep_copy_energy_records) { + for (size_t i = 0; + i < g_device_count && i < record.energy_consumed_joules.size(); ++i) { + if (record.energy_consumed_joules[i] >= 0) { + total_deepcopy_energy[i] += record.energy_consumed_joules[i]; + } + } + } + + std::cout << "Number of Kernels: " << g_kernel_energy_records.size() + << "\n"; + std::cout << "Number of Regions: " << g_region_energy_records.size() + << "\n"; + std::cout << "Number of Deep Copies: " + << g_deep_copy_energy_records.size() << "\n"; + std::cout << "Number of Devices: " << g_device_count << "\n"; + std::cout << "--------------------------------------------\n"; + + for (size_t dev = 0; dev < g_device_count; ++dev) { + std::cout << "Device " << dev << " (" + << g_nvml_provider->get_device_name(dev) << "):\n"; + std::cout << " Total Kernel Energy: " << total_kernel_energy[dev] + << " J\n"; + std::cout << " Total Region Energy: " << total_region_energy[dev] + << " J\n"; + std::cout << " Total Deep Copy Energy: " << total_deepcopy_energy[dev] + << " J\n"; + std::cout << " Total Energy: " + << (total_kernel_energy[dev] + total_region_energy[dev] + + total_deepcopy_energy[dev]) + << " J\n"; + std::cout << "--------------------------------------------\n"; + } +} + +// --- Kokkos Profiling Hooks 
--- + +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { + std::cout << "Kokkos Energy Consumption Profiler: Initializing...\n"; + + // Initialize the timer tool + g_timer.init_library(loadSeq, interfaceVer, devInfoCount, deviceInfo); + + g_nvml_provider = std::make_unique(); + if (!g_nvml_provider->initialize()) { + std::cerr << "ERROR: Failed to initialize NVML provider. Energy " + "consumption profiling disabled.\n"; + g_nvml_provider.reset(); // Release the provider + return; + } + + g_device_count = g_nvml_provider->get_device_count(); + std::cout << "SUCCESS: NVML provider initialized with " << g_device_count + << " device(s).\n"; + + // Print device information + for (size_t i = 0; i < g_device_count; ++i) { + std::cout << " Device " << i << ": " << g_nvml_provider->get_device_name(i) + << std::endl; + } + + g_start_time = std::chrono::high_resolution_clock::now(); + std::cout << "SUCCESS: Energy consumption monitoring initialized.\n"; +} + +void kokkosp_finalize_library() { + std::cout << "\nKokkos Energy Consumption Profiler: Finalizing...\n"; + + // Finalize the timer + g_timer.finalize_library(); + + auto end_time = std::chrono::high_resolution_clock::now(); + auto total_duration_s = + std::chrono::duration(end_time - g_start_time).count(); + + std::cout << "Total Monitoring Duration: " << total_duration_s << " s\n"; + + print_energy_summary(); + + std::string prefix = generate_prefix(); + + // Export energy data + std::string csv_filename = prefix + "_nvml_energy_consumption.csv"; + std::cout << "Exporting energy consumption data to " << csv_filename + << "...\n"; + export_energy_consumption_csv(csv_filename); + + // Export timing data + const auto& kernels = g_timer.get_kernel_timings(); + KokkosTools::Timer::print_kernels_summary(kernels); + KokkosTools::Timer::export_kernels_csv(kernels, prefix + "_kernels.csv"); + + const auto& regions = 
g_timer.get_region_timings(); + KokkosTools::Timer::print_regions_summary(regions); + KokkosTools::Timer::export_regions_csv(regions, prefix + "_regions.csv"); + + const auto& deepcopies = g_timer.get_deep_copy_timings(); + KokkosTools::Timer::print_deepcopies_summary(deepcopies); + KokkosTools::Timer::export_deepcopies_csv(deepcopies, + prefix + "_deepcopies.csv"); + + if (g_nvml_provider) { + g_nvml_provider->finalize(); + std::cout << "SUCCESS: NVML provider finalized.\n"; + } +} + +// --- Hook Implementations with Timer and Energy Integration --- +void kokkosp_begin_parallel_for(const char* name, uint32_t devID, + uint64_t* kID) { + g_timer.begin_parallel_for(name, devID, *kID); + + // Capture energy snapshot + KernelEnergyRecord record; + record.name = name; + record.devID = devID; + record.kID = *kID; + record.start_energy = capture_energy_snapshot(); + + std::lock_guard lock(g_energy_mutex); + g_active_kernels[*kID] = record; +} + +void kokkosp_end_parallel_for(uint64_t kID) { + g_timer.end_parallel_for(kID); + + std::lock_guard lock(g_energy_mutex); + auto it = g_active_kernels.find(kID); + if (it != g_active_kernels.end()) { + it->second.end_energy = capture_energy_snapshot(); + it->second.duration_seconds = calculate_duration_seconds( + it->second.start_energy, it->second.end_energy); + it->second.energy_consumed_joules = + calculate_energy_delta(it->second.start_energy, it->second.end_energy); + + g_kernel_energy_records.push_back(it->second); + g_active_kernels.erase(it); + } +} + +void kokkosp_begin_parallel_scan(const char* name, uint32_t devID, + uint64_t* kID) { + g_timer.begin_parallel_scan(name, devID, kID); + + KernelEnergyRecord record; + record.name = name; + record.devID = devID; + record.kID = *kID; + record.start_energy = capture_energy_snapshot(); + + std::lock_guard lock(g_energy_mutex); + g_active_kernels[*kID] = record; +} + +void kokkosp_end_parallel_scan(uint64_t kID) { + g_timer.end_parallel_scan(kID); + + std::lock_guard 
lock(g_energy_mutex); + auto it = g_active_kernels.find(kID); + if (it != g_active_kernels.end()) { + it->second.end_energy = capture_energy_snapshot(); + it->second.duration_seconds = calculate_duration_seconds( + it->second.start_energy, it->second.end_energy); + it->second.energy_consumed_joules = + calculate_energy_delta(it->second.start_energy, it->second.end_energy); + + g_kernel_energy_records.push_back(it->second); + g_active_kernels.erase(it); + } +} + +void kokkosp_begin_parallel_reduce(const char* name, uint32_t devID, + uint64_t* kID) { + g_timer.begin_parallel_reduce(name, devID, kID); + + KernelEnergyRecord record; + record.name = name; + record.devID = devID; + record.kID = *kID; + record.start_energy = capture_energy_snapshot(); + + std::lock_guard lock(g_energy_mutex); + g_active_kernels[*kID] = record; +} + +void kokkosp_end_parallel_reduce(uint64_t kID) { + g_timer.end_parallel_reduce(kID); + + std::lock_guard lock(g_energy_mutex); + auto it = g_active_kernels.find(kID); + if (it != g_active_kernels.end()) { + it->second.end_energy = capture_energy_snapshot(); + it->second.duration_seconds = calculate_duration_seconds( + it->second.start_energy, it->second.end_energy); + it->second.energy_consumed_joules = + calculate_energy_delta(it->second.start_energy, it->second.end_energy); + + g_kernel_energy_records.push_back(it->second); + g_active_kernels.erase(it); + } +} + +void kokkosp_push_profile_region(const char* regionName) { + g_timer.push_profile_region(regionName); + + EnergySnapshot snapshot = capture_energy_snapshot(); + g_region_stack.push({std::string(regionName), snapshot}); +} + +void kokkosp_pop_profile_region() { + g_timer.pop_profile_region(); + + if (!g_region_stack.empty()) { + auto [name, start_energy] = g_region_stack.top(); + g_region_stack.pop(); + + RegionEnergyRecord record; + record.name = name; + record.start_energy = start_energy; + record.end_energy = capture_energy_snapshot(); + record.duration_seconds = + 
calculate_duration_seconds(record.start_energy, record.end_energy); + record.energy_consumed_joules = + calculate_energy_delta(record.start_energy, record.end_energy); + + std::lock_guard lock(g_energy_mutex); + g_region_energy_records.push_back(record); + } +} + +void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, + const char* dst_name, const void* dst_ptr, + Kokkos::Tools::SpaceHandle src_handle, + const char* src_name, const void* src_ptr, + uint64_t size) { + g_timer.begin_deep_copy(dst_handle, dst_name, dst_ptr, src_handle, src_name, + src_ptr, size); + + std::lock_guard lock(g_energy_mutex); + if (!g_active_deep_copy.first) { + g_active_deep_copy.second.dst_name = dst_name ? dst_name : "unknown"; + g_active_deep_copy.second.src_name = src_name ? src_name : "unknown"; + g_active_deep_copy.second.size = size; + g_active_deep_copy.second.start_energy = capture_energy_snapshot(); + g_active_deep_copy.first = true; + } +} + +void kokkosp_end_deep_copy() { + g_timer.end_deep_copy(); + + std::lock_guard lock(g_energy_mutex); + if (g_active_deep_copy.first) { + g_active_deep_copy.second.end_energy = capture_energy_snapshot(); + g_active_deep_copy.second.duration_seconds = + calculate_duration_seconds(g_active_deep_copy.second.start_energy, + g_active_deep_copy.second.end_energy); + g_active_deep_copy.second.energy_consumed_joules = + calculate_energy_delta(g_active_deep_copy.second.start_energy, + g_active_deep_copy.second.end_energy); + + g_deep_copy_energy_records.push_back(g_active_deep_copy.second); + g_active_deep_copy.first = false; + } +} + +} // namespace EnergyConsumption +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::EnergyConsumption; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) 
+EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) +EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) +EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) +EXPOSE_BEGIN_DEEP_COPY(impl::kokkosp_begin_deep_copy) +EXPOSE_END_DEEP_COPY(impl::kokkosp_end_deep_copy) + +} // extern "C" diff --git a/profiling/energy-profiler/kokkos/kp_nvml_power.cpp b/profiling/energy-profiler/kokkos/kp_nvml_power.cpp new file mode 100644 index 000000000..cedbfa157 --- /dev/null +++ b/profiling/energy-profiler/kokkos/kp_nvml_power.cpp @@ -0,0 +1,288 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos Power Profiler +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +/** + * @file kp_nvml_power.cpp + * @brief Kokkos Power Profiler Tool using NVML. + * + * This tool leverages a background daemon to periodically sample GPU power + * consumption using the NVML library. It starts monitoring when the Kokkos + * library is initialized and prints a detailed power profile upon finalization. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kp_core.hpp" +#include "../common/daemon.hpp" +#include "../provider/provider_nvml.hpp" +#include "../common/filename_prefix.hpp" +#include "../common/timer.hpp" +#include "../tools/kernel_timer_tool.hpp" + +namespace KokkosTools { +namespace Power { + +// --- Configuration --- +// The interval in milliseconds for power sampling. 
+constexpr int SAMPLING_INTERVAL_MS = 20; + +// --- Global State for the Profiler --- +static std::unique_ptr g_power_daemon; +static std::unique_ptr g_nvml_provider; + +// Timer tool for kernel and region timing +static KernelTimerTool g_timer; + +// Structure to store a single power measurement with a timestamp. +struct PowerSample { + std::chrono::high_resolution_clock::time_point timestamp; + double power_watts; +}; + +// Thread-safe storage for collected power samples. +static std::vector g_power_samples; +static std::mutex g_samples_mutex; +static std::chrono::high_resolution_clock::time_point g_start_time; + +/** + * @brief The function executed by the daemon thread to sample power. + * + * This function is called periodically. It fetches the current total power + * usage from the NVML provider and stores it with a timestamp. + */ +void power_monitoring_tick() { + if (!g_nvml_provider || !g_nvml_provider->is_initialized()) { + return; + } + + double current_power = g_nvml_provider->get_total_power_usage(); + + std::lock_guard lock(g_samples_mutex); + g_power_samples.push_back( + {std::chrono::high_resolution_clock::now(), current_power}); +} + +/** + * @brief Calculates statistics from the collected power samples. + * + * @param samples A constant reference to the vector of power samples. + * @param[out] avg_power Average power consumption. + * @param[out] min_power Minimum power consumption. + * @param[out] max_power Maximum power consumption. + * @param[out] total_energy Total energy consumed in Joules. 
+ */ +void analyze_power_data(const std::vector& samples, + double& avg_power, double& min_power, double& max_power, + double& total_energy) { + if (samples.empty()) { + avg_power = min_power = max_power = total_energy = 0.0; + return; + } + + min_power = samples[0].power_watts; + max_power = samples[0].power_watts; + double power_sum = 0.0; + total_energy = 0.0; + + for (size_t i = 0; i < samples.size(); ++i) { + const double power = samples[i].power_watts; + power_sum += power; + if (power < min_power) min_power = power; + if (power > max_power) max_power = power; + + // Energy = Power * Time. Time delta is from the previous sample. + if (i > 0) { + double time_delta_s = std::chrono::duration( + samples[i].timestamp - samples[i - 1].timestamp) + .count(); + total_energy += samples[i - 1].power_watts * time_delta_s; + } + } + + avg_power = power_sum / samples.size(); +} + +void export_power_data_csv(const std::string& filename) { + std::ofstream file(filename); + if (!file.is_open()) { + std::cerr << "ERROR: Unable to open file " << filename << " for writing.\n"; + return; + } + file << "timestamp,power_watts\n"; + for (const auto& sample : g_power_samples) { + auto timestamp = std::chrono::duration_cast( + sample.timestamp.time_since_epoch()) + .count(); + file << timestamp << "," << sample.power_watts << "\n"; + } + file.close(); + std::cout << "Power data exported to " << filename << std::endl; +} + +// --- Kokkos Profiling Hooks --- + +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { + std::cout << "Kokkos Power Profiler: Initializing...\n"; + std::cout << "Sampling Interval: " << SAMPLING_INTERVAL_MS << " ms\n"; + + // Initialize the timer tool + g_timer.init_library(loadSeq, interfaceVer, devInfoCount, deviceInfo); + + g_nvml_provider = std::make_unique(); + if (!g_nvml_provider->initialize()) { + std::cerr << "ERROR: Failed to initialize NVML 
provider. Power profiling " + "disabled.\n"; + g_nvml_provider.reset(); // Release the provider + return; + } + + std::cout << "SUCCESS: NVML provider initialized with " + << g_nvml_provider->get_device_count() << " device(s).\n"; + + // Start the monitoring daemon + g_power_daemon = + std::make_unique(power_monitoring_tick, SAMPLING_INTERVAL_MS); + g_start_time = std::chrono::high_resolution_clock::now(); + g_power_daemon->start(); + std::cout << "SUCCESS: Power monitoring daemon started.\n"; +} + +void kokkosp_finalize_library() { + std::cout << "\nKokkos Power Profiler: Finalizing...\n"; + + if (g_power_daemon) { + g_power_daemon->stop(); + std::cout << "SUCCESS: Power monitoring daemon stopped.\n"; + } + + // Finalize the timer + g_timer.finalize_library(); + + // Make a copy of the samples to avoid holding the lock during analysis + std::vector samples_copy; + { + std::lock_guard lock(g_samples_mutex); + samples_copy = g_power_samples; + } + + if (samples_copy.empty()) { + std::cout << "No power samples collected.\n"; + } else { + auto end_time = std::chrono::high_resolution_clock::now(); + auto total_duration_s = + std::chrono::duration(end_time - g_start_time).count(); + + double avg_power, min_power, max_power, total_energy; + analyze_power_data(samples_copy, avg_power, min_power, max_power, + total_energy); + + std::cout << "\n==== Power Profile Summary ====\n"; + std::cout << std::fixed << std::setprecision(2); + std::cout << "Total Monitoring Duration: " << total_duration_s << " s\n"; + std::cout << "Samples Collected: " << samples_copy.size() << "\n"; + std::cout << "---------------------------------\n"; + std::cout << "Average Power: " << avg_power << " W\n"; + std::cout << "Minimum Power: " << min_power << " W\n"; + std::cout << "Maximum Power: " << max_power << " W\n"; + std::cout << "Total Energy Consumed: " << total_energy << " J\n"; + std::cout << "===============================\n"; + + std::string csv_filename = generate_prefix() + 
"_nvml_power_samples.csv"; + std::cout << "Exporting power data to " << csv_filename << "...\n"; + export_power_data_csv(csv_filename); + } + + std::string prefix = generate_prefix(); + + const auto& kernels = g_timer.get_kernel_timings(); + KokkosTools::Timer::print_kernels_summary(kernels); + KokkosTools::Timer::export_kernels_csv(kernels, prefix + "_kernels.csv"); + + const auto& regions = g_timer.get_region_timings(); + KokkosTools::Timer::print_regions_summary(regions); + KokkosTools::Timer::export_regions_csv(regions, prefix + "_regions.csv"); + + const auto& deepcopies = g_timer.get_deep_copy_timings(); + KokkosTools::Timer::print_deepcopies_summary(deepcopies); + KokkosTools::Timer::export_deepcopies_csv(deepcopies, + prefix + "_deepcopies.csv"); + + if (g_nvml_provider) { + g_nvml_provider->finalize(); + std::cout << "SUCCESS: NVML provider finalized.\n"; + } +} + +// --- Hook Implementations with Timer Integration --- +void kokkosp_begin_parallel_for(const char* name, uint32_t devID, + uint64_t* kID) { + g_timer.begin_parallel_for(name, devID, *kID); +} +void kokkosp_end_parallel_for(uint64_t kID) { g_timer.end_parallel_for(kID); } +void kokkosp_begin_parallel_scan(const char* name, uint32_t devID, + uint64_t* kID) { + g_timer.begin_parallel_scan(name, devID, kID); +} +void kokkosp_end_parallel_scan(uint64_t kID) { g_timer.end_parallel_scan(kID); } +void kokkosp_begin_parallel_reduce(const char* name, uint32_t devID, + uint64_t* kID) { + g_timer.begin_parallel_reduce(name, devID, kID); +} +void kokkosp_end_parallel_reduce(uint64_t kID) { + g_timer.end_parallel_reduce(kID); +} +void kokkosp_push_profile_region(const char* regionName) { + g_timer.push_profile_region(regionName); +} +void kokkosp_pop_profile_region() { g_timer.pop_profile_region(); } +void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, + const char* dst_name, const void* dst_ptr, + Kokkos::Tools::SpaceHandle src_handle, + const char* src_name, const void* src_ptr, + uint64_t 
size) { + g_timer.begin_deep_copy(dst_handle, dst_name, dst_ptr, src_handle, src_name, + src_ptr, size); +} +void kokkosp_end_deep_copy() { g_timer.end_deep_copy(); } + +} // namespace Power +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::Power; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) +EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) +EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) +EXPOSE_BEGIN_DEEP_COPY(impl::kokkosp_begin_deep_copy) +EXPOSE_END_DEEP_COPY(impl::kokkosp_end_deep_copy) + +} // extern "C" diff --git a/profiling/energy-profiler/kokkos/kp_variorum_power.cpp b/profiling/energy-profiler/kokkos/kp_variorum_power.cpp new file mode 100644 index 000000000..8e5e47b99 --- /dev/null +++ b/profiling/energy-profiler/kokkos/kp_variorum_power.cpp @@ -0,0 +1,299 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +/** + * @file kp_variorum_power.cpp + * @brief Kokkos Power Profiler Tool using Variorum. 
+ * + * This tool leverages a background daemon to periodically sample GPU power + * consumption using the Variorum library via a provider interface. It starts + * monitoring when the Kokkos library is initialized and writes detailed + * power profiles to CSV files upon finalization. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kp_core.hpp" +#include "../provider/provider_variorum.hpp" +#include "../common/daemon.hpp" +#include "../common/filename_prefix.hpp" +#include "../common/timer.hpp" +#include "../tools/kernel_timer_tool.hpp" + +namespace KokkosTools { +namespace VariorumPower { + +KernelTimerTool timer; + +// --- Data Structures for Self-Contained Management --- + +struct PowerDataPoint { + int64_t timestamp_ns; + double power_watts; +}; + +// --- Global State for the Profiler --- +static std::unique_ptr g_power_daemon; +static std::unique_ptr g_variorum_provider; +static std::mutex g_data_mutex; // Mutex for all data collections +static std::chrono::high_resolution_clock::time_point g_start_time; + +// Data Collections +static std::vector g_power_data; + +// --- Helper Functions --- + +// Get current time in nanoseconds since epoch +int64_t get_current_epoch_ns() { + return std::chrono::duration_cast( + std::chrono::high_resolution_clock::now().time_since_epoch()) + .count(); +} + +void write_power_data_to_csv(const std::string& filename) { + std::ofstream outfile(filename); + if (!outfile.is_open()) { + std::cerr << "KokkosP Variorum Power: Could not open file for writing: " + << filename << "\n"; + return; + } + + outfile << "timestamp_nanoseconds,power_watts\n"; + std::lock_guard lock(g_data_mutex); + for (const auto& point : g_power_data) { + outfile << point.timestamp_ns << "," << std::fixed << std::setprecision(3) + << point.power_watts << "\n"; + } + printf("KokkosP Variorum Power: Wrote power 
data to %s\n", filename.c_str()); +} + +// --- Monitoring Function (for Daemon) --- + +void variorum_power_monitoring_tick() { + if (!g_variorum_provider || !g_variorum_provider->is_initialized()) { + return; + } + + double current_power_W = g_variorum_provider->get_total_power_usage(); + int64_t timestamp_ns = get_current_epoch_ns(); + + std::lock_guard lock(g_data_mutex); + g_power_data.push_back({timestamp_ns, current_power_W}); +} + +// --- Kokkos Profiling Hooks --- + +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { + printf( + "======================================================================" + "\n"); + printf("KokkosP: Variorum Power Profiler Initialized\n"); + printf("KokkosP: Sequence: %d, Interface Version: %llu, Devices: %u\n", + loadSeq, (unsigned long long)interfaceVer, devInfoCount); + printf( + "======================================================================" + "\n"); + + g_start_time = std::chrono::high_resolution_clock::now(); + + g_variorum_provider = std::make_unique(); + if (!g_variorum_provider->initialize()) { + std::cerr << "KokkosP Variorum Power: Failed to initialize Variorum, power " + "monitoring disabled\n"; + g_variorum_provider.reset(); + return; + } + + int interval_ms = 20; + if (const char* interval_env = + std::getenv("KOKKOS_VARIORUM_POWER_INTERVAL")) { + try { + interval_ms = std::stoi(interval_env); + if (interval_ms <= 0) { + interval_ms = 20; + throw std::invalid_argument("Interval must be positive"); + } + printf("KokkosP Variorum Power: Using custom interval: %d ms\n", + interval_ms); + } catch (const std::exception& e) { + printf( + "KokkosP Variorum Power: Invalid interval value, using default " + "20ms\n"); + } + } else { + printf("KokkosP Variorum Power: Using default interval: 20 ms\n"); + } + + g_power_daemon = std::make_unique( + std::function(variorum_power_monitoring_tick), interval_ms); + 
g_power_daemon->start(); + printf("KokkosP Variorum Power: Power monitoring started\n"); + + timer.init_library(loadSeq, interfaceVer, devInfoCount, deviceInfo); +} + +void kokkosp_finalize_library() { + auto end_time = std::chrono::high_resolution_clock::now(); + + printf( + "======================================================================" + "\n"); + printf("KokkosP: Variorum Power Profiler Finalization\n"); + + if (g_power_daemon && g_power_daemon->is_running()) { + g_power_daemon->stop(); + printf("KokkosP Variorum Power: Power monitoring stopped\n"); + } + + auto duration = std::chrono::duration_cast( + end_time - g_start_time); + double elapsed_seconds = duration.count() / 1000.0; + + printf("KokkosP Variorum Power: Total execution time: %.3f seconds\n", + elapsed_seconds); + + auto power_filename = generate_prefix() + "_variorum_power_samples.csv"; + write_power_data_to_csv(power_filename); + + if (g_variorum_provider) { + g_variorum_provider->finalize(); + } + printf( + "======================================================================" + "\n"); + + timer.finalize_library(); + + std::string prefix = generate_prefix(); + + const auto& kernels = timer.get_kernel_timings(); + KokkosTools::Timer::print_kernels_summary(kernels); + KokkosTools::Timer::export_kernels_csv(kernels, prefix + "_kernels.csv"); + + const auto& regions = timer.get_region_timings(); + KokkosTools::Timer::print_regions_summary(regions); + KokkosTools::Timer::export_regions_csv(regions, prefix + "_regions.csv"); + + const auto& deepcopies = timer.get_deep_copy_timings(); + KokkosTools::Timer::print_deepcopies_summary(deepcopies); + KokkosTools::Timer::export_deepcopies_csv(deepcopies, + prefix + "_deepcopies.csv"); +} + +void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, + uint64_t* kID) { + timer.begin_parallel_for(name, devID, *kID); +} + +void kokkosp_end_parallel_for(const uint64_t kID) { + timer.end_parallel_for(kID); +} + +void 
kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, + uint64_t* kID) { + timer.begin_parallel_scan(name, devID, kID); +} + +void kokkosp_end_parallel_scan(const uint64_t kID) { + timer.end_parallel_scan(kID); +} + +void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, + uint64_t* kID) { + timer.begin_parallel_reduce(name, devID, kID); +} + +void kokkosp_end_parallel_reduce(const uint64_t kID) { + timer.end_parallel_reduce(kID); +} + +void kokkosp_push_profile_region(char const* regionName) { + timer.push_profile_region(regionName); +} + +void kokkosp_pop_profile_region() { timer.pop_profile_region(); } + +void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, + const char* dst_name, const void* dst_ptr, + Kokkos::Tools::SpaceHandle src_handle, + const char* src_name, const void* src_ptr, + uint64_t size) { + timer.begin_deep_copy(dst_handle, dst_name, dst_ptr, src_handle, src_name, + src_ptr, size); +} + +void kokkosp_end_deep_copy() { timer.end_deep_copy(); } + +// --- Event Set Configuration --- + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, + sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.begin_deep_copy = kokkosp_begin_deep_copy; + my_event_set.end_deep_copy = kokkosp_end_deep_copy; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + my_event_set.push_region = kokkosp_push_profile_region; + my_event_set.pop_region = kokkosp_pop_profile_region; + return my_event_set; +} + 
+} // namespace VariorumPower +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::VariorumPower; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) +EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) +EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) +EXPOSE_BEGIN_DEEP_COPY(impl::kokkosp_begin_deep_copy) +EXPOSE_END_DEEP_COPY(impl::kokkosp_end_deep_copy) +} \ No newline at end of file diff --git a/profiling/energy-profiler/provider/provider_nvml.cpp b/profiling/energy-profiler/provider/provider_nvml.cpp new file mode 100644 index 000000000..d5f35e6cd --- /dev/null +++ b/profiling/energy-profiler/provider/provider_nvml.cpp @@ -0,0 +1,219 @@ +#include "provider_nvml.hpp" +#include +#include +#include + +NVMLProvider::NVMLProvider() : initialized_(false) {} + +NVMLProvider::~NVMLProvider() { + if (initialized_) { + finalize(); + } +} + +bool NVMLProvider::initialize() { + if (initialized_) { + return true; + } + + // Initialize NVML + nvmlReturn_t result = nvmlInit(); + if (NVML_SUCCESS != result) { + std::cerr << "NVML Provider: Failed to initialize NVML: " + << nvmlErrorString(result) << std::endl; + return false; + } + + // Discover devices + if (!discover_devices()) { + nvmlShutdown(); + return false; + } + + initialized_ = true; + std::cout << "NVML Provider: Successfully initialized with " + << devices_.size() << " device(s)" << std::endl; + + return true; +} + +void NVMLProvider::finalize() { + if (!initialized_) { + return; + } + + cleanup_devices(); + nvmlShutdown(); + initialized_ = false; + + std::cout << "NVML Provider: 
Finalized" << std::endl; +} + +double NVMLProvider::get_total_power_usage() { + if (!initialized_) { + return 0.0; + } + + double total_power_W = 0.0; + + for (size_t i = 0; i < devices_.size(); ++i) { + double device_power = get_device_power_usage(i); + if (device_power >= 0.0) { + total_power_W += device_power; + } + } + + return total_power_W; +} + +double NVMLProvider::get_device_power_usage(size_t device_index) { + if (!initialized_ || device_index >= devices_.size()) { + return -1.0; + } + + if (devices_[device_index] == nullptr) { + return -1.0; + } + + unsigned int power_mW = 0; + nvmlReturn_t result = + nvmlDeviceGetPowerUsage(devices_[device_index], &power_mW); + + if (result == NVML_SUCCESS) { + // Convert from milliwatts to watts + return static_cast(power_mW) / 1000.0; + } else { + std::cerr << "NVML Provider: Failed to get power usage for device " + << device_index << ": " << nvmlErrorString(result) << std::endl; + return -1.0; + } +} + +double NVMLProvider::get_device_power_usage_direct(size_t device_index) { + if (!initialized_ || device_index >= devices_.size()) { + return -1.0; + } + + if (devices_[device_index] == nullptr) { + return -1.0; + } + + nvmlFieldValue_t powerFieldNow; + powerFieldNow.fieldId = NVML_FI_DEV_POWER_INSTANT; + if (nvmlDeviceGetFieldValues(devices_[device_index], 1, &powerFieldNow) != + NVML_SUCCESS) { + std::cerr << "NVML power read failed — stopping measurement.\n"; + return -1.0; + } + unsigned int pw = static_cast(powerFieldNow.value.uiVal); + // Convert from milliwatts to watts + return static_cast(pw) / 1000.0; +} + +double NVMLProvider::get_current_energy_consumption(size_t device_index) { + if (!initialized_ || device_index >= devices_.size()) { + return -1.0; + } + + if (devices_[device_index] == nullptr) { + return -1.0; + } + + unsigned long long energy_joules = 0; + nvmlReturn_t result = nvmlDeviceGetTotalEnergyConsumption( + devices_[device_index], &energy_joules); + + if (result == NVML_SUCCESS) { + // Convert 
from millijoules to joules + return static_cast(energy_joules) / 1000.0; + } else { + std::cerr << "NVML Provider: Failed to get energy consumption for device " + << device_index << ": " << nvmlErrorString(result) << std::endl; + return -1.0; + } +} + +size_t NVMLProvider::get_device_count() const { return devices_.size(); } + +std::string NVMLProvider::get_device_name(size_t device_index) const { + if (device_index >= device_names_.size()) { + return "Unknown Device"; + } + return device_names_[device_index]; +} + +bool NVMLProvider::discover_devices() { + unsigned int device_count; + nvmlReturn_t result = nvmlDeviceGetCount(&device_count); + + if (NVML_SUCCESS != result) { + std::cerr << "NVML Provider: Failed to get device count: " + << nvmlErrorString(result) << std::endl; + return false; + } + + if (device_count == 0) { + std::cerr << "NVML Provider: No NVIDIA devices found" << std::endl; + return false; + } + + devices_.resize(device_count); + device_names_.resize(device_count); + + std::cout << "NVML Provider: Found " << device_count << " NVIDIA device(s)" + << std::endl; + + for (unsigned int i = 0; i < device_count; ++i) { + result = nvmlDeviceGetHandleByIndex(i, &devices_[i]); + if (NVML_SUCCESS != result) { + std::cerr << "NVML Provider: Failed to get handle for device " << i + << std::endl; + devices_[i] = nullptr; + device_names_[i] = "Failed Device"; + continue; + } + + // Get device name + char device_name[NVML_DEVICE_NAME_BUFFER_SIZE]; + result = nvmlDeviceGetName(devices_[i], device_name, + NVML_DEVICE_NAME_BUFFER_SIZE); + if (NVML_SUCCESS == result) { + device_names_[i] = std::string(device_name); + std::cout << "NVML Provider: Device " << i << ": " << device_name + << std::endl; + } else { + device_names_[i] = "Unknown Device " + std::to_string(i); + } + + // Check power management capability + nvmlEnableState_t pmmode; + result = nvmlDeviceGetPowerManagementMode(devices_[i], &pmmode); + if (NVML_SUCCESS == result && pmmode == 
NVML_FEATURE_ENABLED) { + std::cout << "NVML Provider: Device " << i << ": Power management enabled" + << std::endl; + } else { + std::cout << "NVML Provider: Device " << i + << ": Power management disabled or not supported" << std::endl; + } + + // Test power usage reading + unsigned int test_power_mW = 0; + result = nvmlDeviceGetPowerUsage(devices_[i], &test_power_mW); + if (NVML_SUCCESS == result) { + std::cout << "NVML Provider: Device " << i + << ": Current power usage: " << (test_power_mW / 1000.0) << " W" + << std::endl; + } else { + std::cout << "NVML Provider: Device " << i + << ": Power usage reading failed: " << nvmlErrorString(result) + << std::endl; + } + } + + return true; +} + +void NVMLProvider::cleanup_devices() { + devices_.clear(); + device_names_.clear(); +} \ No newline at end of file diff --git a/profiling/energy-profiler/provider/provider_nvml.hpp b/profiling/energy-profiler/provider/provider_nvml.hpp new file mode 100644 index 000000000..488f6e68e --- /dev/null +++ b/profiling/energy-profiler/provider/provider_nvml.hpp @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include + +/** + * NVML Power Provider + * Simplified power monitoring using nvmlDeviceGetPowerUsage() + */ +class NVMLProvider { + public: + NVMLProvider(); + ~NVMLProvider(); + + // Initialize NVML and discover devices + bool initialize(); + + // Cleanup NVML resources + void finalize(); + + // Get current power consumption in Watts for all devices + double get_total_power_usage(); + + // Get power usage for a specific device + double get_device_power_usage(size_t device_index); // unit: Watts + + double get_device_power_usage_direct(size_t device_index); // unit: Watts + + double get_current_energy_consumption(size_t device_index); // unit: Joules + + // Get number of available devices + size_t get_device_count() const; + + // Get device name + std::string get_device_name(size_t device_index) const; + + // Check if provider is initialized + bool is_initialized() const { 
return initialized_; } + + private: + bool initialized_; + std::vector devices_; + std::vector device_names_; + + // Helper methods + bool discover_devices(); + void cleanup_devices(); +}; \ No newline at end of file diff --git a/profiling/energy-profiler/provider/provider_variorum.cpp b/profiling/energy-profiler/provider/provider_variorum.cpp new file mode 100644 index 000000000..a31f1eed2 --- /dev/null +++ b/profiling/energy-profiler/provider/provider_variorum.cpp @@ -0,0 +1,230 @@ +#include "provider_variorum.hpp" +#include +#include +#include + +VariorumProvider::VariorumProvider() : initialized_(false) {} + +VariorumProvider::~VariorumProvider() { + if (initialized_) { + finalize(); + } +} + +bool VariorumProvider::initialize() { + if (initialized_) { + return true; + } + + // Initialize Variorum (in the original code, this was a no-op) + // For now, we'll assume Variorum is available and working + + // Discover devices + if (!discover_devices()) { + return false; + } + + initialized_ = true; + std::cout << "Variorum Provider: Successfully initialized with " + << device_ids_.size() << " device(s)" << std::endl; + + return true; +} + +void VariorumProvider::finalize() { + if (!initialized_) { + return; + } + + cleanup_devices(); + initialized_ = false; + + std::cout << "Variorum Provider: Finalized" << std::endl; +} + +double VariorumProvider::get_total_power_usage() { + if (!initialized_) { + return 0.0; + } + + double total_power_W = 0.0; + std::map power_readings = get_current_power_readings(); + + for (const auto& [device_id, power] : power_readings) { + if (power >= 0.0) { + total_power_W += power; + } + } + + return total_power_W; +} + +double VariorumProvider::get_device_power_usage(size_t device_index) { + if (!initialized_ || device_index >= device_ids_.size()) { + return -1.0; + } + + uint32_t device_id = device_ids_[device_index]; + std::map power_readings = get_current_power_readings(); + + auto it = power_readings.find(device_id); + if (it != 
power_readings.end()) { + return it->second; + } + + return -1.0; +} + +size_t VariorumProvider::get_device_count() const { return device_ids_.size(); } + +std::string VariorumProvider::get_device_name(size_t device_index) const { + if (device_index >= device_names_.size()) { + return "Unknown Device"; + } + return device_names_[device_index]; +} + +bool VariorumProvider::discover_devices() { + std::set found_device_ids; + unique_json_ptr root = get_variorum_json_data(); + + if (!root) { + std::cerr << "Variorum Provider: Failed to get JSON data from Variorum" + << std::endl; + return false; + } + + // Parse JSON to find GPU devices + json_t* host_obj = json_object_iter_value(json_object_iter(root.get())); + if (!host_obj) { + std::cerr << "Variorum Provider: No host object found in JSON" << std::endl; + return false; + } + + json_t* socket_0 = json_object_get(host_obj, "socket_0"); + if (socket_0 && json_is_object(socket_0)) { + json_t* power_gpu_watts = json_object_get(socket_0, "power_gpu_watts"); + if (power_gpu_watts && json_is_object(power_gpu_watts)) { + const char* key; + json_t* value; + json_object_foreach(power_gpu_watts, key, value) { + std::string s_key(key); + if (s_key.length() > 4 && s_key.substr(0, 4) == "GPU_") { + try { + uint32_t device_id = std::stoul(s_key.substr(4)); + found_device_ids.insert(device_id); + } catch (const std::exception& e) { + std::cerr << "Variorum Provider: Could not parse GPU ID from key: " + << s_key << " (" << e.what() << ")" << std::endl; + } + } + } + } + } + + if (found_device_ids.empty()) { + std::cerr << "Variorum Provider: No GPU devices found" << std::endl; + return false; + } + + // Store device information + device_ids_.clear(); + device_names_.clear(); + + for (uint32_t device_id : found_device_ids) { + device_ids_.push_back(device_id); + device_names_.push_back("GPU_" + std::to_string(device_id)); + + std::cout << "Variorum Provider: Found device " << device_ids_.size() - 1 + << ": GPU_" << device_id << 
std::endl; + } + + // Test initial power readings + std::cout << "Variorum Provider: Testing initial power readings..." + << std::endl; + std::map test_readings = get_current_power_readings(); + for (size_t i = 0; i < device_ids_.size(); ++i) { + uint32_t device_id = device_ids_[i]; + auto it = test_readings.find(device_id); + if (it != test_readings.end()) { + std::cout << "Variorum Provider: Device " << i + << ": Current power usage: " << it->second << " W" << std::endl; + } else { + std::cout << "Variorum Provider: Device " << i << ": Power reading failed" + << std::endl; + } + } + + return true; +} + +void VariorumProvider::cleanup_devices() { + device_ids_.clear(); + device_names_.clear(); +} + +VariorumProvider::unique_json_ptr VariorumProvider::get_variorum_json_data() + const { + char* json_string_c_raw = nullptr; + int variorum_error = variorum_get_power_json(&json_string_c_raw); + + if (variorum_error != 0) { + std::cerr + << "Variorum Provider: variorum_get_power_json() failed. Error code: " + << variorum_error << std::endl; + return unique_json_ptr(nullptr); + } + + unique_cstring json_string_c(json_string_c_raw); + + if (!json_string_c) { + std::cerr + << "Variorum Provider: variorum_get_power_json() returned success " + "but a null pointer." 
+ << std::endl; + return unique_json_ptr(nullptr); + } + + json_error_t error; + json_t* root_ptr = json_loads(json_string_c.get(), 0, &error); + + if (!root_ptr) { + std::cerr << "Variorum Provider: Failed to parse JSON: " << error.text + << std::endl; + return unique_json_ptr(nullptr); + } + + return unique_json_ptr(root_ptr); +} + +std::map VariorumProvider::get_current_power_readings() + const { + std::map readings; + + unique_json_ptr root = get_variorum_json_data(); + if (!root) { + return readings; + } + + json_t* host_obj = json_object_iter_value(json_object_iter(root.get())); + if (!host_obj) { + return readings; + } + + json_t* socket_0 = json_object_get(host_obj, "socket_0"); + if (socket_0 && json_is_object(socket_0)) { + json_t* power_gpu_watts = json_object_get(socket_0, "power_gpu_watts"); + if (power_gpu_watts && json_is_object(power_gpu_watts)) { + for (uint32_t device_id : device_ids_) { + std::string gpu_key = "GPU_" + std::to_string(device_id); + json_t* power_value = json_object_get(power_gpu_watts, gpu_key.c_str()); + + if (json_is_number(power_value)) { + readings[device_id] = json_number_value(power_value); + } + } + } + } + + return readings; +} \ No newline at end of file diff --git a/profiling/energy-profiler/provider/provider_variorum.hpp b/profiling/energy-profiler/provider/provider_variorum.hpp new file mode 100644 index 000000000..bdf470a07 --- /dev/null +++ b/profiling/energy-profiler/provider/provider_variorum.hpp @@ -0,0 +1,56 @@ +#pragma once + +#include +#include +#include +#include + +extern "C" { +#include +#include +} + +class VariorumProvider { + public: + VariorumProvider(); + ~VariorumProvider(); + + // Core functionality + bool initialize(); + void finalize(); + bool is_initialized() const { return initialized_; } + + // Power monitoring + double get_total_power_usage(); + double get_device_power_usage(size_t device_index); + + // Device information + size_t get_device_count() const; + std::string get_device_name(size_t 
device_index) const; + + private: + struct JsonDeleter { + void operator()(json_t* json) const { + if (json) json_decref(json); + } + }; + using unique_json_ptr = std::unique_ptr; + + struct CFreeDeleter { + void operator()(char* ptr) const { + if (ptr) free(ptr); + } + }; + using unique_cstring = std::unique_ptr; + + // Internal methods + bool discover_devices(); + void cleanup_devices(); + unique_json_ptr get_variorum_json_data() const; + std::map get_current_power_readings() const; + + // Member variables + bool initialized_; + std::vector device_ids_; + std::vector device_names_; +}; \ No newline at end of file diff --git a/profiling/energy-profiler/tests/CMakeLists.txt b/profiling/energy-profiler/tests/CMakeLists.txt new file mode 100644 index 000000000..da391e9e9 --- /dev/null +++ b/profiling/energy-profiler/tests/CMakeLists.txt @@ -0,0 +1,152 @@ +# Tests for Energy Profiler + +enable_testing() + +# Find Threads package for pthread support (needed by std::thread in daemon.cpp) +find_package(Threads REQUIRED) + +# Daemon test +add_executable(daemon_test + daemon_test.cpp + ../common/daemon.cpp +) + +target_include_directories(daemon_test PRIVATE + ../common +) + +target_link_libraries(daemon_test PRIVATE Threads::Threads) + +add_test(NAME daemon_test COMMAND daemon_test) + +# Find CUDA Toolkit and NVML for NVML tests +find_package(CUDAToolkit QUIET) +if(CUDAToolkit_FOUND) + find_package(CUDA::nvml QUIET) + if(TARGET CUDA::nvml) + message(STATUS "Found CUDA NVML, making NVML tests available.") + + # NVML Provider test + add_executable(nvml_provider_test + nvml_provider_test.cpp + ../provider/provider_nvml.cpp + ) + + target_include_directories(nvml_provider_test PRIVATE + ../provider + ) + + target_link_libraries(nvml_provider_test PRIVATE CUDA::nvml) + add_test(NAME nvml_provider_test COMMAND nvml_provider_test) + + # Daemon NVML Integration test + add_executable(daemon_nvml_integration_test + daemon_nvml_integration_test.cpp + ../common/daemon.cpp + 
../provider/provider_nvml.cpp + ) + + target_include_directories(daemon_nvml_integration_test PRIVATE + ../common + ../provider + ) + + target_link_libraries(daemon_nvml_integration_test PRIVATE CUDA::nvml Threads::Threads) + add_test(NAME daemon_nvml_integration_test COMMAND daemon_nvml_integration_test) + + # Fast Daemon NVML Integration test (20ms sampling) + add_executable(daemon_nvml_fast_test + daemon_nvml_fast_test.cpp + ../common/daemon.cpp + ../provider/provider_nvml.cpp + ) + + target_include_directories(daemon_nvml_fast_test PRIVATE + ../common + ../provider + ) + + target_link_libraries(daemon_nvml_fast_test PRIVATE CUDA::nvml Threads::Threads) + add_test(NAME daemon_nvml_fast_test COMMAND daemon_nvml_fast_test) + else() + message(STATUS "CUDA::nvml target not found. NVML tests will be skipped.") + endif() +else() + message(STATUS "CUDAToolkit not found, NVML tests will be skipped.") +endif() + +# Variorum Provider test +if(KOKKOSTOOLS_HAS_VARIORUM) + message(STATUS "Using Variorum configuration from root CMake, making Variorum tests available.") + + add_executable(variorum_provider_test + variorum_provider_test.cpp + ../provider/provider_variorum.cpp + ) + + target_include_directories(variorum_provider_test PRIVATE + ../provider + ) + + target_link_libraries(variorum_provider_test PRIVATE variorum::variorum) + add_test(NAME variorum_provider_test COMMAND variorum_provider_test) + + # Daemon Variorum Integration test + add_executable(daemon_variorum_integration_test + daemon_variorum_integration_test.cpp + ../common/daemon.cpp + ../provider/provider_variorum.cpp + ) + + target_include_directories(daemon_variorum_integration_test PRIVATE + ../common + ../provider + ) + + target_link_libraries(daemon_variorum_integration_test PRIVATE variorum::variorum Threads::Threads) + add_test(NAME daemon_variorum_integration_test COMMAND daemon_variorum_integration_test) + + # Fast Daemon Variorum Integration test + add_executable(daemon_variorum_fast_test + 
daemon_variorum_fast_test.cpp + ../common/daemon.cpp + ../provider/provider_variorum.cpp + ) + + target_include_directories(daemon_variorum_fast_test PRIVATE + ../common + ../provider + ) + + target_link_libraries(daemon_variorum_fast_test PRIVATE variorum::variorum Threads::Threads) + add_test(NAME daemon_variorum_fast_test COMMAND daemon_variorum_fast_test) +else() + message(STATUS "Variorum not available from root CMake, Variorum tests will be skipped.") +endif() + +add_executable(timer_test + timer_test.cpp + ../common/timer.cpp +) + +target_include_directories(timer_test PRIVATE + ../common + ../tools +) + +add_test(NAME timer_test COMMAND timer_test) + +# CSV export test +add_executable(csv_export_test + csv_export_test.cpp + ../common/timer.cpp + ../common/filename_prefix.cpp + ../tools/kernel_timer_tool.cpp +) + +target_include_directories(csv_export_test PRIVATE + ../common + ../tools +) + +add_test(NAME csv_export_test COMMAND csv_export_test) \ No newline at end of file diff --git a/profiling/energy-profiler/tests/csv_export_test.cpp b/profiling/energy-profiler/tests/csv_export_test.cpp new file mode 100644 index 000000000..366f33795 --- /dev/null +++ b/profiling/energy-profiler/tests/csv_export_test.cpp @@ -0,0 +1,68 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include +#include +#include +#include "../common/timer.hpp" +#include "../tools/kernel_timer_tool.hpp" + +int main() { + std::cout << "Testing CSV export functions..." 
<< std::endl; + + KernelTimerTool timer; + + // Simulate some kernel operations + timer.start_region("test_kernel_1", RegionType::ParallelFor, 1); + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + timer.end_region(); + + timer.start_region("test_kernel_2", RegionType::ParallelReduce, 2); + std::this_thread::sleep_for(std::chrono::milliseconds(5)); + timer.end_region(); + + timer.start_region("test_region", RegionType::UserRegion, 3); + std::this_thread::sleep_for(std::chrono::milliseconds(15)); + timer.end_region(); + + timer.start_region("test_deepcopy", RegionType::DeepCopy, 4); + std::this_thread::sleep_for(std::chrono::milliseconds(8)); + timer.end_region(); + + // Test the CSV export functions + const auto& kernels = timer.get_kernel_timings(); + const auto& regions = timer.get_region_timings(); + const auto& deepcopies = timer.get_deep_copy_timings(); + + std::cout << "Found " << kernels.size() << " kernels" << std::endl; + std::cout << "Found " << regions.size() << " regions" << std::endl; + std::cout << "Found " << deepcopies.size() << " deep copies" << std::endl; + + // Test export functions + KokkosTools::Timer::export_kernels_csv(kernels, "test_kernels.csv"); + KokkosTools::Timer::export_regions_csv(regions, "test_regions.csv"); + KokkosTools::Timer::export_deepcopies_csv(deepcopies, "test_deepcopies.csv"); + + // Test print functions + KokkosTools::Timer::print_kernels_summary(kernels); + KokkosTools::Timer::print_regions_summary(regions); + KokkosTools::Timer::print_deepcopies_summary(deepcopies); + + std::cout << "CSV export test completed successfully!" 
<< std::endl; + + return 0; +} diff --git a/profiling/energy-profiler/tests/daemon_nvml_fast_test.cpp b/profiling/energy-profiler/tests/daemon_nvml_fast_test.cpp new file mode 100644 index 000000000..20a62b864 --- /dev/null +++ b/profiling/energy-profiler/tests/daemon_nvml_fast_test.cpp @@ -0,0 +1,226 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../common/daemon.hpp" +#include "../provider/provider_nvml.hpp" + +// Global variables for the monitoring function +static NVMLProvider* g_nvml_provider = nullptr; +static std::atomic g_sample_count{0}; +static std::atomic g_total_energy{0.0}; +static std::atomic g_min_power{std::numeric_limits::max()}; +static std::atomic g_max_power{0.0}; +static std::vector g_power_samples; +static std::mutex g_samples_mutex; + +void fast_power_monitoring_function() { + if (!g_nvml_provider || !g_nvml_provider->is_initialized()) { + return; + } + + double current_power = g_nvml_provider->get_total_power_usage(); + + // Update statistics atomically + g_sample_count.fetch_add(1); + + // Accumulate energy (Power * Time) + // Since we sample every 20ms, energy increment = power * 0.02 seconds + double expected = g_total_energy.load(); + while (!g_total_energy.compare_exchange_weak( + expected, expected + current_power * 0.02)) { + // Loop until successful update + } + + // Update min power + double current_min = g_min_power.load(); + while (current_power < current_min && + !g_min_power.compare_exchange_weak(current_min, current_power)) { + // Loop until successful update + } + + // Update max power + double current_max = g_max_power.load(); + while (current_power > current_max && + !g_max_power.compare_exchange_weak(current_max, current_power)) { + // Loop until successful update + } + + // Store sample for statistical analysis (thread-safe) + { + std::lock_guard lock(g_samples_mutex); + g_power_samples.push_back(current_power); + } +} + +double 
calculate_standard_deviation(const std::vector& samples, + double mean) { + // Sample standard deviation (n - 1 denominator); 0.0 for fewer than two samples. + if (samples.size() <= 1) return 0.0; + + double sum_squared_diff = 0.0; + for (double sample : samples) { + double diff = sample - mean; + sum_squared_diff += diff * diff; + } + + return std::sqrt(sum_squared_diff / (samples.size() - 1)); +} + +// Samples NVML power through a Daemon with a 20 ms interval for two seconds, +// then prints statistics. Returns false if the NVML provider fails to initialize. +bool test_daemon_nvml_fast_integration() { + std::cout << "=== Fast Daemon + NVML Integration Test (20ms sampling) ===" + << std::endl; + + // Reset global counters + g_sample_count = 0; + g_total_energy = 0.0; + g_min_power = std::numeric_limits::max(); + g_max_power = 0.0; + g_power_samples.clear(); + + // Initialize NVML provider + std::cout << "\n1. Initializing NVML provider..." << std::endl; + NVMLProvider nvml_provider; + if (!nvml_provider.initialize()) { + std::cout << "ERROR: Failed to initialize NVML provider" << std::endl; + return false; + } + + g_nvml_provider = &nvml_provider; + std::cout << "SUCCESS: NVML provider initialized with " + << nvml_provider.get_device_count() << " device(s)" << std::endl; + + // Create daemon with 20ms interval + std::cout << "\n2. Creating daemon with 20ms monitoring interval..." + << std::endl; + Daemon power_daemon(fast_power_monitoring_function, 20); + + // Start monitoring + std::cout << "\n3. Starting fast power monitoring..." << std::endl; + power_daemon.start(); + std::cout << "SUCCESS: Fast power monitoring started" << std::endl; + + // Let it run for 2 seconds + std::cout << "\n4. Monitoring for 2 seconds (high frequency sampling)..." + << std::endl; + std::cout << " (No real-time output to avoid saturation)" << std::endl; + + auto start_time = std::chrono::high_resolution_clock::now(); + std::this_thread::sleep_for(std::chrono::seconds(2)); + auto end_time = std::chrono::high_resolution_clock::now(); + + // Stop monitoring + std::cout << "\n5. Stopping power monitoring..."
<< std::endl; + power_daemon.stop(); + std::cout << "SUCCESS: Power monitoring stopped" << std::endl; + + // Calculate actual monitoring duration + auto actual_duration = std::chrono::duration_cast( + end_time - start_time); + + // Analyze collected data + std::cout << "\n=== Statistical Analysis ===" << std::endl; + + uint32_t total_samples = g_sample_count.load(); + double total_energy = g_total_energy.load(); + double min_power = g_min_power.load(); + double max_power = g_max_power.load(); + + std::cout << "Monitoring duration: " << actual_duration.count() << " ms" + << std::endl; + std::cout << "Total samples collected: " << total_samples << std::endl; + std::cout << "Expected samples (50 Hz): " << (actual_duration.count() / 20) + << std::endl; + std::cout << "Sampling efficiency: " << std::fixed << std::setprecision(1) + << (100.0 * total_samples / (actual_duration.count() / 20.0)) << "%" + << std::endl; + + if (total_samples > 0) { + double avg_power = total_energy / (total_samples * 0.02); + + std::cout << "\n=== Power Statistics ===" << std::endl; + std::cout << "Average power: " << std::fixed << std::setprecision(2) + << avg_power << " W" << std::endl; + std::cout << "Minimum power: " << std::fixed << std::setprecision(2) + << min_power << " W" << std::endl; + std::cout << "Maximum power: " << std::fixed << std::setprecision(2) + << max_power << " W" << std::endl; + std::cout << "Power range: " << std::fixed << std::setprecision(2) + << (max_power - min_power) << " W" << std::endl; + std::cout << "Total energy consumed: " << std::fixed << std::setprecision(3) + << total_energy << " J" << std::endl; + + // Calculate additional statistics from stored samples + { + std::lock_guard lock(g_samples_mutex); + if (!g_power_samples.empty()) { + std::sort(g_power_samples.begin(), g_power_samples.end()); + + size_t n = g_power_samples.size(); + double median = + (n % 2 == 0) + ? 
(g_power_samples[n / 2 - 1] + g_power_samples[n / 2]) / 2.0 + : g_power_samples[n / 2]; + + double q1 = g_power_samples[n / 4]; + double q3 = g_power_samples[3 * n / 4]; + + double std_dev = + calculate_standard_deviation(g_power_samples, avg_power); + + std::cout << "\n=== Extended Statistics ===" << std::endl; + std::cout << "Median power: " << std::fixed << std::setprecision(2) + << median << " W" << std::endl; + std::cout << "Q1 (25th percentile): " << std::fixed + << std::setprecision(2) << q1 << " W" << std::endl; + std::cout << "Q3 (75th percentile): " << std::fixed + << std::setprecision(2) << q3 << " W" << std::endl; + std::cout << "Standard deviation: " << std::fixed + << std::setprecision(2) << std_dev << " W" << std::endl; + std::cout << "Coefficient of variation: " << std::fixed + << std::setprecision(1) << (100.0 * std_dev / avg_power) + << "%" << std::endl; + } + } + + // Show per-device breakdown if multiple devices + size_t device_count = nvml_provider.get_device_count(); + if (device_count > 1) { + std::cout << "\n=== Per-Device Final Readings ===" << std::endl; + for (size_t i = 0; i < device_count; ++i) { + double device_power = nvml_provider.get_device_power_usage(i); + std::string device_name = nvml_provider.get_device_name(i); + std::cout << " " << device_name << ": " << std::fixed + << std::setprecision(2) << device_power << " W" << std::endl; + } + } + } + + // Cleanup + std::cout << "\n6. Cleaning up..." << std::endl; + g_nvml_provider = nullptr; + nvml_provider.finalize(); + std::cout << "SUCCESS: Cleanup completed" << std::endl; + + return true; +} + +int main() { + try { + if (test_daemon_nvml_fast_integration()) { + std::cout << "\nFast integration test PASSED!" << std::endl; + return 0; + } else { + std::cout << "\nFast integration test FAILED!" 
<< std::endl; + return 1; + } + } catch (const std::exception& e) { + std::cerr << "\nTest failed with exception: " << e.what() << std::endl; + return 1; + } +} \ No newline at end of file diff --git a/profiling/energy-profiler/tests/daemon_nvml_integration_test.cpp b/profiling/energy-profiler/tests/daemon_nvml_integration_test.cpp new file mode 100644 index 000000000..9f052c2b4 --- /dev/null +++ b/profiling/energy-profiler/tests/daemon_nvml_integration_test.cpp @@ -0,0 +1,123 @@ +#include +#include +#include +#include +#include +#include "../common/daemon.hpp" +#include "../provider/provider_nvml.hpp" + +// Global variables for the monitoring function +static NVMLProvider* g_nvml_provider = nullptr; +static std::atomic g_sample_count{0}; +static std::atomic g_total_energy{0.0}; +static std::atomic g_last_power{0.0}; + +void power_monitoring_function() { + if (!g_nvml_provider || !g_nvml_provider->is_initialized()) { + std::cout << "ERROR: NVML provider not initialized" << std::endl; + return; + } + + double current_power = g_nvml_provider->get_total_power_usage(); + g_last_power.store(current_power); + + // Accumulate energy (Power * Time) + // Since we sample every 1000ms, energy increment = power * 1.0 seconds + double expected = g_total_energy.load(); + while (!g_total_energy.compare_exchange_weak( + expected, expected + current_power * 1.0)) { + // Loop until successful update + } + + uint32_t sample_num = g_sample_count.fetch_add(1) + 1; + + std::cout << std::fixed << std::setprecision(2) << "Sample #" << sample_num + << " - Power: " << current_power << " W" + << " - Total Energy: " << g_total_energy.load() << " J" + << std::endl; + + // Display individual device power if multiple devices + size_t device_count = g_nvml_provider->get_device_count(); + if (device_count > 1) { + for (size_t i = 0; i < device_count; ++i) { + double device_power = g_nvml_provider->get_device_power_usage(i); + if (device_power >= 0.0) { + std::cout << " Device " << i << " (" + << 
g_nvml_provider->get_device_name(i) + << "): " << device_power << " W" << std::endl; + } + } + } +} + +bool test_daemon_nvml_integration() { + std::cout << "=== Daemon + NVML Integration Test ===" << std::endl; + + // Initialize NVML provider + std::cout << "\n1. Initializing NVML provider..." << std::endl; + NVMLProvider nvml_provider; + if (!nvml_provider.initialize()) { + std::cout << "ERROR: Failed to initialize NVML provider" << std::endl; + return false; + } + + g_nvml_provider = &nvml_provider; + std::cout << "SUCCESS: NVML provider initialized with " + << nvml_provider.get_device_count() << " device(s)" << std::endl; + + // Create daemon with 1-second interval + std::cout << "\n2. Creating daemon with 1-second monitoring interval..." + << std::endl; + Daemon power_daemon(power_monitoring_function, 1000); + + // Start monitoring + std::cout << "\n3. Starting power monitoring..." << std::endl; + power_daemon.start(); + std::cout << "SUCCESS: Power monitoring started" << std::endl; + + // Let it run for 2 seconds + std::cout << "\n4. Monitoring for 2 seconds..." << std::endl; + std::this_thread::sleep_for(std::chrono::seconds(2)); + + // Stop monitoring + std::cout << "\n5. Stopping power monitoring..." << std::endl; + power_daemon.stop(); + std::cout << "SUCCESS: Power monitoring stopped" << std::endl; + + // Display final statistics + std::cout << "\n=== Final Statistics ===" << std::endl; + std::cout << "Total samples: " << g_sample_count.load() << std::endl; + std::cout << "Last power reading: " << std::fixed << std::setprecision(2) + << g_last_power.load() << " W" << std::endl; + std::cout << "Total energy consumed: " << std::fixed << std::setprecision(2) + << g_total_energy.load() << " J" << std::endl; + + if (g_sample_count.load() > 0) { + double avg_power = g_total_energy.load() / g_sample_count.load(); + std::cout << "Average power: " << std::fixed << std::setprecision(2) + << avg_power << " W" << std::endl; + } + + // Cleanup + std::cout << "\n6. 
Cleaning up..." << std::endl; + g_nvml_provider = nullptr; + nvml_provider.finalize(); + std::cout << "SUCCESS: Cleanup completed" << std::endl; + + return true; +} + +int main() { + try { + if (test_daemon_nvml_integration()) { + std::cout << "\nIntegration test PASSED!" << std::endl; + return 0; + } else { + std::cout << "\nIntegration test FAILED!" << std::endl; + return 1; + } + } catch (const std::exception& e) { + std::cerr << "\nTest failed with exception: " << e.what() << std::endl; + return 1; + } +} \ No newline at end of file diff --git a/profiling/energy-profiler/tests/daemon_test.cpp b/profiling/energy-profiler/tests/daemon_test.cpp new file mode 100644 index 000000000..f33a6da01 --- /dev/null +++ b/profiling/energy-profiler/tests/daemon_test.cpp @@ -0,0 +1,221 @@ +#include +#include +#include +#include +#include +#include +#include "../common/daemon.hpp" + +// Test counters and flags +static std::atomic counter{0}; +static std::atomic fast_counter{0}; +static std::atomic slow_counter{0}; +static std::atomic exception_thrown{false}; + +// Test functions +void hello_world() { + std::cout << "Hello World (execution #" << counter.load() + 1 << ")" + << std::endl; + counter++; +} + +void fast_function() { + fast_counter++; + // Very fast function (< 1ms) +} + +void slow_function() { + slow_counter++; + // Simulate a function that takes longer than interval + std::this_thread::sleep_for(std::chrono::milliseconds(150)); +} + +void exception_function() { + exception_thrown = true; + throw std::runtime_error("Test exception in daemon function"); +} + +// Test utilities +bool test_basic_functionality() { + std::cout << "\n=== Test 1: Basic Functionality ===" << std::endl; + + counter = 0; + Daemon daemon(hello_world, 100); + + // Test initial state + assert(!daemon.is_running()); + + std::cout << "Starting daemon..." 
<< std::endl; + daemon.start(); + + // Test running state + assert(daemon.is_running()); + + // Let it run for ~350ms (should execute ~3-4 times) + std::this_thread::sleep_for(std::chrono::milliseconds(350)); + + daemon.stop(); + + // Test stopped state + assert(!daemon.is_running()); + + uint32_t final_count = counter.load(); + std::cout << "Daemon finished. Counter: " << final_count << std::endl; + + // Should have executed 3-4 times (allowing some tolerance for timing) + bool success = (final_count >= 3 && final_count <= 4); + std::cout << "Test 1 " << (success ? "PASSED" : "FAILED") << std::endl; + return success; +} + +bool test_timing_accuracy() { + std::cout << "\n=== Test 2: Timing Accuracy ===" << std::endl; + + fast_counter = 0; + Daemon daemon(fast_function, 50); // 50ms interval + + auto start_time = std::chrono::high_resolution_clock::now(); + daemon.start(); + + // Run for exactly 250ms + std::this_thread::sleep_for(std::chrono::milliseconds(250)); + + daemon.stop(); + auto end_time = std::chrono::high_resolution_clock::now(); + + uint32_t executions = fast_counter.load(); + auto actual_duration = std::chrono::duration_cast( + end_time - start_time); + + std::cout << "Executions: " << executions << std::endl; + std::cout << "Actual duration: " << actual_duration.count() << "ms" + << std::endl; + + // Should execute ~5 times (250ms / 50ms = 5) + bool success = (executions >= 4 && executions <= 6); + std::cout << "Test 2 " << (success ? 
"PASSED" : "FAILED") << std::endl; + return success; +} + +bool test_slow_function_handling() { + std::cout << "\n=== Test 3: Slow Function Handling ===" << std::endl; + + slow_counter = 0; + Daemon daemon(slow_function, + 100); // 100ms interval, but function takes 150ms + + auto start_time = std::chrono::high_resolution_clock::now(); + daemon.start(); + + // Run for 400ms + std::this_thread::sleep_for(std::chrono::milliseconds(400)); + + daemon.stop(); + auto end_time = std::chrono::high_resolution_clock::now(); + + uint32_t executions = slow_counter.load(); + auto actual_duration = std::chrono::duration_cast( + end_time - start_time); + + std::cout << "Executions: " << executions << std::endl; + std::cout << "Actual duration: " << actual_duration.count() << "ms" + << std::endl; + + // Should execute 2-3 times (each execution takes ~150ms, total time ~400ms) + bool success = (executions >= 2 && executions <= 3); + std::cout << "Test 3 " << (success ? "PASSED" : "FAILED") << std::endl; + return success; +} + +bool test_start_stop_edge_cases() { + std::cout << "\n=== Test 4: Start/Stop Edge Cases ===" << std::endl; + + Daemon daemon(hello_world, 1000); + bool success = true; + + // Test double start + try { + daemon.start(); + daemon.start(); // Should throw + success = false; + std::cout << "ERROR: Double start should have thrown exception" + << std::endl; + } catch (const std::runtime_error& e) { + std::cout << "Double start correctly threw: " << e.what() << std::endl; + } + + daemon.stop(); + + // Test double stop + try { + daemon.stop(); // Should throw + success = false; + std::cout << "ERROR: Double stop should have thrown exception" << std::endl; + } catch (const std::runtime_error& e) { + std::cout << "Double stop correctly threw: " << e.what() << std::endl; + } + + // Test stop without start + Daemon daemon2(hello_world, 1000); + try { + daemon2.stop(); // Should throw + success = false; + std::cout << "ERROR: Stop without start should have thrown 
exception" + << std::endl; + } catch (const std::runtime_error& e) { + std::cout << "Stop without start correctly threw: " << e.what() + << std::endl; + } + + std::cout << "Test 4 " << (success ? "PASSED" : "FAILED") << std::endl; + return success; +} + +bool test_thread_safety() { + std::cout << "\n=== Test 5: Thread Safety ===" << std::endl; + + counter = 0; + Daemon daemon(hello_world, 200); // Fast interval + + daemon.start(); + + // Check is_running from main thread while daemon is running + bool running_check1 = daemon.is_running(); + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + bool running_check2 = daemon.is_running(); + + daemon.stop(); + bool running_check3 = daemon.is_running(); + + bool success = running_check1 && running_check2 && !running_check3; + std::cout << "Running state checks: " << running_check1 << ", " + << running_check2 << ", " << running_check3 << std::endl; + std::cout << "Executions during test: " << counter.load() << std::endl; + std::cout << "Test 5 " << (success ? "PASSED" : "FAILED") << std::endl; + return success; +} + +int main() { + std::cout << "=== Daemon Comprehensive Test Suite ===" << std::endl; + + int passed = 0; + int total = 5; + + if (test_basic_functionality()) passed++; + if (test_timing_accuracy()) passed++; + if (test_slow_function_handling()) passed++; + if (test_start_stop_edge_cases()) passed++; + if (test_thread_safety()) passed++; + + std::cout << "\n=== Test Results ===" << std::endl; + std::cout << "Passed: " << passed << "/" << total << std::endl; + + if (passed == total) { + std::cout << "ALL TESTS PASSED! Daemon is working correctly." << std::endl; + return 0; + } else { + std::cout << "Some tests failed. Please check the daemon implementation." 
+ << std::endl; + return 1; + } +} \ No newline at end of file diff --git a/profiling/energy-profiler/tests/daemon_variorum_fast_test.cpp b/profiling/energy-profiler/tests/daemon_variorum_fast_test.cpp new file mode 100644 index 000000000..747fa3368 --- /dev/null +++ b/profiling/energy-profiler/tests/daemon_variorum_fast_test.cpp @@ -0,0 +1,227 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../common/daemon.hpp" +#include "../provider/provider_variorum.hpp" + +// Global variables for the monitoring function +static VariorumProvider* g_variorum_provider = nullptr; +static std::atomic g_sample_count{0}; +static std::atomic g_total_energy{0.0}; +static std::atomic g_min_power{std::numeric_limits::max()}; +static std::atomic g_max_power{0.0}; +static std::vector g_power_samples; +static std::mutex g_samples_mutex; + +void fast_power_monitoring_function() { + if (!g_variorum_provider || !g_variorum_provider->is_initialized()) { + return; + } + + double current_power = g_variorum_provider->get_total_power_usage(); + + // Update statistics atomically + g_sample_count.fetch_add(1); + + // Accumulate energy (Power * Time) + // Since we sample every 20ms, energy increment = power * 0.02 seconds + double expected = g_total_energy.load(); + while (!g_total_energy.compare_exchange_weak( + expected, expected + current_power * 0.02)) { + // Loop until successful update + } + + // Update min power + double current_min = g_min_power.load(); + while (current_power < current_min && + !g_min_power.compare_exchange_weak(current_min, current_power)) { + // Loop until successful update + } + + // Update max power + double current_max = g_max_power.load(); + while (current_power > current_max && + !g_max_power.compare_exchange_weak(current_max, current_power)) { + // Loop until successful update + } + + // Store sample for statistical analysis (thread-safe) + { + std::lock_guard lock(g_samples_mutex); + 
g_power_samples.push_back(current_power); + } +} + +double calculate_standard_deviation(const std::vector& samples, + double mean) { + if (samples.size() <= 1) return 0.0; + + double sum_squared_diff = 0.0; + for (double sample : samples) { + double diff = sample - mean; + sum_squared_diff += diff * diff; + } + + return std::sqrt(sum_squared_diff / (samples.size() - 1)); +} + +bool test_daemon_variorum_fast_integration() { + std::cout << "=== Fast Daemon + Variorum Integration Test (20ms sampling) ===" + << std::endl; + + // Reset global counters + g_sample_count = 0; + g_total_energy = 0.0; + g_min_power = std::numeric_limits::max(); + g_max_power = 0.0; + g_power_samples.clear(); + + // Initialize Variorum provider + std::cout << "\n1. Initializing Variorum provider..." << std::endl; + VariorumProvider variorum_provider; + if (!variorum_provider.initialize()) { + std::cout << "ERROR: Failed to initialize Variorum provider" << std::endl; + return false; + } + + g_variorum_provider = &variorum_provider; + std::cout << "SUCCESS: Variorum provider initialized with " + << variorum_provider.get_device_count() << " device(s)" + << std::endl; + + // Create daemon with 20ms interval + std::cout << "\n2. Creating daemon with 20ms monitoring interval..." + << std::endl; + Daemon power_daemon(fast_power_monitoring_function, 20); + + // Start monitoring + std::cout << "\n3. Starting fast power monitoring..." << std::endl; + power_daemon.start(); + std::cout << "SUCCESS: Fast power monitoring started" << std::endl; + + // Let it run for 2 seconds + std::cout << "\n4. Monitoring for 2 seconds (high frequency sampling)..." + << std::endl; + std::cout << " (No real-time output to avoid saturation)" << std::endl; + + auto start_time = std::chrono::high_resolution_clock::now(); + std::this_thread::sleep_for(std::chrono::seconds(2)); + auto end_time = std::chrono::high_resolution_clock::now(); + + // Stop monitoring + std::cout << "\n5. Stopping power monitoring..." 
<< std::endl; + power_daemon.stop(); + std::cout << "SUCCESS: Power monitoring stopped" << std::endl; + + // Calculate actual monitoring duration + auto actual_duration = std::chrono::duration_cast( + end_time - start_time); + + // Analyze collected data + std::cout << "\n=== Statistical Analysis ===" << std::endl; + + uint32_t total_samples = g_sample_count.load(); + double total_energy = g_total_energy.load(); + double min_power = g_min_power.load(); + double max_power = g_max_power.load(); + + std::cout << "Monitoring duration: " << actual_duration.count() << " ms" + << std::endl; + std::cout << "Total samples collected: " << total_samples << std::endl; + std::cout << "Expected samples (50 Hz): " << (actual_duration.count() / 20) + << std::endl; + std::cout << "Sampling efficiency: " << std::fixed << std::setprecision(1) + << (100.0 * total_samples / (actual_duration.count() / 20.0)) << "%" + << std::endl; + + if (total_samples > 0) { + double avg_power = total_energy / (total_samples * 0.02); + + std::cout << "\n=== Power Statistics ===" << std::endl; + std::cout << "Average power: " << std::fixed << std::setprecision(2) + << avg_power << " W" << std::endl; + std::cout << "Minimum power: " << std::fixed << std::setprecision(2) + << min_power << " W" << std::endl; + std::cout << "Maximum power: " << std::fixed << std::setprecision(2) + << max_power << " W" << std::endl; + std::cout << "Power range: " << std::fixed << std::setprecision(2) + << (max_power - min_power) << " W" << std::endl; + std::cout << "Total energy consumed: " << std::fixed << std::setprecision(3) + << total_energy << " J" << std::endl; + + // Calculate additional statistics from stored samples + { + std::lock_guard lock(g_samples_mutex); + if (!g_power_samples.empty()) { + std::sort(g_power_samples.begin(), g_power_samples.end()); + + size_t n = g_power_samples.size(); + double median = + (n % 2 == 0) + ? 
(g_power_samples[n / 2 - 1] + g_power_samples[n / 2]) / 2.0 + : g_power_samples[n / 2]; + + double q1 = g_power_samples[n / 4]; + double q3 = g_power_samples[3 * n / 4]; + + double std_dev = + calculate_standard_deviation(g_power_samples, avg_power); + + std::cout << "\n=== Extended Statistics ===" << std::endl; + std::cout << "Median power: " << std::fixed << std::setprecision(2) + << median << " W" << std::endl; + std::cout << "Q1 (25th percentile): " << std::fixed + << std::setprecision(2) << q1 << " W" << std::endl; + std::cout << "Q3 (75th percentile): " << std::fixed + << std::setprecision(2) << q3 << " W" << std::endl; + std::cout << "Standard deviation: " << std::fixed + << std::setprecision(2) << std_dev << " W" << std::endl; + std::cout << "Coefficient of variation: " << std::fixed + << std::setprecision(1) << (100.0 * std_dev / avg_power) + << "%" << std::endl; + } + } + + // Show per-device breakdown if multiple devices + size_t device_count = variorum_provider.get_device_count(); + if (device_count > 1) { + std::cout << "\n=== Per-Device Final Readings ===" << std::endl; + for (size_t i = 0; i < device_count; ++i) { + double device_power = variorum_provider.get_device_power_usage(i); + std::string device_name = variorum_provider.get_device_name(i); + std::cout << " " << device_name << ": " << std::fixed + << std::setprecision(2) << device_power << " W" << std::endl; + } + } + } + + // Cleanup + std::cout << "\n6. Cleaning up..." << std::endl; + g_variorum_provider = nullptr; + variorum_provider.finalize(); + std::cout << "SUCCESS: Cleanup completed" << std::endl; + + return true; +} + +int main() { + try { + if (test_daemon_variorum_fast_integration()) { + std::cout << "\nFast integration test PASSED!" << std::endl; + return 0; + } else { + std::cout << "\nFast integration test FAILED!" 
<< std::endl; + return 1; + } + } catch (const std::exception& e) { + std::cerr << "\nTest failed with exception: " << e.what() << std::endl; + return 1; + } +} \ No newline at end of file diff --git a/profiling/energy-profiler/tests/daemon_variorum_integration_test.cpp b/profiling/energy-profiler/tests/daemon_variorum_integration_test.cpp new file mode 100644 index 000000000..fbbb1dfb6 --- /dev/null +++ b/profiling/energy-profiler/tests/daemon_variorum_integration_test.cpp @@ -0,0 +1,128 @@ +#include +#include +#include +#include +#include +#include "../common/daemon.hpp" +#include "../provider/provider_variorum.hpp" + +// Global variables for the monitoring function +static VariorumProvider* g_variorum_provider = nullptr; +static std::atomic g_sample_count{0}; +static std::atomic g_total_energy{0.0}; +static std::atomic g_last_power{0.0}; + +void power_monitoring_function() { + if (!g_variorum_provider || !g_variorum_provider->is_initialized()) { + std::cout << "ERROR: Variorum provider not initialized" << std::endl; + return; + } + + double current_power = g_variorum_provider->get_total_power_usage(); + g_last_power.store(current_power); + + // Accumulate energy (Power * Time) + // Since we sample every 1000ms, energy increment = power * 1.0 seconds + double expected = g_total_energy.load(); + while (!g_total_energy.compare_exchange_weak( + expected, expected + current_power * 1.0)) { + // Loop until successful update + } + + uint32_t sample_num = g_sample_count.fetch_add(1) + 1; + + std::cout << std::fixed << std::setprecision(2) << "Sample " << sample_num + << ": " << current_power + << " W (Total Energy: " << g_total_energy.load() << " J)" + << std::endl; + + // Show individual device power if multiple devices + size_t device_count = g_variorum_provider->get_device_count(); + if (device_count > 1) { + for (size_t i = 0; i < device_count; ++i) { + double device_power = g_variorum_provider->get_device_power_usage(i); + if (device_power >= 0.0) { + std::cout << 
" " << g_variorum_provider->get_device_name(i) << ": " + << device_power << " W" << std::endl; + } + } + } +} + +bool test_daemon_variorum_integration() { + std::cout << "=== Daemon + Variorum Integration Test ===" << std::endl; + + // Reset global counters + g_sample_count = 0; + g_total_energy = 0.0; + g_last_power = 0.0; + + // Initialize Variorum provider + std::cout << "\n1. Initializing Variorum provider..." << std::endl; + VariorumProvider variorum_provider; + if (!variorum_provider.initialize()) { + std::cout << "ERROR: Failed to initialize Variorum provider" << std::endl; + return false; + } + + g_variorum_provider = &variorum_provider; + std::cout << "SUCCESS: Variorum provider initialized with " + << variorum_provider.get_device_count() << " device(s)" + << std::endl; + + // Create daemon with 1-second interval + std::cout << "\n2. Creating daemon with 1-second monitoring interval..." + << std::endl; + Daemon power_daemon(power_monitoring_function, 1000); + + // Start monitoring + std::cout << "\n3. Starting power monitoring..." << std::endl; + power_daemon.start(); + std::cout << "SUCCESS: Power monitoring started" << std::endl; + + // Let it run for 2 seconds + std::cout << "\n4. Monitoring for 2 seconds..." << std::endl; + std::this_thread::sleep_for(std::chrono::seconds(2)); + + // Stop monitoring + std::cout << "\n5. Stopping power monitoring..." 
<< std::endl; + power_daemon.stop(); + std::cout << "SUCCESS: Power monitoring stopped" << std::endl; + + // Display final statistics + std::cout << "\n=== Final Statistics ===" << std::endl; + std::cout << "Total samples: " << g_sample_count.load() << std::endl; + std::cout << "Last power reading: " << std::fixed << std::setprecision(2) + << g_last_power.load() << " W" << std::endl; + std::cout << "Total energy consumed: " << std::fixed << std::setprecision(2) + << g_total_energy.load() << " J" << std::endl; + + if (g_sample_count.load() > 0) { + double avg_power = g_total_energy.load() / (g_sample_count.load() * 1.0); + std::cout << "Average power: " << std::fixed << std::setprecision(2) + << avg_power << " W" << std::endl; + } + + // Cleanup + std::cout << "\n6. Cleaning up..." << std::endl; + g_variorum_provider = nullptr; + variorum_provider.finalize(); + std::cout << "SUCCESS: Cleanup completed" << std::endl; + + return true; +} + +int main() { + try { + if (test_daemon_variorum_integration()) { + std::cout << "\nIntegration test PASSED!" << std::endl; + return 0; + } else { + std::cout << "\nIntegration test FAILED!" << std::endl; + return 1; + } + } catch (const std::exception& e) { + std::cerr << "\nTest failed with exception: " << e.what() << std::endl; + return 1; + } +} \ No newline at end of file diff --git a/profiling/energy-profiler/tests/nvml_provider_test.cpp b/profiling/energy-profiler/tests/nvml_provider_test.cpp new file mode 100644 index 000000000..8f5d015ad --- /dev/null +++ b/profiling/energy-profiler/tests/nvml_provider_test.cpp @@ -0,0 +1,100 @@ +#include +#include +#include +#include "../provider/provider_nvml.hpp" + +void test_nvml_provider() { + std::cout << "=== NVML Provider Test ===" << std::endl; + + NVMLProvider provider; + + // Test initialization + std::cout << "\n1. Testing initialization..." 
<< std::endl; + if (!provider.initialize()) { + std::cout << "ERROR: Failed to initialize NVML provider" << std::endl; + return; + } + std::cout << "SUCCESS: NVML provider initialized successfully" << std::endl; + + // Test device discovery + std::cout << "\n2. Testing device discovery..." << std::endl; + size_t device_count = provider.get_device_count(); + std::cout << "Found " << device_count << " device(s)" << std::endl; + + if (device_count == 0) { + std::cout << "ERROR: No devices found" << std::endl; + return; + } + + // Display device information + std::cout << "\n3. Device information:" << std::endl; + for (size_t i = 0; i < device_count; ++i) { + std::string name = provider.get_device_name(i); + std::cout << " Device " << i << ": " << name << std::endl; + } + + // Test power readings + std::cout << "\n4. Testing power readings..." << std::endl; + for (int sample = 0; sample < 5; ++sample) { + std::cout << "Sample " << (sample + 1) << ":" << std::endl; + + // Individual device power + for (size_t i = 0; i < device_count; ++i) { + double power = provider.get_device_power_usage(i); + if (power >= 0.0) { + std::cout << " Device " << i << ": " << power << " W" << std::endl; + } else { + std::cout << " Device " << i << ": Failed to read power" << std::endl; + } + } + + // Individual device direct power + for (size_t i = 0; i < device_count; ++i) { + double direct_power = provider.get_device_power_usage_direct(i); + if (direct_power >= 0.0) { + std::cout << " Device " << i << " (Direct): " << direct_power << " W" + << std::endl; + } else { + std::cout << " Device " << i + << " (Direct): Failed to read direct power" << std::endl; + } + } + + // Current energy consumption + for (size_t i = 0; i < device_count; ++i) { + double energy = provider.get_current_energy_consumption(i); + if (energy >= 0.0) { + std::cout << " Device " << i << " Energy: " << energy << " J" + << std::endl; + } else { + std::cout << " Device " << i << " Energy: Failed to read energy" + << 
std::endl; + } + } + + // Total power + double total_power = provider.get_total_power_usage(); + std::cout << " Total Power: " << total_power << " W" << std::endl; + + if (sample < 4) { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + } + + // Test finalization + std::cout << "\n5. Testing finalization..." << std::endl; + provider.finalize(); + std::cout << "SUCCESS: NVML provider finalized successfully" << std::endl; + + std::cout << "\n=== Test Completed ===" << std::endl; +} + +int main() { + try { + test_nvml_provider(); + return 0; + } catch (const std::exception& e) { + std::cerr << "ERROR: Test failed with exception: " << e.what() << std::endl; + return 1; + } +} \ No newline at end of file diff --git a/profiling/energy-profiler/tests/timer_test.cpp b/profiling/energy-profiler/tests/timer_test.cpp new file mode 100644 index 000000000..61a9a3cc2 --- /dev/null +++ b/profiling/energy-profiler/tests/timer_test.cpp @@ -0,0 +1,343 @@ +#include +#include +#include +#include +#include "../common/timer.hpp" + +// Test helper function to check if a value is within expected range +bool is_within_range(uint64_t actual, uint64_t expected, uint64_t tolerance) { + return (actual >= expected - tolerance) && (actual <= expected + tolerance); +} + +bool test_basic_timing() { + std::cout << "=== Test Basic Timing ===" << std::endl; + + EnergyTimer timer; + + // Test single timing + timer.start_timing(1, RegionType::ParallelFor, "test_kernel"); + std::this_thread::sleep_for(std::chrono::milliseconds(2)); + timer.end_timing(1); + + auto& timings = timer.get_timings(); + if (timings.size() != 1) { + std::cout << "ERROR: Expected 1 timing, got " << timings.size() + << std::endl; + return false; + } + + auto& timing = timings[1]; + if (!timing.is_ended()) { + std::cout << "ERROR: Timing should be ended" << std::endl; + return false; + } + + uint64_t duration = timing.get_duration_ms(); + if (!is_within_range(duration, 2, 2)) { // 2ms ± 2ms tolerance + std::cout 
<< "ERROR: Duration should be ~2ms, got " << duration << "ms" + << std::endl; + return false; + } + + if (timing.name_ != "test_kernel") { + std::cout << "ERROR: Wrong name, expected 'test_kernel', got '" + << timing.name_ << "'" << std::endl; + return false; + } + + if (timing.region_type_ != RegionType::ParallelFor) { + std::cout << "ERROR: Wrong region type" << std::endl; + return false; + } + + std::cout << "SUCCESS: Basic timing works correctly (duration: " << duration + << "ms)" << std::endl; + return true; +} + +bool test_multiple_timings() { + std::cout << "\n=== Test Multiple Timings ===" << std::endl; + + EnergyTimer timer; + + // Start multiple timings + timer.start_timing(1, RegionType::ParallelFor, "kernel_1"); + timer.start_timing(2, RegionType::ParallelReduce, "kernel_2"); + timer.start_timing(3, RegionType::UserRegion, "region_1"); + + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + timer.end_timing(1); + + std::this_thread::sleep_for(std::chrono::milliseconds(2)); + timer.end_timing(2); + + std::this_thread::sleep_for(std::chrono::milliseconds(2)); + timer.end_timing(3); + + auto& timings = timer.get_timings(); + if (timings.size() != 3) { + std::cout << "ERROR: Expected 3 timings, got " << timings.size() + << std::endl; + return false; + } + + // Check individual durations + uint64_t duration1 = timings[1].get_duration_ms(); + uint64_t duration2 = timings[2].get_duration_ms(); + uint64_t duration3 = timings[3].get_duration_ms(); + + if (!is_within_range(duration1, 1, 1)) { + std::cout << "ERROR: Duration1 should be ~1ms, got " << duration1 << "ms" + << std::endl; + return false; + } + + if (!is_within_range(duration2, 3, 2)) { // 1 + 2 = 3ms + std::cout << "ERROR: Duration2 should be ~3ms, got " << duration2 << "ms" + << std::endl; + return false; + } + + if (!is_within_range(duration3, 5, 2)) { // 1 + 2 + 2 = 5ms + std::cout << "ERROR: Duration3 should be ~5ms, got " << duration3 << "ms" + << std::endl; + return false; + } + + // 
Check that duration2 > duration1 and duration3 > duration2 + if (duration2 <= duration1) { + std::cout << "ERROR: Duration2 should be greater than duration1" + << std::endl; + return false; + } + + if (duration3 <= duration2) { + std::cout << "ERROR: Duration3 should be greater than duration2" + << std::endl; + return false; + } + + std::cout << "SUCCESS: Multiple timings work correctly" << std::endl; + std::cout << " Duration1: " << duration1 << "ms" << std::endl; + std::cout << " Duration2: " << duration2 << "ms" << std::endl; + std::cout << " Duration3: " << duration3 << "ms" << std::endl; + return true; +} + +bool test_region_types() { + std::cout << "\n=== Test Region Types ===" << std::endl; + + EnergyTimer timer; + + // Test all region types + timer.start_timing(1, RegionType::ParallelFor, "parallel_for"); + timer.start_timing(2, RegionType::ParallelScan, "parallel_scan"); + timer.start_timing(3, RegionType::ParallelReduce, "parallel_reduce"); + timer.start_timing(4, RegionType::DeepCopy, "deep_copy"); + timer.start_timing(5, RegionType::UserRegion, "user_region"); + timer.start_timing(6, RegionType::Unknown, "unknown_op"); + + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + + timer.end_timing(1); + timer.end_timing(2); + timer.end_timing(3); + timer.end_timing(4); + timer.end_timing(5); + timer.end_timing(6); + + auto& timings = timer.get_timings(); + if (timings.size() != 6) { + std::cout << "ERROR: Expected 6 timings, got " << timings.size() + << std::endl; + return false; + } + + // Verify region types + if (timings[1].region_type_ != RegionType::ParallelFor || + timings[2].region_type_ != RegionType::ParallelScan || + timings[3].region_type_ != RegionType::ParallelReduce || + timings[4].region_type_ != RegionType::DeepCopy || + timings[5].region_type_ != RegionType::UserRegion || + timings[6].region_type_ != RegionType::Unknown) { + std::cout << "ERROR: Region types not correctly set" << std::endl; + return false; + } + + // Verify names + 
if (timings[1].name_ != "parallel_for" || + timings[2].name_ != "parallel_scan" || + timings[3].name_ != "parallel_reduce" || + timings[4].name_ != "deep_copy" || timings[5].name_ != "user_region" || + timings[6].name_ != "unknown_op") { + std::cout << "ERROR: Names not correctly set" << std::endl; + return false; + } + + std::cout << "SUCCESS: All region types work correctly" << std::endl; + return true; +} + +bool test_error_handling() { + std::cout << "\n=== Test Error Handling ===" << std::endl; + + EnergyTimer timer; + + // Test ending non-existent timing (should not crash) + timer.end_timing(999); // This should not crash + + // Test getting duration before ending + timer.start_timing(1, RegionType::ParallelFor, "test"); + auto& timings = timer.get_timings(); + + if (timings[1].is_ended()) { + std::cout << "ERROR: Timing should not be ended yet" << std::endl; + return false; + } + + // End the timing + timer.end_timing(1); + + if (!timings[1].is_ended()) { + std::cout << "ERROR: Timing should be ended now" << std::endl; + return false; + } + + // Test ending the same timing twice (should not crash) + timer.end_timing(1); + + std::cout << "SUCCESS: Error handling works correctly" << std::endl; + return true; +} + +bool test_precision() { + std::cout << "\n=== Test Precision ===" << std::endl; + + EnergyTimer timer; + + // Test very short timing (should be 0 or 1 ms) + timer.start_timing(1, RegionType::ParallelFor, "short_op"); + // No sleep - immediate end + timer.end_timing(1); + + auto& timings = timer.get_timings(); + uint64_t short_duration = timings[1].get_duration_ms(); + + if (short_duration > 2) { // Should be very small + std::cout << "WARNING: Short duration is " << short_duration + << "ms (expected ≤2ms)" << std::endl; + } + + // Test longer timing for better precision + timer.start_timing(2, RegionType::ParallelFor, "long_op"); + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + timer.end_timing(2); + + uint64_t long_duration = 
timings[2].get_duration_ms(); + + if (!is_within_range(long_duration, 10, 5)) { + std::cout << "ERROR: Long duration should be ~10ms, got " << long_duration + << "ms" << std::endl; + return false; + } + + std::cout << "SUCCESS: Precision test passed" << std::endl; + std::cout << " Short duration: " << short_duration << "ms" << std::endl; + std::cout << " Long duration: " << long_duration << "ms" << std::endl; + return true; +} + +bool test_concurrent_timings() { + std::cout << "\n=== Test Concurrent Timings ===" << std::endl; + + EnergyTimer timer; + + // Start overlapping timings + timer.start_timing(1, RegionType::ParallelFor, "outer"); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + + timer.start_timing(2, RegionType::ParallelReduce, "inner"); + std::this_thread::sleep_for(std::chrono::milliseconds(2)); + timer.end_timing(2); // End inner first + + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + timer.end_timing(1); // End outer last + + auto& timings = timer.get_timings(); + uint64_t outer_duration = timings[1].get_duration_ms(); + uint64_t inner_duration = timings[2].get_duration_ms(); + + // Outer should be longer than inner + if (outer_duration <= inner_duration) { + std::cout << "ERROR: Outer duration (" << outer_duration + << "ms) should be greater than inner duration (" << inner_duration + << "ms)" << std::endl; + return false; + } + + // Check approximate durations + if (!is_within_range(inner_duration, 2, 2)) { + std::cout << "ERROR: Inner duration should be ~2ms, got " << inner_duration + << "ms" << std::endl; + return false; + } + + if (!is_within_range(outer_duration, 4, 2)) { // 1 + 2 + 1 = 4ms + std::cout << "ERROR: Outer duration should be ~4ms, got " << outer_duration + << "ms" << std::endl; + return false; + } + + std::cout << "SUCCESS: Concurrent timings work correctly" << std::endl; + std::cout << " Outer duration: " << outer_duration << "ms" << std::endl; + std::cout << " Inner duration: " << inner_duration << "ms" 
<< std::endl; + return true; +} + +bool very_long_timing() { + std::cout << "\n=== Test Very Long Timing ===" << std::endl; + + EnergyTimer timer; + + timer.start_timing(1, RegionType::ParallelFor, "very_long_op"); + std::this_thread::sleep_for( + std::chrono::milliseconds(50)); // Sleep for 50ms instead of 1 second + timer.end_timing(1); + + auto& timings = timer.get_timings(); + uint64_t duration = timings[1].get_duration_ms(); + + if (!is_within_range(duration, 50, 10)) { // Allow some margin of error + std::cout << "ERROR: Duration should be ~50ms, got " << duration << "ms" + << std::endl; + return false; + } + + std::cout << "SUCCESS: Very long timing works correctly (duration: " + << duration << "ms)" << std::endl; + return true; +} + +int main() { + std::cout << "Running EnergyTimer Tests..." << std::endl; + std::cout << "=============================" << std::endl; + + bool all_passed = true; + + all_passed &= test_basic_timing(); + all_passed &= test_multiple_timings(); + all_passed &= test_region_types(); + all_passed &= test_error_handling(); + all_passed &= test_precision(); + all_passed &= test_concurrent_timings(); + all_passed &= very_long_timing(); + + std::cout << "\n=============================" << std::endl; + if (all_passed) { + std::cout << "ALL TESTS PASSED!" << std::endl; + return 0; + } else { + std::cout << "SOME TESTS FAILED!" << std::endl; + return 1; + } +} \ No newline at end of file diff --git a/profiling/energy-profiler/tests/variorum_provider_test.cpp b/profiling/energy-profiler/tests/variorum_provider_test.cpp new file mode 100644 index 000000000..8dae4a4a5 --- /dev/null +++ b/profiling/energy-profiler/tests/variorum_provider_test.cpp @@ -0,0 +1,77 @@ +#include +#include +#include +#include "../provider/provider_variorum.hpp" + +void test_variorum_provider() { + std::cout << "=== Variorum Provider Test ===" << std::endl; + + VariorumProvider provider; + + // Test initialization + std::cout << "\n1. Testing initialization..." 
<< std::endl; + if (!provider.initialize()) { + std::cout << "ERROR: Failed to initialize Variorum provider" << std::endl; + return; + } + std::cout << "SUCCESS: Variorum provider initialized successfully" + << std::endl; + + // Test device discovery + std::cout << "\n2. Testing device discovery..." << std::endl; + size_t device_count = provider.get_device_count(); + std::cout << "Found " << device_count << " device(s)" << std::endl; + + if (device_count == 0) { + std::cout << "ERROR: No devices found" << std::endl; + return; + } + + // Display device information + std::cout << "\n3. Device information:" << std::endl; + for (size_t i = 0; i < device_count; ++i) { + std::string name = provider.get_device_name(i); + std::cout << " Device " << i << ": " << name << std::endl; + } + + // Test power readings + std::cout << "\n4. Testing power readings..." << std::endl; + for (int sample = 0; sample < 5; ++sample) { + std::cout << "Sample " << (sample + 1) << ":" << std::endl; + + // Individual device power + for (size_t i = 0; i < device_count; ++i) { + double power = provider.get_device_power_usage(i); + if (power >= 0.0) { + std::cout << " Device " << i << ": " << power << " W" << std::endl; + } else { + std::cout << " Device " << i << ": Failed to read power" << std::endl; + } + } + + // Total power + double total_power = provider.get_total_power_usage(); + std::cout << " Total Power: " << total_power << " W" << std::endl; + + if (sample < 4) { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + } + + // Test finalization + std::cout << "\n5. Testing finalization..." 
<< std::endl; + provider.finalize(); + std::cout << "SUCCESS: Variorum provider finalized successfully" << std::endl; + + std::cout << "\n=== Test Completed ===" << std::endl; +} + +int main() { + try { + test_variorum_provider(); + return 0; + } catch (const std::exception& e) { + std::cerr << "ERROR: Test failed with exception: " << e.what() << std::endl; + return 1; + } +} \ No newline at end of file diff --git a/profiling/energy-profiler/tools/kernel_timer_tool.cpp b/profiling/energy-profiler/tools/kernel_timer_tool.cpp new file mode 100644 index 000000000..ea0895fbc --- /dev/null +++ b/profiling/energy-profiler/tools/kernel_timer_tool.cpp @@ -0,0 +1,112 @@ +#include "kernel_timer_tool.hpp" +#include +#include + +void KernelTimerTool::init_library( + const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { + (void)devInfoCount; + (void)deviceInfo; + std::cout << "Kokkos Power Profiler: Initializing with load sequence " + << loadSeq << " and interface version " << interfaceVer + << std::endl; + std::cout << "Kokkos Power Profiler: Library initialized" << std::endl; +} + +void KernelTimerTool::finalize_library() { + // Implementation is empty +} + +void KernelTimerTool::start_region(const std::string& name, RegionType type, + uint64_t id) { + TimingInfo region; + region.name = name; + region.type = type; + region.start_time = std::chrono::high_resolution_clock::now(); + region.id = id; + active_regions_.push_back(region); +} + +void KernelTimerTool::end_region() { + if (!active_regions_.empty()) { + auto region = active_regions_.back(); + active_regions_.pop_back(); + region.end_time = std::chrono::high_resolution_clock::now(); + region.duration = std::chrono::duration_cast( + region.end_time - region.start_time); + if (region.type == RegionType::UserRegion) + completed_regions_.push_back(region); + else if (region.type == RegionType::DeepCopy) + completed_deepcopies_.push_back(region); + 
else + completed_kernels_.push_back(region); + } +} + +void KernelTimerTool::begin_parallel_for(const char* name, const uint32_t devID, + uint64_t kID) { + (void)devID; + start_region(name, RegionType::ParallelFor, kID); +} + +void KernelTimerTool::end_parallel_for(uint64_t kID) { + (void)kID; + end_region(); +} + +void KernelTimerTool::begin_parallel_scan(const char* name, + const uint32_t devID, uint64_t* kID) { + (void)devID; + start_region(name, RegionType::ParallelScan, *kID); +} + +void KernelTimerTool::end_parallel_scan(uint64_t kID) { + (void)kID; + end_region(); +} + +void KernelTimerTool::begin_parallel_reduce(const char* name, + const uint32_t devID, + uint64_t* kID) { + (void)devID; + start_region(name, RegionType::ParallelReduce, *kID); +} + +void KernelTimerTool::end_parallel_reduce(uint64_t kID) { + (void)kID; + end_region(); +} + +void KernelTimerTool::begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, + const char* dst_name, const void* dst_ptr, + Kokkos::Tools::SpaceHandle src_handle, + const char* src_name, const void* src_ptr, + uint64_t size) { + (void)dst_handle; + (void)src_handle; + (void)src_name; + (void)src_ptr; + (void)size; + start_region(dst_name, RegionType::DeepCopy, + reinterpret_cast(dst_ptr)); +} + +void KernelTimerTool::end_deep_copy() { end_region(); } + +void KernelTimerTool::push_profile_region(const char* region_name) { + start_region(region_name, RegionType::UserRegion, next_region_id_++); +} + +void KernelTimerTool::pop_profile_region() { end_region(); } + +const std::deque& KernelTimerTool::get_kernel_timings() const { + return completed_kernels_; +} + +const std::deque& KernelTimerTool::get_region_timings() const { + return completed_regions_; +} + +const std::deque& KernelTimerTool::get_deep_copy_timings() const { + return completed_deepcopies_; +} diff --git a/profiling/energy-profiler/tools/kernel_timer_tool.hpp b/profiling/energy-profiler/tools/kernel_timer_tool.hpp new file mode 100644 index 000000000..1dd1f0963 
--- /dev/null +++ b/profiling/energy-profiler/tools/kernel_timer_tool.hpp @@ -0,0 +1,56 @@ +#pragma once + +#include +#include +#include "../common/tool_interface.hpp" +#include "../common/timer.hpp" + +class KernelTimerTool : public ToolInterface { + public: + KernelTimerTool() = default; + ~KernelTimerTool() override = default; + + void init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) override; + void finalize_library() override; + + // Stack-based timing for robust region/kernel tracking + void start_region(const std::string& name, RegionType type, uint64_t id = 0); + void end_region(); + + // Kokkos interface + void begin_parallel_for(const char* name, const uint32_t devID, + uint64_t kID) override; + void end_parallel_for(uint64_t kID) override; + + void begin_parallel_scan(const char* name, const uint32_t devID, + uint64_t* kID) override; + void end_parallel_scan(uint64_t kID) override; + + void begin_parallel_reduce(const char* name, const uint32_t devID, + uint64_t* kID) override; + void end_parallel_reduce(uint64_t kID) override; + + void begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, + const char* dst_name, const void* dst_ptr, + Kokkos::Tools::SpaceHandle src_handle, + const char* src_name, const void* src_ptr, + uint64_t size) override; + void end_deep_copy() override; + + void push_profile_region(const char* region_name) override; + void pop_profile_region() override; + + // Getters for summary + const std::deque& get_kernel_timings() const; + const std::deque& get_region_timings() const; + const std::deque& get_deep_copy_timings() const; + + private: + std::deque active_regions_; + std::deque completed_kernels_; + std::deque completed_regions_; + std::deque completed_deepcopies_; + uint64_t next_region_id_ = 1; +}; \ No newline at end of file