diff --git a/CMakeLists.txt b/CMakeLists.txt
index bc3b1e63b..a3b033562 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -150,6 +150,7 @@ if(NOT WIN32)
   add_subdirectory(profiling/chrome-tracing)
   add_subdirectory(profiling/space-time-stack)
   add_subdirectory(profiling/perfetto-connector)
+  add_subdirectory(profiling/energy-profiler)
 endif()
 
 # External lib connectors
diff --git a/profiling/energy-profiler/CMakeLists.txt b/profiling/energy-profiler/CMakeLists.txt
new file mode 100644
index 000000000..9c2c52bce
--- /dev/null
+++ b/profiling/energy-profiler/CMakeLists.txt
@@ -0,0 +1,5 @@
+kp_add_library(kp_energy_profiler
+  kp_energy_profiler.cpp
+  timing_utils.cpp
+  timing_export.cpp
+)
diff --git a/profiling/energy-profiler/kp_energy_profiler.cpp b/profiling/energy-profiler/kp_energy_profiler.cpp
new file mode 100644
index 000000000..f1ef6f667
--- /dev/null
+++ b/profiling/energy-profiler/kp_energy_profiler.cpp
@@ -0,0 +1,292 @@
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 4.0
+//       Copyright (2022) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+/// @file kp_energy_profiler.cpp
+/// @brief Kokkos Tools callbacks that time kernels, deep copies and user
+/// regions, print a summary table at finalize and export the timings as CSV.
+
+#include <algorithm>
+#include <chrono>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+#include <iterator>
+#include <mutex>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "kp_core.hpp"
+#include "timing_utils.hpp"
+#include "timing_export.hpp"
+
+namespace KokkosTools {
+namespace EnergyProfiler {
+
+/// @brief Hand out the next unused region ID.
+///
+/// The read and the increment happen under the profiler-state mutex, so two
+/// concurrent callers never receive the same value.
+/// @return The ID that was current before the increment.
+uint64_t generate_new_region_id() {
+  auto& state = EnergyProfilerState::get_instance();
+  std::lock_guard<std::mutex> lock(state.get_mutex());
+  const uint64_t current_id = state.get_next_region_id();
+  state.increment_next_region_id();
+  return current_id;
+}
+
+/// @brief Print @p message on stdout when verbose mode is enabled.
+/// @param message Complete line to print (a newline is appended).
+void log_verbose(const std::string& message) {
+  if (EnergyProfilerState::get_instance().get_verbose_enabled()) {
+    std::cout << message << std::endl;
+  }
+}
+
+/// @brief Open a new region and append it to the list of active regions.
+/// @param name Label stored with the region.
+/// @param type Kind of region (kernel flavour, deep copy, user region).
+/// @param id   Identifier later used by end_region_with_id().
+void start_region(const std::string& name, RegionType type, uint64_t id) {
+  TimingInfo region;
+  region.name = name;
+  region.type = type;
+  region.id   = id;
+  auto& state = EnergyProfilerState::get_instance();
+  std::lock_guard<std::mutex> lock(state.get_mutex());
+  auto& active_regions = state.get_active_regions();
+  active_regions.push_back(std::move(region));
+  // Stamp the start only after the insertion so that a vector reallocation
+  // is not charged to the region being timed.
+  active_regions.back().start_time = std::chrono::high_resolution_clock::now();
+}
+
+/// @brief Close the most recently opened active region of the given type.
+///
+/// Does nothing when no active region has that type.
+/// NOTE(review): the search is over all active regions, not per thread -
+/// confirm that is the intended pairing when callbacks arrive concurrently.
+/// @param type_to_end Type of the region to close.
+void end_region_by_type(RegionType type_to_end) {
+  // Sample the clock first so time spent waiting for the mutex is excluded.
+  const auto end_time = std::chrono::high_resolution_clock::now();
+  auto& state         = EnergyProfilerState::get_instance();
+  std::lock_guard<std::mutex> lock(state.get_mutex());
+  auto& active_regions = state.get_active_regions();
+  const auto it =
+      std::find_if(active_regions.rbegin(), active_regions.rend(),
+                   [type_to_end](const TimingInfo& region) {
+                     return region.type == type_to_end;
+                   });
+  if (it == active_regions.rend()) return;
+  TimingInfo region = std::move(*it);
+  region.end_time   = end_time;
+  // std::next(it).base() is the forward iterator addressing the same element
+  // as the reverse iterator `it`.
+  active_regions.erase(std::next(it).base());
+  state.get_completed_timings().push_back(std::move(region));
+}
+
+/// @brief Close the active region whose ID equals @p expected_id.
+///
+/// Prints a warning on stderr when no such region is active.
+/// @param expected_id ID that was passed to start_region().
+void end_region_with_id(uint64_t expected_id) {
+  // Sample the clock first so time spent waiting for the mutex is excluded.
+  const auto end_time = std::chrono::high_resolution_clock::now();
+  auto& state         = EnergyProfilerState::get_instance();
+  std::lock_guard<std::mutex> lock(state.get_mutex());
+  auto& active_regions = state.get_active_regions();
+  const auto it =
+      std::find_if(active_regions.begin(), active_regions.end(),
+                   [expected_id](const TimingInfo& region) {
+                     return region.id == expected_id;
+                   });
+  if (it == active_regions.end()) {
+    std::cerr << "Warning: No active region found with ID " << expected_id
+              << "\n";
+    return;
+  }
+  TimingInfo region = std::move(*it);
+  region.end_time   = end_time;
+  active_regions.erase(it);
+  state.get_completed_timings().push_back(std::move(region));
+}
+
+/// @brief Snapshot of all completed regions, ordered by start time.
+/// @return A copy of the completed timings; the stored list is not modified.
+std::vector<TimingInfo> get_all_timings() {
+  auto& state = EnergyProfilerState::get_instance();
+  std::lock_guard<std::mutex> lock(state.get_mutex());
+  std::vector<TimingInfo> all_timings = state.get_completed_timings();
+  std::sort(all_timings.begin(), all_timings.end(),
+            [](const TimingInfo& a, const TimingInfo& b) {
+              return a.start_time < b.start_time;
+            });
+  return all_timings;
+}
+
+namespace {
+
+/// @brief True when verbose logging is enabled.
+///
+/// The callbacks test this before assembling a log message so that the string
+/// concatenation is skipped entirely in the common, non-verbose case.
+bool verbose_logging_on() {
+  return EnergyProfilerState::get_instance().get_verbose_enabled();
+}
+
+/// @brief Shared body of the kokkosp_begin_parallel_{for,scan,reduce} hooks.
+/// @param callback Name of the calling hook, used in the error message.
+/// @param kind     Kernel flavour as printed in the verbose message.
+/// @param type     Region type recorded for the kernel.
+/// @param name     Kernel label passed to the hook.
+/// @param devID    Device ID passed to the hook (only logged).
+/// @param kID      Out-parameter receiving the newly generated region ID.
+void begin_kernel(const char* callback, const char* kind, RegionType type,
+                  const char* name, uint32_t devID, uint64_t* kID) {
+  if (!name || !kID) {
+    std::cerr << "Error: Invalid parameters in " << callback << "\n";
+    return;
+  }
+  *kID = generate_new_region_id();
+  start_region(name, type, *kID);
+  if (verbose_logging_on()) {
+    log_verbose(std::string("Kokkos Energy Profiler: Started ") + kind +
+                " '" + name + "' on device " + std::to_string(devID) +
+                " with ID " + std::to_string(*kID));
+  }
+}
+
+/// @brief Shared body of the kokkosp_end_parallel_{for,scan,reduce} hooks.
+/// @param callback Name of the calling hook, used in the error message.
+/// @param kind     Kernel flavour as printed in the verbose message.
+/// @param kID      Region ID handed out by the matching begin hook; 0 is
+///                 rejected as invalid.
+void end_kernel(const char* callback, const char* kind, uint64_t kID) {
+  if (kID == 0) {
+    std::cerr << "Error: Invalid kernel ID in " << callback << "\n";
+    return;
+  }
+  end_region_with_id(kID);
+  if (verbose_logging_on()) {
+    log_verbose(std::string("Kokkos Energy Profiler: Ended ") + kind +
+                " with ID " + std::to_string(kID));
+  }
+}
+
+}  // namespace
+
+}  // namespace EnergyProfiler
+}  // namespace KokkosTools
+
+namespace kep = KokkosTools::EnergyProfiler;
+
+extern "C" {
+
+/// @brief Tool settings hook: this tool does not require global fencing.
+void kokkosp_request_tool_settings(const uint32_t,
+                                   Kokkos_Tools_ToolSettings* settings) {
+  settings->requires_global_fencing = false;
+  settings->padding[0]              = 0;
+}
+
+/// @brief Library init hook.
+///
+/// Enables verbose logging when the environment variable
+/// KOKKOS_TOOLS_ENERGY_VERBOSE is exactly "1" or "ON".
+void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
+                          const uint32_t devInfoCount,
+                          Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) {
+  (void)devInfoCount;
+  (void)deviceInfo;
+  const char* verbose_env = std::getenv("KOKKOS_TOOLS_ENERGY_VERBOSE");
+  if (verbose_env) {
+    const std::string verbose_value(verbose_env);
+    if (verbose_value == "1" || verbose_value == "ON") {
+      kep::EnergyProfilerState::get_instance().set_verbose_enabled(true);
+    }
+  }
+  std::cout << "Kokkos Energy Profiler: Initializing with load sequence "
+            << loadSeq << " and interface version " << interfaceVer
+            << std::endl;
+  std::cout << "Kokkos Energy Profiler: Library initialized" << std::endl;
+}
+
+/// @brief Library finalize hook: prints the summary table on stdout and
+/// writes "<prefix>_timing_data.csv", where the prefix comes from
+/// generate_prefix().
+void kokkosp_finalize_library() {
+  std::cout << "Kokkos Energy Profiler: Finalizing library" << std::endl;
+  const std::string prefix = kep::generate_prefix();
+  const auto all_timings   = kep::get_all_timings();
+  kep::print_all_timings_summary(std::cout, all_timings.begin(),
+                                 all_timings.end());
+  kep::export_all_timings_csv(all_timings, prefix + "_timing_data.csv");
+  std::cout << "Kokkos Energy Profiler: Library finalized" << std::endl;
+}
+
+/// @brief Begin hook for parallel_for; stores the new region ID in @p kID.
+void kokkosp_begin_parallel_for(const char* name, const uint32_t devID,
+                                uint64_t* kID) {
+  kep::begin_kernel("kokkosp_begin_parallel_for", "parallel_for",
+                    kep::RegionType::ParallelFor, name, devID, kID);
+}
+
+/// @brief End hook for parallel_for.
+void kokkosp_end_parallel_for(const uint64_t kID) {
+  kep::end_kernel("kokkosp_end_parallel_for", "parallel_for", kID);
+}
+
+/// @brief Begin hook for parallel_scan; stores the new region ID in @p kID.
+void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID,
+                                 uint64_t* kID) {
+  kep::begin_kernel("kokkosp_begin_parallel_scan", "parallel_scan",
+                    kep::RegionType::ParallelScan, name, devID, kID);
+}
+
+/// @brief End hook for parallel_scan.
+void kokkosp_end_parallel_scan(const uint64_t kID) {
+  kep::end_kernel("kokkosp_end_parallel_scan", "parallel_scan", kID);
+}
+
+/// @brief Begin hook for parallel_reduce; stores the new region ID in @p kID.
+void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID,
+                                   uint64_t* kID) {
+  kep::begin_kernel("kokkosp_begin_parallel_reduce", "parallel_reduce",
+                    kep::RegionType::ParallelReduce, name, devID, kID);
+}
+
+/// @brief End hook for parallel_reduce.
+void kokkosp_end_parallel_reduce(const uint64_t kID) {
+  kep::end_kernel("kokkosp_end_parallel_reduce", "parallel_reduce", kID);
+}
+
+/// @brief Push hook for user regions; opens a UserRegion named @p regionName.
+void kokkosp_push_profile_region(char const* regionName) {
+  if (!regionName) {
+    std::cerr << "Error: Invalid region name in kokkosp_push_profile_region\n";
+    return;
+  }
+  const uint64_t new_id = kep::generate_new_region_id();
+  kep::start_region(regionName, kep::RegionType::UserRegion, new_id);
+  if (kep::verbose_logging_on()) {
+    kep::log_verbose(
+        std::string("Kokkos Energy Profiler: Pushed profile region '") +
+        regionName + "'");
+  }
+}
+
+/// @brief Pop hook for user regions; closes the most recent UserRegion.
+void kokkosp_pop_profile_region() {
+  kep::end_region_by_type(kep::RegionType::UserRegion);
+  if (kep::verbose_logging_on()) {
+    kep::log_verbose("Kokkos Energy Profiler: Popped profile region");
+  }
+}
+
+/// @brief Begin hook for deep copies; the region is named "<src> -> <dst>".
+void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle, const char* dst_name,
+                             const void*, Kokkos::Tools::SpaceHandle,
+                             const char* src_name, const void*, uint64_t size) {
+  if (!dst_name || !src_name) {
+    std::cerr << "Error: Invalid names in kokkosp_begin_deep_copy\n";
+    return;
+  }
+  const uint64_t new_id = kep::generate_new_region_id();
+  const std::string name =
+      std::string(src_name) + " -> " + std::string(dst_name);
+  kep::start_region(name, kep::RegionType::DeepCopy, new_id);
+  if (kep::verbose_logging_on()) {
+    kep::log_verbose(
+        std::string("Kokkos Energy Profiler: Started deep copy from '") +
+        src_name + "' to '" + dst_name + "' (size: " + std::to_string(size) +
+        " bytes)");
+  }
+}
+
+/// @brief End hook for deep copies; closes the most recent DeepCopy region.
+void kokkosp_end_deep_copy() {
+  kep::end_region_by_type(kep::RegionType::DeepCopy);
+  if (kep::verbose_logging_on()) {
+    kep::log_verbose("Kokkos Energy Profiler: Ended deep copy");
+  }
+}
+
+}  // extern "C"
diff --git a/profiling/energy-profiler/timing_export.cpp b/profiling/energy-profiler/timing_export.cpp
new file mode 100644
index 000000000..9e66a3da3
--- /dev/null
+++ b/profiling/energy-profiler/timing_export.cpp
@@ -0,0 +1,89 @@
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v.
4.0
+//       Copyright (2022) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+#include "timing_export.hpp"
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <string>
+
+namespace KokkosTools {
+namespace EnergyProfiler {
+
+// Column widths of the summary table (characters, excluding the one-space
+// padding on each side of a cell).
+const int COLUMN_WIDTH_CATEGORY = 10;
+const int COLUMN_WIDTH_NAME     = 32;
+const int COLUMN_WIDTH_TYPE     = 14;
+const int COLUMN_WIDTH_TIME     = 17;
+const int COLUMN_WIDTH_DURATION = 13;
+
+namespace {
+
+/// @brief Quote a CSV field when it would otherwise break the row.
+///
+/// A field containing a comma, a double quote, CR or LF is wrapped in double
+/// quotes and embedded double quotes are doubled; any other field is returned
+/// unchanged.
+std::string csv_escape(const std::string& field) {
+  if (field.find_first_of(",\"\r\n") == std::string::npos) return field;
+  std::string quoted;
+  quoted.reserve(field.size() + 2);
+  quoted.push_back('"');
+  for (const char c : field) {
+    if (c == '"') quoted.push_back('"');
+    quoted.push_back(c);
+  }
+  quoted.push_back('"');
+  return quoted;
+}
+
+}  // namespace
+
+/// @brief Write all timings to @p filename as CSV.
+///
+/// Columns: name, type, start_time_epoch_ms, end_time_epoch_ms, duration_ms.
+/// Region names are quoted when needed, because they are free-form labels
+/// that may themselves contain commas or quotes. Errors are reported on
+/// stderr; the success message is printed on stdout only when every write
+/// succeeded.
+/// @param all_timings Timings to export, written in the given order.
+/// @param filename    Path of the file to create or overwrite.
+void export_all_timings_csv(const std::vector<TimingInfo>& all_timings,
+                            const std::string& filename) {
+  std::ofstream file(filename);
+  if (!file.is_open()) {
+    std::cerr << "ERROR: Unable to open file " << filename << " for writing.\n";
+    return;
+  }
+  file << "name,type,start_time_epoch_ms,end_time_epoch_ms,duration_ms\n";
+  for (const auto& timing : all_timings) {
+    file << csv_escape(timing.name) << ','
+         << region_type_to_string(timing.type) << ','
+         << get_epoch_ms(timing.start_time) << ','
+         << get_epoch_ms(timing.end_time) << ','
+         << get_duration_ms(timing.start_time, timing.end_time) << '\n';
+  }
+  // Flush before checking so that a failed write is not reported as success.
+  file.flush();
+  if (!file) {
+    std::cerr << "ERROR: Failed while writing " << filename << ".\n";
+    return;
+  }
+  std::cout << "All timing data exported to " << filename << '\n';
+}
+
+/// @brief Map a region type to the coarse category shown in the summary.
+/// @return "REGION", "DEEPCOPY", "KERNEL" (any parallel_* type) or "OTHER".
+std::string get_category_from_type(RegionType type) {
+  switch (type) {
+    case RegionType::UserRegion: return "REGION";
+    case RegionType::DeepCopy: return "DEEPCOPY";
+    case RegionType::ParallelFor:
+    case RegionType::ParallelScan:
+    case RegionType::ParallelReduce: return "KERNEL";
+    default: return "OTHER";
+  }
+}
+
+/// @brief Print the timings in [@p begin, @p end) as a text table.
+///
+/// The header and the rule line are generated from the COLUMN_WIDTH_*
+/// constants so they cannot drift out of alignment with the data rows. The
+/// stream's format flags are restored before returning.
+/// @param os    Stream to print to.
+/// @param begin First timing to print.
+/// @param end   One past the last timing to print.
+void print_all_timings_summary(std::ostream& os,
+                               std::vector<TimingInfo>::const_iterator begin,
+                               std::vector<TimingInfo>::const_iterator end) {
+  const std::ios_base::fmtflags saved_flags = os.flags();
+
+  os << "\n==== TIMING SUMMARY ====\n";
+  os << std::left << "| " << std::setw(COLUMN_WIDTH_CATEGORY) << "Category"
+     << " | " << std::setw(COLUMN_WIDTH_NAME) << "Name"
+     << " | " << std::setw(COLUMN_WIDTH_TYPE) << "Type"
+     << " | " << std::setw(COLUMN_WIDTH_TIME) << "Start (ms)"
+     << " | " << std::setw(COLUMN_WIDTH_TIME) << "End (ms)"
+     << " | " << std::setw(COLUMN_WIDTH_DURATION) << "Duration (ms)"
+     << " |\n";
+  os << '|';
+  for (const int width :
+       {COLUMN_WIDTH_CATEGORY, COLUMN_WIDTH_NAME, COLUMN_WIDTH_TYPE,
+        COLUMN_WIDTH_TIME, COLUMN_WIDTH_TIME, COLUMN_WIDTH_DURATION}) {
+    // +2 covers the single space of padding on each side of a cell.
+    os << std::string(static_cast<std::string::size_type>(width) + 2, '-')
+       << '|';
+  }
+  os << '\n';
+
+  for (auto it = begin; it != end; ++it) {
+    const auto& timing_info = *it;
+    os << "| " << std::setw(COLUMN_WIDTH_CATEGORY) << std::left
+       << get_category_from_type(timing_info.type) << " | "
+       << std::setw(COLUMN_WIDTH_NAME) << std::left << timing_info.name
+       << " | " << std::setw(COLUMN_WIDTH_TYPE) << std::left
+       << region_type_to_string(timing_info.type) << " | "
+       << std::setw(COLUMN_WIDTH_TIME) << std::right
+       << get_epoch_ms(timing_info.start_time) << " | "
+       << std::setw(COLUMN_WIDTH_TIME) << std::right
+       << get_epoch_ms(timing_info.end_time) << " | "
+       << std::setw(COLUMN_WIDTH_DURATION) << std::right
+       << get_duration_ms(timing_info.start_time, timing_info.end_time)
+       << " |\n";
+  }
+
+  os.flags(saved_flags);
+}
+
+}  // namespace EnergyProfiler
+}  // namespace KokkosTools
diff --git a/profiling/energy-profiler/timing_export.hpp b/profiling/energy-profiler/timing_export.hpp
new file mode 100644
index 000000000..632d3114b
--- /dev/null
+++ b/profiling/energy-profiler/timing_export.hpp
@@ -0,0 +1,33 @@
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 4.0
+//       Copyright (2022) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+#pragma once
+
+#include <iosfwd>
+#include <string>
+#include <vector>
+#include "timing_utils.hpp"
+
+namespace KokkosTools {
+namespace EnergyProfiler {
+
+/// @brief Write the given timings to a CSV file.
+/// @param all_timings Timings to export, written in the given order.
+/// @param filename    Path of the file to create or overwrite.
+void export_all_timings_csv(const std::vector<TimingInfo>& all_timings,
+                            const std::string& filename);
+
+/// @brief Print the timings in [@p begin, @p end) as a text table.
+/// @param os    Stream to print to.
+/// @param begin First timing to print.
+/// @param end   One past the last timing to print.
+void print_all_timings_summary(std::ostream& os,
+                               std::vector<TimingInfo>::const_iterator begin,
+                               std::vector<TimingInfo>::const_iterator end);
+
+}  // namespace EnergyProfiler
+}  // namespace KokkosTools
diff --git a/profiling/energy-profiler/timing_utils.cpp b/profiling/energy-profiler/timing_utils.cpp
new file mode 100644
index 000000000..9116d0eda
--- /dev/null
+++ b/profiling/energy-profiler/timing_utils.cpp
@@ -0,0 +1,46 @@
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 4.0
+//       Copyright (2022) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+#include "timing_utils.hpp"
+#include <unistd.h>
+#include <cstring>
+#include <string>
+
+namespace KokkosTools {
+namespace EnergyProfiler {
+
+/// @brief Build the output-file prefix "<hostname>-<pid>".
+///
+/// Falls back to "unknown" as the host part when gethostname() fails.
+/// @return The prefix string.
+std::string generate_prefix() {
+  char hostname[HOSTNAME_BUFFER_SIZE] = {};
+  // Leave the last byte untouched: gethostname() is not required to
+  // NUL-terminate the buffer when the name is truncated.
+  if (gethostname(hostname, sizeof(hostname) - 1) != 0) {
+    // Fallback to "unknown" if hostname fails
+    std::strncpy(hostname, "unknown", sizeof(hostname) - 1);
+  }
+  hostname[sizeof(hostname) - 1] = '\0';
+  const int pid = static_cast<int>(getpid());
+  return std::string(hostname) + "-" + std::to_string(pid);
+}
+
+/// @brief Name of a region type as written to the CSV and summary table.
+/// @return One of "parallel_for", "parallel_scan", "parallel_reduce",
+/// "deep_copy", "user_region", or "unknown" for any other value.
+std::string region_type_to_string(RegionType type) {
+  switch (type) {
+    case RegionType::ParallelFor: return "parallel_for";
+    case RegionType::ParallelScan: return "parallel_scan";
+    case RegionType::ParallelReduce: return "parallel_reduce";
+    case RegionType::DeepCopy: return "deep_copy";
+    case RegionType::UserRegion: return "user_region";
+    default: return "unknown";
+  }
+}
+
+}  // namespace EnergyProfiler
+}  // namespace KokkosTools
diff --git a/profiling/energy-profiler/timing_utils.hpp b/profiling/energy-profiler/timing_utils.hpp
new file mode 100644
index 000000000..0d51d29f4
--- /dev/null
+++ b/profiling/energy-profiler/timing_utils.hpp
@@ -0,0 +1,124 @@
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 4.0
+//       Copyright (2022) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+#pragma once
+
+#include <chrono>
+#include <cstddef>
+#include <cstdint>
+#include <mutex>
+#include <string>
+#include <vector>
+
+namespace KokkosTools {
+namespace EnergyProfiler {
+
+// Constants
+/// @brief Buffer size for hostname
+constexpr std::size_t HOSTNAME_BUFFER_SIZE = 256;
+
+/// @brief Enumeration of region types
+enum class RegionType {
+  Unknown,
+  ParallelFor,
+  ParallelReduce,
+  ParallelScan,
+  DeepCopy,
+  UserRegion
+};
+
+/// @brief Structure to hold timing information
+///
+/// NOTE(review): the epoch of std::chrono::high_resolution_clock is
+/// implementation-defined, so values derived from these time points are only
+/// comparable with each other - confirm before treating them as wall-clock
+/// timestamps.
+struct TimingInfo {
+  std::string name;                      ///< Region label.
+  RegionType type = RegionType::Unknown; ///< Kind of region.
+  std::chrono::high_resolution_clock::time_point start_time;
+  std::chrono::high_resolution_clock::time_point end_time;
+  uint64_t id = 0;                       ///< Region ID; 0 until assigned.
+};
+
+/// @brief Singleton class for managing profiler state
+///
+/// None of the accessors lock. The region-management functions declared
+/// further down in this header take get_mutex() before touching the
+/// containers or the ID counter.
+class EnergyProfilerState {
+ public:
+  /// @brief Access the single, lazily constructed instance.
+  static EnergyProfilerState& get_instance() {
+    static EnergyProfilerState instance;
+    return instance;
+  }
+
+  // Delete copy and move operations
+  EnergyProfilerState(const EnergyProfilerState&)            = delete;
+  EnergyProfilerState& operator=(const EnergyProfilerState&) = delete;
+  EnergyProfilerState(EnergyProfilerState&&)                 = delete;
+  EnergyProfilerState& operator=(EnergyProfilerState&&)      = delete;
+
+  // Accessors for state
+  std::mutex& get_mutex() { return mutex_; }
+  std::vector<TimingInfo>& get_active_regions() { return active_regions_; }
+  std::vector<TimingInfo>& get_completed_timings() {
+    return completed_timings_;
+  }
+  uint64_t get_next_region_id() const { return next_region_id_; }
+  bool get_verbose_enabled() const { return verbose_enabled_; }
+
+  // Setters
+  void increment_next_region_id() { next_region_id_++; }
+  void set_verbose_enabled(bool enabled) { verbose_enabled_ = enabled; }
+
+ private:
+  // Region IDs start at 1.
+  EnergyProfilerState() : next_region_id_(1), verbose_enabled_(false) {}
+
+  std::mutex mutex_;
+  std::vector<TimingInfo> active_regions_;     // opened, not yet closed
+  std::vector<TimingInfo> completed_timings_;  // closed regions
+  uint64_t next_region_id_;
+  bool verbose_enabled_;
+};
+
+// Internal functions for region management
+/// @brief Open a region with the given name, type and ID.
+void start_region(const std::string& name, RegionType type, uint64_t id);
+/// @brief Close the most recently opened active region of the given type.
+void end_region_by_type(RegionType type_to_end);
+/// @brief Close the active region with the given ID.
+void end_region_with_id(uint64_t expected_id);
+/// @brief Return the next unused region ID and advance the counter.
+uint64_t generate_new_region_id();
+/// @brief Whether verbose logging is currently enabled.
+inline bool is_verbose_enabled() {
+  return EnergyProfilerState::get_instance().get_verbose_enabled();
+}
+/// @brief Turn verbose logging on or off.
+inline void set_verbose_enabled(bool enabled) {
+  EnergyProfilerState::get_instance().set_verbose_enabled(enabled);
+}
+/// @brief Print a message on stdout when verbose logging is enabled.
+void log_verbose(const std::string& message);
+/// @brief Copy of all completed timings, sorted by start time.
+std::vector<TimingInfo> get_all_timings();
+
+// Filename prefix generation
+/// @brief Generate a prefix for output files based on hostname and PID
+std::string generate_prefix();
+
+/// @brief Convert RegionType to string
+std::string region_type_to_string(RegionType type);
+
+// Helper functions for timing calculations
+/// @brief Milliseconds between the clock's epoch and @p time_point
+/// (truncated toward zero).
+template <typename TimePoint>
+long get_epoch_ms(const TimePoint& time_point) {
+  return std::chrono::duration_cast<std::chrono::milliseconds>(
+             time_point.time_since_epoch())
+      .count();
+}
+
+/// @brief Get duration in milliseconds between two time points
+/// (truncated toward zero; negative when @p end_time precedes @p start_time).
+template <typename TimePoint>
+long get_duration_ms(const TimePoint& start_time, const TimePoint& end_time) {
+  const auto duration = end_time - start_time;
+  return std::chrono::duration_cast<std::chrono::milliseconds>(duration)
+      .count();
+}
+
+}  // namespace EnergyProfiler
+}  // namespace KokkosTools