Skip to content
Open
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ if(NOT WIN32)
add_subdirectory(profiling/chrome-tracing)
add_subdirectory(profiling/space-time-stack)
add_subdirectory(profiling/perfetto-connector)
add_subdirectory(profiling/energy-profiler)
endif()

# External lib connectors
Expand Down
5 changes: 5 additions & 0 deletions profiling/energy-profiler/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Declare the kp_energy_profiler tool through the project's kp_add_library
# helper, passing the three source files that make up the tool.
kp_add_library(kp_energy_profiler
kp_energy_profiler.cpp
timing_utils.cpp
timing_export.cpp
)
292 changes: 292 additions & 0 deletions profiling/energy-profiler/kp_energy_profiler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,292 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#include <algorithm>
#include <chrono>
#include <iostream>
#include <mutex>
#include <string>
#include <vector>

#include "kp_core.hpp"
#include "timing_utils.hpp"
#include "timing_export.hpp"

namespace KokkosTools {
namespace EnergyProfiler {

// Hands out the next region ID from the shared profiler state.
// The counter is read and then advanced while holding the state mutex, and
// the value read before the increment is what the caller receives.
uint64_t generate_new_region_id() {
  auto& profiler = EnergyProfilerState::get_instance();
  std::lock_guard<std::mutex> guard(profiler.get_mutex());
  const uint64_t issued_id = profiler.get_next_region_id();
  profiler.increment_next_region_id();
  return issued_id;
}

// Writes `message` followed by a flushing newline to stdout, but only when
// verbose output is enabled in the profiler state; otherwise does nothing.
void log_verbose(const std::string& message) {
  const bool verbose_on =
      EnergyProfilerState::get_instance().get_verbose_enabled();
  if (!verbose_on) return;
  std::cout << message << std::endl;
}

// Start a region
void start_region(const std::string& name, RegionType type, uint64_t id) {
TimingInfo region;
region.name = name;
region.type = type;
region.id = id;
auto& state = EnergyProfilerState::get_instance();
std::lock_guard<std::mutex> lock(state.get_mutex());
state.get_active_regions().push_back(region);
state.get_active_regions().back().start_time =
std::chrono::high_resolution_clock::now();
}

// Closes the most recently opened active region whose type is `type_to_end`.
// The end timestamp is taken on entry, before the state mutex is acquired.
// The matching record is removed from the active list and appended to the
// completed timings. If no active region has that type, nothing happens.
void end_region_by_type(RegionType type_to_end) {
  const auto stop_time = std::chrono::high_resolution_clock::now();
  auto& profiler       = EnergyProfilerState::get_instance();
  std::lock_guard<std::mutex> guard(profiler.get_mutex());
  auto& open_regions = profiler.get_active_regions();

  // Walk from the newest region to the oldest; an empty list simply skips
  // the loop.
  for (auto rit = open_regions.rbegin(); rit != open_regions.rend(); ++rit) {
    if (!(rit->type == type_to_end)) continue;

    auto finished = *rit;
    // std::next(rit).base() is the forward iterator that refers to *rit.
    open_regions.erase(std::next(rit).base());
    finished.end_time = stop_time;
    profiler.get_completed_timings().push_back(finished);
    return;
  }
}

// Closes the first active region whose ID equals `expected_id`.
// The end timestamp is taken on entry, before the state mutex is acquired.
// On a match the record moves from the active list to the completed timings;
// otherwise a warning naming the ID is written to stderr.
void end_region_with_id(uint64_t expected_id) {
  const auto stop_time = std::chrono::high_resolution_clock::now();
  auto& profiler       = EnergyProfilerState::get_instance();
  std::lock_guard<std::mutex> guard(profiler.get_mutex());
  auto& open_regions = profiler.get_active_regions();

  for (auto pos = open_regions.begin(); pos != open_regions.end(); ++pos) {
    if (!(pos->id == expected_id)) continue;

    auto finished     = *pos;
    finished.end_time = stop_time;
    open_regions.erase(pos);
    profiler.get_completed_timings().push_back(finished);
    return;
  }

  std::cerr << "Warning: No active region found with ID " << expected_id
            << "\n";
}

// Get all completed timings
std::vector<TimingInfo> get_all_timings() {
auto& state = EnergyProfilerState::get_instance();
std::lock_guard<std::mutex> lock(state.get_mutex());
std::vector<TimingInfo> all_timings = state.get_completed_timings();
std::sort(all_timings.begin(), all_timings.end(),
[](const TimingInfo& a, const TimingInfo& b) {
return a.start_time < b.start_time;
});
return all_timings;
}

} // namespace EnergyProfiler
} // namespace KokkosTools

extern "C" {

// Tool settings
void kokkosp_request_tool_settings(const uint32_t,
Kokkos_Tools_ToolSettings* settings) {
settings->requires_global_fencing = false;
settings->padding[0] = 0;
}

// Library initialization callback.
// Turns on verbose logging when the environment variable
// KOKKOS_TOOLS_ENERGY_VERBOSE is exactly "1" or "ON", then announces the load
// sequence and interface version on stdout. The device-info arguments are
// not used.
void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
                          const uint32_t devInfoCount,
                          Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) {
  (void)devInfoCount;
  (void)deviceInfo;

  if (const char* verbose_setting =
          std::getenv("KOKKOS_TOOLS_ENERGY_VERBOSE")) {
    const std::string requested(verbose_setting);
    if (requested == "1" || requested == "ON") {
      KokkosTools::EnergyProfiler::EnergyProfilerState::get_instance()
          .set_verbose_enabled(true);
    }
  }

  std::cout << "Kokkos Energy Profiler: Initializing with load sequence "
            << loadSeq << " and interface version " << interfaceVer
            << std::endl;
  std::cout << "Kokkos Energy Profiler: Library initialized" << std::endl;
}

// Library finalize
void kokkosp_finalize_library() {
std::cout << "Kokkos Energy Profiler: Finalizing library" << std::endl;
std::string prefix = KokkosTools::EnergyProfiler::generate_prefix();
auto all_timings = KokkosTools::EnergyProfiler::get_all_timings();
KokkosTools::EnergyProfiler::print_all_timings_summary(
std::cout, all_timings.begin(), all_timings.end());
KokkosTools::EnergyProfiler::export_all_timings_csv(
all_timings, prefix + "_timing_data.csv");
std::cout << "Kokkos Energy Profiler: Library finalized" << std::endl;
}

// Begin parallel_for
void kokkosp_begin_parallel_for(const char* name, const uint32_t devID,
uint64_t* kID) {
if (!name || !kID) {
std::cerr << "Error: Invalid parameters in kokkosp_begin_parallel_for\n";
return;
}
(void)devID;
uint64_t new_id = KokkosTools::EnergyProfiler::generate_new_region_id();
*kID = new_id;
KokkosTools::EnergyProfiler::start_region(
name, KokkosTools::EnergyProfiler::RegionType::ParallelFor, *kID);
KokkosTools::EnergyProfiler::log_verbose(
std::string("Kokkos Energy Profiler: Started parallel_for '") + name +
"' on device " + std::to_string(devID) + " with ID " +
std::to_string(*kID));
}

// End parallel_for
void kokkosp_end_parallel_for(const uint64_t kID) {
if (kID == 0) {
std::cerr << "Error: Invalid kernel ID in kokkosp_end_parallel_for\n";
return;
}
KokkosTools::EnergyProfiler::end_region_with_id(kID);
KokkosTools::EnergyProfiler::log_verbose(
std::string("Kokkos Energy Profiler: Ended parallel_for with ID ") +
std::to_string(kID));
}

// Begin parallel_scan
void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID,
uint64_t* kID) {
if (!name || !kID) {
std::cerr << "Error: Invalid parameters in kokkosp_begin_parallel_scan\n";
return;
}
(void)devID;
uint64_t new_id = KokkosTools::EnergyProfiler::generate_new_region_id();
*kID = new_id;
KokkosTools::EnergyProfiler::start_region(
name, KokkosTools::EnergyProfiler::RegionType::ParallelScan, *kID);
KokkosTools::EnergyProfiler::log_verbose(
std::string("Kokkos Energy Profiler: Started parallel_scan '") + name +
"' on device " + std::to_string(devID) + " with ID " +
std::to_string(*kID));
}

// End parallel_scan
void kokkosp_end_parallel_scan(const uint64_t kID) {
if (kID == 0) {
std::cerr << "Error: Invalid kernel ID in kokkosp_end_parallel_scan\n";
return;
}
KokkosTools::EnergyProfiler::end_region_with_id(kID);
KokkosTools::EnergyProfiler::log_verbose(
std::string("Kokkos Energy Profiler: Ended parallel_scan with ID ") +
std::to_string(kID));
}

// Begin parallel_reduce
void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID,
uint64_t* kID) {
if (!name || !kID) {
std::cerr << "Error: Invalid parameters in kokkosp_begin_parallel_reduce\n";
return;
}
(void)devID;
uint64_t new_id = KokkosTools::EnergyProfiler::generate_new_region_id();
*kID = new_id;
KokkosTools::EnergyProfiler::start_region(
name, KokkosTools::EnergyProfiler::RegionType::ParallelReduce, *kID);
KokkosTools::EnergyProfiler::log_verbose(
std::string("Kokkos Energy Profiler: Started parallel_reduce '") + name +
"' on device " + std::to_string(devID) + " with ID " +
std::to_string(*kID));
}

// End parallel_reduce
void kokkosp_end_parallel_reduce(const uint64_t kID) {
if (kID == 0) {
std::cerr << "Error: Invalid kernel ID in kokkosp_end_parallel_reduce\n";
return;
}
KokkosTools::EnergyProfiler::end_region_with_id(kID);
KokkosTools::EnergyProfiler::log_verbose(
std::string("Kokkos Energy Profiler: Ended parallel_reduce with ID ") +
std::to_string(kID));
}

// Push user region
void kokkosp_push_profile_region(char const* regionName) {
if (!regionName) {
std::cerr << "Error: Invalid region name in kokkosp_push_profile_region\n";
return;
}
uint64_t new_id = KokkosTools::EnergyProfiler::generate_new_region_id();
KokkosTools::EnergyProfiler::start_region(
regionName, KokkosTools::EnergyProfiler::RegionType::UserRegion, new_id);
KokkosTools::EnergyProfiler::log_verbose(
std::string("Kokkos Energy Profiler: Pushed profile region '") +
regionName + "'");
}

// Pop user region
void kokkosp_pop_profile_region() {
KokkosTools::EnergyProfiler::end_region_by_type(
KokkosTools::EnergyProfiler::RegionType::UserRegion);
KokkosTools::EnergyProfiler::log_verbose(
"Kokkos Energy Profiler: Popped profile region");
}

// Begin deep copy
// Callback for the start of a deep copy.
//
// dst_name - label of the destination; must not be null.
// src_name - label of the source; must not be null.
// size     - reported in the verbose log message as a byte count.
// The space handles and raw pointers are not used.
//
// The region is recorded under the name "<src_name> -> <dst_name>" with type
// DeepCopy. On a null name an error is written to stderr and nothing is
// recorded.
void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle, const char* dst_name,
                             const void*, Kokkos::Tools::SpaceHandle,
                             const char* src_name, const void*, uint64_t size) {
  if (!dst_name || !src_name) {
    std::cerr << "Error: Invalid names in kokkosp_begin_deep_copy\n";
    return;
  }
  namespace EP = KokkosTools::EnergyProfiler;

  const uint64_t new_id = EP::generate_new_region_id();
  const std::string name =
      std::string(src_name) + " -> " + std::string(dst_name);

  // Log before the region is started: start_region() stamps the start time,
  // so any message formatting or output done after it would be counted as
  // part of the copy's measured duration.
  EP::log_verbose(
      std::string("Kokkos Energy Profiler: Started deep copy from '") +
      src_name + "' to '" + dst_name + "' (size: " + std::to_string(size) +
      " bytes)");

  EP::start_region(name, EP::RegionType::DeepCopy, new_id);
}

// End deep copy
void kokkosp_end_deep_copy() {
KokkosTools::EnergyProfiler::end_region_by_type(
KokkosTools::EnergyProfiler::RegionType::DeepCopy);
KokkosTools::EnergyProfiler::log_verbose(
"Kokkos Energy Profiler: Ended deep copy");
}

} // extern "C"
89 changes: 89 additions & 0 deletions profiling/energy-profiler/timing_export.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#include "timing_export.hpp"
#include <fstream>
#include <iostream>
#include <iomanip>

namespace KokkosTools {
namespace EnergyProfiler {

// Column widths (in characters) of the timing summary table, excluding the
// single space of padding placed on each side of a cell.
constexpr int COLUMN_WIDTH_CATEGORY = 10;
constexpr int COLUMN_WIDTH_NAME     = 32;
constexpr int COLUMN_WIDTH_TYPE     = 14;
constexpr int COLUMN_WIDTH_TIME     = 17;
constexpr int COLUMN_WIDTH_DURATION = 13;

// Returns `field` in a form that is safe to place in one CSV cell.
// Fields containing a comma, double quote, CR or LF are wrapped in double
// quotes with embedded quotes doubled; all other fields are returned as-is.
static std::string csv_escape_field(const std::string& field) {
  if (field.find_first_of(",\"\r\n") == std::string::npos) return field;

  std::string quoted;
  quoted.reserve(field.size() + 2);
  quoted += '"';
  for (const char ch : field) {
    if (ch == '"') quoted += '"';  // a literal quote is written as ""
    quoted += ch;
  }
  quoted += '"';
  return quoted;
}

// Writes all timings to `filename` as CSV with the header
// "name,type,start_time_epoch_ms,end_time_epoch_ms,duration_ms", one row per
// entry of `all_timings`, in the order given.
//
// Region names are caller-supplied labels, so they are CSV-escaped before
// being written; an unescaped comma or quote in a name would otherwise shift
// or merge columns for that row.
//
// Errors are reported on stderr: when the file cannot be opened, and when the
// stream is in a failed state after all rows have been written. The success
// message on stdout is printed only if neither happened.
void export_all_timings_csv(const std::vector<TimingInfo>& all_timings,
                            const std::string& filename) {
  std::ofstream file(filename);
  if (!file.is_open()) {
    std::cerr << "ERROR: Unable to open file " << filename << " for writing.\n";
    return;
  }

  file << "name,type,start_time_epoch_ms,end_time_epoch_ms,duration_ms\n";
  for (const auto& timing : all_timings) {
    const auto start_ms = get_epoch_ms(timing.start_time);
    const auto end_ms   = get_epoch_ms(timing.end_time);
    const auto duration_ms =
        get_duration_ms(timing.start_time, timing.end_time);
    const std::string type_str = region_type_to_string(timing.type);
    file << csv_escape_field(timing.name) << "," << type_str << "," << start_ms
         << "," << end_ms << "," << duration_ms << "\n";
  }

  // Flush explicitly so a failed write is detected here instead of being
  // silently dropped when the stream is destroyed.
  file.flush();
  if (!file) {
    std::cerr << "ERROR: Failed while writing timing data to " << filename
              << ".\n";
    return;
  }
  std::cout << "All timing data exported to " << filename << '\n';
}

// Maps a region type to the coarse category label used in the summary table:
// user regions -> "REGION", deep copies -> "DEEPCOPY", the three parallel
// kernel types -> "KERNEL", anything else -> "OTHER".
std::string get_category_from_type(RegionType type) {
  if (type == RegionType::UserRegion) {
    return "REGION";
  }
  if (type == RegionType::DeepCopy) {
    return "DEEPCOPY";
  }
  const bool is_kernel = type == RegionType::ParallelFor ||
                         type == RegionType::ParallelScan ||
                         type == RegionType::ParallelReduce;
  if (is_kernel) {
    return "KERNEL";
  }
  return "OTHER";
}

// Prints a fixed-width summary table of the timings in [begin, end) to `os`.
//
// Each row shows category, name, type, start and end time (epoch ms) and
// duration (ms). Text columns are left-aligned and numeric columns are
// right-aligned.
//
// The header and separator rows are generated from the same COLUMN_WIDTH_*
// constants as the data rows, so the three cannot drift out of alignment.
// The stream's format flags and fill character are saved on entry and
// restored on exit, so the caller's stream is not left right-aligned.
void print_all_timings_summary(std::ostream& os,
                               std::vector<TimingInfo>::const_iterator begin,
                               std::vector<TimingInfo>::const_iterator end) {
  const std::ios_base::fmtflags saved_flags = os.flags();
  const char saved_fill                     = os.fill(' ');

  // A separator cell spans the column width plus one padding space per side.
  const auto rule = [](int width) {
    return std::string(static_cast<std::string::size_type>(width + 2), '-');
  };

  os << "\n==== TIMING SUMMARY ====\n";
  os << std::left << "| " << std::setw(COLUMN_WIDTH_CATEGORY) << "Category"
     << " | " << std::setw(COLUMN_WIDTH_NAME) << "Name" << " | "
     << std::setw(COLUMN_WIDTH_TYPE) << "Type" << " | "
     << std::setw(COLUMN_WIDTH_TIME) << "Start (ms)" << " | "
     << std::setw(COLUMN_WIDTH_TIME) << "End (ms)" << " | "
     << std::setw(COLUMN_WIDTH_DURATION) << "Duration (ms)" << " |\n";
  os << '|' << rule(COLUMN_WIDTH_CATEGORY) << '|' << rule(COLUMN_WIDTH_NAME)
     << '|' << rule(COLUMN_WIDTH_TYPE) << '|' << rule(COLUMN_WIDTH_TIME) << '|'
     << rule(COLUMN_WIDTH_TIME) << '|' << rule(COLUMN_WIDTH_DURATION) << "|\n";

  for (auto it = begin; it != end; ++it) {
    const auto& timing_info = *it;
    const auto start_ms     = get_epoch_ms(timing_info.start_time);
    const auto end_ms       = get_epoch_ms(timing_info.end_time);
    const auto duration_ms =
        get_duration_ms(timing_info.start_time, timing_info.end_time);
    const std::string type_str = region_type_to_string(timing_info.type);
    const std::string category = get_category_from_type(timing_info.type);
    os << "| " << std::setw(COLUMN_WIDTH_CATEGORY) << std::left << category
       << " | " << std::setw(COLUMN_WIDTH_NAME) << std::left
       << timing_info.name << " | " << std::setw(COLUMN_WIDTH_TYPE)
       << std::left << type_str << " | " << std::setw(COLUMN_WIDTH_TIME)
       << std::right << start_ms << " | " << std::setw(COLUMN_WIDTH_TIME)
       << std::right << end_ms << " | " << std::setw(COLUMN_WIDTH_DURATION)
       << std::right << duration_ms << " |\n";
  }

  os.fill(saved_fill);
  os.flags(saved_flags);
}

} // namespace EnergyProfiler
} // namespace KokkosTools
Loading