Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 107 additions & 11 deletions profiling/simple-kernel-timer/kp_kernel_timer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,93 @@

namespace KokkosTools {
namespace KernelTimer {
// Writes a human-readable timing report to stdout.
//
// The report has three parts: a "Regions" section, a "Kernels" section and a
// "Summary". Entries are ordered with compareKernelPerformanceInfo. Region
// entries are excluded from the kernel time / call-count totals.
//
// count_map        : kernel/region name -> performance record to report.
// totalExecuteTime : total program time used as the denominator for the
//                    "%Total Program Time" column and the summary (printed
//                    as seconds).
void print_ascii(std::map<std::string, KernelPerformanceInfo*>& count_map,
                 double totalExecuteTime) {
  // Flatten the map values into a vector so they can be sorted.
  std::vector<KernelPerformanceInfo*> sortedEntries;
  for (auto mapIt = count_map.begin(); mapIt != count_map.end(); ++mapIt) {
    sortedEntries.push_back(mapIt->second);
  }
  std::sort(sortedEntries.begin(), sortedEntries.end(),
            compareKernelPerformanceInfo);

  // Accumulate the summary totals over every non-region entry. This is done
  // after sorting so the floating-point accumulation order is the sorted one.
  double totalKernelsTime    = 0;
  uint64_t totalKernelsCalls = 0;
  for (KernelPerformanceInfo* entry : sortedEntries) {
    if (entry->getKernelType() == REGION) continue;
    totalKernelsTime += entry->getTime();
    totalKernelsCalls += entry->getCallCount();
  }

  // Column header (the original author notes it matches kp_reader.cpp).
  printf(
      "\n (Type) Total Time, Call Count, Avg. Time per Call, %%Total Time in "
      "Kernels, %%Total Program Time\n");
  printf(
      "------------------------------------------------------------------------"
      "-\n\n");

  // Field separator: first character of outputDelimiter when it is set and
  // non-empty, otherwise a single space.
  const bool hasCustomDelimiter =
      outputDelimiter != nullptr && strlen(outputDelimiter) > 0;
  const char delimiter = hasCustomDelimiter ? outputDelimiter[0] : ' ';

  // Emits the two-line record for a single entry: its name, then the type
  // label followed by the delimiter-separated numeric columns.
  auto emitEntry = [&](KernelPerformanceInfo* entry) {
    const auto kind      = entry->getKernelType();
    const char* typeText = " (Region) ";
    if (kind == PARALLEL_FOR) {
      typeText = " (ParFor) ";
    } else if (kind == PARALLEL_REDUCE) {
      typeText = " (ParRed) ";
    } else if (kind == PARALLEL_SCAN) {
      typeText = " (ParScan) ";
    }

    const auto elapsed = entry->getTime();
    const auto calls   = entry->getCallCount();

    // The std::max guards keep the divisions well-defined when an entry has
    // zero calls or when a total time is zero.
    const double perCall = elapsed / std::max(1.0, static_cast<double>(calls));
    const double pctOfKernels =
        (elapsed / std::max(1e-9, totalKernelsTime)) * 100.0;
    const double pctOfProgram =
        (elapsed / std::max(1e-9, totalExecuteTime)) * 100.0;

    printf(
        "- %s\n%s%c%f%c%" PRIu64 "%c%f%c%f%c%f\n", entry->getName().c_str(),
        typeText, delimiter, elapsed, delimiter, calls, delimiter, perCall,
        delimiter, pctOfKernels, delimiter, pctOfProgram);
  };

  // Prints every entry whose "is a region" status equals wantRegions.
  auto emitSection = [&](bool wantRegions) {
    for (KernelPerformanceInfo* entry : sortedEntries) {
      const bool isRegion = entry->getKernelType() == REGION;
      if (isRegion == wantRegions) emitEntry(entry);
    }
  };

  printf("Regions: \n\n");
  emitSection(true);

  printf(
      "\n----------------------------------------------------------------------"
      "---\n");
  printf("Kernels: \n\n");
  emitSection(false);

  printf(
      "\n----------------------------------------------------------------------"
      "---\n");
  printf("Summary:\n\n");

  const double outsideKernelsTime = totalExecuteTime - totalKernelsTime;
  const double kernelsPercentage =
      (totalKernelsTime / std::max(1e-9, totalExecuteTime)) * 100.0;

  printf(
      "Total Execution Time (incl. Kokkos + non-Kokkos): %20.5f seconds\n",
      totalExecuteTime);
  printf(
      "Total Time in Kokkos kernels: %20.5f seconds\n",
      totalKernelsTime);
  printf(
      " -> Time outside Kokkos kernels: %20.5f seconds\n",
      outsideKernelsTime);
  printf(" -> Percentage in Kokkos kernels: %20.2f %%\n",
         kernelsPercentage);
  printf("Total Calls to Kokkos Kernels: %20" PRIu64
         "\n",
         totalKernelsCalls);
  printf(
      "------------------------------------------------------------------------"
      "-\n\n");
}

bool is_region(KernelPerformanceInfo const& kp) {
return kp.getKernelType() == REGION;
Expand Down Expand Up @@ -43,15 +130,25 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
}

void kokkosp_finalize_library() {
double finishTime = seconds();
double finishTime = seconds();
const double totalExecuteTime = (finishTime - initTime);

const char* kokkos_tools_timer_json_raw = getenv("KOKKOS_TOOLS_TIMER_JSON");
const bool kokkos_tools_timer_json =
kokkos_tools_timer_json_raw == NULL
? false
: strcmp(kokkos_tools_timer_json_raw, "1") == 0 ||
strcmp(kokkos_tools_timer_json_raw, "true") == 0 ||
strcmp(kokkos_tools_timer_json_raw, "True") == 0;
auto is_enabled = [](const char* env_var) {
const char* env_var_raw = getenv(env_var);
return env_var_raw != nullptr &&
(strcmp(env_var_raw, "1") == 0 || strcmp(env_var_raw, "true") == 0 ||
strcmp(env_var_raw, "True") == 0);
};

const bool kokkos_tools_timer_json = is_enabled("KOKKOS_TOOLS_TIMER_JSON");
const bool kokkos_tools_timer_binary =
is_enabled("KOKKOS_TOOLS_TIMER_BINARY");

// Quick return for ascii output (default)
if (!kokkos_tools_timer_json && !kokkos_tools_timer_binary) {
print_ascii(count_map, totalExecuteTime);
return;
}

double kernelTimes = 0;

Expand All @@ -65,15 +162,14 @@ void kokkosp_finalize_library() {
free(hostname);
FILE* output_data = fopen(fileOutput, "wb");

const double totalExecuteTime = (finishTime - initTime);
if (!kokkos_tools_timer_json) {
if (kokkos_tools_timer_binary) {
fwrite(&totalExecuteTime, sizeof(totalExecuteTime), 1, output_data);

for (auto kernel_itr = count_map.begin(); kernel_itr != count_map.end();
kernel_itr++) {
kernel_itr->second->writeToBinaryFile(output_data);
}
} else {
} else if (kokkos_tools_timer_json) {
std::vector<KernelPerformanceInfo*> kernelList;

for (auto kernel_itr = count_map.begin(); kernel_itr != count_map.end();
Expand Down
8 changes: 8 additions & 0 deletions tests/sampler/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,11 @@ kp_add_executable_and_test(
KOKKOS_TOOLS_SAMPLER_PROB 51.6
KOKKOS_TOOLS_GLOBALFENCES 1
)

## tests for simple kernel timer init/finalize with print_ascii output

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please merge all the proposed tests into a single test file named "test_simple_kernel_timer"

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in db16c5f — all tests merged into test_simple_kernel_timer.cpp with a single test_simple_kernel_timer CMake target.

# Simple kernel timer test: hands the target name, its single source file and
# the kp_kernel_timer tools library to the project's
# kp_add_executable_and_test helper.
# NOTE(review): the helper is defined elsewhere; presumably it builds the
# executable and registers it as a test with the library loaded — confirm
# against its definition.
kp_add_executable_and_test(
TARGET_NAME test_simple_kernel_timer
SOURCE_FILE test_simple_kernel_timer.cpp
KOKKOS_TOOLS_LIBS kp_kernel_timer
)
Loading
Loading