diff --git a/profiling/simple-kernel-timer/kp_kernel_timer.cpp b/profiling/simple-kernel-timer/kp_kernel_timer.cpp index 882d3fefb..d75d2eac5 100644 --- a/profiling/simple-kernel-timer/kp_kernel_timer.cpp +++ b/profiling/simple-kernel-timer/kp_kernel_timer.cpp @@ -13,6 +13,93 @@ namespace KokkosTools { namespace KernelTimer { +void print_ascii(std::map& count_map, + double totalExecuteTime) { + std::vector kernelInfo; + double totalKernelsTime = 0; + uint64_t totalKernelsCalls = 0; + + for (auto const& [name, info] : count_map) { + kernelInfo.push_back(info); + } + + std::sort(kernelInfo.begin(), kernelInfo.end(), compareKernelPerformanceInfo); + + // Calculate total time in kernels and total calls to kernels for summary + for (auto const& info : kernelInfo) { + if (info->getKernelType() != REGION) { + totalKernelsTime += info->getTime(); + totalKernelsCalls += info->getCallCount(); + } + } + + // Header matching kp_reader.cpp + printf( + "\n (Type) Total Time, Call Count, Avg. Time per Call, %%Total Time in " + "Kernels, %%Total Program Time\n"); + printf( + "------------------------------------------------------------------------" + "-\n\n"); + + char delimiter = ' '; + // We check for the environment delimiter if set during init + if (outputDelimiter != nullptr && strlen(outputDelimiter) > 0) { + delimiter = outputDelimiter[0]; + } + + auto print_row = [&](KernelPerformanceInfo* info) { + const double callCountDouble = (double)info->getCallCount(); + const char* typeStr = " (Region) "; + switch (info->getKernelType()) { + case PARALLEL_FOR: typeStr = " (ParFor) "; break; + case PARALLEL_REDUCE: typeStr = " (ParRed) "; break; + case PARALLEL_SCAN: typeStr = " (ParScan) "; break; + default: break; + } + + printf( + "- %s\n%s%c%f%c%" PRIu64 "%c%f%c%f%c%f\n", info->getName().c_str(), + typeStr, delimiter, info->getTime(), delimiter, info->getCallCount(), + delimiter, info->getTime() / std::max(1.0, callCountDouble), delimiter, + (info->getTime() / std::max(1e-9, totalKernelsTime)) * 100.0, delimiter, + (info->getTime() / std::max(1e-9, totalExecuteTime)) * 100.0); + }; + + printf("Regions: \n\n"); + for (auto const& info : kernelInfo) { + if (info->getKernelType() == REGION) print_row(info); + } + + printf( + "\n----------------------------------------------------------------------" + "---\n"); + printf("Kernels: \n\n"); + for (auto const& info : kernelInfo) { + if (info->getKernelType() != REGION) print_row(info); + } + + printf( + "\n----------------------------------------------------------------------" + "---\n"); + printf("Summary:\n\n"); + printf( + "Total Execution Time (incl. Kokkos + non-Kokkos): %20.5f seconds\n", + totalExecuteTime); + printf( + "Total Time in Kokkos kernels: %20.5f seconds\n", + totalKernelsTime); + printf( + " -> Time outside Kokkos kernels: %20.5f seconds\n", + (totalExecuteTime - totalKernelsTime)); + printf(" -> Percentage in Kokkos kernels: %20.2f %%\n", + (totalKernelsTime / std::max(1e-9, totalExecuteTime)) * 100.0); + printf("Total Calls to Kokkos Kernels: %20" PRIu64 + "\n", + totalKernelsCalls); + printf( + "------------------------------------------------------------------------" + "-\n\n"); +} bool is_region(KernelPerformanceInfo const& kp) { return kp.getKernelType() == REGION; @@ -43,15 +130,25 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, } void kokkosp_finalize_library() { - double finishTime = seconds(); + double finishTime = seconds(); + const double totalExecuteTime = (finishTime - initTime); - const char* kokkos_tools_timer_json_raw = getenv("KOKKOS_TOOLS_TIMER_JSON"); - const bool kokkos_tools_timer_json = - kokkos_tools_timer_json_raw == NULL - ? false - : strcmp(kokkos_tools_timer_json_raw, "1") == 0 || - strcmp(kokkos_tools_timer_json_raw, "true") == 0 || - strcmp(kokkos_tools_timer_json_raw, "True") == 0; + auto is_enabled = [](const char* env_var) { + const char* env_var_raw = getenv(env_var); + return env_var_raw != nullptr && + (strcmp(env_var_raw, "1") == 0 || strcmp(env_var_raw, "true") == 0 || + strcmp(env_var_raw, "True") == 0); + }; + + const bool kokkos_tools_timer_json = is_enabled("KOKKOS_TOOLS_TIMER_JSON"); + const bool kokkos_tools_timer_binary = + is_enabled("KOKKOS_TOOLS_TIMER_BINARY"); + + // Quick return for ascii output (default) + if (!kokkos_tools_timer_json && !kokkos_tools_timer_binary) { + print_ascii(count_map, totalExecuteTime); + return; + } double kernelTimes = 0; @@ -65,15 +162,14 @@ void kokkosp_finalize_library() { free(hostname); FILE* output_data = fopen(fileOutput, "wb"); - const double totalExecuteTime = (finishTime - initTime); - if (!kokkos_tools_timer_json) { + if (kokkos_tools_timer_binary) { fwrite(&totalExecuteTime, sizeof(totalExecuteTime), 1, output_data); for (auto kernel_itr = count_map.begin(); kernel_itr != count_map.end(); kernel_itr++) { kernel_itr->second->writeToBinaryFile(output_data); } - } else { + } else if (kokkos_tools_timer_json) { std::vector kernelList; for (auto kernel_itr = count_map.begin(); kernel_itr != count_map.end(); diff --git a/tests/sampler/CMakeLists.txt b/tests/sampler/CMakeLists.txt index a6c86f404..c8ae1dc69 100644 --- a/tests/sampler/CMakeLists.txt +++ b/tests/sampler/CMakeLists.txt @@ -84,3 +84,11 @@ kp_add_executable_and_test( KOKKOS_TOOLS_SAMPLER_PROB 51.6 KOKKOS_TOOLS_GLOBALFENCES 1 ) + +## tests for simple kernel timer init/finalize with print_ascii output + +kp_add_executable_and_test( + TARGET_NAME test_simple_kernel_timer + SOURCE_FILE test_simple_kernel_timer.cpp + KOKKOS_TOOLS_LIBS kp_kernel_timer +) diff --git a/tests/sampler/test_simple_kernel_timer.cpp b/tests/sampler/test_simple_kernel_timer.cpp new file mode 100644 index 000000000..7248cee4f --- /dev/null +++ b/tests/sampler/test_simple_kernel_timer.cpp @@ -0,0 +1,295 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project + +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +#include "Kokkos_Core.hpp" + +using ::testing::ContainsRegex; +using ::testing::HasSubstr; + +struct ParForFunctor { + KOKKOS_FUNCTION void operator()(const int) const {} +}; + +struct ParReduceFunctor { + KOKKOS_FUNCTION void operator()(const int, long int&) const {} +}; + +struct ParScanFunctor { + KOKKOS_FUNCTION void operator()(const int, long int&, bool) const {} +}; + +/** + * @brief Test fixture for simple kernel timer death tests. + * + * Death tests fork a child process via EXPECT_EXIT. The dup2 call that + * redirects stdout to stderr must be performed inside the child process + * (i.e., inside the EXPECT_EXIT block) because SetUp() runs in the parent + * process and would not affect the child's file descriptors. Therefore, + * dup2 remains inside each EXPECT_EXIT block. + */ +class SimpleKernelTimer_DeathTest : public ::testing::Test { + protected: + void SetUp() override { + // dup2(STDERR_FILENO, STDOUT_FILENO) cannot be moved here because + // SetUp() runs in the parent process, while EXPECT_EXIT forks a child + // process. The redirect must happen inside the EXPECT_EXIT block. + } +}; + +/** + * @test This death test checks that kokkosp_init_library and + * kokkosp_finalize_library work correctly for parallel_for. + * The print_ascii output is captured and validated. + */ +TEST_F(SimpleKernelTimer_DeathTest, parfor) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + EXPECT_EXIT( + { + dup2(STDERR_FILENO, STDOUT_FILENO); + + Kokkos::initialize(); + + for (int i = 0; i < 5; i++) { + Kokkos::parallel_for("named kernel", Kokkos::RangePolicy<>(0, 1), + ParForFunctor{}); + } + + Kokkos::finalize(); + exit(0); + }, + ::testing::ExitedWithCode(0), + ::testing::AllOf( + HasSubstr("KokkosP: Simple Kernel Timer Library Initialized"), + HasSubstr("Total Time, Call Count, Avg. Time per Call"), + ContainsRegex("named kernel.*\\(ParFor\\)"), + ContainsRegex("Total Execution Time \\(incl. Kokkos \\+ " + "non-Kokkos\\):[ ]+[0-9]+\\.[0-9]+ seconds"), + ContainsRegex("Total Calls to Kokkos Kernels:[ ]+5"), + ContainsRegex( + "\\(ParFor\\)[ ]+[0-9]+\\.[0-9]+[ ]+5[ ]+[0-9]+\\.[0-9]+"))); +} + +/** + * @test This death test checks that kokkosp_init_library and + * kokkosp_finalize_library work correctly for parallel_reduce. + * The print_ascii output is captured and validated. + */ +TEST_F(SimpleKernelTimer_DeathTest, parreduce) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + EXPECT_EXIT( + { + dup2(STDERR_FILENO, STDOUT_FILENO); + + Kokkos::initialize(); + + long int sum; + for (int i = 0; i < 5; i++) { + sum = 0; + Kokkos::parallel_reduce("named kernel reduce", + Kokkos::RangePolicy<>(0, 1), + ParReduceFunctor{}, sum); + } + + Kokkos::finalize(); + exit(0); + }, + ::testing::ExitedWithCode(0), + ::testing::AllOf( + HasSubstr("KokkosP: Simple Kernel Timer Library Initialized"), + HasSubstr("Total Time, Call Count, Avg. Time per Call"), + ContainsRegex("named kernel reduce.*\\(ParRed\\)"), + ContainsRegex("Total Execution Time \\(incl. Kokkos \\+ " + "non-Kokkos\\):[ ]+[0-9]+\\.[0-9]+ seconds"), + ContainsRegex("Total Calls to Kokkos Kernels:[ ]+5"), + ContainsRegex( + "\\(ParRed\\)[ ]+[0-9]+\\.[0-9]+[ ]+5[ ]+[0-9]+\\.[0-9]+"))); +} + +/** + * @test This death test checks that kokkosp_init_library and + * kokkosp_finalize_library work correctly for parallel_scan. + * The print_ascii output is captured and validated. + */ +TEST_F(SimpleKernelTimer_DeathTest, parscan) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + EXPECT_EXIT( + { + dup2(STDERR_FILENO, STDOUT_FILENO); + + Kokkos::initialize(); + + long int N = 1024; + long int result; + for (int i = 0; i < 5; i++) { + result = 0; + Kokkos::parallel_scan("named kernel scan", N, ParScanFunctor{}, + result); + } + + Kokkos::finalize(); + exit(0); + }, + ::testing::ExitedWithCode(0), + ::testing::AllOf( + HasSubstr("KokkosP: Simple Kernel Timer Library Initialized"), + HasSubstr("Total Time, Call Count, Avg. Time per Call"), + ContainsRegex("named kernel scan.*\\(ParScan\\)"), + ContainsRegex("Total Execution Time \\(incl. Kokkos \\+ " + "non-Kokkos\\):[ ]+[0-9]+\\.[0-9]+ seconds"), + ContainsRegex("Total Calls to Kokkos Kernels:[ ]+5"), + ContainsRegex( + "\\(ParScan\\)[ ]+[0-9]+\\.[0-9]+[ ]+5[ ]+[0-9]+\\.[0-9]+"))); +} + +/** + * @test This death test checks that kokkosp_init_library and + * kokkosp_finalize_library work correctly for regions. + * The print_ascii output is captured and validated. + */ +TEST_F(SimpleKernelTimer_DeathTest, region) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + EXPECT_EXIT( + { + dup2(STDERR_FILENO, STDOUT_FILENO); + + Kokkos::initialize(); + + for (int i = 0; i < 5; i++) { + Kokkos::Profiling::pushRegion("test region"); + Kokkos::Profiling::popRegion(); + } + + Kokkos::finalize(); + exit(0); + }, + ::testing::ExitedWithCode(0), + ::testing::AllOf( + HasSubstr("KokkosP: Simple Kernel Timer Library Initialized"), + ContainsRegex("test region.*\\(Region\\)"), + ContainsRegex( + "\\(Region\\)[ ]+[0-9]+\\.[0-9]+[ ]+5[ ]+[0-9]+\\.[0-9]+"), + ContainsRegex("Total Execution Time \\(incl. Kokkos \\+ " + "non-Kokkos\\):[ ]+[0-9]+\\.[0-9]+ seconds"), + // Regions are not counted as kernel calls + ContainsRegex("Total Calls to Kokkos Kernels:[ ]+0"))); +} + +/** + * @test This death test checks that kokkosp_init_library and + * kokkosp_finalize_library work correctly for parallel_for combined + * with regions. The print_ascii output is captured and validated. + */ +TEST_F(SimpleKernelTimer_DeathTest, parfor_region) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + EXPECT_EXIT( + { + dup2(STDERR_FILENO, STDOUT_FILENO); + + Kokkos::initialize(); + + for (int i = 0; i < 5; i++) { + Kokkos::Profiling::pushRegion("test region"); + Kokkos::parallel_for("named kernel", Kokkos::RangePolicy<>(0, 1), + ParForFunctor{}); + Kokkos::Profiling::popRegion(); + } + + Kokkos::finalize(); + exit(0); + }, + ::testing::ExitedWithCode(0), + ::testing::AllOf( + HasSubstr("KokkosP: Simple Kernel Timer Library Initialized"), + ContainsRegex("test region.*\\(Region\\)"), + ContainsRegex("named kernel.*\\(ParFor\\)"), + // Only parallel_for calls count as kernel calls, not regions + ContainsRegex("Total Calls to Kokkos Kernels:[ ]+5"), + ContainsRegex("Total Execution Time \\(incl. Kokkos \\+ " + "non-Kokkos\\):[ ]+[0-9]+\\.[0-9]+ seconds"), + ContainsRegex( + "\\(ParFor\\)[ ]+[0-9]+\\.[0-9]+[ ]+5[ ]+[0-9]+\\.[0-9]+"))); +} + +/** + * @test This death test checks that kokkosp_init_library and + * kokkosp_finalize_library work correctly for parallel_reduce combined + * with regions. The print_ascii output is captured and validated. + */ +TEST_F(SimpleKernelTimer_DeathTest, parreduce_region) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + EXPECT_EXIT( + { + dup2(STDERR_FILENO, STDOUT_FILENO); + + Kokkos::initialize(); + + long int sum; + for (int i = 0; i < 5; i++) { + Kokkos::Profiling::pushRegion("test region"); + sum = 0; + Kokkos::parallel_reduce("named kernel reduce", + Kokkos::RangePolicy<>(0, 1), + ParReduceFunctor{}, sum); + Kokkos::Profiling::popRegion(); + } + + Kokkos::finalize(); + exit(0); + }, + ::testing::ExitedWithCode(0), + ::testing::AllOf( + HasSubstr("KokkosP: Simple Kernel Timer Library Initialized"), + ContainsRegex("test region.*\\(Region\\)"), + ContainsRegex("named kernel reduce.*\\(ParRed\\)"), + ContainsRegex("Total Calls to Kokkos Kernels:[ ]+5"), + ContainsRegex("Total Execution Time \\(incl. Kokkos \\+ " + "non-Kokkos\\):[ ]+[0-9]+\\.[0-9]+ seconds"), + ContainsRegex( + "\\(ParRed\\)[ ]+[0-9]+\\.[0-9]+[ ]+5[ ]+[0-9]+\\.[0-9]+"))); +} + +/** + * @test This death test checks that kokkosp_init_library and + * kokkosp_finalize_library work correctly for parallel_scan combined + * with regions. The print_ascii output is captured and validated. + */ +TEST_F(SimpleKernelTimer_DeathTest, parscan_region) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + EXPECT_EXIT( + { + dup2(STDERR_FILENO, STDOUT_FILENO); + + Kokkos::initialize(); + + long int N = 1024; + long int result; + for (int i = 0; i < 5; i++) { + Kokkos::Profiling::pushRegion("test region"); + result = 0; + Kokkos::parallel_scan("named kernel scan", N, ParScanFunctor{}, + result); + Kokkos::Profiling::popRegion(); + } + + Kokkos::finalize(); + exit(0); + }, + ::testing::ExitedWithCode(0), + ::testing::AllOf( + HasSubstr("KokkosP: Simple Kernel Timer Library Initialized"), + ContainsRegex("test region.*\\(Region\\)"), + ContainsRegex("named kernel scan.*\\(ParScan\\)"), + ContainsRegex("Total Calls to Kokkos Kernels:[ ]+5"), + ContainsRegex("Total Execution Time \\(incl. Kokkos \\+ " + "non-Kokkos\\):[ ]+[0-9]+\\.[0-9]+ seconds"), + ContainsRegex( + "\\(ParScan\\)[ ]+[0-9]+\\.[0-9]+[ ]+5[ ]+[0-9]+\\.[0-9]+"))); +}