Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion debugging/kernel-logger/Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
CXX=g++
CXXFLAGS=-O3 -std=c++11 -g
CXXFLAGS=-O3 -std=c++11 -g -I../../profiling/all
SHARED_CXXFLAGS=-shared -fPIC

all: kp_kernel_logger.so
Expand Down
110 changes: 95 additions & 15 deletions debugging/kernel-logger/kp_kernel_logger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,75 @@
#include <vector>
#include <string>
#include <limits>
#include <climits>
#include <cstring>
#include "impl/Kokkos_Profiling_Interface.hpp"

std::vector<std::string> regions;
static uint64_t uniqID;
// Plain aggregate holding a fixed-size, 64-byte character buffer `name`.
// NOTE(review): presumably mirrors the space handle that Kokkos passes to
// tool callbacks (a memory-space label) -- confirm against the profiling
// interface before relying on the layout.
struct SpaceHandle {
char name[64];
};

// Get a useful label from the deviceId
// NOTE: Relevant code is in:
// kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp
std::string deviceIdToString(const uint32_t deviceId) {
using namespace Kokkos::Tools::Experimental;
std::string device_label("(");
ExecutionSpaceIdentifier eid = identifier_from_devid(deviceId);
if (eid.type == DeviceType::Serial)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a switch case.

device_label += "Serial";
else if (eid.type == DeviceType::OpenMP)
device_label += "OpenMP";
else if (eid.type == DeviceType::Cuda)
device_label += "Cuda";
else if (eid.type == DeviceType::HIP)
device_label += "HIP";
else if (eid.type == DeviceType::OpenMPTarget)
device_label += "OpenMPTarget";
else if (eid.type == DeviceType::HPX)
device_label += "HPX";
else if (eid.type == DeviceType::Threads)
device_label += "Threads";
else if (eid.type == DeviceType::SYCL)
device_label += "SYCL";
else if (eid.type == DeviceType::OpenACC)
device_label += "OpenACC";
else if (eid.type == DeviceType::Unknown)
device_label += "Unknown";
else
device_label += "Unknown to KokkosTools";
Comment on lines +39 to +60
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wouldn't be opposed to pushing this part (or even the whole function) to Kokkos_Profiling_Interface.hpp. Getting a string out of the device id doesn't seem to be specific to the kernel logger tool (also see https://github.com/kokkos/kokkos-tools/pull/265/files#diff-839f34fcb31addd9a48252bebf4d37cf674f6fcdd16539c7039813192674b9c0R165-R187).

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Concur. I also have to maintain this function in Trilinos, so I would love to put it in Kokkos_Profiling_Interface.hpp.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If @crtrott doesn't object, I'll open a Kokkos PR with that.


if (eid.instance_id ==
int_for_synchronization_reason(
SpecialSynchronizationCases::GlobalDeviceSynchronization))
device_label += " All Instances)";
else if (eid.instance_id ==
int_for_synchronization_reason(
SpecialSynchronizationCases::DeepCopyResourceSynchronization))
device_label += " DeepCopyResource)";
else
device_label += " Instance " + std::to_string(eid.instance_id) + ")";

return device_label;
}

bool suppressCounts() {
static bool value = [](){
const char* varVal = std::getenv("KOKKOS_TOOLS_LOGGER_SUPPRESS_COUNTS");
Comment thread
csiefer2 marked this conversation as resolved.
if (varVal) {
std::string v = std::string(varVal);
// default to false
if (v == "1" || v == "ON" || v == "on" || v == "TRUE" || v == "true" ||
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most of the time, it seems that this repo wants either 0 or 1 for the value of an environment variable.

Then use atoi.

I think it is important that all environment variables follow consistent conventions.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@romintomasetti This is a Tpetra-ism to allow it to take anything reasonable as an option. If @vlkale or @crtrott don't like it, that's fine, but we've found the flexibility to be useful in Trilinos.

v == "YES" || v == "yes")
return true;
}
return false;
}();
return value;
}

void kokkosp_print_region_stack_indent(const int level) {
printf("KokkosP: ");

Expand Down Expand Up @@ -66,12 +127,14 @@ extern "C" void kokkosp_finalize_library() {
extern "C" void kokkosp_begin_parallel_for(const char* name,
const uint32_t devID,
uint64_t* kID) {
*kID = uniqID++;
*kID = uniqID++;
int output = *kID;
if (suppressCounts()) output = 0;

printf(
"KokkosP: Executing parallel-for kernel on device %d with unique "
"KokkosP: Executing parallel-for kernel on device %s with unique "
"execution identifier %llu\n",
devID, (unsigned long long)(*kID));
deviceIdToString(devID).c_str(), (unsigned long long)(output));

int level = kokkosp_print_region_stack();
kokkosp_print_region_stack_indent(level);
Expand All @@ -80,19 +143,23 @@ extern "C" void kokkosp_begin_parallel_for(const char* name,
}

// Tool callback invoked when a parallel-for kernel has finished.
// Prints a completion message for the kernel identified by `kID`.
//
// @param kID  64-bit kernel identifier handed to this callback.
//
// When suppressCounts() returns true, 0 is printed in place of the real
// identifier.
extern "C" void kokkosp_end_parallel_for(const uint64_t kID) {
  // Keep the identifier 64 bits wide: routing it through an `int` would
  // truncate large values (and sign-extend negative ones) before the %llu
  // conversion.
  const uint64_t output = suppressCounts() ? 0 : kID;
  printf("KokkosP: Execution of kernel %llu is completed.\n",
         static_cast<unsigned long long>(output));
}

extern "C" void kokkosp_begin_parallel_scan(const char* name,
const uint32_t devID,
uint64_t* kID) {
*kID = uniqID++;
*kID = uniqID++;
int output = *kID;
if (suppressCounts()) output = 0;

printf(
"KokkosP: Executing parallel-scan kernel on device %d with unique "
"KokkosP: Executing parallel-scan kernel on device %s with unique "
"execution identifier %llu\n",
devID, (unsigned long long)(*kID));
deviceIdToString(devID).c_str(), (unsigned long long)(output));
Comment thread
csiefer2 marked this conversation as resolved.
Outdated
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
deviceIdToString(devID).c_str(), (unsigned long long)(output));
deviceIdToString(devID).c_str(), suppressCounts() ? 0 : *kID);


int level = kokkosp_print_region_stack();
kokkosp_print_region_stack_indent(level);
Expand All @@ -101,19 +168,23 @@ extern "C" void kokkosp_begin_parallel_scan(const char* name,
}

// Tool callback invoked when a parallel-scan kernel has finished.
// Prints a completion message for the kernel identified by `kID`.
//
// @param kID  64-bit kernel identifier handed to this callback.
//
// When suppressCounts() returns true, 0 is printed in place of the real
// identifier.
extern "C" void kokkosp_end_parallel_scan(const uint64_t kID) {
  // Keep the identifier 64 bits wide: routing it through an `int` would
  // truncate large values (and sign-extend negative ones) before the %llu
  // conversion.
  const uint64_t output = suppressCounts() ? 0 : kID;
  printf("KokkosP: Execution of kernel %llu is completed.\n",
         static_cast<unsigned long long>(output));
}

extern "C" void kokkosp_begin_parallel_reduce(const char* name,
const uint32_t devID,
uint64_t* kID) {
*kID = uniqID++;
*kID = uniqID++;
int output = *kID;
if (suppressCounts()) output = 0;

printf(
"KokkosP: Executing parallel-reduce kernel on device %d with unique "
"KokkosP: Executing parallel-reduce kernel on device %s with unique "
"execution identifier %llu\n",
devID, (unsigned long long)(*kID));
deviceIdToString(devID).c_str(), (unsigned long long)(output));

int level = kokkosp_print_region_stack();
kokkosp_print_region_stack_indent(level);
Expand All @@ -122,8 +193,11 @@ extern "C" void kokkosp_begin_parallel_reduce(const char* name,
}

// Tool callback invoked when a parallel-reduce kernel has finished.
// Prints a completion message for the kernel identified by `kID`.
//
// @param kID  64-bit kernel identifier handed to this callback.
//
// When suppressCounts() returns true, 0 is printed in place of the real
// identifier.
extern "C" void kokkosp_end_parallel_reduce(const uint64_t kID) {
  // Keep the identifier 64 bits wide: routing it through an `int` would
  // truncate large values (and sign-extend negative ones) before the %llu
  // conversion.
  const uint64_t output = suppressCounts() ? 0 : kID;
  printf("KokkosP: Execution of kernel %llu is completed.\n",
         static_cast<unsigned long long>(output));
}

extern "C" void kokkosp_begin_fence(const char* name, const uint32_t devID,
Expand All @@ -139,10 +213,13 @@ extern "C" void kokkosp_begin_fence(const char* name, const uint32_t devID,
} else {
*kID = uniqID++;

int output = *kID;
if (suppressCounts()) output = 0;

printf(
"KokkosP: Executing fence on device %d with unique execution "
"KokkosP: Executing fence on device %s with unique execution "
"identifier %llu\n",
devID, (unsigned long long)(*kID));
deviceIdToString(devID).c_str(), (unsigned long long)(output));

int level = kokkosp_print_region_stack();
kokkosp_print_region_stack_indent(level);
Expand All @@ -156,8 +233,11 @@ extern "C" void kokkosp_end_fence(const uint64_t kID) {
// dealing with the application's fence, which we filtered out in the callback
// for fences
if (kID != std::numeric_limits<uint64_t>::max()) {
int output = kID;
if (suppressCounts()) output = 0;

printf("KokkosP: Execution of fence %llu is completed.\n",
(unsigned long long)(kID));
(unsigned long long)(output));
}
}

Expand Down
9 changes: 9 additions & 0 deletions profiling/all/impl/Kokkos_Profiling_Interface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,15 @@ inline uint32_t device_id(ExecutionSpace const& space) noexcept {
<< num_instance_bits) +
space.impl_instance_id();
}

// Maps a special synchronization case onto the reserved integer id used to
// represent it.
//
// @param reason  The special synchronization case to translate.
// @return 0 for GlobalDeviceSynchronization, 0x00ffffff for
//         DeepCopyResourceSynchronization, and 0 for any other value.
inline uint32_t int_for_synchronization_reason(
    Kokkos::Tools::Experimental::SpecialSynchronizationCases reason) {
  constexpr uint32_t global_sync_id        = 0;
  constexpr uint32_t deep_copy_resource_id = 0x00ffffff;

  if (reason == GlobalDeviceSynchronization) return global_sync_id;
  if (reason == DeepCopyResourceSynchronization) return deep_copy_resource_id;

  // Values outside the known cases fall back to 0.
  return 0;
}
} // namespace Experimental
} // namespace Tools
} // end namespace Kokkos
Expand Down