-
Notifications
You must be signed in to change notification settings - Fork 75
Expand file tree
/
Copy pathkp_kernel_logger.cpp
More file actions
281 lines (238 loc) · 9.06 KB
/
kp_kernel_logger.cpp
File metadata and controls
281 lines (238 loc) · 9.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER
#include <inttypes.h>

#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <string>
#include <vector>

#include "impl/Kokkos_Profiling_Interface.hpp"
// Stack of the currently open profiling region names: appended to by
// kokkosp_push_profile_region and popped by kokkosp_pop_profile_region.
std::vector<std::string> regions;
// Next execution identifier to hand out. Reset to 0 in kokkosp_init_library
// and post-incremented by the begin_* callbacks whenever they assign an ID.
static uint64_t uniqID;
// Memory-space descriptor received by value in the allocation, deallocation
// and deep-copy callbacks below. Only its name is read, and it is printed as a
// C string.
// NOTE(review): presumably this must match the layout of the handle type that
// Kokkos passes to these callbacks -- confirm before changing it.
struct SpaceHandle {
// Name of the memory space; printed with %s, so it is expected to be
// NUL-terminated.
char name[64];
};
// Get a useful label from the deviceId
// NOTE: Relevant code is in:
// kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp
std::string deviceIdToString(const uint32_t deviceId) {
using namespace Kokkos::Tools::Experimental;
std::string device_label("(");
ExecutionSpaceIdentifier eid = identifier_from_devid(deviceId);
if (eid.type == DeviceType::Serial)
device_label += "Serial";
else if (eid.type == DeviceType::OpenMP)
device_label += "OpenMP";
else if (eid.type == DeviceType::Cuda)
device_label += "Cuda";
else if (eid.type == DeviceType::HIP)
device_label += "HIP";
else if (eid.type == DeviceType::OpenMPTarget)
device_label += "OpenMPTarget";
else if (eid.type == DeviceType::HPX)
device_label += "HPX";
else if (eid.type == DeviceType::Threads)
device_label += "Threads";
else if (eid.type == DeviceType::SYCL)
device_label += "SYCL";
else if (eid.type == DeviceType::OpenACC)
device_label += "OpenACC";
else if (eid.type == DeviceType::Unknown)
device_label += "Unknown";
else
device_label += "Unknown to KokkosTools";
if (eid.instance_id ==
int_for_synchronization_reason(
SpecialSynchronizationCases::GlobalDeviceSynchronization))
device_label += " All Instances)";
else if (eid.instance_id ==
int_for_synchronization_reason(
SpecialSynchronizationCases::DeepCopyResourceSynchronization))
device_label += " DeepCopyResource)";
else
device_label += " Instance " + std::to_string(eid.instance_id) + ")";
return device_label;
}
// Report whether execution identifiers should be hidden in the log output.
//
// Controlled by the KOKKOS_TOOLS_LOGGER_SUPPRESS_COUNTS environment variable:
// the exact values 1, ON, on, TRUE, true, YES and yes enable suppression;
// anything else (or an unset variable) leaves it disabled.
// The variable is read once, on the first call, and the answer is reused for
// all later calls.
bool suppressCounts() {
  static const bool cached = []() -> bool {
    const char* raw = std::getenv("KOKKOS_TOOLS_LOGGER_SUPPRESS_COUNTS");
    if (raw == nullptr) return false;  // unset: default to false

    const std::string setting(raw);
    for (const char* accepted :
         {"1", "ON", "on", "TRUE", "true", "YES", "yes"}) {
      if (setting == accepted) return true;
    }
    return false;
  }();
  return cached;
}
// Print the "KokkosP: " line prefix followed by one indentation unit per
// nesting level. No newline is written, so the caller finishes the line.
//
// level: number of indentation units to emit; values <= 0 emit none.
void kokkosp_print_region_stack_indent(const int level) {
  printf("KokkosP: ");
  int remaining = level;
  while (remaining > 0) {
    printf(" ");
    --remaining;
  }
}
int kokkosp_print_region_stack() {
int level = 0;
for (auto regItr = regions.begin(); regItr != regions.end(); regItr++) {
kokkosp_print_region_stack_indent(level);
printf("%s\n", (*regItr).c_str());
level++;
}
return level;
}
// Tool initialization callback: restarts the execution-identifier counter at
// zero and announces the logger on stdout.
//
// loadSeq:      load sequence number, echoed in the message.
// interfaceVer: interface version, echoed in the message.
// The remaining two parameters are unused.
extern "C" void kokkosp_init_library(const int loadSeq,
                                     const uint64_t interfaceVer,
                                     const uint32_t /*devInfoCount*/,
                                     void* /*deviceInfo*/) {
  uniqID = 0;

  const unsigned long long version =
      static_cast<unsigned long long>(interfaceVer);
  printf(
      "KokkosP: Kernel Logger Library Initialized (sequence is %d, version: "
      "%llu)\n",
      loadSeq, version);
}
// Tool finalization callback: writes a single notice to stdout.
extern "C" void kokkosp_finalize_library() {
  std::fputs("KokkosP: Kokkos library finalization called.\n", stdout);
}
// Callback for the start of a parallel-for kernel.
//
// Assigns the next execution identifier to *kID, then logs the device label,
// the identifier (printed as 0 when suppressCounts() is true), the stack of
// open profiling regions and finally the kernel name nested below them.
//
// name:  kernel label; printed with %s.
// devID: device identifier, decoded with deviceIdToString.
// kID:   out-parameter receiving the assigned identifier; must not be null.
extern "C" void kokkosp_begin_parallel_for(const char* name,
                                           const uint32_t devID,
                                           uint64_t* kID) {
  *kID = uniqID++;
  // Keep the full 64-bit width: narrowing to int would misprint identifiers
  // above INT_MAX once they are widened again for %llu.
  const uint64_t output = suppressCounts() ? uint64_t{0} : *kID;
  printf(
      "KokkosP: Executing parallel-for kernel on device %s with unique "
      "execution identifier %llu\n",
      deviceIdToString(devID).c_str(),
      static_cast<unsigned long long>(output));
  const int level = kokkosp_print_region_stack();
  kokkosp_print_region_stack_indent(level);
  printf(" %s\n", name);
}
// Callback for the end of a parallel-for kernel.
//
// kID: identifier of the kernel that finished; printed as 0 when
//      suppressCounts() is true.
extern "C" void kokkosp_end_parallel_for(const uint64_t kID) {
  // Keep the full 64-bit width: narrowing to int would misprint identifiers
  // above INT_MAX once they are widened again for %llu.
  const uint64_t output = suppressCounts() ? uint64_t{0} : kID;
  printf("KokkosP: Execution of kernel %llu is completed.\n",
         static_cast<unsigned long long>(output));
}
// Callback for the start of a parallel-scan kernel.
//
// Assigns the next execution identifier to *kID, then logs the device label,
// the identifier (printed as 0 when suppressCounts() is true), the stack of
// open profiling regions and finally the kernel name nested below them.
//
// name:  kernel label; printed with %s.
// devID: device identifier, decoded with deviceIdToString.
// kID:   out-parameter receiving the assigned identifier; must not be null.
extern "C" void kokkosp_begin_parallel_scan(const char* name,
                                            const uint32_t devID,
                                            uint64_t* kID) {
  *kID = uniqID++;
  // Keep the full 64-bit width: narrowing to int would misprint identifiers
  // above INT_MAX once they are widened again for %llu.
  const uint64_t output = suppressCounts() ? uint64_t{0} : *kID;
  printf(
      "KokkosP: Executing parallel-scan kernel on device %s with unique "
      "execution identifier %llu\n",
      deviceIdToString(devID).c_str(),
      static_cast<unsigned long long>(output));
  const int level = kokkosp_print_region_stack();
  kokkosp_print_region_stack_indent(level);
  printf(" %s\n", name);
}
// Callback for the end of a parallel-scan kernel.
//
// kID: identifier of the kernel that finished; printed as 0 when
//      suppressCounts() is true.
extern "C" void kokkosp_end_parallel_scan(const uint64_t kID) {
  // Keep the full 64-bit width: narrowing to int would misprint identifiers
  // above INT_MAX once they are widened again for %llu.
  const uint64_t output = suppressCounts() ? uint64_t{0} : kID;
  printf("KokkosP: Execution of kernel %llu is completed.\n",
         static_cast<unsigned long long>(output));
}
// Callback for the start of a parallel-reduce kernel.
//
// Assigns the next execution identifier to *kID, then logs the device label,
// the identifier (printed as 0 when suppressCounts() is true), the stack of
// open profiling regions and finally the kernel name nested below them.
//
// name:  kernel label; printed with %s.
// devID: device identifier, decoded with deviceIdToString.
// kID:   out-parameter receiving the assigned identifier; must not be null.
extern "C" void kokkosp_begin_parallel_reduce(const char* name,
                                              const uint32_t devID,
                                              uint64_t* kID) {
  *kID = uniqID++;
  // Keep the full 64-bit width: narrowing to int would misprint identifiers
  // above INT_MAX once they are widened again for %llu.
  const uint64_t output = suppressCounts() ? uint64_t{0} : *kID;
  printf(
      "KokkosP: Executing parallel-reduce kernel on device %s with unique "
      "execution identifier %llu\n",
      deviceIdToString(devID).c_str(),
      static_cast<unsigned long long>(output));
  const int level = kokkosp_print_region_stack();
  kokkosp_print_region_stack_indent(level);
  printf(" %s\n", name);
}
// Callback for the end of a parallel-reduce kernel.
//
// kID: identifier of the kernel that finished; printed as 0 when
//      suppressCounts() is true.
extern "C" void kokkosp_end_parallel_reduce(const uint64_t kID) {
  // Keep the full 64-bit width: narrowing to int would misprint identifiers
  // above INT_MAX once they are widened again for %llu.
  const uint64_t output = suppressCounts() ? uint64_t{0} : kID;
  printf("KokkosP: Execution of kernel %llu is completed.\n",
         static_cast<unsigned long long>(output));
}
// Callback for the start of a fence.
//
// Fences whose label contains "Kokkos Profile Tool Fence" are not logged:
// they are treated as duplicates that would only slow the application down.
// Such fences get the sentinel identifier UINT64_MAX so that
// kokkosp_end_fence can recognise and skip them as well. Every other fence is
// given the next execution identifier and logged like a kernel launch.
//
// name:  fence label; searched with strstr and printed with %s.
// devID: device identifier, decoded with deviceIdToString.
// kID:   out-parameter receiving the identifier or the sentinel; must not be
//        null.
extern "C" void kokkosp_begin_fence(const char* name, const uint32_t devID,
                                    uint64_t* kID) {
  if (std::strstr(name, "Kokkos Profile Tool Fence")) {
    // The maximum uint64_t value is assumed never to be reached by the
    // counter, which makes it usable as the "filtered out" marker.
    *kID = std::numeric_limits<uint64_t>::max();
  } else {
    *kID = uniqID++;
    // Keep the full 64-bit width: narrowing to int would misprint
    // identifiers above INT_MAX once they are widened again for %llu.
    const uint64_t output = suppressCounts() ? uint64_t{0} : *kID;
    printf(
        "KokkosP: Executing fence on device %s with unique execution "
        "identifier %llu\n",
        deviceIdToString(devID).c_str(),
        static_cast<unsigned long long>(output));
    const int level = kokkosp_print_region_stack();
    kokkosp_print_region_stack_indent(level);
    printf(" %s\n", name);
  }
}
// Callback for the end of a fence.
//
// kID: identifier assigned by kokkosp_begin_fence. The sentinel UINT64_MAX
//      marks a fence that was filtered out there (label containing
//      "Kokkos Profile Tool Fence"), so nothing is printed for it. Any other
//      identifier is logged, printed as 0 when suppressCounts() is true.
extern "C" void kokkosp_end_fence(const uint64_t kID) {
  if (kID != std::numeric_limits<uint64_t>::max()) {
    // Keep the full 64-bit width: narrowing to int would misprint
    // identifiers above INT_MAX once they are widened again for %llu.
    const uint64_t output = suppressCounts() ? uint64_t{0} : kID;
    printf("KokkosP: Execution of fence %llu is completed.\n",
           static_cast<unsigned long long>(output));
  }
}
// Callback for entering a profiling region: logs the region name and pushes a
// copy of it onto the region stack.
//
// regionName: NUL-terminated region label.
extern "C" void kokkosp_push_profile_region(char* regionName) {
  printf("KokkosP: Entering profiling region: %s\n", regionName);
  regions.emplace_back(regionName);
}
// Callback for leaving a profiling region: logs the innermost open region and
// removes it from the region stack. Does nothing when no region is open.
extern "C" void kokkosp_pop_profile_region() {
  if (regions.empty()) return;

  printf("KokkosP: Exiting profiling region: %s\n", regions.back().c_str());
  regions.pop_back();
}
// Callback for a data allocation: logs the memory space name, the allocation
// label, its address and its size.
//
// handle: memory space descriptor; only handle.name is used.
// name:   allocation label.
// ptr:    address of the allocation.
// size:   size value reported by the caller, printed as an unsigned integer.
extern "C" void kokkosp_allocate_data(SpaceHandle handle, const char* name,
                                      void* ptr, uint64_t size) {
  const unsigned long long reported_size =
      static_cast<unsigned long long>(size);
  printf("KokkosP: Allocate<%s> name: %s pointer: %p size: %llu\n", handle.name,
         name, ptr, reported_size);
}
// Callback for a data deallocation: logs the memory space name, the
// allocation label, its address and its size.
//
// handle: memory space descriptor; only handle.name is used.
// name:   allocation label.
// ptr:    address of the allocation being released.
// size:   size value reported by the caller, printed as an unsigned integer.
extern "C" void kokkosp_deallocate_data(SpaceHandle handle, const char* name,
                                        void* ptr, uint64_t size) {
  const unsigned long long reported_size =
      static_cast<unsigned long long>(size);
  printf("KokkosP: Deallocate<%s> name: %s pointer: %p size: %llu\n",
         handle.name, name, ptr, reported_size);
}
// Callback for the start of a deep copy: logs the destination and source
// memory space names, the label and address of each side, and the copy size.
//
// dst_handle / src_handle: memory space descriptors; only .name is used.
// dst_name / src_name:     labels of the destination and source.
// dst_ptr / src_ptr:       addresses of the destination and source.
// size:                    size value reported by the caller, printed as an
//                          unsigned integer.
extern "C" void kokkosp_begin_deep_copy(SpaceHandle dst_handle,
                                        const char* dst_name,
                                        const void* dst_ptr,
                                        SpaceHandle src_handle,
                                        const char* src_name,
                                        const void* src_ptr, uint64_t size) {
  const unsigned long long reported_size =
      static_cast<unsigned long long>(size);
  printf(
      "KokkosP: DeepCopy<%s,%s> DST(name: %s pointer: %p) SRC(name: %s pointer "
      "%p) Size: %llu\n",
      dst_handle.name, src_handle.name, dst_name, dst_ptr, src_name, src_ptr,
      reported_size);
}