From 76f23345a70fca6ba7e31939a6443c959454239b Mon Sep 17 00:00:00 2001 From: Shivam Raikundalia Date: Fri, 30 Aug 2024 08:43:10 -0700 Subject: [PATCH] Add Grid/Block To AMD Kernel Profiles (#983) Summary: Pull Request resolved: https://github.com/pytorch/kineto/pull/983 Roctracer does not give the grid/block alongside device activities; however, it does provide that information in the launch event. Using the correlation ID, we can then stitch these properties onto the device activity via maps from correlation ID to grid and to block. Currently this won't work for RCCL events until https://github.com/ROCm/roctracer/issues/100 is resolved. Reviewed By: leitian, aaronenyeshi Differential Revision: D61743013 fbshipit-source-id: 1205c62f45e8982b88f7a664857090d981f2cb3c --- libkineto/src/RoctracerActivity_inl.h | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/libkineto/src/RoctracerActivity_inl.h b/libkineto/src/RoctracerActivity_inl.h index 4761803bf..dea799812 100644 --- a/libkineto/src/RoctracerActivity_inl.h +++ b/libkineto/src/RoctracerActivity_inl.h @@ -20,6 +20,9 @@ namespace KINETO_NAMESPACE { using namespace libkineto; +static std::unordered_map<uint64_t, std::string> correlationToGrid; +static std::unordered_map<uint64_t, std::string> correlationToBlock; + const char* getGpuActivityKindString(uint32_t kind) { switch (kind) { case HIP_OP_COPY_KIND_DEVICE_TO_HOST_: @@ -99,11 +102,22 @@ inline void GpuActivity::log(ActivityLogger& logger) const { inline const std::string GpuActivity::metadataJson() const { const auto& gpuActivity = raw(); // clang-format off - return fmt::format(R"JSON( + + if (correlationToGrid.count(gpuActivity.id) > 0) { + return fmt::format(R"JSON( + "device": {}, "stream": {}, + "correlation": {}, "kind": "{}", + "grid": {}, "block": {})JSON", + gpuActivity.device, gpuActivity.queue, + gpuActivity.id, getGpuActivityKindString(gpuActivity.kind), + correlationToGrid[gpuActivity.id], correlationToBlock[gpuActivity.id]); + } else { + return fmt::format(R"JSON( "device": {}, "stream": 
{}, "correlation": {}, "kind": "{}")JSON", gpuActivity.device, gpuActivity.queue, gpuActivity.id, getGpuActivityKindString(gpuActivity.kind)); + } // clang-format on } @@ -144,6 +158,16 @@ inline const std::string RuntimeActivity::metadataJson() con "kernel": "{}", )JSON", demangle(hipKernelNameRef(raw().function))); } + //cache grid and block so we can pass it into async activity (GPU track) + correlationToGrid[raw().id] = fmt::format(R"JSON( + [{}, {}, {}])JSON", + raw().gridX, raw().gridY, raw().gridZ); + + correlationToBlock[raw().id] = fmt::format(R"JSON( + [{}, {}, {}])JSON", + raw().workgroupX, raw().workgroupY, raw().workgroupZ); + + return fmt::format(R"JSON( {}"cid": {}, "correlation": {}, "stream": "{}",