Skip to content
1 change: 1 addition & 0 deletions libkineto/include/ActivityType.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ enum class ActivityType {
HPU_OP, // HPU host side runtime event
XPU_RUNTIME, // host side xpu runtime events
COLLECTIVE_COMM, // collective communication
GPU_PM_COUNTER, // GPU performance monitoring counter

// PRIVATEUSE1 Activity types are used for custom backends.
// The corresponding device type is `DeviceType::PrivateUse1` in PyTorch.
Expand Down
423 changes: 423 additions & 0 deletions libkineto/include/KinetoDynamicPluginInterface.h

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions libkineto/libkineto_defs.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ def get_libkineto_cpu_only_srcs(with_api = True):
"src/init.cpp",
"src/output_csv.cpp",
"src/output_json.cpp",
"src/dynamic_plugin/PluginLoader.h",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we not adding pluginutils.h?

"src/dynamic_plugin/PluginProfiler.h",
"src/dynamic_plugin/PluginTraceBuilder.h",
] + (get_libkineto_api_srcs() if with_api else [])

def get_libkineto_public_headers():
Expand All @@ -80,6 +83,7 @@ def get_libkineto_public_headers():
"include/ActivityType.h",
"include/Config.h",
"include/ClientInterface.h",
"include/KinetoDynamicPluginInterface.h",
"include/GenericTraceActivity.h",
"include/IActivityProfiler.h",
"include/ILoggerObserver.h",
Expand Down
1 change: 1 addition & 0 deletions libkineto/src/ActivityType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ static constexpr std::array<ActivityTypeName, activityTypeCount + 1> map{
{"hpu_op", ActivityType::HPU_OP},
{"xpu_runtime", ActivityType::XPU_RUNTIME},
{"collective_comm", ActivityType::COLLECTIVE_COMM},
{"gpu_pm_counter", ActivityType::GPU_PM_COUNTER},
{"privateuse1_runtime", ActivityType::PRIVATEUSE1_RUNTIME},
{"privateuse1_driver", ActivityType::PRIVATEUSE1_DRIVER},
{"ENUM_COUNT", ActivityType::ENUM_COUNT}}};
Expand Down
2 changes: 1 addition & 1 deletion libkineto/src/CuptiActivityProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1494,7 +1494,7 @@ void CuptiActivityProfiler::finalizeTrace(
if (!process_name.empty()) {
logger.handleDeviceInfo(
{pid, pid, process_name, "CPU"}, captureWindowStartTime_);
if (!cpuOnly_ && use_default_device_info) {
if (use_default_device_info) {
// Usually, GPU events use device id as pid (0-7).
// In some cases, CPU sockets are numbered starting from 0.
// In the worst case, 8 CPU sockets + 8 GPUs, so the max GPU ID is 15.
Expand Down
138 changes: 138 additions & 0 deletions libkineto/src/dynamic_plugin/PluginLoader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
#pragma once

#include <dlfcn.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <filesystem>

#include "KinetoDynamicPluginInterface.h"
#include "Logger.h"
#include "PluginProfiler.h"

namespace libkineto {

class PluginRegistry {
public:
static PluginRegistry& instance() {
static PluginRegistry instance;
return instance;
}

int registerPluginProfiler(const KinetoPlugin_ProfilerInterface* pProfiler) {
if (pProfiler == nullptr) {
LOG(ERROR) << "Failed to register plugin profiler of nullptr";

return -1;
}

// Store in raw registry
rawPluginProfilers_.push_back(*pProfiler);

// Pass to internal registry
const auto& profiler = rawPluginProfilers_.back();
libkineto::api().registerProfilerFactory(
[profiler]() -> std::unique_ptr<IActivityProfiler> {
return std::make_unique<PluginProfiler>(profiler);
});

return 0;
}

const KinetoPlugin_Registry toCRegistry() {
return KinetoPlugin_Registry{
.unpaddedStructSize = KINETO_PLUGIN_REGISTRY_UNPADDED_STRUCT_SIZE,
.pRegistryHandle = reinterpret_cast<KinetoPlugin_RegistryHandle*>(this),
.registerProfiler = cRegisterProfiler};
}

private:
PluginRegistry() = default;
~PluginRegistry() = default;
PluginRegistry(const PluginRegistry&) = delete;
PluginRegistry& operator=(const PluginRegistry&) = delete;

static int cRegisterProfiler(
KinetoPlugin_RegistryHandle* pRegistryHandle,
const KinetoPlugin_ProfilerInterface* pProfiler) {
auto pPluginRegistry = reinterpret_cast<PluginRegistry*>(pRegistryHandle);
return pPluginRegistry->registerPluginProfiler(pProfiler);
}

std::vector<KinetoPlugin_ProfilerInterface> rawPluginProfilers_;
};

// Loads every plugin shared library found in the directory named by the
// KINETO_PLUGIN_LIB_DIR_PATH_ENV_VARIABLE environment variable.
//
// Steps:
//   1. Read the directory path from the environment, then remove the
//      variable from the environment.
//   2. Collect all regular files with a ".so" extension in that directory
//      (non-recursive).
//   3. dlopen() each file, look up KinetoPlugin_register and call it with the
//      C view of the PluginRegistry singleton.
//
// Failures are logged and never thrown: an unset variable, an unsetenv()
// failure, or a directory iteration error ends the function; a failure on an
// individual library only skips that library.
//
// Libraries that export KinetoPlugin_register are intentionally left loaded
// because the registry keeps copies of the interface structs they provide.
// Libraries that do not export the symbol are closed again.
inline void loadPlugins() {
  const char* pPluginLibDirPathEnvVar = KINETO_PLUGIN_LIB_DIR_PATH_ENV_VARIABLE;

  const char* pPluginLibDirPathRaw = getenv(pPluginLibDirPathEnvVar);
  if (pPluginLibDirPathRaw == nullptr) {
    LOG(VERBOSE) << "Environment variable " << pPluginLibDirPathEnvVar
                 << " not set";
    return;
  }

  // Take an owning copy before touching the environment: POSIX permits
  // unsetenv() to invalidate the pointer previously returned by getenv().
  const std::string pluginLibDirPath(pPluginLibDirPathRaw);

  if (unsetenv(pPluginLibDirPathEnvVar) == -1) {
    LOG(ERROR) << "Failed to unset environment variable "
               << pPluginLibDirPathEnvVar << " at unsetenv() with error "
               << strerror(errno);
    return;
  }

  std::vector<std::string> libFilePaths;
  try {
    for (const auto& entry :
         std::filesystem::directory_iterator(pluginLibDirPath)) {
      if (entry.is_regular_file() && entry.path().extension() == ".so") {
        libFilePaths.push_back(entry.path().string());
      }
    }
  } catch (const std::filesystem::filesystem_error& e) {
    LOG(ERROR) << "Error: " << e.what();
    return;
  }

  // dlerror() returns nullptr when no error is pending; streaming a null
  // char* is undefined behavior, so substitute a placeholder.
  const auto lastDlError = []() -> const char* {
    const char* pError = dlerror();
    return pError != nullptr ? pError : "unknown error";
  };

  PluginRegistry& pluginRegistry = PluginRegistry::instance();
  KinetoPlugin_Registry cPluginRegistry = pluginRegistry.toCRegistry();

  for (const auto& libFilePath : libFilePaths) {
    // Clear error state
    dlerror();

    void* pHandle = dlopen(libFilePath.c_str(), RTLD_LAZY);
    if (pHandle == nullptr) {
      LOG(WARNING) << "Failed to open " << libFilePath
                   << " at dlopen() with error " << lastDlError();
      continue;
    }

    int (*pfxRegister)(const KinetoPlugin_Registry* pRegistry) =
        reinterpret_cast<int (*)(const KinetoPlugin_Registry* pRegistry)>(
            dlsym(pHandle, "KinetoPlugin_register"));

    if (pfxRegister == nullptr) {
      // Read the dlsym() error before dlclose() can overwrite it.
      LOG(VERBOSE) << "Failed to find symbol KinetoPlugin_register() from "
                   << libFilePath << " at dlsym() with error "
                   << lastDlError();

      // Not a Kineto plugin: release the handle instead of leaving an
      // unrelated library mapped into the process.
      if (dlclose(pHandle) != 0) {
        LOG(WARNING) << "Failed to close " << libFilePath
                     << " at dlclose() with error " << lastDlError();
      }
      continue;
    }

    LOG(INFO) << "Found symbol KinetoPlugin_register() from " << libFilePath;

    // The handle is kept open from here on, even if registration reports an
    // error: the plugin may already have registered an interface whose
    // function pointers live inside the library.
    int errorCode = pfxRegister(&cPluginRegistry);
    if (errorCode != 0) {
      LOG(ERROR) << "Failed to register plugin profiler from " << libFilePath
                 << " at pfxRegister() with error " << errorCode;
    }
  }
}

} // namespace libkineto
Loading