Skip to content

Commit

Permalink
Add CUPTI/ROCm versions to traces (#985)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #985

Because of the differences that are emerging between different versions, it would be useful if we could see in the metadata which third-party library versions we are using. We add them to our Kineto traces in this diff.

Reviewed By: aaronenyeshi

Differential Revision: D62538511

fbshipit-source-id: 813af45c1d2e82002ca7b4b7f3788407f13c254c
  • Loading branch information
sraikund16 authored and facebook-github-bot committed Sep 13, 2024
1 parent 76f2334 commit ca1eedb
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 13 deletions.
26 changes: 21 additions & 5 deletions libkineto/src/CuptiActivityProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ std::ostream& operator<<(std::ostream& oss, const CuptiActivityProfiler::ErrorCo

void CuptiActivityProfiler::transferCpuTrace(
std::unique_ptr<libkineto::CpuTraceBuffer> cpuTrace) {
std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
const string& trace_name = cpuTrace->span.name;
if (currentRunloopState_ != RunloopState::CollectTrace &&
currentRunloopState_ != RunloopState::ProcessTrace) {
Expand Down Expand Up @@ -248,6 +248,12 @@ void CuptiActivityProfiler::logGpuVersions() {
"cuda_runtime_version", std::to_string(cudaRuntimeVersion));
LOGGER_OBSERVER_ADD_METADATA(
"cuda_driver_version", std::to_string(cudaDriverVersion));
addVersionMetadata(
"cupti_version", std::to_string(cuptiVersion));
addVersionMetadata(
"cuda_runtime_version", std::to_string(cudaRuntimeVersion));
addVersionMetadata(
"cuda_driver_version", std::to_string(cudaDriverVersion));

#elif defined(HAS_ROCTRACER)
uint32_t majorVersion = roctracer_version_major();
Expand All @@ -267,13 +273,23 @@ void CuptiActivityProfiler::logGpuVersions() {
"hip_runtime_version", std::to_string(hipRuntimeVersion));
LOGGER_OBSERVER_ADD_METADATA(
"hip_driver_version", std::to_string(hipDriverVersion));
addVersionMetadata(
"roctracer_version", roctracerVersion);
addVersionMetadata(
"hip_runtime_version", std::to_string(hipRuntimeVersion));
addVersionMetadata(
"hip_driver_version", std::to_string(hipDriverVersion));

#endif
}

void CuptiActivityProfiler::processTraceInternal(ActivityLogger& logger) {
LOG(INFO) << "Processing " << traceBuffers_->cpu.size() << " CPU buffers";
VLOG(0) << "Profile time range: " << captureWindowStartTime_ << " - "
<< captureWindowEndTime_;
for (auto& pair : versionMetadata_) {
addMetadata(pair.first, pair.second);
}
logger.handleTraceStart(metadata_);
setCpuActivityPresent(false);
setGpuActivityPresent(false);
Expand Down Expand Up @@ -948,7 +964,7 @@ void CuptiActivityProfiler::configureChildProfilers() {
void CuptiActivityProfiler::configure(
const Config& config,
const time_point<system_clock>& now) {
std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
if (isActive()) {
LOG(WARNING) << "CuptiActivityProfiler already busy, terminating";
return;
Expand Down Expand Up @@ -1171,7 +1187,7 @@ const time_point<system_clock> CuptiActivityProfiler::performRunLoopStep(

if (cupti_.stopCollection) {
// Go to process trace to clear any outstanding buffers etc
std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
stopTraceInternal(now);
resetInternal();
LOG(ERROR) << "State: Warmup stopped by CUPTI. (Buffer size configured is " << config_->activitiesMaxGpuBufferSize() / 1024 / 1024 << "MB)";
Expand Down Expand Up @@ -1230,7 +1246,7 @@ const time_point<system_clock> CuptiActivityProfiler::performRunLoopStep(
}
#endif // HAS_CUPTI || HAS_ROCTRACER

std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
stopTraceInternal(now);
VLOG_IF(0, collection_done) << "Reached profile end time";
UST_LOGGER_MARK_COMPLETED(kCollectionStage);
Expand All @@ -1254,7 +1270,7 @@ const time_point<system_clock> CuptiActivityProfiler::performRunLoopStep(
}
// FIXME: Probably want to allow interruption here
// for quickly handling trace request via synchronous API
std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
processTraceInternal(*logger_);
UST_LOGGER_MARK_COMPLETED(kPostProcessingStage);
resetInternal();
Expand Down
24 changes: 16 additions & 8 deletions libkineto/src/CuptiActivityProfiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,23 +155,23 @@ class CuptiActivityProfiler {
// Synchronous control API
void startTrace(
const std::chrono::time_point<std::chrono::system_clock>& now) {
std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
startTraceInternal(now);
}

void stopTrace(const std::chrono::time_point<std::chrono::system_clock>& now) {
std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
stopTraceInternal(now);
}

// Process CPU and GPU traces
void processTrace(ActivityLogger& logger) {
std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
processTraceInternal(logger);
}

void reset() {
std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
resetInternal();
}

Expand All @@ -197,7 +197,7 @@ class CuptiActivityProfiler {
// as key, because that's what CUPTI records.
int32_t tid = threadId();
int32_t pid = processId();
std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
recordThreadInfo(sysTid, tid, pid);
}

Expand All @@ -215,13 +215,18 @@ class CuptiActivityProfiler {
}

void addMetadata(const std::string& key, const std::string& value) {
std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
metadata_[key] = value;
}

// Records a GPU-library version string under `key` in versionMetadata_,
// overwriting any value previously stored for the same key. The entry is
// written while holding mutex_.
// Entries stored here are copied into the trace metadata via addMetadata()
// at the start of processTraceInternal().
void addVersionMetadata(const std::string& key, const std::string& value) {
std::lock_guard<std::recursive_mutex> guard(mutex_);
versionMetadata_[key] = value;
}

void addChildActivityProfiler(
std::unique_ptr<IActivityProfiler> profiler) {
std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
profilers_.push_back(std::move(profiler));
}

Expand Down Expand Up @@ -472,7 +477,7 @@ class CuptiActivityProfiler {
// ***************************************************************************

// Mutex to protect non-atomic access to below state
std::mutex mutex_;
std::recursive_mutex mutex_;

// Runloop phase
std::atomic<RunloopState> currentRunloopState_{RunloopState::WaitForRequest};
Expand Down Expand Up @@ -528,6 +533,9 @@ class CuptiActivityProfiler {
// Trace metadata
std::unordered_map<std::string, std::string> metadata_;

// Version metadata
std::unordered_map<std::string, std::string> versionMetadata_;

// child activity profilers
std::vector<std::unique_ptr<IActivityProfiler>> profilers_;

Expand Down

0 comments on commit ca1eedb

Please sign in to comment.