Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CUPTI/ROCm versions to traces #985

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 21 additions & 5 deletions libkineto/src/CuptiActivityProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ std::ostream& operator<<(std::ostream& oss, const CuptiActivityProfiler::ErrorCo

void CuptiActivityProfiler::transferCpuTrace(
std::unique_ptr<libkineto::CpuTraceBuffer> cpuTrace) {
std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
const string& trace_name = cpuTrace->span.name;
if (currentRunloopState_ != RunloopState::CollectTrace &&
currentRunloopState_ != RunloopState::ProcessTrace) {
Expand Down Expand Up @@ -248,6 +248,12 @@ void CuptiActivityProfiler::logGpuVersions() {
"cuda_runtime_version", std::to_string(cudaRuntimeVersion));
LOGGER_OBSERVER_ADD_METADATA(
"cuda_driver_version", std::to_string(cudaDriverVersion));
addVersionMetadata(
"cupti_version", std::to_string(cuptiVersion));
addVersionMetadata(
"cuda_runtime_version", std::to_string(cudaRuntimeVersion));
addVersionMetadata(
"cuda_driver_version", std::to_string(cudaDriverVersion));

#elif defined(HAS_ROCTRACER)
uint32_t majorVersion = roctracer_version_major();
Expand All @@ -267,13 +273,23 @@ void CuptiActivityProfiler::logGpuVersions() {
"hip_runtime_version", std::to_string(hipRuntimeVersion));
LOGGER_OBSERVER_ADD_METADATA(
"hip_driver_version", std::to_string(hipDriverVersion));
addVersionMetadata(
"roctracer_version", roctracerVersion);
addVersionMetadata(
"hip_runtime_version", std::to_string(hipRuntimeVersion));
addVersionMetadata(
"hip_driver_version", std::to_string(hipDriverVersion));

#endif
}

void CuptiActivityProfiler::processTraceInternal(ActivityLogger& logger) {
LOG(INFO) << "Processing " << traceBuffers_->cpu.size() << " CPU buffers";
VLOG(0) << "Profile time range: " << captureWindowStartTime_ << " - "
<< captureWindowEndTime_;
for (auto& pair : versionMetadata_) {
addMetadata(pair.first, pair.second);
}
logger.handleTraceStart(metadata_);
setCpuActivityPresent(false);
setGpuActivityPresent(false);
Expand Down Expand Up @@ -948,7 +964,7 @@ void CuptiActivityProfiler::configureChildProfilers() {
void CuptiActivityProfiler::configure(
const Config& config,
const time_point<system_clock>& now) {
std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
if (isActive()) {
LOG(WARNING) << "CuptiActivityProfiler already busy, terminating";
return;
Expand Down Expand Up @@ -1171,7 +1187,7 @@ const time_point<system_clock> CuptiActivityProfiler::performRunLoopStep(

if (cupti_.stopCollection) {
// Go to process trace to clear any outstanding buffers etc
std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
stopTraceInternal(now);
resetInternal();
LOG(ERROR) << "State: Warmup stopped by CUPTI. (Buffer size configured is " << config_->activitiesMaxGpuBufferSize() / 1024 / 1024 << "MB)";
Expand Down Expand Up @@ -1230,7 +1246,7 @@ const time_point<system_clock> CuptiActivityProfiler::performRunLoopStep(
}
#endif // HAS_CUPTI || HAS_ROCTRACER

std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
stopTraceInternal(now);
VLOG_IF(0, collection_done) << "Reached profile end time";
UST_LOGGER_MARK_COMPLETED(kCollectionStage);
Expand All @@ -1254,7 +1270,7 @@ const time_point<system_clock> CuptiActivityProfiler::performRunLoopStep(
}
// FIXME: Probably want to allow interruption here
// for quickly handling trace request via synchronous API
std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
processTraceInternal(*logger_);
UST_LOGGER_MARK_COMPLETED(kPostProcessingStage);
resetInternal();
Expand Down
24 changes: 16 additions & 8 deletions libkineto/src/CuptiActivityProfiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,23 +155,23 @@ class CuptiActivityProfiler {
// Synchronous control API
// Locked wrapper: acquires mutex_ and forwards `now` to
// startTraceInternal().
// NOTE(review): the std::mutex guard below looks like the pre-change side of
// this diff; the recursive_mutex guard is its replacement - confirm.
void startTrace(
const std::chrono::time_point<std::chrono::system_clock>& now) {
std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
startTraceInternal(now);
}

// Locked wrapper: acquires mutex_ and forwards `now` to stopTraceInternal().
// NOTE(review): the std::mutex guard below looks like the pre-change side of
// this diff; the recursive_mutex guard is its replacement - confirm.
void stopTrace(const std::chrono::time_point<std::chrono::system_clock>& now) {
std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
stopTraceInternal(now);
}

// Process CPU and GPU traces
// Locked wrapper: acquires mutex_ and forwards `logger` to
// processTraceInternal().
// processTraceInternal() calls addMetadata(), which locks mutex_ again on the
// same thread while this guard is still held.
// NOTE(review): the std::mutex guard below looks like the pre-change side of
// this diff; the recursive_mutex guard is its replacement - confirm.
void processTrace(ActivityLogger& logger) {
std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
processTraceInternal(logger);
}

// Locked wrapper: acquires mutex_ and calls resetInternal().
// NOTE(review): the std::mutex guard below looks like the pre-change side of
// this diff; the recursive_mutex guard is its replacement - confirm.
void reset() {
std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
resetInternal();
}

Expand All @@ -197,7 +197,7 @@ class CuptiActivityProfiler {
// as key, because that's what CUPTI records.
int32_t tid = threadId();
int32_t pid = processId();
std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
recordThreadInfo(sysTid, tid, pid);
}

Expand All @@ -215,13 +215,18 @@ class CuptiActivityProfiler {
}

// Stores `value` under `key` in metadata_ while holding mutex_; an existing
// entry for `key` is overwritten.
// Also invoked from processTraceInternal(), whose callers already hold
// mutex_, so the lock here is re-acquired on the same thread.
// NOTE(review): the std::mutex guard below looks like the pre-change side of
// this diff; the recursive_mutex guard is its replacement - confirm.
void addMetadata(const std::string& key, const std::string& value) {
std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
metadata_[key] = value;
}

// Stores `value` under `key` in versionMetadata_ while holding mutex_; an
// existing entry for `key` is overwritten.
// logGpuVersions() records the CUPTI/CUDA or roctracer/HIP version strings
// through this method, and processTraceInternal() copies every entry into
// metadata_ (via addMetadata) before calling logger.handleTraceStart().
void addVersionMetadata(const std::string& key, const std::string& value) {
std::lock_guard<std::recursive_mutex> guard(mutex_);
versionMetadata_[key] = value;
}

// Takes ownership of `profiler` and appends it to profilers_ while holding
// mutex_.
// NOTE(review): the std::mutex guard below looks like the pre-change side of
// this diff; the recursive_mutex guard is its replacement - confirm.
void addChildActivityProfiler(
std::unique_ptr<IActivityProfiler> profiler) {
std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::recursive_mutex> guard(mutex_);
profilers_.push_back(std::move(profiler));
}

Expand Down Expand Up @@ -472,7 +477,7 @@ class CuptiActivityProfiler {
// ***************************************************************************

// Mutex to protect non-atomic access to below state
std::mutex mutex_;
std::recursive_mutex mutex_;

// Runloop phase
std::atomic<RunloopState> currentRunloopState_{RunloopState::WaitForRequest};
Expand Down Expand Up @@ -528,6 +533,9 @@ class CuptiActivityProfiler {
// Trace metadata
std::unordered_map<std::string, std::string> metadata_;

// Version metadata
std::unordered_map<std::string, std::string> versionMetadata_;

// child activity profilers
std::vector<std::unique_ptr<IActivityProfiler>> profilers_;

Expand Down
Loading