// Patch summary (libkineto/src/CuptiActivityProfiler.cpp and CuptiActivityProfiler.h):
//  * Adds addVersionMetadata(key, value), which stores the pair in the new
//    versionMetadata_ map while holding mutex_.
//  * logGpuVersions() keeps its LOGGER_OBSERVER_ADD_METADATA calls and additionally
//    records cupti_version / cuda_runtime_version / cuda_driver_version (branch before
//    "#elif defined(HAS_ROCTRACER)") or roctracer_version / hip_runtime_version /
//    hip_driver_version (HAS_ROCTRACER branch) through addVersionMetadata().
//  * processTraceInternal() copies every versionMetadata_ entry into metadata_ via
//    addMetadata() immediately before logger.handleTraceStart(metadata_).
//  * mutex_ changes from std::mutex to std::recursive_mutex. processTrace() and
//    performRunLoopStep() lock mutex_ and then call processTraceInternal(), whose new
//    loop calls addMetadata(), which locks mutex_ again on the same thread.
// NOTE(review): the "-"/"+" std::lock_guard pairs read identically in this copy;
//    presumably their template arguments (and other <...> arguments, plus the patch's
//    line breaks) were lost in extraction -- confirm against the original patch.
// NOTE(review): the new loop could assign metadata_[pair.first] = pair.second directly
//    instead of calling addMetadata(); that looks like it would remove the need for a
//    recursive mutex -- TODO confirm every caller of processTraceInternal() holds mutex_.
// NOTE(review): the HAS_ROCTRACER hunk also adds an empty line before "#endif".
diff --git a/libkineto/src/CuptiActivityProfiler.cpp b/libkineto/src/CuptiActivityProfiler.cpp index 1509de00f..286d5b359 100644 --- a/libkineto/src/CuptiActivityProfiler.cpp +++ b/libkineto/src/CuptiActivityProfiler.cpp @@ -193,7 +193,7 @@ std::ostream& operator<<(std::ostream& oss, const CuptiActivityProfiler::ErrorCo void CuptiActivityProfiler::transferCpuTrace( std::unique_ptr cpuTrace) { - std::lock_guard guard(mutex_); + std::lock_guard guard(mutex_); const string& trace_name = cpuTrace->span.name; if (currentRunloopState_ != RunloopState::CollectTrace && currentRunloopState_ != RunloopState::ProcessTrace) { @@ -248,6 +248,12 @@ void CuptiActivityProfiler::logGpuVersions() { "cuda_runtime_version", std::to_string(cudaRuntimeVersion)); LOGGER_OBSERVER_ADD_METADATA( "cuda_driver_version", std::to_string(cudaDriverVersion)); + addVersionMetadata( + "cupti_version", std::to_string(cuptiVersion)); + addVersionMetadata( + "cuda_runtime_version", std::to_string(cudaRuntimeVersion)); + addVersionMetadata( + "cuda_driver_version", std::to_string(cudaDriverVersion)); #elif defined(HAS_ROCTRACER) uint32_t majorVersion = roctracer_version_major(); @@ -267,6 +273,13 @@ void CuptiActivityProfiler::logGpuVersions() { "hip_runtime_version", std::to_string(hipRuntimeVersion)); LOGGER_OBSERVER_ADD_METADATA( "hip_driver_version", std::to_string(hipDriverVersion)); + addVersionMetadata( + "roctracer_version", roctracerVersion); + addVersionMetadata( + "hip_runtime_version", std::to_string(hipRuntimeVersion)); + addVersionMetadata( + "hip_driver_version", std::to_string(hipDriverVersion)); + #endif } @@ -274,6 +287,9 @@ void CuptiActivityProfiler::processTraceInternal(ActivityLogger& logger) { LOG(INFO) << "Processing " << traceBuffers_->cpu.size() << " CPU buffers"; VLOG(0) << "Profile time range: " << captureWindowStartTime_ << " - " << captureWindowEndTime_; + for (auto& pair : versionMetadata_) { + addMetadata(pair.first, pair.second); + } logger.handleTraceStart(metadata_); 
setCpuActivityPresent(false); setGpuActivityPresent(false); @@ -948,7 +964,7 @@ void CuptiActivityProfiler::configureChildProfilers() { void CuptiActivityProfiler::configure( const Config& config, const time_point& now) { - std::lock_guard guard(mutex_); + std::lock_guard guard(mutex_); if (isActive()) { LOG(WARNING) << "CuptiActivityProfiler already busy, terminating"; return; @@ -1171,7 +1187,7 @@ const time_point CuptiActivityProfiler::performRunLoopStep( if (cupti_.stopCollection) { // Go to process trace to clear any outstanding buffers etc - std::lock_guard guard(mutex_); + std::lock_guard guard(mutex_); stopTraceInternal(now); resetInternal(); LOG(ERROR) << "State: Warmup stopped by CUPTI. (Buffer size configured is " << config_->activitiesMaxGpuBufferSize() / 1024 / 1024 << "MB)"; @@ -1230,7 +1246,7 @@ const time_point CuptiActivityProfiler::performRunLoopStep( } #endif // HAS_CUPTI || HAS_ROCTRACER - std::lock_guard guard(mutex_); + std::lock_guard guard(mutex_); stopTraceInternal(now); VLOG_IF(0, collection_done) << "Reached profile end time"; UST_LOGGER_MARK_COMPLETED(kCollectionStage); @@ -1254,7 +1270,7 @@ const time_point CuptiActivityProfiler::performRunLoopStep( } // FIXME: Probably want to allow interruption here // for quickly handling trace request via synchronous API - std::lock_guard guard(mutex_); + std::lock_guard guard(mutex_); processTraceInternal(*logger_); UST_LOGGER_MARK_COMPLETED(kPostProcessingStage); resetInternal(); diff --git a/libkineto/src/CuptiActivityProfiler.h b/libkineto/src/CuptiActivityProfiler.h index ff8c70d6b..0669be2d9 100644 --- a/libkineto/src/CuptiActivityProfiler.h +++ b/libkineto/src/CuptiActivityProfiler.h @@ -155,23 +155,23 @@ class CuptiActivityProfiler { // Synchronous control API void startTrace( const std::chrono::time_point& now) { - std::lock_guard guard(mutex_); + std::lock_guard guard(mutex_); startTraceInternal(now); } void stopTrace(const std::chrono::time_point& now) { - std::lock_guard guard(mutex_); + 
std::lock_guard guard(mutex_); stopTraceInternal(now); } // Process CPU and GPU traces void processTrace(ActivityLogger& logger) { - std::lock_guard guard(mutex_); + std::lock_guard guard(mutex_); processTraceInternal(logger); } void reset() { - std::lock_guard guard(mutex_); + std::lock_guard guard(mutex_); resetInternal(); } @@ -197,7 +197,7 @@ class CuptiActivityProfiler { // as key, because that's what CUPTI records. int32_t tid = threadId(); int32_t pid = processId(); - std::lock_guard guard(mutex_); + std::lock_guard guard(mutex_); recordThreadInfo(sysTid, tid, pid); } @@ -215,13 +215,18 @@ class CuptiActivityProfiler { } void addMetadata(const std::string& key, const std::string& value) { - std::lock_guard guard(mutex_); + std::lock_guard guard(mutex_); metadata_[key] = value; } + void addVersionMetadata(const std::string& key, const std::string& value) { + std::lock_guard guard(mutex_); + versionMetadata_[key] = value; + } + void addChildActivityProfiler( std::unique_ptr profiler) { - std::lock_guard guard(mutex_); + std::lock_guard guard(mutex_); profilers_.push_back(std::move(profiler)); } @@ -472,7 +477,7 @@ class CuptiActivityProfiler { // *************************************************************************** // Mutex to protect non-atomic access to below state - std::mutex mutex_; + std::recursive_mutex mutex_; // Runloop phase std::atomic currentRunloopState_{RunloopState::WaitForRequest}; @@ -528,6 +533,9 @@ class CuptiActivityProfiler { // Trace metadata std::unordered_map metadata_; + // Version metadata + std::unordered_map versionMetadata_; + // child activity profilers std::vector> profilers_;