@@ -193,7 +193,7 @@ std::ostream& operator<<(std::ostream& oss, const CuptiActivityProfiler::ErrorCo
193
193
194
194
void CuptiActivityProfiler::transferCpuTrace (
195
195
std::unique_ptr<libkineto::CpuTraceBuffer> cpuTrace) {
196
- std::lock_guard<std::mutex > guard (mutex_);
196
+ std::lock_guard<std::recursive_mutex > guard (mutex_);
197
197
const string& trace_name = cpuTrace->span .name ;
198
198
if (currentRunloopState_ != RunloopState::CollectTrace &&
199
199
currentRunloopState_ != RunloopState::ProcessTrace) {
@@ -248,6 +248,12 @@ void CuptiActivityProfiler::logGpuVersions() {
248
248
" cuda_runtime_version" , std::to_string (cudaRuntimeVersion));
249
249
LOGGER_OBSERVER_ADD_METADATA (
250
250
" cuda_driver_version" , std::to_string (cudaDriverVersion));
251
+ addVersionMetadata (
252
+ " cupti_version" , std::to_string (cuptiVersion));
253
+ addVersionMetadata (
254
+ " cuda_runtime_version" , std::to_string (cudaRuntimeVersion));
255
+ addVersionMetadata (
256
+ " cuda_driver_version" , std::to_string (cudaDriverVersion));
251
257
252
258
#elif defined(HAS_ROCTRACER)
253
259
uint32_t majorVersion = roctracer_version_major ();
@@ -267,13 +273,23 @@ void CuptiActivityProfiler::logGpuVersions() {
267
273
" hip_runtime_version" , std::to_string (hipRuntimeVersion));
268
274
LOGGER_OBSERVER_ADD_METADATA (
269
275
" hip_driver_version" , std::to_string (hipDriverVersion));
276
+ addVersionMetadata (
277
+ " roctracer_version" , roctracerVersion);
278
+ addVersionMetadata (
279
+ " hip_runtime_version" , std::to_string (hipRuntimeVersion));
280
+ addVersionMetadata (
281
+ " hip_driver_version" , std::to_string (hipDriverVersion));
282
+
270
283
#endif
271
284
}
272
285
273
286
void CuptiActivityProfiler::processTraceInternal (ActivityLogger& logger) {
274
287
LOG (INFO) << " Processing " << traceBuffers_->cpu .size () << " CPU buffers" ;
275
288
VLOG (0 ) << " Profile time range: " << captureWindowStartTime_ << " - "
276
289
<< captureWindowEndTime_;
290
+ for (auto & pair : versionMetadata_) {
291
+ addMetadata (pair.first , pair.second );
292
+ }
277
293
logger.handleTraceStart (metadata_);
278
294
setCpuActivityPresent (false );
279
295
setGpuActivityPresent (false );
@@ -948,7 +964,7 @@ void CuptiActivityProfiler::configureChildProfilers() {
948
964
void CuptiActivityProfiler::configure (
949
965
const Config& config,
950
966
const time_point<system_clock>& now) {
951
- std::lock_guard<std::mutex > guard (mutex_);
967
+ std::lock_guard<std::recursive_mutex > guard (mutex_);
952
968
if (isActive ()) {
953
969
LOG (WARNING) << " CuptiActivityProfiler already busy, terminating" ;
954
970
return ;
@@ -1171,7 +1187,7 @@ const time_point<system_clock> CuptiActivityProfiler::performRunLoopStep(
1171
1187
1172
1188
if (cupti_.stopCollection ) {
1173
1189
// Go to process trace to clear any outstanding buffers etc
1174
- std::lock_guard<std::mutex > guard (mutex_);
1190
+ std::lock_guard<std::recursive_mutex > guard (mutex_);
1175
1191
stopTraceInternal (now);
1176
1192
resetInternal ();
1177
1193
LOG (ERROR) << " State: Warmup stopped by CUPTI. (Buffer size configured is " << config_->activitiesMaxGpuBufferSize () / 1024 / 1024 << " MB)" ;
@@ -1230,7 +1246,7 @@ const time_point<system_clock> CuptiActivityProfiler::performRunLoopStep(
1230
1246
}
1231
1247
#endif // HAS_CUPTI || HAS_ROCTRACER
1232
1248
1233
- std::lock_guard<std::mutex > guard (mutex_);
1249
+ std::lock_guard<std::recursive_mutex > guard (mutex_);
1234
1250
stopTraceInternal (now);
1235
1251
VLOG_IF (0 , collection_done) << " Reached profile end time" ;
1236
1252
UST_LOGGER_MARK_COMPLETED (kCollectionStage );
@@ -1254,7 +1270,7 @@ const time_point<system_clock> CuptiActivityProfiler::performRunLoopStep(
1254
1270
}
1255
1271
// FIXME: Probably want to allow interruption here
1256
1272
// for quickly handling trace request via synchronous API
1257
- std::lock_guard<std::mutex > guard (mutex_);
1273
+ std::lock_guard<std::recursive_mutex > guard (mutex_);
1258
1274
processTraceInternal (*logger_);
1259
1275
UST_LOGGER_MARK_COMPLETED (kPostProcessingStage );
1260
1276
resetInternal ();
0 commit comments