@@ -422,7 +422,20 @@ bool VirtualGPU::HwQueueTracker::Create() {
422
422
423
423
// ================================================================================================
424
424
hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal (
425
- hsa_signal_value_t init_val, Timestamp* ts) {
425
+ hsa_signal_value_t init_val, Timestamp* ts, bool attach_signal) {
426
+
427
+ amd::Command* cmd = gpu_.command ();
428
+ // If no signal is needed, decrement the refcount and clear the hw_event of current command
429
+ if (!attach_signal) {
430
+ if (nullptr != cmd) {
431
+ if (cmd->HwEvent () != nullptr ) {
432
+ reinterpret_cast <ProfilingSignal*>(cmd->HwEvent ())->release ();
433
+ }
434
+ cmd->SetHwEvent (nullptr );
435
+ }
436
+ return hsa_signal_t {0 };
437
+ }
438
+
426
439
bool new_signal = false ;
427
440
428
441
// Peek at the signal +2 ahead to see if it's done
@@ -503,8 +516,7 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal(
503
516
prof_signal->engine_ = engine_;
504
517
prof_signal->flags_ .isPacketDispatch_ = false ;
505
518
506
- // Store the HW event
507
- amd::Command* cmd = gpu_.command ();
519
+
508
520
if (nullptr != cmd) {
509
521
// Release any existing HwEvent before setting new one for the same command
510
522
if (cmd->HwEvent () != nullptr ) {
@@ -1026,24 +1038,25 @@ bool VirtualGPU::dispatchGenericAqlPacket(
1026
1038
1027
1039
fence_state_ = static_cast <Device::CacheState>(expected_fence_state);
1028
1040
1029
- if (timestamp_ != nullptr || attach_signal) {
1030
- // Get active signal for current dispatch if profiling is necessary
1031
- packet->completion_signal = Barriers ().ActiveSignal (kInitSignalValueOne , timestamp_);
1032
-
1033
- if (std::is_same<decltype (packet), hsa_kernel_dispatch_packet_t *>::value) {
1034
- // If profiling is enabled, store the correlation ID in the dispatch packet. The profiler can
1035
- // retrieve this correlation ID to attribute waves to specific dispatch locations.
1036
- if (amd::activity_prof::IsEnabled (OP_ID_DISPATCH)) {
1037
- auto dispatchPacket = reinterpret_cast <hsa_kernel_dispatch_packet_t *>(packet);
1038
- dispatchPacket->reserved2 = timestamp_->command ().profilingInfo ().correlation_id_ ;
1039
- }
1040
-
1041
- ProfilingSignal* current_signal = Barriers ().GetLastSignal ();
1042
- current_signal->flags_ .isPacketDispatch_ = true ;
1041
+ bool attachSignal = timestamp_ != nullptr || attach_signal;
1042
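+ // Always query the tracker: it returns an active signal when profiling or an explicit attach is requested, otherwise it clears the command's HW event and returns a zero-handle signal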
1043
+ packet->completion_signal = Barriers ().ActiveSignal (kInitSignalValueOne ,
1044
+ timestamp_, attachSignal);
1043
1045
1046
+ if (std::is_same<decltype (packet), hsa_kernel_dispatch_packet_t *>::value
1047
+ && timestamp_ != nullptr ) {
1048
+ // If profiling is enabled, store the correlation ID in the dispatch packet. The profiler can
1049
+ // retrieve this correlation ID to attribute waves to specific dispatch locations.
1050
+ if (amd::activity_prof::IsEnabled (OP_ID_DISPATCH) ) {
1051
+ auto dispatchPacket = reinterpret_cast <hsa_kernel_dispatch_packet_t *>(packet);
1052
+ dispatchPacket->reserved2 = timestamp_->command ().profilingInfo ().correlation_id_ ;
1044
1053
}
1054
+
1055
+ ProfilingSignal* current_signal = Barriers ().GetLastSignal ();
1056
+ current_signal->flags_ .isPacketDispatch_ = true ;
1045
1057
}
1046
1058
1059
+
1047
1060
// Make sure the slot is free for usage
1048
1061
while ((index - hsa_queue_load_read_index_scacquire (gpu_queue_)) >= sw_queue_size) {
1049
1062
amd::Os::yield ();
0 commit comments