Skip to content

Commit 072fb08

Browse files
authored
SWDEV-521647 - Fix tracking of hw_event (#206)
- When a command may have two packets (like the device heap initializer) and there is no signal on the main kernel packet, tracking was broken because the HW event of the command was marked as the first packet's signal. - Make sure that if no completion signal is attached to the second packet, the HW event for the command is cleared.
1 parent ce24936 commit 072fb08

File tree

5 files changed

+39
-24
lines changed

5 files changed

+39
-24
lines changed

rocclr/device/rocm/rocdevice.cpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -2863,7 +2863,11 @@ bool Device::IsHwEventReady(const amd::Event& event, bool wait, uint32_t hip_eve
28632863
bool active_wait = !(hip_event_flags & kHipEventBlockingSync) && ActiveWait();
28642864
return WaitForSignal(reinterpret_cast<ProfilingSignal*>(hw_event)->signal_, active_wait);
28652865
}
2866-
return (hsa_signal_load_relaxed(reinterpret_cast<ProfilingSignal*>(hw_event)->signal_) == 0);
2866+
2867+
auto signal = reinterpret_cast<ProfilingSignal*>(hw_event)->signal_;
2868+
ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Check HW event = 0x%lx", signal.handle);
2869+
2870+
return (hsa_signal_load_relaxed(signal) == 0);
28672871
}
28682872

28692873
// ================================================================================================

rocclr/device/rocm/rocvirtual.cpp

+30-17
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,20 @@ bool VirtualGPU::HwQueueTracker::Create() {
422422

423423
// ================================================================================================
424424
hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal(
425-
hsa_signal_value_t init_val, Timestamp* ts) {
425+
hsa_signal_value_t init_val, Timestamp* ts, bool attach_signal) {
426+
427+
amd::Command* cmd = gpu_.command();
428+
// If no signal is needed, decrement the refcount and clear the hw_event of current command
429+
if (!attach_signal) {
430+
if (nullptr != cmd) {
431+
if (cmd->HwEvent() != nullptr) {
432+
reinterpret_cast<ProfilingSignal*>(cmd->HwEvent())->release();
433+
}
434+
cmd->SetHwEvent(nullptr);
435+
}
436+
return hsa_signal_t {0};
437+
}
438+
426439
bool new_signal = false;
427440

428441
// Peep signal +2 ahead to see if its done
@@ -503,8 +516,7 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal(
503516
prof_signal->engine_ = engine_;
504517
prof_signal->flags_.isPacketDispatch_ = false;
505518

506-
// Store the HW event
507-
amd::Command* cmd = gpu_.command();
519+
508520
if (nullptr != cmd) {
509521
// Release any existing HwEvent before setting new one for the same command
510522
if (cmd->HwEvent() != nullptr) {
@@ -1026,24 +1038,25 @@ bool VirtualGPU::dispatchGenericAqlPacket(
10261038

10271039
fence_state_ = static_cast<Device::CacheState>(expected_fence_state);
10281040

1029-
if (timestamp_ != nullptr || attach_signal) {
1030-
// Get active signal for current dispatch if profiling is necessary
1031-
packet->completion_signal = Barriers().ActiveSignal(kInitSignalValueOne, timestamp_);
1032-
1033-
if (std::is_same<decltype(packet), hsa_kernel_dispatch_packet_t*>::value) {
1034-
// If profiling is enabled, store the correlation ID in the dispatch packet. The profiler can
1035-
// retrieve this correlation ID to attribute waves to specific dispatch locations.
1036-
if (amd::activity_prof::IsEnabled(OP_ID_DISPATCH)) {
1037-
auto dispatchPacket = reinterpret_cast<hsa_kernel_dispatch_packet_t*>(packet);
1038-
dispatchPacket->reserved2 = timestamp_->command().profilingInfo().correlation_id_;
1039-
}
1040-
1041-
ProfilingSignal* current_signal = Barriers().GetLastSignal();
1042-
current_signal->flags_.isPacketDispatch_ = true;
1041+
bool attachSignal = timestamp_ != nullptr || attach_signal;
1042+
// Get active signal for current dispatch if profiling is necessary
1043+
packet->completion_signal = Barriers().ActiveSignal(kInitSignalValueOne,
1044+
timestamp_, attachSignal);
10431045

1046+
if (std::is_same<decltype(packet), hsa_kernel_dispatch_packet_t*>::value
1047+
&& timestamp_ != nullptr) {
1048+
// If profiling is enabled, store the correlation ID in the dispatch packet. The profiler can
1049+
// retrieve this correlation ID to attribute waves to specific dispatch locations.
1050+
if (amd::activity_prof::IsEnabled(OP_ID_DISPATCH) ) {
1051+
auto dispatchPacket = reinterpret_cast<hsa_kernel_dispatch_packet_t*>(packet);
1052+
dispatchPacket->reserved2 = timestamp_->command().profilingInfo().correlation_id_;
10441053
}
1054+
1055+
ProfilingSignal* current_signal = Barriers().GetLastSignal();
1056+
current_signal->flags_.isPacketDispatch_ = true;
10451057
}
10461058

1059+
10471060
// Make sure the slot is free for usage
10481061
while ((index - hsa_queue_load_read_index_scacquire(gpu_queue_)) >= sw_queue_size) {
10491062
amd::Os::yield();

rocclr/device/rocm/rocvirtual.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ class VirtualGPU : public device::VirtualDevice {
254254

255255
//! Finds a free signal for the upcoming operation
256256
hsa_signal_t ActiveSignal(hsa_signal_value_t init_val = kInitSignalValueOne,
257-
Timestamp* ts = nullptr);
257+
Timestamp* ts = nullptr, bool attach_signal = true);
258258

259259
//! Wait for the current active signal. Can idle the queue
260260
bool WaitCurrent() {

rocclr/platform/command.cpp

+2-4
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,6 @@ bool Event::notifyCmdQueue(bool cpu_wait) {
277277
notified_.clear();
278278
return false;
279279
}
280-
ClPrint(LOG_DEBUG, LOG_CMD, "Queue marker to command queue: %p", queue);
281280
command->enqueue();
282281
// Save notification, associated with the current event
283282
notify_event_ = command;
@@ -290,7 +289,6 @@ bool Event::notifyCmdQueue(bool cpu_wait) {
290289
notified_.clear();
291290
return false;
292291
}
293-
ClPrint(LOG_DEBUG, LOG_CMD, "Queue marker to command queue: %p", queue);
294292
command->enqueue();
295293
command->release();
296294
}
@@ -356,8 +354,8 @@ void Command::enqueue() {
356354
Agent::postEventCreate(as_cl(static_cast<Event*>(this)), type_);
357355
}
358356

359-
ClPrint(LOG_DEBUG, LOG_CMD, "Command (%s) enqueued: %p",
360-
amd::activity_prof::getOclCommandKindString(this->type()), this);
357+
ClPrint(LOG_DEBUG, LOG_CMD, "Command (%s) enqueued: %p to queue: %p",
358+
amd::activity_prof::getOclCommandKindString(this->type()), this, queue_);
361359

362360
// Direct dispatch logic below will submit the command immediately, but the command status
363361
// update will occur later after flush() with a wait

rocclr/platform/command.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ class Event : public RuntimeObject {
213213
//! Returns the callback for this event
214214
const CallBackEntry* Callback() const { return callbacks_; }
215215

216-
// Saves HW event, associated with the current command
216+
//! Saves HW event, associated with the current command
217217
void SetHwEvent(void* hw_event) { hw_event_ = hw_event; }
218218

219219
//! Returns HW event, associated with the current command

0 commit comments

Comments
 (0)