Skip to content

Commit 5a48dc5

Browse files
authored
hip: event: correct event completion timestamping (#9365)
Before, event always timestamp before it launches its chain of commands, which causes some issues: * whenevery event synchronization is called, the event will timestamp even it has completed before. This patch also make the following change: * reorganizes the event sync() and submit() due to they almost the same except that submit() will not wait for dependencies to complete. Doing this change is because, the timestamp will be updated in these two functions * event query() function checks state only instead of checking each depedencies as event completion will be check in sync() and submit(). Signed-off-by: Wendy Liang <wendy.liang@amd.com>
1 parent f9033aa commit 5a48dc5

2 files changed

Lines changed: 52 additions & 74 deletions

File tree

src/runtime_src/hip/core/event.cpp

Lines changed: 51 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -75,26 +75,40 @@ bool event::is_recorded()
7575
bool event::query()
7676
{
7777
//This function will return true if all commands in the appropriate stream which specified to hipEventRecord() have completed.
78-
std::lock_guard lock(m_recorded_cmds_lock);
79-
for (auto& rec_com : m_recorded_commands){
80-
state command_state = rec_com->get_state();
81-
if (command_state != state::completed){
82-
return false;
83-
}
84-
}
85-
return true;
78+
std::lock_guard lock(m_state_lock);
79+
return (get_state() == state::completed);
8680
}
8781

88-
// check if all dependencies are completed and update event state accordingly
82+
// check if all dependencies are completed, update event state, and launch chain of commands if all
83+
// dependencies are completed
8984
// @input: wait_for_dependencies - if true, wait for dependencies to be completed
90-
// @return: true if all dependencies are completed and event state is updated, false otherwise
91-
// Note: This function can throw exception if dependencies have error
92-
bool event::check_dependencies_update_state(bool wait_for_dependencies)
85+
// @return: true if all dependencies are completed, false otherwise
86+
// Note: This function can throw exception if dependencies have error, or launch chain of commands fails
87+
bool event::check_and_launch_chain(bool wait_for_dependencies)
9388
{
94-
bool dependencies_has_error = false;
95-
96-
// if event is recorded, wait for dependencies to be completed
89+
bool event_is_completed = false;
9790
{
91+
std::lock_guard lock(m_state_lock);
92+
auto event_state = get_state();
93+
if (!is_recorded_no_lock() || (event_state == state::running && !wait_for_dependencies)) {
94+
// if event is not recorded, or event is already running that is event is already checking
95+
// dependencies in another thread, and caller doesn't want to wait, return false as we haven't
96+
// launched the chain of commands yet. This is in the event submit() case.
97+
return false;
98+
}
99+
if (event_state == state::completed) {
100+
event_is_completed = true;
101+
}
102+
else {
103+
// if event is recorded, check all dependencies and mark the event state as running
104+
set_state(state::running);
105+
}
106+
}
107+
108+
bool dependencies_has_error = false;
109+
bool dependencies_completed = true;
110+
if (!event_is_completed) {
111+
// Check all dependencies
98112
std::lock_guard lock(m_recorded_cmds_lock);
99113
for (auto& rec_com : m_recorded_commands) {
100114
if (wait_for_dependencies)
@@ -107,46 +121,43 @@ bool event::check_dependencies_update_state(bool wait_for_dependencies)
107121
}
108122
else {
109123
// dependency is not completed, return false and no state update
110-
return false;
124+
dependencies_completed = false;
125+
break;
111126
}
112127
}
113128
}
114129

115-
// update event state after waiting for dependencies
116130
{
117131
std::lock_guard lock(m_state_lock);
118132
if (dependencies_has_error) {
133+
// dependencies have error, set event state to error
119134
set_state(state::error);
120135
throw_hip_error(hipErrorLaunchFailure, "event sync failed due to dependencies failure");
121136
}
137+
else if (!dependencies_completed) {
138+
// dependencies are not completed, return false and no state update
139+
set_state(state::recorded);
140+
return false;
141+
}
122142
set_state(state::completed);
143+
// update event completion time
144+
ctime = std::chrono::system_clock::now();
145+
}
146+
147+
//all commands depend on the event start running after the event is completed
148+
{
149+
std::lock_guard lock(m_chain_cmds_lock);
150+
for (auto it = m_chain_of_commands.begin(); it != m_chain_of_commands.end(); ) {
151+
(*it)->submit();
152+
it = m_chain_of_commands.erase(it);
153+
}
123154
}
124155
return true;
125156
}
126157

127158
bool event::synchronize()
128159
{
129-
{
130-
std::lock_guard lock(m_state_lock);
131-
132-
if (!is_recorded_no_lock())
133-
return false;
134-
// if event is recorded, check all dependencies and mark the event state as running
135-
set_state(state::running);
136-
}
137-
138-
139-
if (!check_dependencies_update_state(true)) {
140-
// if event is not completed, set state back to recorded and return
141-
std::lock_guard lock(m_state_lock);
142-
set_state(state::recorded);
143-
return false;
144-
}
145-
146-
ctime = std::chrono::system_clock::now();
147-
//all commands depend on the event start running after the event is completed
148-
launch_chain_of_commands();
149-
return true;
160+
return check_and_launch_chain(true);
150161
}
151162

152163
bool event::wait()
@@ -165,31 +176,8 @@ bool event::wait()
165176
// wait_event2 has two dependencies: wait_event1 and event2
166177
bool event::submit()
167178
{
168-
{
169-
std::lock_guard lock(m_state_lock);
170-
auto event_state = get_state();
171-
// event is already running, do not submit again to avoid deadlock.
172-
if (event_state == state::running)
173-
return true;
174-
175-
if (!is_recorded_no_lock())
176-
return false;
177-
// if event is recorded, check all dependencies and mark the event state as running
178-
set_state(state::running);
179-
}
180-
181-
182-
if (!check_dependencies_update_state(false)) {
183-
// if there are dependencies not complete, set state back to recorded and return
184-
std::lock_guard lock(m_state_lock);
185-
set_state(state::recorded);
186-
return false;
187-
}
188-
189-
ctime = std::chrono::system_clock::now();
190-
//all commands depend on the event start running after the event is completed
191-
launch_chain_of_commands();
192-
return true;
179+
// don't wait for dependencies to complete
180+
return check_and_launch_chain(false);
193181
}
194182

195183
bool event::is_recorded_stream(const stream* s) noexcept
@@ -210,15 +198,6 @@ void event::add_dependency(std::shared_ptr<command> cmd)
210198
m_recorded_commands.push_back(std::move(cmd));
211199
}
212200

213-
void event::launch_chain_of_commands()
214-
{
215-
std::lock_guard lock(m_chain_cmds_lock);
216-
for (auto it = m_chain_of_commands.begin(); it != m_chain_of_commands.end(); ) {
217-
(*it)->submit();
218-
it = m_chain_of_commands.erase(it);
219-
}
220-
}
221-
222201
kernel_start::kernel_start(std::shared_ptr<stream> s, std::shared_ptr<function> f, void** args)
223202
: command(type::kernel_start, std::move(s))
224203
, func{std::move(f)}

src/runtime_src/hip/core/event.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,7 @@ class event : public command
121121

122122
private:
123123
[[nodiscard]] bool is_recorded_no_lock() const;
124-
void launch_chain_of_commands();
125-
bool check_dependencies_update_state(bool wait_for_dependencies);
124+
bool check_and_launch_chain(bool wait_for_dependencies);
126125
};
127126

128127
class kernel_start : public command

0 commit comments

Comments
 (0)