Skip to content

Commit 1d06e29

Browse files
authored
[rocprofiler-sdk][rel-7.2] CherryPick: Fixing Code Object Data Race and Thread Safety & Adding validation te… (#3662)
* Fixing Code Object Data Race and Thread Safety & Adding validation test (#2014) * Update CMakeLists.txt * Comment out unused subdirectory additions Comment out hip-host and module-loading-test subdirectories. * Disable hip-host-tracing and code-object-multi-threaded Comment out hip-host-tracing and code-object-multi-threaded subdirectories.
1 parent da3d401 commit 1d06e29

11 files changed

Lines changed: 683 additions & 94 deletions

File tree

projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/code_object/code_object.cpp

Lines changed: 87 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,14 @@ using code_object_unload_array_t = std::vector<hsa::code_object_unload>;
378378
std::vector<hsa::code_object_unload>
379379
shutdown(hsa_executable_t executable);
380380

381-
bool is_shutdown = false;
381+
std::atomic<bool> is_shutdown{false};
382+
383+
auto&
384+
get_destroy_mutex()
385+
{
386+
static auto _v = std::mutex{};
387+
return _v;
388+
}
382389

383390
auto*
384391
get_executables()
@@ -733,7 +740,8 @@ get_unloaded_code_objects(hsa_executable_t executable)
733740
{
734741
auto _unloaded = std::vector<hsa::code_object_unload>{};
735742

736-
if(!is_shutdown && get_loader_table().hsa_ven_amd_loader_executable_iterate_loaded_code_objects)
743+
if(!is_shutdown.load(std::memory_order_acquire) &&
744+
get_loader_table().hsa_ven_amd_loader_executable_iterate_loaded_code_objects)
737745
get_loader_table().hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
738746
executable, code_object_unload_callback, &_unloaded);
739747

@@ -837,7 +845,7 @@ executable_freeze_internal(hsa_executable_t executable)
837845

838846
if(!ctxs.empty())
839847
{
840-
code_obj_vec->rlock([](const code_object_array_t& data) {
848+
code_obj_vec->wlock([](code_object_array_t& data) {
841849
auto tidx = common::get_tid();
842850
// set the contexts for each code object
843851
for(const auto& ditr : data)
@@ -864,8 +872,10 @@ executable_freeze_internal(hsa_executable_t executable)
864872
// invoke callback
865873
auto& cb_data =
866874
citr->callback_tracer->callback_data.at(CODE_OBJECT_KIND);
867-
auto& user_data = ditr->user_data[citr];
868-
cb_data.callback(record, &user_data, cb_data.data);
875+
ditr->user_data.wlock([&](auto& user_data_map) {
876+
auto& user_data = user_data_map[citr];
877+
cb_data.callback(record, &user_data, cb_data.data);
878+
});
869879
}
870880
}
871881

@@ -889,52 +899,57 @@ executable_freeze_internal(hsa_executable_t executable)
889899
// invoke callback
890900
auto& cb_data =
891901
citr->callback_tracer->callback_data.at(CODE_OBJECT_KIND);
892-
auto& user_data = sitr->user_data[citr];
893-
cb_data.callback(record, &user_data, cb_data.data);
894-
895-
std::string device_name =
896-
CHECK_NOTNULL(get_hip_register_data())
897-
->rlock([sym_data](
902+
sitr->user_data.wlock([&](auto& user_data_map) {
903+
auto& user_data = user_data_map[citr];
904+
cb_data.callback(record, &user_data, cb_data.data);
905+
906+
std::string device_name =
907+
CHECK_NOTNULL(get_hip_register_data())
908+
->rlock(
909+
[sym_data](
898910
const hip::hip_register_data& register_data) {
899-
const auto& sym_map =
900-
register_data.kernel_symbol_device_map;
901-
const auto it = sym_map.find(*CHECK_NOTNULL(
902-
common::get_string_entry(sym_data.kernel_name)));
903-
if(it != sym_map.end()) return it->second;
904-
return std::string();
905-
});
906-
// Does not have a host function, skip
907-
if(device_name.empty()) continue;
908-
auto host_data =
909-
CHECK_NOTNULL(get_hip_register_data())
910-
->rlock([device_name](
911-
const hip::hip_register_data& register_data) {
912-
// Add check for out of range here
913-
const auto it =
914-
register_data.host_function_map.find(device_name);
915-
if(it == register_data.host_function_map.end())
916-
{
917-
return rocprofiler_callback_tracing_code_object_host_kernel_symbol_register_data_t{};
918-
}
919-
return it->second;
920-
});
921-
// when kernel_symbol_device_map kernels are not present in
922-
// host_function_map, skip.
923-
if(host_data.device_function == nullptr) continue;
924-
host_data.code_object_id = sym_data.code_object_id;
925-
host_data.kernel_id = sym_data.kernel_id;
926-
host_data.host_function_id = ++get_host_function_id();
927-
auto hip_record = rocprofiler_callback_tracing_record_t{
928-
.context_id = rocprofiler_context_id_t{citr->context_idx},
929-
.thread_id = tidx,
930-
.correlation_id = rocprofiler_correlation_id_t{},
931-
.kind = CODE_OBJECT_KIND,
932-
.operation = CODE_OBJECT_HOST_SYMBOL,
933-
.phase = ROCPROFILER_CALLBACK_PHASE_LOAD,
934-
.payload = static_cast<void*>(&host_data)};
935-
936-
// invoke callback
937-
cb_data.callback(hip_record, &user_data, cb_data.data);
911+
const auto& sym_map =
912+
register_data.kernel_symbol_device_map;
913+
const auto it = sym_map.find(
914+
*CHECK_NOTNULL(common::get_string_entry(
915+
sym_data.kernel_name)));
916+
if(it != sym_map.end()) return it->second;
917+
return std::string();
918+
});
919+
// Does not have a host function, skip
920+
if(device_name.empty()) return;
921+
auto host_data =
922+
CHECK_NOTNULL(get_hip_register_data())
923+
->rlock([device_name](const hip::hip_register_data&
924+
register_data) {
925+
// Add check for out of range here
926+
const auto it =
927+
register_data.host_function_map.find(
928+
device_name);
929+
if(it == register_data.host_function_map.end())
930+
{
931+
return rocprofiler_callback_tracing_code_object_host_kernel_symbol_register_data_t{};
932+
}
933+
return it->second;
934+
});
935+
// when kernel_symbol_device_map kernels are not present in
936+
// host_function_map, skip.
937+
if(host_data.device_function == nullptr) return;
938+
host_data.code_object_id = sym_data.code_object_id;
939+
host_data.kernel_id = sym_data.kernel_id;
940+
host_data.host_function_id = ++get_host_function_id();
941+
auto hip_record = rocprofiler_callback_tracing_record_t{
942+
.context_id = rocprofiler_context_id_t{citr->context_idx},
943+
.thread_id = tidx,
944+
.correlation_id = rocprofiler_correlation_id_t{},
945+
.kind = CODE_OBJECT_KIND,
946+
.operation = CODE_OBJECT_HOST_SYMBOL,
947+
.phase = ROCPROFILER_CALLBACK_PHASE_LOAD,
948+
.payload = static_cast<void*>(&host_data)};
949+
950+
// invoke callback
951+
cb_data.callback(hip_record, &user_data, cb_data.data);
952+
});
938953
}
939954
}
940955
}
@@ -964,7 +979,13 @@ executable_freeze(hsa_executable_t executable, const char* options)
964979
hsa_status_t
965980
executable_destroy(hsa_executable_t executable)
966981
{
967-
if(is_shutdown) return HSA_STATUS_SUCCESS;
982+
// Serialize all executable_destroy calls to prevent:
983+
// 1. Concurrent access to code objects in shutdown()
984+
// 2. Use-after-free when multiple threads destroy same executable
985+
// 3. Race on end_notified flags (now atomic, but still need serialization for callbacks)
986+
auto _lk = std::unique_lock{get_destroy_mutex()};
987+
988+
if(is_shutdown.load(std::memory_order_acquire)) return HSA_STATUS_SUCCESS;
968989

969990
auto _unloaded = shutdown(executable);
970991

@@ -1098,9 +1119,11 @@ shutdown(hsa_executable_t executable)
10981119
.payload = static_cast<void*>(&itr.object->rocp_data)};
10991120

11001121
// invoke callback
1101-
auto& cb_data = citr->callback_tracer->callback_data.at(CODE_OBJECT_KIND);
1102-
auto& user_data = itr.object->user_data.at(citr);
1103-
cb_data.callback(record, &user_data, cb_data.data);
1122+
auto& cb_data = citr->callback_tracer->callback_data.at(CODE_OBJECT_KIND);
1123+
itr.object->user_data.wlock([&](auto& user_data_map) {
1124+
auto& user_data = user_data_map.at(citr);
1125+
cb_data.callback(record, &user_data, cb_data.data);
1126+
});
11041127
}
11051128
}
11061129

@@ -1123,9 +1146,11 @@ shutdown(hsa_executable_t executable)
11231146
.payload = static_cast<void*>(&sitr->rocp_data)};
11241147

11251148
// invoke callback
1126-
auto& cb_data = citr->callback_tracer->callback_data.at(CODE_OBJECT_KIND);
1127-
auto& user_data = sitr->user_data.at(citr);
1128-
cb_data.callback(record, &user_data, cb_data.data);
1149+
auto& cb_data = citr->callback_tracer->callback_data.at(CODE_OBJECT_KIND);
1150+
sitr->user_data.wlock([&](auto& user_data_map) {
1151+
auto& user_data = user_data_map.at(citr);
1152+
cb_data.callback(record, &user_data, cb_data.data);
1153+
});
11291154
}
11301155
}
11311156
}
@@ -1226,7 +1251,8 @@ get_kernel_id(uint64_t kernel_object)
12261251
void
12271252
finalize()
12281253
{
1229-
if(is_shutdown || !get_executables() || !get_code_objects()) return;
1254+
if(is_shutdown.load(std::memory_order_acquire) || !get_executables() || !get_code_objects())
1255+
return;
12301256

12311257
CHECK_NOTNULL(get_executables())->rlock([](const executable_array_t& edata) {
12321258
auto tmp = edata;
@@ -1237,13 +1263,14 @@ finalize()
12371263

12381264
CHECK_NOTNULL(get_code_objects())->wlock([](code_object_array_t& data) { data.clear(); });
12391265

1240-
is_shutdown = true;
1266+
is_shutdown.store(true, std::memory_order_release);
12411267
}
12421268

12431269
void
12441270
iterate_loaded_code_objects(code_object_iterator_t&& func)
12451271
{
1246-
if(is_shutdown || !get_executables() || !get_code_objects()) return;
1272+
if(is_shutdown.load(std::memory_order_acquire) || !get_executables() || !get_code_objects())
1273+
return;
12471274
CHECK_NOTNULL(get_code_objects())
12481275
->rlock(
12491276
[](const code_object_array_t& data, code_object_iterator_t&& func_v) {

projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/code_object/hsa/code_object.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,15 +49,18 @@ code_object::operator=(code_object&& rhs) noexcept
4949
{
5050
if(this != &rhs)
5151
{
52-
beg_notified = rhs.beg_notified;
53-
end_notified = rhs.end_notified;
52+
beg_notified.store(rhs.beg_notified.load());
53+
end_notified.store(rhs.end_notified.load());
5454
uri = rhs.uri;
5555
hsa_executable = rhs.hsa_executable;
5656
hsa_code_object = rhs.hsa_code_object;
5757
rocp_data = rhs.rocp_data;
58-
user_data = std::move(rhs.user_data);
59-
rocp_data.uri = (uri) ? uri->c_str() : nullptr;
60-
symbols = std::move(rhs.symbols);
58+
// Manually move user_data by extracting and inserting under locks
59+
rhs.user_data.wlock([this](auto& rhs_map) {
60+
this->user_data.wlock([&rhs_map](auto& lhs_map) { lhs_map = std::move(rhs_map); });
61+
});
62+
rocp_data.uri = (uri) ? uri->c_str() : nullptr;
63+
symbols = std::move(rhs.symbols);
6164
}
6265

6366
return *this;

projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/code_object/hsa/code_object.hpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,14 @@
2222

2323
#pragma once
2424

25+
#include "lib/common/synchronized.hpp"
2526
#include "lib/rocprofiler-sdk/code_object/hsa/kernel_symbol.hpp"
2627
#include "lib/rocprofiler-sdk/context/context.hpp"
2728

2829
#include <rocprofiler-sdk/fwd.h>
2930
#include <rocprofiler-sdk/hsa.h>
3031

32+
#include <atomic>
3133
#include <cstdint>
3234
#include <string>
3335
#include <unordered_map>
@@ -39,11 +41,11 @@ namespace code_object
3941
{
4042
namespace hsa
4143
{
42-
using context_t = context::context;
43-
using user_data_t = rocprofiler_user_data_t;
44-
using context_user_data_map_t = std::unordered_map<const context_t*, user_data_t>;
45-
using context_array_t = context::context_array_t;
46-
using context_user_data_map_t = std::unordered_map<const context_t*, user_data_t>;
44+
using context_t = context::context;
45+
using user_data_t = rocprofiler_user_data_t;
46+
using context_user_data_map_t = std::unordered_map<const context_t*, user_data_t>;
47+
using synchronized_user_data_t = common::Synchronized<context_user_data_map_t>;
48+
using context_array_t = context::context_array_t;
4749

4850
struct code_object
4951
{
@@ -59,15 +61,15 @@ struct code_object
5961
code_object& operator=(const code_object&) = delete;
6062
code_object& operator =(code_object&&) noexcept;
6163

62-
bool beg_notified = false;
63-
bool end_notified = false;
64+
std::atomic<bool> beg_notified = false;
65+
std::atomic<bool> end_notified = false;
6466
const std::string* uri = nullptr;
6567
hsa_executable_t hsa_executable = {};
6668
hsa_loaded_code_object_t hsa_code_object = {};
6769
code_object_data_t rocp_data = common::init_public_api_struct(code_object_data_t{});
6870
symbol_array_t symbols = {};
6971
context_array_t contexts = {};
70-
context_user_data_map_t user_data = {};
72+
synchronized_user_data_t user_data = {};
7173
};
7274

7375
struct code_object_unload

projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/code_object/hsa/kernel_symbol.cpp

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,17 @@ kernel_symbol::operator=(kernel_symbol&& rhs) noexcept
4949
{
5050
if(this != &rhs)
5151
{
52-
beg_notified = rhs.beg_notified;
53-
end_notified = rhs.end_notified;
54-
name = rhs.name;
55-
hsa_executable = rhs.hsa_executable;
56-
hsa_agent = rhs.hsa_agent;
57-
hsa_symbol = rhs.hsa_symbol;
58-
rocp_data = rhs.rocp_data;
59-
user_data = std::move(rhs.user_data);
52+
beg_notified.store(rhs.beg_notified.load());
53+
end_notified.store(rhs.end_notified.load());
54+
name = rhs.name;
55+
hsa_executable = rhs.hsa_executable;
56+
hsa_agent = rhs.hsa_agent;
57+
hsa_symbol = rhs.hsa_symbol;
58+
rocp_data = rhs.rocp_data;
59+
// Manually move user_data by extracting and inserting under locks
60+
rhs.user_data.wlock([this](auto& rhs_map) {
61+
this->user_data.wlock([&rhs_map](auto& lhs_map) { lhs_map = std::move(rhs_map); });
62+
});
6063
rocp_data.kernel_name = (name) ? name->c_str() : nullptr;
6164
}
6265

projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/code_object/hsa/kernel_symbol.hpp

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,13 @@
2222

2323
#pragma once
2424

25+
#include "lib/common/synchronized.hpp"
2526
#include "lib/rocprofiler-sdk/context/context.hpp"
2627

2728
#include <rocprofiler-sdk/fwd.h>
2829
#include <rocprofiler-sdk/hsa.h>
2930

31+
#include <atomic>
3032
#include <cstdint>
3133
#include <string>
3234
#include <unordered_map>
@@ -38,11 +40,11 @@ namespace code_object
3840
{
3941
namespace hsa
4042
{
41-
using context_t = context::context;
42-
using user_data_t = rocprofiler_user_data_t;
43-
using context_user_data_map_t = std::unordered_map<const context_t*, user_data_t>;
44-
using context_array_t = context::context_array_t;
45-
using context_user_data_map_t = std::unordered_map<const context_t*, user_data_t>;
43+
using context_t = context::context;
44+
using user_data_t = rocprofiler_user_data_t;
45+
using context_user_data_map_t = std::unordered_map<const context_t*, user_data_t>;
46+
using synchronized_user_data_t = common::Synchronized<context_user_data_map_t>;
47+
using context_array_t = context::context_array_t;
4648

4749
struct kernel_symbol
4850
{
@@ -58,14 +60,14 @@ struct kernel_symbol
5860
kernel_symbol& operator=(const kernel_symbol&) = delete;
5961
kernel_symbol& operator =(kernel_symbol&&) noexcept;
6062

61-
bool beg_notified = false;
62-
bool end_notified = false;
63-
const std::string* name = nullptr;
64-
hsa_executable_t hsa_executable = {};
65-
hsa_agent_t hsa_agent = {};
66-
hsa_executable_symbol_t hsa_symbol = {};
67-
kernel_symbol_data_t rocp_data = common::init_public_api_struct(kernel_symbol_data_t{});
68-
context_user_data_map_t user_data = {};
63+
std::atomic<bool> beg_notified = false;
64+
std::atomic<bool> end_notified = false;
65+
const std::string* name = nullptr;
66+
hsa_executable_t hsa_executable = {};
67+
hsa_agent_t hsa_agent = {};
68+
hsa_executable_symbol_t hsa_symbol = {};
69+
kernel_symbol_data_t rocp_data = common::init_public_api_struct(kernel_symbol_data_t{});
70+
synchronized_user_data_t user_data = {};
6971
};
7072

7173
bool

projects/rocprofiler-sdk/tests/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ add_subdirectory(counter-collection)
8282
add_subdirectory(openmp-tools)
8383
add_subdirectory(rocdecode)
8484
add_subdirectory(rocjpeg)
85+
# add_subdirectory(hip-host-tracing)
86+
# add_subdirectory(code-object-multi-threaded)
8587

8688
# rocpd validation tests
8789
add_subdirectory(rocpd)

projects/rocprofiler-sdk/tests/bin/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,5 @@ add_subdirectory(hsa-code-object)
4141
add_subdirectory(hip-streams)
4242
add_subdirectory(hip-streams-per-thread)
4343
add_subdirectory(attachment-test)
44+
# add_subdirectory(hip-host)
45+
# add_subdirectory(module-loading-test)

0 commit comments

Comments
 (0)