@@ -378,7 +378,14 @@ using code_object_unload_array_t = std::vector<hsa::code_object_unload>;
378378std::vector<hsa::code_object_unload>
379379shutdown (hsa_executable_t executable);
380380
381- bool is_shutdown = false ;
381+ std::atomic<bool > is_shutdown{false }; // starts false; finalize() stores true (release) as its last step, and the entry points in this file read it with acquire loads to bail out early
382+
383+ auto &
384+ get_destroy_mutex () // accessor for the single mutex that executable_destroy() locks for its whole body
385+ {
386+ static auto _v = std::mutex{}; // function-local static: constructed on first call, same object on every later call
387+ return _v; // returned by reference (return type is auto&), so callers lock the shared instance rather than a copy
388+ }
382389
383390auto *
384391get_executables ()
@@ -733,7 +740,8 @@ get_unloaded_code_objects(hsa_executable_t executable)
733740{
734741 auto _unloaded = std::vector<hsa::code_object_unload>{};
735742
736- if (!is_shutdown && get_loader_table ().hsa_ven_amd_loader_executable_iterate_loaded_code_objects )
743+ if (!is_shutdown.load (std::memory_order_acquire) &&
744+ get_loader_table ().hsa_ven_amd_loader_executable_iterate_loaded_code_objects )
737745 get_loader_table ().hsa_ven_amd_loader_executable_iterate_loaded_code_objects (
738746 executable, code_object_unload_callback, &_unloaded);
739747
@@ -837,7 +845,7 @@ executable_freeze_internal(hsa_executable_t executable)
837845
838846 if (!ctxs.empty ())
839847 {
840- code_obj_vec->rlock ([](const code_object_array_t & data) {
848+ code_obj_vec->wlock ([](code_object_array_t & data) {
841849 auto tidx = common::get_tid ();
842850 // set the contexts for each code object
843851 for (const auto & ditr : data)
@@ -864,8 +872,10 @@ executable_freeze_internal(hsa_executable_t executable)
864872 // invoke callback
865873 auto & cb_data =
866874 citr->callback_tracer ->callback_data .at (CODE_OBJECT_KIND );
867- auto & user_data = ditr->user_data [citr];
868- cb_data.callback (record, &user_data, cb_data.data );
875+ ditr->user_data .wlock ([&](auto & user_data_map) {
876+ auto & user_data = user_data_map[citr];
877+ cb_data.callback (record, &user_data, cb_data.data );
878+ });
869879 }
870880 }
871881
@@ -889,52 +899,57 @@ executable_freeze_internal(hsa_executable_t executable)
889899 // invoke callback
890900 auto & cb_data =
891901 citr->callback_tracer ->callback_data .at (CODE_OBJECT_KIND );
892- auto & user_data = sitr->user_data [citr];
893- cb_data.callback (record, &user_data, cb_data.data );
894-
895- std::string device_name =
896- CHECK_NOTNULL (get_hip_register_data ())
897- ->rlock ([sym_data](
902+ sitr->user_data .wlock ([&](auto & user_data_map) {
903+ auto & user_data = user_data_map[citr];
904+ cb_data.callback (record, &user_data, cb_data.data );
905+
906+ std::string device_name =
907+ CHECK_NOTNULL (get_hip_register_data ())
908+ ->rlock (
909+ [sym_data](
898910 const hip::hip_register_data& register_data) {
899- const auto & sym_map =
900- register_data.kernel_symbol_device_map ;
901- const auto it = sym_map.find (*CHECK_NOTNULL (
902- common::get_string_entry (sym_data.kernel_name )));
903- if (it != sym_map.end ()) return it->second ;
904- return std::string ();
905- });
906- // Does not have a host function, skip
907- if (device_name.empty ()) continue ;
908- auto host_data =
909- CHECK_NOTNULL (get_hip_register_data ())
910- ->rlock ([device_name](
911- const hip::hip_register_data& register_data) {
912- // Add check for out of range here
913- const auto it =
914- register_data.host_function_map .find (device_name);
915- if (it == register_data.host_function_map .end ())
916- {
917- return rocprofiler_callback_tracing_code_object_host_kernel_symbol_register_data_t {};
918- }
919- return it->second ;
920- });
921- // when kernel_symbol_device_map kernels are not present in
922- // host_function_map, skip.
923- if (host_data.device_function == nullptr ) continue ;
924- host_data.code_object_id = sym_data.code_object_id ;
925- host_data.kernel_id = sym_data.kernel_id ;
926- host_data.host_function_id = ++get_host_function_id ();
927- auto hip_record = rocprofiler_callback_tracing_record_t {
928- .context_id = rocprofiler_context_id_t {citr->context_idx },
929- .thread_id = tidx,
930- .correlation_id = rocprofiler_correlation_id_t {},
931- .kind = CODE_OBJECT_KIND ,
932- .operation = CODE_OBJECT_HOST_SYMBOL ,
933- .phase = ROCPROFILER_CALLBACK_PHASE_LOAD ,
934- .payload = static_cast <void *>(&host_data)};
935-
936- // invoke callback
937- cb_data.callback (hip_record, &user_data, cb_data.data );
911+ const auto & sym_map =
912+ register_data.kernel_symbol_device_map ;
913+ const auto it = sym_map.find (
914+ *CHECK_NOTNULL (common::get_string_entry (
915+ sym_data.kernel_name )));
916+ if (it != sym_map.end ()) return it->second ;
917+ return std::string ();
918+ });
919+ // Does not have a host function, skip
920+ if (device_name.empty ()) return ;
921+ auto host_data =
922+ CHECK_NOTNULL (get_hip_register_data ())
923+ ->rlock ([device_name](const hip::hip_register_data&
924+ register_data) {
925+ // Add check for out of range here
926+ const auto it =
927+ register_data.host_function_map .find (
928+ device_name);
929+ if (it == register_data.host_function_map .end ())
930+ {
931+ return rocprofiler_callback_tracing_code_object_host_kernel_symbol_register_data_t {};
932+ }
933+ return it->second ;
934+ });
935+ // when kernel_symbol_device_map kernels are not present in
936+ // host_function_map, skip.
937+ if (host_data.device_function == nullptr ) return ;
938+ host_data.code_object_id = sym_data.code_object_id ;
939+ host_data.kernel_id = sym_data.kernel_id ;
940+ host_data.host_function_id = ++get_host_function_id ();
941+ auto hip_record = rocprofiler_callback_tracing_record_t {
942+ .context_id = rocprofiler_context_id_t {citr->context_idx },
943+ .thread_id = tidx,
944+ .correlation_id = rocprofiler_correlation_id_t {},
945+ .kind = CODE_OBJECT_KIND ,
946+ .operation = CODE_OBJECT_HOST_SYMBOL ,
947+ .phase = ROCPROFILER_CALLBACK_PHASE_LOAD ,
948+ .payload = static_cast <void *>(&host_data)};
949+
950+ // invoke callback
951+ cb_data.callback (hip_record, &user_data, cb_data.data );
952+ });
938953 }
939954 }
940955 }
@@ -964,7 +979,13 @@ executable_freeze(hsa_executable_t executable, const char* options)
964979hsa_status_t
965980executable_destroy (hsa_executable_t executable)
966981{
967- if (is_shutdown) return HSA_STATUS_SUCCESS ;
982+ // Serialize all executable_destroy calls to prevent:
983+ // 1. Concurrent access to code objects in shutdown()
984+ // 2. Use-after-free when multiple threads destroy the same executable
985+ // 3. Races on end_notified flags (now atomic, but callbacks still need serialization)
986+ auto _lk = std::unique_lock{get_destroy_mutex ()};
987+
988+ if (is_shutdown.load (std::memory_order_acquire)) return HSA_STATUS_SUCCESS ;
968989
969990 auto _unloaded = shutdown (executable);
970991
@@ -1098,9 +1119,11 @@ shutdown(hsa_executable_t executable)
10981119 .payload = static_cast <void *>(&itr.object ->rocp_data )};
10991120
11001121 // invoke callback
1101- auto & cb_data = citr->callback_tracer ->callback_data .at (CODE_OBJECT_KIND );
1102- auto & user_data = itr.object ->user_data .at (citr);
1103- cb_data.callback (record, &user_data, cb_data.data );
1122+ auto & cb_data = citr->callback_tracer ->callback_data .at (CODE_OBJECT_KIND );
1123+ itr.object ->user_data .wlock ([&](auto & user_data_map) {
1124+ auto & user_data = user_data_map.at (citr);
1125+ cb_data.callback (record, &user_data, cb_data.data );
1126+ });
11041127 }
11051128 }
11061129
@@ -1123,9 +1146,11 @@ shutdown(hsa_executable_t executable)
11231146 .payload = static_cast <void *>(&sitr->rocp_data )};
11241147
11251148 // invoke callback
1126- auto & cb_data = citr->callback_tracer ->callback_data .at (CODE_OBJECT_KIND );
1127- auto & user_data = sitr->user_data .at (citr);
1128- cb_data.callback (record, &user_data, cb_data.data );
1149+ auto & cb_data = citr->callback_tracer ->callback_data .at (CODE_OBJECT_KIND );
1150+ sitr->user_data .wlock ([&](auto & user_data_map) {
1151+ auto & user_data = user_data_map.at (citr);
1152+ cb_data.callback (record, &user_data, cb_data.data );
1153+ });
11291154 }
11301155 }
11311156 }
@@ -1226,7 +1251,8 @@ get_kernel_id(uint64_t kernel_object)
12261251void
12271252finalize ()
12281253{
1229- if (is_shutdown || !get_executables () || !get_code_objects ()) return ;
1254+ if (is_shutdown.load (std::memory_order_acquire) || !get_executables () || !get_code_objects ())
1255+ return ;
12301256
12311257 CHECK_NOTNULL (get_executables ())->rlock ([](const executable_array_t & edata) {
12321258 auto tmp = edata;
@@ -1237,13 +1263,14 @@ finalize()
12371263
12381264 CHECK_NOTNULL (get_code_objects ())->wlock ([](code_object_array_t & data) { data.clear (); });
12391265
1240- is_shutdown = true ;
1266+ is_shutdown. store ( true , std::memory_order_release) ;
12411267}
12421268
12431269void
12441270iterate_loaded_code_objects (code_object_iterator_t && func)
12451271{
1246- if (is_shutdown || !get_executables () || !get_code_objects ()) return ;
1272+ if (is_shutdown.load (std::memory_order_acquire) || !get_executables () || !get_code_objects ())
1273+ return ;
12471274 CHECK_NOTNULL (get_code_objects ())
12481275 ->rlock (
12491276 [](const code_object_array_t & data, code_object_iterator_t && func_v) {
0 commit comments