@@ -69,17 +69,20 @@ std::shared_ptr<void> getPeerMemoryHandle(cudaIpcMemHandle_t ipcHandle) {
6969 };
7070#if defined(__HIP_PLATFORM_AMD__)
7171 static std::unordered_map<cudaIpcMemHandle_t, std::weak_ptr<void >> peerMemoryHandleMap;
72- std::mutex mutex;
72+ static std::mutex mutex;
7373 std::lock_guard<std::mutex> lock (mutex);
7474 auto it = peerMemoryHandleMap.find (ipcHandle);
7575 if (it != peerMemoryHandleMap.end ()) {
7676 if (auto ptr = it->second .lock ()) {
7777 return ptr;
7878 }
79- throw mscclpp::Error (" Failed to get peer memory handle, may already be closed" , mscclpp::ErrorCode::InvalidUsage);
8079 }
8180 MSCCLPP_CUDATHROW (cudaIpcOpenMemHandle (&addr, ipcHandle, cudaIpcMemLazyEnablePeerAccess));
82- std::shared_ptr<void > ptr = std::shared_ptr<void >(addr, deleter);
81+ std::shared_ptr<void > ptr = std::shared_ptr<void >(addr, [ipcHandle, deleter](void * p) {
82+ deleter (p);
83+ std::lock_guard<std::mutex> lock (mutex);
84+ peerMemoryHandleMap.erase (ipcHandle);
85+ });
8386 peerMemoryHandleMap[ipcHandle] = ptr;
8487 return ptr;
8588#else
@@ -304,6 +307,9 @@ RegisteredMemory::Impl::Impl(const std::vector<char>::const_iterator& begin,
304307#endif // !(CUDA_NVLS_API_AVAILABLE)
305308 } else if (getHostHash () == this ->hostHash ) {
306309 this ->peerHandle = getPeerMemoryHandle (entry.cudaIpcBaseHandle );
310+ if (!this ->peerHandle ) {
311+ throw Error (" Failed to open CUDA IPC handle, may already be closed" , ErrorCode::InvalidUsage);
312+ }
307313 this ->data = static_cast <char *>(this ->peerHandle .get ()) + entry.cudaIpcOffsetFromBase ;
308314 }
309315 }
0 commit comments