diff --git a/unified-runtime/source/loader/layers/sanitizer/asan/asan_buffer.cpp b/unified-runtime/source/loader/layers/sanitizer/asan/asan_buffer.cpp index 5333b08543643..e3f92ab5a5f23 100644 --- a/unified-runtime/source/loader/layers/sanitizer/asan/asan_buffer.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/asan/asan_buffer.cpp @@ -64,9 +64,9 @@ ur_result_t EnqueueMemCopyRectHelper( UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait(Queue, Events.size(), Events.data(), Event)); - - for (const auto &E : Events) - UR_CALL(getContext()->urDdiTable.Event.pfnRelease(E)); + getAsanInterceptor() + ->getContextInfo(GetContext(Queue)) + ->DeferredEvents.add(Events); return UR_RESULT_SUCCESS; } diff --git a/unified-runtime/source/loader/layers/sanitizer/asan/asan_interceptor.cpp b/unified-runtime/source/loader/layers/sanitizer/asan/asan_interceptor.cpp index 03907ed29e3fe..cf15c4f721585 100644 --- a/unified-runtime/source/loader/layers/sanitizer/asan/asan_interceptor.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/asan/asan_interceptor.cpp @@ -951,6 +951,8 @@ ProgramInfo::getKernelMetadata(ur_kernel_handle_t Kernel) const { } ContextInfo::~ContextInfo() { + DeferredEvents.releaseAll(); + Stats.Print(Handle); InternalQueueMap.clear(); diff --git a/unified-runtime/source/loader/layers/sanitizer/asan/asan_interceptor.hpp b/unified-runtime/source/loader/layers/sanitizer/asan/asan_interceptor.hpp index d2f7ef198cca7..92b74b1549189 100644 --- a/unified-runtime/source/loader/layers/sanitizer/asan/asan_interceptor.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/asan/asan_interceptor.hpp @@ -163,6 +163,8 @@ struct ContextInfo { std::optional m_Quarantine; + DeferredEventList DeferredEvents; + AsanStatsWrapper Stats; explicit ContextInfo(ur_context_handle_t Context) : Handle(Context) { diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_buffer.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_buffer.cpp index 
082f3e2d2d831..62638725571d3 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_buffer.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_buffer.cpp @@ -69,13 +69,11 @@ ur_result_t EnqueueMemCopyRectHelper( UR_CALL(getContext()->urDdiTable.Event.pfnWait(Events.size(), &Events[0])); } - if (Event) { - UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait(Queue, Events.size(), - &Events[0], Event)); - } - - for (const auto &E : Events) - UR_CALL(getContext()->urDdiTable.Event.pfnRelease(E)); + UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait(Queue, Events.size(), + &Events[0], Event)); + getMsanInterceptor() + ->getContextInfo(GetContext(Queue)) + ->DeferredEvents.add(Events); return UR_RESULT_SUCCESS; } diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp index 18690b94bf375..9ff3da0f9e516 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -79,14 +79,12 @@ ur_result_t urEnqueueUSMFill2DFallback(ur_queue_handle_t hQueue, void *pMem, WaitEvents.push_back(Event); } - if (phEvent) { - UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( - hQueue, WaitEvents.size(), WaitEvents.data(), phEvent)); - } + UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( + hQueue, WaitEvents.size(), WaitEvents.data(), phEvent)); - for (const auto Event : WaitEvents) { - UR_CALL(getContext()->urDdiTable.Event.pfnRelease(Event)); - } + getMsanInterceptor() + ->getContextInfo(GetContext(hQueue)) + ->DeferredEvents.add(WaitEvents); return UR_RESULT_SUCCESS; } @@ -830,21 +828,17 @@ ur_result_t urEnqueueMemBufferWrite( // Update shadow memory std::shared_ptr DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); - const char Val = 0; uptr ShadowAddr = DeviceInfo->Shadow->MemToShadow((uptr)pDst + offset); Event = nullptr; - 
UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMFill( - hQueue, (void *)ShadowAddr, 1, &Val, size, numEventsInWaitList, - phEventWaitList, &Event)); + UR_CALL(EnqueueUSMSetZero(hQueue, (void *)ShadowAddr, size, + numEventsInWaitList, phEventWaitList, &Event)); Events.push_back(Event); - if (phEvent) { - UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( - hQueue, Events.size(), Events.data(), phEvent)); - } - - for (const auto &E : Events) - UR_CALL(getContext()->urDdiTable.Event.pfnRelease(E)); + UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( + hQueue, Events.size(), Events.data(), phEvent)); + getMsanInterceptor() + ->getContextInfo(GetContext(hQueue)) + ->DeferredEvents.add(Events); } else { UR_CALL(pfnMemBufferWrite(hQueue, hBuffer, blockingWrite, offset, size, pSrc, numEventsInWaitList, phEventWaitList, @@ -1039,13 +1033,11 @@ ur_result_t urEnqueueMemBufferCopy( numEventsInWaitList, phEventWaitList, &Event)); Events.push_back(Event); - if (phEvent) { - UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( - hQueue, Events.size(), Events.data(), phEvent)); - } - - for (const auto &E : Events) - UR_CALL(getContext()->urDdiTable.Event.pfnRelease(E)); + UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( + hQueue, Events.size(), Events.data(), phEvent)); + getMsanInterceptor() + ->getContextInfo(GetContext(hQueue)) + ->DeferredEvents.add(Events); } else { UR_CALL(pfnMemBufferCopy(hQueue, hBufferSrc, hBufferDst, srcOffset, dstOffset, size, numEventsInWaitList, @@ -1164,21 +1156,17 @@ ur_result_t urEnqueueMemBufferFill( // Update shadow memory std::shared_ptr DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); - const char Val = 0; uptr ShadowAddr = DeviceInfo->Shadow->MemToShadow((uptr)Handle + offset); Event = nullptr; - UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMFill( - hQueue, (void *)ShadowAddr, 1, &Val, size, numEventsInWaitList, - phEventWaitList, &Event)); + UR_CALL(EnqueueUSMSetZero(hQueue, (void *)ShadowAddr, size, + 
numEventsInWaitList, phEventWaitList, &Event)); Events.push_back(Event); - if (phEvent) { - UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( - hQueue, Events.size(), Events.data(), phEvent)); - } - - for (const auto &E : Events) - UR_CALL(getContext()->urDdiTable.Event.pfnRelease(E)); + UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( + hQueue, Events.size(), Events.data(), phEvent)); + getMsanInterceptor() + ->getContextInfo(GetContext(hQueue)) + ->DeferredEvents.add(Events); } else { UR_CALL(pfnMemBufferFill(hQueue, hBuffer, pPattern, patternSize, offset, size, numEventsInWaitList, phEventWaitList, @@ -1417,13 +1405,11 @@ ur_result_t urEnqueueUSMFill( // NOTE: No need to set origin, since its shadow is clean - if (phEvent) { - UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( - hQueue, Events.size(), Events.data(), phEvent)); - } - - for (const auto &E : Events) - UR_CALL(getContext()->urDdiTable.Event.pfnRelease(E)); + UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( + hQueue, Events.size(), Events.data(), phEvent)); + getMsanInterceptor() + ->getContextInfo(GetContext(hQueue)) + ->DeferredEvents.add(Events); return UR_RESULT_SUCCESS; } @@ -1509,13 +1495,11 @@ ur_result_t urEnqueueUSMMemcpy( } } - if (phEvent) { - UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( - hQueue, Events.size(), Events.data(), phEvent)); - } - - for (const auto &E : Events) - UR_CALL(getContext()->urDdiTable.Event.pfnRelease(E)); + UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( + hQueue, Events.size(), Events.data(), phEvent)); + getMsanInterceptor() + ->getContextInfo(GetContext(hQueue)) + ->DeferredEvents.add(Events); return UR_RESULT_SUCCESS; } @@ -1574,13 +1558,11 @@ ur_result_t urEnqueueUSMFill2D( // NOTE: No need to set origin, since its shadow is clean - if (phEvent) { - UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( - hQueue, Events.size(), Events.data(), phEvent)); - } - - for (const auto &E : Events) - 
UR_CALL(getContext()->urDdiTable.Event.pfnRelease(E)); + UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( + hQueue, Events.size(), Events.data(), phEvent)); + getMsanInterceptor() + ->getContextInfo(GetContext(hQueue)) + ->DeferredEvents.add(Events); return UR_RESULT_SUCCESS; } @@ -1682,13 +1664,11 @@ ur_result_t urEnqueueUSMMemcpy2D( Events.push_back(Event); } - if (phEvent) { - UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( - hQueue, Events.size(), Events.data(), phEvent)); - } - - for (const auto E : Events) - UR_CALL(getContext()->urDdiTable.Event.pfnRelease(E)); + UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( + hQueue, Events.size(), Events.data(), phEvent)); + getMsanInterceptor() + ->getContextInfo(GetContext(hQueue)) + ->DeferredEvents.add(Events); return UR_RESULT_SUCCESS; } diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp index 63ae6486038ce..de432261c0a65 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -649,6 +649,7 @@ ProgramInfo::getKernelMetadata(ur_kernel_handle_t Kernel) const { } ContextInfo::~ContextInfo() { + DeferredEvents.releaseAll(); [[maybe_unused]] auto Result = getContext()->urDdiTable.Context.pfnRelease(Handle); assert(Result == UR_RESULT_SUCCESS); diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.hpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.hpp index fc0dfb5c0f015..3bf225499c7c3 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.hpp @@ -18,6 +18,7 @@ #include "msan_shadow.hpp" #include "sanitizer_common/sanitizer_common.hpp" #include "sanitizer_common/sanitizer_options.hpp" +#include 
"sanitizer_common/sanitizer_utils.hpp" #include "ur_sanitizer_layer.hpp" #include @@ -141,6 +142,7 @@ struct ContextInfo { std::atomic RefCount = 1; std::vector DeviceList; + DeferredEventList DeferredEvents; explicit ContextInfo(ur_context_handle_t Context) : Handle(Context) { [[maybe_unused]] auto Result = diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp index 742562ab4a05d..4b4aca0579a48 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp @@ -14,6 +14,7 @@ #include "sanitizer_libdevice.hpp" #include "unified-runtime/ur_api.h" +#include "ur/ur.hpp" #include "ur_sanitizer_layer.hpp" #include @@ -21,6 +22,30 @@ namespace ur_sanitizer_layer { +// Accumulates events whose release must be deferred until a safe point +// (e.g., context release). L0 may not retain input events passed to +// pfnEventsWait long enough for the caller to release them immediately. 
+struct DeferredEventList {
+  // Queues every handle in Events for a later releaseAll(). The handles are
+  // stored as-is; nothing is retained or released here.
+  void add(const std::vector<ur_event_handle_t> &Events) {
+    if (Events.empty())
+      return;
+    std::scoped_lock<ur_shared_mutex> Lock(Mutex);
+    List.insert(List.end(), Events.begin(), Events.end());
+  }
+
+  // Calls pfnRelease once on each queued handle and leaves the list empty.
+  // A failed release is only reported through assert().
+  //
+  // NOTE(review): the list only shrinks here, so handles accumulate between
+  // calls -- confirm the call sites run often enough for long-lived contexts.
+  void releaseAll() {
+    // Detach the queued handles while holding the lock, then release them
+    // after dropping it, so the mutex is never held across a DDI call.
+    std::vector<ur_event_handle_t> Pending;
+    {
+      std::scoped_lock<ur_shared_mutex> Lock(Mutex);
+      Pending.swap(List);
+    }
+    for (ur_event_handle_t E : Pending) {
+      [[maybe_unused]] auto Result =
+          getContext()->urDdiTable.Event.pfnRelease(E);
+      assert(Result == UR_RESULT_SUCCESS);
+    }
+  }
+
+private:
+  // Guards List.
+  ur_shared_mutex Mutex;
+  // Handles waiting to be released by releaseAll().
+  std::vector<ur_event_handle_t> List;
+};
+
 struct ManagedQueue {
   ManagedQueue(ur_context_handle_t Context, ur_device_handle_t Device,
                bool IsOutOfOrder = false);
diff --git a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_buffer.cpp b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_buffer.cpp
index 20da5bed46149..afdfb6d5ad266 100644
--- a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_buffer.cpp
+++ b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_buffer.cpp
@@ -68,13 +68,11 @@ ur_result_t EnqueueMemCopyRectHelper(
     UR_CALL(getContext()->urDdiTable.Event.pfnWait(Events.size(), &Events[0]));
   }
 
-  if (Event) {
-    UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait(Queue, Events.size(),
-                                                           &Events[0], Event));
-  }
-
-  for (const auto &E : Events)
-    UR_CALL(getContext()->urDdiTable.Event.pfnRelease(E));
+  UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait(Queue, Events.size(),
+                                                         &Events[0], Event));
+  getTsanInterceptor()
+      ->getContextInfo(GetContext(Queue))
+      ->DeferredEvents.add(Events);
 
   return UR_RESULT_SUCCESS;
 }
diff --git a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_interceptor.hpp b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_interceptor.hpp
index f469805385222..67bc1f11eb039 100644
--- a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_interceptor.hpp
+++ b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_interceptor.hpp
@@ -64,6 +64,8 @@ struct ContextInfo {
   std::unordered_map>
       InternalQueueMap;
 
+  DeferredEventList DeferredEvents;
+
   explicit ContextInfo(ur_context_handle_t
Context) : Handle(Context) { [[maybe_unused]] auto Result = getContext()->urDdiTable.Context.pfnRetain(Context); @@ -71,6 +73,7 @@ struct ContextInfo { } ~ContextInfo() { + DeferredEvents.releaseAll(); InternalQueueMap.clear(); [[maybe_unused]] auto Result = getContext()->urDdiTable.Context.pfnRelease(Handle);