Skip to content

Commit 38b34cf

Browse files
committed
Add a timeout for when the stability thread becomes lost
Signed-off-by: Neil R. Spruit <[email protected]>
1 parent d4b6151 commit 38b34cf

File tree

1 file changed

+28 -23 lines changed

1 file changed

+28 -23 lines changed

source/lib/ze_lib.cpp

Lines changed: 28 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -424,6 +424,8 @@ zelSetDelayLoaderContextTeardown()
424424
#define ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_NULL 1
425425
#define ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_FAILED 2
426426
#define ZEL_STABILITY_CHECK_RESULT_EXCEPTION 3
427+
// The stability check thread timeout in milliseconds
428+
#define ZEL_STABILITY_CHECK_THREAD_TIMEOUT 100
427429

428430
/**
429431
* @brief Performs a stability check for the Level Zero loader.
@@ -453,33 +455,28 @@ zelSetDelayLoaderContextTeardown()
453455
* @exception This function catches all exceptions internally and does not throw.
454456
*/
455457
void stabilityCheck(std::promise<int> stabilityPromise) {
456-
try {
457-
if (!ze_lib::context->loaderDriverGet) {
458-
if (ze_lib::context->debugTraceEnabled) {
459-
std::string message = "LoaderDriverGet is a bad pointer. Exiting stability checker thread.";
460-
ze_lib::context->debug_trace_message(message, "");
461-
}
462-
stabilityPromise.set_value(ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_NULL);
463-
return;
458+
if (!ze_lib::context->loaderDriverGet) {
459+
if (ze_lib::context->debugTraceEnabled) {
460+
std::string message = "LoaderDriverGet is a bad pointer. Exiting stability checker thread.";
461+
ze_lib::context->debug_trace_message(message, "");
464462
}
463+
stabilityPromise.set_value(ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_NULL);
464+
return;
465+
}
465466

466-
uint32_t driverCount = 0;
467-
ze_result_t result = ZE_RESULT_ERROR_UNINITIALIZED;
468-
result = ze_lib::context->loaderDriverGet(&driverCount, nullptr);
469-
if (result != ZE_RESULT_SUCCESS || driverCount == 0) {
470-
if (ze_lib::context->debugTraceEnabled) {
471-
std::string message = "Loader stability check failed. Exiting stability checker thread.";
472-
ze_lib::context->debug_trace_message(message, "");
473-
}
474-
stabilityPromise.set_value(ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_FAILED);
475-
return;
467+
uint32_t driverCount = 0;
468+
ze_result_t result = ZE_RESULT_ERROR_UNINITIALIZED;
469+
result = ze_lib::context->loaderDriverGet(&driverCount, nullptr);
470+
if (result != ZE_RESULT_SUCCESS || driverCount == 0) {
471+
if (ze_lib::context->debugTraceEnabled) {
472+
std::string message = "Loader stability check failed. Exiting stability checker thread.";
473+
ze_lib::context->debug_trace_message(message, "");
476474
}
477-
stabilityPromise.set_value(ZEL_STABILITY_CHECK_RESULT_SUCCESS);
478-
return;
479-
} catch (...) {
480-
stabilityPromise.set_value(ZEL_STABILITY_CHECK_RESULT_EXCEPTION);
475+
stabilityPromise.set_value(ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_FAILED);
481476
return;
482477
}
478+
stabilityPromise.set_value(ZEL_STABILITY_CHECK_RESULT_SUCCESS);
479+
return;
483480
}
484481
#endif
485482

@@ -542,7 +539,15 @@ zelCheckIsLoaderInTearDown() {
542539
}
543540
});
544541
});
545-
threadResult = ze_lib::resultFutureResult.get();
542+
if (ze_lib::resultFutureResult.wait_for(std::chrono::milliseconds(ZEL_STABILITY_CHECK_THREAD_TIMEOUT)) == std::future_status::timeout) {
543+
if (ze_lib::context->debugTraceEnabled) {
544+
std::string message = "Stability Thread timeout, assuming thread has crashed";
545+
ze_lib::context->debug_trace_message(message, "");
546+
}
547+
threadResult = ZEL_STABILITY_CHECK_RESULT_EXCEPTION;
548+
} else {
549+
threadResult = ze_lib::resultFutureResult.get();
550+
}
546551
} catch (const std::exception& e) {
547552
if (ze_lib::context->debugTraceEnabled) {
548553
std::string message = "Exception caught in parent thread: " + std::string(e.what());

0 commit comments

Comments
 (0)