Skip to content

Commit 2742c4f

Browse files
committed
Fix stabilityCheck to avoid data race at close
Signed-off-by: Neil R. Spruit <[email protected]>
1 parent 73edc5b commit 2742c4f

File tree

1 file changed

+41
-70
lines changed

1 file changed

+41
-70
lines changed

source/lib/ze_lib.cpp

Lines changed: 41 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -455,39 +455,32 @@ zelSetDelayLoaderContextTeardown()
455455
/**
456456
* @brief Performs a stability check for the Level Zero loader.
457457
*
458-
* This function checks the stability of the Level Zero loader by verifying
459-
* the presence of the loader module, the validity of the `zeDriverGet` function
460-
* pointer, and the ability to retrieve driver information. The result of the
461-
* stability check is communicated through the provided promise.
458+
* This function verifies the stability of the Level Zero loader by checking:
459+
* - The presence of the loader module.
460+
* - The validity of the `zeDriverGet` function pointer.
461+
* - The ability to retrieve driver information.
462462
*
463-
* @param stabilityPromise A promise object used to communicate the result of
464-
* the stability check. The promise is set with one of
465-
* the following values:
466-
* - ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_NULL: The
467-
* `zeDriverGet` function pointer is invalid.
468-
* - ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_FAILED: The
469-
* loader failed to retrieve driver information.
470-
* - ZEL_STABILITY_CHECK_RESULT_EXCEPTION: An
471-
* exception occurred during the stability check.
472-
* - ZEL_STABILITY_CHECK_RESULT_SUCCESS: The stability
473-
* check was successful.
463+
* The result of the stability check is returned as an integer, with the following possible values:
464+
* - `ZEL_STABILITY_CHECK_RESULT_SUCCESS`: The stability check was successful.
465+
* - `ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_NULL`: The `zeDriverGet` function pointer is invalid.
466+
* - `ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_FAILED`: The loader failed to retrieve driver information.
467+
* - `ZEL_STABILITY_CHECK_RESULT_EXCEPTION`: An exception occurred during the stability check.
474468
*
475-
* @note If debug tracing is enabled, debug messages are logged for each failure
476-
* scenario.
477-
* @note If the Loader is completely torn down, this thread is expected to be killed
478-
* due to invalid memory access and the stability check will determine a failure.
469+
* If debug tracing is enabled, debug messages are logged for each failure scenario.
479470
*
480-
* @exception This function catches all exceptions internally and does not throw.
471+
* @return An integer indicating the result of the stability check.
472+
*
473+
* @note If the loader is completely torn down, this function may fail due to invalid memory access.
474+
* @note This function catches all exceptions internally and does not throw.
481475
*/
482-
void stabilityCheck(std::promise<int> stabilityPromise) {
476+
int stabilityCheck() {
483477
try {
484478
if (!ze_lib::context->loaderDriverGet) {
485479
if (ze_lib::context->debugTraceEnabled) {
486480
std::string message = "LoaderDriverGet is a bad pointer. Exiting stability checker.";
487481
ze_lib::context->debug_trace_message(message, "");
488482
}
489-
stabilityPromise.set_value(ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_NULL);
490-
return;
483+
return ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_NULL;
491484
}
492485

493486
uint32_t driverCount = 0;
@@ -498,14 +491,11 @@ void stabilityCheck(std::promise<int> stabilityPromise) {
498491
std::string message = "Loader stability check failed. Exiting stability checker.";
499492
ze_lib::context->debug_trace_message(message, "");
500493
}
501-
stabilityPromise.set_value(ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_FAILED);
502-
return;
494+
return ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_FAILED;
503495
}
504-
stabilityPromise.set_value(ZEL_STABILITY_CHECK_RESULT_SUCCESS);
505-
return;
496+
return ZEL_STABILITY_CHECK_RESULT_SUCCESS;
506497
} catch (...) {
507-
stabilityPromise.set_value(ZEL_STABILITY_CHECK_RESULT_EXCEPTION);
508-
return;
498+
return ZEL_STABILITY_CHECK_RESULT_EXCEPTION;
509499
}
510500
}
511501
#endif
@@ -546,54 +536,35 @@ zelCheckIsLoaderInTearDown() {
546536
std::call_once(stabilityThreadFlag, []() {
547537
ze_lib::stabilityThread = new std::thread([]() {
548538
while (true) {
549-
try {
550-
std::promise<int> stabilityPromise;
551-
std::future<int> resultFuture = stabilityPromise.get_future();
552-
while(ze_lib::stabilityCheckThreadStarted && *ze_lib::stabilityCheckThreadStarted == 0) {
553-
std::this_thread::sleep_for(std::chrono::milliseconds(1));
554-
}
555-
if (!ze_lib::stabilityCheckThreadStarted) {
556-
break;
557-
}
558-
if (*ze_lib::stabilityCheckThreadStarted == -1) {
559-
break;
560-
}
561-
ze_lib::stabilityCheckThreadStarted->store(0);
562-
stabilityCheck(std::move(stabilityPromise));
563-
int result = resultFuture.get();
564-
if (result != ZEL_STABILITY_CHECK_RESULT_SUCCESS) {
565-
if (ze_lib::context->debugTraceEnabled) {
566-
std::string message = "Loader stability check thread failed with result: " + std::to_string(result);
567-
ze_lib::context->debug_trace_message(message, "");
568-
}
569-
if (ze_lib::stabilityPromiseResult) {
570-
ze_lib::stabilityPromiseResult->set_value(result);
571-
}
572-
break; // Exit the thread if stability check fails
573-
}
574-
if (ze_lib::stabilityPromiseResult) {
575-
ze_lib::stabilityPromiseResult->set_value(result);
576-
}
577-
} catch (const std::exception& e) {
539+
while(ze_lib::stabilityCheckThreadStarted && ze_lib::stabilityCheckThreadStarted->load() == 0) {
540+
std::this_thread::sleep_for(std::chrono::milliseconds(1));
541+
}
542+
if (ze_lib::destruction || ze_lib::context == nullptr) {
543+
break;
544+
}
545+
if (!ze_lib::stabilityCheckThreadStarted) {
546+
break;
547+
}
548+
if (ze_lib::stabilityCheckThreadStarted->load() == -1) {
549+
break;
550+
}
551+
ze_lib::stabilityCheckThreadStarted->store(0);
552+
int result = stabilityCheck();
553+
if (result != ZEL_STABILITY_CHECK_RESULT_SUCCESS) {
578554
if (ze_lib::context->debugTraceEnabled) {
579-
std::string message = "Exception caught in stability check thread: " + std::string(e.what());
555+
std::string message = "Loader stability check thread failed with result: " + std::to_string(result);
580556
ze_lib::context->debug_trace_message(message, "");
581-
if (ze_lib::stabilityPromiseResult) {
582-
ze_lib::stabilityPromiseResult->set_value(ZEL_STABILITY_CHECK_RESULT_EXCEPTION);
583-
}
584557
}
585-
} catch (...) {
586-
if (ze_lib::context->debugTraceEnabled) {
587-
std::string message = "Unknown exception caught in stability check thread.";
588-
ze_lib::context->debug_trace_message(message, "");
589-
if (ze_lib::stabilityPromiseResult) {
590-
ze_lib::stabilityPromiseResult->set_value(ZEL_STABILITY_CHECK_RESULT_EXCEPTION);
591-
}
558+
if (ze_lib::stabilityPromiseResult) {
559+
ze_lib::stabilityPromiseResult->set_value(result);
592560
}
561+
break; // Exit the thread if stability check fails
562+
}
563+
if (ze_lib::stabilityPromiseResult) {
564+
ze_lib::stabilityPromiseResult->set_value(result);
593565
}
594566
}
595567
});
596-
ze_lib::stabilityThread->detach();
597568
});
598569
if (ze_lib::resultFutureResult->wait_for(std::chrono::milliseconds(ZEL_STABILITY_CHECK_THREAD_TIMEOUT)) == std::future_status::timeout) {
599570
if (ze_lib::context->debugTraceEnabled) {

0 commit comments

Comments
 (0)