Skip to content
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions samples/zello_world/zello_world.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,9 +196,13 @@ int main( int argc, char *argv[] )
zeEventHostSynchronize(event, UINT64_MAX );
std::cout << "Congratulations, the device completed execution!\n";

zelCheckIsLoaderInTearDown();
zeContextDestroy(context);
zelCheckIsLoaderInTearDown();
zeCommandListDestroy(command_list);
zelCheckIsLoaderInTearDown();
zeEventDestroy(event);
zelCheckIsLoaderInTearDown();
zeEventPoolDestroy(event_pool);

if (tracing_enabled) {
Expand Down
160 changes: 117 additions & 43 deletions source/lib/ze_lib.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whitespace promotes readability?

/*
*
* Copyright (C) 2019-2021 Intel Corporation
Expand Down Expand Up @@ -27,6 +28,11 @@ namespace ze_lib
}
}
bool delayContextDestruction = false;
std::mutex *stabilityMutex = nullptr;
std::promise<int> *stabilityPromiseResult = nullptr;
std::future<int> *resultFutureResult = nullptr;
std::atomic<int> *stabilityCheckThreadStarted = nullptr;
std::thread *stabilityThread = nullptr;
#endif
bool destruction = false;

Expand All @@ -43,6 +49,35 @@ namespace ze_lib
if (loader) {
FREE_DRIVER_LIBRARY( loader );
}
if (ze_lib::stabilityCheckThreadStarted)
ze_lib::stabilityCheckThreadStarted->store(-1);
try {
if (stabilityThread && stabilityThread->joinable()) {
stabilityThread->join();
}
} catch (...) {
// Ignore any exceptions from thread join
}
if (stabilityThread) {
delete stabilityThread;
stabilityThread = nullptr;
}
if (stabilityMutex) {
delete stabilityMutex;
stabilityMutex = nullptr;
}
if (stabilityPromiseResult) {
delete stabilityPromiseResult;
stabilityPromiseResult = nullptr;
}
if (resultFutureResult) {
delete resultFutureResult;
resultFutureResult = nullptr;
}
if (stabilityCheckThreadStarted) {
delete stabilityCheckThreadStarted;
stabilityCheckThreadStarted = nullptr;
}
#endif
ze_lib::destruction = true;
};
Expand Down Expand Up @@ -149,6 +184,10 @@ namespace ze_lib
std::string version_message = "Loader API Version to be requested is v" + std::to_string(ZE_MAJOR_VERSION(version)) + "." + std::to_string(ZE_MINOR_VERSION(version));
debug_trace_message(version_message, "");
loaderDriverGet = reinterpret_cast<ze_pfnDriverGet_t>(GET_FUNCTION_PTR(loader, "zeDriverGet"));
stabilityMutex = new std::mutex();
stabilityPromiseResult = new std::promise<int>();
resultFutureResult = new std::future<int>(stabilityPromiseResult->get_future());
stabilityCheckThreadStarted = new std::atomic<int>(0);
#else
result = zeLoaderInit();
if( ZE_RESULT_SUCCESS == result ) {
Expand Down Expand Up @@ -410,61 +449,53 @@ zelSetDelayLoaderContextTeardown()
#define ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_NULL 1
#define ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_FAILED 2
#define ZEL_STABILITY_CHECK_RESULT_EXCEPTION 3
// The stability check thread timeout in milliseconds
#define ZEL_STABILITY_CHECK_THREAD_TIMEOUT 100

/**
* @brief Performs a stability check for the Level Zero loader.
*
* This function checks the stability of the Level Zero loader by verifying
* the presence of the loader module, the validity of the `zeDriverGet` function
* pointer, and the ability to retrieve driver information. The result of the
* stability check is communicated through the provided promise.
* This function verifies the stability of the Level Zero loader by checking:
* - The presence of the loader module.
* - The validity of the `zeDriverGet` function pointer.
* - The ability to retrieve driver information.
*
* The result of the stability check is returned as an integer, with the following possible values:
* - `ZEL_STABILITY_CHECK_RESULT_SUCCESS`: The stability check was successful.
* - `ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_NULL`: The `zeDriverGet` function pointer is invalid.
* - `ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_FAILED`: The loader failed to retrieve driver information.
* - `ZEL_STABILITY_CHECK_RESULT_EXCEPTION`: An exception occurred during the stability check.
*
* @param stabilityPromise A promise object used to communicate the result of
* the stability check. The promise is set with one of
* the following values:
* - ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_NULL: The
* `zeDriverGet` function pointer is invalid.
* - ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_FAILED: The
* loader failed to retrieve driver information.
* - ZEL_STABILITY_CHECK_RESULT_EXCEPTION: An
* exception occurred during the stability check.
* - ZEL_STABILITY_CHECK_RESULT_SUCCESS: The stability
* check was successful.
* If debug tracing is enabled, debug messages are logged for each failure scenario.
*
* @note If debug tracing is enabled, debug messages are logged for each failure
* scenario.
* @note If the Loader is completely torn down, this thread is expected to be killed
* due to invalid memory access and the stability check will determine a failure.
* @return An integer indicating the result of the stability check.
*
* @exception This function catches all exceptions internally and does not throw.
* @note If the loader is completely torn down, this function may fail due to invalid memory access.
* @note This function catches all exceptions internally and does not throw.
*/
void stabilityCheck(std::promise<int> stabilityPromise) {
int stabilityCheck() {
try {
if (!ze_lib::context->loaderDriverGet) {
if (ze_lib::context->debugTraceEnabled) {
std::string message = "LoaderDriverGet is a bad pointer. Exiting stability checker thread.";
std::string message = "LoaderDriverGet is a bad pointer. Exiting stability checker.";
ze_lib::context->debug_trace_message(message, "");
}
stabilityPromise.set_value(ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_NULL);
return;
return ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_NULL;
}

uint32_t driverCount = 0;
ze_result_t result = ZE_RESULT_ERROR_UNINITIALIZED;
result = ze_lib::context->loaderDriverGet(&driverCount, nullptr);
if (result != ZE_RESULT_SUCCESS || driverCount == 0) {
if (ze_lib::context->debugTraceEnabled) {
std::string message = "Loader stability check failed. Exiting stability checker thread.";
std::string message = "Loader stability check failed. Exiting stability checker.";
ze_lib::context->debug_trace_message(message, "");
}
stabilityPromise.set_value(ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_FAILED);
return;
return ZEL_STABILITY_CHECK_RESULT_DRIVER_GET_FAILED;
}
stabilityPromise.set_value(ZEL_STABILITY_CHECK_RESULT_SUCCESS);
return;
return ZEL_STABILITY_CHECK_RESULT_SUCCESS;
} catch (...) {
stabilityPromise.set_value(ZEL_STABILITY_CHECK_RESULT_EXCEPTION);
return;
return ZEL_STABILITY_CHECK_RESULT_EXCEPTION;
}
}
#endif
Expand All @@ -490,18 +521,60 @@ zelCheckIsLoaderInTearDown() {
return true;
}
#ifdef DYNAMIC_LOAD_LOADER
std::promise<int> stabilityPromise;
std::future<int> resultFuture = stabilityPromise.get_future();
int result = -1;
static bool unstable = false;
int threadResult = -1;
if (unstable) {
return true;
}
try {
// Launch the stability checker thread
std::thread stabilityThread(stabilityCheck, std::move(stabilityPromise));
result = resultFuture.get(); // Blocks until the result is available
if (ze_lib::context->debugTraceEnabled) {
std::string message = "Stability checker thread completed with result: " + std::to_string(result);
ze_lib::context->debug_trace_message(message, "");
// Launch the stability checker thread on the first call
static std::once_flag stabilityThreadFlag;
std::lock_guard<std::mutex> lock(*ze_lib::stabilityMutex);
*ze_lib::stabilityPromiseResult = std::promise<int>();
*ze_lib::resultFutureResult = ze_lib::stabilityPromiseResult->get_future();
ze_lib::stabilityCheckThreadStarted->store(1);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A minor comment on readability: you are using 1 here (likely to indicate RUNNING), and back in the destruction path you use -1 as a signal for the thread to exit. Maybe use a #define or an enum so the states are easier to read. :-)
1 = RUNNING
0 = STOPPED? QUIESCED?
-1 = EXIT_THREAD?

std::call_once(stabilityThreadFlag, []() {
ze_lib::stabilityThread = new std::thread([]() {
while (true) {
while(ze_lib::stabilityCheckThreadStarted && ze_lib::stabilityCheckThreadStarted->load() == 0) {
std::this_thread::sleep_for(std::chrono::milliseconds(1));
}
if (ze_lib::destruction || ze_lib::context == nullptr) {
break;
}
if (!ze_lib::stabilityCheckThreadStarted) {
break;
}
if (ze_lib::stabilityCheckThreadStarted->load() == -1) {
break;
}
ze_lib::stabilityCheckThreadStarted->store(0);
int result = stabilityCheck();
if (result != ZEL_STABILITY_CHECK_RESULT_SUCCESS) {
if (ze_lib::context->debugTraceEnabled) {
std::string message = "Loader stability check thread failed with result: " + std::to_string(result);
ze_lib::context->debug_trace_message(message, "");
}
if (ze_lib::stabilityPromiseResult) {
ze_lib::stabilityPromiseResult->set_value(result);
}
break; // Exit the thread if stability check fails
}
if (ze_lib::stabilityPromiseResult) {
ze_lib::stabilityPromiseResult->set_value(result);
}
}
});
});
if (ze_lib::resultFutureResult->wait_for(std::chrono::milliseconds(ZEL_STABILITY_CHECK_THREAD_TIMEOUT)) == std::future_status::timeout) {
if (ze_lib::context->debugTraceEnabled) {
std::string message = "Stability Thread timeout, assuming thread has crashed";
ze_lib::context->debug_trace_message(message, "");
}
threadResult = ZEL_STABILITY_CHECK_RESULT_EXCEPTION;
} else {
threadResult = ze_lib::resultFutureResult->get();
}
stabilityThread.join();
} catch (const std::exception& e) {
if (ze_lib::context->debugTraceEnabled) {
std::string message = "Exception caught in parent thread: " + std::string(e.what());
Expand All @@ -513,11 +586,12 @@ zelCheckIsLoaderInTearDown() {
ze_lib::context->debug_trace_message(message, "");
}
}
if (result != ZEL_STABILITY_CHECK_RESULT_SUCCESS) {
if (threadResult != ZEL_STABILITY_CHECK_RESULT_SUCCESS) {
if (ze_lib::context->debugTraceEnabled) {
std::string message = "Loader stability check failed with result: " + std::to_string(result);
std::string message = "Loader stability check failed with result: " + std::to_string(threadResult);
ze_lib::context->debug_trace_message(message, "");
}
unstable = true;
return true;
}
#endif
Expand Down
Loading