Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion include/LLM_runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ using LibHandle = void *; ///< Unix library handle type
#define LLM_FUNCTIONS_LIST(M) \
M(LLMService_Registry, void, LLMProviderRegistry *) \
M(LLMService_InjectErrorState, void, ErrorState *) \
M(LLMService_Supports_GPU, bool) \
M(LLMService_Construct, LLMProvider *, const char *, int, int, int, bool, int, int, bool, int, const char **) \
M(LLMService_From_Command, LLMProvider *, const char *)

Expand Down Expand Up @@ -227,7 +228,7 @@ class UNDREAMAI_API LLMService : public LLMProvider
/// @param llm_lib_filename Specific library filename to load
/// @return true if library loaded successfully, false otherwise
/// @details Internal method for loading specific library files
bool create_LLM_library_backend(const std::string &command, const std::string &llm_lib_filename);
bool create_LLM_library_backend(const std::string &command, const std::string &llm_lib_filename, bool is_gpu_library=false);
};

/// @brief Get OS-specific library directory
Expand Down
3 changes: 3 additions & 0 deletions include/LLM_service.h
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,9 @@ extern "C"
UNDREAMAI_API const char *LLMService_Command(LLMService *llm_service);

UNDREAMAI_API void LLMService_InjectErrorState(ErrorState *error_state);

UNDREAMAI_API bool LLMService_Supports_GPU();

}

/// @}
26 changes: 20 additions & 6 deletions src/LLM_runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ const std::string platform_name()
#endif
}

// Name fragments of the GPU-accelerated backend libraries. Used in two places:
// available_architectures() registers each entry as a loadable architecture when
// GPU support is requested, and create_LLM_library() substring-matches a library
// filename against these entries to decide whether it is a GPU build.
const std::vector<std::string> GPU_LIBRARIES = {"cublas", "tinyblas", "hip", "vulkan"};

const std::vector<std::string> available_architectures(bool gpu)
{
std::vector<std::string> architectures;
Expand Down Expand Up @@ -53,10 +55,8 @@ const std::vector<std::string> available_architectures(bool gpu)
#if defined(_WIN32) || defined(__linux__)
if (gpu)
{
add_library("cublas");
add_library("tinyblas");
add_library("hip");
add_library("vulkan");
for (std::string gpu_library: GPU_LIBRARIES)
add_library(gpu_library);
}
if (has_avx512())
add_library("avx512");
Expand Down Expand Up @@ -215,7 +215,7 @@ LibHandle load_library_safe(const std::string &path)
return handle_out;
}

bool LLMService::create_LLM_library_backend(const std::string &command, const std::string &llm_lib_filename)
bool LLMService::create_LLM_library_backend(const std::string &command, const std::string &llm_lib_filename, bool is_gpu_library)
{
sigjmp_buf local_jump_point;
sigjmp_buf* old_jump_point = get_current_jump_point_ptr(); // Save the old one
Expand Down Expand Up @@ -269,6 +269,11 @@ bool LLMService::create_LLM_library_backend(const std::string &command, const st
}
LLM_FUNCTIONS_LIST(DECLARE_AND_LOAD)
#undef DECLARE_AND_LOAD
if (is_gpu_library && !LLMService_Supports_GPU())
{
std::cout << "Doesn't support the GPU, skipping"<<std::endl;
continue;
}

LLMService_Registry(&LLMProviderRegistry::instance());
LLMService_InjectErrorState(&ErrorStateRegistry::get_error_state());
Expand Down Expand Up @@ -299,7 +304,16 @@ bool LLMService::create_LLM_library(const std::string &command)
for (const auto &llm_lib_filename : available_architectures(gpu))
{
fail("", 0);
bool success = create_LLM_library_backend(command, llm_lib_filename);
bool is_gpu_library = false;
for (std::string gpu_library: GPU_LIBRARIES)
{
if (llm_lib_filename.find(gpu_library) != std::string::npos) {
is_gpu_library = true;
break;
}
}

bool success = create_LLM_library_backend(command, llm_lib_filename, is_gpu_library);
if (success)
{
std::cout << "Successfully loaded: " << llm_lib_filename << std::endl;
Expand Down
5 changes: 5 additions & 0 deletions src/LLM_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,11 @@ void LLMService_Registry(LLMProviderRegistry *existing_instance)
LLMProviderRegistry::inject_registry(existing_instance);
}

/// @brief C-API query for GPU offload capability of the loaded backend.
/// @return true if the underlying llama backend reports GPU offload support,
///         false otherwise.
/// @details Thin extern-"C" wrapper around llama_supports_gpu_offload() so the
///          runtime loader can probe a candidate library before registering it.
bool LLMService_Supports_GPU()
{
    const bool gpu_offload_available = llama_supports_gpu_offload();
    return gpu_offload_available;
}

LLMService *LLMService_Construct(const char *model_path, int num_slots, int num_threads, int num_GPU_layers, bool flash_attention, int context_size, int batch_size, bool embedding_only, int lora_count, const char **lora_paths)
{
std::vector<std::string> lora_paths_vector;
Expand Down
1 change: 1 addition & 0 deletions tools/llamalib_server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ int main(int argc, char **argv)
llm->debug(1);
llm->start();
llm->start_server();
std::cout << "service started" << std::endl;
llm->join_server();

return 0;
Expand Down