Separate handling of GPU and CPU architectures when loading backend libraries

amakropoulos · amakropoulos · commit 9203d12e31f9 · 2026-02-27T11:52:02.000+02:00
diff --git a/csharp/LlamaLib.cs b/csharp/LlamaLib.cs
@@ -645,7 +645,7 @@ public void Dispose() {}
         private List<IntPtr> dependencyHandles = new List<IntPtr>();
         private static int debugLevelGlobal = 0;
         private static CharArrayCallback loggingCallbackGlobal = null;
-        private string[] availableLibraries = null;
+        private List<Tuple<string, bool>> availableLibraries = null;
         private int currentLibraryIndex = 0;
 
         // Runtime lib
@@ -778,12 +778,20 @@ private string[] GetAvailableArchitectures(bool gpu)
 
         private void LoadLibraries(bool gpu)
         {
-            availableLibraries = GetAvailableArchitectures(gpu);
+            availableLibraries = new List<Tuple<string, bool>>();
+            bool[] arch_options = gpu ? new bool[] { true, false }: new bool[] { false };
+            foreach (bool arch_gpu in arch_options)
+            {
+                string[] archs = GetAvailableArchitectures(arch_gpu);
+                foreach (string arch in archs) availableLibraries.Add(new Tuple<string, bool>(arch, arch_gpu));
+            }
             currentLibraryIndex = -1;
 
             if (!TryNextLibrary())
             {
-                throw new InvalidOperationException($"Failed to load any library. Available libraries: {string.Join(", ", availableLibraries)}");
+                string libs = "";
+                foreach (Tuple<string, bool> arch in availableLibraries) libs += arch.Item1 + ", ";
+                throw new InvalidOperationException($"Failed to load any library. Available libraries: {libs.TrimEnd(',', ' ')}");
             }
         }
 
@@ -824,9 +832,9 @@ public bool TryNextLibrary()
                 libraryHandle = IntPtr.Zero;
             }
 
-            while (++currentLibraryIndex < availableLibraries.Length)
+            while (++currentLibraryIndex < availableLibraries.Count)
             {
-                string library = availableLibraries[currentLibraryIndex];
+                var (library, is_gpu_library) = availableLibraries[currentLibraryIndex];
                 try
                 {
                     string libraryPath = FindLibrary(library.Trim());
@@ -839,6 +847,8 @@ public bool TryNextLibrary()
                     libraryHandle = LibraryLoader.LoadLibrary(libraryPath);
 
                     LoadFunctionPointers();
+                    if (is_gpu_library && !LLMService_Supports_GPU()) continue;
+
                     architecture = library.Trim();
                     if (debugLevelGlobal > 0) Console.WriteLine("Successfully loaded: " + libraryPath);
                     return true;
@@ -885,6 +895,7 @@ private void LoadFunctionPointers()
             LLM_Debug = LibraryLoader.GetSymbolDelegate<LLM_Debug_Delegate>(libraryHandle, "LLM_Debug");
             LLM_Logging_Callback = LibraryLoader.GetSymbolDelegate<LLM_Logging_Callback_Delegate>(libraryHandle, "LLM_Logging_Callback");
             LLM_Logging_Stop = LibraryLoader.GetSymbolDelegate<LLM_Logging_Stop_Delegate>(libraryHandle, "LLM_Logging_Stop");
+            LLMService_Supports_GPU = LibraryLoader.GetSymbolDelegate<LLMService_Supports_GPU_Delegate>(libraryHandle, "LLMService_Supports_GPU");
 
             LLM_Enable_Reasoning_Internal = LibraryLoader.GetSymbolDelegate<LLM_Enable_Reasoning_Delegate>(libraryHandle, "LLM_Enable_Reasoning");
             LLM_Apply_Template_Internal = LibraryLoader.GetSymbolDelegate<LLM_Apply_Template_Delegate>(libraryHandle, "LLM_Apply_Template");
@@ -950,9 +961,13 @@ private void LoadFunctionPointers()
         [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
         public delegate void LLM_Logging_Stop_Delegate();
 
+        [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
+        public delegate bool LLMService_Supports_GPU_Delegate();
+
         public LLM_Debug_Delegate LLM_Debug;
         public LLM_Logging_Callback_Delegate LLM_Logging_Callback;
         public LLM_Logging_Stop_Delegate LLM_Logging_Stop;
+        public LLMService_Supports_GPU_Delegate LLMService_Supports_GPU;
 
         public static void Debug(int debugLevel)
         {
diff --git a/src/LLM_runtime.cpp b/src/LLM_runtime.cpp
@@ -20,8 +20,6 @@ const std::string platform_name()
 #endif
 }
 
-const std::vector<std::string> GPU_LIBRARIES = {"cublas", "tinyblas", "hip", "vulkan"};
-
 const std::vector<std::string> available_architectures(bool gpu)
 {
     std::vector<std::string> architectures;
@@ -52,23 +50,30 @@ const std::vector<std::string> available_architectures(bool gpu)
         architectures.push_back(path);
     };
 
-#if defined(_WIN32) || defined(__linux__)
     if (gpu)
     {
-        for (std::string gpu_library: GPU_LIBRARIES)
-            add_library(gpu_library);
+#if defined(_WIN32) || defined(__linux__)
+        add_library("cublas");
+        add_library("tinyblas");
+        add_library("hip");
+        add_library("vulkan");
+#endif
     }
-    if (has_avx512())
-        add_library("avx512");
-    if (has_avx2())
-        add_library("avx2");
-    if (has_avx())
-        add_library("avx");
-    add_library("noavx");
+    else
+    {
+#if defined(_WIN32) || defined(__linux__)
+        if (has_avx512())
+            add_library("avx512");
+        if (has_avx2())
+            add_library("avx2");
+        if (has_avx())
+            add_library("avx");
+        add_library("noavx");
 #elif defined(__APPLE__)
-    add_library("acc");
-    add_library("no-acc");
+        add_library("acc");
+        add_library("no-acc");
 #endif
+    }
     return architectures;
 }
 
@@ -269,11 +274,7 @@ bool LLMService::create_LLM_library_backend(const std::string &command, const st
     }
             LLM_FUNCTIONS_LIST(DECLARE_AND_LOAD)
 #undef DECLARE_AND_LOAD
-            if (is_gpu_library && !LLMService_Supports_GPU())
-            {
-                std::cout << "Doesn't support the GPU, skipping"<<std::endl;
-                continue;
-            }
+            if (is_gpu_library && !LLMService_Supports_GPU()) continue;
 
             LLMService_Registry(&LLMProviderRegistry::instance());
             LLMService_InjectErrorState(&ErrorStateRegistry::get_error_state());
@@ -300,25 +301,23 @@ bool LLMService::create_LLM_library_backend(const std::string &command, const st
 
 bool LLMService::create_LLM_library(const std::string &command)
 {
-    bool gpu = has_gpu_layers(command);
-    for (const auto &llm_lib_filename : available_architectures(gpu))
+    std::vector<std::string> archs_cpu = available_architectures(false);
+    std::vector<std::string> archs_gpu;
+    if (has_gpu_layers(command)) archs_gpu = available_architectures(true);
+
+    for (bool is_gpu_library: {true, false})
     {
-        fail("", 0);
-        bool is_gpu_library = false;
-        for (std::string gpu_library: GPU_LIBRARIES)
+        std::vector<std::string> archs = is_gpu_library? archs_gpu: archs_cpu;
+        for (const auto &llm_lib_filename : archs)
         {
-            if (llm_lib_filename.find(gpu_library) != std::string::npos) {
-                is_gpu_library = true;
-                break;
+            fail("", 0);
+            bool success = create_LLM_library_backend(command, llm_lib_filename, is_gpu_library);
+            if (success)
+            {
+                std::cout << "Successfully loaded: " << llm_lib_filename << std::endl;
+                return true;
             }
         }
-
-        bool success = create_LLM_library_backend(command, llm_lib_filename, is_gpu_library);
-        if (success)
-        {
-            std::cout << "Successfully loaded: " << llm_lib_filename << std::endl;
-            return true;
-        }
     }
     std::cerr << "Couldn't load a backend" << std::endl;
     return false;