Skip to content

Commit 9203d12

Browse files
committed
Separate handling of GPU/CPU architectures
1 parent 238bc29 commit 9203d12

2 files changed

Lines changed: 53 additions & 39 deletions

File tree

csharp/LlamaLib.cs

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -645,7 +645,7 @@ public void Dispose() {}
645645
private List<IntPtr> dependencyHandles = new List<IntPtr>();
646646
private static int debugLevelGlobal = 0;
647647
private static CharArrayCallback loggingCallbackGlobal = null;
648-
private string[] availableLibraries = null;
648+
private List<Tuple<string, bool>> availableLibraries = null;
649649
private int currentLibraryIndex = 0;
650650

651651
// Runtime lib
@@ -778,12 +778,20 @@ private string[] GetAvailableArchitectures(bool gpu)
778778

779779
private void LoadLibraries(bool gpu)
780780
{
781-
availableLibraries = GetAvailableArchitectures(gpu);
781+
availableLibraries = new List<Tuple<string, bool>>();
782+
bool[] arch_options = gpu ? new bool[] { true, false }: new bool[] { false };
783+
foreach (bool arch_gpu in arch_options)
784+
{
785+
string[] archs = GetAvailableArchitectures(arch_gpu);
786+
foreach (string arch in archs) availableLibraries.Add(new Tuple<string, bool>(arch, arch_gpu));
787+
}
782788
currentLibraryIndex = -1;
783789

784790
if (!TryNextLibrary())
785791
{
786-
throw new InvalidOperationException($"Failed to load any library. Available libraries: {string.Join(", ", availableLibraries)}");
792+
string libs = "";
793+
foreach (Tuple<string, bool> arch in availableLibraries) libs += arch.Item1 + ", ";
794+
throw new InvalidOperationException($"Failed to load any library. Available libraries: {libs.TrimEnd(',', ' ')}");
787795
}
788796
}
789797

@@ -824,9 +832,9 @@ public bool TryNextLibrary()
824832
libraryHandle = IntPtr.Zero;
825833
}
826834

827-
while (++currentLibraryIndex < availableLibraries.Length)
835+
while (++currentLibraryIndex < availableLibraries.Count)
828836
{
829-
string library = availableLibraries[currentLibraryIndex];
837+
var (library, is_gpu_library) = availableLibraries[currentLibraryIndex];
830838
try
831839
{
832840
string libraryPath = FindLibrary(library.Trim());
@@ -839,6 +847,8 @@ public bool TryNextLibrary()
839847
libraryHandle = LibraryLoader.LoadLibrary(libraryPath);
840848

841849
LoadFunctionPointers();
850+
if (is_gpu_library && !LLMService_Supports_GPU()) continue;
851+
842852
architecture = library.Trim();
843853
if (debugLevelGlobal > 0) Console.WriteLine("Successfully loaded: " + libraryPath);
844854
return true;
@@ -885,6 +895,7 @@ private void LoadFunctionPointers()
885895
LLM_Debug = LibraryLoader.GetSymbolDelegate<LLM_Debug_Delegate>(libraryHandle, "LLM_Debug");
886896
LLM_Logging_Callback = LibraryLoader.GetSymbolDelegate<LLM_Logging_Callback_Delegate>(libraryHandle, "LLM_Logging_Callback");
887897
LLM_Logging_Stop = LibraryLoader.GetSymbolDelegate<LLM_Logging_Stop_Delegate>(libraryHandle, "LLM_Logging_Stop");
898+
LLMService_Supports_GPU = LibraryLoader.GetSymbolDelegate<LLMService_Supports_GPU_Delegate>(libraryHandle, "LLMService_Supports_GPU");
888899

889900
LLM_Enable_Reasoning_Internal = LibraryLoader.GetSymbolDelegate<LLM_Enable_Reasoning_Delegate>(libraryHandle, "LLM_Enable_Reasoning");
890901
LLM_Apply_Template_Internal = LibraryLoader.GetSymbolDelegate<LLM_Apply_Template_Delegate>(libraryHandle, "LLM_Apply_Template");
@@ -950,9 +961,13 @@ private void LoadFunctionPointers()
950961
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
951962
public delegate void LLM_Logging_Stop_Delegate();
952963

964+
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
965+
public delegate bool LLMService_Supports_GPU_Delegate();
966+
953967
public LLM_Debug_Delegate LLM_Debug;
954968
public LLM_Logging_Callback_Delegate LLM_Logging_Callback;
955969
public LLM_Logging_Stop_Delegate LLM_Logging_Stop;
970+
public LLMService_Supports_GPU_Delegate LLMService_Supports_GPU;
956971

957972
public static void Debug(int debugLevel)
958973
{

src/LLM_runtime.cpp

Lines changed: 33 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@ const std::string platform_name()
2020
#endif
2121
}
2222

23-
const std::vector<std::string> GPU_LIBRARIES = {"cublas", "tinyblas", "hip", "vulkan"};
24-
2523
const std::vector<std::string> available_architectures(bool gpu)
2624
{
2725
std::vector<std::string> architectures;
@@ -52,23 +50,30 @@ const std::vector<std::string> available_architectures(bool gpu)
5250
architectures.push_back(path);
5351
};
5452

55-
#if defined(_WIN32) || defined(__linux__)
5653
if (gpu)
5754
{
58-
for (std::string gpu_library: GPU_LIBRARIES)
59-
add_library(gpu_library);
55+
#if defined(_WIN32) || defined(__linux__)
56+
add_library("cublas");
57+
add_library("tinyblas");
58+
add_library("hip");
59+
add_library("vulkan");
60+
#endif
6061
}
61-
if (has_avx512())
62-
add_library("avx512");
63-
if (has_avx2())
64-
add_library("avx2");
65-
if (has_avx())
66-
add_library("avx");
67-
add_library("noavx");
62+
else
63+
{
64+
#if defined(_WIN32) || defined(__linux__)
65+
if (has_avx512())
66+
add_library("avx512");
67+
if (has_avx2())
68+
add_library("avx2");
69+
if (has_avx())
70+
add_library("avx");
71+
add_library("noavx");
6872
#elif defined(__APPLE__)
69-
add_library("acc");
70-
add_library("no-acc");
73+
add_library("acc");
74+
add_library("no-acc");
7175
#endif
76+
}
7277
return architectures;
7378
}
7479

@@ -269,11 +274,7 @@ bool LLMService::create_LLM_library_backend(const std::string &command, const st
269274
}
270275
LLM_FUNCTIONS_LIST(DECLARE_AND_LOAD)
271276
#undef DECLARE_AND_LOAD
272-
if (is_gpu_library && !LLMService_Supports_GPU())
273-
{
274-
std::cout << "Doesn't support the GPU, skipping"<<std::endl;
275-
continue;
276-
}
277+
if (is_gpu_library && !LLMService_Supports_GPU()) continue;
277278

278279
LLMService_Registry(&LLMProviderRegistry::instance());
279280
LLMService_InjectErrorState(&ErrorStateRegistry::get_error_state());
@@ -300,25 +301,23 @@ bool LLMService::create_LLM_library_backend(const std::string &command, const st
300301

301302
bool LLMService::create_LLM_library(const std::string &command)
302303
{
303-
bool gpu = has_gpu_layers(command);
304-
for (const auto &llm_lib_filename : available_architectures(gpu))
304+
std::vector<std::string> archs_cpu = available_architectures(false);
305+
std::vector<std::string> archs_gpu;
306+
if (has_gpu_layers(command)) archs_gpu = available_architectures(true);
307+
308+
for (bool is_gpu_library: {true, false})
305309
{
306-
fail("", 0);
307-
bool is_gpu_library = false;
308-
for (std::string gpu_library: GPU_LIBRARIES)
310+
std::vector<std::string> archs = is_gpu_library? archs_gpu: archs_cpu;
311+
for (const auto &llm_lib_filename : archs)
309312
{
310-
if (llm_lib_filename.find(gpu_library) != std::string::npos) {
311-
is_gpu_library = true;
312-
break;
313+
fail("", 0);
314+
bool success = create_LLM_library_backend(command, llm_lib_filename, is_gpu_library);
315+
if (success)
316+
{
317+
std::cout << "Successfully loaded: " << llm_lib_filename << std::endl;
318+
return true;
313319
}
314320
}
315-
316-
bool success = create_LLM_library_backend(command, llm_lib_filename, is_gpu_library);
317-
if (success)
318-
{
319-
std::cout << "Successfully loaded: " << llm_lib_filename << std::endl;
320-
return true;
321-
}
322321
}
323322
std::cerr << "Couldn't load a backend" << std::endl;
324323
return false;

0 commit comments

Comments
 (0)