Skip to content

Commit bb81581

Browse files
committed
Added probe for GPU libs
1 parent c250d5c commit bb81581

1 file changed

Lines changed: 55 additions & 21 deletions

File tree

llamafile/cuda.c

Lines changed: 55 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -158,6 +158,55 @@ static bool LinkCuda(const char *dso) {
158158
return true;
159159
}
160160

161+
// Tear down the currently linked GPU library: close the library handle if one
// is open and clear the resolved entry points held in g_cuda.
// Takes no arguments and returns nothing; calling it with no library loaded
// only re-zeroes the entry-point slots.
static void UnlinkCuda(void) {
162+
if (g_cuda.lib_handle) {
163+
cosmo_dlclose(g_cuda.lib_handle);
164+
// Clear the handle so a repeated call does not close it twice.
g_cuda.lib_handle = NULL;
165+
}
166+
// Zero every entry-point slot. Other code in this file tests slots such as
// log_set, get_device_count and backend_reg for non-zero before calling them,
// so zeroing marks them as unavailable.
memset(&g_cuda.backend_init, 0, sizeof(g_cuda.backend_init));
167+
memset(&g_cuda.backend_reg, 0, sizeof(g_cuda.backend_reg));
168+
memset(&g_cuda.get_device_count, 0, sizeof(g_cuda.get_device_count));
169+
memset(&g_cuda.get_device_description, 0, sizeof(g_cuda.get_device_description));
170+
memset(&g_cuda.log_set, 0, sizeof(g_cuda.log_set));
171+
}
172+
173+
// Try to bring up one GPU backend library and confirm it is usable.
//
// Parameters:
//   dso    - library name handed to llamafile_try_load_prebuilt_dso().
//   is_amd - true when probing the ROCm build, false for CUDA; selects the
//            label used in the log message and is stored in g_cuda.is_amd
//            on success.
//
// Returns true when the library was loaded and the device probe did not
// report zero (or fewer) devices; g_cuda.is_amd is set in that case.
// Returns false when loading fails, or when the probe reports no devices,
// in which case the library is unlinked again via UnlinkCuda().
static bool TryGpuBackend(const char *dso, bool is_amd) {
174+
if (!llamafile_try_load_prebuilt_dso(dso, "cuda", LinkCuda))
175+
return false;
176+

177+
// Suppress the DSO's ggml logging before we touch any function that
178+
// triggers ggml_cuda_init() (e.g. get_device_count). Without this, a
179+
// failed init on the wrong backend would print a confusing error to
180+
// stderr even when --verbose is not set.
181+
// NOTE(review): default_abi / windows_abi look like two views of the same
// entry point, so either being non-zero is treated as "resolved" - confirm
// against the g_cuda declaration.
if (!FLAG_verbose && (g_cuda.log_set.default_abi || g_cuda.log_set.windows_abi)) {
182+
if (IsWindows())
183+
g_cuda.log_set.windows_abi(llamafile_log_callback_null, NULL);
184+
else
185+
g_cuda.log_set.default_abi(llamafile_log_callback_null, NULL);
186+
}
187+

188+
// Verify the backend has at least one device before committing. The DSO
189+
// loads fine even when no compatible hardware is present, so we must
190+
// probe device count to avoid registering a 0-device backend (which
191+
// would then prevent fallback to other GPU backends in AUTO mode).
192+
// If the get_device_count entry point was not resolved, the probe is skipped
// and the backend is accepted as-is.
if (g_cuda.get_device_count.default_abi || g_cuda.get_device_count.windows_abi) {
193+
int count;
194+
if (IsWindows())
195+
count = g_cuda.get_device_count.windows_abi();
196+
else
197+
count = g_cuda.get_device_count.default_abi();
198+
// A zero or negative count is treated as "no usable devices".
if (count <= 0) {
199+
llamafile_info("cuda", "%s library loaded but no devices detected; trying next backend",
200+
is_amd ? "ROCm" : "CUDA");
201+
// Release the library so the caller can try a different backend.
UnlinkCuda();
202+
return false;
203+
}
204+
}
205+

206+
// Record which vendor's backend was accepted only after the probe passed.
g_cuda.is_amd = is_amd;
207+
return true;
208+
}
209+
161210
static bool ImportCudaImpl(void) {
162211
// Skip on Apple Silicon (use Metal instead)
163212
if (IsXnuSilicon()) {
@@ -168,9 +217,7 @@ static bool ImportCudaImpl(void) {
168217
switch (FLAG_gpu) {
169218
case LLAMAFILE_GPU_AUTO:
170219
case LLAMAFILE_GPU_NVIDIA:
171-
break;
172220
case LLAMAFILE_GPU_AMD:
173-
g_cuda.is_amd = true;
174221
break;
175222
default:
176223
return false;
@@ -183,19 +230,16 @@ static bool ImportCudaImpl(void) {
183230
snprintf(cuda_dso, sizeof(cuda_dso), "ggml-cuda.%s", ext);
184231
snprintf(rocm_dso, sizeof(rocm_dso), "ggml-rocm.%s", ext);
185232

186-
// Try to load pre-built DSO
187-
if (FLAG_gpu == LLAMAFILE_GPU_AMD || FLAG_gpu == LLAMAFILE_GPU_AUTO) {
188-
if (llamafile_try_load_prebuilt_dso(rocm_dso, "cuda", LinkCuda)) {
189-
g_cuda.is_amd = true;
233+
// In AUTO mode, prefer CUDA over ROCm: it covers the common NVIDIA case
234+
// and lets ROCm be the fallback when CUDA is absent or has no devices.
235+
if (FLAG_gpu == LLAMAFILE_GPU_NVIDIA || FLAG_gpu == LLAMAFILE_GPU_AUTO) {
236+
if (TryGpuBackend(cuda_dso, false))
190237
goto RegisterBackend;
191-
}
192238
}
193239

194-
if (FLAG_gpu == LLAMAFILE_GPU_NVIDIA || FLAG_gpu == LLAMAFILE_GPU_AUTO) {
195-
if (llamafile_try_load_prebuilt_dso(cuda_dso, "cuda", LinkCuda)) {
196-
g_cuda.is_amd = false;
240+
if (FLAG_gpu == LLAMAFILE_GPU_AMD || FLAG_gpu == LLAMAFILE_GPU_AUTO) {
241+
if (TryGpuBackend(rocm_dso, true))
197242
goto RegisterBackend;
198-
}
199243
}
200244

201245
// No pre-built DSO found
@@ -206,16 +250,6 @@ static bool ImportCudaImpl(void) {
206250
return false;
207251

208252
RegisterBackend:
209-
// Suppress DSO's ggml logging before backend registration, which triggers
210-
// ggml_cuda_init() inside the DSO. Without this, CUDA device enumeration
211-
// messages appear even when --verbose is not set.
212-
if (!FLAG_verbose && (g_cuda.log_set.default_abi || g_cuda.log_set.windows_abi)) {
213-
if (IsWindows())
214-
g_cuda.log_set.windows_abi(llamafile_log_callback_null, NULL);
215-
else
216-
g_cuda.log_set.default_abi(llamafile_log_callback_null, NULL);
217-
}
218-
219253
// Register the CUDA backend with GGML
220254
if (g_cuda.backend_reg.default_abi || g_cuda.backend_reg.windows_abi) {
221255
ggml_backend_reg_t reg;

0 commit comments

Comments
 (0)