diff --git a/docs/building_dlls.md b/docs/building_dlls.md index 5baf3ebadb..ca6455e5c2 100644 --- a/docs/building_dlls.md +++ b/docs/building_dlls.md @@ -37,11 +37,16 @@ make setup # In the Windows terminal After the repo is set up, you can build the cuda / rocm / vulkan DLLs as follows. +The .bat files to run the builds are in the `llamafile` directory and accept the following +parameters: -- from powershell, open a Visual Studio 2022 Developer Command Prompt -``` - cmd /k "`"C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvarsall.bat`" x64" -``` +- `--clean` to restart a build from scratch +- `--output` to provide a custom output filename for the dll (default is ggml-xxxx.dll in the current directory +for xxxx in (cuda, rocm, vulkan)) +- only for the cuda libraries, you also have the `--cublas` option to link the library against NVIDIA's cublas instead of tinyblas + +Also note that for cuda and rocm libraries there are `*_parallel.bat` scripts that should work faster +by parallelizing compilation and taking advantage of your compute. Here's how you call the build scripts: - cd to the llamafile dir and start CUDA parallel build (this will run for a while...) 
``` diff --git a/llama.cpp.patches/patches/ggml_src_ggml-vulkan_ggml-vulkan.cpp.patch b/llama.cpp.patches/patches/ggml_src_ggml-vulkan_ggml-vulkan.cpp.patch index 79d955f4b0..81b8e2ce0a 100644 --- a/llama.cpp.patches/patches/ggml_src_ggml-vulkan_ggml-vulkan.cpp.patch +++ b/llama.cpp.patches/patches/ggml_src_ggml-vulkan_ggml-vulkan.cpp.patch @@ -27,7 +27,34 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu static VkDeviceSize ggml_vk_get_max_buffer_range(const ggml_backend_vk_context * ctx, const vk_buffer &buf, const VkDeviceSize offset) { const VkDeviceSize range = std::min(VkDeviceSize{buf->size - offset}, -@@ -13428,20 +13428,28 @@ static bool ggml_backend_buffer_is_vk(ggml_backend_buffer_t buffer) { +@@ -3391,20 +3391,15 @@ static void ggml_vk_load_shaders(vk_device& device) { + if (!pipeline->needed || pipeline->compiled) { + continue; + } +- // TODO: We're no longer benefitting from the async compiles (shaders are +- // compiled individually, as needed) and this complexity can be removed. ++ // Compile synchronously to avoid threading issues in cross-module DLL loading. ++ // ggml_vk_create_pipeline_func asserts compile_count > 0 and decrements it ++ // on completion, so we still need to increment it here. 
+ { +- // wait until fewer than N compiles are in progress +- uint32_t N = std::max(1u, std::thread::hardware_concurrency()); +- std::unique_lock guard(compile_count_mutex); +- while (compile_count >= N) { +- compile_count_cond.wait(guard); +- } ++ std::lock_guard guard(compile_count_mutex); + compile_count++; + } +- +- compiles.push_back(std::async(ggml_vk_create_pipeline_func, std::ref(device), std::ref(pipeline), spv_size, spv_data, entrypoint, +- parameter_count, wg_denoms, specialization_constants, disable_robustness, require_full_subgroups, required_subgroup_size)); ++ ggml_vk_create_pipeline_func(device, pipeline, spv_size, spv_data, entrypoint, ++ parameter_count, wg_denoms, specialization_constants, disable_robustness, require_full_subgroups, required_subgroup_size); + } + }; + +@@ -13428,20 +13423,28 @@ static bool ggml_backend_buffer_is_vk(ggml_backend_buffer_t buffer) { return buffer->buft->iface.get_name == ggml_backend_vk_buffer_type_name; } @@ -59,7 +86,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu VK_LOG_DEBUG("ggml_backend_vk_buffer_init_tensor(" << buffer << " (" << buffer->context << "), " << tensor << ")"); if (tensor->view_src != nullptr) { GGML_ASSERT(tensor->view_src->buffer->buft == buffer->buft); -@@ -13449,7 +13457,7 @@ static enum ggml_status ggml_backend_vk_buffer_init_tensor(ggml_backend_buffer_t +@@ -13449,7 +13452,7 @@ static enum ggml_status ggml_backend_vk_buffer_init_tensor(ggml_backend_buffer_t return GGML_STATUS_SUCCESS; } @@ -68,7 +95,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu VK_LOG_DEBUG("ggml_backend_vk_buffer_memset_tensor(" << buffer << ", " << tensor << ", " << value << ", " << offset << ", " << size << ")"); ggml_backend_vk_buffer_context * buf_ctx = (ggml_backend_vk_buffer_context *)buffer->context; vk_buffer buf = buf_ctx->dev_buffer; -@@ -13462,7 +13470,7 @@ static void ggml_backend_vk_buffer_memset_tensor(ggml_backend_buffer_t buffer, g 
+@@ -13462,7 +13465,7 @@ static void ggml_backend_vk_buffer_memset_tensor(ggml_backend_buffer_t buffer, g ggml_vk_buffer_memset(buf, vk_tensor_offset(tensor) + tensor->view_offs + offset, val32, size); } @@ -77,7 +104,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu VK_LOG_DEBUG("ggml_backend_vk_buffer_set_tensor(" << buffer << ", " << tensor << ", " << data << ", " << offset << ", " << size << ")"); ggml_backend_vk_buffer_context * buf_ctx = (ggml_backend_vk_buffer_context *)buffer->context; vk_buffer buf = buf_ctx->dev_buffer; -@@ -13474,7 +13482,7 @@ static void ggml_backend_vk_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml +@@ -13474,7 +13477,7 @@ static void ggml_backend_vk_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml ggml_vk_buffer_write(buf, vk_tensor_offset(tensor) + tensor->view_offs + offset, data, size); } @@ -86,7 +113,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu VK_LOG_DEBUG("ggml_backend_vk_buffer_get_tensor(" << buffer << ", " << tensor << ", " << data << ", " << offset << ", " << size << ")"); ggml_backend_vk_buffer_context * buf_ctx = (ggml_backend_vk_buffer_context *)buffer->context; -@@ -13487,7 +13495,7 @@ static void ggml_backend_vk_buffer_get_tensor(ggml_backend_buffer_t buffer, cons +@@ -13487,7 +13490,7 @@ static void ggml_backend_vk_buffer_get_tensor(ggml_backend_buffer_t buffer, cons ggml_vk_buffer_read(buf, vk_tensor_offset(tensor) + tensor->view_offs + offset, data, size); } @@ -95,7 +122,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu if (ggml_nbytes(src) == 0) { return true; } -@@ -13508,7 +13516,7 @@ static bool ggml_backend_vk_buffer_cpy_tensor(ggml_backend_buffer_t buffer, cons +@@ -13508,7 +13511,7 @@ static bool ggml_backend_vk_buffer_cpy_tensor(ggml_backend_buffer_t buffer, cons UNUSED(buffer); } @@ -104,7 +131,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu 
ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context; ggml_vk_buffer_memset(ctx->dev_buffer, 0, value, buffer->size); -@@ -13524,16 +13532,17 @@ static ggml_backend_buffer_i ggml_backend_vk_buffer_interface = { +@@ -13524,16 +13527,17 @@ static ggml_backend_buffer_i ggml_backend_vk_buffer_interface = { /* .cpy_tensor = */ ggml_backend_vk_buffer_cpy_tensor, /* .clear = */ ggml_backend_vk_buffer_clear, /* .reset = */ NULL, @@ -124,7 +151,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu VK_LOG_MEMORY("ggml_backend_vk_buffer_type_alloc_buffer(" << size << ")"); ggml_backend_vk_buffer_type_context * ctx = (ggml_backend_vk_buffer_type_context *) buft->context; -@@ -13549,17 +13558,17 @@ static ggml_backend_buffer_t ggml_backend_vk_buffer_type_alloc_buffer(ggml_backe +@@ -13549,17 +13553,17 @@ static ggml_backend_buffer_t ggml_backend_vk_buffer_type_alloc_buffer(ggml_backe return ggml_backend_buffer_init(buft, ggml_backend_vk_buffer_interface, bufctx, size); } @@ -145,7 +172,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu return ggml_nbytes(tensor); UNUSED(buft); -@@ -13577,24 +13586,24 @@ ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num) { +@@ -13577,24 +13581,24 @@ ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num) { // host buffer type @@ -174,7 +201,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu VK_LOG_MEMORY("ggml_backend_vk_host_buffer_type_alloc_buffer(" << size << ")"); size += 32; // Behave like the CPU buffer type -@@ -13610,19 +13619,20 @@ static ggml_backend_buffer_t ggml_backend_vk_host_buffer_type_alloc_buffer(ggml_ +@@ -13610,19 +13614,20 @@ static ggml_backend_buffer_t ggml_backend_vk_host_buffer_type_alloc_buffer(ggml_ ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(ptr, size); buffer->buft = buft; buffer->iface.free_buffer = 
ggml_backend_vk_host_buffer_free_buffer; @@ -197,7 +224,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu return vk_instance.devices[0]->suballocation_block_size; UNUSED(buft); -@@ -13654,13 +13664,13 @@ ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() { +@@ -13654,13 +13659,13 @@ ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() { // backend @@ -213,7 +240,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context; VK_LOG_DEBUG("ggml_backend_vk_free(" << ctx->name << ")"); -@@ -13676,7 +13686,7 @@ static ggml_backend_buffer_type_t ggml_backend_vk_get_default_buffer_type(ggml_b +@@ -13676,7 +13681,7 @@ static ggml_backend_buffer_type_t ggml_backend_vk_get_default_buffer_type(ggml_b return &ctx->device->buffer_type; } @@ -222,7 +249,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu VK_LOG_DEBUG("ggml_backend_vk_set_tensor_async(" << size << ")"); ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context; GGML_ASSERT((tensor->buffer->buft == ggml_backend_vk_get_default_buffer_type(backend) || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type"); -@@ -13723,7 +13733,7 @@ static void ggml_backend_vk_set_tensor_async(ggml_backend_t backend, ggml_tensor +@@ -13723,7 +13728,7 @@ static void ggml_backend_vk_set_tensor_async(ggml_backend_t backend, ggml_tensor } } @@ -231,7 +258,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu VK_LOG_DEBUG("ggml_backend_vk_get_tensor_async(" << size << ")"); ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context; GGML_ASSERT((tensor->buffer->buft == ggml_backend_vk_get_default_buffer_type(backend) || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type"); -@@ -13757,7 +13767,7 @@ static void 
ggml_backend_vk_get_tensor_async(ggml_backend_t backend, const ggml_ +@@ -13757,7 +13762,7 @@ static void ggml_backend_vk_get_tensor_async(ggml_backend_t backend, const ggml_ } } @@ -240,7 +267,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu VK_LOG_DEBUG("ggml_backend_vk_cpy_tensor_async(" << src << " -> " << dst << ", size=" << ggml_nbytes(src) << ")"); ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend_dst->context; -@@ -13882,7 +13892,7 @@ static void ggml_vk_synchronize(ggml_backend_vk_context * ctx) { +@@ -13882,7 +13887,7 @@ static void ggml_vk_synchronize(ggml_backend_vk_context * ctx) { } } @@ -249,7 +276,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu VK_LOG_DEBUG("ggml_backend_vk_synchronize()"); ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context; -@@ -14304,7 +14314,7 @@ static int32_t find_first_set(uint32_t x) { +@@ -14304,7 +14309,7 @@ static int32_t find_first_set(uint32_t x) { return ret; } @@ -258,7 +285,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu VK_LOG_DEBUG("ggml_backend_vk_graph_compute(" << cgraph->n_nodes << " nodes)"); ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context; -@@ -14684,7 +14694,7 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg +@@ -14684,7 +14689,7 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg } // Sort the graph for improved parallelism. 
@@ -267,7 +294,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu { VK_LOG_DEBUG("ggml_vk_graph_optimize(" << graph->n_nodes << " nodes)"); ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context; -@@ -14922,7 +14932,7 @@ static void ggml_vk_graph_optimize(ggml_backend_t backend, struct ggml_cgraph * +@@ -14922,7 +14927,7 @@ static void ggml_vk_graph_optimize(ggml_backend_t backend, struct ggml_cgraph * } } @@ -276,7 +303,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu VK_LOG_DEBUG("ggml_backend_vk_event_record(backend=" << backend << ", event=" << event << ")"); ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context; vk_event *vkev = (vk_event *)event->context; -@@ -14960,7 +14970,7 @@ static void ggml_backend_vk_event_record(ggml_backend_t backend, ggml_backend_ev +@@ -14960,7 +14965,7 @@ static void ggml_backend_vk_event_record(ggml_backend_t backend, ggml_backend_ev ctx->compute_ctx.reset(); } @@ -285,7 +312,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu VK_LOG_DEBUG("ggml_backend_vk_event_wait(backend=" << backend << ", event=" << event << ")"); ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context; vk_event *vkev = (vk_event *)event->context; -@@ -15055,7 +15065,10 @@ void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total +@@ -15055,7 +15060,10 @@ void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total *total += heap.size; if (membudget_supported && i < budgetprops.heapUsage.size()) { @@ -297,7 +324,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu } else { *free += heap.size; } -@@ -15123,38 +15136,38 @@ struct ggml_backend_vk_device_context { +@@ -15123,38 +15131,38 @@ struct ggml_backend_vk_device_context { int op_offload_min_batch_size; }; @@ -343,7 +370,7 @@ diff --git 
a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context; props->name = ggml_backend_vk_device_get_name(dev); -@@ -15170,13 +15183,13 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml +@@ -15170,13 +15178,13 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml }; } @@ -359,7 +386,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context; const vk_device& device = ggml_vk_get_device(ctx->device); -@@ -15714,7 +15727,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm +@@ -15714,7 +15722,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm UNUSED(dev); } @@ -368,7 +395,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu if (buft->iface.get_name != ggml_backend_vk_buffer_type_name) { return false; } -@@ -15740,13 +15753,13 @@ static int64_t ggml_vk_get_op_batch_size(const ggml_tensor * op) { +@@ -15740,13 +15748,13 @@ static int64_t ggml_vk_get_op_batch_size(const ggml_tensor * op) { } } @@ -384,7 +411,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context; auto device = ggml_vk_get_device(ctx->device); -@@ -15769,7 +15782,7 @@ static ggml_backend_event_t ggml_backend_vk_device_event_new(ggml_backend_dev_t +@@ -15769,7 +15777,7 @@ static ggml_backend_event_t ggml_backend_vk_device_event_new(ggml_backend_dev_t }; } @@ -393,7 +420,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context; auto device = ggml_vk_get_device(ctx->device); -@@ -15789,7 +15802,7 @@ static void 
ggml_backend_vk_device_event_free(ggml_backend_dev_t dev, ggml_backe +@@ -15789,7 +15797,7 @@ static void ggml_backend_vk_device_event_free(ggml_backend_dev_t dev, ggml_backe delete event; } @@ -402,7 +429,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu VK_LOG_DEBUG("ggml_backend_vk_device_event_synchronize(backend=" << dev << ", event=" << event << ")"); ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context; auto device = ggml_vk_get_device(ctx->device); -@@ -15846,7 +15859,7 @@ static vk_buffer ggml_vk_buffer_from_host_ptr(vk_device & device, void * ptr, si +@@ -15846,7 +15854,7 @@ static vk_buffer ggml_vk_buffer_from_host_ptr(vk_device & device, void * ptr, si return buf; } @@ -411,7 +438,7 @@ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vu VK_LOG_DEBUG("ggml_backend_vk_device_buffer_from_host_ptr(backend=" << dev << ", ptr=" << ptr << ", size=" << size << ")"); GGML_UNUSED(max_tensor_size); -@@ -15884,17 +15897,17 @@ static const struct ggml_backend_device_i ggml_backend_vk_device_i = { +@@ -15884,17 +15892,17 @@ static const struct ggml_backend_device_i ggml_backend_vk_device_i = { /* .event_synchronize = */ ggml_backend_vk_device_event_synchronize, }; diff --git a/llamafile/cuda.bat b/llamafile/cuda.bat index 32055b9666..7168d9759c 100644 --- a/llamafile/cuda.bat +++ b/llamafile/cuda.bat @@ -36,6 +36,25 @@ echo Unknown option: %~1 exit /b 1 :done_args +:: -------- find Visual Studio / Build Tools -------- +where cl >nul 2>&1 +if errorlevel 1 ( + set "VSWHERE=%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" + if not exist "!VSWHERE!" ( + echo Error: cl.exe not found in PATH and vswhere.exe not found + echo Please run from a Visual Studio Developer Command Prompt + exit /b 1 + ) + for /f "usebackq tokens=*" %%i in (`"!VSWHERE!" 
-latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath`) do ( + set "VS_PATH=%%i" + ) + if not defined VS_PATH ( + echo Error: Visual Studio with C++ tools not found + exit /b 1 + ) + call "!VS_PATH!\VC\Auxiliary\Build\vcvarsall.bat" x64 +) + set "LLAMA_CPP_DIR=%REPO_DIR%\llama.cpp" set "GGML_CUDA_DIR=%LLAMA_CPP_DIR%\ggml\src\ggml-cuda" set "GGML_SRC_DIR=%LLAMA_CPP_DIR%\ggml\src" @@ -115,7 +134,8 @@ if "%USE_CUBLAS%"=="0" set "COMMON_FLAGS=%COMMON_FLAGS% -I%BUILD_DIR%" set "COMMON_FLAGS=%COMMON_FLAGS% -I%GGML_INC_DIR% -I%GGML_SRC_DIR% -I%GGML_CUDA_DIR%" set "COMMON_FLAGS=%COMMON_FLAGS% --forward-unknown-to-host-compiler" set "COMMON_FLAGS=%COMMON_FLAGS% --std=c++17" -set "COMMON_FLAGS=%COMMON_FLAGS% -Xcompiler="/nologo /EHsc /O2 /GR /MT /std:c++17"" +set "COMMON_FLAGS=%COMMON_FLAGS% -Xcompiler="/nologo /EHsc /O2 /GR /MT /std:c++17 /Zc:preprocessor"" +set "COMMON_FLAGS=%COMMON_FLAGS% -diag-suppress 177 -diag-suppress 221 -diag-suppress 550" set "COMMON_FLAGS=%COMMON_FLAGS% -DNDEBUG -DGGML_BUILD=1 -DGGML_SHARED=1 -DGGML_BACKEND_SHARED=1 -DGGML_BACKEND_BUILD=1 -DGGML_MULTIPLATFORM" set "COMMON_FLAGS=%COMMON_FLAGS% %BLAS_DEFINE%" @@ -209,7 +229,7 @@ echo. :: -------- compile core GGML sources with host compiler -------- echo Compiling core GGML sources... 
-set "HOST_FLAGS=/nologo /EHsc /O2 /GR /MT /DNDEBUG" +set "HOST_FLAGS=/nologo /EHsc /O2 /GR /MT /Zc:preprocessor /DNDEBUG" set "HOST_FLAGS=%HOST_FLAGS% /DGGML_BUILD=1 /DGGML_SHARED=1 /DGGML_BACKEND_SHARED=1 /DGGML_BACKEND_BUILD=1 /DGGML_MULTIPLATFORM" set "HOST_FLAGS=%HOST_FLAGS% /DGGML_VERSION=\"!GGML_VERSION!\" /DGGML_COMMIT=\"!GGML_COMMIT!\"" set "HOST_FLAGS=%HOST_FLAGS% /I"%GGML_INC_DIR%" /I"%GGML_SRC_DIR%"" diff --git a/llamafile/cuda.c b/llamafile/cuda.c index c6e4297ebd..e2fc017fa2 100644 --- a/llamafile/cuda.c +++ b/llamafile/cuda.c @@ -97,7 +97,8 @@ static bool LinkCuda(const char *dso) { void *lib = cosmo_dlopen(dso, RTLD_LAZY); if (!lib) { char *err = cosmo_dlerror(); - fprintf(stderr, "cuda: %s: failed to load library\n", err ? err : "unknown error"); + llamafile_info("cuda", "failed to load library %s: %s", + dso, err ? err : "unknown error"); return false; } @@ -142,7 +143,8 @@ static bool LinkCuda(const char *dso) { if (!ok) { char *err = cosmo_dlerror(); - fprintf(stderr, "cuda: %s: not all symbols could be imported\n", err ? err : "unknown error"); + llamafile_info("cuda", "could not import all symbols from %s: %s", + dso, err ? 
err : "unknown error"); memset(&g_cuda.backend_init, 0, sizeof(g_cuda.backend_init)); memset(&g_cuda.backend_reg, 0, sizeof(g_cuda.backend_reg)); memset(&g_cuda.get_device_count, 0, sizeof(g_cuda.get_device_count)); @@ -197,12 +199,10 @@ static bool ImportCudaImpl(void) { } // No pre-built DSO found - if (FLAG_verbose) { - fprintf(stderr, "cuda: no pre-built GPU library found\n"); - fprintf(stderr, "cuda: to enable GPU support, build with:\n"); - fprintf(stderr, "cuda: llamafile/cuda.sh (for NVIDIA)\n"); - fprintf(stderr, "cuda: llamafile/rocm.sh (for AMD)\n"); - } + llamafile_info("cuda", "no pre-built GPU library found"); + llamafile_info("cuda", "to enable GPU support, build with:"); + llamafile_info("cuda", " llamafile/cuda.sh (for NVIDIA)"); + llamafile_info("cuda", " llamafile/rocm.sh (for AMD)"); return false; RegisterBackend: @@ -225,9 +225,8 @@ static bool ImportCudaImpl(void) { reg = g_cuda.backend_reg.default_abi(); if (reg) { ggml_backend_register(reg); - if (FLAG_verbose) - fprintf(stderr, "cuda: %s backend registered with GGML\n", - g_cuda.is_amd ? "ROCm" : "CUDA"); + llamafile_info("cuda", "%s backend registered with GGML", + g_cuda.is_amd ? "ROCm" : "CUDA"); } } @@ -237,17 +236,15 @@ static bool ImportCudaImpl(void) { static void ImportCuda(void) { if (ImportCudaImpl()) { g_cuda.supported = true; - if (FLAG_verbose) { - fprintf(stderr, "cuda: %s GPU support successfully loaded\n", - g_cuda.is_amd ? "AMD ROCm" : "NVIDIA CUDA"); - if (g_cuda.get_device_count.default_abi || g_cuda.get_device_count.windows_abi) { - int count; - if (IsWindows()) - count = g_cuda.get_device_count.windows_abi(); - else - count = g_cuda.get_device_count.default_abi(); - fprintf(stderr, "cuda: found %d GPU device(s)\n", count); - } + llamafile_info("cuda", "%s GPU support successfully loaded", + g_cuda.is_amd ? 
"AMD ROCm" : "NVIDIA CUDA"); + if (g_cuda.get_device_count.default_abi || g_cuda.get_device_count.windows_abi) { + int count; + if (IsWindows()) + count = g_cuda.get_device_count.windows_abi(); + else + count = g_cuda.get_device_count.default_abi(); + llamafile_info("cuda", "found %d GPU device(s)", count); } } else if (FLAG_gpu == LLAMAFILE_GPU_NVIDIA || FLAG_gpu == LLAMAFILE_GPU_AMD) { fprintf(stderr, "fatal error: support for --gpu %s was explicitly requested, " diff --git a/llamafile/cuda_parallel.bat b/llamafile/cuda_parallel.bat index 67905b163c..50ed849326 100644 --- a/llamafile/cuda_parallel.bat +++ b/llamafile/cuda_parallel.bat @@ -47,6 +47,25 @@ echo Unknown option: %~1 exit /b 1 :done_args +:: -------- find Visual Studio / Build Tools -------- +where cl >nul 2>&1 +if errorlevel 1 ( + set "VSWHERE=%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" + if not exist "!VSWHERE!" ( + echo Error: cl.exe not found in PATH and vswhere.exe not found + echo Please run from a Visual Studio Developer Command Prompt + exit /b 1 + ) + for /f "usebackq tokens=*" %%i in (`"!VSWHERE!" 
-latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath`) do ( + set "VS_PATH=%%i" + ) + if not defined VS_PATH ( + echo Error: Visual Studio with C++ tools not found + exit /b 1 + ) + call "!VS_PATH!\VC\Auxiliary\Build\vcvarsall.bat" x64 +) + set "LLAMA_CPP_DIR=%REPO_DIR%\llama.cpp" set "GGML_CUDA_DIR=%LLAMA_CPP_DIR%\ggml\src\ggml-cuda" set "GGML_SRC_DIR=%LLAMA_CPP_DIR%\ggml\src" @@ -137,8 +156,8 @@ if "%USE_CUBLAS%"=="0" set "COMMON_FLAGS=%COMMON_FLAGS% -I%BUILD_DIR%" set "COMMON_FLAGS=%COMMON_FLAGS% -I%GGML_INC_DIR% -I%GGML_SRC_DIR% -I%GGML_CUDA_DIR%" set "COMMON_FLAGS=%COMMON_FLAGS% --forward-unknown-to-host-compiler" set "COMMON_FLAGS=%COMMON_FLAGS% --std=c++17" -set "COMMON_FLAGS=%COMMON_FLAGS% -Xcompiler="/nologo /EHsc /O2 /GR /MT /std:c++17"" -set "COMMON_FLAGS=%COMMON_FLAGS% -diag-suppress 177 -diag-suppress 221" +set "COMMON_FLAGS=%COMMON_FLAGS% -Xcompiler="/nologo /EHsc /O2 /GR /MT /std:c++17 /Zc:preprocessor"" +set "COMMON_FLAGS=%COMMON_FLAGS% -diag-suppress 177 -diag-suppress 221 -diag-suppress 550" set "COMMON_FLAGS=%COMMON_FLAGS% -DNDEBUG -DGGML_BUILD=1 -DGGML_SHARED=1 -DGGML_BACKEND_SHARED=1 -DGGML_BACKEND_BUILD=1 -DGGML_MULTIPLATFORM" set "COMMON_FLAGS=%COMMON_FLAGS% %BLAS_DEFINE%" @@ -229,7 +248,7 @@ echo. :: -------- compile core GGML sources with host compiler -------- echo Compiling core GGML sources... 
-set "HOST_FLAGS=/nologo /EHsc /O2 /GR /MT /DNDEBUG" +set "HOST_FLAGS=/nologo /EHsc /O2 /GR /MT /Zc:preprocessor /DNDEBUG" set "HOST_FLAGS=%HOST_FLAGS% /DGGML_BUILD=1 /DGGML_SHARED=1 /DGGML_BACKEND_SHARED=1 /DGGML_BACKEND_BUILD=1 /DGGML_MULTIPLATFORM" set "HOST_FLAGS=%HOST_FLAGS% /DGGML_VERSION=\"!GGML_VERSION!\" /DGGML_COMMIT=\"!GGML_COMMIT!\"" set "HOST_FLAGS=%HOST_FLAGS% /I"%GGML_INC_DIR%" /I"%GGML_SRC_DIR%"" diff --git a/llamafile/llamafile.c b/llamafile/llamafile.c index 348045e213..d29e551c24 100644 --- a/llamafile/llamafile.c +++ b/llamafile/llamafile.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -730,9 +731,9 @@ bool llamafile_try_load_prebuilt_dso(const char *name, const char *backend_name, break; } + llamafile_info(backend_name, "probing library %s (bundled)", extracted); if (link_fn(extracted)) { - if (FLAG_verbose) - fprintf(stderr, "%s: loaded bundled %s\n", backend_name, name); + llamafile_info(backend_name, "loaded bundled library %s", extracted); return true; } } @@ -741,9 +742,9 @@ bool llamafile_try_load_prebuilt_dso(const char *name, const char *backend_name, llamafile_get_app_dir(app_dir, PATH_MAX); snprintf(dso, PATH_MAX, "%s%s", app_dir, name); if (llamafile_file_exists(dso)) { + llamafile_info(backend_name, "probing library %s (app directory)", dso); if (link_fn(dso)) { - if (FLAG_verbose) - fprintf(stderr, "%s: loaded %s from app directory\n", backend_name, name); + llamafile_info(backend_name, "loaded library %s from app directory", dso); return true; } } @@ -753,9 +754,9 @@ bool llamafile_try_load_prebuilt_dso(const char *name, const char *backend_name, if (home && *home) { snprintf(dso, PATH_MAX, "%s/%s", home, name); if (llamafile_file_exists(dso)) { + llamafile_info(backend_name, "probing library %s (home directory)", dso); if (link_fn(dso)) { - if (FLAG_verbose) - fprintf(stderr, "%s: loaded %s from home directory\n", backend_name, name); + llamafile_info(backend_name, "loaded library %s from 
home directory", dso); return true; } } @@ -774,6 +775,17 @@ void llamafile_log_callback_null(int level, const char *text, void *user_data) { (void)user_data; } +void llamafile_info(const char *backend, const char *fmt, ...) { + if (!FLAG_verbose) + return; + fprintf(stderr, "%s: INFO: ", backend); + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fputc('\n', stderr); +} + // ============================================================================== // GPU support // ============================================================================== diff --git a/llamafile/llamafile.h b/llamafile/llamafile.h index 534c0f1a4f..b3b45410c5 100644 --- a/llamafile/llamafile.h +++ b/llamafile/llamafile.h @@ -120,6 +120,13 @@ typedef void (*llamafile_log_callback)(int level, const char *text, void *user_d // No-op log callback to disable logging (defined in llamafile.c) void llamafile_log_callback_null(int level, const char *text, void *user_data); +// Print an INFO-level diagnostic tagged with a backend name. +// No-op unless FLAG_verbose is set. Adds the ": INFO: " prefix +// and a trailing newline, so callers pass only the message body. +// Defined in llamafile.c. +void llamafile_info(const char *backend, const char *fmt, ...) 
+ __attribute__((format(printf, 2, 3))); + // Set logging callback for Metal dylib (defined in metal.c) // Pass a no-op callback to disable logging void llamafile_metal_log_set(llamafile_log_callback log_callback, void *user_data); diff --git a/llamafile/metal.c b/llamafile/metal.c index 68ae1fa397..778ef1b02f 100644 --- a/llamafile/metal.c +++ b/llamafile/metal.c @@ -276,8 +276,7 @@ static bool PreprocessMetalShader(const char *app_dir) { free(impl_content); free(metal_content); - if (FLAG_verbose) - fprintf(stderr, "metal: preprocessed %s\n", metal_path); + llamafile_info("metal", "preprocessed %s", metal_path); return true; } @@ -293,8 +292,7 @@ static bool BuildMetal(const char *dso) { // Since we use versioned paths, source updates come with new versions struct stat dso_stat; if (stat(dso, &dso_stat) == 0 && !FLAG_recompile) { - if (FLAG_verbose) - fprintf(stderr, "metal: using cached %s\n", dso); + llamafile_info("metal", "using cached %s", dso); return true; } @@ -359,8 +357,7 @@ static bool BuildMetal(const char *dso) { // Compile dynamic shared object if (needs_rebuild || FLAG_recompile) { - if (FLAG_verbose) - fprintf(stderr, "metal: building ggml-metal.dylib with xcode...\n"); + llamafile_info("metal", "building ggml-metal.dylib with xcode..."); char tmpdso[PATH_MAX]; snprintf(tmpdso, PATH_MAX, "%s.XXXXXX", dso); @@ -435,10 +432,12 @@ static bool BuildMetal(const char *dso) { args[argc] = NULL; if (FLAG_verbose) { - fprintf(stderr, "metal: executing: cc"); - for (int j = 1; args[j]; j++) - fprintf(stderr, " %s", args[j]); - fprintf(stderr, "\n"); + char cmd[4096]; + size_t off = 0; + off += snprintf(cmd + off, sizeof(cmd) - off, "executing: cc"); + for (int j = 1; args[j] && off < sizeof(cmd); j++) + off += snprintf(cmd + off, sizeof(cmd) - off, " %s", args[j]); + llamafile_info("metal", "%s", cmd); } int pid, ws; @@ -499,10 +498,12 @@ static bool BuildMetal(const char *dso) { args[argc] = NULL; if (FLAG_verbose) { - fprintf(stderr, "metal: executing: 
cc"); - for (int j = 1; args[j]; j++) - fprintf(stderr, " %s", args[j]); - fprintf(stderr, "\n"); + char cmd[4096]; + size_t off = 0; + off += snprintf(cmd + off, sizeof(cmd) - off, "executing: cc"); + for (int j = 1; args[j] && off < sizeof(cmd); j++) + off += snprintf(cmd + off, sizeof(cmd) - off, " %s", args[j]); + llamafile_info("metal", "%s", cmd); } int pid, ws; @@ -538,8 +539,7 @@ static bool BuildMetal(const char *dso) { return false; } - if (FLAG_verbose) - fprintf(stderr, "metal: successfully built %s\n", dso); + llamafile_info("metal", "successfully built %s", dso); } return true; @@ -550,7 +550,8 @@ static bool LinkMetal(const char *dso) { void *lib = cosmo_dlopen(dso, RTLD_LAZY); if (!lib) { char *err = cosmo_dlerror(); - fprintf(stderr, "metal: %s: failed to load library\n", err ? err : "unknown error"); + llamafile_info("metal", "failed to load library %s: %s", + dso, err ? err : "unknown error"); return false; } @@ -570,7 +571,8 @@ static bool LinkMetal(const char *dso) { if (!ok) { char *err = cosmo_dlerror(); - fprintf(stderr, "metal: %s: not all symbols could be imported\n", err ? err : "unknown error"); + llamafile_info("metal", "could not import all symbols from %s: %s", + dso, err ? 
err : "unknown error"); cosmo_dlclose(lib); return false; } @@ -617,8 +619,7 @@ static bool ImportMetalImpl(void) { ggml_backend_reg_t reg = g_metal.backend_metal_reg(); if (reg) { ggml_backend_register(reg); - if (FLAG_verbose) - fprintf(stderr, "metal: Metal backend registered with GGML\n"); + llamafile_info("metal", "Metal backend registered with GGML"); } } return true; @@ -630,8 +631,7 @@ static bool ImportMetalImpl(void) { static void ImportMetal(void) { if (ImportMetalImpl()) { g_metal.supported = true; - if (FLAG_verbose) - fprintf(stderr, "metal: Apple Metal GPU support successfully loaded\n"); + llamafile_info("metal", "Apple Metal GPU support successfully loaded"); } else if (FLAG_gpu == LLAMAFILE_GPU_APPLE) { fprintf(stderr, "fatal error: support for --gpu %s was explicitly requested, " "but it wasn't available\n", llamafile_describe_gpu()); diff --git a/llamafile/rocm.bat b/llamafile/rocm.bat index db51fe2a97..c136a82a2b 100644 --- a/llamafile/rocm.bat +++ b/llamafile/rocm.bat @@ -33,6 +33,25 @@ echo Unknown option: %~1 exit /b 1 :done_args +:: -------- find Visual Studio / Build Tools -------- +where cl >nul 2>&1 +if errorlevel 1 ( + set "VSWHERE=%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" + if not exist "!VSWHERE!" ( + echo Error: cl.exe not found in PATH and vswhere.exe not found + echo Please run from a Visual Studio Developer Command Prompt + exit /b 1 + ) + for /f "usebackq tokens=*" %%i in (`"!VSWHERE!" -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath`) do ( + set "VS_PATH=%%i" + ) + if not defined VS_PATH ( + echo Error: Visual Studio with C++ tools not found + exit /b 1 + ) + call "!VS_PATH!\VC\Auxiliary\Build\vcvarsall.bat" x64 +) + set "LLAMA_CPP_DIR=%REPO_DIR%\llama.cpp" set "GGML_CUDA_DIR=%LLAMA_CPP_DIR%\ggml\src\ggml-cuda" set "GGML_SRC_DIR=%LLAMA_CPP_DIR%\ggml\src" @@ -196,7 +215,7 @@ echo. 
:: -------- compile core GGML sources with host compiler -------- echo Compiling core GGML sources... -set "HOST_FLAGS=/nologo /EHsc /O2 /GR /MT /DNDEBUG" +set "HOST_FLAGS=/nologo /EHsc /O2 /GR /MT /Zc:preprocessor /DNDEBUG" set "HOST_FLAGS=%HOST_FLAGS% /DGGML_BUILD=1 /DGGML_SHARED=1 /DGGML_BACKEND_SHARED=1 /DGGML_BACKEND_BUILD=1 /DGGML_MULTIPLATFORM" set "HOST_FLAGS=%HOST_FLAGS% /DGGML_VERSION=\"!GGML_VERSION!\" /DGGML_COMMIT=\"!GGML_COMMIT!\"" set "HOST_FLAGS=%HOST_FLAGS% /I"%GGML_INC_DIR%" /I"%GGML_SRC_DIR%"" diff --git a/llamafile/rocm_parallel.bat b/llamafile/rocm_parallel.bat index fdeda1ffcb..ee1c441cb2 100644 --- a/llamafile/rocm_parallel.bat +++ b/llamafile/rocm_parallel.bat @@ -44,6 +44,25 @@ echo Unknown option: %~1 exit /b 1 :done_args +:: -------- find Visual Studio / Build Tools -------- +where cl >nul 2>&1 +if errorlevel 1 ( + set "VSWHERE=%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" + if not exist "!VSWHERE!" ( + echo Error: cl.exe not found in PATH and vswhere.exe not found + echo Please run from a Visual Studio Developer Command Prompt + exit /b 1 + ) + for /f "usebackq tokens=*" %%i in (`"!VSWHERE!" -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath`) do ( + set "VS_PATH=%%i" + ) + if not defined VS_PATH ( + echo Error: Visual Studio with C++ tools not found + exit /b 1 + ) + call "!VS_PATH!\VC\Auxiliary\Build\vcvarsall.bat" x64 +) + set "LLAMA_CPP_DIR=%REPO_DIR%\llama.cpp" set "GGML_CUDA_DIR=%LLAMA_CPP_DIR%\ggml\src\ggml-cuda" set "GGML_SRC_DIR=%LLAMA_CPP_DIR%\ggml\src" @@ -215,7 +234,7 @@ echo. :: -------- compile core GGML sources with host compiler -------- echo Compiling core GGML sources... 
-set "HOST_FLAGS=/nologo /EHsc /O2 /GR /MT /DNDEBUG" +set "HOST_FLAGS=/nologo /EHsc /O2 /GR /MT /Zc:preprocessor /DNDEBUG" set "HOST_FLAGS=%HOST_FLAGS% /DGGML_BUILD=1 /DGGML_SHARED=1 /DGGML_BACKEND_SHARED=1 /DGGML_BACKEND_BUILD=1 /DGGML_MULTIPLATFORM" set "HOST_FLAGS=%HOST_FLAGS% /DGGML_VERSION=\"!GGML_VERSION!\" /DGGML_COMMIT=\"!GGML_COMMIT!\"" set "HOST_FLAGS=%HOST_FLAGS% /I"%GGML_INC_DIR%" /I"%GGML_SRC_DIR%"" diff --git a/llamafile/vulkan.bat b/llamafile/vulkan.bat index 8feead1546..4cb9d19d8f 100644 --- a/llamafile/vulkan.bat +++ b/llamafile/vulkan.bat @@ -93,12 +93,23 @@ if not exist "%VULKAN_SDK%\Lib\vulkan-1.lib" ( exit /b 1 ) -:: -------- check MSVC -------- +:: -------- find Visual Studio / Build Tools -------- where cl >nul 2>&1 if errorlevel 1 ( - echo Error: cl.exe not found in PATH - echo Please run from a Visual Studio Developer Command Prompt - exit /b 1 + set "VSWHERE=%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" + if not exist "!VSWHERE!" ( + echo Error: cl.exe not found in PATH and vswhere.exe not found + echo Please run from a Visual Studio Developer Command Prompt + exit /b 1 + ) + for /f "usebackq tokens=*" %%i in (`"!VSWHERE!" -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath`) do ( + set "VS_PATH=%%i" + ) + if not defined VS_PATH ( + echo Error: Visual Studio with C++ tools not found + exit /b 1 + ) + call "!VS_PATH!\VC\Auxiliary\Build\vcvarsall.bat" x64 ) :: -------- build parallel job runner -------- @@ -210,7 +221,7 @@ echo. :: ======================================================================== echo Phase 4: Compiling shader C++ files... 
-set "CXX_FLAGS=/c /nologo /EHsc /O2 /GR /MT /std:c++17" +set "CXX_FLAGS=/c /nologo /EHsc /O2 /GR /MT /std:c++17 /Zc:preprocessor" set "CXX_FLAGS=%CXX_FLAGS% /I"%GGML_INC_DIR%" /I"%GGML_SRC_DIR%" /I"%BUILD_DIR%"" set "CXX_FLAGS=%CXX_FLAGS% /DNDEBUG /DGGML_BUILD=1 /DGGML_SHARED=1 /DGGML_BACKEND_SHARED=1 /DGGML_BACKEND_BUILD=1 /DGGML_MULTIPLATFORM" @@ -261,7 +272,7 @@ echo. :: ======================================================================== echo Phase 6: Compiling core GGML sources... -set "HOST_FLAGS=/nologo /EHsc /O2 /GR /MT /DNDEBUG" +set "HOST_FLAGS=/nologo /EHsc /O2 /GR /MT /Zc:preprocessor /DNDEBUG" set "HOST_FLAGS=%HOST_FLAGS% /DGGML_BUILD=1 /DGGML_SHARED=1 /DGGML_BACKEND_SHARED=1 /DGGML_BACKEND_BUILD=1 /DGGML_MULTIPLATFORM" set "HOST_FLAGS=%HOST_FLAGS% /DGGML_VERSION=\"!GGML_VERSION!\" /DGGML_COMMIT=\"!GGML_COMMIT!\"" set "HOST_FLAGS=%HOST_FLAGS% /I"%GGML_INC_DIR%" /I"%GGML_SRC_DIR%"" diff --git a/llamafile/vulkan.c b/llamafile/vulkan.c index 386657d4d8..91d4a2fcdb 100644 --- a/llamafile/vulkan.c +++ b/llamafile/vulkan.c @@ -93,7 +93,8 @@ static bool LinkVulkan(const char *dso) { void *lib = cosmo_dlopen(dso, RTLD_LAZY); if (!lib) { char *err = cosmo_dlerror(); - fprintf(stderr, "vulkan: %s: failed to load library\n", err ? err : "unknown error"); + llamafile_info("vulkan", "failed to load library %s: %s", + dso, err ? err : "unknown error"); return false; } @@ -138,7 +139,8 @@ static bool LinkVulkan(const char *dso) { if (!ok) { char *err = cosmo_dlerror(); - fprintf(stderr, "vulkan: %s: not all symbols could be imported\n", err ? err : "unknown error"); + llamafile_info("vulkan", "could not import all symbols from %s: %s", + dso, err ? 
err : "unknown error"); memset(&g_vulkan.backend_init, 0, sizeof(g_vulkan.backend_init)); memset(&g_vulkan.backend_reg, 0, sizeof(g_vulkan.backend_reg)); memset(&g_vulkan.get_device_count, 0, sizeof(g_vulkan.get_device_count)); @@ -173,11 +175,9 @@ static bool ImportVulkanImpl(void) { // Try to load pre-built DSO if (!llamafile_try_load_prebuilt_dso(vulkan_dso, "vulkan", LinkVulkan)) { // No pre-built DSO found - if (FLAG_verbose) { - fprintf(stderr, "vulkan: no pre-built GPU library found\n"); - fprintf(stderr, "vulkan: to enable Vulkan support, build with:\n"); - fprintf(stderr, "vulkan: llamafile/vulkan.sh\n"); - } + llamafile_info("vulkan", "no pre-built GPU library found"); + llamafile_info("vulkan", "to enable Vulkan support, build with:"); + llamafile_info("vulkan", " llamafile/vulkan.sh"); return false; } @@ -200,8 +200,7 @@ static bool ImportVulkanImpl(void) { reg = g_vulkan.backend_reg.default_abi(); if (reg) { ggml_backend_register(reg); - if (FLAG_verbose) - fprintf(stderr, "vulkan: Vulkan backend registered with GGML\n"); + llamafile_info("vulkan", "Vulkan backend registered with GGML"); } } @@ -211,16 +210,14 @@ static bool ImportVulkanImpl(void) { static void ImportVulkan(void) { if (ImportVulkanImpl()) { g_vulkan.supported = true; - if (FLAG_verbose) { - fprintf(stderr, "vulkan: Vulkan GPU support successfully loaded\n"); - if (g_vulkan.get_device_count.default_abi || g_vulkan.get_device_count.windows_abi) { - int count; - if (IsWindows()) - count = g_vulkan.get_device_count.windows_abi(); - else - count = g_vulkan.get_device_count.default_abi(); - fprintf(stderr, "vulkan: found %d GPU device(s)\n", count); - } + llamafile_info("vulkan", "Vulkan GPU support successfully loaded"); + if (g_vulkan.get_device_count.default_abi || g_vulkan.get_device_count.windows_abi) { + int count; + if (IsWindows()) + count = g_vulkan.get_device_count.windows_abi(); + else + count = g_vulkan.get_device_count.default_abi(); + llamafile_info("vulkan", "found %d GPU 
device(s)", count); } } else if (FLAG_gpu == LLAMAFILE_GPU_VULKAN) { fprintf(stderr, "fatal error: support for --gpu vulkan was explicitly requested, "