Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions docs/building_dlls.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,16 @@ make setup
# In the Windows terminal

After the repo is set up, you can build the cuda / rocm / vulkan DLLs as follows.
The .bat files to run the builds are in the `llamafile` directory and accept the following
parameters:

- from powershell, open a Visual Studio 2022 Developer Command Prompt
```
cmd /k "`"C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvarsall.bat`" x64"
```
- `--clean` to restart a build from scratch
- `--output` to provide a custom output filename for the dll (default is `ggml-xxxx.dll` in the current directory,
  where xxxx is one of cuda, rocm, vulkan)
- only for the cuda libraries, you also have the `--cublas` option to link the library against NVIDIA's cublas instead of tinyblas

Also note that for cuda and rocm libraries there are `*_parallel.bat` scripts that should work faster
by parallelizing compilation and taking advantage of your compute. Here's how you call the build scripts:

- cd to the llamafile dir and start CUDA parallel build (this will run for a while...)
```
Expand Down

Large diffs are not rendered by default.

24 changes: 22 additions & 2 deletions llamafile/cuda.bat
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,25 @@ echo Unknown option: %~1
exit /b 1
:done_args

:: -------- find Visual Studio / Build Tools --------
:: When cl.exe is not already on PATH, locate a Visual Studio installation that
:: provides the x86.x64 VC tools component by querying vswhere.exe, then load
:: its x64 build environment through vcvarsall.bat. The script exits with
:: code 1 if vswhere.exe is missing or no matching installation is reported.
:: NOTE(review): the delayed-expansion variable syntax used below assumes that
:: delayed expansion was enabled earlier in the script - not visible here.
where cl >nul 2>&1
if errorlevel 1 (
rem cl.exe was not found on PATH, so fall back to vswhere.exe
set "VSWHERE=%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe"
if not exist "!VSWHERE!" (
echo Error: cl.exe not found in PATH and vswhere.exe not found
echo Please run from a Visual Studio Developer Command Prompt
exit /b 1
)
rem Ask vswhere for the install path of the latest product with the VC tools component
for /f "usebackq tokens=*" %%i in (`"!VSWHERE!" -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath`) do (
set "VS_PATH=%%i"
)
rem VS_PATH stays undefined when vswhere printed nothing
if not defined VS_PATH (
echo Error: Visual Studio with C++ tools not found
exit /b 1
)
rem Import the x64 compiler environment into this script
call "!VS_PATH!\VC\Auxiliary\Build\vcvarsall.bat" x64
)

set "LLAMA_CPP_DIR=%REPO_DIR%\llama.cpp"
set "GGML_CUDA_DIR=%LLAMA_CPP_DIR%\ggml\src\ggml-cuda"
set "GGML_SRC_DIR=%LLAMA_CPP_DIR%\ggml\src"
Expand Down Expand Up @@ -115,7 +134,8 @@ if "%USE_CUBLAS%"=="0" set "COMMON_FLAGS=%COMMON_FLAGS% -I%BUILD_DIR%"
set "COMMON_FLAGS=%COMMON_FLAGS% -I%GGML_INC_DIR% -I%GGML_SRC_DIR% -I%GGML_CUDA_DIR%"
set "COMMON_FLAGS=%COMMON_FLAGS% --forward-unknown-to-host-compiler"
set "COMMON_FLAGS=%COMMON_FLAGS% --std=c++17"
set "COMMON_FLAGS=%COMMON_FLAGS% -Xcompiler="/nologo /EHsc /O2 /GR /MT /std:c++17""
set "COMMON_FLAGS=%COMMON_FLAGS% -Xcompiler="/nologo /EHsc /O2 /GR /MT /std:c++17 /Zc:preprocessor""
set "COMMON_FLAGS=%COMMON_FLAGS% -diag-suppress 177 -diag-suppress 221 -diag-suppress 550"
set "COMMON_FLAGS=%COMMON_FLAGS% -DNDEBUG -DGGML_BUILD=1 -DGGML_SHARED=1 -DGGML_BACKEND_SHARED=1 -DGGML_BACKEND_BUILD=1 -DGGML_MULTIPLATFORM"
set "COMMON_FLAGS=%COMMON_FLAGS% %BLAS_DEFINE%"

Expand Down Expand Up @@ -209,7 +229,7 @@ echo.
:: -------- compile core GGML sources with host compiler --------
echo Compiling core GGML sources...

set "HOST_FLAGS=/nologo /EHsc /O2 /GR /MT /DNDEBUG"
set "HOST_FLAGS=/nologo /EHsc /O2 /GR /MT /Zc:preprocessor /DNDEBUG"
set "HOST_FLAGS=%HOST_FLAGS% /DGGML_BUILD=1 /DGGML_SHARED=1 /DGGML_BACKEND_SHARED=1 /DGGML_BACKEND_BUILD=1 /DGGML_MULTIPLATFORM"
set "HOST_FLAGS=%HOST_FLAGS% /DGGML_VERSION=\"!GGML_VERSION!\" /DGGML_COMMIT=\"!GGML_COMMIT!\""
set "HOST_FLAGS=%HOST_FLAGS% /I"%GGML_INC_DIR%" /I"%GGML_SRC_DIR%""
Expand Down
41 changes: 19 additions & 22 deletions llamafile/cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ static bool LinkCuda(const char *dso) {
void *lib = cosmo_dlopen(dso, RTLD_LAZY);
if (!lib) {
char *err = cosmo_dlerror();
fprintf(stderr, "cuda: %s: failed to load library\n", err ? err : "unknown error");
llamafile_info("cuda", "failed to load library %s: %s",
dso, err ? err : "unknown error");
return false;
}

Expand Down Expand Up @@ -142,7 +143,8 @@ static bool LinkCuda(const char *dso) {

if (!ok) {
char *err = cosmo_dlerror();
fprintf(stderr, "cuda: %s: not all symbols could be imported\n", err ? err : "unknown error");
llamafile_info("cuda", "could not import all symbols from %s: %s",
dso, err ? err : "unknown error");
memset(&g_cuda.backend_init, 0, sizeof(g_cuda.backend_init));
memset(&g_cuda.backend_reg, 0, sizeof(g_cuda.backend_reg));
memset(&g_cuda.get_device_count, 0, sizeof(g_cuda.get_device_count));
Expand Down Expand Up @@ -197,12 +199,10 @@ static bool ImportCudaImpl(void) {
}

// No pre-built DSO found
if (FLAG_verbose) {
fprintf(stderr, "cuda: no pre-built GPU library found\n");
fprintf(stderr, "cuda: to enable GPU support, build with:\n");
fprintf(stderr, "cuda: llamafile/cuda.sh (for NVIDIA)\n");
fprintf(stderr, "cuda: llamafile/rocm.sh (for AMD)\n");
}
llamafile_info("cuda", "no pre-built GPU library found");
llamafile_info("cuda", "to enable GPU support, build with:");
llamafile_info("cuda", " llamafile/cuda.sh (for NVIDIA)");
llamafile_info("cuda", " llamafile/rocm.sh (for AMD)");
return false;

RegisterBackend:
Expand All @@ -225,9 +225,8 @@ static bool ImportCudaImpl(void) {
reg = g_cuda.backend_reg.default_abi();
if (reg) {
ggml_backend_register(reg);
if (FLAG_verbose)
fprintf(stderr, "cuda: %s backend registered with GGML\n",
g_cuda.is_amd ? "ROCm" : "CUDA");
llamafile_info("cuda", "%s backend registered with GGML",
g_cuda.is_amd ? "ROCm" : "CUDA");
}
}

Expand All @@ -237,17 +236,15 @@ static bool ImportCudaImpl(void) {
static void ImportCuda(void) {
if (ImportCudaImpl()) {
g_cuda.supported = true;
if (FLAG_verbose) {
fprintf(stderr, "cuda: %s GPU support successfully loaded\n",
g_cuda.is_amd ? "AMD ROCm" : "NVIDIA CUDA");
if (g_cuda.get_device_count.default_abi || g_cuda.get_device_count.windows_abi) {
int count;
if (IsWindows())
count = g_cuda.get_device_count.windows_abi();
else
count = g_cuda.get_device_count.default_abi();
fprintf(stderr, "cuda: found %d GPU device(s)\n", count);
}
llamafile_info("cuda", "%s GPU support successfully loaded",
g_cuda.is_amd ? "AMD ROCm" : "NVIDIA CUDA");
if (g_cuda.get_device_count.default_abi || g_cuda.get_device_count.windows_abi) {
int count;
if (IsWindows())
count = g_cuda.get_device_count.windows_abi();
else
count = g_cuda.get_device_count.default_abi();
llamafile_info("cuda", "found %d GPU device(s)", count);
}
} else if (FLAG_gpu == LLAMAFILE_GPU_NVIDIA || FLAG_gpu == LLAMAFILE_GPU_AMD) {
fprintf(stderr, "fatal error: support for --gpu %s was explicitly requested, "
Expand Down
25 changes: 22 additions & 3 deletions llamafile/cuda_parallel.bat
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,25 @@ echo Unknown option: %~1
exit /b 1
:done_args

:: -------- find Visual Studio / Build Tools --------
:: When cl.exe is not already on PATH, locate a Visual Studio installation that
:: provides the x86.x64 VC tools component by querying vswhere.exe, then load
:: its x64 build environment through vcvarsall.bat. The script exits with
:: code 1 if vswhere.exe is missing or no matching installation is reported.
:: NOTE(review): the delayed-expansion variable syntax used below assumes that
:: delayed expansion was enabled earlier in the script - not visible here.
where cl >nul 2>&1
if errorlevel 1 (
rem cl.exe was not found on PATH, so fall back to vswhere.exe
set "VSWHERE=%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe"
if not exist "!VSWHERE!" (
echo Error: cl.exe not found in PATH and vswhere.exe not found
echo Please run from a Visual Studio Developer Command Prompt
exit /b 1
)
rem Ask vswhere for the install path of the latest product with the VC tools component
for /f "usebackq tokens=*" %%i in (`"!VSWHERE!" -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath`) do (
set "VS_PATH=%%i"
)
rem VS_PATH stays undefined when vswhere printed nothing
if not defined VS_PATH (
echo Error: Visual Studio with C++ tools not found
exit /b 1
)
rem Import the x64 compiler environment into this script
call "!VS_PATH!\VC\Auxiliary\Build\vcvarsall.bat" x64
)

set "LLAMA_CPP_DIR=%REPO_DIR%\llama.cpp"
set "GGML_CUDA_DIR=%LLAMA_CPP_DIR%\ggml\src\ggml-cuda"
set "GGML_SRC_DIR=%LLAMA_CPP_DIR%\ggml\src"
Expand Down Expand Up @@ -137,8 +156,8 @@ if "%USE_CUBLAS%"=="0" set "COMMON_FLAGS=%COMMON_FLAGS% -I%BUILD_DIR%"
set "COMMON_FLAGS=%COMMON_FLAGS% -I%GGML_INC_DIR% -I%GGML_SRC_DIR% -I%GGML_CUDA_DIR%"
set "COMMON_FLAGS=%COMMON_FLAGS% --forward-unknown-to-host-compiler"
set "COMMON_FLAGS=%COMMON_FLAGS% --std=c++17"
set "COMMON_FLAGS=%COMMON_FLAGS% -Xcompiler="/nologo /EHsc /O2 /GR /MT /std:c++17""
set "COMMON_FLAGS=%COMMON_FLAGS% -diag-suppress 177 -diag-suppress 221"
set "COMMON_FLAGS=%COMMON_FLAGS% -Xcompiler="/nologo /EHsc /O2 /GR /MT /std:c++17 /Zc:preprocessor""
set "COMMON_FLAGS=%COMMON_FLAGS% -diag-suppress 177 -diag-suppress 221 -diag-suppress 550"
set "COMMON_FLAGS=%COMMON_FLAGS% -DNDEBUG -DGGML_BUILD=1 -DGGML_SHARED=1 -DGGML_BACKEND_SHARED=1 -DGGML_BACKEND_BUILD=1 -DGGML_MULTIPLATFORM"
set "COMMON_FLAGS=%COMMON_FLAGS% %BLAS_DEFINE%"

Expand Down Expand Up @@ -229,7 +248,7 @@ echo.
:: -------- compile core GGML sources with host compiler --------
echo Compiling core GGML sources...

set "HOST_FLAGS=/nologo /EHsc /O2 /GR /MT /DNDEBUG"
set "HOST_FLAGS=/nologo /EHsc /O2 /GR /MT /Zc:preprocessor /DNDEBUG"
set "HOST_FLAGS=%HOST_FLAGS% /DGGML_BUILD=1 /DGGML_SHARED=1 /DGGML_BACKEND_SHARED=1 /DGGML_BACKEND_BUILD=1 /DGGML_MULTIPLATFORM"
set "HOST_FLAGS=%HOST_FLAGS% /DGGML_VERSION=\"!GGML_VERSION!\" /DGGML_COMMIT=\"!GGML_COMMIT!\""
set "HOST_FLAGS=%HOST_FLAGS% /I"%GGML_INC_DIR%" /I"%GGML_SRC_DIR%""
Expand Down
24 changes: 18 additions & 6 deletions llamafile/llamafile.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <limits.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Expand Down Expand Up @@ -730,9 +731,9 @@ bool llamafile_try_load_prebuilt_dso(const char *name, const char *backend_name,
break;
}

llamafile_info(backend_name, "probing library %s (bundled)", extracted);
if (link_fn(extracted)) {
if (FLAG_verbose)
fprintf(stderr, "%s: loaded bundled %s\n", backend_name, name);
llamafile_info(backend_name, "loaded bundled library %s", extracted);
return true;
}
}
Expand All @@ -741,9 +742,9 @@ bool llamafile_try_load_prebuilt_dso(const char *name, const char *backend_name,
llamafile_get_app_dir(app_dir, PATH_MAX);
snprintf(dso, PATH_MAX, "%s%s", app_dir, name);
if (llamafile_file_exists(dso)) {
llamafile_info(backend_name, "probing library %s (app directory)", dso);
if (link_fn(dso)) {
if (FLAG_verbose)
fprintf(stderr, "%s: loaded %s from app directory\n", backend_name, name);
llamafile_info(backend_name, "loaded library %s from app directory", dso);
return true;
}
}
Expand All @@ -753,9 +754,9 @@ bool llamafile_try_load_prebuilt_dso(const char *name, const char *backend_name,
if (home && *home) {
snprintf(dso, PATH_MAX, "%s/%s", home, name);
if (llamafile_file_exists(dso)) {
llamafile_info(backend_name, "probing library %s (home directory)", dso);
if (link_fn(dso)) {
if (FLAG_verbose)
fprintf(stderr, "%s: loaded %s from home directory\n", backend_name, name);
llamafile_info(backend_name, "loaded library %s from home directory", dso);
return true;
}
}
Expand All @@ -774,6 +775,17 @@ void llamafile_log_callback_null(int level, const char *text, void *user_data) {
(void)user_data;
}

// Emit a verbose-only diagnostic line on stderr, shaped as
// "<backend>: INFO: <formatted message>\n".
//
//   backend  tag printed ahead of the message (callers pass e.g. "cuda", "metal")
//   fmt      printf-style format string; its arguments follow
//
// Nothing is written when FLAG_verbose is unset.
void llamafile_info(const char *backend, const char *fmt, ...) {
    if (FLAG_verbose) {
        va_list args;

        // Prefix, message body and newline are written as three separate calls.
        fprintf(stderr, "%s: INFO: ", backend);
        va_start(args, fmt);
        vfprintf(stderr, fmt, args);
        va_end(args);
        putc('\n', stderr);
    }
}

// ==============================================================================
// GPU support
// ==============================================================================
Expand Down
7 changes: 7 additions & 0 deletions llamafile/llamafile.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,13 @@ typedef void (*llamafile_log_callback)(int level, const char *text, void *user_d
// No-op log callback to disable logging (defined in llamafile.c)
void llamafile_log_callback_null(int level, const char *text, void *user_data);

// Print an INFO-level diagnostic tagged with a backend name.
//
// The line is written to stderr as "<backend>: INFO: <message>" followed by
// a newline, so callers pass only the message body without a prefix or a
// trailing "\n". The call is a no-op unless FLAG_verbose is set.
//
//   backend  tag placed before the message (e.g. "cuda", "metal")
//   fmt      printf-style format string; the variadic arguments must match
//            it (checked by the compiler through the format attribute below)
//
// Defined in llamafile.c.
void llamafile_info(const char *backend, const char *fmt, ...)
__attribute__((format(printf, 2, 3)));

// Set logging callback for Metal dylib (defined in metal.c)
// Pass a no-op callback to disable logging
void llamafile_metal_log_set(llamafile_log_callback log_callback, void *user_data);
Expand Down
44 changes: 22 additions & 22 deletions llamafile/metal.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,8 +276,7 @@ static bool PreprocessMetalShader(const char *app_dir) {
free(impl_content);
free(metal_content);

if (FLAG_verbose)
fprintf(stderr, "metal: preprocessed %s\n", metal_path);
llamafile_info("metal", "preprocessed %s", metal_path);

return true;
}
Expand All @@ -293,8 +292,7 @@ static bool BuildMetal(const char *dso) {
// Since we use versioned paths, source updates come with new versions
struct stat dso_stat;
if (stat(dso, &dso_stat) == 0 && !FLAG_recompile) {
if (FLAG_verbose)
fprintf(stderr, "metal: using cached %s\n", dso);
llamafile_info("metal", "using cached %s", dso);
return true;
}

Expand Down Expand Up @@ -359,8 +357,7 @@ static bool BuildMetal(const char *dso) {

// Compile dynamic shared object
if (needs_rebuild || FLAG_recompile) {
if (FLAG_verbose)
fprintf(stderr, "metal: building ggml-metal.dylib with xcode...\n");
llamafile_info("metal", "building ggml-metal.dylib with xcode...");

char tmpdso[PATH_MAX];
snprintf(tmpdso, PATH_MAX, "%s.XXXXXX", dso);
Expand Down Expand Up @@ -435,10 +432,12 @@ static bool BuildMetal(const char *dso) {
args[argc] = NULL;

if (FLAG_verbose) {
fprintf(stderr, "metal: executing: cc");
for (int j = 1; args[j]; j++)
fprintf(stderr, " %s", args[j]);
fprintf(stderr, "\n");
char cmd[4096];
size_t off = 0;
off += snprintf(cmd + off, sizeof(cmd) - off, "executing: cc");
for (int j = 1; args[j] && off < sizeof(cmd); j++)
off += snprintf(cmd + off, sizeof(cmd) - off, " %s", args[j]);
llamafile_info("metal", "%s", cmd);
}

int pid, ws;
Expand Down Expand Up @@ -499,10 +498,12 @@ static bool BuildMetal(const char *dso) {
args[argc] = NULL;

if (FLAG_verbose) {
fprintf(stderr, "metal: executing: cc");
for (int j = 1; args[j]; j++)
fprintf(stderr, " %s", args[j]);
fprintf(stderr, "\n");
char cmd[4096];
size_t off = 0;
off += snprintf(cmd + off, sizeof(cmd) - off, "executing: cc");
for (int j = 1; args[j] && off < sizeof(cmd); j++)
off += snprintf(cmd + off, sizeof(cmd) - off, " %s", args[j]);
llamafile_info("metal", "%s", cmd);
}

int pid, ws;
Expand Down Expand Up @@ -538,8 +539,7 @@ static bool BuildMetal(const char *dso) {
return false;
}

if (FLAG_verbose)
fprintf(stderr, "metal: successfully built %s\n", dso);
llamafile_info("metal", "successfully built %s", dso);
}

return true;
Expand All @@ -550,7 +550,8 @@ static bool LinkMetal(const char *dso) {
void *lib = cosmo_dlopen(dso, RTLD_LAZY);
if (!lib) {
char *err = cosmo_dlerror();
fprintf(stderr, "metal: %s: failed to load library\n", err ? err : "unknown error");
llamafile_info("metal", "failed to load library %s: %s",
dso, err ? err : "unknown error");
return false;
}

Expand All @@ -570,7 +571,8 @@ static bool LinkMetal(const char *dso) {

if (!ok) {
char *err = cosmo_dlerror();
fprintf(stderr, "metal: %s: not all symbols could be imported\n", err ? err : "unknown error");
llamafile_info("metal", "could not import all symbols from %s: %s",
dso, err ? err : "unknown error");
cosmo_dlclose(lib);
return false;
}
Expand Down Expand Up @@ -617,8 +619,7 @@ static bool ImportMetalImpl(void) {
ggml_backend_reg_t reg = g_metal.backend_metal_reg();
if (reg) {
ggml_backend_register(reg);
if (FLAG_verbose)
fprintf(stderr, "metal: Metal backend registered with GGML\n");
llamafile_info("metal", "Metal backend registered with GGML");
}
}
return true;
Expand All @@ -630,8 +631,7 @@ static bool ImportMetalImpl(void) {
static void ImportMetal(void) {
if (ImportMetalImpl()) {
g_metal.supported = true;
if (FLAG_verbose)
fprintf(stderr, "metal: Apple Metal GPU support successfully loaded\n");
llamafile_info("metal", "Apple Metal GPU support successfully loaded");
} else if (FLAG_gpu == LLAMAFILE_GPU_APPLE) {
fprintf(stderr, "fatal error: support for --gpu %s was explicitly requested, "
"but it wasn't available\n", llamafile_describe_gpu());
Expand Down
Loading
Loading