Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions diffusionfile/BUILD.mk
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,7 @@ DIFFUSIONFILE_CPPFLAGS := \

# llamafile support objects linked into diffusionfile. The runtime GPU loaders
# (cuda.o, metal.o, vulkan.o) are pulled in through the gpu.a archive instead
# of being listed one by one, so adding a GPU backend source does not require
# touching this list.
DIFFUSIONFILE_LLAMAFILE_OBJS := \
    o/$(MODE)/llamafile/llamafile.o \
    o/$(MODE)/llamafile/gpu.a \
    o/$(MODE)/llamafile/zip.o \
    o/$(MODE)/llamafile/check_cpu.o

Expand Down
6 changes: 5 additions & 1 deletion docs/skills/llamafile/SKILL.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
name: llamafile
description: This skill should be used when the user asks to "build llamafile", "rebuild llamafile", "run llamafile", "run llamafile tests", "debug llamafile", "set up llamafile", "update patches", "fix patch conflict", "update llama.cpp", "pull latest llama.cpp", "sync upstream llama.cpp", "reset submodules", "write a test for llamafile", "how does llamafile work", "llamafile architecture", or needs guidance on the llamafile build system, patch workflow, submodule integration, cosmocc toolchain, or development practices.
version: 0.1.3
version: 0.1.4
---

# Llamafile Development Guide
Expand Down Expand Up @@ -120,6 +120,10 @@ Outputs: `o/$(MODE)/package/file.o`

Binaries include both x86_64 and aarch64 code paths with runtime CPU feature detection (AVX, AVX2, AVX-512, ARM NEON).

### GPU Backend Loaders

Dynamically loaded backends that export the ggml C ABI (CUDA, ROCm, Vulkan) all go through the shared probe core in `llamafile/gpu_backend.c`. Each backend is just a `GpuBackendDesc` plus a link thunk. The core runs the sequence load → log-suppress → **device-count gate** → register; the gate rejects DSOs that report 0 devices so that AUTO falls back. The foreign probe call is wrapped in a SIGSEGV/SIGABRT crash guard, because driver init can fault across the cosmo/ms_abi boundary (issue #988). Metal stays separate by design (runtime-compiled, no ms_abi split, no device gate). When adding or changing a backend: route it through the core, keep the gate, and add a case to `tests/gpu_backend_test.cpp`. A more detailed design doc lives separately.

## Main Executables

After building, find binaries in `o/$(MODE)/`:
Expand Down
4 changes: 1 addition & 3 deletions llama.cpp.patches/llamafile-files/BUILD.mk
Original file line number Diff line number Diff line change
Expand Up @@ -426,9 +426,7 @@ UI_GEN_OBJ := $(UI_CPP_GEN:%.cpp=%.cpp.o)
# llamafile objects add dynamic GPU support (Metal, CUDA, ROCm, Vulkan) to the
# llama.cpp tools. The GPU loaders are linked through the gpu.a archive rather
# than listed object by object, so a new backend source does not require
# editing this list.
TOOL_LLAMAFILE_OBJS := \
    o/$(MODE)/llamafile/llamafile.o \
    o/$(MODE)/llamafile/gpu.a \
    o/$(MODE)/llamafile/zip.o

# Server objects depend on the llamafile bridge header and on the
Expand Down
19 changes: 19 additions & 0 deletions llamafile/BUILD.mk
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ LLAMAFILE_HIGHLIGHT_SRCS := \
LLAMAFILE_SRCS_C := \
llamafile/bestline.c \
llamafile/cuda.c \
llamafile/gpu_backend.c \
llamafile/llamafile.c \
llamafile/metal.c \
llamafile/vulkan.c \
Expand Down Expand Up @@ -334,6 +335,24 @@ o/$(MODE)/llamafile/%.o: llamafile/%.c
@mkdir -p $(@D)
$(CC) $(CFLAGS) $(LLAMAFILE_CPPFLAGS) -c -o $@ $<

# ==============================================================================
# GPU backend archive
# ==============================================================================
# gpu.a bundles the runtime GPU loaders (CUDA/ROCm, Vulkan, Metal) together
# with their shared probe core into one linkable unit.
#
# The non-llamafile executables (whisperfile, diffusionfile, the llama.cpp
# tools) reach these objects through llamafile_has_gpu(). Because they link the
# archive rather than naming each object, a new GPU backend source only has to
# be added here, not in every consumer's BUILD.mk.
#
# This is tidiness only: every consumer references llamafile_has_gpu(), so all
# members are pulled and the build output is unchanged.
LLAMAFILE_GPU_OBJS := $(addprefix o/$(MODE)/llamafile/,cuda.o gpu_backend.o metal.o vulkan.o)

# No recipe is given here; presumably a generic archive rule defined elsewhere
# in the build builds the .a from these prerequisites -- TODO confirm.
o/$(MODE)/llamafile/gpu.a: $(LLAMAFILE_GPU_OBJS)

# Compile a C++ source under llamafile/ into the matching object under
# o/$(MODE)/llamafile/, creating the output directory first.
o/$(MODE)/llamafile/%.o: llamafile/%.cpp
	@mkdir -p $(@D)
	$(CXX) $(CXXFLAGS) $(LLAMAFILE_CPPFLAGS) -c -o $@ $<
Expand Down
Loading
Loading