mozilla-ai
diff --git a/docs/skills/llamafile/testing.md b/docs/skills/llamafile/testing.md
Lines changed: 3 additions & 6 deletions
diff --git a/llama.cpp b/llama.cpp
diff --git a/llama.cpp.patches/README.md b/llama.cpp.patches/README.md
Lines changed: 5 additions & 3 deletions
diff --git a/llama.cpp.patches/llamafile-files/BUILD.mk b/llama.cpp.patches/llamafile-files/BUILD.mk
Lines changed: 26 additions & 2 deletions
diff --git a/llama.cpp.patches/llamafile-files/common/license.cpp b/llama.cpp.patches/llamafile-files/common/license.cpp
Lines changed: 81 additions & 0 deletions
diff --git a/llama.cpp.patches/patches/common_arg.cpp.patch b/llama.cpp.patches/patches/common_arg.cpp.patch
Lines changed: 1 addition & 1 deletion
diff --git a/llama.cpp.patches/patches/common_chat.cpp.patch b/llama.cpp.patches/patches/common_chat.cpp.patch
Lines changed: 1 addition & 1 deletion
diff --git a/llama.cpp.patches/patches/common_common.cpp.patch b/llama.cpp.patches/patches/common_common.cpp.patch
Lines changed: 1 addition & 1 deletion
diff --git a/llama.cpp.patches/patches/common_download.cpp.patch b/llama.cpp.patches/patches/common_download.cpp.patch
Lines changed: 1 addition & 1 deletion
diff --git a/llama.cpp.patches/patches/common_ngram-mod.cpp.patch b/llama.cpp.patches/patches/common_ngram-mod.cpp.patch
Lines changed: 11 additions & 0 deletions
@@ -82,8 +82,7 @@ Tests in llamafile use the `.runs` suffix convention:
 # In tests/BUILD.mk
 .PHONY: o/$(MODE)/tests
 o/$(MODE)/tests: \
-    o/$(MODE)/tests/extract_data_uris_test.runs \
-    o/$(MODE)/tests/minja/minja_integration_test.runs
+    o/$(MODE)/tests/extract_data_uris_test.runs 
 ```
 
 The `.runs` file is a timestamp marker indicating the test passed. The build system:
@@ -125,8 +124,6 @@ Currently in the `new_build_wip` branch, these tests are saved in:
 
 ```
 tests/
-└── minja
-     └── *_test.c     # Jinja template parsing tests
 └── sgemm
      └── *_test.c     # Optimized CPU kernels tests
 ...
@@ -188,10 +185,10 @@ the `tests/BUILD.mk` file, thus they need to be manually compiled and run.
 
 ```sh
 # Build the test
-.cosmocc/4.0.2/bin/make o//tests/minja/minja_integration_test
+.cosmocc/4.0.2/bin/make o//tests/extract_data_uris_test
 
 # Run directly
-.o/tests/minja/minja_integration_test
+./o/tests/extract_data_uris_test
 ```
 
 ### Debug Build
 
@@ -11,7 +11,9 @@ llama.cpp.patches/
 ├── renames.sh             # Script for file renames/moves (if any)
 ├── llamafile-files/       # Additional files to copy into llama.cpp
 │   ├── BUILD.mk           # Makefile for building llama.cpp with cosmocc
-│   └── README.llamafile   # License and modification notes
+│   ├── README.llamafile   # License and modification notes
+│   └── common/
+│       └── license.cpp    # License texts for llama.cpp and bundled libraries (upstream CMake generates this file at build time)
 └── patches/               # Patch files for upstream sources
 ```
 
@@ -40,6 +42,7 @@ These patches address compatibility issues when building with Cosmopolitan libc
 | `common_arg.cpp.patch` | Adds `COSMOCC` platform detection for `PATH_MAX` (includes `linux/limits.h`) |
 | `common_common.cpp.patch` | Adds platform-aware cache directory detection for Cosmopolitan (checks `LOCALAPPDATA`, `XDG_CACHE_HOME`, falls back to `~/.cache/`) |
 | `common_download.cpp.patch` | Adds `COSMOCC` platform detection for `PATH_MAX` |
+| `common_ngram-mod.cpp.patch` | Adds missing `#include <algorithm>` for `std::fill` |
 
 ### Threading and Signal Handling
 
@@ -49,7 +52,7 @@ Cosmopolitan libc has specific behaviors with condition variables and signals th
 |-------|-------------|
 | `common_log.cpp.patch` | Blocks `SIGINT`/`SIGTERM` on logger thread to prevent `EINTR` exceptions; uses `wait_for()` instead of `wait()` to work around XNU futex timeout bug (~72 minute expiry) |
 | `tools_server_server-queue.cpp.patch` | Same threading fixes for server queue: signal masking and `wait_for()` timeouts |
-| `vendor_cpp-httplib_httplib.h.patch` | Fixes httplib thread pool with `wait_for()` instead of `wait()` for XNU futex compatibility |
+| `vendor_cpp-httplib_httplib.cpp.patch` | Fixes httplib thread pool with `wait_for()` instead of `wait()` for XNU futex compatibility |
 
 ### Cross-Module Memory Management
 
@@ -93,7 +96,6 @@ These patches integrate llamafile's file handling APIs for loading models from b
 | Patch | Description |
 |-------|-------------|
 | `vendor_miniaudio_miniaudio.h.patch` | Removes `__COSMOPOLITAN__` from Windows platform detection (Cosmopolitan handles this at runtime) |
-| `vendor_minja_minja.hpp.patch` | Replaces regex-based Jinja comment parsing with manual parsing to prevent stack overflow on large templates |
 
 ### Miscellaneous
 
 
@@ -23,6 +23,7 @@ GGML_SRCS_C := \
 	llama.cpp/ggml/src/ggml-cpu/quants.c
 
 GGML_SRCS_CPP := \
+	llama.cpp/ggml/src/ggml-backend-dl.cpp \
 	llama.cpp/ggml/src/ggml-backend-reg.cpp \
 	llama.cpp/ggml/src/ggml-backend.cpp \
 	llama.cpp/ggml/src/ggml-opt.cpp \
@@ -71,12 +72,14 @@ LLAMA_SRCS_CPP := \
 	llama.cpp/src/models/deci.cpp \
 	llama.cpp/src/models/deepseek.cpp \
 	llama.cpp/src/models/deepseek2.cpp \
+	llama.cpp/src/models/delta-net-base.cpp \
 	llama.cpp/src/models/dots1.cpp \
 	llama.cpp/src/models/dream.cpp \
 	llama.cpp/src/models/ernie4-5-moe.cpp \
 	llama.cpp/src/models/ernie4-5.cpp \
 	llama.cpp/src/models/exaone.cpp \
 	llama.cpp/src/models/exaone4.cpp \
+	llama.cpp/src/models/exaone-moe.cpp \
 	llama.cpp/src/models/falcon-h1.cpp \
 	llama.cpp/src/models/falcon.cpp \
 	llama.cpp/src/models/gemma-embedding.cpp \
@@ -90,14 +93,16 @@ LLAMA_SRCS_CPP := \
 	llama.cpp/src/models/gptneox.cpp \
 	llama.cpp/src/models/granite-hybrid.cpp \
 	llama.cpp/src/models/granite.cpp \
-	llama.cpp/src/models/graph-context-mamba.cpp \
+	llama.cpp/src/models/mamba-base.cpp \
 	llama.cpp/src/models/grok.cpp \
 	llama.cpp/src/models/grovemoe.cpp \
 	llama.cpp/src/models/hunyuan-dense.cpp \
 	llama.cpp/src/models/hunyuan-moe.cpp \
 	llama.cpp/src/models/internlm2.cpp \
 	llama.cpp/src/models/jais.cpp \
+	llama.cpp/src/models/jais2.cpp \
 	llama.cpp/src/models/jamba.cpp \
+	llama.cpp/src/models/kimi-linear.cpp \
 	llama.cpp/src/models/lfm2.cpp \
 	llama.cpp/src/models/llada-moe.cpp \
 	llama.cpp/src/models/llada.cpp \
@@ -120,6 +125,7 @@ LLAMA_SRCS_CPP := \
 	llama.cpp/src/models/openai-moe-iswa.cpp \
 	llama.cpp/src/models/openelm.cpp \
 	llama.cpp/src/models/orion.cpp \
+	llama.cpp/src/models/paddleocr.cpp \
 	llama.cpp/src/models/pangu-embedded.cpp \
 	llama.cpp/src/models/phi2.cpp \
 	llama.cpp/src/models/phi3.cpp \
@@ -134,6 +140,8 @@ LLAMA_SRCS_CPP := \
 	llama.cpp/src/models/qwen3.cpp \
 	llama.cpp/src/models/qwen3moe.cpp \
 	llama.cpp/src/models/qwen3next.cpp \
+	llama.cpp/src/models/qwen35.cpp \
+	llama.cpp/src/models/qwen35moe.cpp \
 	llama.cpp/src/models/qwen3vl-moe.cpp \
 	llama.cpp/src/models/qwen3vl.cpp \
 	llama.cpp/src/models/refact.cpp \
@@ -148,6 +156,7 @@ LLAMA_SRCS_CPP := \
 	llama.cpp/src/models/smollm3.cpp \
 	llama.cpp/src/models/stablelm.cpp \
 	llama.cpp/src/models/starcoder.cpp \
+	llama.cpp/src/models/step35-iswa.cpp \
 	llama.cpp/src/models/starcoder2.cpp \
 	llama.cpp/src/models/t5-dec.cpp \
 	llama.cpp/src/models/t5-enc.cpp \
@@ -167,14 +176,15 @@ LLAMA_SRCS_CPP := \
 	llama.cpp/src/llama-kv-cache-iswa.cpp \
 	llama.cpp/src/llama-kv-cache.cpp \
 	llama.cpp/src/llama-memory-hybrid.cpp \
+	llama.cpp/src/llama-memory-hybrid-iswa.cpp \
 	llama.cpp/src/llama-memory-recurrent.cpp \
 	llama.cpp/src/llama-memory.cpp \
 	llama.cpp/src/llama-mmap.cpp \
 	llama.cpp/src/llama-model-loader.cpp \
 	llama.cpp/src/llama-model-saver.cpp \
 	llama.cpp/src/llama-model.cpp \
 	llama.cpp/src/llama-quant.cpp \
-	llama.cpp/src/llama-sampling.cpp \
+	llama.cpp/src/llama-sampler.cpp \
 	llama.cpp/src/llama-vocab.cpp \
 	llama.cpp/src/unicode-data.cpp \
 	llama.cpp/src/unicode.cpp
@@ -193,12 +203,22 @@ COMMON_SRCS_CPP := \
 	llama.cpp/common/chat.cpp \
 	llama.cpp/common/common.cpp \
 	llama.cpp/common/console.cpp \
+	llama.cpp/common/debug.cpp \
 	llama.cpp/common/download.cpp \
+	llama.cpp/common/jinja/caps.cpp \
+	llama.cpp/common/jinja/lexer.cpp \
+	llama.cpp/common/jinja/parser.cpp \
+	llama.cpp/common/jinja/runtime.cpp \
+	llama.cpp/common/jinja/string.cpp \
+	llama.cpp/common/jinja/value.cpp \
 	llama.cpp/common/json-partial.cpp \
 	llama.cpp/common/json-schema-to-grammar.cpp \
+	llama.cpp/common/license.cpp \
 	llama.cpp/common/llguidance.cpp \
 	llama.cpp/common/log.cpp \
 	llama.cpp/common/ngram-cache.cpp \
+	llama.cpp/common/ngram-map.cpp \
+	llama.cpp/common/ngram-mod.cpp \
 	llama.cpp/common/peg-parser.cpp \
 	llama.cpp/common/preset.cpp \
 	llama.cpp/common/regex-partial.cpp \
@@ -256,10 +276,14 @@ MTMD_SRCS_CPP := \
 	llama.cpp/tools/mtmd/models/conformer.cpp \
 	llama.cpp/tools/mtmd/models/glm4v.cpp \
 	llama.cpp/tools/mtmd/models/internvl.cpp \
+	llama.cpp/tools/mtmd/models/kimik25.cpp \
 	llama.cpp/tools/mtmd/models/kimivl.cpp \
 	llama.cpp/tools/mtmd/models/llama4.cpp \
 	llama.cpp/tools/mtmd/models/llava.cpp \
 	llama.cpp/tools/mtmd/models/minicpmv.cpp \
+	llama.cpp/tools/mtmd/models/mobilenetv5.cpp \
+	llama.cpp/tools/mtmd/models/nemotron-v2-vl.cpp \
+	llama.cpp/tools/mtmd/models/paddleocr.cpp \
 	llama.cpp/tools/mtmd/models/pixtral.cpp \
 	llama.cpp/tools/mtmd/models/qwen2vl.cpp \
 	llama.cpp/tools/mtmd/models/qwen3vl.cpp \
 
@@ -0,0 +1,81 @@
+// Generated by CMake. nullptr-terminated list of license texts (llama.cpp, cpp-httplib, jsonhpp), one raw string literal per entry.
+
+const char* LICENSES[] = {
+R"=L=(License for llama.cpp
+=====================
+
+MIT License
+
+Copyright (c) 2023-2026 The ggml authors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+)=L=",
+R"=L=(License for cpp-httplib
+=======================
+
+The MIT License (MIT)
+
+Copyright (c) 2017 yhirose
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+)=L=",
+R"=L=(License for jsonhpp
+===================
+
+MIT License
+
+Copyright (c) 2013-2025 Niels Lohmann
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+)=L=",
+nullptr  // end-of-list marker
+};
@@ -1,7 +1,7 @@
 diff --git a/common/arg.cpp b/common/arg.cpp
 --- a/llama.cpp/common/arg.cpp
 +++ b/llama.cpp/common/arg.cpp
-@@ -34,6 +34,8 @@
+@@ -36,6 +36,8 @@
  #ifndef __EMSCRIPTEN__
  #ifdef __linux__
  #include <linux/limits.h>
 
@@ -1,7 +1,7 @@
 diff --git a/common/chat.cpp b/common/chat.cpp
 --- a/llama.cpp/common/chat.cpp
 +++ b/llama.cpp/common/chat.cpp
-@@ -1698,7 +1698,7 @@ static common_chat_params common_chat_params_init_deepseek_v3_1(const common_cha
+@@ -1791,7 +1791,7 @@ static common_chat_params common_chat_params_init_deepseek_v3_1(const common_cha
      };
 
      auto prompt = apply(tmpl, inputs,
 
@@ -1,7 +1,7 @@
 diff --git a/common/common.cpp b/common/common.cpp
 --- a/llama.cpp/common/common.cpp
 +++ b/llama.cpp/common/common.cpp
-@@ -920,6 +920,16 @@ std::string fs_get_cache_directory() {
+@@ -874,6 +874,16 @@ std::string fs_get_cache_directory() {
          cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
  #elif defined(_WIN32)
          cache_directory = std::getenv("LOCALAPPDATA");
 
@@ -1,7 +1,7 @@
 diff --git a/common/download.cpp b/common/download.cpp
 --- a/llama.cpp/common/download.cpp
 +++ b/llama.cpp/common/download.cpp
-@@ -29,6 +29,8 @@
+@@ -24,6 +24,8 @@
  #ifndef __EMSCRIPTEN__
  #ifdef __linux__
  #include <linux/limits.h>
 
@@ -0,0 +1,11 @@
+diff --git a/common/ngram-mod.cpp b/common/ngram-mod.cpp
+--- a/llama.cpp/common/ngram-mod.cpp
++++ b/llama.cpp/common/ngram-mod.cpp
+@@ -1,5 +1,7 @@
+ #include "ngram-mod.h"
+ 
++#include <algorithm>
++
+ //
+ // common_ngram_mod
+ //