
Release v2.4.0 #277


Merged
merged 39 commits on Dec 2, 2024
253f3e8
first commit on iOS (by Rylan)
amakropoulos Oct 29, 2024
176c4a9
android bug fixes
amakropoulos Nov 17, 2024
ecff719
test by static loading of LlamaLib
amakropoulos Nov 18, 2024
a9e38c3
Revert "test by static loading of LlamaLib"
amakropoulos Nov 18, 2024
c80889a
dont set lora weights without loras
amakropoulos Nov 20, 2024
4575fec
iOS in StreamingAssets
amakropoulos Nov 20, 2024
52fad9c
keep iOS dylib in StreamingAssets
amakropoulos Nov 20, 2024
cd73ea8
revert not needed adaptations
amakropoulos Nov 20, 2024
1dfb510
implement iPhone model download
amakropoulos Nov 20, 2024
2d314c7
bump LlamaLib to v1.1.13
amakropoulos Nov 20, 2024
6d92e09
iOS usearch implementation
amakropoulos Nov 21, 2024
0f5cd7b
add iOS to Readme
amakropoulos Nov 21, 2024
114faa0
update changelogs
amakropoulos Nov 25, 2024
ade8a2b
Added support for Windows / Linux and OSX server builds
ndr0n Nov 8, 2024
df86f71
Merge ade8a2b7ff00d419046b1c870d1b54070969825e into cdaff0264b5fbb4ea…
amakropoulos Nov 25, 2024
2ed2ce9
update VERSION
amakropoulos Nov 25, 2024
fc38dd6
Clear temp build directory before building
amakropoulos Nov 27, 2024
480c154
update changelogs
amakropoulos Nov 27, 2024
fcf123e
show history in ChatBot
amakropoulos Nov 27, 2024
930a38d
add clear chat method
amakropoulos Nov 27, 2024
74a3bc7
copy android dll during build instead of LLMUnity setup
amakropoulos Nov 29, 2024
f76ae6e
delete other existing LLMUnity versions
amakropoulos Nov 29, 2024
77dab72
unify macos usearch library for arm64 and x86_64
amakropoulos Nov 29, 2024
b75fc66
update changelogs
amakropoulos Dec 2, 2024
a8bcb76
add function calling sample, rename AndroidDemo to MobileDemo
amakropoulos Dec 2, 2024
58a69cd
function calling readme
amakropoulos Dec 2, 2024
9568707
fix rag link
amakropoulos Dec 2, 2024
449bbff
update changelogs
amakropoulos Dec 2, 2024
ffaa720
add info on the chat variable
amakropoulos Dec 2, 2024
d3af7c6
add rag add/search for groups of data
amakropoulos Dec 2, 2024
d42c0ea
bump LlamaLib to v1.2.0
amakropoulos Dec 2, 2024
45abf42
update changelogs
amakropoulos Dec 2, 2024
8ef5fb3
update test strings
amakropoulos Dec 2, 2024
4f73005
adapt test strings for windows
amakropoulos Dec 2, 2024
f488839
add unity 6
amakropoulos Dec 2, 2024
c4c7488
remove support for extras (flash attention, iQ quants)
amakropoulos Dec 2, 2024
75cc269
update changelogs
amakropoulos Dec 2, 2024
ddc8b04
remove support for LLM base prompt
amakropoulos Dec 2, 2024
5a769f4
update changelogs
amakropoulos Dec 2, 2024
2 changes: 1 addition & 1 deletion .github/doxygen/Doxyfile
Original file line number Diff line number Diff line change
@@ -48,7 +48,7 @@ PROJECT_NAME = "LLM for Unity"
# could be handy for archiving the generated documentation or if some version
# control system is used.

PROJECT_NUMBER = v2.3.0
PROJECT_NUMBER = v2.4.0

# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a
18 changes: 18 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,21 @@
## v2.4.0
#### 🚀 Features

- iOS deployment (PR: #267)
- Improve building process (PR: #282)
- Add structured output / function calling sample (PR: #281)
- Update LlamaLib to v1.2.0 (llama.cpp b4218) (PR: #283)

#### 🐛 Fixes

- Clear temp build directory before building (PR: #278)

#### 📦 General

- Remove support for extras (flash attention, iQ quants) (PR: #284)
- Remove support for LLM base prompt (PR: #285)


## v2.3.0
#### 🚀 Features

12 changes: 10 additions & 2 deletions CHANGELOG.release.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
### 🚀 Features

- Implement Retrieval Augmented Generation (RAG) in LLMUnity (PR: #246)
- iOS deployment (PR: #267)
- Improve building process (PR: #282)
- Add structured output / function calling sample (PR: #281)
- Update LlamaLib to v1.2.0 (llama.cpp b4218) (PR: #283)

### 🐛 Fixes

- Fixed build conflict, endless import of resources. (PR: #266)
- Clear temp build directory before building (PR: #278)

### 📦 General

- Remove support for extras (flash attention, iQ quants) (PR: #284)
- Remove support for LLM base prompt (PR: #285)

7 changes: 0 additions & 7 deletions Editor/LLMEditor.cs
@@ -111,7 +111,6 @@ public override void AddModelSettings(SerializedObject llmScriptSO)
if (llmScriptSO.FindProperty("advancedOptions").boolValue)
{
attributeClasses.Add(typeof(ModelAdvancedAttribute));
if (LLMUnitySetup.FullLlamaLib) attributeClasses.Add(typeof(ModelExtrasAttribute));
}
ShowPropertiesOfClass("", llmScriptSO, attributeClasses, false);
Space();
@@ -445,18 +444,12 @@ private void CopyToClipboard(string text)
te.Copy();
}

public void AddExtrasToggle()
{
if (ToggleButton("Use extras", LLMUnitySetup.FullLlamaLib)) LLMUnitySetup.SetFullLlamaLib(!LLMUnitySetup.FullLlamaLib);
}

public override void AddOptionsToggles(SerializedObject llmScriptSO)
{
AddDebugModeToggle();

EditorGUILayout.BeginHorizontal();
AddAdvancedOptionsToggle(llmScriptSO);
AddExtrasToggle();
EditorGUILayout.EndHorizontal();
Space();
}
86 changes: 62 additions & 24 deletions README.md
@@ -28,22 +28,22 @@ LLM for Unity is built on top of the awesome [llama.cpp](https://github.com/gger
<a href="#games-using-llm-for-unity" style=color: black>Games using LLM for Unity</a>&nbsp;&nbsp;•&nbsp;
<a href="#setup" style=color: black>Setup</a>&nbsp;&nbsp;•&nbsp;
<a href="#how-to-use" style=color: black>How to use</a>&nbsp;&nbsp;•&nbsp;
<a href="#semantic-search-with-a-retrieval-augmented-generation-(rag)-system" style=color: black>RAG</a>&nbsp;&nbsp;•&nbsp;
<a href="#semantic-search-with-a-retrieval-augmented-generation-rag-system" style=color: black>RAG</a>&nbsp;&nbsp;•&nbsp;
<a href="#llm-model-management" style=color: black>LLM model management</a>&nbsp;&nbsp;•&nbsp;
<a href="#examples" style=color: black>Examples</a>&nbsp;&nbsp;•&nbsp;
<a href="#options" style=color: black>Options</a>&nbsp;&nbsp;•&nbsp;
<a href="#license" style=color: black>License</a>
</sub>

## At a glance
- 💻 Cross-platform! Windows, Linux, macOS and Android
- 💻 Cross-platform! Windows, Linux, macOS, iOS and Android
- 🏠 Runs locally without internet access. No data ever leave the game!
- ⚡ Blazing fast inference on CPU and GPU (Nvidia, AMD, Apple Metal)
- 🤗 Supports all major LLM models
- 🔧 Easy to setup, call with a single line of code
- 💰 Free to use for both personal and commercial purposes

🧪 Tested on Unity: 2021 LTS, 2022 LTS, 2023<br>
🧪 Tested on Unity: 2021 LTS, 2022 LTS, 2023, Unity 6<br>
🚦 [Upcoming Releases](https://github.com/orgs/undreamai/projects/2/views/10)

## How to help
@@ -140,18 +140,57 @@ That's all ✨!
You can also:

<details>
<summary>Build a mobile app on Android</summary>
<summary>Build a mobile app</summary>

To build an Android app you need to specify the `IL2CPP` scripting backend and the `ARM64` as the target architecture in the player settings.<br>
These settings can be accessed from the `Edit > Project Settings` menu within the `Player > Other Settings` section.<br>
**iOS**
iOS can be built with the default player settings.

**Android**
On Android you need to specify the `IL2CPP` scripting backend and the `ARM64` as the target architecture in the player settings.<br>
These settings can be accessed from the `Edit > Project Settings` menu within the `Player > Other Settings` section.<br>
<img width="400" src=".github/android.png">

It is also a good idea to enable the `Download on Build` option in the LLM GameObject to download the model on launch in order to keep the app size small.
Since mobile apps should have a small size, you can download the LLM models the first time the app launches instead of shipping them with the app.
This functionality can be enabled with the `Download on Build` option.
In your project you can wait until the model download is complete with:
``` c#
await LLM.WaitUntilModelSetup();
```
You can also receive callbacks with the download progress during the download:
``` c#
await LLM.WaitUntilModelSetup(SetProgress);

void SetProgress(float progress)
{
    string progressPercent = ((int)(progress * 100)).ToString() + "%";
    Debug.Log($"Download progress: {progressPercent}");
}
```
This is useful for presenting a progress bar or similar UI.
The [MobileDemo](Samples~/MobileDemo) is an example application for Android / iOS.

</details>
<details>
<summary>Save / Load your chat history</summary>
<summary>Restrict the output of the LLM / Function calling</summary>

To restrict the output of the LLM you can use a GBNF grammar; read more [here](https://github.com/ggerganov/llama.cpp/tree/master/grammars).<br>
The grammar can be saved in a .gbnf file and loaded into the LLMCharacter with the `Load Grammar` button (Advanced options).<br>
For instance, to receive replies in JSON format you can use the [json.gbnf](https://github.com/ggerganov/llama.cpp/blob/b4218/grammars/json.gbnf) grammar.<br>

Alternatively you can set the grammar directly with code:
``` c#
llmCharacter.grammarString = "your grammar here";
```

For function calling, you can similarly define a grammar that allows only the function names as output, and then call the respective function.<br>
You can look into the [FunctionCalling](Samples~/FunctionCalling) sample for an example implementation.
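As an illustration (a sketch only: the grammar string and the `Attack` / `Flee` function names below are hypothetical, not taken from the sample), restricting replies to a fixed set of function names could look like:

``` c#
// Hypothetical sketch: restrict the reply to one of two function names.
// The grammar and the Attack/Flee functions are illustrative assumptions.
llmCharacter.grammarString = "root ::= \"Attack\" | \"Flee\"";

string reply = await llmCharacter.Chat("An enemy is charging at you!");
if (reply == "Attack") Attack();
else if (reply == "Flee") Flee();
```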

</details>
<details>
<summary>Access / Save / Load your chat history</summary>
The chat history of an `LLMCharacter` is retained in the `chat` variable, which is a list of `ChatMessage` objects.<br>
`ChatMessage` is a struct that defines the `role` of the message and its `content`.<br>
The first element of the list is always the system prompt, followed by alternating player prompts and AI replies.<br>
You can modify the chat history directly in this list.<br>
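For instance (a sketch assuming only the `role` and `content` fields described above), the history can be inspected or edited as follows:

``` c#
// Sketch: log every message in the chat history
foreach (ChatMessage message in llmCharacter.chat)
    Debug.Log($"{message.role}: {message.content}");

// replace the system prompt (always the first entry)
llmCharacter.chat[0] = new ChatMessage { role = "system", content = "You are a gruff orc merchant." };
```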

To automatically save / load your chat history, you can specify the `Save` parameter of the LLMCharacter to the filename (or relative path) of your choice.
The file is saved in the [persistentDataPath folder of Unity](https://docs.unity3d.com/ScriptReference/Application-persistentDataPath.html).
@@ -375,6 +414,14 @@ public class MyScript : MonoBehaviour
}
```

You can also add / search text inputs for groups of data, e.g. for a specific character or scene:
``` c#
// add the inputs to the RAG for a group of data e.g. an orc character
foreach (string input in inputs) await rag.Add(input, "orc");
// get the 2 most similar inputs for the group of data e.g. the orc character
(string[] results, float[] distances) = await rag.Search("how do you feel?", 2, "orc");
```

You can save the RAG state (stored in the `Assets/StreamingAssets` folder):
``` c#
rag.Save("rag.zip");
@@ -429,12 +476,13 @@ If you have loaded a model locally you need to set its URL through the expanded

## Examples
The [Samples~](Samples~) folder contains several examples of interaction 🤖:
- [SimpleInteraction](Samples~/SimpleInteraction): Demonstrates a simple interaction with an AI character
- [MultipleCharacters](Samples~/MultipleCharacters): Demonstrates a simple interaction using multiple AI characters
- [RAG](Samples~/RAG): RAG sample. Includes an example using the RAG to feed information to a LLM
- [ChatBot](Samples~/ChatBot): Demonstrates interaction between a player and a AI with a UI similar to a messaging app (see image below)
- [SimpleInteraction](Samples~/SimpleInteraction): Simple interaction with an AI character
- [MultipleCharacters](Samples~/MultipleCharacters): Simple interaction using multiple AI characters
- [FunctionCalling](Samples~/FunctionCalling): Function calling sample with structured output from the LLM
- [RAG](Samples~/RAG): Semantic search using a Retrieval Augmented Generation (RAG) system. Includes an example using a RAG to feed information to an LLM
- [MobileDemo](Samples~/MobileDemo): Example mobile app for Android / iOS with an initial screen displaying the model download progress
- [ChatBot](Samples~/ChatBot): Interaction between a player and an AI with a UI similar to a messaging app (see image below)
- [KnowledgeBaseGame](Samples~/KnowledgeBaseGame): Simple detective game using a knowledge base to provide information to the LLM based on [google/mysteryofthreebots](https://github.com/google/mysteryofthreebots)
- [AndroidDemo](Samples~/AndroidDemo): Example Android app with an initial screen with model download progress

<img width="400" src=".github/demo.gif">

@@ -451,8 +499,7 @@ Save the scene, run and enjoy!
### LLM Settings

- `Show/Hide Advanced Options` Toggle to show/hide advanced options from below
- `Log Level` select how verbose the log messages are
- `Use extras` select to install and allow the use of extra features (flash attention and IQ quants)
- `Log Level` select how verbose the log messages are

#### 💻 Setup Settings

@@ -503,22 +550,13 @@ If the user's GPU is not supported, the LLM will fall back to the CPU
- `Chat Template` the chat template being used for the LLM
- `Lora` the path of the LoRAs being used (relative to the Assets/StreamingAssets folder)
- `Lora Weights` the weights of the LoRAs being used
- `Flash Attention` click to use flash attention in the model (if `Use extras` is enabled)

</details>

#### 🗨️ Chat Settings
- <details><summary>Advanced options</summary>

- `Base Prompt` a common base prompt to use across all LLMCharacter objects using the LLM

</details>

### LLMCharacter Settings

- `Show/Hide Advanced Options` Toggle to show/hide advanced options from below
- `Log Level` select how verbose the log messages are
- `Use extras` select to install and allow the use of extra features (flash attention and IQ quants)

#### 💻 Setup Settings
<div>
Binary file removed Resources/usearch/arm64/libusearch_c.dylib
Binary file not shown.
81 changes: 0 additions & 81 deletions Resources/usearch/arm64/libusearch_c.dylib.meta

This file was deleted.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file added Resources/usearch/iOS/libusearch_static_c.a
Binary file not shown.
27 changes: 27 additions & 0 deletions Resources/usearch/iOS/libusearch_static_c.a.meta



8 changes: 8 additions & 0 deletions Resources/usearch/macos.meta


Binary file not shown.
33 changes: 33 additions & 0 deletions Resources/usearch/macos/libusearch_c.dylib.meta

