
Release v2.2.0 #220

Merged: 69 commits, merged on Aug 27, 2024
91f74ba
fix set template for remote setup
amakropoulos Aug 16, 2024
897fbe6
update changelogs
amakropoulos Aug 16, 2024
064f29f
Merge 897fbe63fb1a1be8141e92ed8e7da3ebaee32cbd into bccec86239bea1b36…
amakropoulos Aug 16, 2024
a9ca159
update VERSION
amakropoulos Aug 16, 2024
df32009
bump LlamaLib to v1.1.8
amakropoulos Aug 18, 2024
afe2752
add embedding functionality
amakropoulos Aug 19, 2024
5ee7930
import embeddings and lora adapters
amakropoulos Aug 19, 2024
4a80c56
add structs for embeddings and lora adapters
amakropoulos Aug 19, 2024
5e8d05c
allow multiple loras
amakropoulos Aug 19, 2024
dfb0e6a
implement callback functionality for embeddings and lora adapters
amakropoulos Aug 19, 2024
3dc59af
fix for lora splitting
amakropoulos Aug 19, 2024
7c85dc4
update to latest LlamaLib, add embedding test
amakropoulos Aug 19, 2024
5c4c04d
update changelogs
amakropoulos Aug 19, 2024
3ab6cb4
update changelogs
amakropoulos Aug 19, 2024
b88e13b
Read context length and warn if it is very large
amakropoulos Aug 19, 2024
56ec876
update changelogs
amakropoulos Aug 19, 2024
9fdd585
remove debug message
amakropoulos Aug 19, 2024
501577e
add Llama 3.1 and Gemma2 models
amakropoulos Aug 19, 2024
ff1f300
add Gemma chat template
amakropoulos Aug 19, 2024
e8cb44f
fix crash when stopping scene before LLM creation
amakropoulos Aug 20, 2024
f99ee2f
update changelogs
amakropoulos Aug 20, 2024
225957d
Point to gguf format for lora
ltoniazzi Aug 20, 2024
cf2f1b7
move around code
amakropoulos Aug 21, 2024
d19e7e0
improve library setup and include build for extras
amakropoulos Aug 21, 2024
41cea64
use full library for cuda if set
amakropoulos Aug 21, 2024
1608ff1
include full build if set, remove setup dir
amakropoulos Aug 21, 2024
247c8eb
add flash attention argument
amakropoulos Aug 21, 2024
137ba05
add setup extras button, show extras arguments if set
amakropoulos Aug 21, 2024
e08f843
download library after loading prefs, set max download progress of 0.…
amakropoulos Aug 21, 2024
d170f87
add extra and flash attention options to readme
amakropoulos Aug 21, 2024
54398ee
update changelogs
amakropoulos Aug 21, 2024
5f8deb7
use CallWithLock function to lock lib calls
amakropoulos Aug 21, 2024
45fc661
remote request with retries
amakropoulos Aug 22, 2024
92efe9c
check template if null before using it
amakropoulos Aug 22, 2024
c478d93
recompute nkeep when template changes
amakropoulos Aug 22, 2024
68560b7
add retries to readme
amakropoulos Aug 22, 2024
9128847
set template before starting server
amakropoulos Aug 22, 2024
72e24dd
update changelogs
amakropoulos Aug 22, 2024
51d27b2
modify how to help
amakropoulos Aug 22, 2024
34c300f
add relative path function, expose create empty file
amakropoulos Aug 23, 2024
5a086d5
explicitly specify editor and runtime asset management
amakropoulos Aug 23, 2024
f4112a1
tests for explicit editor and runtime asset management
amakropoulos Aug 23, 2024
59494fd
add full path function
amakropoulos Aug 23, 2024
d9cd26e
use the LLMUnitySetup.GetFullPath function
amakropoulos Aug 23, 2024
13f5eae
implement lora manager to allow easy setup and switch
amakropoulos Aug 23, 2024
77ee950
la
amakropoulos Aug 23, 2024
61da5f6
fixes to lora functions, update when changed
amakropoulos Aug 24, 2024
96a3b78
shorten lora string assignment
amakropoulos Aug 24, 2024
4c31871
add lora assignment tests
amakropoulos Aug 24, 2024
920e4a1
add lora test
amakropoulos Aug 26, 2024
38c5d8b
update changelogs
amakropoulos Aug 26, 2024
675f02c
load manager strings at start
amakropoulos Aug 26, 2024
61eaaaa
fix contains and fromStrings
amakropoulos Aug 26, 2024
38ceb0b
bump LlamaLib to v1.1.9
amakropoulos Aug 26, 2024
eb73824
rename SetLoraScale to SetLoraWeight
amakropoulos Aug 26, 2024
70122d8
add lora weights to readme
amakropoulos Aug 26, 2024
f8eef51
update changelogs
amakropoulos Aug 26, 2024
f68a166
add AI Speak game
amakropoulos Aug 26, 2024
fc5cca2
Merge f68a16696955b3fd00bce50a508200180123e0d7 into e4077ae33a170be25…
amakropoulos Aug 26, 2024
651cd49
update VERSION
amakropoulos Aug 26, 2024
b14fdd5
add Embeddings to Readme
amakropoulos Aug 26, 2024
b6c177d
bump LlamaLib to v1.1.10
amakropoulos Aug 26, 2024
b3f0d50
add test for lora weight change
amakropoulos Aug 27, 2024
53e9ae5
add test for remote setup
amakropoulos Aug 27, 2024
c7b8da6
return list of loras instead of json
amakropoulos Aug 27, 2024
c93a618
add function to change multiple lora weights
amakropoulos Aug 27, 2024
a4165aa
add test for function to change multiple lora weights
amakropoulos Aug 27, 2024
cca81b8
allow relative StreamingAssets paths for models
amakropoulos Aug 27, 2024
fdd5e85
update changelogs
amakropoulos Aug 27, 2024
2 changes: 1 addition & 1 deletion .github/doxygen/Doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ PROJECT_NAME = "LLM for Unity"
# could be handy for archiving the generated documentation or if some version
# control system is used.

PROJECT_NUMBER = v2.1.1
PROJECT_NUMBER = v2.2.0

# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a
Expand Down
20 changes: 20 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,23 @@
## v2.2.0
#### 🚀 Features

- Implement embedding and lora adapter functionality (PR: #210)
- Read context length and warn if it is very large (PR: #211)
- Setup allowing to use extra features: flash attention and IQ quants (PR: #216)
- Allow HTTP request retries for remote server (PR: #217)
- Allow to set lora weights at startup, add unit test (PR: #219)
- Allow relative StreamingAssets paths for models (PR: #221)

#### 🐛 Fixes

- Fix set template for remote setup (PR: #208)
- Fix crash when stopping scene before LLM creation (PR: #214)

#### 📦 General

- Documentation/point to gguf format for lora (PR: #215)


## v2.1.1
#### 🐛 Fixes

Expand Down
17 changes: 16 additions & 1 deletion CHANGELOG.release.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
### 🚀 Features

- Implement embedding and lora adapter functionality (PR: #210)
- Read context length and warn if it is very large (PR: #211)
- Setup allowing to use extra features: flash attention and IQ quants (PR: #216)
- Allow HTTP request retries for remote server (PR: #217)
- Allow to set lora weights at startup, add unit test (PR: #219)
- Allow relative StreamingAssets paths for models (PR: #221)

### 🐛 Fixes

- Resolve build directory creation
- Fix set template for remote setup (PR: #208)
- Fix crash when stopping scene before LLM creation (PR: #214)

### 📦 General

- Documentation/point to gguf format for lora (PR: #215)

20 changes: 13 additions & 7 deletions Editor/LLMEditor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ public void AddModelSettings(SerializedObject llmScriptSO)
if (llmScriptSO.FindProperty("advancedOptions").boolValue)
{
attributeClasses.Add(typeof(ModelAdvancedAttribute));
if (LLMUnitySetup.FullLlamaLib) attributeClasses.Add(typeof(ModelExtrasAttribute));
}
ShowPropertiesOfClass("", llmScriptSO, attributeClasses, false);
Space();
Expand Down Expand Up @@ -142,7 +143,7 @@ void SetModelIfNone(string filename, bool lora)
LLM llmScript = (LLM)target;
int num = LLMManager.Num(lora);
if (!lora && llmScript.model == "" && num == 1) llmScript.SetModel(filename);
if (lora && llmScript.lora == "" && num == 1) llmScript.SetLora(filename);
if (lora) llmScript.AddLora(filename);
}

async Task createCustomURLField()
Expand Down Expand Up @@ -205,7 +206,7 @@ async Task createButtons()
}
else if (modelIndex > 1)
{
if (modelLicenses[modelIndex] != null) Debug.LogWarning($"The {modelOptions[modelIndex]} model is released under the following license: {modelLicenses[modelIndex]}. By using this model, you agree to the terms of the license.");
if (modelLicenses[modelIndex] != null) LLMUnitySetup.LogWarning($"The {modelOptions[modelIndex]} model is released under the following license: {modelLicenses[modelIndex]}. By using this model, you agree to the terms of the license.");
string filename = await LLMManager.DownloadModel(modelURLs[modelIndex], true, modelOptions[modelIndex]);
SetModelIfNone(filename, false);
UpdateModels(true);
Expand Down Expand Up @@ -237,7 +238,7 @@ async Task createButtons()
{
EditorApplication.delayCall += () =>
{
string path = EditorUtility.OpenFilePanelWithFilters("Select a bin lora file", "", new string[] { "Model Files", "bin" });
string path = EditorUtility.OpenFilePanelWithFilters("Select a gguf lora file", "", new string[] { "Model Files", "gguf" });
if (!string.IsNullOrEmpty(path))
{
string filename = LLMManager.LoadLora(path, true);
Expand Down Expand Up @@ -299,10 +300,10 @@ void OnEnable()
}
else
{
isSelected = llmScript.lora == entry.filename;
bool newSelected = EditorGUI.Toggle(selectRect, isSelected, EditorStyles.radioButton);
if (newSelected && !isSelected) llmScript.SetLora(entry.filename);
else if (!newSelected && isSelected) llmScript.SetLora("");
isSelected = llmScript.loraManager.Contains(entry.filename);
bool newSelected = EditorGUI.Toggle(selectRect, isSelected);
if (newSelected && !isSelected) llmScript.AddLora(entry.filename);
else if (!newSelected && isSelected) llmScript.RemoveLora(entry.filename);
}

DrawCopyableLabel(nameRect, entry.label, entry.filename);
Expand Down Expand Up @@ -347,6 +348,11 @@ void OnEnable()

if (GUI.Button(actionRect, trashIcon))
{
if (isSelected)
{
if (!entry.lora) llmScript.SetModel("");
else llmScript.RemoveLora(entry.filename);
}
LLMManager.Remove(entry);
UpdateModels(true);
}
Expand Down
18 changes: 8 additions & 10 deletions Editor/PropertyEditor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,11 @@ public void AddScript(SerializedObject llmScriptSO)
EditorGUILayout.PropertyField(scriptProp);
}

public void AddOptionsToggle(SerializedObject llmScriptSO, string propertyName, string name)
public bool ToggleButton(string text, bool activated)
{
SerializedProperty advancedOptionsProp = llmScriptSO.FindProperty(propertyName);
string toggleText = (advancedOptionsProp.boolValue ? "Hide" : "Show") + " " + name;
GUIStyle style = new GUIStyle("Button");
if (advancedOptionsProp.boolValue)
style.normal = new GUIStyleState() { background = Texture2D.grayTexture };
if (GUILayout.Button(toggleText, style, GUILayout.Width(buttonWidth)))
{
advancedOptionsProp.boolValue = !advancedOptionsProp.boolValue;
}
if (activated) style.normal = new GUIStyleState() { background = Texture2D.grayTexture };
return GUILayout.Button(text, style, GUILayout.Width(buttonWidth));
}

public void AddSetupSettings(SerializedObject llmScriptSO)
Expand Down Expand Up @@ -54,8 +48,12 @@ public void AddChatSettings(SerializedObject llmScriptSO)
public void AddOptionsToggles(SerializedObject llmScriptSO)
{
LLMUnitySetup.SetDebugMode((LLMUnitySetup.DebugModeType)EditorGUILayout.EnumPopup("Log Level", LLMUnitySetup.DebugMode));

EditorGUILayout.BeginHorizontal();
AddOptionsToggle(llmScriptSO, "advancedOptions", "Advanced Options");
SerializedProperty advancedOptionsProp = llmScriptSO.FindProperty("advancedOptions");
string toggleText = (advancedOptionsProp.boolValue ? "Hide" : "Show") + " Advanced Options";
if (ToggleButton(toggleText, advancedOptionsProp.boolValue)) advancedOptionsProp.boolValue = !advancedOptionsProp.boolValue;
if (ToggleButton("Use extras", LLMUnitySetup.FullLlamaLib)) LLMUnitySetup.SetFullLlamaLib(!LLMUnitySetup.FullLlamaLib);
EditorGUILayout.EndHorizontal();
Space();
}
Expand Down
40 changes: 30 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,10 @@ LLM for Unity is built on top of the awesome [llama.cpp](https://github.com/gger

## How to help
- [⭐ Star](https://github.com/undreamai/LLMUnity) the repo, leave us a [review](https://assetstore.unity.com/packages/slug/273604) and spread the word about the project!
- Join us at [Discord](https://discord.gg/RwXKQb6zdv) and say hi!
- [Contribute](CONTRIBUTING.md) by submitting feature requests or bugs as issues or even submitting a PR and become a collaborator!
- Join us at [Discord](https://discord.gg/RwXKQb6zdv) and say hi.
- [Contribute](CONTRIBUTING.md) by submitting feature requests, bugs or even your own PR.
- [![](https://img.shields.io/static/v1?label=Sponsor&message=%E2%9D%A4&logo=GitHub&color=%23fe8e86)](https://github.com/sponsors/amakropoulos) this work to allow even cooler features!


## Games using LLM for Unity
- [Verbal Verdict](https://store.steampowered.com/app/2778780/Verbal_Verdict/)
Expand All @@ -56,6 +58,7 @@ LLM for Unity is built on top of the awesome [llama.cpp](https://github.com/gger
- [Murder in Aisle 4](https://roadedlich.itch.io/murder-in-aisle-4)
- [Finicky Food Delivery AI](https://helixngc7293.itch.io/finicky-food-delivery-ai)
- [AI Emotional Girlfriend](https://whynames.itch.io/aiemotionalgirlfriend)
- [AI Speak](https://jdscogin.wixsite.com/aispeak)

## Setup
_Method 1: Install using the asset store_
Expand Down Expand Up @@ -247,8 +250,9 @@ public class MyScript : MonoBehaviour
// The model needs to be added to the LLM model manager (see LLM model management) by loading or downloading it.
// Otherwise the model file can be copied directly inside the StreamingAssets folder.
llm.SetModel("Phi-3-mini-4k-instruct-q4.gguf");
// optional: you can also set a lora in a similar fashion
llm.SetLora("my-lora.bin");
// optional: you can also set loras in a similar fashion and set their weights (if needed)
llm.AddLora("my-lora.gguf");
llm.SetLoraWeight(0.5f);
// optional: you can set the chat template of the model if it is not correctly identified
// You can find a list of chat templates in the ChatTemplate.templates.Keys
llm.SetTemplate("phi-3");
Expand Down Expand Up @@ -290,6 +294,15 @@ You can use a remote server to carry out the processing and implement characters
- Create a second project with the game characters using the `LLMCharacter` script as described above.
Enable the `Remote` option and configure the host with the IP address (starting with "http://") and port of the server.

</details>
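As an illustrative sketch of the client side (the `remote`, `host` and `port` field names are assumed from the `LLMCharacter` settings listed further below, and the IP address and port are placeholders for your own server):

``` c#
// Illustrative only: field names assumed from the LLMCharacter settings list;
// replace the IP address and port with those of your own LLM server.
llmCharacter.remote = true;
llmCharacter.host = "http://192.168.1.10";
llmCharacter.port = 13333;
```
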
<details>
<summary>Compute embeddings using an LLM</summary>

The `Embeddings` function can be used to obtain the embeddings of a phrase:
``` c#
List<float> embeddings = await llmCharacter.Embeddings("hi, how are you?");
```

</details>
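As a hedged illustration (not part of this PR), the `List<float>` returned by `Embeddings` could be compared between two phrases with plain cosine similarity; the helper below is a sketch assuming equal-length vectors:

``` c#
using System;
using System.Collections.Generic;

public static class EmbeddingUtils
{
    // Cosine similarity between two equal-length embedding vectors.
    // Returns a value in [-1, 1]; higher means more semantically similar.
    public static float CosineSimilarity(List<float> a, List<float> b)
    {
        if (a.Count != b.Count) throw new ArgumentException("Embedding sizes differ");
        double dot = 0, normA = 0, normB = 0;
        for (int i = 0; i < a.Count; i++)
        {
            dot += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        return (float)(dot / (Math.Sqrt(normA) * Math.Sqrt(normB) + 1e-12));
    }
}
```
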

<b>Detailed documentation</b> at the function level can be found here:
Expand Down Expand Up @@ -345,6 +358,7 @@ If you have loaded a model locally you need to set its URL through the expanded

- `Show/Hide Advanced Options` Toggle to show/hide advanced options from below
- `Log Level` select how verbose the log messages are
- `Use extras` select to install and allow the use of extra features (flash attention and IQ quants)

#### 💻 Setup Settings

Expand Down Expand Up @@ -374,13 +388,15 @@ If the user's GPU is not supported, the LLM will fall back to the CPU

- <details><summary>Advanced options</summary>

- `Download lora` click to download a LoRA model in .bin format
- `Load lora` click to load a LoRA model in .bin format
- `Download lora` click to download a LoRA model in .gguf format
- `Load lora` click to load a LoRA model in .gguf format
- <details><summary><code>Context Size</code> size of the prompt context (0 = context size of the model)</summary> This is the number of tokens the model can take as input when generating responses. Higher values use more RAM or VRAM (if using GPU). </details>
- `Batch Size` batch size for prompt processing (default: 512)
- `Model` the path of the model being used (relative to the Assets/StreamingAssets folder)
- `Chat Template` the chat template being used for the LLM
- `Lora` the path of the LoRA being used (relative to the Assets/StreamingAssets folder)
- `Lora` the path of the LoRAs being used (relative to the Assets/StreamingAssets folder)
- `Lora Weights` the weights of the LoRAs being used
- `Flash Attention` click to use flash attention in the model (if `Use extras` is enabled)

</details>
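A hedged sketch of how several adapters could be combined using the `AddLora` and `SetLoraWeight` calls shown elsewhere in this PR (the adapter filenames are placeholders, and the exact API for updating several weights at once is not shown here):

``` c#
// Hypothetical usage sketch; adapter filenames are placeholders.
llm.AddLora("style-a.gguf");
llm.AddLora("style-b.gguf");
// Weights can be adjusted after loading (per this PR's SetLoraWeight rename).
llm.SetLoraWeight(0.5f);
```
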

Expand All @@ -395,6 +411,7 @@ If the user's GPU is not supported, the LLM will fall back to the CPU

- `Show/Hide Advanced Options` Toggle to show/hide advanced options from below
- `Log Level` select how verbose the log messages are
- `Use extras` select to install and allow the use of extra features (flash attention and IQ quants)

#### 💻 Setup Settings
<div>
Expand All @@ -403,8 +420,9 @@ If the user's GPU is not supported, the LLM will fall back to the CPU

- `Remote` whether the LLM used is remote or local
- `LLM` the LLM GameObject (if `Remote` is not set)
- `Host` IP of the LLM (if `Remote` is set)
- `Port` port of the LLM (if `Remote` is set)
- `Host` IP of the LLM server (if `Remote` is set)
- `Port` port of the LLM server (if `Remote` is set)
- `Num Retries` number of HTTP request retries from the LLM server (if `Remote` is set)
- <details><summary><code>Save</code> save filename or relative path</summary> If set, the chat history and LLM state (if save cache is enabled) are automatically saved to the file specified. <br> The chat history is saved with a json suffix and the LLM state with a cache suffix. <br> Both files are saved in the [persistentDataPath folder of Unity](https://docs.unity3d.com/ScriptReference/Application-persistentDataPath.html).</details>
- `Save Cache` select to save the LLM state along with the chat history. The LLM state is typically around 100MB+.
- `Debug Prompt` select to log the constructed prompts in the Unity Editor
Expand Down Expand Up @@ -446,4 +464,6 @@ If it is not selected, the full reply from the model is received in one go
</details>

## License
The license of LLM for Unity is MIT ([LICENSE.md](LICENSE.md)) and uses third-party software with MIT and Apache licenses ([Third Party Notices.md](<Third Party Notices.md>)).
The license of LLM for Unity is MIT ([LICENSE.md](LICENSE.md)) and uses third-party software with MIT and Apache licenses.
Some models included in the asset define their own license terms; please review them before using each model.
Third-party licenses can be found in [Third Party Notices.md](<Third Party Notices.md>).