Skip to content

Commit 225957d

Browse files
ltoniazzi authored and amakropoulos committed
Point to gguf format for lora
1 parent f99ee2f commit 225957d

File tree

2 files changed

+17
-17
lines changed

2 files changed

+17
-17
lines changed

README.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ public class MyScript : MonoBehaviour
248248
// Otherwise the model file can be copied directly inside the StreamingAssets folder.
249249
llm.SetModel("Phi-3-mini-4k-instruct-q4.gguf");
250250
// optional: you can also set a lora in a similar fashion
251-
llm.SetLora("my-lora.bin");
251+
llm.SetLora("my-lora.gguf");
252252
// optional: you can set the chat template of the model if it is not correctly identified
253253
// You can find a list of chat templates in the ChatTemplate.templates.Keys
254254
llm.SetTemplate("phi-3");
@@ -374,8 +374,8 @@ If the user's GPU is not supported, the LLM will fall back to the CPU
374374

375375
- <details><summary>Advanced options</summary>
376376

377-
- `Download lora` click to download a LoRA model in .bin format
378-
- `Load lora` click to load a LoRA model in .bin format
377+
- `Download lora` click to download a LoRA model in .gguf format
378+
- `Load lora` click to load a LoRA model in .gguf format
379379
- <details><summary><code>Context Size</code> size of the prompt context (0 = context size of the model)</summary> This is the number of tokens the model can take as input when generating responses. Higher values use more RAM or VRAM (if using GPU). </details>
380380
- `Batch Size` batch size for prompt processing (default: 512)
381381
- `Model` the path of the model being used (relative to the Assets/StreamingAssets folder)

Runtime/LLM.cs

+14-14
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ public LLMException(string message, int errorCode) : base(message)
2222
}
2323
}
2424

25-
public class DestroyException : Exception {}
25+
public class DestroyException : Exception { }
2626
/// \endcond
2727

2828
[DefaultExecutionOrder(-1)]
@@ -72,7 +72,7 @@ public class LLM : MonoBehaviour
7272
/// <summary> Chat template used for the model </summary>
7373
[ModelAdvanced] public string chatTemplate = ChatTemplate.DefaultTemplate;
7474
/// <summary> the paths of the LORA models being used (relative to the Assets/StreamingAssets folder).
75-
/// Models with .bin format are allowed.</summary>
75+
/// Models with .gguf format are allowed.</summary>
7676
[ModelAdvanced] public string lora = "";
7777

7878
/// \cond HIDE
@@ -192,9 +192,9 @@ public void SetModel(string path)
192192
/// <summary>
193193
/// Allows to set a LORA model to use in the LLM.
194194
/// The model provided is copied to the Assets/StreamingAssets folder that allows it to also work in the build.
195-
/// Models supported are in .bin format.
195+
/// Models supported are in .gguf format.
196196
/// </summary>
197-
/// <param name="path">path to LORA model to use (.bin format)</param>
197+
/// <param name="path">path to LORA model to use (.gguf format)</param>
198198
public void SetLora(string path)
199199
{
200200
lora = "";
@@ -204,9 +204,9 @@ public void SetLora(string path)
204204
/// <summary>
205205
/// Allows to add a LORA model to use in the LLM.
206206
/// The model provided is copied to the Assets/StreamingAssets folder that allows it to also work in the build.
207-
/// Models supported are in .bin format.
207+
/// Models supported are in .gguf format.
208208
/// </summary>
209-
/// <param name="path">path to LORA model to use (.bin format)</param>
209+
/// <param name="path">path to LORA model to use (.gguf format)</param>
210210
public void AddLora(string path)
211211
{
212212
string loraPath = GetModelLoraPath(path, true);
@@ -220,9 +220,9 @@ public void AddLora(string path)
220220

221221
/// <summary>
222222
/// Allows to remove a LORA model from the LLM.
223-
/// Models supported are in .bin format.
223+
/// Models supported are in .gguf format.
224224
/// </summary>
225-
/// <param name="path">path to LORA model to remove (.bin format)</param>
225+
/// <param name="path">path to LORA model to remove (.gguf format)</param>
226226
public void RemoveLora(string path)
227227
{
228228
string loraPath = GetModelLoraPath(path, true);
@@ -373,7 +373,7 @@ void CallIfNotDestroyed(EmptyCallback fn)
373373
private void InitService(string arguments)
374374
{
375375
if (debug) CallIfNotDestroyed(() => SetupLogging());
376-
CallIfNotDestroyed(() => {LLMObject = llmlib.LLM_Construct(arguments);});
376+
CallIfNotDestroyed(() => { LLMObject = llmlib.LLM_Construct(arguments); });
377377
if (remote) CallIfNotDestroyed(() => llmlib.LLM_StartServer(LLMObject));
378378
CallIfNotDestroyed(() => llmlib.LLM_SetTemplate(LLMObject, chatTemplate));
379379
CallIfNotDestroyed(() => CheckLLMStatus(false));
@@ -383,7 +383,7 @@ private void StartService()
383383
{
384384
llmThread = new Thread(() => llmlib.LLM_Start(LLMObject));
385385
llmThread.Start();
386-
while (!llmlib.LLM_Started(LLMObject)) {}
386+
while (!llmlib.LLM_Started(LLMObject)) { }
387387
loraWeights = new List<float>();
388388
for (int i = 0; i < lora.Split(" ").Count(); i++) loraWeights.Add(1f);
389389
started = true;
@@ -446,7 +446,7 @@ void AssertStarted()
446446

447447
void CheckLLMStatus(bool log = true)
448448
{
449-
if (llmlib == null) {return;}
449+
if (llmlib == null) { return; }
450450
IntPtr stringWrapper = llmlib.StringWrapper_Construct();
451451
int status = llmlib.LLM_Status(LLMObject, stringWrapper);
452452
string result = llmlib.GetStringWrapperResult(stringWrapper);
@@ -553,7 +553,7 @@ public async Task<string> SetLoraScale(string loraToScale, float scale)
553553
loraWeightRequest.loraWeights = new List<LoraWeightRequest>();
554554
for (int i = 0; i < loraWeights.Count; i++)
555555
{
556-
loraWeightRequest.loraWeights.Add(new LoraWeightRequest() {id = i, scale = loraWeights[i]});
556+
loraWeightRequest.loraWeights.Add(new LoraWeightRequest() { id = i, scale = loraWeights[i] });
557557
}
558558
;
559559

@@ -607,7 +607,7 @@ public async Task<string> Slot(string json)
607607
public async Task<string> Completion(string json, Callback<string> streamCallback = null)
608608
{
609609
AssertStarted();
610-
if (streamCallback == null) streamCallback = (string s) => {};
610+
if (streamCallback == null) streamCallback = (string s) => { };
611611
StreamWrapper streamWrapper = ConstructStreamWrapper(streamCallback);
612612
await Task.Run(() => llmlib.LLM_Completion(LLMObject, json, streamWrapper.GetStringWrapper()));
613613
if (!started) return null;
@@ -621,7 +621,7 @@ public async Task<string> Completion(string json, Callback<string> streamCallbac
621621
public async Task SetBasePrompt(string base_prompt)
622622
{
623623
AssertStarted();
624-
SystemPromptRequest request = new SystemPromptRequest(){system_prompt = base_prompt, prompt = " ", n_predict = 0};
624+
SystemPromptRequest request = new SystemPromptRequest() { system_prompt = base_prompt, prompt = " ", n_predict = 0 };
625625
await Completion(JsonUtility.ToJson(request));
626626
}
627627

0 commit comments

Comments (0)