diff --git a/README.md b/README.md
index f7a828e9..d8e79bba 100644
--- a/README.md
+++ b/README.md
@@ -248,7 +248,7 @@ public class MyScript : MonoBehaviour
         // Otherwise the model file can be copied directly inside the StreamingAssets folder.
         llm.SetModel("Phi-3-mini-4k-instruct-q4.gguf");
         // optional: you can also set a lora in a similar fashion
-        llm.SetLora("my-lora.bin");
+        llm.SetLora("my-lora.gguf");
         // optional: you can set the chat template of the model if it is not correctly identified
         // You can find a list of chat templates in the ChatTemplate.templates.Keys
         llm.SetTemplate("phi-3");
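Since `SetLora` clears any previously registered adapters before adding the new one (see the `lora = "";` line in the `Runtime/LLM.cs` hunks below), projects that stack several LoRAs would use `AddLora` for the extra ones. A minimal sketch of that pattern, with hypothetical adapter file names:

```csharp
// Sketch: registering two .gguf adapters on the same LLM component
// ("style-lora.gguf" and "domain-lora.gguf" are placeholder file names).
llm.SetLora("style-lora.gguf");   // resets the adapter list, then registers this adapter
llm.AddLora("domain-lora.gguf");  // appends a second adapter to the list
```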
@@ -374,8 +374,8 @@ If the user's GPU is not supported, the LLM will fall back to the CPU

 - <details><summary>Advanced options</summary>

-  - `Download lora` click to download a LoRA model in .bin format
-  - `Load lora` click to load a LoRA model in .bin format
+  - `Download lora` click to download a LoRA model in .gguf format
+  - `Load lora` click to load a LoRA model in .gguf format
   - <details><summary>`Context Size` size of the prompt context (0 = context size of the model)</summary> This is the number of tokens the model can take as input when generating responses. Higher values use more RAM or VRAM (if using GPU). </details>
   - `Batch Size` batch size for prompt processing (default: 512)
   - `Model` the path of the model being used (relative to the Assets/StreamingAssets folder)
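The inspector options documented above surface fields of the `LLM` component, so the same configuration can be scripted. A hedged sketch, assuming the `Context Size` and `Batch Size` labels map to `contextSize` and `batchSize` fields (field names inferred from the labels, not confirmed by this diff):

```csharp
// Sketch: scripting the setup instead of using the inspector
// (contextSize/batchSize are assumed field names).
llm.contextSize = 0;   // 0 = use the context size of the model
llm.batchSize = 512;   // batch size for prompt processing (default: 512)
```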
diff --git a/Runtime/LLM.cs b/Runtime/LLM.cs
index 7fb460c4..54395e8c 100644
--- a/Runtime/LLM.cs
+++ b/Runtime/LLM.cs
@@ -22,7 +22,7 @@ public LLMException(string message, int errorCode) : base(message)
        }
    }

-    public class DestroyException : Exception {}
+    public class DestroyException : Exception { }
    /// \endcond

    [DefaultExecutionOrder(-1)]
@@ -72,7 +72,7 @@ public class LLM : MonoBehaviour
        /// <summary> Chat template used for the model </summary>
        [ModelAdvanced] public string chatTemplate = ChatTemplate.DefaultTemplate;
        /// <summary> the paths of the LORA models being used (relative to the Assets/StreamingAssets folder).
-        /// Models with .bin format are allowed. </summary>
+        /// Models with .gguf format are allowed. </summary>
        [ModelAdvanced] public string lora = "";

        /// \cond HIDE
@@ -192,9 +192,9 @@ public void SetModel(string path)
        /// <summary>
        /// Allows to set a LORA model to use in the LLM.
        /// The model provided is copied to the Assets/StreamingAssets folder that allows it to also work in the build.
-        /// Models supported are in .bin format.
+        /// Models supported are in .gguf format.
        /// </summary>
-        /// <param name="path">path to LORA model to use (.bin format)</param>
+        /// <param name="path">path to LORA model to use (.gguf format)</param>
        public void SetLora(string path)
        {
            lora = "";
@@ -204,9 +204,9 @@ public void SetLora(string path)
        /// <summary>
        /// Allows to add a LORA model to use in the LLM.
        /// The model provided is copied to the Assets/StreamingAssets folder that allows it to also work in the build.
-        /// Models supported are in .bin format.
+        /// Models supported are in .gguf format.
        /// </summary>
-        /// <param name="path">path to LORA model to use (.bin format)</param>
+        /// <param name="path">path to LORA model to use (.gguf format)</param>
        public void AddLora(string path)
        {
            string loraPath = GetModelLoraPath(path, true);
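The hunk that follows applies the same .gguf update to `RemoveLora`, the counterpart for unregistering a single adapter. A usage sketch, reusing the hypothetical file name from the earlier example:

```csharp
// Sketch: unregistering one adapter while keeping any others in the list.
llm.RemoveLora("domain-lora.gguf");
```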
@@ -220,9 +220,9 @@ public void AddLora(string path)
        /// <summary>
        /// Allows to remove a LORA model from the LLM.
-        /// Models supported are in .bin format.
+        /// Models supported are in .gguf format.
        /// </summary>
-        /// <param name="path">path to LORA model to remove (.bin format)</param>
+        /// <param name="path">path to LORA model to remove (.gguf format)</param>
        public void RemoveLora(string path)
        {
            string loraPath = GetModelLoraPath(path, true);
@@ -373,7 +373,7 @@ void CallIfNotDestroyed(EmptyCallback fn)
        private void InitService(string arguments)
        {
            if (debug) CallIfNotDestroyed(() => SetupLogging());
-            CallIfNotDestroyed(() => {LLMObject = llmlib.LLM_Construct(arguments);});
+            CallIfNotDestroyed(() => { LLMObject = llmlib.LLM_Construct(arguments); });
            if (remote) CallIfNotDestroyed(() => llmlib.LLM_StartServer(LLMObject));
            CallIfNotDestroyed(() => llmlib.LLM_SetTemplate(LLMObject, chatTemplate));
            CallIfNotDestroyed(() => CheckLLMStatus(false));
@@ -383,7 +383,7 @@ private void StartService()
        {
            llmThread = new Thread(() => llmlib.LLM_Start(LLMObject));
            llmThread.Start();
-            while (!llmlib.LLM_Started(LLMObject)) {}
+            while (!llmlib.LLM_Started(LLMObject)) { }
            loraWeights = new List<float>();
            for (int i = 0; i < lora.Split(" ").Count(); i++) loraWeights.Add(1f);
            started = true;
@@ -446,7 +446,7 @@ void AssertStarted()
        void CheckLLMStatus(bool log = true)
        {
-            if (llmlib == null) {return;}
+            if (llmlib == null) { return; }
            IntPtr stringWrapper = llmlib.StringWrapper_Construct();
            int status = llmlib.LLM_Status(LLMObject, stringWrapper);
            string result = llmlib.GetStringWrapperResult(stringWrapper);
@@ -553,7 +553,7 @@ public async Task SetLoraScale(string loraToScale, float scale)
            loraWeightRequest.loraWeights = new List<LoraWeightRequest>();
            for (int i = 0; i < loraWeights.Count; i++)
            {
-                loraWeightRequest.loraWeights.Add(new LoraWeightRequest() {id = i, scale = loraWeights[i]});
+                loraWeightRequest.loraWeights.Add(new LoraWeightRequest() { id = i, scale = loraWeights[i] });
            }
            ;
@@ -607,7 +607,7 @@ public async Task<string> Slot(string json)
        public async Task<string> Completion(string json, Callback<string> streamCallback = null)
        {
            AssertStarted();
-            if (streamCallback == null) streamCallback = (string s) => {};
+            if (streamCallback == null) streamCallback = (string s) => { };
            StreamWrapper streamWrapper = ConstructStreamWrapper(streamCallback);
            await Task.Run(() => llmlib.LLM_Completion(LLMObject, json, streamWrapper.GetStringWrapper()));
            if (!started) return null;
@@ -621,7 +621,7 @@ public async Task<string> Completion(string json, Callback<string> streamCallback
        public async Task SetBasePrompt(string base_prompt)
        {
            AssertStarted();
-            SystemPromptRequest request = new SystemPromptRequest(){system_prompt = base_prompt, prompt = " ", n_predict = 0};
+            SystemPromptRequest request = new SystemPromptRequest() { system_prompt = base_prompt, prompt = " ", n_predict = 0 };
            await Completion(JsonUtility.ToJson(request));
        }
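Taken together, `SetLoraScale` and `SetBasePrompt` let game code steer adapter influence and the cached system prompt at runtime. A hedged sketch, to be called from an async method (adapter path and prompt text are placeholders; the lookup-by-path behavior is inferred from the `loraToScale` parameter name):

```csharp
// Sketch: runtime control of adapter weight and base prompt.
// SetLoraScale sends the updated per-adapter weights to the server;
// SetBasePrompt caches a system prompt via a completion with n_predict = 0.
await llm.SetLoraScale("style-lora.gguf", 0.5f);
await llm.SetBasePrompt("You are a terse, helpful NPC.");
```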