Documentation/point to gguf format for lora #215

Merged
6 changes: 3 additions & 3 deletions README.md
@@ -248,7 +248,7 @@ public class MyScript : MonoBehaviour
// Otherwise the model file can be copied directly inside the StreamingAssets folder.
llm.SetModel("Phi-3-mini-4k-instruct-q4.gguf");
// optional: you can also set a lora in a similar fashion
llm.SetLora("my-lora.bin");
llm.SetLora("my-lora.gguf");
// optional: you can set the chat template of the model if it is not correctly identified
// You can find a list of chat templates in the ChatTemplate.templates.Keys
llm.SetTemplate("phi-3");
@@ -374,8 +374,8 @@ If the user's GPU is not supported, the LLM will fall back to the CPU

- <details><summary>Advanced options</summary>

-  - `Download lora` click to download a LoRA model in .bin format
-  - `Load lora` click to load a LoRA model in .bin format
+  - `Download lora` click to download a LoRA model in .gguf format
+  - `Load lora` click to load a LoRA model in .gguf format
- <details><summary><code>Context Size</code> size of the prompt context (0 = context size of the model)</summary> This is the number of tokens the model can take as input when generating responses. Higher values use more RAM or VRAM (if using GPU). </details>
- `Batch Size` batch size for prompt processing (default: 512)
- `Model` the path of the model being used (relative to the Assets/StreamingAssets folder)
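For reference, a minimal sketch of the same setup done from code, using the API from the README snippet above; the contextSize and batchSize field names are assumptions of this sketch, not something this diff confirms:

public LLM llm;

void ConfigureLLM()
{
    llm.SetModel("Phi-3-mini-4k-instruct-q4.gguf"); // base model in gguf format
    llm.SetLora("my-lora.gguf");                    // LoRA adapter, now also in gguf format
    llm.contextSize = 4096;                         // assumed field: prompt context size (0 = model default)
    llm.batchSize = 512;                            // assumed field: batch size for prompt processing
}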
28 changes: 14 additions & 14 deletions Runtime/LLM.cs
@@ -22,7 +22,7 @@ public LLMException(string message, int errorCode) : base(message)
}
}

-public class DestroyException : Exception {}
+public class DestroyException : Exception { }
/// \endcond

[DefaultExecutionOrder(-1)]
Expand Down Expand Up @@ -72,7 +72,7 @@ public class LLM : MonoBehaviour
/// <summary> Chat template used for the model </summary>
[ModelAdvanced] public string chatTemplate = ChatTemplate.DefaultTemplate;
/// <summary> the paths of the LORA models being used (relative to the Assets/StreamingAssets folder).
-/// Models with .bin format are allowed.</summary>
+/// Models with .gguf format are allowed.</summary>
[ModelAdvanced] public string lora = "";

/// \cond HIDE
@@ -192,9 +192,9 @@ public void SetModel(string path)
/// <summary>
/// Allows to set a LORA model to use in the LLM.
/// The model provided is copied to the Assets/StreamingAssets folder that allows it to also work in the build.
-/// Models supported are in .bin format.
+/// Models supported are in .gguf format.
/// </summary>
/// <param name="path">path to LORA model to use (.bin format)</param>
/// <param name="path">path to LORA model to use (.gguf format)</param>
public void SetLora(string path)
{
lora = "";
@@ -204,9 +204,9 @@ public void SetLora(string path)
/// <summary>
/// Allows to add a LORA model to use in the LLM.
/// The model provided is copied to the Assets/StreamingAssets folder that allows it to also work in the build.
-/// Models supported are in .bin format.
+/// Models supported are in .gguf format.
/// </summary>
/// <param name="path">path to LORA model to use (.bin format)</param>
/// <param name="path">path to LORA model to use (.gguf format)</param>
public void AddLora(string path)
{
string loraPath = GetModelLoraPath(path, true);
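Since AddLora appends rather than replaces, multiple gguf adapters can be stacked; a sketch with hypothetical file names:

llm.SetLora("style.gguf");  // start from a single adapter
llm.AddLora("domain.gguf"); // stack a second adapter on top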
@@ -220,9 +220,9 @@ public void AddLora(string path)

/// <summary>
/// Allows to remove a LORA model from the LLM.
-/// Models supported are in .bin format.
+/// Models supported are in .gguf format.
/// </summary>
/// <param name="path">path to LORA model to remove (.bin format)</param>
/// <param name="path">path to LORA model to remove (.gguf format)</param>
public void RemoveLora(string path)
{
string loraPath = GetModelLoraPath(path, true);
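And the corresponding removal, again with a hypothetical file name:

llm.RemoveLora("domain.gguf"); // drops this adapter from the lora list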
@@ -373,7 +373,7 @@ void CallIfNotDestroyed(EmptyCallback fn)
private void InitService(string arguments)
{
if (debug) CallIfNotDestroyed(() => SetupLogging());
-CallIfNotDestroyed(() => {LLMObject = llmlib.LLM_Construct(arguments);});
+CallIfNotDestroyed(() => { LLMObject = llmlib.LLM_Construct(arguments); });
if (remote) CallIfNotDestroyed(() => llmlib.LLM_StartServer(LLMObject));
CallIfNotDestroyed(() => llmlib.LLM_SetTemplate(LLMObject, chatTemplate));
CallIfNotDestroyed(() => CheckLLMStatus(false));
@@ -383,7 +383,7 @@ private void StartService()
{
llmThread = new Thread(() => llmlib.LLM_Start(LLMObject));
llmThread.Start();
-while (!llmlib.LLM_Started(LLMObject)) {}
+while (!llmlib.LLM_Started(LLMObject)) { }
loraWeights = new List<float>();
for (int i = 0; i < lora.Split(" ").Count(); i++) loraWeights.Add(1f);
started = true;
@@ -446,7 +446,7 @@ void AssertStarted()

void CheckLLMStatus(bool log = true)
{
-if (llmlib == null) {return;}
+if (llmlib == null) { return; }
IntPtr stringWrapper = llmlib.StringWrapper_Construct();
int status = llmlib.LLM_Status(LLMObject, stringWrapper);
string result = llmlib.GetStringWrapperResult(stringWrapper);
@@ -553,7 +553,7 @@ public async Task<string> SetLoraScale(string loraToScale, float scale)
loraWeightRequest.loraWeights = new List<LoraWeightRequest>();
for (int i = 0; i < loraWeights.Count; i++)
{
-loraWeightRequest.loraWeights.Add(new LoraWeightRequest() {id = i, scale = loraWeights[i]});
+loraWeightRequest.loraWeights.Add(new LoraWeightRequest() { id = i, scale = loraWeights[i] });
}
;
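A usage sketch for SetLoraScale, assuming the first argument matches one of the loaded LoRA paths (hypothetical file name):

await llm.SetLoraScale("domain.gguf", 0.5f); // halve the influence of this adapter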

@@ -607,7 +607,7 @@ public async Task<string> Slot(string json)
public async Task<string> Completion(string json, Callback<string> streamCallback = null)
{
AssertStarted();
-if (streamCallback == null) streamCallback = (string s) => {};
+if (streamCallback == null) streamCallback = (string s) => { };
StreamWrapper streamWrapper = ConstructStreamWrapper(streamCallback);
await Task.Run(() => llmlib.LLM_Completion(LLMObject, json, streamWrapper.GetStringWrapper()));
if (!started) return null;
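A hedged usage sketch for Completion; the request fields below assume the llama.cpp server completion schema, which this diff does not itself confirm:

string json = "{\"prompt\": \"Hello!\", \"n_predict\": 32}"; // assumed request schema
string reply = await llm.Completion(json, chunk => Debug.Log(chunk)); // streamCallback is optional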
@@ -621,7 +621,7 @@ public async Task<string> Completion(string json, Callback<string> streamCallback = null)
public async Task SetBasePrompt(string base_prompt)
{
AssertStarted();
-SystemPromptRequest request = new SystemPromptRequest(){system_prompt = base_prompt, prompt = " ", n_predict = 0};
+SystemPromptRequest request = new SystemPromptRequest() { system_prompt = base_prompt, prompt = " ", n_predict = 0 };
await Completion(JsonUtility.ToJson(request));
}
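As the body shows, SetBasePrompt issues a Completion with n_predict = 0, so it only caches the system prompt for later requests; a minimal sketch:

await llm.SetBasePrompt("You are a helpful assistant."); // processed once, reused by subsequent completions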
