Skip to content

Allow HTTP request retries for remote server #217

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Aug 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -406,8 +406,9 @@ If the user's GPU is not supported, the LLM will fall back to the CPU

- `Remote` whether the LLM used is remote or local
- `LLM` the LLM GameObject (if `Remote` is not set)
- `Hort` ip of the LLM (if `Remote` is set)
- `Port` port of the LLM (if `Remote` is set)
- `Host` ip of the LLM server (if `Remote` is set)
- `Port` port of the LLM server (if `Remote` is set)
- `Num Retries` number of HTTP request retries for requests to the LLM server, -1 for infinite retries (if `Remote` is set)
- <details><summary><code>Save</code> save filename or relative path</summary> If set, the chat history and LLM state (if save cache is enabled) is automatically saved to file specified. <br> The chat history is saved with a json suffix and the LLM state with a cache suffix. <br> Both files are saved in the [persistentDataPath folder of Unity](https://docs.unity3d.com/ScriptReference/Application-persistentDataPath.html).</details>
- `Save Cache` select to save the LLM state along with the chat history. The LLM state is typically around 100MB+.
- `Debug Prompt` select to log the constructed prompts in the Unity Editor
Expand Down
20 changes: 10 additions & 10 deletions Runtime/LLM.cs
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ private void StartLLMServer(string arguments)
failed = true;
return;
}
CallIfNotDestroyed(() => StartService());
CallWithLock(StartService);
LLMUnitySetup.Log("LLM service created");
}

Expand All @@ -364,22 +364,22 @@ private void InitLib(string arch)
CheckLLMStatus(false);
}

void CallIfNotDestroyed(EmptyCallback fn)
void CallWithLock(EmptyCallback fn, bool checkNull = true)
{
lock (startLock)
{
if (llmlib == null) throw new DestroyException();
if (checkNull && llmlib == null) throw new DestroyException();
fn();
}
}

private void InitService(string arguments)
{
if (debug) CallIfNotDestroyed(() => SetupLogging());
CallIfNotDestroyed(() => { LLMObject = llmlib.LLM_Construct(arguments); });
if (remote) CallIfNotDestroyed(() => llmlib.LLM_StartServer(LLMObject));
CallIfNotDestroyed(() => llmlib.LLM_SetTemplate(LLMObject, chatTemplate));
CallIfNotDestroyed(() => CheckLLMStatus(false));
if (debug) CallWithLock(SetupLogging);
CallWithLock(() => { LLMObject = llmlib.LLM_Construct(arguments); });
CallWithLock(() => llmlib.LLM_SetTemplate(LLMObject, chatTemplate));
if (remote) CallWithLock(() => llmlib.LLM_StartServer(LLMObject));
CallWithLock(() => CheckLLMStatus(false));
}

private void StartService()
Expand Down Expand Up @@ -644,7 +644,7 @@ public void CancelRequest(int id_slot)
/// </summary>
public void Destroy()
{
lock (startLock)
CallWithLock(() =>
{
try
{
Expand All @@ -669,7 +669,7 @@ public void Destroy()
{
LLMUnitySetup.LogError(e.Message);
}
}
}, false);
}

/// <summary>
Expand Down
89 changes: 62 additions & 27 deletions Runtime/LLMCharacter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ public class LLMCharacter : MonoBehaviour
[Remote] public string host = "localhost";
/// <summary> port to use for the LLM server </summary>
[Remote] public int port = 13333;
/// <summary> number of retries to use for the LLM server requests (-1 = infinite) </summary>
[Remote] public int numRetries = -1;
/// <summary> file to save the chat history.
/// The file is saved only for Chat calls with addToHistory set to true.
/// The file will be saved within the persistentDataPath directory (see https://docs.unity3d.com/ScriptReference/Application-persistentDataPath.html). </summary>
Expand Down Expand Up @@ -118,7 +120,7 @@ public class LLMCharacter : MonoBehaviour
public List<ChatMessage> chat;
private SemaphoreSlim chatLock = new SemaphoreSlim(1, 1);
private string chatTemplate;
private ChatTemplate template;
private ChatTemplate template = null;
public string grammarString;
protected int id_slot = -1;
private List<(string, string)> requestHeaders = new List<(string, string)> { ("Content-Type", "application/json") };
Expand Down Expand Up @@ -270,10 +272,21 @@ public void SetPrompt(string newPrompt, bool clearChat = true)
InitPrompt(clearChat);
}

/// <summary>
/// Verifies that a chat template has been loaded.
/// Logs an error and reports failure when the template is still unset.
/// </summary>
/// <returns>true when the template is available, false otherwise</returns>
private bool CheckTemplate()
{
    if (template != null) return true;
    LLMUnitySetup.LogError("Template not set!");
    return false;
}

private async Task InitNKeep()
{
if (setNKeepToPrompt && nKeep == -1)
{
if (!CheckTemplate()) return;
string systemPrompt = template.ComputePrompt(new List<ChatMessage>(){chat[0]}, playerName, "", false);
await Tokenize(systemPrompt, SetNKeep);
}
Expand Down Expand Up @@ -311,7 +324,8 @@ public async Task LoadTemplate()
if (llmTemplate != chatTemplate)
{
chatTemplate = llmTemplate;
template = ChatTemplate.GetTemplate(chatTemplate);
template = chatTemplate == null ? null : ChatTemplate.GetTemplate(chatTemplate);
nKeep = -1;
}
}

Expand All @@ -331,6 +345,7 @@ public async void SetGrammar(string path)

List<string> GetStopwords()
{
if (!CheckTemplate()) return null;
List<string> stopAll = new List<string>(template.GetStop(playerName, AIName));
if (stop != null) stopAll.AddRange(stop);
return stopAll;
Expand Down Expand Up @@ -465,6 +480,7 @@ public async Task<string> Chat(string query, Callback<string> callback = null, E
// call the callback function while the answer is received
// call the completionCallback function when the answer is fully received
await LoadTemplate();
if (!CheckTemplate()) return null;
await InitNKeep();

string json;
Expand Down Expand Up @@ -750,38 +766,57 @@ protected async Task<Ret> PostRequestRemote<Res, Ret>(string json, string endpoi

Ret result = default;
byte[] jsonToSend = new System.Text.UTF8Encoding().GetBytes(json);
using (var request = UnityWebRequest.Put($"{host}:{port}/{endpoint}", jsonToSend))
{
WIPRequests.Add(request);
UnityWebRequest request = null;
string error = null;
int tryNr = numRetries;

request.method = "POST";
if (requestHeaders != null)
while (tryNr != 0)
{
using (request = UnityWebRequest.Put($"{host}:{port}/{endpoint}", jsonToSend))
{
for (int i = 0; i < requestHeaders.Count; i++)
request.SetRequestHeader(requestHeaders[i].Item1, requestHeaders[i].Item2);
}
WIPRequests.Add(request);

// Start the request asynchronously
var asyncOperation = request.SendWebRequest();
float lastProgress = 0f;
// Continue updating progress until the request is completed
while (!asyncOperation.isDone)
{
float currentProgress = request.downloadProgress;
// Check if progress has changed
if (currentProgress != lastProgress && callback != null)
request.method = "POST";
if (requestHeaders != null)
{
callback?.Invoke(ConvertContent(request.downloadHandler.text, getContent));
lastProgress = currentProgress;
for (int i = 0; i < requestHeaders.Count; i++)
request.SetRequestHeader(requestHeaders[i].Item1, requestHeaders[i].Item2);
}

// Start the request asynchronously
var asyncOperation = request.SendWebRequest();
float lastProgress = 0f;
// Continue updating progress until the request is completed
while (!asyncOperation.isDone)
{
float currentProgress = request.downloadProgress;
// Check if progress has changed
if (currentProgress != lastProgress && callback != null)
{
callback?.Invoke(ConvertContent(request.downloadHandler.text, getContent));
lastProgress = currentProgress;
}
// Wait for the next frame
await Task.Yield();
}
WIPRequests.Remove(request);
if (request.result == UnityWebRequest.Result.Success)
{
result = ConvertContent(request.downloadHandler.text, getContent);
error = null;
break;
}
else
{
result = default;
error = request.error;
}
// Wait for the next frame
await Task.Yield();
}
WIPRequests.Remove(request);
if (request.result != UnityWebRequest.Result.Success) LLMUnitySetup.LogError(request.error);
else result = ConvertContent(request.downloadHandler.text, getContent);
callback?.Invoke(result);
tryNr--;
}

if (error != null) LLMUnitySetup.LogError(error);
callback?.Invoke(result);
return result;
}

Expand Down
Loading