
Commit 0f56a0a

allow to set the slot of LLMCharacter
1 parent 6973753 commit 0f56a0a

File tree

3 files changed: +14 -7 lines changed


README.md

+3 -1

@@ -376,7 +376,8 @@ If the user's GPU is not supported, the LLM will fall back to the CPU
 - `Debug` select to log the output of the model in the Unity Editor
 - <details><summary>Advanced options</summary>
 
-- `Parallel Prompts` number of prompts that can happen in parallel (default: -1 = number of LLMCharacter objects)
+- <details><summary><code>Parallel Prompts</code> number of prompts / slots that can happen in parallel (default: -1 = number of LLMCharacter objects). Note that the context size is divided among the slots.</summary> If you want to retain as much context as possible for the LLM and don't need all the characters present at the same time, you can set this number and specify the slot for each LLMCharacter object.
+  e.g. setting `Parallel Prompts` to 1 and slot 0 for all LLMCharacter objects will use the full context, but the entire prompt will need to be computed (no caching) whenever an LLMCharacter object is used for chat. </details>
 - `Dont Destroy On Load` select to not destroy the LLM GameObject when loading a new Scene
 
 </details>
@@ -441,6 +442,7 @@ If it is not selected, the full reply from the model is received in one go
 - `Load grammar` click to load a grammar in .gbnf format
 - `Grammar` the path of the grammar being used (relative to the Assets/StreamingAssets folder)
 - <details><summary><code>Cache Prompt</code> save the ongoing prompt from the chat (default: true)</summary> Saves the prompt while it is being created by the chat to avoid reprocessing the entire prompt every time</details>
+- `Slot` slot of the server to use for computation. Value can be set from 0 to `Parallel Prompts`-1 (default: -1 = new slot for each character)
 - `Seed` seed for reproducibility. For random results every time use -1
 - <details><summary><code>Num Predict</code> maximum number of tokens to predict (default: 256, -1 = infinity, -2 = until context filled)</summary>This is the maximum amount of tokens the model will maximum predict. When N tokens are reached the model will stop generating. This means words / sentences might not get finished if this is too low. </details>
 - <details><summary><code>Temperature</code> LLM temperature, lower values give more deterministic answers (default: 0.2)</summary>The temperature setting adjusts how random the generated responses are. Turning it up makes the generated choices more varied and unpredictable. Turning it down makes the generated responses more predictable and focused on the most likely options.</details>
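To make the README example above concrete, below is a minimal sketch of the configuration it describes: a single parallel prompt with every character pinned to slot 0, so the full context is retained at the cost of recomputing the prompt on each switch. The `LLM.parallelPrompts` and `LLMCharacter.slot` fields are the ones this commit uses and introduces; the `LLMUnity` namespace, the component wiring, and the script itself are assumptions for illustration (in practice these values would normally be set in the Inspector before the server starts).

```csharp
using UnityEngine;
using LLMUnity; // assumed package namespace

// Illustrative sketch only (not part of this commit): share slot 0 between
// characters so the single slot keeps the full context size.
public class SharedSlotSetup : MonoBehaviour
{
    public LLM llm;                 // the LLM server component
    public LLMCharacter hero;       // first character
    public LLMCharacter narrator;   // second character

    void Awake()
    {
        llm.parallelPrompts = 1;    // one slot -> that slot gets the full context

        // Both characters use the same slot, so switching between them means
        // the entire prompt is recomputed (no per-character cache), as noted above.
        hero.slot = 0;
        narrator.slot = 0;
    }
}
```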

Runtime/LLM.cs

+3 -1

@@ -449,7 +449,9 @@ private void StartService()
     public int Register(LLMCharacter llmCharacter)
     {
         clients.Add(llmCharacter);
-        return clients.IndexOf(llmCharacter);
+        int index = clients.IndexOf(llmCharacter);
+        if (parallelPrompts != -1) return index % parallelPrompts;
+        return index;
     }
 
     protected int GetNumClients()
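The updated `Register` method wraps the character index around `parallelPrompts`, so extra characters reuse existing slots instead of receiving new ones. A standalone sketch of that assignment logic, mirroring the diff above rather than reproducing the library code:

```csharp
using System;

// Standalone sketch of the slot assignment performed in LLM.Register above:
// with parallelPrompts = 2, the third and fourth characters wrap back to
// slots 0 and 1 instead of getting slots of their own.
class SlotAssignmentSketch
{
    static void Main()
    {
        int parallelPrompts = 2;                  // assumed server setting
        for (int index = 0; index < 4; index++)   // four registered LLMCharacters
        {
            int slot = parallelPrompts != -1 ? index % parallelPrompts : index;
            Console.WriteLine($"character {index} -> slot {slot}");
            // prints: 0 -> 0, 1 -> 1, 2 -> 0, 3 -> 1
        }
    }
}
```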

Runtime/LLMCharacter.cs

+8 -5

@@ -45,6 +45,8 @@ public class LLMCharacter : MonoBehaviour
     [ModelAdvanced] public string grammar = null;
     /// <summary> option to cache the prompt as it is being created by the chat to avoid reprocessing the entire prompt every time (default: true) </summary>
     [ModelAdvanced] public bool cachePrompt = true;
+    /// <summary> specify which slot of the server to use for computation (affects caching) </summary>
+    [ModelAdvanced] public int slot = -1;
     /// <summary> seed for reproducibility. For random results every time set to -1. </summary>
     [ModelAdvanced] public int seed = 0;
     /// <summary> number of tokens to predict (-1 = infinity, -2 = until context filled).
@@ -123,7 +125,6 @@ public class LLMCharacter : MonoBehaviour
     private string chatTemplate;
     private ChatTemplate template = null;
     public string grammarString;
-    protected int id_slot = -1;
     private List<(string, string)> requestHeaders = new List<(string, string)> { ("Content-Type", "application/json") };
     private List<UnityWebRequest> WIPRequests = new List<UnityWebRequest>();
     /// \endcond
@@ -149,7 +150,8 @@ public void Awake()
                 LLMUnitySetup.LogError($"No LLM assigned or detected for LLMCharacter {name}!");
                 return;
             }
-            id_slot = llm.Register(this);
+            int slotFromServer = llm.Register(this);
+            if (slot == -1) slot = slotFromServer;
         }
 
         InitGrammar();
@@ -159,6 +161,7 @@ public void Awake()
     void OnValidate()
     {
         AssignLLM();
+        if (llm != null && llm.parallelPrompts > -1 && (slot < -1 || slot >= llm.parallelPrompts)) LLMUnitySetup.LogError($"The slot needs to be between 0 and {llm.parallelPrompts-1}, or -1 to be automatically set");
     }
 
     void Reset()
@@ -358,7 +361,7 @@ ChatRequest GenerateRequest(string prompt)
         ChatRequest chatRequest = new ChatRequest();
         if (debugPrompt) LLMUnitySetup.Log(prompt);
         chatRequest.prompt = prompt;
-        chatRequest.id_slot = id_slot;
+        chatRequest.id_slot = slot;
         chatRequest.temperature = temperature;
         chatRequest.top_k = topK;
         chatRequest.top_p = topP;
@@ -613,7 +616,7 @@ public async Task<List<float>> Embeddings(string query, Callback<List<float>> ca
     private async Task<string> Slot(string filepath, string action)
     {
         SlotRequest slotRequest = new SlotRequest();
-        slotRequest.id_slot = id_slot;
+        slotRequest.id_slot = slot;
         slotRequest.filepath = filepath;
         slotRequest.action = action;
         string json = JsonUtility.ToJson(slotRequest);
@@ -683,7 +686,7 @@ protected Ret ConvertContent<Res, Ret>(string response, ContentCallback<Res, Ret
 
     protected void CancelRequestsLocal()
     {
-        if (id_slot >= 0) llm.CancelRequest(id_slot);
+        if (slot >= 0) llm.CancelRequest(slot);
     }
 
     protected void CancelRequestsRemote()
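Since `slot` replaces the protected `id_slot` and is now a public field, a character can also be pinned to a specific slot from code; the `OnValidate` guard above rejects anything outside 0 to `parallelPrompts`-1, with -1 meaning automatic assignment via `LLM.Register`. A hedged usage sketch, with the component wiring and names assumed for illustration:

```csharp
using UnityEngine;
using LLMUnity; // assumed package namespace

// Illustrative sketch only: give each character its own slot so each keeps
// its own server-side prompt cache (contrast with sharing a single slot).
public class ExplicitSlotSetup : MonoBehaviour
{
    public LLM llm;               // assume llm.parallelPrompts == 2
    public LLMCharacter merchant; // cached in slot 0
    public LLMCharacter guard;    // cached in slot 1

    void Start()
    {
        merchant.slot = 0;
        guard.slot = 1;

        // Values outside 0 .. parallelPrompts-1 (other than -1) would trigger
        // the OnValidate error added in this commit.
        Debug.Assert(guard.slot < llm.parallelPrompts);
    }
}
```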
