README.md (+3 −1)
@@ -376,7 +376,8 @@ If the user's GPU is not supported, the LLM will fall back to the CPU
 - `Debug` select to log the output of the model in the Unity Editor
 - <details><summary>Advanced options</summary>

-  - `Parallel Prompts` number of prompts that can happen in parallel (default: -1 = number of LLMCharacter objects)
+  - <details><summary><code>Parallel Prompts</code> number of prompts / slots that can happen in parallel (default: -1 = number of LLMCharacter objects). Note that the context size is divided among the slots.</summary> If you want to retain as much context as possible for the LLM and don't need all the characters present at the same time, you can set this number and specify the slot for each LLMCharacter object.
+  e.g. Setting `Parallel Prompts` to 1 and slot 0 for all LLMCharacter objects will use the full context, but the entire prompt will need to be computed (no caching) whenever a LLMCharacter object is used for chat. </details>
 - `Dont Destroy On Load` select to not destroy the LLM GameObject when loading a new Scene

 </details>
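The added text above trades context size against caching. A minimal sketch of the single-slot setup it describes, assuming the `LLM` and `LLMCharacter` components with the `parallelPrompts` and `slot` fields from this PR; the `LLMUnity` namespace and the scene wiring are illustrative, and in practice these values would typically be set in the Inspector:

```csharp
using UnityEngine;
using LLMUnity; // namespace assumed for the LLM / LLMCharacter components

// Illustrative wiring, not part of this PR: one shared slot gives every
// character the full context, but switching characters recomputes the
// whole prompt since they overwrite each other's cache.
public class SingleSlotSetup : MonoBehaviour
{
    public LLM llm;                   // the server object configured above
    public LLMCharacter[] characters; // all characters that will share slot 0

    void Awake()
    {
        llm.parallelPrompts = 1;      // context is divided among slots; 1 slot = full context
        foreach (LLMCharacter character in characters)
            character.slot = 0;       // -1 (the default) would auto-assign a slot per character
    }
}
```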
@@ -441,6 +442,7 @@ If it is not selected, the full reply from the model is received in one go
 - `Load grammar` click to load a grammar in .gbnf format
 - `Grammar` the path of the grammar being used (relative to the Assets/StreamingAssets folder)
 - <details><summary><code>Cache Prompt</code> save the ongoing prompt from the chat (default: true)</summary> Saves the prompt while it is being created by the chat to avoid reprocessing the entire prompt every time</details>
+- `Slot` slot of the server to use for computation. Value can be set from 0 to `Parallel Prompts`-1 (default: -1 = new slot for each character)
 - `Seed` seed for reproducibility. For random results every time use -1
 - <details><summary><code>Num Predict</code> maximum number of tokens to predict (default: 256, -1 = infinity, -2 = until context filled)</summary>This is the maximum number of tokens the model will predict. When N tokens are reached the model will stop generating. This means words / sentences might not get finished if this is too low.</details>
 - <details><summary><code>Temperature</code> LLM temperature, lower values give more deterministic answers (default: 0.2)</summary>The temperature setting adjusts how random the generated responses are. Turning it up makes the generated choices more varied and unpredictable. Turning it down makes the generated responses more predictable and focused on the most likely options.</details>
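The new `Slot` option is the per-character counterpart of `Parallel Prompts`. For the opposite trade-off to the single-slot sketch above, a hedged sketch of dedicated slots (one per character) so each keeps its own prompt cache; again, the wiring is illustrative rather than part of this PR:

```csharp
using UnityEngine;
using LLMUnity; // namespace assumed

// Illustrative wiring: one slot per character preserves each character's
// cached prompt, at the cost of dividing the context among the slots.
public class DedicatedSlotSetup : MonoBehaviour
{
    public LLM llm;
    public LLMCharacter[] characters;

    void Awake()
    {
        llm.parallelPrompts = characters.Length;
        for (int i = 0; i < characters.Length; i++)
            characters[i].slot = i; // valid values: 0 .. Parallel Prompts - 1
    }
}
```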
Runtime/LLMCharacter.cs (+8 −5)
@@ -45,6 +45,8 @@ public class LLMCharacter : MonoBehaviour
     [ModelAdvanced] public string grammar = null;
     /// <summary> option to cache the prompt as it is being created by the chat to avoid reprocessing the entire prompt every time (default: true) </summary>
     [ModelAdvanced] public bool cachePrompt = true;
+    /// <summary> specify which slot of the server to use for computation (affects caching) </summary>
+    [ModelAdvanced] public int slot = -1;
     /// <summary> seed for reproducibility. For random results every time set to -1. </summary>
     [ModelAdvanced] public int seed = 0;
     /// <summary> number of tokens to predict (-1 = infinity, -2 = until context filled).
@@ -123,7 +125,6 @@ public class LLMCharacter : MonoBehaviour
             LLMUnitySetup.LogError($"No LLM assigned or detected for LLMCharacter {name}!");
             return;
         }
-        id_slot = llm.Register(this);
+        int slotFromServer = llm.Register(this);
+        if (slot == -1) slot = slotFromServer;
     }

     InitGrammar();
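Net effect of this hunk: `llm.Register(this)` still hands out a server-chosen slot, but it is only adopted when `slot` was left at its default of -1, so a slot set explicitly in the Inspector now takes precedence over the automatic assignment.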
@@ -159,6 +161,7 @@ public void Awake()
     void OnValidate()
     {
         AssignLLM();
+        if (llm != null && llm.parallelPrompts > -1 && (slot < -1 || slot >= llm.parallelPrompts)) LLMUnitySetup.LogError($"The slot needs to be between 0 and {llm.parallelPrompts-1}, or -1 to be automatically set");
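Since `OnValidate` runs in the editor whenever a serialized value changes, an out-of-range `slot` is reported in the Console as soon as it is typed into the Inspector; the `slot < -1` check rejects other negative values while keeping -1 as the auto-assignment sentinel.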