Skip to content

Commit c4c7488

Browse files
committed
remove support for extras (flash attention, IQ quants)
1 parent f488839 commit c4c7488

File tree

6 files changed

+4
-49
lines changed

6 files changed

+4
-49
lines changed

Editor/LLMEditor.cs

-7
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,6 @@ public override void AddModelSettings(SerializedObject llmScriptSO)
111111
if (llmScriptSO.FindProperty("advancedOptions").boolValue)
112112
{
113113
attributeClasses.Add(typeof(ModelAdvancedAttribute));
114-
if (LLMUnitySetup.FullLlamaLib) attributeClasses.Add(typeof(ModelExtrasAttribute));
115114
}
116115
ShowPropertiesOfClass("", llmScriptSO, attributeClasses, false);
117116
Space();
@@ -445,18 +444,12 @@ private void CopyToClipboard(string text)
445444
te.Copy();
446445
}
447446

448-
public void AddExtrasToggle()
449-
{
450-
if (ToggleButton("Use extras", LLMUnitySetup.FullLlamaLib)) LLMUnitySetup.SetFullLlamaLib(!LLMUnitySetup.FullLlamaLib);
451-
}
452-
453447
public override void AddOptionsToggles(SerializedObject llmScriptSO)
454448
{
455449
AddDebugModeToggle();
456450

457451
EditorGUILayout.BeginHorizontal();
458452
AddAdvancedOptionsToggle(llmScriptSO);
459-
AddExtrasToggle();
460453
EditorGUILayout.EndHorizontal();
461454
Space();
462455
}

README.md

+1-4
Original file line numberDiff line numberDiff line change
@@ -499,8 +499,7 @@ Save the scene, run and enjoy!
499499
### LLM Settings
500500

501501
- `Show/Hide Advanced Options` Toggle to show/hide advanced options from below
502-
- `Log Level` select how verbose the log messages are
503-
- `Use extras` select to install and allow the use of extra features (flash attention and IQ quants)
502+
- `Log Level` select how verbose the log messages are
504503

505504
#### 💻 Setup Settings
506505

@@ -551,7 +550,6 @@ If the user's GPU is not supported, the LLM will fall back to the CPU
551550
- `Chat Template` the chat template being used for the LLM
552551
- `Lora` the path of the LoRAs being used (relative to the Assets/StreamingAssets folder)
553552
- `Lora Weights` the weights of the LoRAs being used
554-
- `Flash Attention` click to use flash attention in the model (if `Use extras` is enabled)
555553

556554
</details>
557555

@@ -566,7 +564,6 @@ If the user's GPU is not supported, the LLM will fall back to the CPU
566564

567565
- `Show/Hide Advanced Options` Toggle to show/hide advanced options from below
568566
- `Log Level` select how verbose the log messages are
569-
- `Use extras` select to install and allow the use of extra features (flash attention and IQ quants)
570567

571568
#### 💻 Setup Settings
572569
<div>

Runtime/LLM.cs

-3
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,6 @@ public class LLM : MonoBehaviour
6161
[ModelAdvanced] public string lora = "";
6262
/// <summary> the weights of the LORA models being used.</summary>
6363
[ModelAdvanced] public string loraWeights = "";
64-
/// <summary> enable use of flash attention </summary>
65-
[ModelExtras] public bool flashAttention = false;
6664

6765
/// <summary> API key to use for the server (optional) </summary>
6866
public string APIKey;
@@ -435,7 +433,6 @@ protected virtual string GetLlamaccpArguments()
435433
if (numThreadsToUse > 0) arguments += $" -t {numThreadsToUse}";
436434
arguments += loraArgument;
437435
arguments += $" -ngl {numGPULayers}";
438-
if (LLMUnitySetup.FullLlamaLib && flashAttention) arguments += $" --flash-attn";
439436
if (remote)
440437
{
441438
arguments += $" --port {port} --host 0.0.0.0";

Runtime/LLMBuilder.cs

-2
Original file line numberDiff line numberDiff line change
@@ -161,8 +161,6 @@ public static void HideLibraryPlatforms(string platform)
161161
foreach (string platformPrefix in platforms)
162162
{
163163
bool move = sourceName.StartsWith(platformPrefix);
164-
move = move || (sourceName.Contains("cuda") && !sourceName.Contains("full") && LLMUnitySetup.FullLlamaLib);
165-
move = move || (sourceName.Contains("cuda") && sourceName.Contains("full") && !LLMUnitySetup.FullLlamaLib);
166164
if (move)
167165
{
168166
string target = Path.Combine(BuildTempDir, sourceName);

Runtime/LLMLib.cs

+3-12
Original file line numberDiff line numberDiff line change
@@ -462,18 +462,9 @@ public static List<string> PossibleArchitectures(bool gpu = false)
462462
{
463463
if (gpu)
464464
{
465-
if (LLMUnitySetup.FullLlamaLib)
466-
{
467-
architectures.Add("cuda-cu12.2.0-full");
468-
architectures.Add("cuda-cu11.7.1-full");
469-
architectures.Add("hip-full");
470-
}
471-
else
472-
{
473-
architectures.Add("cuda-cu12.2.0");
474-
architectures.Add("cuda-cu11.7.1");
475-
architectures.Add("hip");
476-
}
465+
architectures.Add("cuda-cu12.2.0");
466+
architectures.Add("cuda-cu11.7.1");
467+
architectures.Add("hip");
477468
architectures.Add("vulkan");
478469
}
479470
if (has_avx512) architectures.Add("avx512");

Runtime/LLMUnitySetup.cs

-21
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@ public class LocalRemoteAttribute : PropertyAttribute {}
5959
public class RemoteAttribute : PropertyAttribute {}
6060
public class LocalAttribute : PropertyAttribute {}
6161
public class ModelAttribute : PropertyAttribute {}
62-
public class ModelExtrasAttribute : PropertyAttribute {}
6362
public class ChatAttribute : PropertyAttribute {}
6463
public class LLMUnityAttribute : PropertyAttribute {}
6564

@@ -112,8 +111,6 @@ public class LLMUnitySetup
112111
public static string libraryPath = GetAssetPath(libraryName);
113112
/// <summary> LlamaLib url </summary>
114113
public static string LlamaLibURL = $"{LlamaLibReleaseURL}/{libraryName}.zip";
115-
/// <summary> LlamaLib extension url </summary>
116-
public static string LlamaLibExtensionURL = $"{LlamaLibReleaseURL}/{libraryName}-full.zip";
117114
/// <summary> LLMUnity store path </summary>
118115
public static string LLMUnityStore = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), "LLMUnity");
119116
/// <summary> Model download path </summary>
@@ -153,8 +150,6 @@ public class LLMUnitySetup
153150
/// \cond HIDE
154151
[LLMUnity] public static DebugModeType DebugMode = DebugModeType.All;
155152
static string DebugModeKey = "DebugMode";
156-
public static bool FullLlamaLib = false;
157-
static string FullLlamaLibKey = "FullLlamaLib";
158153
static List<Callback<string>> errorCallbacks = new List<Callback<string>>();
159154
static readonly object lockObject = new object();
160155
static Dictionary<string, Task> androidExtractTasks = new Dictionary<string, Task>();
@@ -189,7 +184,6 @@ public static void LogError(string message)
189184
static void LoadPlayerPrefs()
190185
{
191186
DebugMode = (DebugModeType)PlayerPrefs.GetInt(DebugModeKey, (int)DebugModeType.All);
192-
FullLlamaLib = PlayerPrefs.GetInt(FullLlamaLibKey, 0) == 1;
193187
}
194188

195189
public static void SetDebugMode(DebugModeType newDebugMode)
@@ -200,18 +194,6 @@ public static void SetDebugMode(DebugModeType newDebugMode)
200194
PlayerPrefs.Save();
201195
}
202196

203-
#if UNITY_EDITOR
204-
public static void SetFullLlamaLib(bool value)
205-
{
206-
if (FullLlamaLib == value) return;
207-
FullLlamaLib = value;
208-
PlayerPrefs.SetInt(FullLlamaLibKey, value ? 1 : 0);
209-
PlayerPrefs.Save();
210-
_ = DownloadLibrary();
211-
}
212-
213-
#endif
214-
215197
public static string GetLibraryName(string version)
216198
{
217199
return $"undreamai-{version}-llamacpp";
@@ -452,9 +434,6 @@ static async Task DownloadLibrary()
452434

453435
// setup LlamaLib in StreamingAssets
454436
await DownloadAndExtractInsideDirectory(LlamaLibURL, libraryPath, setupDir);
455-
456-
// setup LlamaLib extras in StreamingAssets
457-
if (FullLlamaLib) await DownloadAndExtractInsideDirectory(LlamaLibExtensionURL, libraryPath, setupDir);
458437
}
459438
catch (Exception e)
460439
{

0 commit comments

Comments
 (0)