Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/doxygen/Doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ PROJECT_NAME = "LlamaLib"
# could be handy for archiving the generated documentation or if some version
# control system is used.

PROJECT_NUMBER = v2.0.3
PROJECT_NUMBER = v2.0.4

# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a
Expand Down
2 changes: 1 addition & 1 deletion .github/tests/csharp-dotnet/Program.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="LlamaLib" Version="2.0.3" />
<PackageReference Include="LlamaLib" Version="2.0.4" />
</ItemGroup>

</Project>
2 changes: 2 additions & 0 deletions .github/workflows/build_library.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -748,6 +748,7 @@ jobs:
./.github/scripts/release.sh release
mv release ${{ env.RELEASE_NAME }}
zip -r ${{ env.RELEASE_NAME }}.zip ${{ env.RELEASE_NAME }}
sha256sum ${{ env.RELEASE_NAME }}.zip | awk '{print $1}' > ${{ env.RELEASE_NAME }}.zip.sha256

- id: test_cmake
name: Test cmake
Expand Down Expand Up @@ -785,6 +786,7 @@ jobs:
with:
files: |
${{ env.RELEASE_NAME }}.zip
${{ env.RELEASE_NAME }}.zip.sha256
csharp/bin/Release/LlamaLib*nupkg
name: Release ${{ env.VERSION }}

2 changes: 2 additions & 0 deletions .github/workflows_template/build_library_steps.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@ jobs:
./.github/scripts/release.sh release
mv release ${{ env.RELEASE_NAME }}
zip -r ${{ env.RELEASE_NAME }}.zip ${{ env.RELEASE_NAME }}
sha256sum ${{ env.RELEASE_NAME }}.zip | awk '{print $1}' > ${{ env.RELEASE_NAME }}.zip.sha256

- name: Test cmake
id: test_cmake
Expand Down Expand Up @@ -325,4 +326,5 @@ jobs:
name: "Release ${{ env.VERSION }}"
files: |
${{ env.RELEASE_NAME }}.zip
${{ env.RELEASE_NAME }}.zip.sha256
csharp/bin/Release/LlamaLib*nupkg
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
[![LinkedIn](https://img.shields.io/badge/LinkedIn-blue?style=flat&logo=linkedin&labelColor=blue)](https://www.linkedin.com/company/undreamai)
[![GitHub Repo stars](https://img.shields.io/github/stars/undreamai/LlamaLib?style=flat&logo=github&color=f5f5f5)](https://github.com/undreamai/LlamaLib)
[![Documentation](https://img.shields.io/badge/Docs-white.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADAAAAAwEAYAAAAHkiXEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAATqSURBVHic7ZtbiE1RGMc349K4M5EwklwjzUhJCMmTJPJAYjQXJJcH8+Blkry4lPJA8aAoJbekDLmUS6E8SHJL5AW5JPf77eHv93C22Wfttc/ee+0zc/4vv+bMXvusvfZa3/q+b33H80oqqaSSSmqrKnPdgXjUvbvYq5f4+7f486eb/rRajRsn7t4tPngg/vol/vkj/vghXr0q7tghzpyZ//79+on79omXLombNondukXrd9GoSxdx8mSxqUm8eVNkgAvl0aPioEFip07i6dP52z15Ig4fbvVY2VVFhbhokXjrlogJiWvAg/jwoXjqVO73+leUny9eiFVV5mfMlLDRBw+KX76ISQ+0LZ8/F00v4uJFsWPHFh83O+rdWzx3TnQ9wCZ+/Sqyl5iux1RmTu3aiYcPi64H1pasALypoOv4/8SJXraEbXc9kLbECxo2TKyuFj9/zt9u+XIvG8LWv3wpuh5QW86f3/JznT+fv93s2S23C1Z72wbhtH692LdvMvdPSgzkhAkiJhT16ZO/PRPOmcr+Rda4aa5nclTeuZP7PDgRpr1g40bPrQYOFF0PYKHEC+raVVy8OFy7R49EArvURU4mrUAqaTY0iB8/2rXD+XCm5mbR9QAWylevorV7/VpkL0ld06eLpkiyWPj9u93179+LpFZwZ1PXtGnitWui64GMStPmG7SH1NSIJBNHjvTSFZvRvHlise0N9JcBtW1/44Y4dqx45IjnU0JxAGLpklPx+9VZFwPp/9v/eZDGjxcZh7dv4+mXtch+up7Rca+MsJvxiRNi6nvBhg25HWprZMaPGeOlqxEjxGKz+XGRTAAmyJnq6sR370TXA2NLW+8HNjZ62dLOnaLrAQ1r2zmqPH482n0mTfJCKmEvCJHUooNZE/369Elct06kqiKsONRfulTEFDsX8QDlIa5nup9374pE8IiZHPY+ly+LZE/37/cM6mC6IB6Vl4urV6fzfUG6d0/csyf37wsXRFInaM4ckTjGdPg+apTYs6dI3RIWwH//1DV1qkiuxNY2FzrTd+2y6y8z2HQU6efZs+KBAyJZ4v+V0h6ArlwROaQP0uPH4ooV4sqV8Xz/4MF211M2wwoOq1mzRAq5Pnywa5+4KDHE9mI7ly0TO3fOvZ6/eZCoKwB32HS0SMFV1DNtImBKHYstBROoQ4fEQk2RaS+qrxejmj5M7NatIhWARS82xUJfAKahzFcdPnq0GLYgy7Rnbd8e6rGKRyzpuNzPBQty709RcNSZf/KkuHCh2GpMDyKbGNcLYE+YMkVks336NFx7XhTZ3szXiBaqtWvFuAOxM2dEZiyH8UErgc8JLNun7E0aFffSI7RP6owZmz9kSO73HjsmXr8ukppYsybSYyQvBp5QfOjQ3M9tRR496pGgLf1JtLlzRZJzlFzGp4SWDnUxFCrdvy+uWiWa3DJe3N69oj8uSEq8CER88uaNOGBAOv2ILGY69TBBJoM8O0t72zaRoztXBzlLlrT8XARW/IQq82JTMv3mKmv0/9CC4mJMYPwrMSETxAyurRUxQVmXP1fEid7mzeK3b+n2Jzb16CFu2SIWmtNJiriVxANsyq0uoCJfTk4G9y4t24/bSQ0rTkP6gVTG3mz//uKMGSK/ucId5Xe9lZUi5eMMLGUgz56J5Hxu3xZ50Xg3RMIltVn9BRja26PYsBHgAAAAAElFTkSuQmCC)](https://undream.ai/LlamaLib)
[![llama.cpp](https://img.shields.io/badge/llama.cpp-b7777-blue?style=flat)](https://github.com/ggerganov/llama.cpp)

LlamaLib is a **high-level C++ and C#** library for running Large Language Models (LLMs) **anywhere** - from PCs to mobile devices and VR headsets.<br>
It is built on top of the awesome [llama.cpp](https://github.com/ggerganov/llama.cpp) library.
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.0.3
2.0.4
2 changes: 1 addition & 1 deletion cmake/LlamaLibConfigVersion.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# PACKAGE_VERSION_COMPATIBLE if the current version is >= requested version.
# The variable CVF_VERSION must be set before calling configure_file().

set(PACKAGE_VERSION "2.0.3")
set(PACKAGE_VERSION "2.0.4")

if (PACKAGE_FIND_VERSION_RANGE)
# Package version must be in the requested version range
Expand Down
41 changes: 41 additions & 0 deletions cpp_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,47 @@ agent.remove_last_message();
agent.remove_last_message();
```

#### Context Overflow Management

When conversation history grows large enough to exceed the model's context window, the agent can automatically handle it using a configurable strategy.

```cpp
void set_overflow_strategy(
ContextOverflowStrategy strategy, // None, Truncate, or Summarize
float target_ratio = 0.5f, // target fill ratio of context after truncation (0.0–1.0)
const std::string &summarize_prompt = "..." // custom summarization prompt (default provided)
);
```

**Strategies:**

| Value | Behaviour |
|-------|-----------|
| `ContextOverflowStrategy::None` | No protection — may crash if context is exceeded |
| `ContextOverflowStrategy::Truncate` | Drops oldest message pairs until history fits within `target_ratio` of the context |
| `ContextOverflowStrategy::Summarize` | Asks the LLM to summarize the history, embeds the summary in the system message, then truncates if the history still does not fit |

**Example:**
```cpp
// Truncate: drop oldest pairs when context is full, keeping ≤50% filled
agent.set_overflow_strategy(ContextOverflowStrategy::Truncate, 0.5f);

// Summarize: condense history into a rolling summary in the system prompt
agent.set_overflow_strategy(ContextOverflowStrategy::Summarize);

// Summarize with a custom prompt
agent.set_overflow_strategy(
ContextOverflowStrategy::Summarize,
0.5f,
"Summarize the key facts from this conversation briefly:\n\n"
);

// The rolling summary is automatically saved with save_history / loaded with load_history.
// You can also access or replace it directly:
std::string summary = agent.get_summary();
agent.set_summary("Alice introduced herself as a software engineer.");
```

#### System Prompt

```cpp
Expand Down
44 changes: 44 additions & 0 deletions csharp/LLMAgent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,17 @@ public override string ToString()
}
}

/// <summary>
/// Strategy to apply when the chat history would exceed the model's context window.
/// </summary>
/// <remarks>
/// The numeric values are significant: LLMAgent.SetOverflowStrategy casts the chosen member to
/// <c>int</c> and passes it to the native LLMAgent_Set_Overflow_Strategy call.
/// </remarks>
public enum ContextOverflowStrategy
{
/// <summary>No automatic handling — may crash if the context is exceeded.</summary>
None = 0,
/// <summary>Remove the oldest message pairs from the front until the history fits within targetRatio of the context.</summary>
Truncate = 1,
/// <summary>Summarize the history, embed the summary in the system message, then truncate if still needed.</summary>
Summarize = 2
}

// LLMAgent class
public class LLMAgent : LLMLocal
{
Expand Down Expand Up @@ -214,6 +225,39 @@ public int GetHistorySize()
return llamaLib.LLMAgent_Get_History_Size(llm);
}

// Context overflow management

/// <summary>
/// Select what the agent does when its chat history no longer fits in the context window.
/// </summary>
/// <param name="strategy">Overflow strategy; forwarded to the native library as its integer value.</param>
/// <param name="targetRatio">Target fill ratio of the context after truncation (0.0–1.0, default 0.5).</param>
/// <param name="summarizePrompt">Custom summarization prompt; null selects the built-in default.</param>
public void SetOverflowStrategy(ContextOverflowStrategy strategy, float targetRatio = 0.5f, string summarizePrompt = null)
{
    CheckLlamaLib();

    // The native entry point takes the strategy as a plain int.
    int strategyCode = (int)strategy;
    llamaLib.LLMAgent_Set_Overflow_Strategy(llm, strategyCode, targetRatio, summarizePrompt);
}

/// <summary>
/// Fetch the rolling summary maintained by the Summarize overflow strategy.
/// </summary>
/// <returns>
/// The current summary text. Never null: a null result from the native wrapper is
/// mapped to an empty string.
/// </returns>
public string GetSummary()
{
    CheckLlamaLib();

    string summary = llamaLib.LLMAgent_Get_Summary(llm);
    if (summary == null)
    {
        return string.Empty;
    }
    return summary;
}

/// <summary>
/// Replace the rolling summary directly, e.g. to restore state after loading from file.
/// </summary>
/// <param name="summary">New summary text; null is forwarded as an empty string.</param>
public void SetSummary(string summary)
{
    CheckLlamaLib();

    // The native call always receives a non-null string.
    string text = summary == null ? string.Empty : summary;
    llamaLib.LLMAgent_Set_Summary(llm, text);
}

// Chat functionality
public string Chat(string userPrompt, bool addToHistory = true, LlamaLib.CharArrayCallback callback = null, bool returnResponseJson = false, bool debugPrompt = false)
{
Expand Down
62 changes: 52 additions & 10 deletions csharp/LlamaLib.cs
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,15 @@ public delegate IntPtr LLMAgent_Chat_Delegate(IntPtr llm,
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public delegate int LLMAgent_Get_History_Size_Delegate(IntPtr llm);

// Signature of the native LLMAgent_Set_Overflow_Strategy entry point (cdecl).
// `strategy` is the integer value of ContextOverflowStrategy; `summarizePrompt` is marshalled
// as LPStr and may be null (the managed wrappers default it to null).
// NOTE(review): LPStr uses the platform's ANSI/default narrow encoding — confirm the native side
// expects the same encoding for non-ASCII prompts.
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public delegate void LLMAgent_Set_Overflow_Strategy_Delegate(IntPtr llm, int strategy, float targetRatio, [MarshalAs(UnmanagedType.LPStr)] string summarizePrompt);

// Signature of the native LLMAgent_Get_Summary entry point (cdecl).
// Returns a raw pointer; the managed wrapper converts it with Marshal.PtrToStringAnsi.
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public delegate IntPtr LLMAgent_Get_Summary_Delegate(IntPtr llm);

// Signature of the native LLMAgent_Set_Summary entry point (cdecl); `summary` is marshalled as LPStr.
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public delegate void LLMAgent_Set_Summary_Delegate(IntPtr llm, [MarshalAs(UnmanagedType.LPStr)] string summary);

//################################################## FUNCTION POINTERS ##################################################//

// Main lib
Expand Down Expand Up @@ -235,13 +244,16 @@ public delegate IntPtr LLMAgent_Chat_Delegate(IntPtr llm,
public LLMAgent_Save_History_Delegate LLMAgent_Save_History_Internal;
public LLMAgent_Load_History_Delegate LLMAgent_Load_History_Internal;
public LLMAgent_Get_History_Size_Delegate LLMAgent_Get_History_Size_Internal;
public LLMAgent_Set_Overflow_Strategy_Delegate LLMAgent_Set_Overflow_Strategy_Internal;
public LLMAgent_Get_Summary_Delegate LLMAgent_Get_Summary_Internal;
public LLMAgent_Set_Summary_Delegate LLMAgent_Set_Summary_Internal;

//################################################## STATUS CHECKING WRAPPER ##################################################//

public void CheckStatus(bool crashesOnly = false)
{
int status = LLM_Status_Code_Internal();
if (status > 0 || (status < 0 && !crashesOnly))
if (status < 0 || (status > 0 && !crashesOnly))
{
string msg = Marshal.PtrToStringAnsi(LLM_Status_Message_Internal()) ?? "";
throw new InvalidOperationException($"LlamaLib error {status}: {msg}");
Expand Down Expand Up @@ -328,6 +340,9 @@ public IntPtr LLMAgent_Chat(IntPtr llm, string userPrompt, bool addToHistory = t
public void LLMAgent_Save_History(IntPtr llm, string filepath) => CallWithStatus(() => LLMAgent_Save_History_Internal(llm, filepath));
public void LLMAgent_Load_History(IntPtr llm, string filepath) => CallWithStatus(() => LLMAgent_Load_History_Internal(llm, filepath));
public int LLMAgent_Get_History_Size(IntPtr llm) => CallWithStatus(() => LLMAgent_Get_History_Size_Internal(llm));
// Forwards to the loaded LLMAgent_Set_Overflow_Strategy function pointer through CallWithStatus
// (presumably so the native status is checked after the call — confirm against CallWithStatus).
public void LLMAgent_Set_Overflow_Strategy(IntPtr llm, int strategy, float targetRatio, string summarizePrompt = null) => CallWithStatus(() => LLMAgent_Set_Overflow_Strategy_Internal(llm, strategy, targetRatio, summarizePrompt));
// Calls the native getter through CallWithStatus and converts the returned pointer to a managed
// string with Marshal.PtrToStringAnsi; LLMAgent.GetSummary coalesces a null result to string.Empty.
public string LLMAgent_Get_Summary(IntPtr llm) => Marshal.PtrToStringAnsi(CallWithStatus(() => LLMAgent_Get_Summary_Internal(llm)));
// Forwards to the loaded LLMAgent_Set_Summary function pointer through CallWithStatus.
public void LLMAgent_Set_Summary(IntPtr llm, string summary) => CallWithStatus(() => LLMAgent_Set_Summary_Internal(llm, summary));

//################################################## MOBILE IMPLEMENTATION ##################################################//

Expand Down Expand Up @@ -537,6 +552,15 @@ public static extern IntPtr LLMAgent_Chat_Static(IntPtr llm,
[DllImport(DllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "LLMAgent_Get_History_Size")]
public static extern int LLMAgent_Get_History_Size_Static(IntPtr llm);

// Direct P/Invoke binding to the native "LLMAgent_Set_Overflow_Strategy" export in DllName.
// Wired to LLMAgent_Set_Overflow_Strategy_Internal by the constructor of this implementation.
[DllImport(DllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "LLMAgent_Set_Overflow_Strategy")]
public static extern void LLMAgent_Set_Overflow_Strategy_Static(IntPtr llm, int strategy, float targetRatio, [MarshalAs(UnmanagedType.LPStr)] string summarizePrompt);

// Direct P/Invoke binding to the native "LLMAgent_Get_Summary" export; returns a raw string pointer.
[DllImport(DllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "LLMAgent_Get_Summary")]
public static extern IntPtr LLMAgent_Get_Summary_Static(IntPtr llm);

// Direct P/Invoke binding to the native "LLMAgent_Set_Summary" export; `summary` is marshalled as LPStr.
[DllImport(DllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "LLMAgent_Set_Summary")]
public static extern void LLMAgent_Set_Summary_Static(IntPtr llm, [MarshalAs(UnmanagedType.LPStr)] string summary);

public static IntPtr Available_Architectures([MarshalAs(UnmanagedType.I1)] bool gpu) { return IntPtr.Zero; }
public static bool Has_GPU_Layers([MarshalAs(UnmanagedType.LPStr)] string command) { return false; }

Expand Down Expand Up @@ -603,6 +627,9 @@ public LlamaLib(bool gpu = false)
LLMAgent_Save_History_Internal = (llm, filepath) => LLMAgent_Save_History_Static(llm, filepath);
LLMAgent_Load_History_Internal = (llm, filepath) => LLMAgent_Load_History_Static(llm, filepath);
LLMAgent_Get_History_Size_Internal = (llm) => LLMAgent_Get_History_Size_Static(llm);
LLMAgent_Set_Overflow_Strategy_Internal = (llm, strategy, targetRatio, summarizePrompt) => LLMAgent_Set_Overflow_Strategy_Static(llm, strategy, targetRatio, summarizePrompt);
LLMAgent_Get_Summary_Internal = (llm) => LLMAgent_Get_Summary_Static(llm);
LLMAgent_Set_Summary_Internal = (llm, summary) => LLMAgent_Set_Summary_Static(llm, summary);
}

public void Dispose() {}
Expand All @@ -618,7 +645,7 @@ public void Dispose() {}
// Native handles tracked by the loader (populated elsewhere in this class).
private List<IntPtr> dependencyHandles = new List<IntPtr>();
private static int debugLevelGlobal = 0;
private static CharArrayCallback loggingCallbackGlobal = null;
// Candidate libraries in load order, as (architecture name, is GPU build) pairs.
// Filled by LoadLibraries and walked by TryNextLibrary. The former string[] declaration
// is removed: two members named availableLibraries cannot coexist.
private List<Tuple<string, bool>> availableLibraries = null;
// Index into availableLibraries of the candidate currently being tried.
private int currentLibraryIndex = 0;

// Runtime lib
Expand Down Expand Up @@ -724,10 +751,7 @@ private string GetRuntimeLibraryPath()
private string[] GetAvailableArchitectures(bool gpu)
{
string architecturesString = Marshal.PtrToStringAnsi(Available_Architectures(gpu));
if (string.IsNullOrEmpty(architecturesString))
{
throw new InvalidOperationException("No architectures available for the specified GPU setting.");
}
if (string.IsNullOrEmpty(architecturesString)) return new string[0];

string[] librariesOptions = architecturesString.Split(',');
List<string> libraries = new List<string>();
Expand All @@ -751,12 +775,20 @@ private string[] GetAvailableArchitectures(bool gpu)

/// <summary>
/// Build the ordered list of candidate libraries and load the first one that works.
/// </summary>
/// <param name="gpu">
/// When true, GPU architectures are listed first and CPU architectures are appended as a
/// fallback; when false, only CPU architectures are considered.
/// </param>
/// <exception cref="InvalidOperationException">Thrown when no candidate library could be loaded.</exception>
private void LoadLibraries(bool gpu)
{
    availableLibraries = new List<Tuple<string, bool>>();

    // Each candidate remembers whether it came from the GPU query so TryNextLibrary
    // can verify GPU support after loading it.
    bool[] arch_options = gpu ? new bool[] { true, false } : new bool[] { false };
    foreach (bool arch_gpu in arch_options)
    {
        foreach (string arch in GetAvailableArchitectures(arch_gpu))
        {
            availableLibraries.Add(new Tuple<string, bool>(arch, arch_gpu));
        }
    }

    // TryNextLibrary pre-increments the index, so -1 makes it start at the first candidate.
    currentLibraryIndex = -1;

    if (!TryNextLibrary())
    {
        // Report the architecture names only (not the Tuple representation).
        string libs = string.Join(", ", availableLibraries.ConvertAll(candidate => candidate.Item1.Trim()));
        throw new InvalidOperationException($"Failed to load any library. Available libraries: {libs}");
    }
}

Expand Down Expand Up @@ -797,9 +829,9 @@ public bool TryNextLibrary()
libraryHandle = IntPtr.Zero;
}

while (++currentLibraryIndex < availableLibraries.Length)
while (++currentLibraryIndex < availableLibraries.Count)
{
string library = availableLibraries[currentLibraryIndex];
var (library, is_gpu_library) = availableLibraries[currentLibraryIndex];
try
{
string libraryPath = FindLibrary(library.Trim());
Expand All @@ -812,6 +844,8 @@ public bool TryNextLibrary()
libraryHandle = LibraryLoader.LoadLibrary(libraryPath);

LoadFunctionPointers();
if (is_gpu_library && !LLMService_Supports_GPU()) continue;

architecture = library.Trim();
if (debugLevelGlobal > 0) Console.WriteLine("Successfully loaded: " + libraryPath);
return true;
Expand Down Expand Up @@ -858,6 +892,7 @@ private void LoadFunctionPointers()
LLM_Debug = LibraryLoader.GetSymbolDelegate<LLM_Debug_Delegate>(libraryHandle, "LLM_Debug");
LLM_Logging_Callback = LibraryLoader.GetSymbolDelegate<LLM_Logging_Callback_Delegate>(libraryHandle, "LLM_Logging_Callback");
LLM_Logging_Stop = LibraryLoader.GetSymbolDelegate<LLM_Logging_Stop_Delegate>(libraryHandle, "LLM_Logging_Stop");
LLMService_Supports_GPU = LibraryLoader.GetSymbolDelegate<LLMService_Supports_GPU_Delegate>(libraryHandle, "LLMService_Supports_GPU");

LLM_Enable_Reasoning_Internal = LibraryLoader.GetSymbolDelegate<LLM_Enable_Reasoning_Delegate>(libraryHandle, "LLM_Enable_Reasoning");
LLM_Apply_Template_Internal = LibraryLoader.GetSymbolDelegate<LLM_Apply_Template_Delegate>(libraryHandle, "LLM_Apply_Template");
Expand Down Expand Up @@ -908,6 +943,9 @@ private void LoadFunctionPointers()
LLMAgent_Save_History_Internal = LibraryLoader.GetSymbolDelegate<LLMAgent_Save_History_Delegate>(libraryHandle, "LLMAgent_Save_History");
LLMAgent_Load_History_Internal = LibraryLoader.GetSymbolDelegate<LLMAgent_Load_History_Delegate>(libraryHandle, "LLMAgent_Load_History");
LLMAgent_Get_History_Size_Internal = LibraryLoader.GetSymbolDelegate<LLMAgent_Get_History_Size_Delegate>(libraryHandle, "LLMAgent_Get_History_Size");
LLMAgent_Set_Overflow_Strategy_Internal = LibraryLoader.GetSymbolDelegate<LLMAgent_Set_Overflow_Strategy_Delegate>(libraryHandle, "LLMAgent_Set_Overflow_Strategy");
LLMAgent_Get_Summary_Internal = LibraryLoader.GetSymbolDelegate<LLMAgent_Get_Summary_Delegate>(libraryHandle, "LLMAgent_Get_Summary");
LLMAgent_Set_Summary_Internal = LibraryLoader.GetSymbolDelegate<LLMAgent_Set_Summary_Delegate>(libraryHandle, "LLMAgent_Set_Summary");
}

// Static functions
Expand All @@ -920,9 +958,13 @@ private void LoadFunctionPointers()
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public delegate void LLM_Logging_Stop_Delegate();

// Signature of the native LLMService_Supports_GPU entry point (cdecl, no arguments).
// The return value is marshalled explicitly as a 1-byte boolean: without this, the default
// marshaller reads a 4-byte BOOL and may see garbage in the upper bytes of the return register.
// Assumes the native function returns a C/C++ `bool` — TODO confirm against the C API header.
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
[return: MarshalAs(UnmanagedType.I1)]
public delegate bool LLMService_Supports_GPU_Delegate();

public LLM_Debug_Delegate LLM_Debug;
public LLM_Logging_Callback_Delegate LLM_Logging_Callback;
public LLM_Logging_Stop_Delegate LLM_Logging_Stop;
public LLMService_Supports_GPU_Delegate LLMService_Supports_GPU;

public static void Debug(int debugLevel)
{
Expand Down
Loading
Loading