-
Notifications
You must be signed in to change notification settings - Fork 1.3k
.NET Compaction - Introduce preconfigured CompactionStrategy pattern
#4665
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
579993d
74e7526
c4c236b
0985a7f
d48ddbe
3f3bb77
1dae527
8e598ed
a3f11f8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,39 @@ | ||
| // Copyright (c) Microsoft. All rights reserved. | ||
|
|
||
| using System.Diagnostics.CodeAnalysis; | ||
| using Microsoft.Shared.DiagnosticIds; | ||
|
|
||
| namespace Microsoft.Agents.AI.Compaction; | ||
|
|
||
| #pragma warning disable IDE0001 // Simplify Names for namespace in comments | ||
|
|
||
| /// <summary> | ||
| /// Describes the compaction approach used by a pre-configured <see cref="CompactionStrategy"/>. | ||
| /// </summary> | ||
| /// <seealso cref="CompactionStrategy.Create"/> | ||
| [Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)] | ||
| public enum CompactionApproach | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does the terms used here, e.g. "Gentle", "lightest", or "Aggressive" refer to the costliness (both time and resources) of the techniques used? After all, I wouldn't necessarily consider Truncation a gentle technique. It's rather blunt and aggressive IMO 😄 |
||
| { | ||
| /// <summary> | ||
| /// Applies the lightest available compaction techniques. | ||
| /// Collapses old tool call groups into concise summaries and uses truncation as an emergency backstop. | ||
| /// Does not require a summarization <see cref="Microsoft.Extensions.AI.IChatClient"/>. | ||
| /// </summary> | ||
| Gentle, | ||
|
|
||
| /// <summary> | ||
| /// Balances context preservation with compaction efficiency. | ||
| /// Applies tool result collapsing, LLM-based summarization, and truncation as an emergency backstop. | ||
| /// Requires a summarization <see cref="Microsoft.Extensions.AI.IChatClient"/>. | ||
| /// </summary> | ||
| Balanced, | ||
|
|
||
| /// <summary> | ||
| /// Applies the most aggressive available compaction techniques. | ||
| /// Applies tool result collapsing, LLM-based summarization, turn-based sliding window, and truncation. | ||
| /// Requires a summarization <see cref="Microsoft.Extensions.AI.IChatClient"/>. | ||
| /// </summary> | ||
| Aggressive, | ||
| } | ||
|
|
||
| #pragma warning restore IDE0001 // Simplify Names | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| // Copyright (c) Microsoft. All rights reserved. | ||
|
|
||
| using System.Diagnostics.CodeAnalysis; | ||
| using Microsoft.Shared.DiagnosticIds; | ||
|
|
||
| namespace Microsoft.Agents.AI.Compaction; | ||
|
|
||
| /// <summary> | ||
| /// Describes the context-size profile used by a pre-configured <see cref="CompactionStrategy"/>. | ||
| /// </summary> | ||
| /// <remarks> | ||
| /// The size profile controls the token and message thresholds at which compaction triggers. | ||
| /// Choose a size that matches the input token limit of your model: | ||
| /// <see cref="Compact"/> for smaller context windows, <see cref="Moderate"/> for common mid-range models, | ||
| /// and <see cref="Generous"/> for models with large context windows. | ||
| /// </remarks> | ||
| /// <seealso cref="CompactionStrategy.Create"/> | ||
| [Experimental(DiagnosticIds.Experiments.AgentsAIExperiments)] | ||
| public enum CompactionSize | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we explain what Compact, Moderate and Generous means here in actual numbers, so that users can more easily make a decision about which one fits their model? |
||
| { | ||
| /// <summary> | ||
| /// Maintains a smaller context window. | ||
| /// Compaction triggers earlier and keeps less history in context. | ||
| /// </summary> | ||
| Compact, | ||
|
|
||
| /// <summary> | ||
| /// Maintains a medium-sized context window. | ||
| /// This is a reasonable default for most common models. | ||
| /// </summary> | ||
| Moderate, | ||
|
|
||
| /// <summary> | ||
| /// Maintains a large context window. | ||
| /// Compaction triggers later and retains more history in context. | ||
| /// </summary> | ||
| Generous, | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,157 @@ | ||
| // Copyright (c) Microsoft. All rights reserved. | ||
|
|
||
| using System; | ||
| using Microsoft.Extensions.AI; | ||
| using Microsoft.Shared.Diagnostics; | ||
|
|
||
| namespace Microsoft.Agents.AI.Compaction; | ||
|
|
||
| /// <summary> | ||
| /// Base class for strategies that compact a <see cref="CompactionMessageIndex"/> to reduce context size. | ||
| /// </summary> | ||
| /// <remarks> | ||
| /// <para> | ||
| /// Compaction strategies operate on <see cref="CompactionMessageIndex"/> instances, which organize messages | ||
| /// into atomic groups that respect the tool-call/result pairing constraint. Strategies mutate the collection | ||
| /// in place by marking groups as excluded, removing groups, or replacing message content (e.g., with summaries). | ||
| /// </para> | ||
| /// <para> | ||
| /// Every strategy requires a <see cref="CompactionTrigger"/> that determines whether compaction should | ||
| /// proceed based on current <see cref="CompactionMessageIndex"/> metrics (token count, message count, turn count, etc.). | ||
| /// The base class evaluates this trigger at the start of <see cref="CompactAsync"/> and skips compaction when | ||
| /// the trigger returns <see langword="false"/>. | ||
| /// </para> | ||
| /// <para> | ||
| /// An optional <b>target</b> condition controls when compaction stops. Strategies incrementally exclude | ||
| /// groups and re-evaluate the target after each exclusion, stopping as soon as the target returns | ||
| /// <see langword="true"/>. When no target is specified, it defaults to the inverse of the trigger — | ||
| /// meaning compaction stops when the trigger condition would no longer fire. | ||
| /// </para> | ||
| /// <para> | ||
| /// Strategies can be applied at three lifecycle points: | ||
| /// <list type="bullet"> | ||
| /// <item><description><b>In-run</b>: During the tool loop, before each LLM call, to keep context within token limits.</description></item> | ||
| /// <item><description><b>Pre-write</b>: Before persisting messages to storage via <see cref="ChatHistoryProvider"/>.</description></item> | ||
| /// <item><description><b>On existing storage</b>: As a maintenance operation to compact stored history.</description></item> | ||
| /// </list> | ||
| /// </para> | ||
| /// <para> | ||
| /// Multiple strategies can be composed by applying them sequentially to the same <see cref="CompactionMessageIndex"/> | ||
| /// via <see cref="PipelineCompactionStrategy"/>. | ||
| /// </para> | ||
| /// </remarks> | ||
| public abstract partial class CompactionStrategy | ||
| { | ||
| /// <summary> | ||
| /// Creates a pre-configured <see cref="CompactionStrategy"/> from a combination of | ||
| /// <paramref name="approach"/> and <paramref name="size"/>. | ||
| /// </summary> | ||
| /// <remarks> | ||
| /// <para> | ||
| /// The <paramref name="approach"/> controls which strategies are included in the pipeline: | ||
| /// <list type="bullet"> | ||
| /// <item><description><see cref="CompactionApproach.Gentle"/>: tool result collapsing + truncation backstop. No <paramref name="chatClient"/> required.</description></item> | ||
| /// <item><description><see cref="CompactionApproach.Balanced"/>: tool result collapsing + LLM summarization + truncation backstop.</description></item> | ||
| /// <item><description><see cref="CompactionApproach.Aggressive"/>: tool result collapsing + LLM summarization + sliding window + truncation backstop.</description></item> | ||
| /// </list> | ||
| /// </para> | ||
| /// <para> | ||
| /// The <paramref name="size"/> controls the token and message thresholds at which each stage triggers. | ||
| /// Choose a size that matches the input token limit of your model. | ||
| /// </para> | ||
| /// </remarks> | ||
| /// <param name="approach"> | ||
| /// The compaction approach that controls which strategy or pipeline to use. | ||
| /// <see cref="CompactionApproach.Gentle"/> does not require a <paramref name="chatClient"/>; | ||
| /// <see cref="CompactionApproach.Balanced"/> and <see cref="CompactionApproach.Aggressive"/> require one. | ||
| /// </param> | ||
| /// <param name="size"> | ||
| /// The context-size profile that controls token and message thresholds. | ||
| /// </param> | ||
| /// <param name="chatClient"> | ||
| /// The <see cref="IChatClient"/> used for LLM-based summarization. | ||
| /// Required when <paramref name="approach"/> is <see cref="CompactionApproach.Balanced"/> or | ||
| /// <see cref="CompactionApproach.Aggressive"/>; ignored for <see cref="CompactionApproach.Gentle"/>. | ||
| /// </param> | ||
| /// <returns>A <see cref="CompactionStrategy"/> configured for the specified approach and size.</returns> | ||
| /// <exception cref="ArgumentNullException"> | ||
| /// <paramref name="chatClient"/> is <see langword="null"/> and <paramref name="approach"/> requires one. | ||
| /// </exception> | ||
| /// <exception cref="ArgumentOutOfRangeException"> | ||
| /// <paramref name="approach"/> or <paramref name="size"/> is not a defined enum value. | ||
| /// </exception> | ||
| public static CompactionStrategy Create(CompactionApproach approach, CompactionSize size, IChatClient? chatClient = null) | ||
| { | ||
| if (approach is CompactionApproach.Balanced or CompactionApproach.Aggressive) | ||
| { | ||
| _ = Throw.IfNull(chatClient); | ||
| } | ||
|
|
||
| int tokenLimit = GetTokenLimit(size); | ||
| int messageLimit = GetMessageLimit(size); | ||
|
|
||
| return approach switch | ||
| { | ||
| CompactionApproach.Gentle => CreateGentlePipeline(tokenLimit, messageLimit), | ||
| CompactionApproach.Balanced => CreateBalancedPipeline(tokenLimit, messageLimit, chatClient!), | ||
| CompactionApproach.Aggressive => CreateAggressivePipeline(tokenLimit, messageLimit, GetTurnLimit(size), chatClient!), | ||
| _ => throw new ArgumentOutOfRangeException(nameof(approach), approach, null), | ||
| }; | ||
| } | ||
|
|
||
| private static int GetTokenLimit(CompactionSize size) => size switch | ||
| { | ||
| CompactionSize.Compact => 0x1FFF, // 8k | ||
| CompactionSize.Moderate => 0x7FFF, // 32k | ||
| CompactionSize.Generous => 0xFFFF, // 64k | ||
| _ => throw new ArgumentOutOfRangeException(nameof(size), size, null), | ||
crickman marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| }; | ||
|
|
||
| private static int GetMessageLimit(CompactionSize size) => size switch | ||
| { | ||
| CompactionSize.Compact => 50, | ||
| CompactionSize.Moderate => 500, | ||
| CompactionSize.Generous => 1000, | ||
| _ => throw new ArgumentOutOfRangeException(nameof(size), size, null), | ||
| }; | ||
|
|
||
| private static int GetTurnLimit(CompactionSize size) => size switch | ||
| { | ||
| CompactionSize.Compact => 25, | ||
| CompactionSize.Moderate => 250, | ||
| CompactionSize.Generous => 500, | ||
| _ => throw new ArgumentOutOfRangeException(nameof(size), size, null), | ||
| }; | ||
|
|
||
| private static PipelineCompactionStrategy CreateGentlePipeline(int tokenLimit, int messageLimit) => | ||
| new( | ||
| new ToolResultCompactionStrategy(CompactionTriggers.MessagesExceed(messageLimit)), | ||
| new TruncationCompactionStrategy(CompactionTriggers.TokensExceed(tokenLimit))); | ||
|
|
||
| private static PipelineCompactionStrategy CreateBalancedPipeline(int tokenLimit, int messageLimit, IChatClient chatClient) | ||
| { | ||
| // Early stages trigger at two-thirds of the limit so the pipeline has room to compact | ||
| // incrementally before reaching the emergency truncation backstop at the full limit. | ||
| int earlyMessageTrigger = messageLimit * 2 / 3; | ||
| int earlyTokenTrigger = tokenLimit * 2 / 3; | ||
|
|
||
| return new( | ||
| new ToolResultCompactionStrategy(CompactionTriggers.MessagesExceed(earlyMessageTrigger)), | ||
| new SummarizationCompactionStrategy(chatClient, CompactionTriggers.TokensExceed(earlyTokenTrigger)), | ||
| new TruncationCompactionStrategy(CompactionTriggers.TokensExceed(tokenLimit))); | ||
| } | ||
|
|
||
| private static PipelineCompactionStrategy CreateAggressivePipeline(int tokenLimit, int messageLimit, int turnLimit, IChatClient chatClient) | ||
| { | ||
| // Early stages trigger at half the limit so compaction kicks in sooner and | ||
| // the sliding window and truncation backstop are reached less often. | ||
| int earlyMessageTrigger = messageLimit / 2; | ||
| int earlyTokenTrigger = tokenLimit / 2; | ||
|
|
||
| return new( | ||
| new ToolResultCompactionStrategy(CompactionTriggers.MessagesExceed(earlyMessageTrigger)), | ||
| new SummarizationCompactionStrategy(chatClient, CompactionTriggers.TokensExceed(earlyTokenTrigger)), | ||
| new SlidingWindowCompactionStrategy(CompactionTriggers.TurnsExceed(turnLimit)), | ||
| new TruncationCompactionStrategy(CompactionTriggers.TokensExceed(tokenLimit))); | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.