From fe73a703f3d60dbe114d477e7557156fe9f90d45 Mon Sep 17 00:00:00 2001
From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com>
Date: Tue, 17 Mar 2026 12:50:49 -0700
Subject: [PATCH 1/9] Diagnostics: Adds DiagnosticsVerbosity Summary mode for compacted diagnostics output

Implements v1 of the diagnostics compaction feature per the spec at
users/nalutripician/diagnostics-compaction-spec.

New public API:
- DiagnosticsVerbosity enum (Detailed=0, Summary=1)
- CosmosClientOptions.DiagnosticsVerbosity property (default: Detailed)
- CosmosClientOptions.MaxDiagnosticsSummarySizeBytes property (default: 8192 bytes / 8 KB, min: 4096 bytes / 4 KB)
- CosmosDiagnostics.ToString(DiagnosticsVerbosity) abstract overload

Summary mode groups requests by region, keeps the first and last request of
each region in full detail, and aggregates the middle entries by
(StatusCode, SubStatusCode) with count, total RU, and min/max/P50/avg latency
statistics. Size enforcement: if the serialized summary exceeds
MaxDiagnosticsSummarySizeBytes, it is replaced by a short indicator object
(Truncated = true plus a message) rather than being cut off mid-document.

The parameterless ToString() always returns Detailed output for backward
compatibility. The in-memory ITrace tree is unchanged -- compaction only
happens at serialization time.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/EncryptionCosmosDiagnostics.cs | 37 ++ .../src/CosmosClientOptions.cs | 62 +- .../src/Diagnostics/CosmosDiagnostics.cs | 12 +- .../src/Diagnostics/CosmosTraceDiagnostics.cs | 25 + .../Diagnostics/DiagnosticsSummaryWriter.cs | 460 +++++++++++++++ .../src/Diagnostics/DiagnosticsVerbosity.cs | 26 + .../Contracts/DotNetSDKAPI.net6.json | 60 ++ .../DiagnosticsSummaryWriterTests.cs | 528 ++++++++++++++++++ 8 files changed, 1205 insertions(+), 5 deletions(-) create mode 100644 Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs create mode 100644 Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsVerbosity.cs create mode 100644 Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryWriterTests.cs diff --git a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionCosmosDiagnostics.cs b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionCosmosDiagnostics.cs index bc0f536c6d..ef0d35fde5 100644 --- a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionCosmosDiagnostics.cs +++ b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionCosmosDiagnostics.cs @@ -88,6 +88,43 @@ public override string ToString() } #if SDKPROJECTREF + public override string ToString(DiagnosticsVerbosity verbosity) + { + if (verbosity == DiagnosticsVerbosity.Summary) + { + StringBuilder stringBuilder = new StringBuilder(); + StringWriter stringWriter = new StringWriter(stringBuilder); + + using (JsonWriter writer = new JsonTextWriter(stringWriter)) + { + writer.WriteStartObject(); + writer.WritePropertyName(Constants.DiagnosticsCoreDiagnostics); + writer.WriteRawValue(this.coreDiagnostics.ToString(verbosity)); + writer.WritePropertyName(Constants.DiagnosticsEncryptionDiagnostics); + writer.WriteStartObject(); + + if (this.encryptContent != null) + { + writer.WritePropertyName(Constants.DiagnosticsEncryptOperation); + writer.WriteRawValue(this.encryptContent.ToString()); + } + + if 
(this.decryptContent != null) + { + writer.WritePropertyName(Constants.DiagnosticsDecryptOperation); + writer.WriteRawValue(this.decryptContent.ToString()); + } + + writer.WriteEndObject(); + writer.WriteEndObject(); + } + + return stringWriter.ToString(); + } + + return this.ToString(); + } + public override DateTime? GetStartTimeUtc() { return this.coreDiagnostics.GetStartTimeUtc(); diff --git a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs index 80a9478dab..7b377ccd96 100644 --- a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs +++ b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs @@ -54,7 +54,12 @@ public class CosmosClientOptions private const string ConnectionStringAccountKey = "AccountKey"; private const string ConnectionStringDisableServerCertificateValidation = "DisableServerCertificateValidation"; - private const ApiType DefaultApiType = ApiType.None; + private const ApiType DefaultApiType = ApiType.None; + + /// + /// Default maximum size in bytes for Summary mode diagnostic output. + /// + internal const int DefaultMaxDiagnosticsSummarySizeBytes = 8192; /// /// Default request timeout @@ -74,8 +79,9 @@ public class CosmosClientOptions private IWebProxy webProxy; private Func httpClientFactory; private string applicationName; - private IFaultInjector faultInjector; - private bool isCustomSerializerProvided; + private IFaultInjector faultInjector; + private bool isCustomSerializerProvided; + private int maxDiagnosticsSummarySizeBytes = DefaultMaxDiagnosticsSummarySizeBytes; /// /// Creates a new CosmosClientOptions @@ -384,7 +390,55 @@ public ConnectionMode ConnectionMode /// If is set to true in CosmosClientOptions, priority level set on the CosmosClient is used. /// /// - public PriorityLevel? PriorityLevel { get; set; } + public PriorityLevel? PriorityLevel { get; set; } + + /// + /// Gets or sets the default verbosity for serialization. + /// Default: . 
+ /// + /// + /// + /// This setting controls the level of detail when calling + /// . + /// + /// + /// When set to , + /// the diagnostics output is compacted by grouping requests by region and deduplicating + /// retries with aggregate statistics (count, total RU, min/max/P50/avg latency). + /// + /// + /// The parameterless always returns + /// output for + /// backward compatibility. + /// + /// + public DiagnosticsVerbosity DiagnosticsVerbosity { get; set; } = DiagnosticsVerbosity.Detailed; + + /// + /// Gets or sets the maximum size in bytes for Summary mode diagnostic output. + /// If the summary output exceeds this limit, a truncated indicator is returned. + /// Default: 8192 (8 KB). Minimum: 4096 (4 KB). + /// + /// + /// This property is only relevant when is set to + /// . + /// + public int MaxDiagnosticsSummarySizeBytes + { + get => this.maxDiagnosticsSummarySizeBytes; + set + { + if (value < 4096) + { + throw new ArgumentOutOfRangeException( + nameof(this.MaxDiagnosticsSummarySizeBytes), + value, + $"{nameof(this.MaxDiagnosticsSummarySizeBytes)} must be at least 4096 bytes."); + } + + this.maxDiagnosticsSummarySizeBytes = value; + } + } /// /// Gets or sets the maximum number of retries in the case where the request fails diff --git a/Microsoft.Azure.Cosmos/src/Diagnostics/CosmosDiagnostics.cs b/Microsoft.Azure.Cosmos/src/Diagnostics/CosmosDiagnostics.cs index b88eaa8efe..318103a59b 100644 --- a/Microsoft.Azure.Cosmos/src/Diagnostics/CosmosDiagnostics.cs +++ b/Microsoft.Azure.Cosmos/src/Diagnostics/CosmosDiagnostics.cs @@ -61,7 +61,7 @@ public virtual ServerSideCumulativeMetrics GetQueryMetrics() /// /// The string field instance in the Azure Cosmos DB database service. /// - /// implements lazy materialization and is only materialized when is called. + /// implements lazy materialization and is only materialized when is called. 
/// /// /// Do not eagerly materialize the diagnostics until the moment of consumption to avoid unnecessary allocations, let the ToString be called only when needed. @@ -89,6 +89,16 @@ public virtual ServerSideCumulativeMetrics GetQueryMetrics() /// public abstract override string ToString(); + /// + /// Returns the string representation of diagnostics using the specified verbosity. + /// When is , + /// produces a compacted region-grouped summary. When , + /// produces the full trace output (same as parameterless ). + /// + /// The verbosity level to use for serialization. + /// A JSON string with diagnostics at the requested verbosity level. + public abstract string ToString(DiagnosticsVerbosity verbosity); + /// /// Gets the list of all regions that were contacted for a request /// diff --git a/Microsoft.Azure.Cosmos/src/Diagnostics/CosmosTraceDiagnostics.cs b/Microsoft.Azure.Cosmos/src/Diagnostics/CosmosTraceDiagnostics.cs index 5eae8492ea..c8fdeab3c1 100644 --- a/Microsoft.Azure.Cosmos/src/Diagnostics/CosmosTraceDiagnostics.cs +++ b/Microsoft.Azure.Cosmos/src/Diagnostics/CosmosTraceDiagnostics.cs @@ -17,8 +17,14 @@ namespace Microsoft.Azure.Cosmos.Diagnostics internal sealed class CosmosTraceDiagnostics : CosmosDiagnostics { private readonly Lazy accumulatedMetrics; + private readonly Lazy cachedSummaryJson; public CosmosTraceDiagnostics(ITrace trace) + : this(trace, CosmosClientOptions.DefaultMaxDiagnosticsSummarySizeBytes) + { + } + + internal CosmosTraceDiagnostics(ITrace trace, int maxDiagnosticsSummarySizeBytes) { if (trace == null) { @@ -34,6 +40,15 @@ public CosmosTraceDiagnostics(ITrace trace) this.Value = rootTrace; this.accumulatedMetrics = new Lazy(() => PopulateServerSideCumulativeMetrics(this.Value)); + this.cachedSummaryJson = new Lazy(() => + { + if (this.Value is Tracing.Trace rootConcreteTrace2) + { + rootConcreteTrace2.SetWalkingStateRecursively(); + } + + return DiagnosticsSummaryWriter.WriteSummary(this.Value, 
maxDiagnosticsSummarySizeBytes); + }); } public ITrace Value { get; } @@ -48,6 +63,16 @@ public override string ToString() return this.ToJsonString(); } + public override string ToString(DiagnosticsVerbosity verbosity) + { + if (verbosity == DiagnosticsVerbosity.Summary) + { + return this.cachedSummaryJson.Value; + } + + return this.ToString(); + } + public override TimeSpan GetClientElapsedTime() { return this.Value.Duration; diff --git a/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs b/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs new file mode 100644 index 0000000000..8b96bfc7c7 --- /dev/null +++ b/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs @@ -0,0 +1,460 @@ +// ------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +// ------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos.Diagnostics +{ + using System; + using System.Collections.Generic; + using System.Globalization; + using System.Linq; + using System.Net; + using System.Text; + using Microsoft.Azure.Cosmos.Json; + using Microsoft.Azure.Cosmos.Tracing; + using Microsoft.Azure.Cosmos.Tracing.TraceData; + using Microsoft.Azure.Documents; + + /// + /// Produces compacted summary JSON from an ITrace tree. + /// Groups requests by region, keeps first/last in full detail, + /// and aggregates middle entries by (StatusCode, SubStatusCode). + /// + internal static class DiagnosticsSummaryWriter + { + private const string UnknownRegion = "Unknown"; + + /// + /// Produces the summary JSON string for the given trace. + /// If the output exceeds maxSizeBytes, returns a truncated indicator. 
+ /// + public static string WriteSummary( + ITrace trace, + int maxSizeBytes) + { + if (trace == null) + { + throw new ArgumentNullException(nameof(trace)); + } + + List entries = CollectRequestEntries(trace); + + string summaryJson = BuildSummaryJson(trace, entries); + + if (Encoding.UTF8.GetByteCount(summaryJson) <= maxSizeBytes) + { + return summaryJson; + } + + return BuildTruncatedJson(trace, entries.Count); + } + + private static List CollectRequestEntries(ITrace trace) + { + List entries = new List(); + CollectRequestEntriesRecursive(trace, entries); + return entries; + } + + private static void CollectRequestEntriesRecursive(ITrace currentTrace, List entries) + { + foreach (object datum in currentTrace.Data.Values) + { + if (datum is ClientSideRequestStatisticsTraceDatum clientSideStats) + { + foreach (ClientSideRequestStatisticsTraceDatum.StoreResponseStatistics storeStat + in clientSideStats.StoreResponseStatisticsList) + { + if (storeStat.IsSupplementalResponse) + { + continue; + } + + entries.Add(new RequestEntry( + region: storeStat.Region ?? 
UnknownRegion, + statusCode: (int)storeStat.StoreResult.StatusCode, + subStatusCode: (int)storeStat.StoreResult.SubStatusCode, + requestCharge: storeStat.StoreResult.RequestCharge, + durationMs: storeStat.RequestLatency.TotalMilliseconds, + requestStartTimeUtc: storeStat.RequestStartTime, + endpoint: storeStat.LocationEndpoint?.ToString(), + operationType: storeStat.RequestOperationType.ToString(), + resourceType: storeStat.RequestResourceType.ToString())); + } + + foreach (ClientSideRequestStatisticsTraceDatum.HttpResponseStatistics httpStat + in clientSideStats.HttpResponseStatisticsList) + { + int statusCode = 0; + int subStatusCode = 0; + double requestCharge = 0; + + if (httpStat.HttpResponseMessage != null) + { + statusCode = (int)httpStat.HttpResponseMessage.StatusCode; + subStatusCode = GetHttpSubStatusCode(httpStat); + + if (httpStat.HttpResponseMessage.Headers.TryGetValues( + HttpConstants.HttpHeaders.RequestCharge, + out IEnumerable chargeValues)) + { + string chargeStr = chargeValues.FirstOrDefault(); + if (chargeStr != null) + { + double.TryParse(chargeStr, NumberStyles.Float, CultureInfo.InvariantCulture, out requestCharge); + } + } + } + + entries.Add(new RequestEntry( + region: httpStat.Region ?? 
UnknownRegion, + statusCode: statusCode, + subStatusCode: subStatusCode, + requestCharge: requestCharge, + durationMs: httpStat.Duration.TotalMilliseconds, + requestStartTimeUtc: httpStat.RequestStartTime, + endpoint: httpStat.RequestUri?.Host, + operationType: httpStat.HttpMethod?.ToString(), + resourceType: httpStat.ResourceType.ToString())); + } + } + } + + foreach (ITrace childTrace in currentTrace.Children) + { + CollectRequestEntriesRecursive(childTrace, entries); + } + } + + private static int GetHttpSubStatusCode( + ClientSideRequestStatisticsTraceDatum.HttpResponseStatistics httpStat) + { + if (httpStat.HttpResponseMessage?.Headers != null + && httpStat.HttpResponseMessage.Headers.TryGetValues( + WFConstants.BackendHeaders.SubStatus, + out IEnumerable values)) + { + string first = values.FirstOrDefault(); + if (first != null + && int.TryParse(first, NumberStyles.Integer, CultureInfo.InvariantCulture, out int sub)) + { + return sub; + } + } + + return 0; + } + + private static string BuildSummaryJson(ITrace trace, List entries) + { + IJsonWriter writer = JsonWriter.Create(JsonSerializationFormat.Text); + writer.WriteObjectStart(); + writer.WriteFieldName("Summary"); + writer.WriteObjectStart(); + + writer.WriteFieldName("DiagnosticsVerbosity"); + writer.WriteStringValue("Summary"); + + writer.WriteFieldName("TotalDurationMs"); + writer.WriteNumberValue(trace.Duration.TotalMilliseconds); + + double totalRequestCharge = 0; + foreach (RequestEntry e in entries) + { + totalRequestCharge += e.RequestCharge; + } + + writer.WriteFieldName("TotalRequestCharge"); + writer.WriteNumberValue(totalRequestCharge); + + writer.WriteFieldName("TotalRequestCount"); + writer.WriteNumberValue(entries.Count); + + // Group by region, preserving chronological order within each group + Dictionary> regionGroups = new Dictionary>(); + List regionOrder = new List(); + + foreach (RequestEntry entry in entries.OrderBy(e => e.RequestStartTimeUtc ?? 
DateTime.MinValue)) + { + if (!regionGroups.TryGetValue(entry.Region, out List group)) + { + group = new List(); + regionGroups[entry.Region] = group; + regionOrder.Add(entry.Region); + } + + group.Add(entry); + } + + writer.WriteFieldName("RegionsSummary"); + writer.WriteArrayStart(); + + foreach (string region in regionOrder) + { + List regionEntries = regionGroups[region]; + WriteRegionSummary(writer, region, regionEntries); + } + + writer.WriteArrayEnd(); + + writer.WriteObjectEnd(); // Summary + writer.WriteObjectEnd(); // root + + return Encoding.UTF8.GetString(writer.GetResult().Span); + } + + private static void WriteRegionSummary( + IJsonWriter writer, + string region, + List entries) + { + writer.WriteObjectStart(); + + writer.WriteFieldName("Region"); + writer.WriteStringValue(region); + + double regionRequestCharge = 0; + foreach (RequestEntry e in entries) + { + regionRequestCharge += e.RequestCharge; + } + + writer.WriteFieldName("RequestCount"); + writer.WriteNumberValue(entries.Count); + + writer.WriteFieldName("TotalRequestCharge"); + writer.WriteNumberValue(regionRequestCharge); + + // First entry (always present) + writer.WriteFieldName("First"); + WriteRequestEntryDetail(writer, entries[0]); + + // Last entry (only if more than 1) + if (entries.Count > 1) + { + writer.WriteFieldName("Last"); + WriteRequestEntryDetail(writer, entries[entries.Count - 1]); + } + + // Aggregated groups for middle entries (all except first and last) + if (entries.Count > 2) + { + List middleEntries = entries.GetRange(1, entries.Count - 2); + + // Group by (StatusCode, SubStatusCode) + Dictionary<(int, int), List> statusGroups = + new Dictionary<(int, int), List>(); + + foreach (RequestEntry entry in middleEntries) + { + (int, int) key = (entry.StatusCode, entry.SubStatusCode); + if (!statusGroups.TryGetValue(key, out List group)) + { + group = new List(); + statusGroups[key] = group; + } + + group.Add(entry); + } + + writer.WriteFieldName("AggregatedGroups"); + 
writer.WriteArrayStart(); + + foreach (KeyValuePair<(int, int), List> kvp in statusGroups) + { + WriteAggregatedGroup(writer, kvp.Key.Item1, kvp.Key.Item2, kvp.Value); + } + + writer.WriteArrayEnd(); + } + + writer.WriteObjectEnd(); + } + + private static void WriteRequestEntryDetail(IJsonWriter writer, RequestEntry entry) + { + writer.WriteObjectStart(); + + writer.WriteFieldName("StatusCode"); + writer.WriteNumberValue(entry.StatusCode); + + writer.WriteFieldName("SubStatusCode"); + writer.WriteNumberValue(entry.SubStatusCode); + + writer.WriteFieldName("RequestCharge"); + writer.WriteNumberValue(entry.RequestCharge); + + writer.WriteFieldName("DurationMs"); + writer.WriteNumberValue(entry.DurationMs); + + writer.WriteFieldName("Region"); + writer.WriteStringValue(entry.Region); + + if (entry.Endpoint != null) + { + writer.WriteFieldName("Endpoint"); + writer.WriteStringValue(entry.Endpoint); + } + + if (entry.RequestStartTimeUtc.HasValue) + { + writer.WriteFieldName("RequestStartTimeUtc"); + writer.WriteStringValue(entry.RequestStartTimeUtc.Value.ToString("o", CultureInfo.InvariantCulture)); + } + + if (entry.OperationType != null) + { + writer.WriteFieldName("OperationType"); + writer.WriteStringValue(entry.OperationType); + } + + if (entry.ResourceType != null) + { + writer.WriteFieldName("ResourceType"); + writer.WriteStringValue(entry.ResourceType); + } + + writer.WriteObjectEnd(); + } + + private static void WriteAggregatedGroup( + IJsonWriter writer, + int statusCode, + int subStatusCode, + List entries) + { + writer.WriteObjectStart(); + + writer.WriteFieldName("StatusCode"); + writer.WriteNumberValue(statusCode); + + writer.WriteFieldName("SubStatusCode"); + writer.WriteNumberValue(subStatusCode); + + writer.WriteFieldName("Count"); + writer.WriteNumberValue(entries.Count); + + double totalCharge = 0; + foreach (RequestEntry e in entries) + { + totalCharge += e.RequestCharge; + } + + writer.WriteFieldName("TotalRequestCharge"); + 
writer.WriteNumberValue(totalCharge); + + // Sort durations for percentile computation + List durations = new List(entries.Count); + foreach (RequestEntry e in entries) + { + durations.Add(e.DurationMs); + } + + durations.Sort(); + + writer.WriteFieldName("MinDurationMs"); + writer.WriteNumberValue(durations[0]); + + writer.WriteFieldName("MaxDurationMs"); + writer.WriteNumberValue(durations[durations.Count - 1]); + + writer.WriteFieldName("P50DurationMs"); + writer.WriteNumberValue(ComputeP50(durations)); + + double avgDuration = 0; + foreach (double d in durations) + { + avgDuration += d; + } + + avgDuration /= durations.Count; + + writer.WriteFieldName("AvgDurationMs"); + writer.WriteNumberValue(Math.Round(avgDuration, 1)); + + writer.WriteObjectEnd(); + } + + private static double ComputeP50(List sortedValues) + { + int count = sortedValues.Count; + if (count == 1) + { + return sortedValues[0]; + } + + // For odd count, take the middle element. + // For even count, take the lower of the two middle elements + // (matching the Rust SDK's floor-based approach). + int midIndex = (count - 1) / 2; + return sortedValues[midIndex]; + } + + private static string BuildTruncatedJson(ITrace trace, int totalRequestCount) + { + IJsonWriter writer = JsonWriter.Create(JsonSerializationFormat.Text); + writer.WriteObjectStart(); + writer.WriteFieldName("Summary"); + writer.WriteObjectStart(); + + writer.WriteFieldName("DiagnosticsVerbosity"); + writer.WriteStringValue("Summary"); + + writer.WriteFieldName("TotalDurationMs"); + writer.WriteNumberValue(trace.Duration.TotalMilliseconds); + + writer.WriteFieldName("TotalRequestCount"); + writer.WriteNumberValue(totalRequestCount); + + writer.WriteFieldName("Truncated"); + writer.WriteBoolValue(true); + + writer.WriteFieldName("Message"); + writer.WriteStringValue( + "Summary output truncated to fit size limit. 
Set DiagnosticsVerbosity to Detailed for full diagnostics."); + + writer.WriteObjectEnd(); // Summary + writer.WriteObjectEnd(); // root + + return Encoding.UTF8.GetString(writer.GetResult().Span); + } + + /// + /// Internal representation of a single request entry collected from the trace tree. + /// + private readonly struct RequestEntry + { + public RequestEntry( + string region, + int statusCode, + int subStatusCode, + double requestCharge, + double durationMs, + DateTime? requestStartTimeUtc, + string endpoint, + string operationType, + string resourceType) + { + this.Region = region; + this.StatusCode = statusCode; + this.SubStatusCode = subStatusCode; + this.RequestCharge = requestCharge; + this.DurationMs = durationMs; + this.RequestStartTimeUtc = requestStartTimeUtc; + this.Endpoint = endpoint; + this.OperationType = operationType; + this.ResourceType = resourceType; + } + + public string Region { get; } + public int StatusCode { get; } + public int SubStatusCode { get; } + public double RequestCharge { get; } + public double DurationMs { get; } + public DateTime? RequestStartTimeUtc { get; } + public string Endpoint { get; } + public string OperationType { get; } + public string ResourceType { get; } + } + } +} diff --git a/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsVerbosity.cs b/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsVerbosity.cs new file mode 100644 index 0000000000..5ea8138b1f --- /dev/null +++ b/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsVerbosity.cs @@ -0,0 +1,26 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos +{ + /// + /// Controls the level of detail in serialized output. + /// + public enum DiagnosticsVerbosity + { + /// + /// Full diagnostic output with all individual request traces. 
+ /// This is the default and preserves backward compatibility. + /// + Detailed = 0, + + /// + /// Compacted diagnostic output optimized for log size constraints. + /// Groups requests by region. Keeps first and last request in full detail. + /// Deduplicates middle requests by (StatusCode, SubStatusCode) with + /// aggregate statistics (count, total RU, min/max/P50 latency). + /// + Summary = 1, + } +} diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetSDKAPI.net6.json b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetSDKAPI.net6.json index 63865a40ad..0a6154671d 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetSDKAPI.net6.json +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetSDKAPI.net6.json @@ -2866,6 +2866,16 @@ "Attributes": [], "MethodInfo": "Int32 get_GatewayModeMaxConnectionLimit();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" }, + "Int32 get_MaxDiagnosticsSummarySizeBytes()": { + "Type": "Method", + "Attributes": [], + "MethodInfo": "Int32 get_MaxDiagnosticsSummarySizeBytes();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, + "Int32 MaxDiagnosticsSummarySizeBytes": { + "Type": "Property", + "Attributes": [], + "MethodInfo": "Int32 MaxDiagnosticsSummarySizeBytes;CanRead:True;CanWrite:True;Int32 get_MaxDiagnosticsSummarySizeBytes();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;Void set_MaxDiagnosticsSummarySizeBytes(Int32);IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, "Microsoft.Azure.Cosmos.AvailabilityStrategy AvailabilityStrategy": { "Type": "Property", "Attributes": [], @@ -2922,6 +2932,18 @@ ], "MethodInfo": "Microsoft.Azure.Cosmos.CosmosSerializer 
Serializer;CanRead:True;CanWrite:True;Microsoft.Azure.Cosmos.CosmosSerializer get_Serializer();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;Void set_Serializer(Microsoft.Azure.Cosmos.CosmosSerializer);IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" }, + "Microsoft.Azure.Cosmos.DiagnosticsVerbosity DiagnosticsVerbosity": { + "Type": "Property", + "Attributes": [], + "MethodInfo": "Microsoft.Azure.Cosmos.DiagnosticsVerbosity DiagnosticsVerbosity;CanRead:True;CanWrite:True;Microsoft.Azure.Cosmos.DiagnosticsVerbosity get_DiagnosticsVerbosity();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;Void set_DiagnosticsVerbosity(Microsoft.Azure.Cosmos.DiagnosticsVerbosity);IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, + "Microsoft.Azure.Cosmos.DiagnosticsVerbosity get_DiagnosticsVerbosity()[System.Runtime.CompilerServices.CompilerGeneratedAttribute()]": { + "Type": "Method", + "Attributes": [ + "CompilerGeneratedAttribute" + ], + "MethodInfo": "Microsoft.Azure.Cosmos.DiagnosticsVerbosity get_DiagnosticsVerbosity();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, "Microsoft.Azure.Cosmos.FaultInjection.IFaultInjector FaultInjector": { "Type": "Property", "Attributes": [], @@ -3236,6 +3258,13 @@ ], "MethodInfo": "Void set_CosmosClientTelemetryOptions(Microsoft.Azure.Cosmos.CosmosClientTelemetryOptions);IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" }, + "Void set_DiagnosticsVerbosity(Microsoft.Azure.Cosmos.DiagnosticsVerbosity)[System.Runtime.CompilerServices.CompilerGeneratedAttribute()]": { + "Type": "Method", + "Attributes": [ + "CompilerGeneratedAttribute" + ], + "MethodInfo": "Void 
set_DiagnosticsVerbosity(Microsoft.Azure.Cosmos.DiagnosticsVerbosity);IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, "Void set_EnableContentResponseOnWrite(System.Nullable`1[System.Boolean])[System.Runtime.CompilerServices.CompilerGeneratedAttribute()]": { "Type": "Method", "Attributes": [ @@ -3277,6 +3306,11 @@ ], "MethodInfo": "Void set_LimitToEndpoint(Boolean);IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" }, + "Void set_MaxDiagnosticsSummarySizeBytes(Int32)": { + "Type": "Method", + "Attributes": [], + "MethodInfo": "Void set_MaxDiagnosticsSummarySizeBytes(Int32);IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, "Void set_MaxRequestsPerTcpConnection(System.Nullable`1[System.Int32])": { "Type": "Method", "Attributes": [], @@ -3477,6 +3511,11 @@ "Attributes": [], "MethodInfo": "System.String ToString();IsAbstract:True;IsStatic:False;IsVirtual:True;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" }, + "System.String ToString(Microsoft.Azure.Cosmos.DiagnosticsVerbosity)": { + "Type": "Method", + "Attributes": [], + "MethodInfo": "System.String ToString(Microsoft.Azure.Cosmos.DiagnosticsVerbosity);IsAbstract:True;IsStatic:False;IsVirtual:True;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, "System.TimeSpan GetClientElapsedTime()": { "Type": "Method", "Attributes": [], @@ -4431,6 +4470,27 @@ }, "NestedTypes": {} }, + "Microsoft.Azure.Cosmos.DiagnosticsVerbosity;System.Enum;IsAbstract:False;IsSealed:True;IsInterface:False;IsEnum:True;IsClass:False;IsValueType:True;IsNested:False;IsGenericType:False;IsSerializable:True": { + "Subclasses": {}, + "Members": { + "Int32 value__": { + "Type": "Field", + "Attributes": [], + "MethodInfo": "Int32 value__;IsInitOnly:False;IsStatic:False;" + }, + "Microsoft.Azure.Cosmos.DiagnosticsVerbosity Detailed": { + "Type": "Field", + 
"Attributes": [], + "MethodInfo": "Microsoft.Azure.Cosmos.DiagnosticsVerbosity Detailed;IsInitOnly:False;IsStatic:True;" + }, + "Microsoft.Azure.Cosmos.DiagnosticsVerbosity Summary": { + "Type": "Field", + "Attributes": [], + "MethodInfo": "Microsoft.Azure.Cosmos.DiagnosticsVerbosity Summary;IsInitOnly:False;IsStatic:True;" + } + }, + "NestedTypes": {} + }, "Microsoft.Azure.Cosmos.DistanceFunction;System.Enum;IsAbstract:False;IsSealed:True;IsInterface:False;IsEnum:True;IsClass:False;IsValueType:True;IsNested:False;IsGenericType:False;IsSerializable:True": { "Subclasses": {}, "Members": { diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryWriterTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryWriterTests.cs new file mode 100644 index 0000000000..097d1916ff --- /dev/null +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryWriterTests.cs @@ -0,0 +1,528 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos.Tests.Tracing +{ + using System; + using System.Collections.Generic; + using System.Reflection; + using System.Linq; + using Microsoft.Azure.Cosmos.Diagnostics; + using Microsoft.Azure.Cosmos.Tracing; + using Microsoft.Azure.Cosmos.Tracing.TraceData; + using Microsoft.Azure.Documents; + using Microsoft.Azure.Documents.Collections; + using Microsoft.VisualStudio.TestTools.UnitTesting; + using Newtonsoft.Json.Linq; + using static Microsoft.Azure.Cosmos.Tracing.TraceData.ClientSideRequestStatisticsTraceDatum; + + [TestClass] + public class DiagnosticsSummaryWriterTests + { + [TestMethod] + public void DiagnosticsVerbosity_DefaultIsDetailed() + { + Assert.AreEqual(0, (int)DiagnosticsVerbosity.Detailed); + Assert.AreEqual(DiagnosticsVerbosity.Detailed, default(DiagnosticsVerbosity)); + } + + [TestMethod] + public void CosmosClientOptions_DiagnosticsVerbosity_DefaultValue() + { + CosmosClientOptions options = new CosmosClientOptions(); + Assert.AreEqual(DiagnosticsVerbosity.Detailed, options.DiagnosticsVerbosity); + } + + [TestMethod] + public void CosmosClientOptions_MaxSummarySizeBytes_DefaultValue() + { + CosmosClientOptions options = new CosmosClientOptions(); + Assert.AreEqual(8192, options.MaxDiagnosticsSummarySizeBytes); + } + + [TestMethod] + [ExpectedException(typeof(ArgumentOutOfRangeException))] + public void CosmosClientOptions_MaxSummarySizeBytes_Validation_TooSmall() + { + CosmosClientOptions options = new CosmosClientOptions + { + MaxDiagnosticsSummarySizeBytes = 2048 + }; + } + + [TestMethod] + public void CosmosClientOptions_MaxSummarySizeBytes_Validation_MinAllowed() + { + CosmosClientOptions options = new CosmosClientOptions + { + MaxDiagnosticsSummarySizeBytes = 4096 + }; + Assert.AreEqual(4096, options.MaxDiagnosticsSummarySizeBytes); + } + + [TestMethod] + public void ToString_Parameterless_AlwaysDetailed() + { + // Parameterless ToString() must 
always return detailed output + // regardless of any options setting. We verify structural equivalence + // (same keys in JSON) rather than exact string match since duration changes. + using ITrace trace = Trace.GetRootTrace("TestOperation"); + CosmosTraceDiagnostics diagnostics = new CosmosTraceDiagnostics(trace); + string detailed = diagnostics.ToString(); + string explicitDetailed = diagnostics.ToString(DiagnosticsVerbosity.Detailed); + + JObject parsedDefault = JObject.Parse(detailed); + JObject parsedExplicit = JObject.Parse(explicitDetailed); + + // Same structure: both have name, Summary, start datetime + Assert.AreEqual(parsedDefault["name"].ToString(), parsedExplicit["name"].ToString()); + Assert.AreEqual(parsedDefault["start datetime"].ToString(), parsedExplicit["start datetime"].ToString()); + } + + [TestMethod] + public void ToString_Summary_ProducesValidJson() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, DateTime.UtcNow); + + CosmosTraceDiagnostics diagnostics = new CosmosTraceDiagnostics(trace); + string summary = diagnostics.ToString(DiagnosticsVerbosity.Summary); + + JObject parsed = JObject.Parse(summary); + Assert.IsNotNull(parsed["Summary"], "Summary object should exist"); + Assert.AreEqual("Summary", parsed["Summary"]["DiagnosticsVerbosity"].ToString()); + } + + [TestMethod] + public void Summary_SingleRegion_SingleRequest() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, DateTime.UtcNow); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + + Assert.AreEqual(1, summaryObj["TotalRequestCount"].Value()); + Assert.IsNotNull(summaryObj["TotalRequestCharge"]); + + JArray regions = 
(JArray)summaryObj["RegionsSummary"]; + Assert.AreEqual(1, regions.Count); + + JObject region = (JObject)regions[0]; + Assert.AreEqual("West US 2", region["Region"].ToString()); + Assert.AreEqual(1, region["RequestCount"].Value()); + Assert.IsNotNull(region["First"]); + Assert.IsNull(region["Last"], "Last should be omitted when only 1 request"); + Assert.IsNull(region["AggregatedGroups"], "No aggregated groups for single request"); + } + + [TestMethod] + public void Summary_SingleRegion_TwoRequests() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + AddStoreResponseStatistic(trace, "East US", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0.0, 5, baseTime); + AddStoreResponseStatistic(trace, "East US", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 12, baseTime.AddSeconds(1)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + + Assert.AreEqual(2, summaryObj["TotalRequestCount"].Value()); + + JArray regions = (JArray)summaryObj["RegionsSummary"]; + JObject region = (JObject)regions[0]; + Assert.AreEqual(2, region["RequestCount"].Value()); + Assert.IsNotNull(region["First"]); + Assert.IsNotNull(region["Last"]); + Assert.AreEqual((int)StatusCodes.TooManyRequests, region["First"]["StatusCode"].Value()); + Assert.AreEqual((int)StatusCodes.Ok, region["Last"]["StatusCode"].Value()); + Assert.IsNull(region["AggregatedGroups"], "No middle entries for exactly 2 requests"); + } + + [TestMethod] + public void Summary_SingleRegion_ManyRetries_429() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + + // First request: 429 + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0.0, 5, baseTime); + + // 48 middle retries: all 429 + for (int i = 1; i <= 48; i++) + { + AddStoreResponseStatistic(trace, "West 
US 2", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0.0, 3 + i, baseTime.AddMilliseconds(i * 100)); + } + + // Last request: 200 OK + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 12, baseTime.AddSeconds(5)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + + Assert.AreEqual(50, summaryObj["TotalRequestCount"].Value()); + + JArray regions = (JArray)summaryObj["RegionsSummary"]; + Assert.AreEqual(1, regions.Count); + + JObject region = (JObject)regions[0]; + Assert.AreEqual(50, region["RequestCount"].Value()); + Assert.AreEqual((int)StatusCodes.TooManyRequests, region["First"]["StatusCode"].Value()); + Assert.AreEqual((int)StatusCodes.Ok, region["Last"]["StatusCode"].Value()); + + JArray groups = (JArray)region["AggregatedGroups"]; + Assert.AreEqual(1, groups.Count, "All middle entries are 429 so 1 aggregated group"); + + JObject group = (JObject)groups[0]; + Assert.AreEqual((int)StatusCodes.TooManyRequests, group["StatusCode"].Value()); + Assert.AreEqual(48, group["Count"].Value()); + Assert.IsTrue(group["MinDurationMs"].Value() > 0); + Assert.IsTrue(group["MaxDurationMs"].Value() >= group["MinDurationMs"].Value()); + Assert.IsTrue(group["P50DurationMs"].Value() > 0); + Assert.IsTrue(group["AvgDurationMs"].Value() > 0); + } + + [TestMethod] + public void Summary_MultiRegion_Failover() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + + // West US 2: 3 requests + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0.0, 5, baseTime); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0.0, 10, baseTime.AddMilliseconds(100)); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.ServiceUnavailable, SubStatusCodes.Unknown, 0.0, 15, 
baseTime.AddMilliseconds(200)); + + // East US 2: 2 requests + AddStoreResponseStatistic(trace, "East US 2", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0.0, 8, baseTime.AddMilliseconds(300)); + AddStoreResponseStatistic(trace, "East US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 12, baseTime.AddMilliseconds(400)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + + Assert.AreEqual(5, summaryObj["TotalRequestCount"].Value()); + Assert.AreEqual(2, ((JArray)summaryObj["RegionsSummary"]).Count); + + JObject westRegion = (JObject)summaryObj["RegionsSummary"][0]; + Assert.AreEqual("West US 2", westRegion["Region"].ToString()); + Assert.AreEqual(3, westRegion["RequestCount"].Value()); + + JObject eastRegion = (JObject)summaryObj["RegionsSummary"][1]; + Assert.AreEqual("East US 2", eastRegion["Region"].ToString()); + Assert.AreEqual(2, eastRegion["RequestCount"].Value()); + } + + [TestMethod] + public void Summary_MixedStatusCodes() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0.0, 5, baseTime); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0.0, 10, baseTime.AddMilliseconds(100)); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.ServiceUnavailable, SubStatusCodes.Unknown, 0.0, 20, baseTime.AddMilliseconds(200)); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.ServiceUnavailable, SubStatusCodes.Unknown, 0.0, 25, baseTime.AddMilliseconds(300)); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 12, baseTime.AddMilliseconds(400)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject summaryObj = 
(JObject)parsed["Summary"]; + + JArray regions = (JArray)summaryObj["RegionsSummary"]; + JObject region = (JObject)regions[0]; + JArray groups = (JArray)region["AggregatedGroups"]; + + Assert.AreEqual(2, groups.Count, "Two distinct status codes in middle entries"); + } + + [TestMethod] + public void Summary_P50_OddCount() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + + // 5 requests total: first, 3 middle, last + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 1, baseTime); + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 10, baseTime.AddMilliseconds(100)); + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 20, baseTime.AddMilliseconds(200)); + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 30, baseTime.AddMilliseconds(300)); + AddStoreResponseStatistic(trace, "R1", StatusCodes.Ok, SubStatusCodes.Unknown, 5, 5, baseTime.AddMilliseconds(400)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JArray groups = (JArray)parsed["Summary"]["RegionsSummary"][0]["AggregatedGroups"]; + JObject group = (JObject)groups[0]; + + // Middle entries have durations: 10, 20, 30 (sorted) + // P50 of 3 items = index (3-1)/2 = 1 → 20 + Assert.AreEqual(10, group["MinDurationMs"].Value()); + Assert.AreEqual(30, group["MaxDurationMs"].Value()); + Assert.AreEqual(20, group["P50DurationMs"].Value()); + } + + [TestMethod] + public void Summary_P50_EvenCount() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + + // 6 requests total: first, 4 middle, last + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 1, baseTime); + AddStoreResponseStatistic(trace, "R1", 
StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 10, baseTime.AddMilliseconds(100)); + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 20, baseTime.AddMilliseconds(200)); + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 30, baseTime.AddMilliseconds(300)); + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 40, baseTime.AddMilliseconds(400)); + AddStoreResponseStatistic(trace, "R1", StatusCodes.Ok, SubStatusCodes.Unknown, 5, 5, baseTime.AddMilliseconds(500)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JArray groups = (JArray)parsed["Summary"]["RegionsSummary"][0]["AggregatedGroups"]; + JObject group = (JObject)groups[0]; + + // Middle entries have durations: 10, 20, 30, 40 (sorted) + // P50 of 4 items = index (4-1)/2 = 1 → 20 + Assert.AreEqual(10, group["MinDurationMs"].Value()); + Assert.AreEqual(40, group["MaxDurationMs"].Value()); + Assert.AreEqual(20, group["P50DurationMs"].Value()); + } + + [TestMethod] + public void Summary_P50_SingleItem() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + + // 3 requests total: first, 1 middle, last + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 1, baseTime); + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 42, baseTime.AddMilliseconds(100)); + AddStoreResponseStatistic(trace, "R1", StatusCodes.Ok, SubStatusCodes.Unknown, 5, 5, baseTime.AddMilliseconds(200)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JArray groups = (JArray)parsed["Summary"]["RegionsSummary"][0]["AggregatedGroups"]; + JObject group = (JObject)groups[0]; + + // Single middle entry with duration 42 + Assert.AreEqual(1, 
group["Count"].Value()); + Assert.AreEqual(42, group["P50DurationMs"].Value()); + Assert.AreEqual(42, group["MinDurationMs"].Value()); + Assert.AreEqual(42, group["MaxDurationMs"].Value()); + } + + [TestMethod] + public void Summary_SizeEnforcement_UnderLimit() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + AddStoreResponseStatistic(trace, "R1", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, DateTime.UtcNow); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + int byteCount = System.Text.Encoding.UTF8.GetByteCount(summary); + + Assert.IsTrue(byteCount <= 8192, $"Summary size {byteCount} should be under limit"); + Assert.IsFalse(summary.Contains("Truncated"), "Should not be truncated"); + } + + [TestMethod] + public void Summary_SizeEnforcement_OverLimit_Truncated() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + + // Generate many requests across many regions to exceed a tiny limit + for (int r = 0; r < 20; r++) + { + for (int i = 0; i < 10; i++) + { + AddStoreResponseStatistic( + trace, + $"Region {r}", + StatusCodes.TooManyRequests, + SubStatusCodes.Unknown, + 0.0, + 5 + i, + baseTime.AddMilliseconds(r * 1000 + i * 100)); + } + } + + // Use a very small limit to force truncation + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 512); + JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + + Assert.IsTrue(summaryObj["Truncated"].Value(), "Should be truncated"); + Assert.IsNotNull(summaryObj["Message"]); + Assert.AreEqual(200, summaryObj["TotalRequestCount"].Value()); + } + + [TestMethod] + public void Summary_EmptyTrace() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + + Assert.AreEqual(0, summaryObj["TotalRequestCount"].Value()); 
+ Assert.AreEqual(0, summaryObj["TotalRequestCharge"].Value()); + Assert.AreEqual(0, ((JArray)summaryObj["RegionsSummary"]).Count); + } + + [TestMethod] + public void Detailed_Mode_Unchanged() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, DateTime.UtcNow); + + CosmosTraceDiagnostics diagnostics = new CosmosTraceDiagnostics(trace); + string parameterless = diagnostics.ToString(); + string explicitDetailed = diagnostics.ToString(DiagnosticsVerbosity.Detailed); + + // Verify structural equivalence (duration may differ slightly due to timing) + JObject parsedDefault = JObject.Parse(parameterless); + JObject parsedExplicit = JObject.Parse(explicitDetailed); + + Assert.AreEqual(parsedDefault["name"].ToString(), parsedExplicit["name"].ToString()); + Assert.AreEqual(parsedDefault["start datetime"].ToString(), parsedExplicit["start datetime"].ToString()); + + // Detailed output should contain the full trace tree structure + Assert.IsNotNull(parsedDefault["name"]); + Assert.IsNotNull(parsedDefault["Summary"]); + } + + [TestMethod] + public void Summary_RegionOrdering_Deterministic() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + + // Add in chronological order: West first, then East + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, baseTime); + AddStoreResponseStatistic(trace, "East US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, baseTime.AddMilliseconds(100)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JArray regions = (JArray)parsed["Summary"]["RegionsSummary"]; + + // Regions should appear in the order they were first encountered chronologically + Assert.AreEqual("West US 2", regions[0]["Region"].ToString()); + Assert.AreEqual("East US 2", regions[1]["Region"].ToString()); + 
} + + [TestMethod] + public void Summary_NullRegion_GroupedAsUnknown() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + AddStoreResponseStatistic(trace, null, StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, DateTime.UtcNow); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JArray regions = (JArray)parsed["Summary"]["RegionsSummary"]; + + Assert.AreEqual("Unknown", regions[0]["Region"].ToString()); + } + + [TestMethod] + public void Summary_RequestEntryDetail_HasAllFields() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime requestTime = new DateTime(2026, 3, 17, 12, 0, 0, DateTimeKind.Utc); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, requestTime); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject first = (JObject)parsed["Summary"]["RegionsSummary"][0]["First"]; + + Assert.AreEqual((int)StatusCodes.Ok, first["StatusCode"].Value()); + Assert.IsNotNull(first["SubStatusCode"]); + Assert.IsNotNull(first["RequestCharge"]); + Assert.IsNotNull(first["DurationMs"]); + Assert.AreEqual("West US 2", first["Region"].ToString()); + Assert.IsNotNull(first["Endpoint"]); + Assert.IsNotNull(first["RequestStartTimeUtc"]); + Assert.IsNotNull(first["OperationType"]); + Assert.IsNotNull(first["ResourceType"]); + } + + [TestMethod] + public void CosmosTraceDiagnostics_SummaryCaching() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, DateTime.UtcNow); + + CosmosTraceDiagnostics diagnostics = new CosmosTraceDiagnostics(trace); + + // Call ToString(Summary) multiple times — should return same cached instance + string summary1 = diagnostics.ToString(DiagnosticsVerbosity.Summary); + string summary2 = 
diagnostics.ToString(DiagnosticsVerbosity.Summary);
+
+            Assert.AreSame(summary1, summary2, "Summary should be cached via Lazy");
+        }
+
+        #region Helpers
+
+        private static void AddStoreResponseStatistic( // test helper: appends one synthetic store response to the trace's client-side statistics datum
+            ITrace trace,
+            string region,
+            StatusCodes statusCode,
+            SubStatusCodes subStatusCode,
+            double requestCharge,
+            double durationMs,
+            DateTime requestStartTime)
+        {
+            // Create or find existing ClientSideRequestStatisticsTraceDatum on the trace
+            ClientSideRequestStatisticsTraceDatum datum = GetOrCreateDatum(trace);
+
+            // Create a StoreResult with the desired status code and request charge
+            StoreResponse storeResponse = new StoreResponse();
+            storeResponse.Status = (int)statusCode;
+            storeResponse.Headers = new DictionaryNameValueCollection();
+            storeResponse.Headers[HttpConstants.HttpHeaders.RequestCharge] = requestCharge.ToString(System.Globalization.CultureInfo.InvariantCulture); // invariant culture: a ',' decimal separator would corrupt the numeric header value
+            storeResponse.Headers[WFConstants.BackendHeaders.SubStatus] = ((int)subStatusCode).ToString(System.Globalization.CultureInfo.InvariantCulture); // same reason: header text must not depend on the thread culture
+
+            ReferenceCountedDisposable storeResultRef = StoreResult.CreateForTesting(storeResponse: storeResponse);
+
+            DateTime responseTime = requestStartTime.AddMilliseconds(durationMs);
+
+            StoreResponseStatistics stats = new StoreResponseStatistics(
+                requestStartTime: requestStartTime,
+                requestResponseTime: responseTime,
+                storeResult: storeResultRef.Target,
+                resourceType: ResourceType.Document,
+                operationType: OperationType.Read,
+                requestSessionToken: null,
+                locationEndpoint: new Uri("https://account-" + (region ?? "unknown").Replace(" ", "").ToLowerInvariant() + ".documents.azure.com"), // invariant casing keeps the host name stable under any culture (e.g. Turkish 'I')
+                region: region);
+
+            // Use reflection to add to the private storeResponseStatistics list
+            FieldInfo field = typeof(ClientSideRequestStatisticsTraceDatum)
+                .GetField("storeResponseStatistics", BindingFlags.NonPublic | BindingFlags.Instance);
+            List list = (List)field.GetValue(datum);
+            list.Add(stats);
+        }
+
+        private static ClientSideRequestStatisticsTraceDatum GetOrCreateDatum(ITrace trace)
+        {
+            const string datumKey = "ClientSideStats";
+
+            if (trace.TryGetDatum(datumKey, out object existing)
+                && existing is ClientSideRequestStatisticsTraceDatum existingDatum)
+            {
+                return existingDatum;
+            }
+
+            ClientSideRequestStatisticsTraceDatum datum = new ClientSideRequestStatisticsTraceDatum(
+                DateTime.UtcNow,
+                trace);
+            trace.AddDatum(datumKey, datum);
+            return datum;
+        }
+
+        #endregion
+    }
+}
From a2ba0d4666087dab353c44cabed59cd03695bef9 Mon Sep 17 00:00:00 2001
From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com>
Date: Tue, 17 Mar 2026 13:57:39 -0700
Subject: [PATCH 2/9] Diagnostics: Fixes review issues for DiagnosticsVerbosity feature

- Wire MaxDiagnosticsSummarySizeBytes from CosmosClientOptions to CosmosTraceDiagnostics callers that have access to ClientContext (ContainerCore, ReadManyQueryHelper, CosmosLinqQuery, ChangeFeedEstimatorIterator)
- Fix DiagnosticsVerbosity.Summary XML doc to include avg in aggregate statistics list
- Clarify CosmosClientOptions.DiagnosticsVerbosity XML doc to indicate it is a preference property, not auto-applied
- Add edge case tests (null trace, invalid enum value)
- Add DiagnosticsVerbosity and MaxDiagnosticsSummarySizeBytes tests to CosmosClientOptionsUnitTests
- Add changelog entry for new public API surface

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../ChangeFeedEstimatorIterator.cs | 17 ++++--
 .../src/CosmosClientOptions.cs | 10 ++--
 .../src/Diagnostics/DiagnosticsVerbosity.cs | 2 +-
.../src/Linq/CosmosLinqQuery.cs | 2 +- .../src/ReadManyQueryHelper.cs | 2 +- .../src/Resource/Container/ContainerCore.cs | 2 +- .../CosmosClientOptionsUnitTests.cs | 57 +++++++++++++++++++ .../DiagnosticsSummaryWriterTests.cs | 50 ++++++++++++++++ 8 files changed, 128 insertions(+), 14 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/ChangeFeedProcessor/ChangeFeedEstimatorIterator.cs b/Microsoft.Azure.Cosmos/src/ChangeFeedProcessor/ChangeFeedEstimatorIterator.cs index 969a921a5e..ca2e49d875 100644 --- a/Microsoft.Azure.Cosmos/src/ChangeFeedProcessor/ChangeFeedEstimatorIterator.cs +++ b/Microsoft.Azure.Cosmos/src/ChangeFeedProcessor/ChangeFeedEstimatorIterator.cs @@ -174,7 +174,7 @@ private async Task> ReadNextInternalAsync { // Lease store is empty this.hasMoreResults = false; - return new ChangeFeedEstimatorEmptyFeedResponse(trace); + return new ChangeFeedEstimatorEmptyFeedResponse(trace, this.monitoredContainer.ClientContext.ClientOptions.MaxDiagnosticsSummarySizeBytes); } IEnumerable leasesForCurrentPage = this.lazyLeaseDocuments @@ -202,7 +202,7 @@ private async Task> ReadNextInternalAsync this.hasMoreResults = ++this.currentPage != this.maxPage; - return new ChangeFeedEstimatorFeedResponse(trace, estimations.AsReadOnly(), totalRUCost); + return new ChangeFeedEstimatorFeedResponse(trace, estimations.AsReadOnly(), totalRUCost, this.monitoredContainer.ClientContext.ClientOptions.MaxDiagnosticsSummarySizeBytes); } /// @@ -368,14 +368,17 @@ private sealed class ChangeFeedEstimatorFeedResponse : FeedResponse remainingLeaseWorks; private readonly Headers headers; + private readonly int maxDiagnosticsSummarySizeBytes; public ChangeFeedEstimatorFeedResponse( ITrace trace, ReadOnlyCollection remainingLeaseWorks, - double ruCost) + double ruCost, + int maxDiagnosticsSummarySizeBytes) { this.Trace = trace ?? throw new ArgumentNullException(nameof(trace)); this.remainingLeaseWorks = remainingLeaseWorks ?? 
throw new ArgumentNullException(nameof(remainingLeaseWorks)); + this.maxDiagnosticsSummarySizeBytes = maxDiagnosticsSummarySizeBytes; this.headers = new Headers { RequestCharge = ruCost @@ -394,7 +397,7 @@ public ChangeFeedEstimatorFeedResponse( public override HttpStatusCode StatusCode => HttpStatusCode.OK; - public override CosmosDiagnostics Diagnostics => new CosmosTraceDiagnostics(this.Trace); + public override CosmosDiagnostics Diagnostics => new CosmosTraceDiagnostics(this.Trace, this.maxDiagnosticsSummarySizeBytes); public override string IndexMetrics => null; @@ -412,10 +415,12 @@ private sealed class ChangeFeedEstimatorEmptyFeedResponse : FeedResponse remainingLeaseWorks = Enumerable.Empty(); private readonly Headers headers; + private readonly int maxDiagnosticsSummarySizeBytes; - public ChangeFeedEstimatorEmptyFeedResponse(ITrace trace) + public ChangeFeedEstimatorEmptyFeedResponse(ITrace trace, int maxDiagnosticsSummarySizeBytes) { this.Trace = trace ?? throw new ArgumentNullException(nameof(trace)); + this.maxDiagnosticsSummarySizeBytes = maxDiagnosticsSummarySizeBytes; this.headers = new Headers(); } @@ -431,7 +436,7 @@ public ChangeFeedEstimatorEmptyFeedResponse(ITrace trace) public override HttpStatusCode StatusCode => HttpStatusCode.OK; - public override CosmosDiagnostics Diagnostics => new CosmosTraceDiagnostics(this.Trace); + public override CosmosDiagnostics Diagnostics => new CosmosTraceDiagnostics(this.Trace, this.maxDiagnosticsSummarySizeBytes); public override string IndexMetrics => null; diff --git a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs index 7b377ccd96..3364de8156 100644 --- a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs +++ b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs @@ -393,16 +393,18 @@ public ConnectionMode ConnectionMode public PriorityLevel? PriorityLevel { get; set; } /// - /// Gets or sets the default verbosity for serialization. 
+ /// Gets or sets the preferred verbosity for serialization. /// Default: . /// /// /// - /// This setting controls the level of detail when calling - /// . + /// This property stores the preferred verbosity level. To obtain diagnostics at + /// the configured verbosity, pass it to + /// : + /// response.Diagnostics.ToString(client.ClientOptions.DiagnosticsVerbosity). /// /// - /// When set to , + /// When is used, /// the diagnostics output is compacted by grouping requests by region and deduplicating /// retries with aggregate statistics (count, total RU, min/max/P50/avg latency). /// diff --git a/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsVerbosity.cs b/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsVerbosity.cs index 5ea8138b1f..cec4825fed 100644 --- a/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsVerbosity.cs +++ b/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsVerbosity.cs @@ -19,7 +19,7 @@ public enum DiagnosticsVerbosity /// Compacted diagnostic output optimized for log size constraints. /// Groups requests by region. Keeps first and last request in full detail. /// Deduplicates middle requests by (StatusCode, SubStatusCode) with - /// aggregate statistics (count, total RU, min/max/P50 latency). + /// aggregate statistics (count, total RU, min/max/P50/avg latency). 
/// Summary = 1, } diff --git a/Microsoft.Azure.Cosmos/src/Linq/CosmosLinqQuery.cs b/Microsoft.Azure.Cosmos/src/Linq/CosmosLinqQuery.cs index 136e0d3ba0..abb81ffebc 100644 --- a/Microsoft.Azure.Cosmos/src/Linq/CosmosLinqQuery.cs +++ b/Microsoft.Azure.Cosmos/src/Linq/CosmosLinqQuery.cs @@ -235,7 +235,7 @@ internal async Task> AggregateResultAsync(CancellationToken cancella System.Net.HttpStatusCode.OK, headers, result.FirstOrDefault(), - new CosmosTraceDiagnostics(rootTrace), + new CosmosTraceDiagnostics(rootTrace, this.container.ClientContext.ClientOptions.MaxDiagnosticsSummarySizeBytes), null); } diff --git a/Microsoft.Azure.Cosmos/src/ReadManyQueryHelper.cs b/Microsoft.Azure.Cosmos/src/ReadManyQueryHelper.cs index ee969e57a4..257a2c304b 100644 --- a/Microsoft.Azure.Cosmos/src/ReadManyQueryHelper.cs +++ b/Microsoft.Azure.Cosmos/src/ReadManyQueryHelper.cs @@ -239,7 +239,7 @@ private FeedResponse CombineFeedResponseFromQueryResponses(List OfferRetryHelperForStaleRidCacheAsync( responseMessage.StatusCode, responseMessage.Headers, null, - new CosmosTraceDiagnostics(trace), + new CosmosTraceDiagnostics(trace, this.ClientContext.ClientOptions.MaxDiagnosticsSummarySizeBytes), responseMessage.RequestMessage); } diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs index fc72a63579..4fcddd9af2 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs @@ -1295,5 +1295,62 @@ public int Compare(object x, object y) return 1; } } + + [TestMethod] + public void CosmosClientOptions_DiagnosticsVerbosity_DefaultIsDetailed() + { + CosmosClientOptions options = new CosmosClientOptions(); + Assert.AreEqual(DiagnosticsVerbosity.Detailed, options.DiagnosticsVerbosity); + } + + [TestMethod] + public void 
CosmosClientOptions_DiagnosticsVerbosity_CanSetToSummary() + { + CosmosClientOptions options = new CosmosClientOptions + { + DiagnosticsVerbosity = DiagnosticsVerbosity.Summary + }; + + Assert.AreEqual(DiagnosticsVerbosity.Summary, options.DiagnosticsVerbosity); + } + + [TestMethod] + public void CosmosClientOptions_MaxDiagnosticsSummarySizeBytes_DefaultIs8192() + { + CosmosClientOptions options = new CosmosClientOptions(); + Assert.AreEqual(8192, options.MaxDiagnosticsSummarySizeBytes); + } + + [TestMethod] + [ExpectedException(typeof(ArgumentOutOfRangeException))] + public void CosmosClientOptions_MaxDiagnosticsSummarySizeBytes_ThrowsBelowMinimum() + { + CosmosClientOptions options = new CosmosClientOptions + { + MaxDiagnosticsSummarySizeBytes = 2048 + }; + } + + [TestMethod] + public void CosmosClientOptions_MaxDiagnosticsSummarySizeBytes_AcceptsMinimum() + { + CosmosClientOptions options = new CosmosClientOptions + { + MaxDiagnosticsSummarySizeBytes = 4096 + }; + + Assert.AreEqual(4096, options.MaxDiagnosticsSummarySizeBytes); + } + + [TestMethod] + public void CosmosClientOptions_MaxDiagnosticsSummarySizeBytes_AcceptsLargeValue() + { + CosmosClientOptions options = new CosmosClientOptions + { + MaxDiagnosticsSummarySizeBytes = 65536 + }; + + Assert.AreEqual(65536, options.MaxDiagnosticsSummarySizeBytes); + } } } \ No newline at end of file diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryWriterTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryWriterTests.cs index 097d1916ff..2abe3993d6 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryWriterTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryWriterTests.cs @@ -524,5 +524,55 @@ private static ClientSideRequestStatisticsTraceDatum GetOrCreateDatum(ITrace tra } #endregion + + #region Edge Case Tests + + 
[TestMethod] + [ExpectedException(typeof(ArgumentNullException))] + public void WriteSummary_NullTrace_ThrowsArgumentNullException() + { + DiagnosticsSummaryWriter.WriteSummary(null, 8192); + } + + [TestMethod] + public void ToString_InvalidEnumValue_FallsBackToDetailed() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, DateTime.UtcNow); + + CosmosTraceDiagnostics diagnostics = new CosmosTraceDiagnostics(trace); + + // Invalid enum value should fall back to detailed output (same as parameterless ToString) + string result = diagnostics.ToString((DiagnosticsVerbosity)99); + JObject parsed = JObject.Parse(result); + + Assert.IsNotNull(parsed["name"], "Invalid verbosity should produce detailed output with trace name"); + Assert.IsNull(parsed["Summary"]?["DiagnosticsVerbosity"], + "Should not contain Summary.DiagnosticsVerbosity since it is detailed output"); + } + + [TestMethod] + public void CosmosClientOptions_MaxSummarySizeBytes_CustomValuePropagated() + { + CosmosClientOptions options = new CosmosClientOptions + { + MaxDiagnosticsSummarySizeBytes = 16384 + }; + + Assert.AreEqual(16384, options.MaxDiagnosticsSummarySizeBytes); + } + + [TestMethod] + public void CosmosClientOptions_DiagnosticsVerbosity_CanBeSetToSummary() + { + CosmosClientOptions options = new CosmosClientOptions + { + DiagnosticsVerbosity = DiagnosticsVerbosity.Summary + }; + + Assert.AreEqual(DiagnosticsVerbosity.Summary, options.DiagnosticsVerbosity); + } + + #endregion } } From ee2ef5832b8b1996e92acee8dd15049d9c4c58a6 Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Tue, 17 Mar 2026 15:27:38 -0700 Subject: [PATCH 3/9] Diagnostics: Adds env var support, builder API, baseline and emulator tests - Add AZURE_COSMOS_DIAGNOSTICS_VERBOSITY and AZURE_COSMOS_DIAGNOSTICS_MAX_SUMMARY_SIZE environment variable support via ConfigurationManager - 
Read env vars as fallback defaults in CosmosClientOptions constructor - Add WithDiagnosticsVerbosity and WithMaxDiagnosticsSummarySizeBytes fluent builder methods to CosmosClientBuilder - Update DotNetSDKAPI.net6.json contract with new builder methods - Add 9 env var and 3 builder tests to CosmosClientOptionsUnitTests - Add 9 baseline schema validation tests (DiagnosticsSummaryBaselineTests) - Add 8 emulator integration tests (DiagnosticsVerbosityEmulatorTests) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/CosmosClientOptions.cs | 19 +- .../src/Fluent/CosmosClientBuilder.cs | 25 ++ .../src/Util/ConfigurationManager.cs | 12 + .../DiagnosticsVerbosityEmulatorTests.cs | 180 +++++++++ .../Contracts/DotNetSDKAPI.net6.json | 10 + .../CosmosClientOptionsUnitTests.cs | 127 ++++++ .../DiagnosticsSummaryBaselineTests.cs | 378 ++++++++++++++++++ 7 files changed, 749 insertions(+), 2 deletions(-) create mode 100644 Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/DiagnosticsVerbosityEmulatorTests.cs create mode 100644 Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryBaselineTests.cs diff --git a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs index 3364de8156..d9654d16b2 100644 --- a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs +++ b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs @@ -95,8 +95,23 @@ public CosmosClientOptions() this.ConnectionProtocol = CosmosClientOptions.DefaultProtocol; this.ApiType = CosmosClientOptions.DefaultApiType; this.CustomHandlers = new Collection(); - this.CosmosClientTelemetryOptions = new CosmosClientTelemetryOptions(); - this.SessionRetryOptions = new SessionRetryOptions(); + this.CosmosClientTelemetryOptions = new CosmosClientTelemetryOptions(); + this.SessionRetryOptions = new SessionRetryOptions(); + + string envVerbosity = 
Environment.GetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable); + if (!string.IsNullOrEmpty(envVerbosity) + && Enum.TryParse(envVerbosity, ignoreCase: true, out DiagnosticsVerbosity parsedVerbosity)) + { + this.DiagnosticsVerbosity = parsedVerbosity; + } + + string envMaxSize = Environment.GetEnvironmentVariable(ConfigurationManager.DiagnosticsMaxSummarySizeVariable); + if (!string.IsNullOrEmpty(envMaxSize) + && int.TryParse(envMaxSize, out int parsedMaxSize) + && parsedMaxSize >= 4096) + { + this.maxDiagnosticsSummarySizeBytes = parsedMaxSize; + } } /// diff --git a/Microsoft.Azure.Cosmos/src/Fluent/CosmosClientBuilder.cs b/Microsoft.Azure.Cosmos/src/Fluent/CosmosClientBuilder.cs index e93229ae70..8cf2d3caed 100644 --- a/Microsoft.Azure.Cosmos/src/Fluent/CosmosClientBuilder.cs +++ b/Microsoft.Azure.Cosmos/src/Fluent/CosmosClientBuilder.cs @@ -849,5 +849,30 @@ CosmosClientBuilder WithReadConsistencyStrategy(Cosmos.ReadConsistencyStrategy r this.clientOptions.ReadConsistencyStrategy = readConsistencyStrategy; return this; } + + /// + /// Sets the preferred for diagnostics serialization. + /// + /// The verbosity level to use when calling + /// . + /// The current . + /// + public CosmosClientBuilder WithDiagnosticsVerbosity(DiagnosticsVerbosity verbosity) + { + this.clientOptions.DiagnosticsVerbosity = verbosity; + return this; + } + + /// + /// Sets the maximum size in bytes for Summary mode diagnostic output. + /// + /// Maximum size in bytes (minimum 4096). + /// The current . 
+ /// + public CosmosClientBuilder WithMaxDiagnosticsSummarySizeBytes(int maxSizeBytes) + { + this.clientOptions.MaxDiagnosticsSummarySizeBytes = maxSizeBytes; + return this; + } } } diff --git a/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs b/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs index b5b4d8eec2..ef34477551 100644 --- a/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs +++ b/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs @@ -125,6 +125,18 @@ internal static class ConfigurationManager /// internal static readonly string UseLengthAwareRangeComparator = "AZURE_COSMOS_USE_LENGTH_AWARE_RANGE_COMPARATOR"; + /// + /// Environment variable name for overriding the default diagnostics verbosity. + /// Accepts "Summary" or "Detailed" (case-insensitive). + /// + internal static readonly string DiagnosticsVerbosityVariable = "AZURE_COSMOS_DIAGNOSTICS_VERBOSITY"; + + /// + /// Environment variable name for overriding the maximum summary diagnostics size in bytes. + /// Must be an integer >= 4096. + /// + internal static readonly string DiagnosticsMaxSummarySizeVariable = "AZURE_COSMOS_DIAGNOSTICS_MAX_SUMMARY_SIZE"; + public static T GetEnvironmentVariable(string variable, T defaultValue) { string value = Environment.GetEnvironmentVariable(variable); diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/DiagnosticsVerbosityEmulatorTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/DiagnosticsVerbosityEmulatorTests.cs new file mode 100644 index 0000000000..966e9f9170 --- /dev/null +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/DiagnosticsVerbosityEmulatorTests.cs @@ -0,0 +1,180 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos.SDK.EmulatorTests +{ + using System; + using System.Text; + using System.Threading.Tasks; + using Microsoft.Azure.Cosmos.Diagnostics; + using Microsoft.VisualStudio.TestTools.UnitTesting; + using Newtonsoft.Json.Linq; + + [TestClass] + public class DiagnosticsVerbosityEmulatorTests + { + private Container Container = null; + private Cosmos.Database Database = null; + private CosmosClient Client = null; + + [TestInitialize] + public async Task TestInitialize() + { + CosmosClientOptions clientOptions = new CosmosClientOptions() + { + DiagnosticsVerbosity = DiagnosticsVerbosity.Summary, + MaxDiagnosticsSummarySizeBytes = 8192, + ConsistencyLevel = Cosmos.ConsistencyLevel.Session + }; + this.Client = TestCommon.CreateCosmosClient(clientOptions); + this.Database = (await this.Client.CreateDatabaseAsync(Guid.NewGuid().ToString())).Database; + this.Container = (await this.Database.CreateContainerAsync(Guid.NewGuid().ToString(), "/pk")).Container; + } + + [TestCleanup] + public async Task Cleanup() + { + await this.Database.DeleteAsync(); + this.Client.Dispose(); + } + + [TestMethod] + public async Task CreateItem_SummaryMode_ProducesValidJson() + { + ToDoActivity testItem = ToDoActivity.CreateRandomToDoActivity(); + ItemResponse response = await this.Container.CreateItemAsync( + testItem, new Cosmos.PartitionKey(testItem.pk)); + + string summary = response.Diagnostics.ToString(DiagnosticsVerbosity.Summary); + Assert.IsNotNull(summary); + + JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + Assert.IsNotNull(summaryObj, "Summary object should exist"); + Assert.AreEqual("Summary", summaryObj["DiagnosticsVerbosity"].ToString()); + Assert.IsTrue(summaryObj["TotalRequestCount"].Value() >= 1); + Assert.IsNotNull(summaryObj["RegionsSummary"]); + } + + [TestMethod] + public async Task ReadItem_SummaryMode_ContainsRegionInfo() + { + 
ToDoActivity testItem = ToDoActivity.CreateRandomToDoActivity(); + await this.Container.CreateItemAsync(testItem, new Cosmos.PartitionKey(testItem.pk)); + + ItemResponse response = await this.Container.ReadItemAsync( + testItem.id, new Cosmos.PartitionKey(testItem.pk)); + + string summary = response.Diagnostics.ToString(DiagnosticsVerbosity.Summary); + JObject parsed = JObject.Parse(summary); + JArray regions = (JArray)parsed["Summary"]["RegionsSummary"]; + + Assert.IsTrue(regions.Count >= 1, "Should have at least one region"); + JObject firstRegion = (JObject)regions[0]; + Assert.IsNotNull(firstRegion["Region"]); + Assert.IsNotNull(firstRegion["First"]); + Assert.IsTrue(firstRegion["RequestCount"].Value() >= 1); + } + + [TestMethod] + public async Task SummaryMode_SmallerThanDetailed() + { + ToDoActivity testItem = ToDoActivity.CreateRandomToDoActivity(); + ItemResponse response = await this.Container.CreateItemAsync( + testItem, new Cosmos.PartitionKey(testItem.pk)); + + string detailed = response.Diagnostics.ToString(); + string summary = response.Diagnostics.ToString(DiagnosticsVerbosity.Summary); + + int detailedBytes = Encoding.UTF8.GetByteCount(detailed); + int summaryBytes = Encoding.UTF8.GetByteCount(summary); + + Assert.IsTrue(summaryBytes <= detailedBytes, + $"Summary ({summaryBytes} bytes) should be <= Detailed ({detailedBytes} bytes)"); + } + + [TestMethod] + public async Task ParameterlessToString_UnchangedBySummaryOption() + { + ToDoActivity testItem = ToDoActivity.CreateRandomToDoActivity(); + ItemResponse response = await this.Container.CreateItemAsync( + testItem, new Cosmos.PartitionKey(testItem.pk)); + + string parameterless = response.Diagnostics.ToString(); + string explicitDetailed = response.Diagnostics.ToString(DiagnosticsVerbosity.Detailed); + + JObject parsedDefault = JObject.Parse(parameterless); + JObject parsedExplicit = JObject.Parse(explicitDetailed); + + // Both should have the trace name (detailed format), not a Summary wrapper + 
Assert.IsNotNull(parsedDefault["name"], "Parameterless ToString should have trace name"); + Assert.AreEqual(parsedDefault["name"].ToString(), parsedExplicit["name"].ToString()); + } + + [TestMethod] + public async Task SummaryCaching_ReturnsSameInstance() + { + ToDoActivity testItem = ToDoActivity.CreateRandomToDoActivity(); + ItemResponse response = await this.Container.CreateItemAsync( + testItem, new Cosmos.PartitionKey(testItem.pk)); + + string summary1 = response.Diagnostics.ToString(DiagnosticsVerbosity.Summary); + string summary2 = response.Diagnostics.ToString(DiagnosticsVerbosity.Summary); + + Assert.AreSame(summary1, summary2, "Summary should be cached via Lazy"); + } + + [TestMethod] + public async Task Query_SummaryMode_ProducesValidJson() + { + ToDoActivity testItem = ToDoActivity.CreateRandomToDoActivity(); + await this.Container.CreateItemAsync(testItem, new Cosmos.PartitionKey(testItem.pk)); + + FeedIterator feedIterator = this.Container.GetItemQueryIterator( + $"select * from c where c.id = '{testItem.id}'"); + + while (feedIterator.HasMoreResults) + { + FeedResponse feedResponse = await feedIterator.ReadNextAsync(); + string summary = feedResponse.Diagnostics.ToString(DiagnosticsVerbosity.Summary); + Assert.IsNotNull(summary); + + JObject parsed = JObject.Parse(summary); + Assert.IsNotNull(parsed["Summary"]); + Assert.AreEqual("Summary", parsed["Summary"]["DiagnosticsVerbosity"].ToString()); + } + } + + [TestMethod] + public async Task ReplaceItem_SummaryMode_ProducesValidJson() + { + ToDoActivity testItem = ToDoActivity.CreateRandomToDoActivity(); + await this.Container.CreateItemAsync(testItem, new Cosmos.PartitionKey(testItem.pk)); + + testItem.cost = 9999; + ItemResponse response = await this.Container.ReplaceItemAsync( + testItem, testItem.id, new Cosmos.PartitionKey(testItem.pk)); + + string summary = response.Diagnostics.ToString(DiagnosticsVerbosity.Summary); + JObject parsed = JObject.Parse(summary); + Assert.IsNotNull(parsed["Summary"]); 
+ Assert.IsTrue(parsed["Summary"]["TotalRequestCount"].Value() >= 1); + } + + [TestMethod] + public async Task DeleteItem_SummaryMode_ProducesValidJson() + { + ToDoActivity testItem = ToDoActivity.CreateRandomToDoActivity(); + await this.Container.CreateItemAsync(testItem, new Cosmos.PartitionKey(testItem.pk)); + + ItemResponse response = await this.Container.DeleteItemAsync( + testItem.id, new Cosmos.PartitionKey(testItem.pk)); + + string summary = response.Diagnostics.ToString(DiagnosticsVerbosity.Summary); + JObject parsed = JObject.Parse(summary); + Assert.IsNotNull(parsed["Summary"]); + Assert.AreEqual("Summary", parsed["Summary"]["DiagnosticsVerbosity"].ToString()); + } + } +} diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetSDKAPI.net6.json b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetSDKAPI.net6.json index 0a6154671d..4267424d03 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetSDKAPI.net6.json +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetSDKAPI.net6.json @@ -5170,6 +5170,11 @@ "Attributes": [], "MethodInfo": "Microsoft.Azure.Cosmos.Fluent.CosmosClientBuilder WithContentResponseOnWrite(Boolean);IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" }, + "Microsoft.Azure.Cosmos.Fluent.CosmosClientBuilder WithDiagnosticsVerbosity(Microsoft.Azure.Cosmos.DiagnosticsVerbosity)": { + "Type": "Method", + "Attributes": [], + "MethodInfo": "Microsoft.Azure.Cosmos.Fluent.CosmosClientBuilder WithDiagnosticsVerbosity(Microsoft.Azure.Cosmos.DiagnosticsVerbosity);IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, "Microsoft.Azure.Cosmos.Fluent.CosmosClientBuilder WithCustomAccountEndpoints(System.Collections.Generic.IEnumerable`1[System.Uri])": { "Type": "Method", "Attributes": [], @@ -5195,6 +5200,11 @@ "Attributes": [], 
"MethodInfo": "Microsoft.Azure.Cosmos.Fluent.CosmosClientBuilder WithLimitToEndpoint(Boolean);IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" }, + "Microsoft.Azure.Cosmos.Fluent.CosmosClientBuilder WithMaxDiagnosticsSummarySizeBytes(Int32)": { + "Type": "Method", + "Attributes": [], + "MethodInfo": "Microsoft.Azure.Cosmos.Fluent.CosmosClientBuilder WithMaxDiagnosticsSummarySizeBytes(Int32);IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, "Microsoft.Azure.Cosmos.Fluent.CosmosClientBuilder WithPriorityLevel(Microsoft.Azure.Cosmos.PriorityLevel)": { "Type": "Method", "Attributes": [], diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs index 4fcddd9af2..c0cf1cde26 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs @@ -1352,5 +1352,132 @@ public void CosmosClientOptions_MaxDiagnosticsSummarySizeBytes_AcceptsLargeValue Assert.AreEqual(65536, options.MaxDiagnosticsSummarySizeBytes); } + + [TestMethod] + public void CosmosClientOptions_EnvVar_DiagnosticsVerbosity_Summary() + { + string original = Environment.GetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY"); + try + { + Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY", "Summary"); + CosmosClientOptions options = new CosmosClientOptions(); + Assert.AreEqual(DiagnosticsVerbosity.Summary, options.DiagnosticsVerbosity); + } + finally + { + Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY", original); + } + } + + [TestMethod] + public void CosmosClientOptions_EnvVar_DiagnosticsVerbosity_CaseInsensitive() + { + string original = 
Environment.GetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY"); + try + { + Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY", "summary"); + CosmosClientOptions options = new CosmosClientOptions(); + Assert.AreEqual(DiagnosticsVerbosity.Summary, options.DiagnosticsVerbosity); + } + finally + { + Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY", original); + } + } + + [TestMethod] + public void CosmosClientOptions_EnvVar_DiagnosticsVerbosity_InvalidIgnored() + { + string original = Environment.GetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY"); + try + { + Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY", "InvalidValue"); + CosmosClientOptions options = new CosmosClientOptions(); + Assert.AreEqual(DiagnosticsVerbosity.Detailed, options.DiagnosticsVerbosity); + } + finally + { + Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY", original); + } + } + + [TestMethod] + public void CosmosClientOptions_EnvVar_MaxSummarySize() + { + string original = Environment.GetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_MAX_SUMMARY_SIZE"); + try + { + Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_MAX_SUMMARY_SIZE", "16384"); + CosmosClientOptions options = new CosmosClientOptions(); + Assert.AreEqual(16384, options.MaxDiagnosticsSummarySizeBytes); + } + finally + { + Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_MAX_SUMMARY_SIZE", original); + } + } + + [TestMethod] + public void CosmosClientOptions_EnvVar_MaxSummarySize_BelowMinimumIgnored() + { + string original = Environment.GetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_MAX_SUMMARY_SIZE"); + try + { + Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_MAX_SUMMARY_SIZE", "1024"); + CosmosClientOptions options = new CosmosClientOptions(); + Assert.AreEqual(8192, options.MaxDiagnosticsSummarySizeBytes, "Below-minimum env var should be ignored"); + } + finally + { + 
Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_MAX_SUMMARY_SIZE", original); + } + } + + [TestMethod] + public void CosmosClientOptions_EnvVar_ExplicitPropertyOverridesEnvVar() + { + string original = Environment.GetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY"); + try + { + Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY", "Summary"); + CosmosClientOptions options = new CosmosClientOptions + { + DiagnosticsVerbosity = DiagnosticsVerbosity.Detailed + }; + Assert.AreEqual(DiagnosticsVerbosity.Detailed, options.DiagnosticsVerbosity); + } + finally + { + Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY", original); + } + } + + [TestMethod] + public void CosmosClientBuilder_WithDiagnosticsVerbosity() + { + CosmosClientBuilder builder = new CosmosClientBuilder(CosmosClientOptionsUnitTests.AccountEndpoint, MockCosmosUtil.RandomInvalidCorrectlyFormatedAuthKey); + builder.WithDiagnosticsVerbosity(DiagnosticsVerbosity.Summary); + + CosmosClient client = builder.Build(new MockDocumentClient()); + Assert.AreEqual(DiagnosticsVerbosity.Summary, client.ClientOptions.DiagnosticsVerbosity); + } + + [TestMethod] + public void CosmosClientBuilder_WithMaxDiagnosticsSummarySizeBytes() + { + CosmosClientBuilder builder = new CosmosClientBuilder(CosmosClientOptionsUnitTests.AccountEndpoint, MockCosmosUtil.RandomInvalidCorrectlyFormatedAuthKey); + builder.WithMaxDiagnosticsSummarySizeBytes(16384); + + CosmosClient client = builder.Build(new MockDocumentClient()); + Assert.AreEqual(16384, client.ClientOptions.MaxDiagnosticsSummarySizeBytes); + } + + [TestMethod] + [ExpectedException(typeof(ArgumentOutOfRangeException))] + public void CosmosClientBuilder_WithMaxDiagnosticsSummarySizeBytes_TooSmallThrows() + { + CosmosClientBuilder builder = new CosmosClientBuilder(CosmosClientOptionsUnitTests.AccountEndpoint, MockCosmosUtil.RandomInvalidCorrectlyFormatedAuthKey); + builder.WithMaxDiagnosticsSummarySizeBytes(2048); + } } 
} \ No newline at end of file diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryBaselineTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryBaselineTests.cs new file mode 100644 index 0000000000..5df68bb771 --- /dev/null +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryBaselineTests.cs @@ -0,0 +1,378 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos.Tests.Tracing +{ + using System; + using System.Collections.Generic; + using System.Linq; + using System.Reflection; + using Microsoft.Azure.Cosmos.Diagnostics; + using Microsoft.Azure.Cosmos.Tracing; + using Microsoft.Azure.Cosmos.Tracing.TraceData; + using Microsoft.Azure.Documents; + using Microsoft.Azure.Documents.Collections; + using Microsoft.VisualStudio.TestTools.UnitTesting; + using Newtonsoft.Json.Linq; + using static Microsoft.Azure.Cosmos.Tracing.TraceData.ClientSideRequestStatisticsTraceDatum; + + /// + /// Baseline tests that validate the exact JSON schema produced by DiagnosticsSummaryWriter. + /// These tests ensure the summary output structure does not change accidentally across releases. + /// + [TestClass] + public class DiagnosticsSummaryBaselineTests + { + /// + /// Validates the exact set of top-level fields in a single-request summary. + /// This catches any accidental field additions/removals/renames. 
+ /// + [TestMethod] + public void Baseline_SingleRequest_SchemaFields() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime fixedTime = new DateTime(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, fixedTime); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + + // Top-level summary fields (exact set) + HashSet expectedTopLevelFields = new HashSet + { + "DiagnosticsVerbosity", + "TotalDurationMs", + "TotalRequestCharge", + "TotalRequestCount", + "RegionsSummary" + }; + + HashSet actualFields = new HashSet(summaryObj.Properties().Select(p => p.Name)); + Assert.IsTrue(expectedTopLevelFields.SetEquals(actualFields), + $"Summary top-level fields mismatch. Expected: [{string.Join(", ", expectedTopLevelFields.OrderBy(x => x))}], " + + $"Actual: [{string.Join(", ", actualFields.OrderBy(x => x))}]"); + } + + /// + /// Validates the exact set of fields in a region summary entry. + /// + [TestMethod] + public void Baseline_RegionSummary_SchemaFields() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime fixedTime = new DateTime(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, fixedTime); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject region = (JObject)parsed["Summary"]["RegionsSummary"][0]; + + // Single-request region has: Region, RequestCount, TotalRequestCharge, First + HashSet expectedFields = new HashSet + { + "Region", + "RequestCount", + "TotalRequestCharge", + "First" + }; + + HashSet actualFields = new HashSet(region.Properties().Select(p => p.Name)); + Assert.IsTrue(expectedFields.SetEquals(actualFields), + $"Region fields mismatch. 
Expected: [{string.Join(", ", expectedFields.OrderBy(x => x))}], " + + $"Actual: [{string.Join(", ", actualFields.OrderBy(x => x))}]"); + } + + /// + /// Validates the exact set of fields in a request entry detail. + /// + [TestMethod] + public void Baseline_RequestEntryDetail_SchemaFields() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime fixedTime = new DateTime(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, fixedTime); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject first = (JObject)parsed["Summary"]["RegionsSummary"][0]["First"]; + + HashSet expectedFields = new HashSet + { + "StatusCode", + "SubStatusCode", + "RequestCharge", + "DurationMs", + "Region", + "Endpoint", + "RequestStartTimeUtc", + "OperationType", + "ResourceType" + }; + + HashSet actualFields = new HashSet(first.Properties().Select(p => p.Name)); + Assert.IsTrue(expectedFields.SetEquals(actualFields), + $"Request entry fields mismatch. Expected: [{string.Join(", ", expectedFields.OrderBy(x => x))}], " + + $"Actual: [{string.Join(", ", actualFields.OrderBy(x => x))}]"); + } + + /// + /// Validates the exact set of fields in an aggregated group. 
+ /// + [TestMethod] + public void Baseline_AggregatedGroup_SchemaFields() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime fixedTime = new DateTime(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc); + + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 1, fixedTime); + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 10, fixedTime.AddMilliseconds(100)); + AddStoreResponseStatistic(trace, "R1", StatusCodes.Ok, SubStatusCodes.Unknown, 5, 5, fixedTime.AddMilliseconds(200)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject group = (JObject)parsed["Summary"]["RegionsSummary"][0]["AggregatedGroups"][0]; + + HashSet expectedFields = new HashSet + { + "StatusCode", + "SubStatusCode", + "Count", + "TotalRequestCharge", + "MinDurationMs", + "MaxDurationMs", + "P50DurationMs", + "AvgDurationMs" + }; + + HashSet actualFields = new HashSet(group.Properties().Select(p => p.Name)); + Assert.IsTrue(expectedFields.SetEquals(actualFields), + $"Aggregated group fields mismatch. Expected: [{string.Join(", ", expectedFields.OrderBy(x => x))}], " + + $"Actual: [{string.Join(", ", actualFields.OrderBy(x => x))}]"); + } + + /// + /// Validates the exact set of fields in a truncated summary. 
+ /// + [TestMethod] + public void Baseline_TruncatedSummary_SchemaFields() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime fixedTime = new DateTime(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc); + + for (int r = 0; r < 20; r++) + { + for (int i = 0; i < 10; i++) + { + AddStoreResponseStatistic(trace, $"Region {r}", StatusCodes.TooManyRequests, + SubStatusCodes.Unknown, 0.0, 5 + i, fixedTime.AddMilliseconds(r * 1000 + i * 100)); + } + } + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 512); + JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + + HashSet expectedFields = new HashSet + { + "DiagnosticsVerbosity", + "TotalDurationMs", + "TotalRequestCount", + "Truncated", + "Message" + }; + + HashSet actualFields = new HashSet(summaryObj.Properties().Select(p => p.Name)); + Assert.IsTrue(expectedFields.SetEquals(actualFields), + $"Truncated summary fields mismatch. Expected: [{string.Join(", ", expectedFields.OrderBy(x => x))}], " + + $"Actual: [{string.Join(", ", actualFields.OrderBy(x => x))}]"); + } + + /// + /// Validates the full region summary schema when both First, Last, and AggregatedGroups are present. 
+ /// + [TestMethod] + public void Baseline_FullRegionSummary_SchemaFields() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime fixedTime = new DateTime(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc); + + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 5, fixedTime); + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 10, fixedTime.AddMilliseconds(100)); + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 15, fixedTime.AddMilliseconds(200)); + AddStoreResponseStatistic(trace, "R1", StatusCodes.Ok, SubStatusCodes.Unknown, 5, 12, fixedTime.AddMilliseconds(300)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject region = (JObject)parsed["Summary"]["RegionsSummary"][0]; + + HashSet expectedFields = new HashSet + { + "Region", + "RequestCount", + "TotalRequestCharge", + "First", + "Last", + "AggregatedGroups" + }; + + HashSet actualFields = new HashSet(region.Properties().Select(p => p.Name)); + Assert.IsTrue(expectedFields.SetEquals(actualFields), + $"Full region fields mismatch. Expected: [{string.Join(", ", expectedFields.OrderBy(x => x))}], " + + $"Actual: [{string.Join(", ", actualFields.OrderBy(x => x))}]"); + } + + /// + /// Validates field types are correct (numbers are numbers, strings are strings, etc.). 
+ /// + [TestMethod] + public void Baseline_FieldTypes_Consistent() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime fixedTime = new DateTime(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc); + + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 5, fixedTime); + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 10, fixedTime.AddMilliseconds(100)); + AddStoreResponseStatistic(trace, "R1", StatusCodes.Ok, SubStatusCodes.Unknown, 5, 12, fixedTime.AddMilliseconds(200)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + + // Top-level types + Assert.AreEqual(JTokenType.String, summaryObj["DiagnosticsVerbosity"].Type); + Assert.IsTrue(summaryObj["TotalDurationMs"].Type == JTokenType.Float + || summaryObj["TotalDurationMs"].Type == JTokenType.Integer, + "TotalDurationMs should be numeric"); + Assert.IsTrue(summaryObj["TotalRequestCharge"].Type == JTokenType.Float + || summaryObj["TotalRequestCharge"].Type == JTokenType.Integer, + "TotalRequestCharge should be numeric"); + Assert.AreEqual(JTokenType.Integer, summaryObj["TotalRequestCount"].Type); + Assert.AreEqual(JTokenType.Array, summaryObj["RegionsSummary"].Type); + + // Region types + JObject region = (JObject)summaryObj["RegionsSummary"][0]; + Assert.AreEqual(JTokenType.String, region["Region"].Type); + Assert.AreEqual(JTokenType.Integer, region["RequestCount"].Type); + Assert.IsTrue(region["TotalRequestCharge"].Type == JTokenType.Float + || region["TotalRequestCharge"].Type == JTokenType.Integer, + "TotalRequestCharge should be numeric"); + Assert.AreEqual(JTokenType.Object, region["First"].Type); + Assert.AreEqual(JTokenType.Object, region["Last"].Type); + + // Request entry types + JObject first = (JObject)region["First"]; + Assert.AreEqual(JTokenType.Integer, first["StatusCode"].Type); + 
Assert.AreEqual(JTokenType.Integer, first["SubStatusCode"].Type); + Assert.IsTrue(first["RequestCharge"].Type == JTokenType.Float + || first["RequestCharge"].Type == JTokenType.Integer, + "RequestCharge should be numeric"); + Assert.IsTrue(first["DurationMs"].Type == JTokenType.Float + || first["DurationMs"].Type == JTokenType.Integer, + "DurationMs should be numeric"); + Assert.AreEqual(JTokenType.String, first["Region"].Type); + } + + /// + /// Validates that the DiagnosticsVerbosity field always has the value "Summary". + /// + [TestMethod] + public void Baseline_DiagnosticsVerbosityField_AlwaysSummary() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + Assert.AreEqual("Summary", parsed["Summary"]["DiagnosticsVerbosity"].Value()); + } + + /// + /// Validates that the truncated summary has the correct Truncated and Message fields. + /// + [TestMethod] + public void Baseline_TruncatedMessage_Content() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime fixedTime = new DateTime(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc); + + for (int r = 0; r < 20; r++) + { + for (int i = 0; i < 10; i++) + { + AddStoreResponseStatistic(trace, $"Region {r}", StatusCodes.TooManyRequests, + SubStatusCodes.Unknown, 0.0, 5, fixedTime.AddMilliseconds(r * 1000 + i * 100)); + } + } + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 512); + JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + + Assert.IsTrue(summaryObj["Truncated"].Value()); + Assert.AreEqual(JTokenType.Boolean, summaryObj["Truncated"].Type); + string message = summaryObj["Message"].Value(); + Assert.IsTrue(message.Contains("truncated", StringComparison.OrdinalIgnoreCase), + $"Truncation message should mention 'truncated'. 
Actual: {message}"); + Assert.IsTrue(message.Contains("Detailed", StringComparison.OrdinalIgnoreCase), + $"Truncation message should mention 'Detailed' mode. Actual: {message}"); + } + + #region Helpers + + private static void AddStoreResponseStatistic( + ITrace trace, + string region, + StatusCodes statusCode, + SubStatusCodes subStatusCode, + double requestCharge, + double durationMs, + DateTime requestStartTime) + { + ClientSideRequestStatisticsTraceDatum datum = GetOrCreateDatum(trace); + + StoreResponse storeResponse = new StoreResponse(); + storeResponse.Status = (int)statusCode; + storeResponse.Headers = new DictionaryNameValueCollection(); + storeResponse.Headers[HttpConstants.HttpHeaders.RequestCharge] = requestCharge.ToString(); + storeResponse.Headers[WFConstants.BackendHeaders.SubStatus] = ((int)subStatusCode).ToString(); + + ReferenceCountedDisposable storeResultRef = StoreResult.CreateForTesting(storeResponse: storeResponse); + + DateTime responseTime = requestStartTime.AddMilliseconds(durationMs); + + StoreResponseStatistics stats = new StoreResponseStatistics( + requestStartTime: requestStartTime, + requestResponseTime: responseTime, + storeResult: storeResultRef.Target, + resourceType: ResourceType.Document, + operationType: OperationType.Read, + requestSessionToken: null, + locationEndpoint: new Uri("https://account-" + (region ?? 
"unknown").Replace(" ", "").ToLower() + ".documents.azure.com"), + region: region); + + FieldInfo field = typeof(ClientSideRequestStatisticsTraceDatum) + .GetField("storeResponseStatistics", BindingFlags.NonPublic | BindingFlags.Instance); + List list = (List)field.GetValue(datum); + list.Add(stats); + } + + private static ClientSideRequestStatisticsTraceDatum GetOrCreateDatum(ITrace trace) + { + const string datumKey = "ClientSideStats"; + + if (trace.TryGetDatum(datumKey, out object existing) + && existing is ClientSideRequestStatisticsTraceDatum existingDatum) + { + return existingDatum; + } + + ClientSideRequestStatisticsTraceDatum datum = new ClientSideRequestStatisticsTraceDatum( + DateTime.UtcNow, + trace); + trace.AddDatum(datumKey, datum); + return datum; + } + + #endregion + } +} From e1d22186bdbdb16cb1b2246a9085369611bcd941 Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Wed, 18 Mar 2026 12:01:22 -0700 Subject: [PATCH 4/9] Diagnostics: Fixes test failures from DiagnosticsVerbosity constructor changes Update reflection-based constructor lookups in OpenTelemetryRecorderTests to match new signatures that include maxDiagnosticsSummarySizeBytes parameter. Add ClientOptions mock setup in ChangeFeedEstimatorIteratorTests for strict mocks that now access ClientContext.ClientOptions.MaxDiagnosticsSummarySizeBytes. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../ChangeFeed/ChangeFeedEstimatorIteratorTests.cs | 2 ++ .../Telemetry/OpenTelemetryRecorderTests.cs | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ChangeFeed/ChangeFeedEstimatorIteratorTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ChangeFeed/ChangeFeedEstimatorIteratorTests.cs index cca7863af2..3b816b79f0 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ChangeFeed/ChangeFeedEstimatorIteratorTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ChangeFeed/ChangeFeedEstimatorIteratorTests.cs @@ -384,6 +384,7 @@ static FeedIteratorInternal feedCreator(DocumentServiceLease lease, string conti Mock mockedContext = new Mock(MockBehavior.Strict); mockedContext.Setup(c => c.Client).Returns(MockCosmosUtil.CreateMockCosmosClient()); + mockedContext.Setup(c => c.ClientOptions).Returns(new CosmosClientOptions()); mockedContext.Setup(x => x.OperationHelperAsync>( It.Is(str => str.Contains("Change Feed Estimator")), It.IsAny(), @@ -492,6 +493,7 @@ private static ContainerInternal GetMockedContainer() Mock containerMock = new Mock(MockBehavior.Strict); Mock mockContext = new Mock(MockBehavior.Strict); mockContext.Setup(x => x.Client).Returns(mockClient.Object); + mockContext.Setup(x => x.ClientOptions).Returns(new CosmosClientOptions()); containerMock.Setup(c => c.ClientContext).Returns(mockContext.Object); containerMock.Setup(c => c.Id).Returns("containerId"); containerMock.Setup(c => c.Database.Id).Returns("databaseId"); diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Telemetry/OpenTelemetryRecorderTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Telemetry/OpenTelemetryRecorderTests.cs index 9640097210..8fdc8d94dd 100644 --- 
a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Telemetry/OpenTelemetryRecorderTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Telemetry/OpenTelemetryRecorderTests.cs @@ -184,12 +184,12 @@ public async Task CheckResponseCompatibility() private static readonly Func> ChangefeedResponseFunc = (Type type) => { ConstructorInfo constructorInfo = type - .GetConstructor(BindingFlags.NonPublic | BindingFlags.Public | BindingFlags.Instance, null, new Type[] { typeof(ITrace), typeof(ReadOnlyCollection), typeof(double) }, null); + .GetConstructor(BindingFlags.NonPublic | BindingFlags.Public | BindingFlags.Instance, null, new Type[] { typeof(ITrace), typeof(ReadOnlyCollection), typeof(double), typeof(int) }, null); if (constructorInfo != null) { return (FeedResponse)constructorInfo.Invoke( new object[] { - NoOpTrace.Singleton, new List().AsReadOnly(), 10 }); + NoOpTrace.Singleton, new List().AsReadOnly(), 10, CosmosClientOptions.DefaultMaxDiagnosticsSummarySizeBytes }); } return null; @@ -198,12 +198,12 @@ public async Task CheckResponseCompatibility() private static readonly Func> ChangeFeedEstimatorEmptyFeedResponseFunc = (Type type) => { ConstructorInfo constructorInfo = type - .GetConstructor(BindingFlags.NonPublic | BindingFlags.Public | BindingFlags.Instance, null, new Type[] { typeof(ITrace) }, null); + .GetConstructor(BindingFlags.NonPublic | BindingFlags.Public | BindingFlags.Instance, null, new Type[] { typeof(ITrace), typeof(int) }, null); if (constructorInfo != null) { return (FeedResponse)constructorInfo.Invoke( new object[] { - NoOpTrace.Singleton}); + NoOpTrace.Singleton, CosmosClientOptions.DefaultMaxDiagnosticsSummarySizeBytes }); } return null; From becef8ede973611a8819de1667bc8aa2cb7e5769 Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Tue, 24 Mar 2026 10:55:08 -0700 Subject: [PATCH 5/9] Diagnostics: Fixes Debug.Assert crash by calling SetWalkingStateRecursively before 
trace traversal WriteSummary now calls SetWalkingStateRecursively() on the concrete Trace before accessing Data/Children properties, which have Debug.Assert guards requiring isBeingWalked to be true. Previously, only the CosmosTraceDiagnostics caller set this state, but direct callers (including unit tests) would crash the test host in Debug builds. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/Diagnostics/DiagnosticsSummaryWriter.cs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs b/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs index 8b96bfc7c7..d377e72518 100644 --- a/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs +++ b/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs @@ -37,6 +37,11 @@ public static string WriteSummary( throw new ArgumentNullException(nameof(trace)); } + if (trace is Tracing.Trace concreteTrace) + { + concreteTrace.SetWalkingStateRecursively(); + } + List entries = CollectRequestEntries(trace); string summaryJson = BuildSummaryJson(trace, entries); From 9f3be1ca8e8f77ee299353880f9c925ac6945bb6 Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Tue, 31 Mar 2026 10:33:28 -0700 Subject: [PATCH 6/9] Add diagnostics-compaction spec from PR #5644 Brings in the OpenSpec change spec (design, proposal, tasks) for the diagnostics compaction feature alongside the implementation. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../changes/diagnostics-compaction/design.md | 105 ++++++++++++++++++ .../diagnostics-compaction/proposal.md | 71 ++++++++++++ .../changes/diagnostics-compaction/tasks.md | 88 +++++++++++++++ 3 files changed, 264 insertions(+) create mode 100644 openspec/changes/diagnostics-compaction/design.md create mode 100644 openspec/changes/diagnostics-compaction/proposal.md create mode 100644 openspec/changes/diagnostics-compaction/tasks.md diff --git a/openspec/changes/diagnostics-compaction/design.md b/openspec/changes/diagnostics-compaction/design.md new file mode 100644 index 0000000000..d738f4045c --- /dev/null +++ b/openspec/changes/diagnostics-compaction/design.md @@ -0,0 +1,105 @@ +# Diagnostics Compaction — Design + +## Summary Compaction Algorithm + +### Data Collection + +Walk the `ITrace` tree (same traversal as `SummaryDiagnostics.CollectSummaryFromTraceTree()`) to collect all `StoreResponseStatistics` and `HttpResponseStatistics` entries from every `ClientSideRequestStatisticsTraceDatum` in the trace hierarchy. + +### Region Grouping + +Group collected entries by `Region` (string). Entries with a null/empty region are grouped under `"Unknown"`. + +### Per-Region Summary + +For each region group (ordered chronologically by request start time): + +1. **First**: Full details of the chronologically first request +2. **Last**: Full details of the chronologically last request (omitted if only 1 request) +3. **Middle entries** (all except first and last): Group by `(StatusCode, SubStatusCode)`: + - **Count**: Number of requests in this group + - **TotalRequestCharge**: Sum of RU charges + - **MinDurationMs / MaxDurationMs / P50DurationMs / AvgDurationMs**: Latency statistics + +### Size Enforcement + +1. Serialize the summary JSON +2. If `serializedBytes <= MaxDiagnosticsSummarySizeBytes` → return as-is +3. 
If `serializedBytes > MaxDiagnosticsSummarySizeBytes` → return truncated output + +### Handling Both Direct and Gateway Requests + +Both `StoreResponseStatistics` (direct mode) and `HttpResponseStatistics` (gateway mode) are collected and treated uniformly in the summary. The aggregated groups include entries from both transport paths. An optional `"TransportType"` field (`"Direct"` / `"Gateway"`) can be included in aggregated groups if the two transport paths need to be distinguished. + +## Request Flow + +```mermaid +flowchart TD + A["ToString(DiagnosticsVerbosity)"] --> B{Verbosity?} + B -->|Detailed| C["Existing TraceJsonWriter path"] + B -->|Summary| D["DiagnosticsSummaryWriter"] + D --> E["Walk ITrace tree"] + E --> F["Collect StoreResponseStatistics\n+ HttpResponseStatistics"] + F --> G["Group by Region"] + G --> H["Per region:\nFirst + Last + Aggregated Middle"] + H --> I["Serialize to JSON"] + I --> J{Size <= Max?} + J -->|Yes| K["Return summary JSON"] + J -->|No| L["Return truncated JSON"] + C --> M["Return full trace JSON"] +``` + +## Files to Create + +| File | Description | +|------|-------------| +| `Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsVerbosity.cs` | `DiagnosticsVerbosity` enum | +| `Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs` | Summary computation and JSON serialization logic | + +## Files to Modify + +| File | Change | +|------|--------| +| `CosmosClientOptions.cs` | Add `DiagnosticsVerbosity` and `MaxDiagnosticsSummarySizeBytes` properties with validation | +| `CosmosDiagnostics.cs` | Add `ToString(DiagnosticsVerbosity)` abstract overload | +| `CosmosTraceDiagnostics.cs` | Implement `ToString(DiagnosticsVerbosity)` overload; delegate to `DiagnosticsSummaryWriter` when verbosity is `Summary` | +| `TraceWriter.TraceJsonWriter.cs` | Add summary serialization path that delegates to `DiagnosticsSummaryWriter` when verbosity is `Summary` | +| `SummaryDiagnostics.cs` | Extend `CollectSummaryFromTraceTree()` to support region-grouped collection 
with ordering | +| `ClientSideRequestStatisticsTraceDatum.cs` | Ensure `StoreResponseStatistics` and `HttpResponseStatistics` lists are accessible for summary computation | + +## Contract/Baseline Updates + +| File | Change | +|------|--------| +| `ContractEnforcementTests.cs` baseline | Update public API contract for new enum and properties | + +## Alternatives Considered + +### Alternative 1: Emit summary alongside truncated trace tree +Instead of replacing the full trace, emit the summary _alongside_ the first + last children of the trace tree. + +**Pros:** Preserves some trace structure for tooling that parses it. +**Cons:** Larger output size; complex to implement; defeats the purpose of compaction. +**Decision:** Rejected — summary replaces the full trace. The `First` and `Last` entries in each region summary provide the detailed bookends. + +### Alternative 2: Per-request verbosity via RequestOptions +Add a `DiagnosticsVerbosity` property to `RequestOptions` for per-request control. + +**Pros:** More granular control. +**Cons:** Verbosity is a serialization concern, not a request concern. The `ToString(DiagnosticsVerbosity)` overload provides the same flexibility without complicating `RequestOptions`. +**Decision:** Deferred. Can be added later if needed. + +### Alternative 3: Transport type distinction in aggregated groups +Include a `TransportType` field (`"Direct"` / `"Gateway"`) in each aggregated group. + +**Pros:** Helps distinguish transport-specific issues. +**Cons:** Increases output size; `StatusCode/SubStatusCode` is usually sufficient. +**Decision:** Deferred. Can add later if customer feedback warrants it. 
+ +## Key References + +- `Microsoft.Azure.Cosmos/src/Diagnostics/CosmosTraceDiagnostics.cs` — concrete diagnostics implementation +- `Microsoft.Azure.Cosmos/src/Tracing/TraceWriter.TraceJsonWriter.cs` — current trace serialization +- `Microsoft.Azure.Cosmos/src/Diagnostics/SummaryDiagnostics.cs` — existing summary aggregation (foundation) +- `Microsoft.Azure.Cosmos/src/Tracing/TraceData/ClientSideRequestStatisticsTraceDatum.cs` — stats data +- `docs/SdkDesign.md` — SDK architecture overview diff --git a/openspec/changes/diagnostics-compaction/proposal.md b/openspec/changes/diagnostics-compaction/proposal.md new file mode 100644 index 0000000000..b0db031a50 --- /dev/null +++ b/openspec/changes/diagnostics-compaction/proposal.md @@ -0,0 +1,71 @@ +# Diagnostics Compaction — Proposal + +## Problem + +`CosmosDiagnostics.ToString()` produces a JSON trace that grows **unboundedly** with retries. Each retry attempt creates a new child `ITrace` node containing a full `ClientSideRequestStatisticsTraceDatum` with complete `StoreResponseStatistics` and `HttpResponseStatistics` entries. In pathological scenarios (sustained 429 throttling, transient failures, cross-region failovers), a single operation's diagnostics can grow to hundreds of KB. + +**Impact:** +- **Log truncation** — monitoring systems (Application Insights, Azure Monitor, etc.) silently drop oversized log entries +- **Memory pressure** — large diagnostic strings increase GC overhead, especially at high throughput +- **Readability** — operators cannot quickly extract signal from noise when hundreds of identical retry entries are listed + +**Example scenario:** A point read that encounters 50 retries due to 429 throttling in West US 2, then fails over to East US 2 with 10 more retries, produces ~60 full `StoreResponseStatistics` entries in the trace tree. With summary mode, this compacts to: first request + last request + 1 aggregated group per region. 
+ +## Proposed Approach + +Introduce a **`DiagnosticsVerbosity`** concept (modeled after [Azure/azure-sdk-for-rust#3592](https://github.com/Azure/azure-sdk-for-rust/pull/3592)) that controls how `CosmosDiagnostics.ToString()` serializes trace data: + +| Mode | Behavior | Use Case | +|------|----------|----------| +| **Detailed** (default) | Current behavior — full trace tree output | Debugging, development | +| **Summary** | Region-grouped compaction with first/last + aggregated middle | Production logging, size-constrained environments | + +**Key design principle:** The in-memory representation (`ITrace` tree, `ClientSideRequestStatisticsTraceDatum`) stays **unchanged**. Compaction only happens at **serialization time** in the `TraceJsonWriter` path. This preserves full programmatic access to diagnostics data while reducing serialized output size. + +## SDK Area + +- **Primary:** Diagnostics +- **Secondary:** Client-config (new options properties) + +## Preview vs GA + +The `DiagnosticsVerbosity` enum and related options should ship as **GA** (non-preview) since it's an additive, backward-compatible feature with no impact when not opted into. + +## Backward Compatibility + +- **Default is `Detailed`** — no behavioral change for existing users +- **No breaking changes** — `ToString()` output format only changes when `Summary` is explicitly opted into +- **Programmatic API unchanged** — `GetContactedRegions()`, `GetFailedRequestCount()`, etc. continue to work from the full in-memory trace regardless of verbosity + +## Rollout Strategy + +1. Ship with `Detailed` as default in initial release +2. Document `Summary` mode in SDK documentation and changelog +3. 
Consider making `Summary` the default in a future major version after customer feedback + +## Non-Goals + +- Changing the in-memory `ITrace` tree structure +- Modifying the `Detailed` mode output format +- Adding new programmatic APIs beyond the `ToString(DiagnosticsVerbosity)` overload +- Per-request verbosity override via `RequestOptions` (can be added later) + +## Resolved Questions + +1. **Should `AggregatedGroups` include an `AvgDurationMs` field?** The Rust SDK only includes min/max/P50. Adding avg is cheap to compute but adds to the output size. _Decision: Include avg. It's a single field and provides useful signal._ + +2. **Should the summary include the `children` trace tree at all?** Currently proposed as replacing the entire trace output. An alternative is to emit the summary _alongside_ a truncated trace tree (e.g., first + last children only). _Decision: Summary replaces the full trace. The `First` and `Last` entries in each region summary provide the detailed bookends._ + +3. **Gateway vs Direct distinction in aggregated groups.** Should each `AggregatedGroup` indicate whether it's from Direct or Gateway transport? _Decision: Defer. The `StatusCode/SubStatusCode` combination is usually sufficient. Can add a `TransportType` field later if needed._ + +4. **Caching.** The Rust SDK caches serialized JSON per verbosity level via `OnceLock`. Should the .NET SDK cache the summary JSON? _Decision: Yes, use `Lazy<string>` or similar. `ToString()` may be called multiple times (logging, telemetry, etc.)._ + +5. **Thread safety.** `CosmosDiagnostics.Verbosity` as a settable property on a potentially shared object needs consideration. _Decision: Use the `ToString(DiagnosticsVerbosity)` overload, which avoids mutating state entirely. 
The property is set once from `CosmosClientOptions` during response creation and read during serialization._ + +## References + +- **Rust SDK PR:** [Azure/azure-sdk-for-rust#3592](https://github.com/Azure/azure-sdk-for-rust/pull/3592) — `DiagnosticsContext` with `Summary` and `Detailed` modes +- **Current .NET diagnostics:** `Microsoft.Azure.Cosmos/src/Diagnostics/` and `Microsoft.Azure.Cosmos/src/Tracing/` +- **Existing summary:** `SummaryDiagnostics.cs` — aggregates `(StatusCode, SubStatusCode)` counts (foundation to build on) +- **Trace tree:** `ITrace` → `Trace` with recursive children and `ClientSideRequestStatisticsTraceDatum` data +- **Related spec:** `openspec/specs/diagnostics-and-observability/spec.md` diff --git a/openspec/changes/diagnostics-compaction/tasks.md b/openspec/changes/diagnostics-compaction/tasks.md new file mode 100644 index 0000000000..6048a7aac1 --- /dev/null +++ b/openspec/changes/diagnostics-compaction/tasks.md @@ -0,0 +1,88 @@ +# Diagnostics Compaction — Tasks + +## Task 1: DiagnosticsVerbosity Enum & Options Plumbing + +**Scope:** Create the enum, add `DiagnosticsVerbosity` and `MaxDiagnosticsSummarySizeBytes` properties to `CosmosClientOptions`, add `ToString(DiagnosticsVerbosity)` abstract overload to `CosmosDiagnostics`, add environment variable support. + +**Acceptance:** `ToString(verbosity)` overloads compile and delegate correctly. Parameterless `ToString()` is unchanged (always `Detailed`). No behavioral change yet. + +**Spec requirements:** Diagnostics Verbosity (default verbosity, parameterless ToString, environment variable configuration, code-level override, verbosity precedence) + +## Task 2: Summary Computation Engine + +**Scope:** Implement `DiagnosticsSummaryWriter` — the core logic that walks the trace tree, collects stats, groups by region, computes first/last/aggregated groups, and produces the summary JSON structure. + +**Acceptance:** Given an `ITrace` tree, produces the correct summary JSON. 
Unit-testable in isolation. + +**Spec requirements:** Summary mode region grouping, first/last preservation, single request region, aggregated groups, mixed Direct and Gateway, region ordering + +## Task 3: Summary Serialization Integration + +**Scope:** Implement `CosmosTraceDiagnostics.ToString(DiagnosticsVerbosity)`. When `Summary`, delegate to `DiagnosticsSummaryWriter`. Implement size enforcement and truncated output fallback. Implement caching. Parameterless `ToString()` remains unchanged. + +**Acceptance:** `ToString(DiagnosticsVerbosity.Summary)` returns compact summary JSON. `ToString()` (parameterless) continues to return full `Detailed` trace. + +**Spec requirements:** In-memory trace tree unchanged, size enforcement, size under limit, summary mode caching, Summary JSON Format, truncated output format + +## Task 4: Contract Updates & Public API Validation + +**Scope:** Update `ContractEnforcementTests` baselines for new public API surface. Ensure the new enum and properties appear in contracts. + +**Acceptance:** All contract tests pass. Public API is correctly documented. + +## Task 5: Unit Tests + +**Scope:** Comprehensive unit tests for the summary engine. 
+ +| Test | Description | Spec Requirement | +|------|-------------|------------------| +| `DiagnosticsVerbosity_DefaultIsDetailed` | Verify enum default | Default verbosity is Detailed | +| `CosmosClientOptions_DiagnosticsVerbosity_DefaultValue` | Verify options default | Default verbosity is Detailed | +| `CosmosClientOptions_MaxSummarySizeBytes_Validation` | Min 4096 enforced | MaxDiagnosticsSummarySizeBytes minimum validation | +| `CosmosClientOptions_DiagnosticsVerbosity_EnvVarFallback` | Env var populates options | Environment variable configuration | +| `CosmosClientOptions_DiagnosticsVerbosity_CodeOverridesEnvVar` | Code takes precedence | Code-level value overrides env var | +| `ToString_Overload_UsesSummary_WhenExplicit` | `ToString(Summary)` produces summary | Verbosity precedence | +| `Summary_SingleRegion_SingleRequest` | No deduplication, first only | Single request region | +| `Summary_SingleRegion_TwoRequests` | First + last, no middle | First/last preservation | +| `Summary_SingleRegion_ManyRetries_429` | First + last + 1 aggregated group | Aggregated groups | +| `Summary_MultiRegion_Failover` | Separate region summaries | Region grouping | +| `Summary_MixedStatusCodes` | Multiple aggregated groups per region | Aggregated groups | +| `Summary_DirectAndGateway_Combined` | Both transport types in summary | Mixed Direct and Gateway | +| `Summary_P50_OddCount` | Percentile on odd-sized collection | Aggregated groups | +| `Summary_P50_EvenCount` | Percentile on even-sized collection | Aggregated groups | +| `Summary_P50_SingleItem` | Percentile with 1 item | Aggregated groups | +| `Summary_SizeEnforcement_UnderLimit` | Summary fits within max size | Size under limit | +| `Summary_SizeEnforcement_OverLimit_Truncated` | Falls back to truncated output | Size enforcement | +| `Summary_EmptyTrace` | No requests produces minimal output | Region grouping | +| `Summary_RegionOrdering_Deterministic` | Regions sorted alphabetically | Region ordering | +| 
`Detailed_Mode_Unchanged` | Existing detailed output is byte-for-byte identical | Parameterless ToString | +| `ToString_Parameterless_AlwaysDetailed` | Parameterless always returns Detailed | Parameterless ToString | + +## Task 6: Integration Tests (Emulator) + +| Test | Description | Spec Requirement | +|------|-------------|------------------| +| `ReadItem_SummaryMode_ProducesValidJson` | Real read → summary JSON parses correctly | Summary JSON Format | +| `ReadItem_SummaryMode_SizeWithinLimit` | Summary output ≤ configured max bytes | Size under limit | +| `QueryItems_SummaryMode_MultipleRequests` | Query with continuations → summary compacts | Aggregated groups | +| `BulkOperations_SummaryMode_HighRetryCount` | Simulate throttling → verify compaction | Aggregated groups | +| `CrossRegion_SummaryMode_RegionGroups` | Multi-region → separate region summaries | Region grouping | + +## Task 7: Baseline / Golden-File Tests + +**Scope:** Create baseline JSON files for summary mode output (similar to existing `EndToEndTraceWriterBaselineTests`). Verify serialization stability across code changes. + +**Spec requirements:** Summary JSON Format, truncated output format + +## Task 8: Changelog & Documentation + +**Scope:** Update `changelog.md` with the new feature. Update `.github/copilot-instructions.md` if diagnostics verbosity affects AI assistant behavior. 
+ +## Expected Size Reductions + +| Scenario | Detailed Size | Expected Summary Size | Reduction | +|----------|--------------|----------------------|-----------| +| 1 request, no retries | ~2 KB | ~1 KB | ~50% | +| 10 retries, same region | ~20 KB | ~2 KB | ~90% | +| 50 retries, 2 regions | ~100 KB | ~3 KB | ~97% | +| 100 retries, 3 regions | ~200 KB | ~4 KB | ~98% | From e9fc8fe349252341287c64e427ad282b21b19270 Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Wed, 1 Apr 2026 09:46:06 -0700 Subject: [PATCH 7/9] Diagnostics: Addresses PR review feedback for diagnostics compaction - Remove redundant SetWalkingStateRecursively() call from Lazy lambda (blocking) - Use switch expression for DiagnosticsVerbosity in CosmosTraceDiagnostics - Add cycle guard (HashSet visited) to CollectRequestEntriesRecursive - Add ActivityId to summary JSON output from PointOperationStatisticsTraceDatum - Add TotalRequestCharge to truncated summary output (BuildTruncatedJson) - Document v1 tradeoff of full summary computation before size check - Add caching for Summary path in EncryptionCosmosDiagnostics.ToString(verbosity) - Replace env var string literals with ConfigurationManager constants in tests - Add HttpResponseStatistics (Gateway mode) tests: single request, sub-status code extraction, mixed Direct+Gateway - Update truncated baseline test to expect TotalRequestCharge field - Update design.md spec to reflect actual implementation files Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/EncryptionCosmosDiagnostics.cs | 60 ++++++---- .../src/Diagnostics/CosmosTraceDiagnostics.cs | 19 +-- .../Diagnostics/DiagnosticsSummaryWriter.cs | 83 +++++++++++-- .../CosmosClientOptionsUnitTests.cs | 37 +++--- .../DiagnosticsSummaryBaselineTests.cs | 1 + .../DiagnosticsSummaryWriterTests.cs | 111 ++++++++++++++++++ .../changes/diagnostics-compaction/design.md | 24 ++-- 7 files changed, 257 insertions(+), 78 
deletions(-) diff --git a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionCosmosDiagnostics.cs b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionCosmosDiagnostics.cs index ef0d35fde5..4646f02dea 100644 --- a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionCosmosDiagnostics.cs +++ b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionCosmosDiagnostics.cs @@ -17,6 +17,9 @@ internal sealed class EncryptionCosmosDiagnostics : CosmosDiagnostics private readonly JObject encryptContent; private readonly JObject decryptContent; private readonly TimeSpan processingDuration; +#if SDKPROJECTREF + private string cachedSummaryDiagnostics; +#endif public EncryptionCosmosDiagnostics( CosmosDiagnostics coreDiagnostics, @@ -90,39 +93,48 @@ public override string ToString() #if SDKPROJECTREF public override string ToString(DiagnosticsVerbosity verbosity) { - if (verbosity == DiagnosticsVerbosity.Summary) + switch (verbosity) { - StringBuilder stringBuilder = new StringBuilder(); - StringWriter stringWriter = new StringWriter(stringBuilder); - - using (JsonWriter writer = new JsonTextWriter(stringWriter)) - { - writer.WriteStartObject(); - writer.WritePropertyName(Constants.DiagnosticsCoreDiagnostics); - writer.WriteRawValue(this.coreDiagnostics.ToString(verbosity)); - writer.WritePropertyName(Constants.DiagnosticsEncryptionDiagnostics); - writer.WriteStartObject(); - - if (this.encryptContent != null) + case DiagnosticsVerbosity.Summary: + if (this.cachedSummaryDiagnostics != null) { - writer.WritePropertyName(Constants.DiagnosticsEncryptOperation); - writer.WriteRawValue(this.encryptContent.ToString()); + return this.cachedSummaryDiagnostics; } - if (this.decryptContent != null) + StringBuilder stringBuilder = new StringBuilder(); + StringWriter stringWriter = new StringWriter(stringBuilder); + + using (JsonWriter writer = new JsonTextWriter(stringWriter)) { - writer.WritePropertyName(Constants.DiagnosticsDecryptOperation); - writer.WriteRawValue(this.decryptContent.ToString()); + 
writer.WriteStartObject(); + writer.WritePropertyName(Constants.DiagnosticsCoreDiagnostics); + writer.WriteRawValue(this.coreDiagnostics.ToString(verbosity)); + writer.WritePropertyName(Constants.DiagnosticsEncryptionDiagnostics); + writer.WriteStartObject(); + + if (this.encryptContent != null) + { + writer.WritePropertyName(Constants.DiagnosticsEncryptOperation); + writer.WriteRawValue(this.encryptContent.ToString()); + } + + if (this.decryptContent != null) + { + writer.WritePropertyName(Constants.DiagnosticsDecryptOperation); + writer.WriteRawValue(this.decryptContent.ToString()); + } + + writer.WriteEndObject(); + writer.WriteEndObject(); } - writer.WriteEndObject(); - writer.WriteEndObject(); - } + this.cachedSummaryDiagnostics = stringWriter.ToString(); + return this.cachedSummaryDiagnostics; - return stringWriter.ToString(); + case DiagnosticsVerbosity.Detailed: + default: + return this.ToString(); } - - return this.ToString(); } public override DateTime? GetStartTimeUtc() diff --git a/Microsoft.Azure.Cosmos/src/Diagnostics/CosmosTraceDiagnostics.cs b/Microsoft.Azure.Cosmos/src/Diagnostics/CosmosTraceDiagnostics.cs index c8fdeab3c1..6b1f5d5380 100644 --- a/Microsoft.Azure.Cosmos/src/Diagnostics/CosmosTraceDiagnostics.cs +++ b/Microsoft.Azure.Cosmos/src/Diagnostics/CosmosTraceDiagnostics.cs @@ -41,14 +41,7 @@ internal CosmosTraceDiagnostics(ITrace trace, int maxDiagnosticsSummarySizeBytes this.Value = rootTrace; this.accumulatedMetrics = new Lazy(() => PopulateServerSideCumulativeMetrics(this.Value)); this.cachedSummaryJson = new Lazy(() => - { - if (this.Value is Tracing.Trace rootConcreteTrace2) - { - rootConcreteTrace2.SetWalkingStateRecursively(); - } - - return DiagnosticsSummaryWriter.WriteSummary(this.Value, maxDiagnosticsSummarySizeBytes); - }); + DiagnosticsSummaryWriter.WriteSummary(this.Value, maxDiagnosticsSummarySizeBytes)); } public ITrace Value { get; } @@ -65,12 +58,12 @@ public override string ToString() public override string 
ToString(DiagnosticsVerbosity verbosity) { - if (verbosity == DiagnosticsVerbosity.Summary) + return verbosity switch { - return this.cachedSummaryJson.Value; - } - - return this.ToString(); + DiagnosticsVerbosity.Summary => this.cachedSummaryJson.Value, + DiagnosticsVerbosity.Detailed => this.ToString(), + _ => this.ToString(), + }; } public override TimeSpan GetClientElapsedTime() diff --git a/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs b/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs index d377e72518..3556b979f1 100644 --- a/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs +++ b/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs @@ -9,6 +9,8 @@ namespace Microsoft.Azure.Cosmos.Diagnostics using System.Globalization; using System.Linq; using System.Net; + using System.Net.Http; + using System.Runtime.CompilerServices; using System.Text; using Microsoft.Azure.Cosmos.Json; using Microsoft.Azure.Cosmos.Tracing; @@ -44,25 +46,41 @@ public static string WriteSummary( List entries = CollectRequestEntries(trace); - string summaryJson = BuildSummaryJson(trace, entries); + double totalRequestCharge = 0; + foreach (RequestEntry e in entries) + { + totalRequestCharge += e.RequestCharge; + } + + string summaryJson = BuildSummaryJson(trace, entries, totalRequestCharge); + // v1 tradeoff: the full summary JSON is always computed before checking the size limit. + // For pathological scenarios with hundreds of retries, this means allocating and then + // potentially discarding the full string. A future optimization could estimate output + // size before serialization or use a streaming approach that bails early. 
if (Encoding.UTF8.GetByteCount(summaryJson) <= maxSizeBytes) { return summaryJson; } - return BuildTruncatedJson(trace, entries.Count); + return BuildTruncatedJson(trace, entries.Count, totalRequestCharge); } private static List CollectRequestEntries(ITrace trace) { List entries = new List(); - CollectRequestEntriesRecursive(trace, entries); + HashSet visited = new HashSet(TraceReferenceEqualityComparer.Instance); + CollectRequestEntriesRecursive(trace, entries, visited); return entries; } - private static void CollectRequestEntriesRecursive(ITrace currentTrace, List entries) + private static void CollectRequestEntriesRecursive(ITrace currentTrace, List entries, HashSet visited) { + if (!visited.Add(currentTrace)) + { + return; + } + foreach (object datum in currentTrace.Data.Values) { if (datum is ClientSideRequestStatisticsTraceDatum clientSideStats) @@ -127,7 +145,7 @@ private static void CollectRequestEntriesRecursive(ITrace currentTrace, List entries) + private static string BuildSummaryJson(ITrace trace, List entries, double totalRequestCharge) { IJsonWriter writer = JsonWriter.Create(JsonSerializationFormat.Text); writer.WriteObjectStart(); @@ -160,15 +178,16 @@ private static string BuildSummaryJson(ITrace trace, List entries) writer.WriteFieldName("DiagnosticsVerbosity"); writer.WriteStringValue("Summary"); - writer.WriteFieldName("TotalDurationMs"); - writer.WriteNumberValue(trace.Duration.TotalMilliseconds); - - double totalRequestCharge = 0; - foreach (RequestEntry e in entries) + string activityId = FindActivityId(trace); + if (activityId != null) { - totalRequestCharge += e.RequestCharge; + writer.WriteFieldName("ActivityId"); + writer.WriteStringValue(activityId); } + writer.WriteFieldName("TotalDurationMs"); + writer.WriteNumberValue(trace.Duration.TotalMilliseconds); + writer.WriteFieldName("TotalRequestCharge"); writer.WriteNumberValue(totalRequestCharge); @@ -395,7 +414,7 @@ private static double ComputeP50(List sortedValues) return 
sortedValues[midIndex]; } - private static string BuildTruncatedJson(ITrace trace, int totalRequestCount) + private static string BuildTruncatedJson(ITrace trace, int totalRequestCount, double totalRequestCharge) { IJsonWriter writer = JsonWriter.Create(JsonSerializationFormat.Text); writer.WriteObjectStart(); @@ -408,6 +427,9 @@ private static string BuildTruncatedJson(ITrace trace, int totalRequestCount) writer.WriteFieldName("TotalDurationMs"); writer.WriteNumberValue(trace.Duration.TotalMilliseconds); + writer.WriteFieldName("TotalRequestCharge"); + writer.WriteNumberValue(totalRequestCharge); + writer.WriteFieldName("TotalRequestCount"); writer.WriteNumberValue(totalRequestCount); @@ -424,6 +446,32 @@ private static string BuildTruncatedJson(ITrace trace, int totalRequestCount) return Encoding.UTF8.GetString(writer.GetResult().Span); } + /// + /// Finds the ActivityId from a PointOperationStatisticsTraceDatum in the trace tree. + /// + private static string FindActivityId(ITrace trace) + { + foreach (object datum in trace.Data.Values) + { + if (datum is PointOperationStatisticsTraceDatum pointOpStats + && !string.IsNullOrEmpty(pointOpStats.ActivityId)) + { + return pointOpStats.ActivityId; + } + } + + foreach (ITrace child in trace.Children) + { + string activityId = FindActivityId(child); + if (activityId != null) + { + return activityId; + } + } + + return null; + } + /// /// Internal representation of a single request entry collected from the trace tree. 
/// @@ -461,5 +509,14 @@ public RequestEntry( public string OperationType { get; } public string ResourceType { get; } } + + private sealed class TraceReferenceEqualityComparer : IEqualityComparer + { + public static readonly TraceReferenceEqualityComparer Instance = new TraceReferenceEqualityComparer(); + + public bool Equals(ITrace x, ITrace y) => ReferenceEquals(x, y); + + public int GetHashCode(ITrace obj) => RuntimeHelpers.GetHashCode(obj); + } } } diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs index c0cf1cde26..bcbf8942cb 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs @@ -17,6 +17,7 @@ namespace Microsoft.Azure.Cosmos.Tests using System.Text; using global::Azure.Core; using Microsoft.Azure.Cosmos.Fluent; + using Microsoft.Azure.Cosmos.Util; using Microsoft.Azure.Documents; using Microsoft.Azure.Documents.Client; using Microsoft.VisualStudio.TestTools.UnitTesting; @@ -1356,90 +1357,90 @@ public void CosmosClientOptions_MaxDiagnosticsSummarySizeBytes_AcceptsLargeValue [TestMethod] public void CosmosClientOptions_EnvVar_DiagnosticsVerbosity_Summary() { - string original = Environment.GetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY"); + string original = Environment.GetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable); try { - Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY", "Summary"); + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable, "Summary"); CosmosClientOptions options = new CosmosClientOptions(); Assert.AreEqual(DiagnosticsVerbosity.Summary, options.DiagnosticsVerbosity); } finally { - Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY", original); + 
Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable, original); } } [TestMethod] public void CosmosClientOptions_EnvVar_DiagnosticsVerbosity_CaseInsensitive() { - string original = Environment.GetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY"); + string original = Environment.GetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable); try { - Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY", "summary"); + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable, "summary"); CosmosClientOptions options = new CosmosClientOptions(); Assert.AreEqual(DiagnosticsVerbosity.Summary, options.DiagnosticsVerbosity); } finally { - Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY", original); + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable, original); } } [TestMethod] public void CosmosClientOptions_EnvVar_DiagnosticsVerbosity_InvalidIgnored() { - string original = Environment.GetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY"); + string original = Environment.GetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable); try { - Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY", "InvalidValue"); + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable, "InvalidValue"); CosmosClientOptions options = new CosmosClientOptions(); Assert.AreEqual(DiagnosticsVerbosity.Detailed, options.DiagnosticsVerbosity); } finally { - Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY", original); + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable, original); } } [TestMethod] public void CosmosClientOptions_EnvVar_MaxSummarySize() { - string original = Environment.GetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_MAX_SUMMARY_SIZE"); + string original = 
Environment.GetEnvironmentVariable(ConfigurationManager.DiagnosticsMaxSummarySizeVariable); try { - Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_MAX_SUMMARY_SIZE", "16384"); + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsMaxSummarySizeVariable, "16384"); CosmosClientOptions options = new CosmosClientOptions(); Assert.AreEqual(16384, options.MaxDiagnosticsSummarySizeBytes); } finally { - Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_MAX_SUMMARY_SIZE", original); + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsMaxSummarySizeVariable, original); } } [TestMethod] public void CosmosClientOptions_EnvVar_MaxSummarySize_BelowMinimumIgnored() { - string original = Environment.GetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_MAX_SUMMARY_SIZE"); + string original = Environment.GetEnvironmentVariable(ConfigurationManager.DiagnosticsMaxSummarySizeVariable); try { - Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_MAX_SUMMARY_SIZE", "1024"); + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsMaxSummarySizeVariable, "1024"); CosmosClientOptions options = new CosmosClientOptions(); Assert.AreEqual(8192, options.MaxDiagnosticsSummarySizeBytes, "Below-minimum env var should be ignored"); } finally { - Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_MAX_SUMMARY_SIZE", original); + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsMaxSummarySizeVariable, original); } } [TestMethod] public void CosmosClientOptions_EnvVar_ExplicitPropertyOverridesEnvVar() { - string original = Environment.GetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY"); + string original = Environment.GetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable); try { - Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY", "Summary"); + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable, "Summary"); 
CosmosClientOptions options = new CosmosClientOptions { DiagnosticsVerbosity = DiagnosticsVerbosity.Detailed @@ -1448,7 +1449,7 @@ public void CosmosClientOptions_EnvVar_ExplicitPropertyOverridesEnvVar() } finally { - Environment.SetEnvironmentVariable("AZURE_COSMOS_DIAGNOSTICS_VERBOSITY", original); + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable, original); } } diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryBaselineTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryBaselineTests.cs index 5df68bb771..2f90b2e183 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryBaselineTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryBaselineTests.cs @@ -179,6 +179,7 @@ public void Baseline_TruncatedSummary_SchemaFields() "DiagnosticsVerbosity", "TotalDurationMs", "TotalRequestCount", + "TotalRequestCharge", "Truncated", "Message" }; diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryWriterTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryWriterTests.cs index 2abe3993d6..39f03b39c7 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryWriterTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryWriterTests.cs @@ -6,6 +6,9 @@ namespace Microsoft.Azure.Cosmos.Tests.Tracing { using System; using System.Collections.Generic; + using System.Net; + using System.Net.Http; + using System.Net.Http.Headers; using System.Reflection; using System.Linq; using Microsoft.Azure.Cosmos.Diagnostics; @@ -357,6 +360,7 @@ public void Summary_SizeEnforcement_OverLimit_Truncated() Assert.IsTrue(summaryObj["Truncated"].Value(), "Should be truncated"); 
Assert.IsNotNull(summaryObj["Message"]); Assert.AreEqual(200, summaryObj["TotalRequestCount"].Value()); + Assert.IsNotNull(summaryObj["TotalRequestCharge"], "Truncated summary should include TotalRequestCharge"); } [TestMethod] @@ -523,6 +527,113 @@ private static ClientSideRequestStatisticsTraceDatum GetOrCreateDatum(ITrace tra return datum; } + private static void AddHttpResponseStatistic( + ITrace trace, + string region, + HttpStatusCode statusCode, + int subStatusCode, + double requestCharge, + double durationMs, + DateTime requestStartTime) + { + ClientSideRequestStatisticsTraceDatum datum = GetOrCreateDatum(trace); + + HttpResponseMessage responseMessage = new HttpResponseMessage(statusCode); + responseMessage.Headers.Add(WFConstants.BackendHeaders.SubStatus, subStatusCode.ToString()); + responseMessage.Headers.Add(HttpConstants.HttpHeaders.RequestCharge, requestCharge.ToString()); + responseMessage.Headers.Add(HttpConstants.HttpHeaders.ActivityId, Guid.NewGuid().ToString()); + + DateTime requestEndTime = requestStartTime.AddMilliseconds(durationMs); + + HttpResponseStatistics httpStats = new HttpResponseStatistics( + requestStartTime: requestStartTime, + requestEndTime: requestEndTime, + requestUri: new Uri("https://account-" + (region ?? 
"unknown").Replace(" ", "").ToLower() + ".documents.azure.com"), + httpMethod: HttpMethod.Get, + resourceType: ResourceType.Document, + responseMessage: responseMessage, + exception: null, + region: region); + + FieldInfo field = typeof(ClientSideRequestStatisticsTraceDatum) + .GetField("httpResponseStatistics", BindingFlags.NonPublic | BindingFlags.Instance); + List list = (List)field.GetValue(datum); + list.Add(httpStats); + } + + #endregion + + #region Gateway Mode (HttpResponseStatistics) Tests + + [TestMethod] + public void Summary_GatewayMode_SingleRequest() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + + AddHttpResponseStatistic(trace, "West US 2", HttpStatusCode.OK, 0, 3.5, 15, baseTime); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + + Assert.AreEqual(1, summaryObj["TotalRequestCount"].Value()); + Assert.AreEqual(3.5, summaryObj["TotalRequestCharge"].Value()); + + JArray regions = (JArray)summaryObj["RegionsSummary"]; + Assert.AreEqual(1, regions.Count); + JObject region = (JObject)regions[0]; + Assert.AreEqual("West US 2", region["Region"].ToString()); + + JObject first = (JObject)region["First"]; + Assert.AreEqual(200, first["StatusCode"].Value()); + Assert.AreEqual(0, first["SubStatusCode"].Value()); + Assert.AreEqual(3.5, first["RequestCharge"].Value()); + } + + [TestMethod] + public void Summary_GatewayMode_SubStatusCodeExtraction() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + + AddHttpResponseStatistic(trace, "East US", HttpStatusCode.TooManyRequests, 3200, 0, 5, baseTime); + AddHttpResponseStatistic(trace, "East US", HttpStatusCode.OK, 0, 5.0, 12, baseTime.AddSeconds(1)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject region = 
(JObject)parsed["Summary"]["RegionsSummary"][0]; + + JObject first = (JObject)region["First"]; + Assert.AreEqual(429, first["StatusCode"].Value()); + Assert.AreEqual(3200, first["SubStatusCode"].Value()); + } + + [TestMethod] + public void Summary_MixedDirectAndGateway() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + + // Direct mode request + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, baseTime); + + // Gateway mode request + AddHttpResponseStatistic(trace, "West US 2", HttpStatusCode.OK, 0, 3.0, 15, baseTime.AddMilliseconds(100)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + + // Both Direct and Gateway entries should be collected + Assert.AreEqual(2, summaryObj["TotalRequestCount"].Value()); + Assert.IsNotNull(summaryObj["TotalRequestCharge"]); + + JArray regions = (JArray)summaryObj["RegionsSummary"]; + Assert.AreEqual(1, regions.Count, "Both entries are in the same region"); + Assert.AreEqual(2, regions[0]["RequestCount"].Value()); + } + #endregion #region Edge Case Tests diff --git a/openspec/changes/diagnostics-compaction/design.md b/openspec/changes/diagnostics-compaction/design.md index d738f4045c..1024dd9b82 100644 --- a/openspec/changes/diagnostics-compaction/design.md +++ b/openspec/changes/diagnostics-compaction/design.md @@ -49,23 +49,27 @@ flowchart TD C --> M["Return full trace JSON"] ``` -## Files to Create +## Files Created | File | Description | |------|-------------| | `Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsVerbosity.cs` | `DiagnosticsVerbosity` enum | | `Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs` | Summary computation and JSON serialization logic | -## Files to Modify +## Files Modified | File | Change | |------|--------| | `CosmosClientOptions.cs` | Add `DiagnosticsVerbosity` 
and `MaxDiagnosticsSummarySizeBytes` properties with validation | +| `CosmosClientBuilder.cs` | Add `WithDiagnosticsVerbosity()` and `WithMaxDiagnosticsSummarySizeBytes()` builder methods | +| `ConfigurationManager.cs` | Add environment variable constants for diagnostics verbosity and max summary size | | `CosmosDiagnostics.cs` | Add `ToString(DiagnosticsVerbosity)` abstract overload | -| `CosmosTraceDiagnostics.cs` | Implement `ToString(DiagnosticsVerbosity)` overload; delegate to `DiagnosticsSummaryWriter` when verbosity is `Summary` | -| `TraceWriter.TraceJsonWriter.cs` | Add summary serialization path that delegates to `DiagnosticsSummaryWriter` when verbosity is `Summary` | -| `SummaryDiagnostics.cs` | Extend `CollectSummaryFromTraceTree()` to support region-grouped collection with ordering | -| `ClientSideRequestStatisticsTraceDatum.cs` | Ensure `StoreResponseStatistics` and `HttpResponseStatistics` lists are accessible for summary computation | +| `CosmosTraceDiagnostics.cs` | Implement `ToString(DiagnosticsVerbosity)` overload with `Lazy` caching; delegate to `DiagnosticsSummaryWriter` when verbosity is `Summary` | +| `EncryptionCosmosDiagnostics.cs` | Implement `ToString(DiagnosticsVerbosity)` overload (`SDKPROJECTREF`-gated) with caching | +| `ContainerCore.cs` | Wire `MaxDiagnosticsSummarySizeBytes` from options | +| `ReadManyQueryHelper.cs` | Wire `MaxDiagnosticsSummarySizeBytes` from options | +| `CosmosLinqQuery.cs` | Wire `MaxDiagnosticsSummarySizeBytes` from options | +| `ChangeFeedEstimatorIterator.cs` | Wire `MaxDiagnosticsSummarySizeBytes` from options | ## Contract/Baseline Updates @@ -99,7 +103,7 @@ Include a `TransportType` field (`"Direct"` / `"Gateway"`) in each aggregated gr ## Key References - `Microsoft.Azure.Cosmos/src/Diagnostics/CosmosTraceDiagnostics.cs` — concrete diagnostics implementation -- `Microsoft.Azure.Cosmos/src/Tracing/TraceWriter.TraceJsonWriter.cs` — current trace serialization -- 
`Microsoft.Azure.Cosmos/src/Diagnostics/SummaryDiagnostics.cs` — existing summary aggregation (foundation) -- `Microsoft.Azure.Cosmos/src/Tracing/TraceData/ClientSideRequestStatisticsTraceDatum.cs` — stats data -- `docs/SdkDesign.md` — SDK architecture overview +- `Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs` — summary computation and JSON serialization +- `Microsoft.Azure.Cosmos/src/Tracing/TraceWriter.TraceJsonWriter.cs` — current trace serialization (detailed mode) +- `Microsoft.Azure.Cosmos/src/Tracing/TraceData/ClientSideRequestStatisticsTraceDatum.cs` — stats data (`StoreResponseStatistics`, `HttpResponseStatistics`) +- `Microsoft.Azure.Cosmos/src/Tracing/TraceData/PointOperationStatisticsTraceDatum.cs` — ActivityId source From 5c1180c6e796ac5ff183a26c7e495ef2c273e727 Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Wed, 8 Apr 2026 11:54:31 -0700 Subject: [PATCH 8/9] Diagnostics: Fixes review feedback for Summary mode - Fix thread safety in EncryptionCosmosDiagnostics by replacing manual check-and-set cache with Lazy - Add upper bound (10 MB) on MaxDiagnosticsSummarySizeBytes with ArgumentOutOfRangeException validation - Add SummaryFormatVersion field (value: 1) to Summary JSON output in both full and truncated formats - Add DefaultTrace.TraceWarning for invalid env var values - Document v1 limitation that standard CRUD paths use default MaxDiagnosticsSummarySizeBytes - Update baseline tests for new SummaryFormatVersion field - Add tests for upper bound validation and env var above-max Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/EncryptionCosmosDiagnostics.cs | 81 +++++++++---------- .../src/CosmosClientOptions.cs | 43 ++++++++-- .../Diagnostics/DiagnosticsSummaryWriter.cs | 6 ++ .../CosmosClientOptionsUnitTests.cs | 38 +++++++++ .../DiagnosticsSummaryBaselineTests.cs | 4 + 5 files changed, 124 insertions(+), 48 deletions(-) diff --git 
a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionCosmosDiagnostics.cs b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionCosmosDiagnostics.cs index 4646f02dea..289b2441aa 100644 --- a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionCosmosDiagnostics.cs +++ b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionCosmosDiagnostics.cs @@ -18,7 +18,7 @@ internal sealed class EncryptionCosmosDiagnostics : CosmosDiagnostics private readonly JObject decryptContent; private readonly TimeSpan processingDuration; #if SDKPROJECTREF - private string cachedSummaryDiagnostics; + private readonly Lazy cachedSummaryDiagnostics; #endif public EncryptionCosmosDiagnostics( @@ -39,6 +39,10 @@ public EncryptionCosmosDiagnostics( } this.processingDuration = processingDuration; + +#if SDKPROJECTREF + this.cachedSummaryDiagnostics = new Lazy(() => this.BuildSummaryDiagnostics()); +#endif } public override IReadOnlyList<(string regionName, Uri uri)> GetContactedRegions() @@ -93,48 +97,43 @@ public override string ToString() #if SDKPROJECTREF public override string ToString(DiagnosticsVerbosity verbosity) { - switch (verbosity) + return verbosity switch + { + DiagnosticsVerbosity.Summary => this.cachedSummaryDiagnostics.Value, + _ => this.ToString(), + }; + } + + private string BuildSummaryDiagnostics() + { + StringBuilder stringBuilder = new StringBuilder(); + StringWriter stringWriter = new StringWriter(stringBuilder); + + using (JsonWriter writer = new JsonTextWriter(stringWriter)) { - case DiagnosticsVerbosity.Summary: - if (this.cachedSummaryDiagnostics != null) - { - return this.cachedSummaryDiagnostics; - } - - StringBuilder stringBuilder = new StringBuilder(); - StringWriter stringWriter = new StringWriter(stringBuilder); - - using (JsonWriter writer = new JsonTextWriter(stringWriter)) - { - writer.WriteStartObject(); - writer.WritePropertyName(Constants.DiagnosticsCoreDiagnostics); - writer.WriteRawValue(this.coreDiagnostics.ToString(verbosity)); - 
writer.WritePropertyName(Constants.DiagnosticsEncryptionDiagnostics); - writer.WriteStartObject(); - - if (this.encryptContent != null) - { - writer.WritePropertyName(Constants.DiagnosticsEncryptOperation); - writer.WriteRawValue(this.encryptContent.ToString()); - } - - if (this.decryptContent != null) - { - writer.WritePropertyName(Constants.DiagnosticsDecryptOperation); - writer.WriteRawValue(this.decryptContent.ToString()); - } - - writer.WriteEndObject(); - writer.WriteEndObject(); - } - - this.cachedSummaryDiagnostics = stringWriter.ToString(); - return this.cachedSummaryDiagnostics; - - case DiagnosticsVerbosity.Detailed: - default: - return this.ToString(); + writer.WriteStartObject(); + writer.WritePropertyName(Constants.DiagnosticsCoreDiagnostics); + writer.WriteRawValue(this.coreDiagnostics.ToString(DiagnosticsVerbosity.Summary)); + writer.WritePropertyName(Constants.DiagnosticsEncryptionDiagnostics); + writer.WriteStartObject(); + + if (this.encryptContent != null) + { + writer.WritePropertyName(Constants.DiagnosticsEncryptOperation); + writer.WriteRawValue(this.encryptContent.ToString()); + } + + if (this.decryptContent != null) + { + writer.WritePropertyName(Constants.DiagnosticsDecryptOperation); + writer.WriteRawValue(this.decryptContent.ToString()); + } + + writer.WriteEndObject(); + writer.WriteEndObject(); } + + return stringWriter.ToString(); } public override DateTime? 
GetStartTimeUtc() diff --git a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs index d9654d16b2..2b039947bb 100644 --- a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs +++ b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs @@ -13,8 +13,9 @@ namespace Microsoft.Azure.Cosmos using System.Net.Http; using System.Net.Security; using System.Security.Cryptography.X509Certificates; - using Microsoft.Azure.Cosmos.FaultInjection; - using Microsoft.Azure.Cosmos.Fluent; + using Microsoft.Azure.Cosmos.Core.Trace; + using Microsoft.Azure.Cosmos.FaultInjection; + using Microsoft.Azure.Cosmos.Fluent; using Microsoft.Azure.Documents; using Microsoft.Azure.Documents.Client; using Newtonsoft.Json; @@ -59,7 +60,12 @@ public class CosmosClientOptions /// /// Default maximum size in bytes for Summary mode diagnostic output. /// - internal const int DefaultMaxDiagnosticsSummarySizeBytes = 8192; + internal const int DefaultMaxDiagnosticsSummarySizeBytes = 8192; + + /// + /// Maximum allowed value for (10 MB). + /// + internal const int MaxAllowedDiagnosticsSummarySizeBytes = 10_485_760; /// /// Default request timeout @@ -104,14 +110,31 @@ public CosmosClientOptions() { this.DiagnosticsVerbosity = parsedVerbosity; } + else if (!string.IsNullOrEmpty(envVerbosity)) + { + DefaultTrace.TraceWarning( + "Invalid {0} value: '{1}'. Expected 'Detailed' or 'Summary'. Using default (Detailed).", + ConfigurationManager.DiagnosticsVerbosityVariable, + envVerbosity); + } string envMaxSize = Environment.GetEnvironmentVariable(ConfigurationManager.DiagnosticsMaxSummarySizeVariable); if (!string.IsNullOrEmpty(envMaxSize) && int.TryParse(envMaxSize, out int parsedMaxSize) - && parsedMaxSize >= 4096) + && parsedMaxSize >= 4096 + && parsedMaxSize <= MaxAllowedDiagnosticsSummarySizeBytes) { this.maxDiagnosticsSummarySizeBytes = parsedMaxSize; } + else if (!string.IsNullOrEmpty(envMaxSize)) + { + DefaultTrace.TraceWarning( + "Invalid {0} value: '{1}'. 
Must be an integer between 4096 and {2}. Using default ({3}).", + ConfigurationManager.DiagnosticsMaxSummarySizeVariable, + envMaxSize, + MaxAllowedDiagnosticsSummarySizeBytes, + DefaultMaxDiagnosticsSummarySizeBytes); + } } /// @@ -434,23 +457,29 @@ public ConnectionMode ConnectionMode /// /// Gets or sets the maximum size in bytes for Summary mode diagnostic output. /// If the summary output exceeds this limit, a truncated indicator is returned. - /// Default: 8192 (8 KB). Minimum: 4096 (4 KB). + /// Default: 8192 (8 KB). Minimum: 4096 (4 KB). Maximum: 10 MB. /// /// /// This property is only relevant when is set to /// . + /// + /// Note: In v1, this custom value is respected by ChangeFeed estimator, LINQ, + /// ReadMany, and throughput operations. Standard CRUD operations (ReadItem, + /// CreateItem, etc.) use the default value. This is a known limitation that + /// may be addressed in a future release. + /// /// public int MaxDiagnosticsSummarySizeBytes { get => this.maxDiagnosticsSummarySizeBytes; set { - if (value < 4096) + if (value < 4096 || value > MaxAllowedDiagnosticsSummarySizeBytes) { throw new ArgumentOutOfRangeException( nameof(this.MaxDiagnosticsSummarySizeBytes), value, - $"{nameof(this.MaxDiagnosticsSummarySizeBytes)} must be at least 4096 bytes."); + $"{nameof(this.MaxDiagnosticsSummarySizeBytes)} must be between 4096 and {MaxAllowedDiagnosticsSummarySizeBytes} bytes."); } this.maxDiagnosticsSummarySizeBytes = value; diff --git a/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs b/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs index 3556b979f1..91f8d546f6 100644 --- a/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs +++ b/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs @@ -178,6 +178,9 @@ private static string BuildSummaryJson(ITrace trace, List entries, writer.WriteFieldName("DiagnosticsVerbosity"); writer.WriteStringValue("Summary"); + 
writer.WriteFieldName("SummaryFormatVersion"); + writer.WriteNumberValue(1); + string activityId = FindActivityId(trace); if (activityId != null) { @@ -424,6 +427,9 @@ private static string BuildTruncatedJson(ITrace trace, int totalRequestCount, do writer.WriteFieldName("DiagnosticsVerbosity"); writer.WriteStringValue("Summary"); + writer.WriteFieldName("SummaryFormatVersion"); + writer.WriteNumberValue(1); + writer.WriteFieldName("TotalDurationMs"); writer.WriteNumberValue(trace.Duration.TotalMilliseconds); diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs index bcbf8942cb..a0ca1163f7 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs @@ -1480,5 +1480,43 @@ public void CosmosClientBuilder_WithMaxDiagnosticsSummarySizeBytes_TooSmallThrow CosmosClientBuilder builder = new CosmosClientBuilder(CosmosClientOptionsUnitTests.AccountEndpoint, MockCosmosUtil.RandomInvalidCorrectlyFormatedAuthKey); builder.WithMaxDiagnosticsSummarySizeBytes(2048); } + + [TestMethod] + [ExpectedException(typeof(ArgumentOutOfRangeException))] + public void CosmosClientOptions_MaxDiagnosticsSummarySizeBytes_ThrowsAboveMaximum() + { + CosmosClientOptions options = new CosmosClientOptions + { + MaxDiagnosticsSummarySizeBytes = CosmosClientOptions.MaxAllowedDiagnosticsSummarySizeBytes + 1 + }; + } + + [TestMethod] + public void CosmosClientOptions_MaxDiagnosticsSummarySizeBytes_AcceptsMaximum() + { + CosmosClientOptions options = new CosmosClientOptions + { + MaxDiagnosticsSummarySizeBytes = CosmosClientOptions.MaxAllowedDiagnosticsSummarySizeBytes + }; + Assert.AreEqual(CosmosClientOptions.MaxAllowedDiagnosticsSummarySizeBytes, options.MaxDiagnosticsSummarySizeBytes); + } + + [TestMethod] + public void 
CosmosClientOptions_EnvVar_MaxSummarySize_AboveMaximumIgnored() + { + string original = Environment.GetEnvironmentVariable(ConfigurationManager.DiagnosticsMaxSummarySizeVariable); + try + { + Environment.SetEnvironmentVariable( + ConfigurationManager.DiagnosticsMaxSummarySizeVariable, + (CosmosClientOptions.MaxAllowedDiagnosticsSummarySizeBytes + 1).ToString()); + CosmosClientOptions options = new CosmosClientOptions(); + Assert.AreEqual(8192, options.MaxDiagnosticsSummarySizeBytes, "Above-maximum env var should be ignored"); + } + finally + { + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsMaxSummarySizeVariable, original); + } + } } } \ No newline at end of file diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryBaselineTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryBaselineTests.cs index 2f90b2e183..af9c662e3c 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryBaselineTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryBaselineTests.cs @@ -43,6 +43,7 @@ public void Baseline_SingleRequest_SchemaFields() HashSet expectedTopLevelFields = new HashSet { "DiagnosticsVerbosity", + "SummaryFormatVersion", "TotalDurationMs", "TotalRequestCharge", "TotalRequestCount", @@ -177,6 +178,7 @@ public void Baseline_TruncatedSummary_SchemaFields() HashSet expectedFields = new HashSet { "DiagnosticsVerbosity", + "SummaryFormatVersion", "TotalDurationMs", "TotalRequestCount", "TotalRequestCharge", @@ -243,6 +245,8 @@ public void Baseline_FieldTypes_Consistent() // Top-level types Assert.AreEqual(JTokenType.String, summaryObj["DiagnosticsVerbosity"].Type); + Assert.AreEqual(JTokenType.Integer, summaryObj["SummaryFormatVersion"].Type); + Assert.AreEqual(1, summaryObj["SummaryFormatVersion"].Value()); Assert.IsTrue(summaryObj["TotalDurationMs"].Type == 
JTokenType.Float || summaryObj["TotalDurationMs"].Type == JTokenType.Integer, "TotalDurationMs should be numeric"); From b8fba340f98e8d6998904a3ede362b5874b618da Mon Sep 17 00:00:00 2001 From: Nalu Tripician <27316859+NaluTripician@users.noreply.github.com> Date: Tue, 14 Apr 2026 10:57:55 -0700 Subject: [PATCH 9/9] Encryption: Fixes SA1202 member ordering in EncryptionCosmosDiagnostics Move public override methods (GetStartTimeUtc, GetFailedRequestCount) before private BuildSummaryDiagnostics to satisfy StyleCop SA1202 rule requiring public members before private members. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/EncryptionCosmosDiagnostics.cs | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionCosmosDiagnostics.cs b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionCosmosDiagnostics.cs index 289b2441aa..fb4adcf84a 100644 --- a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionCosmosDiagnostics.cs +++ b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionCosmosDiagnostics.cs @@ -104,6 +104,16 @@ public override string ToString(DiagnosticsVerbosity verbosity) }; } + public override DateTime? GetStartTimeUtc() + { + return this.coreDiagnostics.GetStartTimeUtc(); + } + + public override int GetFailedRequestCount() + { + return this.coreDiagnostics.GetFailedRequestCount(); + } + private string BuildSummaryDiagnostics() { StringBuilder stringBuilder = new StringBuilder(); @@ -135,16 +145,6 @@ private string BuildSummaryDiagnostics() return stringWriter.ToString(); } - - public override DateTime? GetStartTimeUtc() - { - return this.coreDiagnostics.GetStartTimeUtc(); - } - - public override int GetFailedRequestCount() - { - return this.coreDiagnostics.GetFailedRequestCount(); - } #endif }