diff --git a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionCosmosDiagnostics.cs b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionCosmosDiagnostics.cs index bc0f536c6d..fb4adcf84a 100644 --- a/Microsoft.Azure.Cosmos.Encryption/src/EncryptionCosmosDiagnostics.cs +++ b/Microsoft.Azure.Cosmos.Encryption/src/EncryptionCosmosDiagnostics.cs @@ -17,6 +17,9 @@ internal sealed class EncryptionCosmosDiagnostics : CosmosDiagnostics private readonly JObject encryptContent; private readonly JObject decryptContent; private readonly TimeSpan processingDuration; +#if SDKPROJECTREF + private readonly Lazy cachedSummaryDiagnostics; +#endif public EncryptionCosmosDiagnostics( CosmosDiagnostics coreDiagnostics, @@ -36,6 +39,10 @@ public EncryptionCosmosDiagnostics( } this.processingDuration = processingDuration; + +#if SDKPROJECTREF + this.cachedSummaryDiagnostics = new Lazy(() => this.BuildSummaryDiagnostics()); +#endif } public override IReadOnlyList<(string regionName, Uri uri)> GetContactedRegions() @@ -88,6 +95,15 @@ public override string ToString() } #if SDKPROJECTREF + public override string ToString(DiagnosticsVerbosity verbosity) + { + return verbosity switch + { + DiagnosticsVerbosity.Summary => this.cachedSummaryDiagnostics.Value, + _ => this.ToString(), + }; + } + public override DateTime? 
GetStartTimeUtc() { return this.coreDiagnostics.GetStartTimeUtc(); @@ -97,6 +113,38 @@ public override int GetFailedRequestCount() { return this.coreDiagnostics.GetFailedRequestCount(); } + + private string BuildSummaryDiagnostics() + { + StringBuilder stringBuilder = new StringBuilder(); + StringWriter stringWriter = new StringWriter(stringBuilder); + + using (JsonWriter writer = new JsonTextWriter(stringWriter)) + { + writer.WriteStartObject(); + writer.WritePropertyName(Constants.DiagnosticsCoreDiagnostics); + writer.WriteRawValue(this.coreDiagnostics.ToString(DiagnosticsVerbosity.Summary)); + writer.WritePropertyName(Constants.DiagnosticsEncryptionDiagnostics); + writer.WriteStartObject(); + + if (this.encryptContent != null) + { + writer.WritePropertyName(Constants.DiagnosticsEncryptOperation); + writer.WriteRawValue(this.encryptContent.ToString()); + } + + if (this.decryptContent != null) + { + writer.WritePropertyName(Constants.DiagnosticsDecryptOperation); + writer.WriteRawValue(this.decryptContent.ToString()); + } + + writer.WriteEndObject(); + writer.WriteEndObject(); + } + + return stringWriter.ToString(); + } #endif } diff --git a/Microsoft.Azure.Cosmos/src/ChangeFeedProcessor/ChangeFeedEstimatorIterator.cs b/Microsoft.Azure.Cosmos/src/ChangeFeedProcessor/ChangeFeedEstimatorIterator.cs index 969a921a5e..ca2e49d875 100644 --- a/Microsoft.Azure.Cosmos/src/ChangeFeedProcessor/ChangeFeedEstimatorIterator.cs +++ b/Microsoft.Azure.Cosmos/src/ChangeFeedProcessor/ChangeFeedEstimatorIterator.cs @@ -174,7 +174,7 @@ private async Task> ReadNextInternalAsync { // Lease store is empty this.hasMoreResults = false; - return new ChangeFeedEstimatorEmptyFeedResponse(trace); + return new ChangeFeedEstimatorEmptyFeedResponse(trace, this.monitoredContainer.ClientContext.ClientOptions.MaxDiagnosticsSummarySizeBytes); } IEnumerable leasesForCurrentPage = this.lazyLeaseDocuments @@ -202,7 +202,7 @@ private async Task> ReadNextInternalAsync this.hasMoreResults = 
++this.currentPage != this.maxPage; - return new ChangeFeedEstimatorFeedResponse(trace, estimations.AsReadOnly(), totalRUCost); + return new ChangeFeedEstimatorFeedResponse(trace, estimations.AsReadOnly(), totalRUCost, this.monitoredContainer.ClientContext.ClientOptions.MaxDiagnosticsSummarySizeBytes); } /// @@ -368,14 +368,17 @@ private sealed class ChangeFeedEstimatorFeedResponse : FeedResponse remainingLeaseWorks; private readonly Headers headers; + private readonly int maxDiagnosticsSummarySizeBytes; public ChangeFeedEstimatorFeedResponse( ITrace trace, ReadOnlyCollection remainingLeaseWorks, - double ruCost) + double ruCost, + int maxDiagnosticsSummarySizeBytes) { this.Trace = trace ?? throw new ArgumentNullException(nameof(trace)); this.remainingLeaseWorks = remainingLeaseWorks ?? throw new ArgumentNullException(nameof(remainingLeaseWorks)); + this.maxDiagnosticsSummarySizeBytes = maxDiagnosticsSummarySizeBytes; this.headers = new Headers { RequestCharge = ruCost @@ -394,7 +397,7 @@ public ChangeFeedEstimatorFeedResponse( public override HttpStatusCode StatusCode => HttpStatusCode.OK; - public override CosmosDiagnostics Diagnostics => new CosmosTraceDiagnostics(this.Trace); + public override CosmosDiagnostics Diagnostics => new CosmosTraceDiagnostics(this.Trace, this.maxDiagnosticsSummarySizeBytes); public override string IndexMetrics => null; @@ -412,10 +415,12 @@ private sealed class ChangeFeedEstimatorEmptyFeedResponse : FeedResponse remainingLeaseWorks = Enumerable.Empty(); private readonly Headers headers; + private readonly int maxDiagnosticsSummarySizeBytes; - public ChangeFeedEstimatorEmptyFeedResponse(ITrace trace) + public ChangeFeedEstimatorEmptyFeedResponse(ITrace trace, int maxDiagnosticsSummarySizeBytes) { this.Trace = trace ?? 
throw new ArgumentNullException(nameof(trace)); + this.maxDiagnosticsSummarySizeBytes = maxDiagnosticsSummarySizeBytes; this.headers = new Headers(); } @@ -431,7 +436,7 @@ public ChangeFeedEstimatorEmptyFeedResponse(ITrace trace) public override HttpStatusCode StatusCode => HttpStatusCode.OK; - public override CosmosDiagnostics Diagnostics => new CosmosTraceDiagnostics(this.Trace); + public override CosmosDiagnostics Diagnostics => new CosmosTraceDiagnostics(this.Trace, this.maxDiagnosticsSummarySizeBytes); public override string IndexMetrics => null; diff --git a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs index 9e3b8dfc00..9982339542 100644 --- a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs +++ b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs @@ -13,8 +13,9 @@ namespace Microsoft.Azure.Cosmos using System.Net.Http; using System.Net.Security; using System.Security.Cryptography.X509Certificates; - using Microsoft.Azure.Cosmos.FaultInjection; - using Microsoft.Azure.Cosmos.Fluent; + using Microsoft.Azure.Cosmos.Core.Trace; + using Microsoft.Azure.Cosmos.FaultInjection; + using Microsoft.Azure.Cosmos.Fluent; using Microsoft.Azure.Documents; using Microsoft.Azure.Documents.Client; using Newtonsoft.Json; @@ -54,7 +55,17 @@ public class CosmosClientOptions private const string ConnectionStringAccountKey = "AccountKey"; private const string ConnectionStringDisableServerCertificateValidation = "DisableServerCertificateValidation"; - private const ApiType DefaultApiType = ApiType.None; + private const ApiType DefaultApiType = ApiType.None; + + /// + /// Default maximum size in bytes for Summary mode diagnostic output. + /// + internal const int DefaultMaxDiagnosticsSummarySizeBytes = 8192; + + /// + /// Maximum allowed value for (10 MB). 
+ /// + internal const int MaxAllowedDiagnosticsSummarySizeBytes = 10_485_760; /// /// Default request timeout @@ -74,8 +85,9 @@ public class CosmosClientOptions private IWebProxy webProxy; private Func httpClientFactory; private string applicationName; - private IFaultInjector faultInjector; - private bool isCustomSerializerProvided; + private IFaultInjector faultInjector; + private bool isCustomSerializerProvided; + private int maxDiagnosticsSummarySizeBytes = DefaultMaxDiagnosticsSummarySizeBytes; /// /// Creates a new CosmosClientOptions @@ -89,8 +101,40 @@ public CosmosClientOptions() this.ConnectionProtocol = CosmosClientOptions.DefaultProtocol; this.ApiType = CosmosClientOptions.DefaultApiType; this.CustomHandlers = new Collection(); - this.CosmosClientTelemetryOptions = new CosmosClientTelemetryOptions(); - this.SessionRetryOptions = new SessionRetryOptions(); + this.CosmosClientTelemetryOptions = new CosmosClientTelemetryOptions(); + this.SessionRetryOptions = new SessionRetryOptions(); + + string envVerbosity = Environment.GetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable); + if (!string.IsNullOrEmpty(envVerbosity) + && Enum.TryParse(envVerbosity, ignoreCase: true, out DiagnosticsVerbosity parsedVerbosity) && Enum.IsDefined(typeof(DiagnosticsVerbosity), parsedVerbosity)) // Enum.TryParse also succeeds for numeric strings such as "5"; only defined enum members are honored, anything else falls through to the warning below + { + this.DiagnosticsVerbosity = parsedVerbosity; + } + else if (!string.IsNullOrEmpty(envVerbosity)) + { + DefaultTrace.TraceWarning( + "Invalid {0} value: '{1}'. Expected 'Detailed' or 'Summary'. 
Using default (Detailed).", + ConfigurationManager.DiagnosticsVerbosityVariable, + envVerbosity); + } + + string envMaxSize = Environment.GetEnvironmentVariable(ConfigurationManager.DiagnosticsMaxSummarySizeVariable); + if (!string.IsNullOrEmpty(envMaxSize) + && int.TryParse(envMaxSize, out int parsedMaxSize) + && parsedMaxSize >= 4096 + && parsedMaxSize <= MaxAllowedDiagnosticsSummarySizeBytes) + { + this.maxDiagnosticsSummarySizeBytes = parsedMaxSize; + } + else if (!string.IsNullOrEmpty(envMaxSize)) + { + DefaultTrace.TraceWarning( + "Invalid {0} value: '{1}'. Must be an integer between 4096 and {2}. Using default ({3}).", + ConfigurationManager.DiagnosticsMaxSummarySizeVariable, + envMaxSize, + MaxAllowedDiagnosticsSummarySizeBytes, + DefaultMaxDiagnosticsSummarySizeBytes); + } } /// @@ -413,7 +457,63 @@ public ConnectionMode ConnectionMode /// If is set to true in CosmosClientOptions, priority level set on the CosmosClient is used. /// /// - public PriorityLevel? PriorityLevel { get; set; } + public PriorityLevel? PriorityLevel { get; set; } + + /// + /// Gets or sets the preferred verbosity for serialization. + /// Default: . + /// + /// + /// + /// This property stores the preferred verbosity level. To obtain diagnostics at + /// the configured verbosity, pass it to + /// : + /// response.Diagnostics.ToString(client.ClientOptions.DiagnosticsVerbosity). + /// + /// + /// When is used, + /// the diagnostics output is compacted by grouping requests by region and deduplicating + /// retries with aggregate statistics (count, total RU, min/max/P50/avg latency). + /// + /// + /// The parameterless always returns + /// output for + /// backward compatibility. + /// + /// + public DiagnosticsVerbosity DiagnosticsVerbosity { get; set; } = DiagnosticsVerbosity.Detailed; + + /// + /// Gets or sets the maximum size in bytes for Summary mode diagnostic output. + /// If the summary output exceeds this limit, a truncated indicator is returned. + /// Default: 8192 (8 KB). 
Minimum: 4096 (4 KB). Maximum: 10 MB. + /// + /// + /// This property is only relevant when is set to + /// . + /// + /// Note: In v1, this custom value is respected by ChangeFeed estimator, LINQ, + /// ReadMany, and throughput operations. Standard CRUD operations (ReadItem, + /// CreateItem, etc.) use the default value. This is a known limitation that + /// may be addressed in a future release. + /// + /// + public int MaxDiagnosticsSummarySizeBytes + { + get => this.maxDiagnosticsSummarySizeBytes; + set + { + if (value < 4096 || value > MaxAllowedDiagnosticsSummarySizeBytes) + { + throw new ArgumentOutOfRangeException( + nameof(this.MaxDiagnosticsSummarySizeBytes), + value, + $"{nameof(this.MaxDiagnosticsSummarySizeBytes)} must be between 4096 and {MaxAllowedDiagnosticsSummarySizeBytes} bytes."); + } + + this.maxDiagnosticsSummarySizeBytes = value; + } + } /// /// Gets or sets the maximum number of retries in the case where the request fails diff --git a/Microsoft.Azure.Cosmos/src/Diagnostics/CosmosDiagnostics.cs b/Microsoft.Azure.Cosmos/src/Diagnostics/CosmosDiagnostics.cs index b88eaa8efe..318103a59b 100644 --- a/Microsoft.Azure.Cosmos/src/Diagnostics/CosmosDiagnostics.cs +++ b/Microsoft.Azure.Cosmos/src/Diagnostics/CosmosDiagnostics.cs @@ -61,7 +61,7 @@ public virtual ServerSideCumulativeMetrics GetQueryMetrics() /// /// The string field instance in the Azure Cosmos DB database service. /// - /// implements lazy materialization and is only materialized when is called. + /// implements lazy materialization and is only materialized when is called. /// /// /// Do not eagerly materialize the diagnostics until the moment of consumption to avoid unnecessary allocations, let the ToString be called only when needed. @@ -89,6 +89,16 @@ public virtual ServerSideCumulativeMetrics GetQueryMetrics() /// public abstract override string ToString(); + /// + /// Returns the string representation of diagnostics using the specified verbosity. 
+ /// When verbosity is DiagnosticsVerbosity.Summary, + /// produces a compacted region-grouped summary. When DiagnosticsVerbosity.Detailed, + /// produces the full trace output (same as parameterless ToString()). The base implementation returns ToString() for every verbosity so that existing derived types keep compiling and loading; derived types override it to provide a summary. + /// + /// The verbosity level to use for serialization. + /// A JSON string with diagnostics at the requested verbosity level. + public virtual string ToString(DiagnosticsVerbosity verbosity) => this.ToString(); + /// /// Gets the list of all regions that were contacted for a request /// diff --git a/Microsoft.Azure.Cosmos/src/Diagnostics/CosmosTraceDiagnostics.cs b/Microsoft.Azure.Cosmos/src/Diagnostics/CosmosTraceDiagnostics.cs index 5eae8492ea..6b1f5d5380 100644 --- a/Microsoft.Azure.Cosmos/src/Diagnostics/CosmosTraceDiagnostics.cs +++ b/Microsoft.Azure.Cosmos/src/Diagnostics/CosmosTraceDiagnostics.cs @@ -17,8 +17,14 @@ namespace Microsoft.Azure.Cosmos.Diagnostics internal sealed class CosmosTraceDiagnostics : CosmosDiagnostics { private readonly Lazy accumulatedMetrics; + private readonly Lazy cachedSummaryJson; public CosmosTraceDiagnostics(ITrace trace) + : this(trace, CosmosClientOptions.DefaultMaxDiagnosticsSummarySizeBytes) + { + } + + internal CosmosTraceDiagnostics(ITrace trace, int maxDiagnosticsSummarySizeBytes) { if (trace == null) { @@ -34,6 +40,8 @@ public CosmosTraceDiagnostics(ITrace trace) this.Value = rootTrace; this.accumulatedMetrics = new Lazy(() => PopulateServerSideCumulativeMetrics(this.Value)); + this.cachedSummaryJson = new Lazy(() => + DiagnosticsSummaryWriter.WriteSummary(this.Value, maxDiagnosticsSummarySizeBytes)); } public ITrace Value { get; } @@ -48,6 +56,16 @@ public override string ToString() return this.ToJsonString(); } + public override string ToString(DiagnosticsVerbosity verbosity) + { + return verbosity switch + { + DiagnosticsVerbosity.Summary => this.cachedSummaryJson.Value, + DiagnosticsVerbosity.Detailed => this.ToString(), + _ => this.ToString(), + }; + } + public override TimeSpan GetClientElapsedTime() { return this.Value.Duration; diff --git 
a/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs b/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs new file mode 100644 index 0000000000..91f8d546f6 --- /dev/null +++ b/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs @@ -0,0 +1,528 @@ +// ------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +// ------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos.Diagnostics +{ + using System; + using System.Collections.Generic; + using System.Globalization; + using System.Linq; + using System.Net; + using System.Net.Http; + using System.Runtime.CompilerServices; + using System.Text; + using Microsoft.Azure.Cosmos.Json; + using Microsoft.Azure.Cosmos.Tracing; + using Microsoft.Azure.Cosmos.Tracing.TraceData; + using Microsoft.Azure.Documents; + + /// + /// Produces compacted summary JSON from an ITrace tree. + /// Groups requests by region, keeps first/last in full detail, + /// and aggregates middle entries by (StatusCode, SubStatusCode). + /// + internal static class DiagnosticsSummaryWriter + { + private const string UnknownRegion = "Unknown"; + + /// + /// Produces the summary JSON string for the given trace. + /// If the output exceeds maxSizeBytes, returns a truncated indicator. + /// + public static string WriteSummary( + ITrace trace, + int maxSizeBytes) + { + if (trace == null) + { + throw new ArgumentNullException(nameof(trace)); + } + + if (trace is Tracing.Trace concreteTrace) + { + concreteTrace.SetWalkingStateRecursively(); + } + + List entries = CollectRequestEntries(trace); + + double totalRequestCharge = 0; + foreach (RequestEntry e in entries) + { + totalRequestCharge += e.RequestCharge; + } + + string summaryJson = BuildSummaryJson(trace, entries, totalRequestCharge); + + // v1 tradeoff: the full summary JSON is always computed before checking the size limit. 
+ // For pathological scenarios with hundreds of retries, this means allocating and then + // potentially discarding the full string. A future optimization could estimate output + // size before serialization or use a streaming approach that bails early. + if (Encoding.UTF8.GetByteCount(summaryJson) <= maxSizeBytes) + { + return summaryJson; + } + + return BuildTruncatedJson(trace, entries.Count, totalRequestCharge); + } + + private static List CollectRequestEntries(ITrace trace) + { + List entries = new List(); + HashSet visited = new HashSet(TraceReferenceEqualityComparer.Instance); + CollectRequestEntriesRecursive(trace, entries, visited); + return entries; + } + + private static void CollectRequestEntriesRecursive(ITrace currentTrace, List entries, HashSet visited) + { + if (!visited.Add(currentTrace)) + { + return; + } + + foreach (object datum in currentTrace.Data.Values) + { + if (datum is ClientSideRequestStatisticsTraceDatum clientSideStats) + { + foreach (ClientSideRequestStatisticsTraceDatum.StoreResponseStatistics storeStat + in clientSideStats.StoreResponseStatisticsList) + { + if (storeStat.IsSupplementalResponse) + { + continue; + } + + entries.Add(new RequestEntry( + region: storeStat.Region ?? 
UnknownRegion, + statusCode: (int)storeStat.StoreResult.StatusCode, + subStatusCode: (int)storeStat.StoreResult.SubStatusCode, + requestCharge: storeStat.StoreResult.RequestCharge, + durationMs: storeStat.RequestLatency.TotalMilliseconds, + requestStartTimeUtc: storeStat.RequestStartTime, + endpoint: storeStat.LocationEndpoint?.ToString(), + operationType: storeStat.RequestOperationType.ToString(), + resourceType: storeStat.RequestResourceType.ToString())); + } + + foreach (ClientSideRequestStatisticsTraceDatum.HttpResponseStatistics httpStat + in clientSideStats.HttpResponseStatisticsList) + { + int statusCode = 0; + int subStatusCode = 0; + double requestCharge = 0; + + if (httpStat.HttpResponseMessage != null) + { + statusCode = (int)httpStat.HttpResponseMessage.StatusCode; + subStatusCode = GetHttpSubStatusCode(httpStat); + + if (httpStat.HttpResponseMessage.Headers.TryGetValues( + HttpConstants.HttpHeaders.RequestCharge, + out IEnumerable chargeValues)) + { + string chargeStr = chargeValues.FirstOrDefault(); + if (chargeStr != null) + { + double.TryParse(chargeStr, NumberStyles.Float, CultureInfo.InvariantCulture, out requestCharge); + } + } + } + + entries.Add(new RequestEntry( + region: httpStat.Region ?? 
UnknownRegion, + statusCode: statusCode, + subStatusCode: subStatusCode, + requestCharge: requestCharge, + durationMs: httpStat.Duration.TotalMilliseconds, + requestStartTimeUtc: httpStat.RequestStartTime, + endpoint: httpStat.RequestUri?.Host, + operationType: httpStat.HttpMethod?.ToString(), + resourceType: httpStat.ResourceType.ToString())); + } + } + } + + foreach (ITrace childTrace in currentTrace.Children) + { + CollectRequestEntriesRecursive(childTrace, entries, visited); + } + } + + private static int GetHttpSubStatusCode( + ClientSideRequestStatisticsTraceDatum.HttpResponseStatistics httpStat) + { + if (httpStat.HttpResponseMessage?.Headers != null + && httpStat.HttpResponseMessage.Headers.TryGetValues( + WFConstants.BackendHeaders.SubStatus, + out IEnumerable values)) + { + string first = values.FirstOrDefault(); + if (first != null + && int.TryParse(first, NumberStyles.Integer, CultureInfo.InvariantCulture, out int sub)) + { + return sub; + } + } + + return 0; + } + + private static string BuildSummaryJson(ITrace trace, List entries, double totalRequestCharge) + { + IJsonWriter writer = JsonWriter.Create(JsonSerializationFormat.Text); + writer.WriteObjectStart(); + writer.WriteFieldName("Summary"); + writer.WriteObjectStart(); + + writer.WriteFieldName("DiagnosticsVerbosity"); + writer.WriteStringValue("Summary"); + + writer.WriteFieldName("SummaryFormatVersion"); + writer.WriteNumberValue(1); + + string activityId = FindActivityId(trace); + if (activityId != null) + { + writer.WriteFieldName("ActivityId"); + writer.WriteStringValue(activityId); + } + + writer.WriteFieldName("TotalDurationMs"); + writer.WriteNumberValue(trace.Duration.TotalMilliseconds); + + writer.WriteFieldName("TotalRequestCharge"); + writer.WriteNumberValue(totalRequestCharge); + + writer.WriteFieldName("TotalRequestCount"); + writer.WriteNumberValue(entries.Count); + + // Group by region, preserving chronological order within each group + Dictionary> regionGroups = new 
Dictionary>(); + List regionOrder = new List(); + + foreach (RequestEntry entry in entries.OrderBy(e => e.RequestStartTimeUtc ?? DateTime.MinValue)) + { + if (!regionGroups.TryGetValue(entry.Region, out List group)) + { + group = new List(); + regionGroups[entry.Region] = group; + regionOrder.Add(entry.Region); + } + + group.Add(entry); + } + + writer.WriteFieldName("RegionsSummary"); + writer.WriteArrayStart(); + + foreach (string region in regionOrder) + { + List regionEntries = regionGroups[region]; + WriteRegionSummary(writer, region, regionEntries); + } + + writer.WriteArrayEnd(); + + writer.WriteObjectEnd(); // Summary + writer.WriteObjectEnd(); // root + + return Encoding.UTF8.GetString(writer.GetResult().Span); + } + + private static void WriteRegionSummary( + IJsonWriter writer, + string region, + List entries) + { + writer.WriteObjectStart(); + + writer.WriteFieldName("Region"); + writer.WriteStringValue(region); + + double regionRequestCharge = 0; + foreach (RequestEntry e in entries) + { + regionRequestCharge += e.RequestCharge; + } + + writer.WriteFieldName("RequestCount"); + writer.WriteNumberValue(entries.Count); + + writer.WriteFieldName("TotalRequestCharge"); + writer.WriteNumberValue(regionRequestCharge); + + // First entry (always present) + writer.WriteFieldName("First"); + WriteRequestEntryDetail(writer, entries[0]); + + // Last entry (only if more than 1) + if (entries.Count > 1) + { + writer.WriteFieldName("Last"); + WriteRequestEntryDetail(writer, entries[entries.Count - 1]); + } + + // Aggregated groups for middle entries (all except first and last) + if (entries.Count > 2) + { + List middleEntries = entries.GetRange(1, entries.Count - 2); + + // Group by (StatusCode, SubStatusCode) + Dictionary<(int, int), List> statusGroups = + new Dictionary<(int, int), List>(); + + foreach (RequestEntry entry in middleEntries) + { + (int, int) key = (entry.StatusCode, entry.SubStatusCode); + if (!statusGroups.TryGetValue(key, out List group)) + { + 
group = new List(); + statusGroups[key] = group; + } + + group.Add(entry); + } + + writer.WriteFieldName("AggregatedGroups"); + writer.WriteArrayStart(); + + foreach (KeyValuePair<(int, int), List> kvp in statusGroups) + { + WriteAggregatedGroup(writer, kvp.Key.Item1, kvp.Key.Item2, kvp.Value); + } + + writer.WriteArrayEnd(); + } + + writer.WriteObjectEnd(); + } + + private static void WriteRequestEntryDetail(IJsonWriter writer, RequestEntry entry) + { + writer.WriteObjectStart(); + + writer.WriteFieldName("StatusCode"); + writer.WriteNumberValue(entry.StatusCode); + + writer.WriteFieldName("SubStatusCode"); + writer.WriteNumberValue(entry.SubStatusCode); + + writer.WriteFieldName("RequestCharge"); + writer.WriteNumberValue(entry.RequestCharge); + + writer.WriteFieldName("DurationMs"); + writer.WriteNumberValue(entry.DurationMs); + + writer.WriteFieldName("Region"); + writer.WriteStringValue(entry.Region); + + if (entry.Endpoint != null) + { + writer.WriteFieldName("Endpoint"); + writer.WriteStringValue(entry.Endpoint); + } + + if (entry.RequestStartTimeUtc.HasValue) + { + writer.WriteFieldName("RequestStartTimeUtc"); + writer.WriteStringValue(entry.RequestStartTimeUtc.Value.ToString("o", CultureInfo.InvariantCulture)); + } + + if (entry.OperationType != null) + { + writer.WriteFieldName("OperationType"); + writer.WriteStringValue(entry.OperationType); + } + + if (entry.ResourceType != null) + { + writer.WriteFieldName("ResourceType"); + writer.WriteStringValue(entry.ResourceType); + } + + writer.WriteObjectEnd(); + } + + private static void WriteAggregatedGroup( + IJsonWriter writer, + int statusCode, + int subStatusCode, + List entries) + { + writer.WriteObjectStart(); + + writer.WriteFieldName("StatusCode"); + writer.WriteNumberValue(statusCode); + + writer.WriteFieldName("SubStatusCode"); + writer.WriteNumberValue(subStatusCode); + + writer.WriteFieldName("Count"); + writer.WriteNumberValue(entries.Count); + + double totalCharge = 0; + foreach (RequestEntry e 
in entries) + { + totalCharge += e.RequestCharge; + } + + writer.WriteFieldName("TotalRequestCharge"); + writer.WriteNumberValue(totalCharge); + + // Sort durations for percentile computation + List durations = new List(entries.Count); + foreach (RequestEntry e in entries) + { + durations.Add(e.DurationMs); + } + + durations.Sort(); + + writer.WriteFieldName("MinDurationMs"); + writer.WriteNumberValue(durations[0]); + + writer.WriteFieldName("MaxDurationMs"); + writer.WriteNumberValue(durations[durations.Count - 1]); + + writer.WriteFieldName("P50DurationMs"); + writer.WriteNumberValue(ComputeP50(durations)); + + double avgDuration = 0; + foreach (double d in durations) + { + avgDuration += d; + } + + avgDuration /= durations.Count; + + writer.WriteFieldName("AvgDurationMs"); + writer.WriteNumberValue(Math.Round(avgDuration, 1)); + + writer.WriteObjectEnd(); + } + + private static double ComputeP50(List sortedValues) + { + int count = sortedValues.Count; + if (count == 1) + { + return sortedValues[0]; + } + + // For odd count, take the middle element. + // For even count, take the lower of the two middle elements + // (matching the Rust SDK's floor-based approach). 
+ int midIndex = (count - 1) / 2; + return sortedValues[midIndex]; + } + + private static string BuildTruncatedJson(ITrace trace, int totalRequestCount, double totalRequestCharge) + { + IJsonWriter writer = JsonWriter.Create(JsonSerializationFormat.Text); + writer.WriteObjectStart(); + writer.WriteFieldName("Summary"); + writer.WriteObjectStart(); + + writer.WriteFieldName("DiagnosticsVerbosity"); + writer.WriteStringValue("Summary"); + + writer.WriteFieldName("SummaryFormatVersion"); + writer.WriteNumberValue(1); + + writer.WriteFieldName("TotalDurationMs"); + writer.WriteNumberValue(trace.Duration.TotalMilliseconds); + + writer.WriteFieldName("TotalRequestCharge"); + writer.WriteNumberValue(totalRequestCharge); + + writer.WriteFieldName("TotalRequestCount"); + writer.WriteNumberValue(totalRequestCount); + + writer.WriteFieldName("Truncated"); + writer.WriteBoolValue(true); + + writer.WriteFieldName("Message"); + writer.WriteStringValue( + "Summary output truncated to fit size limit. Set DiagnosticsVerbosity to Detailed for full diagnostics."); + + writer.WriteObjectEnd(); // Summary + writer.WriteObjectEnd(); // root + + return Encoding.UTF8.GetString(writer.GetResult().Span); + } + + /// + /// Finds the ActivityId from a PointOperationStatisticsTraceDatum in the trace tree. + /// + private static string FindActivityId(ITrace trace) + { + foreach (object datum in trace.Data.Values) + { + if (datum is PointOperationStatisticsTraceDatum pointOpStats + && !string.IsNullOrEmpty(pointOpStats.ActivityId)) + { + return pointOpStats.ActivityId; + } + } + + foreach (ITrace child in trace.Children) + { + string activityId = FindActivityId(child); + if (activityId != null) + { + return activityId; + } + } + + return null; + } + + /// + /// Internal representation of a single request entry collected from the trace tree. 
+ /// + private readonly struct RequestEntry + { + public RequestEntry( + string region, + int statusCode, + int subStatusCode, + double requestCharge, + double durationMs, + DateTime? requestStartTimeUtc, + string endpoint, + string operationType, + string resourceType) + { + this.Region = region; + this.StatusCode = statusCode; + this.SubStatusCode = subStatusCode; + this.RequestCharge = requestCharge; + this.DurationMs = durationMs; + this.RequestStartTimeUtc = requestStartTimeUtc; + this.Endpoint = endpoint; + this.OperationType = operationType; + this.ResourceType = resourceType; + } + + public string Region { get; } + public int StatusCode { get; } + public int SubStatusCode { get; } + public double RequestCharge { get; } + public double DurationMs { get; } + public DateTime? RequestStartTimeUtc { get; } + public string Endpoint { get; } + public string OperationType { get; } + public string ResourceType { get; } + } + + private sealed class TraceReferenceEqualityComparer : IEqualityComparer + { + public static readonly TraceReferenceEqualityComparer Instance = new TraceReferenceEqualityComparer(); + + public bool Equals(ITrace x, ITrace y) => ReferenceEquals(x, y); + + public int GetHashCode(ITrace obj) => RuntimeHelpers.GetHashCode(obj); + } + } +} diff --git a/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsVerbosity.cs b/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsVerbosity.cs new file mode 100644 index 0000000000..cec4825fed --- /dev/null +++ b/Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsVerbosity.cs @@ -0,0 +1,26 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos +{ + /// + /// Controls the level of detail in serialized output. + /// + public enum DiagnosticsVerbosity + { + /// + /// Full diagnostic output with all individual request traces. 
+ /// This is the default and preserves backward compatibility. + /// + Detailed = 0, + + /// + /// Compacted diagnostic output optimized for log size constraints. + /// Groups requests by region. Keeps first and last request in full detail. + /// Deduplicates middle requests by (StatusCode, SubStatusCode) with + /// aggregate statistics (count, total RU, min/max/P50/avg latency). + /// + Summary = 1, + } +} diff --git a/Microsoft.Azure.Cosmos/src/Fluent/CosmosClientBuilder.cs b/Microsoft.Azure.Cosmos/src/Fluent/CosmosClientBuilder.cs index e33de132e0..2126dc28bf 100644 --- a/Microsoft.Azure.Cosmos/src/Fluent/CosmosClientBuilder.cs +++ b/Microsoft.Azure.Cosmos/src/Fluent/CosmosClientBuilder.cs @@ -869,5 +869,30 @@ CosmosClientBuilder WithReadConsistencyStrategy(Cosmos.ReadConsistencyStrategy r this.clientOptions.ReadConsistencyStrategy = readConsistencyStrategy; return this; } + + /// + /// Sets the preferred for diagnostics serialization. + /// + /// The verbosity level to use when calling + /// . + /// The current . + /// + public CosmosClientBuilder WithDiagnosticsVerbosity(DiagnosticsVerbosity verbosity) + { + this.clientOptions.DiagnosticsVerbosity = verbosity; + return this; + } + + /// + /// Sets the maximum size in bytes for Summary mode diagnostic output. + /// + /// Maximum size in bytes; must be between 4096 and 10485760 (10 MB) inclusive, otherwise the CosmosClientOptions.MaxDiagnosticsSummarySizeBytes setter throws ArgumentOutOfRangeException. + /// The current . 
+ /// + public CosmosClientBuilder WithMaxDiagnosticsSummarySizeBytes(int maxSizeBytes) + { + this.clientOptions.MaxDiagnosticsSummarySizeBytes = maxSizeBytes; + return this; + } } } diff --git a/Microsoft.Azure.Cosmos/src/Linq/CosmosLinqQuery.cs b/Microsoft.Azure.Cosmos/src/Linq/CosmosLinqQuery.cs index 136e0d3ba0..abb81ffebc 100644 --- a/Microsoft.Azure.Cosmos/src/Linq/CosmosLinqQuery.cs +++ b/Microsoft.Azure.Cosmos/src/Linq/CosmosLinqQuery.cs @@ -235,7 +235,7 @@ internal async Task> AggregateResultAsync(CancellationToken cancella System.Net.HttpStatusCode.OK, headers, result.FirstOrDefault(), - new CosmosTraceDiagnostics(rootTrace), + new CosmosTraceDiagnostics(rootTrace, this.container.ClientContext.ClientOptions.MaxDiagnosticsSummarySizeBytes), null); } diff --git a/Microsoft.Azure.Cosmos/src/ReadManyQueryHelper.cs b/Microsoft.Azure.Cosmos/src/ReadManyQueryHelper.cs index ee969e57a4..257a2c304b 100644 --- a/Microsoft.Azure.Cosmos/src/ReadManyQueryHelper.cs +++ b/Microsoft.Azure.Cosmos/src/ReadManyQueryHelper.cs @@ -239,7 +239,7 @@ private FeedResponse CombineFeedResponseFromQueryResponses(List OfferRetryHelperForStaleRidCacheAsync( responseMessage.StatusCode, responseMessage.Headers, null, - new CosmosTraceDiagnostics(trace), + new CosmosTraceDiagnostics(trace, this.ClientContext.ClientOptions.MaxDiagnosticsSummarySizeBytes), responseMessage.RequestMessage); } diff --git a/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs b/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs index 0164d1e282..8778fb74f5 100644 --- a/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs +++ b/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs @@ -125,6 +125,18 @@ internal static class ConfigurationManager /// internal static readonly string UseLengthAwareRangeComparator = "AZURE_COSMOS_USE_LENGTH_AWARE_RANGE_COMPARATOR"; + /// + /// Environment variable name for overriding the default diagnostics verbosity. 
+ /// Accepts "Summary" or "Detailed" (case-insensitive). + /// + internal static readonly string DiagnosticsVerbosityVariable = "AZURE_COSMOS_DIAGNOSTICS_VERBOSITY"; + + /// + /// Environment variable name for overriding the maximum summary diagnostics size in bytes. + /// Must be an integer >= 4096. + /// + internal static readonly string DiagnosticsMaxSummarySizeVariable = "AZURE_COSMOS_DIAGNOSTICS_MAX_SUMMARY_SIZE"; + /// /// Environment variable name to enable DNS dot-suffix (FQDN trailing dot) for /// Direct mode TCP connections. When enabled, appends a trailing '.' to hostnames diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/DiagnosticsVerbosityEmulatorTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/DiagnosticsVerbosityEmulatorTests.cs new file mode 100644 index 0000000000..966e9f9170 --- /dev/null +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/DiagnosticsVerbosityEmulatorTests.cs @@ -0,0 +1,180 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos.SDK.EmulatorTests +{ + using System; + using System.Text; + using System.Threading.Tasks; + using Microsoft.Azure.Cosmos.Diagnostics; + using Microsoft.VisualStudio.TestTools.UnitTesting; + using Newtonsoft.Json.Linq; + + [TestClass] + public class DiagnosticsVerbosityEmulatorTests + { + private Container Container = null; + private Cosmos.Database Database = null; + private CosmosClient Client = null; + + [TestInitialize] + public async Task TestInitialize() + { + CosmosClientOptions clientOptions = new CosmosClientOptions() + { + DiagnosticsVerbosity = DiagnosticsVerbosity.Summary, + MaxDiagnosticsSummarySizeBytes = 8192, + ConsistencyLevel = Cosmos.ConsistencyLevel.Session + }; + this.Client = TestCommon.CreateCosmosClient(clientOptions); + this.Database = (await this.Client.CreateDatabaseAsync(Guid.NewGuid().ToString())).Database; + this.Container = (await this.Database.CreateContainerAsync(Guid.NewGuid().ToString(), "/pk")).Container; + } + + [TestCleanup] + public async Task Cleanup() + { + await this.Database.DeleteAsync(); + this.Client.Dispose(); + } + + [TestMethod] + public async Task CreateItem_SummaryMode_ProducesValidJson() + { + ToDoActivity testItem = ToDoActivity.CreateRandomToDoActivity(); + ItemResponse response = await this.Container.CreateItemAsync( + testItem, new Cosmos.PartitionKey(testItem.pk)); + + string summary = response.Diagnostics.ToString(DiagnosticsVerbosity.Summary); + Assert.IsNotNull(summary); + + JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + Assert.IsNotNull(summaryObj, "Summary object should exist"); + Assert.AreEqual("Summary", summaryObj["DiagnosticsVerbosity"].ToString()); + Assert.IsTrue(summaryObj["TotalRequestCount"].Value() >= 1); + Assert.IsNotNull(summaryObj["RegionsSummary"]); + } + + [TestMethod] + public async Task ReadItem_SummaryMode_ContainsRegionInfo() + { + 
ToDoActivity testItem = ToDoActivity.CreateRandomToDoActivity(); + await this.Container.CreateItemAsync(testItem, new Cosmos.PartitionKey(testItem.pk)); + + ItemResponse response = await this.Container.ReadItemAsync( + testItem.id, new Cosmos.PartitionKey(testItem.pk)); + + string summary = response.Diagnostics.ToString(DiagnosticsVerbosity.Summary); + JObject parsed = JObject.Parse(summary); + JArray regions = (JArray)parsed["Summary"]["RegionsSummary"]; + + Assert.IsTrue(regions.Count >= 1, "Should have at least one region"); + JObject firstRegion = (JObject)regions[0]; + Assert.IsNotNull(firstRegion["Region"]); + Assert.IsNotNull(firstRegion["First"]); + Assert.IsTrue(firstRegion["RequestCount"].Value() >= 1); + } + + [TestMethod] + public async Task SummaryMode_SmallerThanDetailed() + { + ToDoActivity testItem = ToDoActivity.CreateRandomToDoActivity(); + ItemResponse response = await this.Container.CreateItemAsync( + testItem, new Cosmos.PartitionKey(testItem.pk)); + + string detailed = response.Diagnostics.ToString(); + string summary = response.Diagnostics.ToString(DiagnosticsVerbosity.Summary); + + int detailedBytes = Encoding.UTF8.GetByteCount(detailed); + int summaryBytes = Encoding.UTF8.GetByteCount(summary); + + Assert.IsTrue(summaryBytes <= detailedBytes, + $"Summary ({summaryBytes} bytes) should be <= Detailed ({detailedBytes} bytes)"); + } + + [TestMethod] + public async Task ParameterlessToString_UnchangedBySummaryOption() + { + ToDoActivity testItem = ToDoActivity.CreateRandomToDoActivity(); + ItemResponse response = await this.Container.CreateItemAsync( + testItem, new Cosmos.PartitionKey(testItem.pk)); + + string parameterless = response.Diagnostics.ToString(); + string explicitDetailed = response.Diagnostics.ToString(DiagnosticsVerbosity.Detailed); + + JObject parsedDefault = JObject.Parse(parameterless); + JObject parsedExplicit = JObject.Parse(explicitDetailed); + + // Both should have the trace name (detailed format), not a Summary wrapper + 
Assert.IsNotNull(parsedDefault["name"], "Parameterless ToString should have trace name"); + Assert.AreEqual(parsedDefault["name"].ToString(), parsedExplicit["name"].ToString()); + } + + [TestMethod] + public async Task SummaryCaching_ReturnsSameInstance() + { + ToDoActivity testItem = ToDoActivity.CreateRandomToDoActivity(); + ItemResponse response = await this.Container.CreateItemAsync( + testItem, new Cosmos.PartitionKey(testItem.pk)); + + string summary1 = response.Diagnostics.ToString(DiagnosticsVerbosity.Summary); + string summary2 = response.Diagnostics.ToString(DiagnosticsVerbosity.Summary); + + Assert.AreSame(summary1, summary2, "Summary should be cached via Lazy"); + } + + [TestMethod] + public async Task Query_SummaryMode_ProducesValidJson() + { + ToDoActivity testItem = ToDoActivity.CreateRandomToDoActivity(); + await this.Container.CreateItemAsync(testItem, new Cosmos.PartitionKey(testItem.pk)); + + FeedIterator feedIterator = this.Container.GetItemQueryIterator( + $"select * from c where c.id = '{testItem.id}'"); + + while (feedIterator.HasMoreResults) + { + FeedResponse feedResponse = await feedIterator.ReadNextAsync(); + string summary = feedResponse.Diagnostics.ToString(DiagnosticsVerbosity.Summary); + Assert.IsNotNull(summary); + + JObject parsed = JObject.Parse(summary); + Assert.IsNotNull(parsed["Summary"]); + Assert.AreEqual("Summary", parsed["Summary"]["DiagnosticsVerbosity"].ToString()); + } + } + + [TestMethod] + public async Task ReplaceItem_SummaryMode_ProducesValidJson() + { + ToDoActivity testItem = ToDoActivity.CreateRandomToDoActivity(); + await this.Container.CreateItemAsync(testItem, new Cosmos.PartitionKey(testItem.pk)); + + testItem.cost = 9999; + ItemResponse response = await this.Container.ReplaceItemAsync( + testItem, testItem.id, new Cosmos.PartitionKey(testItem.pk)); + + string summary = response.Diagnostics.ToString(DiagnosticsVerbosity.Summary); + JObject parsed = JObject.Parse(summary); + Assert.IsNotNull(parsed["Summary"]); 
+ Assert.IsTrue(parsed["Summary"]["TotalRequestCount"].Value() >= 1); + } + + [TestMethod] + public async Task DeleteItem_SummaryMode_ProducesValidJson() + { + ToDoActivity testItem = ToDoActivity.CreateRandomToDoActivity(); + await this.Container.CreateItemAsync(testItem, new Cosmos.PartitionKey(testItem.pk)); + + ItemResponse response = await this.Container.DeleteItemAsync( + testItem.id, new Cosmos.PartitionKey(testItem.pk)); + + string summary = response.Diagnostics.ToString(DiagnosticsVerbosity.Summary); + JObject parsed = JObject.Parse(summary); + Assert.IsNotNull(parsed["Summary"]); + Assert.AreEqual("Summary", parsed["Summary"]["DiagnosticsVerbosity"].ToString()); + } + } +} diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ChangeFeed/ChangeFeedEstimatorIteratorTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ChangeFeed/ChangeFeedEstimatorIteratorTests.cs index cca7863af2..3b816b79f0 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ChangeFeed/ChangeFeedEstimatorIteratorTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ChangeFeed/ChangeFeedEstimatorIteratorTests.cs @@ -384,6 +384,7 @@ static FeedIteratorInternal feedCreator(DocumentServiceLease lease, string conti Mock mockedContext = new Mock(MockBehavior.Strict); mockedContext.Setup(c => c.Client).Returns(MockCosmosUtil.CreateMockCosmosClient()); + mockedContext.Setup(c => c.ClientOptions).Returns(new CosmosClientOptions()); mockedContext.Setup(x => x.OperationHelperAsync>( It.Is(str => str.Contains("Change Feed Estimator")), It.IsAny(), @@ -492,6 +493,7 @@ private static ContainerInternal GetMockedContainer() Mock containerMock = new Mock(MockBehavior.Strict); Mock mockContext = new Mock(MockBehavior.Strict); mockContext.Setup(x => x.Client).Returns(mockClient.Object); + mockContext.Setup(x => x.ClientOptions).Returns(new CosmosClientOptions()); containerMock.Setup(c => c.ClientContext).Returns(mockContext.Object); 
containerMock.Setup(c => c.Id).Returns("containerId"); containerMock.Setup(c => c.Database.Id).Returns("databaseId"); diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetSDKAPI.net6.json b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetSDKAPI.net6.json index 0328f86c40..2c54925002 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetSDKAPI.net6.json +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Contracts/DotNetSDKAPI.net6.json @@ -2871,6 +2871,16 @@ "Attributes": [], "MethodInfo": "Int32 get_GatewayModeMaxConnectionLimit();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" }, + "Int32 get_MaxDiagnosticsSummarySizeBytes()": { + "Type": "Method", + "Attributes": [], + "MethodInfo": "Int32 get_MaxDiagnosticsSummarySizeBytes();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, + "Int32 MaxDiagnosticsSummarySizeBytes": { + "Type": "Property", + "Attributes": [], + "MethodInfo": "Int32 MaxDiagnosticsSummarySizeBytes;CanRead:True;CanWrite:True;Int32 get_MaxDiagnosticsSummarySizeBytes();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;Void set_MaxDiagnosticsSummarySizeBytes(Int32);IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, "Microsoft.Azure.Cosmos.AvailabilityStrategy AvailabilityStrategy": { "Type": "Property", "Attributes": [], @@ -2927,6 +2937,18 @@ ], "MethodInfo": "Microsoft.Azure.Cosmos.CosmosSerializer Serializer;CanRead:True;CanWrite:True;Microsoft.Azure.Cosmos.CosmosSerializer get_Serializer();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;Void 
set_Serializer(Microsoft.Azure.Cosmos.CosmosSerializer);IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" }, + "Microsoft.Azure.Cosmos.DiagnosticsVerbosity DiagnosticsVerbosity": { + "Type": "Property", + "Attributes": [], + "MethodInfo": "Microsoft.Azure.Cosmos.DiagnosticsVerbosity DiagnosticsVerbosity;CanRead:True;CanWrite:True;Microsoft.Azure.Cosmos.DiagnosticsVerbosity get_DiagnosticsVerbosity();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;Void set_DiagnosticsVerbosity(Microsoft.Azure.Cosmos.DiagnosticsVerbosity);IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, + "Microsoft.Azure.Cosmos.DiagnosticsVerbosity get_DiagnosticsVerbosity()[System.Runtime.CompilerServices.CompilerGeneratedAttribute()]": { + "Type": "Method", + "Attributes": [ + "CompilerGeneratedAttribute" + ], + "MethodInfo": "Microsoft.Azure.Cosmos.DiagnosticsVerbosity get_DiagnosticsVerbosity();IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, "Microsoft.Azure.Cosmos.FaultInjection.IFaultInjector FaultInjector": { "Type": "Property", "Attributes": [], @@ -3243,6 +3265,13 @@ ], "MethodInfo": "Void set_CosmosClientTelemetryOptions(Microsoft.Azure.Cosmos.CosmosClientTelemetryOptions);IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" }, + "Void set_DiagnosticsVerbosity(Microsoft.Azure.Cosmos.DiagnosticsVerbosity)[System.Runtime.CompilerServices.CompilerGeneratedAttribute()]": { + "Type": "Method", + "Attributes": [ + "CompilerGeneratedAttribute" + ], + "MethodInfo": "Void set_DiagnosticsVerbosity(Microsoft.Azure.Cosmos.DiagnosticsVerbosity);IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, "Void 
set_EnableContentResponseOnWrite(System.Nullable`1[System.Boolean])[System.Runtime.CompilerServices.CompilerGeneratedAttribute()]": { "Type": "Method", "Attributes": [ @@ -3284,6 +3313,11 @@ ], "MethodInfo": "Void set_LimitToEndpoint(Boolean);IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" }, + "Void set_MaxDiagnosticsSummarySizeBytes(Int32)": { + "Type": "Method", + "Attributes": [], + "MethodInfo": "Void set_MaxDiagnosticsSummarySizeBytes(Int32);IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, "Void set_MaxRequestsPerTcpConnection(System.Nullable`1[System.Int32])": { "Type": "Method", "Attributes": [], @@ -3484,6 +3518,11 @@ "Attributes": [], "MethodInfo": "System.String ToString();IsAbstract:True;IsStatic:False;IsVirtual:True;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" }, + "System.String ToString(Microsoft.Azure.Cosmos.DiagnosticsVerbosity)": { + "Type": "Method", + "Attributes": [], + "MethodInfo": "System.String ToString(Microsoft.Azure.Cosmos.DiagnosticsVerbosity);IsAbstract:True;IsStatic:False;IsVirtual:True;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, "System.TimeSpan GetClientElapsedTime()": { "Type": "Method", "Attributes": [], @@ -4438,6 +4477,27 @@ }, "NestedTypes": {} }, + "Microsoft.Azure.Cosmos.DiagnosticsVerbosity;System.Enum;IsAbstract:False;IsSealed:True;IsInterface:False;IsEnum:True;IsClass:False;IsValueType:True;IsNested:False;IsGenericType:False;IsSerializable:True": { + "Subclasses": {}, + "Members": { + "Int32 value__": { + "Type": "Field", + "Attributes": [], + "MethodInfo": "Int32 value__;IsInitOnly:False;IsStatic:False;" + }, + "Microsoft.Azure.Cosmos.DiagnosticsVerbosity Detailed": { + "Type": "Field", + "Attributes": [], + "MethodInfo": "Microsoft.Azure.Cosmos.DiagnosticsVerbosity Detailed;IsInitOnly:False;IsStatic:True;" + }, + "Microsoft.Azure.Cosmos.DiagnosticsVerbosity Summary": { + 
"Type": "Field", + "Attributes": [], + "MethodInfo": "Microsoft.Azure.Cosmos.DiagnosticsVerbosity Summary;IsInitOnly:False;IsStatic:True;" + } + }, + "NestedTypes": {} + }, "Microsoft.Azure.Cosmos.DistanceFunction;System.Enum;IsAbstract:False;IsSealed:True;IsInterface:False;IsEnum:True;IsClass:False;IsValueType:True;IsNested:False;IsGenericType:False;IsSerializable:True": { "Subclasses": {}, "Members": { @@ -5117,6 +5177,11 @@ "Attributes": [], "MethodInfo": "Microsoft.Azure.Cosmos.Fluent.CosmosClientBuilder WithContentResponseOnWrite(Boolean);IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" }, + "Microsoft.Azure.Cosmos.Fluent.CosmosClientBuilder WithDiagnosticsVerbosity(Microsoft.Azure.Cosmos.DiagnosticsVerbosity)": { + "Type": "Method", + "Attributes": [], + "MethodInfo": "Microsoft.Azure.Cosmos.Fluent.CosmosClientBuilder WithDiagnosticsVerbosity(Microsoft.Azure.Cosmos.DiagnosticsVerbosity);IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, "Microsoft.Azure.Cosmos.Fluent.CosmosClientBuilder WithCustomAccountEndpoints(System.Collections.Generic.IEnumerable`1[System.Uri])": { "Type": "Method", "Attributes": [], @@ -5142,6 +5207,11 @@ "Attributes": [], "MethodInfo": "Microsoft.Azure.Cosmos.Fluent.CosmosClientBuilder WithLimitToEndpoint(Boolean);IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" }, + "Microsoft.Azure.Cosmos.Fluent.CosmosClientBuilder WithMaxDiagnosticsSummarySizeBytes(Int32)": { + "Type": "Method", + "Attributes": [], + "MethodInfo": "Microsoft.Azure.Cosmos.Fluent.CosmosClientBuilder WithMaxDiagnosticsSummarySizeBytes(Int32);IsAbstract:False;IsStatic:False;IsVirtual:False;IsGenericMethod:False;IsConstructor:False;IsFinal:False;" + }, "Microsoft.Azure.Cosmos.Fluent.CosmosClientBuilder WithPriorityLevel(Microsoft.Azure.Cosmos.PriorityLevel)": { "Type": "Method", "Attributes": [], 
diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs index 1fe9127de4..4b681bcced 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosClientOptionsUnitTests.cs @@ -17,6 +17,7 @@ namespace Microsoft.Azure.Cosmos.Tests using System.Text; using global::Azure.Core; using Microsoft.Azure.Cosmos.Fluent; + using Microsoft.Azure.Cosmos.Util; using Microsoft.Azure.Documents; using Microsoft.Azure.Documents.Client; using Microsoft.VisualStudio.TestTools.UnitTesting; @@ -1335,5 +1336,227 @@ public int Compare(object x, object y) return 1; } } + + [TestMethod] + public void CosmosClientOptions_DiagnosticsVerbosity_DefaultIsDetailed() + { + CosmosClientOptions options = new CosmosClientOptions(); + Assert.AreEqual(DiagnosticsVerbosity.Detailed, options.DiagnosticsVerbosity); + } + + [TestMethod] + public void CosmosClientOptions_DiagnosticsVerbosity_CanSetToSummary() + { + CosmosClientOptions options = new CosmosClientOptions + { + DiagnosticsVerbosity = DiagnosticsVerbosity.Summary + }; + + Assert.AreEqual(DiagnosticsVerbosity.Summary, options.DiagnosticsVerbosity); + } + + [TestMethod] + public void CosmosClientOptions_MaxDiagnosticsSummarySizeBytes_DefaultIs8192() + { + CosmosClientOptions options = new CosmosClientOptions(); + Assert.AreEqual(8192, options.MaxDiagnosticsSummarySizeBytes); + } + + [TestMethod] + [ExpectedException(typeof(ArgumentOutOfRangeException))] + public void CosmosClientOptions_MaxDiagnosticsSummarySizeBytes_ThrowsBelowMinimum() + { + CosmosClientOptions options = new CosmosClientOptions + { + MaxDiagnosticsSummarySizeBytes = 2048 + }; + } + + [TestMethod] + public void CosmosClientOptions_MaxDiagnosticsSummarySizeBytes_AcceptsMinimum() + { + CosmosClientOptions options = new 
CosmosClientOptions + { + MaxDiagnosticsSummarySizeBytes = 4096 + }; + + Assert.AreEqual(4096, options.MaxDiagnosticsSummarySizeBytes); + } + + [TestMethod] + public void CosmosClientOptions_MaxDiagnosticsSummarySizeBytes_AcceptsLargeValue() + { + CosmosClientOptions options = new CosmosClientOptions + { + MaxDiagnosticsSummarySizeBytes = 65536 + }; + + Assert.AreEqual(65536, options.MaxDiagnosticsSummarySizeBytes); + } + + [TestMethod] + public void CosmosClientOptions_EnvVar_DiagnosticsVerbosity_Summary() + { + string original = Environment.GetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable); + try + { + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable, "Summary"); + CosmosClientOptions options = new CosmosClientOptions(); + Assert.AreEqual(DiagnosticsVerbosity.Summary, options.DiagnosticsVerbosity); + } + finally + { + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable, original); + } + } + + [TestMethod] + public void CosmosClientOptions_EnvVar_DiagnosticsVerbosity_CaseInsensitive() + { + string original = Environment.GetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable); + try + { + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable, "summary"); + CosmosClientOptions options = new CosmosClientOptions(); + Assert.AreEqual(DiagnosticsVerbosity.Summary, options.DiagnosticsVerbosity); + } + finally + { + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable, original); + } + } + + [TestMethod] + public void CosmosClientOptions_EnvVar_DiagnosticsVerbosity_InvalidIgnored() + { + string original = Environment.GetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable); + try + { + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable, "InvalidValue"); + CosmosClientOptions options = new CosmosClientOptions(); + 
Assert.AreEqual(DiagnosticsVerbosity.Detailed, options.DiagnosticsVerbosity); + } + finally + { + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable, original); + } + } + + [TestMethod] + public void CosmosClientOptions_EnvVar_MaxSummarySize() + { + string original = Environment.GetEnvironmentVariable(ConfigurationManager.DiagnosticsMaxSummarySizeVariable); + try + { + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsMaxSummarySizeVariable, "16384"); + CosmosClientOptions options = new CosmosClientOptions(); + Assert.AreEqual(16384, options.MaxDiagnosticsSummarySizeBytes); + } + finally + { + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsMaxSummarySizeVariable, original); + } + } + + [TestMethod] + public void CosmosClientOptions_EnvVar_MaxSummarySize_BelowMinimumIgnored() + { + string original = Environment.GetEnvironmentVariable(ConfigurationManager.DiagnosticsMaxSummarySizeVariable); + try + { + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsMaxSummarySizeVariable, "1024"); + CosmosClientOptions options = new CosmosClientOptions(); + Assert.AreEqual(8192, options.MaxDiagnosticsSummarySizeBytes, "Below-minimum env var should be ignored"); + } + finally + { + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsMaxSummarySizeVariable, original); + } + } + + [TestMethod] + public void CosmosClientOptions_EnvVar_ExplicitPropertyOverridesEnvVar() + { + string original = Environment.GetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable); + try + { + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable, "Summary"); + CosmosClientOptions options = new CosmosClientOptions + { + DiagnosticsVerbosity = DiagnosticsVerbosity.Detailed + }; + Assert.AreEqual(DiagnosticsVerbosity.Detailed, options.DiagnosticsVerbosity); + } + finally + { + 
Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsVerbosityVariable, original); + } + } + + [TestMethod] + public void CosmosClientBuilder_WithDiagnosticsVerbosity() + { + CosmosClientBuilder builder = new CosmosClientBuilder(CosmosClientOptionsUnitTests.AccountEndpoint, MockCosmosUtil.RandomInvalidCorrectlyFormatedAuthKey); + builder.WithDiagnosticsVerbosity(DiagnosticsVerbosity.Summary); + + CosmosClient client = builder.Build(new MockDocumentClient()); + Assert.AreEqual(DiagnosticsVerbosity.Summary, client.ClientOptions.DiagnosticsVerbosity); + } + + [TestMethod] + public void CosmosClientBuilder_WithMaxDiagnosticsSummarySizeBytes() + { + CosmosClientBuilder builder = new CosmosClientBuilder(CosmosClientOptionsUnitTests.AccountEndpoint, MockCosmosUtil.RandomInvalidCorrectlyFormatedAuthKey); + builder.WithMaxDiagnosticsSummarySizeBytes(16384); + + CosmosClient client = builder.Build(new MockDocumentClient()); + Assert.AreEqual(16384, client.ClientOptions.MaxDiagnosticsSummarySizeBytes); + } + + [TestMethod] + [ExpectedException(typeof(ArgumentOutOfRangeException))] + public void CosmosClientBuilder_WithMaxDiagnosticsSummarySizeBytes_TooSmallThrows() + { + CosmosClientBuilder builder = new CosmosClientBuilder(CosmosClientOptionsUnitTests.AccountEndpoint, MockCosmosUtil.RandomInvalidCorrectlyFormatedAuthKey); + builder.WithMaxDiagnosticsSummarySizeBytes(2048); + } + + [TestMethod] + [ExpectedException(typeof(ArgumentOutOfRangeException))] + public void CosmosClientOptions_MaxDiagnosticsSummarySizeBytes_ThrowsAboveMaximum() + { + CosmosClientOptions options = new CosmosClientOptions + { + MaxDiagnosticsSummarySizeBytes = CosmosClientOptions.MaxAllowedDiagnosticsSummarySizeBytes + 1 + }; + } + + [TestMethod] + public void CosmosClientOptions_MaxDiagnosticsSummarySizeBytes_AcceptsMaximum() + { + CosmosClientOptions options = new CosmosClientOptions + { + MaxDiagnosticsSummarySizeBytes = CosmosClientOptions.MaxAllowedDiagnosticsSummarySizeBytes + }; + 
Assert.AreEqual(CosmosClientOptions.MaxAllowedDiagnosticsSummarySizeBytes, options.MaxDiagnosticsSummarySizeBytes); + } + + [TestMethod] + public void CosmosClientOptions_EnvVar_MaxSummarySize_AboveMaximumIgnored() + { + string original = Environment.GetEnvironmentVariable(ConfigurationManager.DiagnosticsMaxSummarySizeVariable); + try + { + Environment.SetEnvironmentVariable( + ConfigurationManager.DiagnosticsMaxSummarySizeVariable, + (CosmosClientOptions.MaxAllowedDiagnosticsSummarySizeBytes + 1).ToString()); + CosmosClientOptions options = new CosmosClientOptions(); + Assert.AreEqual(8192, options.MaxDiagnosticsSummarySizeBytes, "Above-maximum env var should be ignored"); + } + finally + { + Environment.SetEnvironmentVariable(ConfigurationManager.DiagnosticsMaxSummarySizeVariable, original); + } + } } } \ No newline at end of file diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryBaselineTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryBaselineTests.cs new file mode 100644 index 0000000000..af9c662e3c --- /dev/null +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryBaselineTests.cs @@ -0,0 +1,383 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos.Tests.Tracing +{ + using System; + using System.Collections.Generic; + using System.Linq; + using System.Reflection; + using Microsoft.Azure.Cosmos.Diagnostics; + using Microsoft.Azure.Cosmos.Tracing; + using Microsoft.Azure.Cosmos.Tracing.TraceData; + using Microsoft.Azure.Documents; + using Microsoft.Azure.Documents.Collections; + using Microsoft.VisualStudio.TestTools.UnitTesting; + using Newtonsoft.Json.Linq; + using static Microsoft.Azure.Cosmos.Tracing.TraceData.ClientSideRequestStatisticsTraceDatum; + + /// + /// Baseline tests that validate the exact JSON schema produced by DiagnosticsSummaryWriter. + /// These tests ensure the summary output structure does not change accidentally across releases. + /// + [TestClass] + public class DiagnosticsSummaryBaselineTests + { + /// + /// Validates the exact set of top-level fields in a single-request summary. + /// This catches any accidental field additions/removals/renames. + /// + [TestMethod] + public void Baseline_SingleRequest_SchemaFields() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime fixedTime = new DateTime(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, fixedTime); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + + // Top-level summary fields (exact set) + HashSet expectedTopLevelFields = new HashSet + { + "DiagnosticsVerbosity", + "SummaryFormatVersion", + "TotalDurationMs", + "TotalRequestCharge", + "TotalRequestCount", + "RegionsSummary" + }; + + HashSet actualFields = new HashSet(summaryObj.Properties().Select(p => p.Name)); + Assert.IsTrue(expectedTopLevelFields.SetEquals(actualFields), + $"Summary top-level fields mismatch. 
Expected: [{string.Join(", ", expectedTopLevelFields.OrderBy(x => x))}], " + + $"Actual: [{string.Join(", ", actualFields.OrderBy(x => x))}]"); + } + + /// + /// Validates the exact set of fields in a region summary entry. + /// + [TestMethod] + public void Baseline_RegionSummary_SchemaFields() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime fixedTime = new DateTime(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, fixedTime); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject region = (JObject)parsed["Summary"]["RegionsSummary"][0]; + + // Single-request region has: Region, RequestCount, TotalRequestCharge, First + HashSet expectedFields = new HashSet + { + "Region", + "RequestCount", + "TotalRequestCharge", + "First" + }; + + HashSet actualFields = new HashSet(region.Properties().Select(p => p.Name)); + Assert.IsTrue(expectedFields.SetEquals(actualFields), + $"Region fields mismatch. Expected: [{string.Join(", ", expectedFields.OrderBy(x => x))}], " + + $"Actual: [{string.Join(", ", actualFields.OrderBy(x => x))}]"); + } + + /// + /// Validates the exact set of fields in a request entry detail. 
+ /// + [TestMethod] + public void Baseline_RequestEntryDetail_SchemaFields() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime fixedTime = new DateTime(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, fixedTime); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject first = (JObject)parsed["Summary"]["RegionsSummary"][0]["First"]; + + HashSet expectedFields = new HashSet + { + "StatusCode", + "SubStatusCode", + "RequestCharge", + "DurationMs", + "Region", + "Endpoint", + "RequestStartTimeUtc", + "OperationType", + "ResourceType" + }; + + HashSet actualFields = new HashSet(first.Properties().Select(p => p.Name)); + Assert.IsTrue(expectedFields.SetEquals(actualFields), + $"Request entry fields mismatch. Expected: [{string.Join(", ", expectedFields.OrderBy(x => x))}], " + + $"Actual: [{string.Join(", ", actualFields.OrderBy(x => x))}]"); + } + + /// + /// Validates the exact set of fields in an aggregated group. 
+ /// + [TestMethod] + public void Baseline_AggregatedGroup_SchemaFields() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime fixedTime = new DateTime(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc); + + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 1, fixedTime); + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 10, fixedTime.AddMilliseconds(100)); + AddStoreResponseStatistic(trace, "R1", StatusCodes.Ok, SubStatusCodes.Unknown, 5, 5, fixedTime.AddMilliseconds(200)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject group = (JObject)parsed["Summary"]["RegionsSummary"][0]["AggregatedGroups"][0]; + + HashSet expectedFields = new HashSet + { + "StatusCode", + "SubStatusCode", + "Count", + "TotalRequestCharge", + "MinDurationMs", + "MaxDurationMs", + "P50DurationMs", + "AvgDurationMs" + }; + + HashSet actualFields = new HashSet(group.Properties().Select(p => p.Name)); + Assert.IsTrue(expectedFields.SetEquals(actualFields), + $"Aggregated group fields mismatch. Expected: [{string.Join(", ", expectedFields.OrderBy(x => x))}], " + + $"Actual: [{string.Join(", ", actualFields.OrderBy(x => x))}]"); + } + + /// + /// Validates the exact set of fields in a truncated summary. 
+ /// + [TestMethod] + public void Baseline_TruncatedSummary_SchemaFields() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime fixedTime = new DateTime(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc); + + for (int r = 0; r < 20; r++) + { + for (int i = 0; i < 10; i++) + { + AddStoreResponseStatistic(trace, $"Region {r}", StatusCodes.TooManyRequests, + SubStatusCodes.Unknown, 0.0, 5 + i, fixedTime.AddMilliseconds(r * 1000 + i * 100)); + } + } + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 512); + JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + + HashSet expectedFields = new HashSet + { + "DiagnosticsVerbosity", + "SummaryFormatVersion", + "TotalDurationMs", + "TotalRequestCount", + "TotalRequestCharge", + "Truncated", + "Message" + }; + + HashSet actualFields = new HashSet(summaryObj.Properties().Select(p => p.Name)); + Assert.IsTrue(expectedFields.SetEquals(actualFields), + $"Truncated summary fields mismatch. Expected: [{string.Join(", ", expectedFields.OrderBy(x => x))}], " + + $"Actual: [{string.Join(", ", actualFields.OrderBy(x => x))}]"); + } + + /// + /// Validates the full region summary schema when both First, Last, and AggregatedGroups are present. 
+        ///
+        [TestMethod]
+        public void Baseline_FullRegionSummary_SchemaFields()
+        {
+            using ITrace trace = Trace.GetRootTrace("ReadItemAsync");
+            DateTime fixedTime = new DateTime(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc);
+
+            // Four requests in one region: a first entry, two middle entries and a last
+            // entry, so the region object is expected to carry First, Last and AggregatedGroups.
+            AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 5, fixedTime);
+            AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 10, fixedTime.AddMilliseconds(100));
+            AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 15, fixedTime.AddMilliseconds(200));
+            AddStoreResponseStatistic(trace, "R1", StatusCodes.Ok, SubStatusCodes.Unknown, 5, 12, fixedTime.AddMilliseconds(300));
+
+            string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192);
+            JObject parsed = JObject.Parse(summary);
+            JObject region = (JObject)parsed["Summary"]["RegionsSummary"][0];
+
+            // Baseline schema of a fully populated region entry (exact set match).
+            HashSet<string> expectedFields = new HashSet<string>
+            {
+                "Region",
+                "RequestCount",
+                "TotalRequestCharge",
+                "First",
+                "Last",
+                "AggregatedGroups"
+            };
+
+            HashSet<string> actualFields = new HashSet<string>(region.Properties().Select(p => p.Name));
+            Assert.IsTrue(expectedFields.SetEquals(actualFields),
+                $"Full region fields mismatch. Expected: [{string.Join(", ", expectedFields.OrderBy(x => x))}], " +
+                $"Actual: [{string.Join(", ", actualFields.OrderBy(x => x))}]");
+        }
+
+        ///
+        /// Validates field types are correct (numbers are numbers, strings are strings, etc.).
+        ///
+        [TestMethod]
+        public void Baseline_FieldTypes_Consistent()
+        {
+            using ITrace trace = Trace.GetRootTrace("ReadItemAsync");
+            DateTime fixedTime = new DateTime(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc);
+
+            AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 5, fixedTime);
+            AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 10, fixedTime.AddMilliseconds(100));
+            AddStoreResponseStatistic(trace, "R1", StatusCodes.Ok, SubStatusCodes.Unknown, 5, 12, fixedTime.AddMilliseconds(200));
+
+            string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192);
+            JObject parsed = JObject.Parse(summary);
+            JObject summaryObj = (JObject)parsed["Summary"];
+
+            // A whole-valued double is parsed back by Json.NET as Integer, so numeric
+            // fields are accepted as either Float or Integer.
+            static void AssertNumeric(JToken token, string fieldName)
+            {
+                Assert.IsTrue(token.Type == JTokenType.Float
+                    || token.Type == JTokenType.Integer,
+                    $"{fieldName} should be numeric");
+            }
+
+            // Top-level types
+            Assert.AreEqual(JTokenType.String, summaryObj["DiagnosticsVerbosity"].Type);
+            Assert.AreEqual(JTokenType.Integer, summaryObj["SummaryFormatVersion"].Type);
+            Assert.AreEqual(1, summaryObj["SummaryFormatVersion"].Value<int>());
+            AssertNumeric(summaryObj["TotalDurationMs"], "TotalDurationMs");
+            AssertNumeric(summaryObj["TotalRequestCharge"], "TotalRequestCharge");
+            Assert.AreEqual(JTokenType.Integer, summaryObj["TotalRequestCount"].Type);
+            Assert.AreEqual(JTokenType.Array, summaryObj["RegionsSummary"].Type);
+
+            // Region types
+            JObject region = (JObject)summaryObj["RegionsSummary"][0];
+            Assert.AreEqual(JTokenType.String, region["Region"].Type);
+            Assert.AreEqual(JTokenType.Integer, region["RequestCount"].Type);
+            AssertNumeric(region["TotalRequestCharge"], "TotalRequestCharge");
+            Assert.AreEqual(JTokenType.Object, region["First"].Type);
+            Assert.AreEqual(JTokenType.Object, region["Last"].Type);
+
+            // Request entry types
+            JObject first = (JObject)region["First"];
+            Assert.AreEqual(JTokenType.Integer, first["StatusCode"].Type);
+            Assert.AreEqual(JTokenType.Integer, first["SubStatusCode"].Type);
+            AssertNumeric(first["RequestCharge"], "RequestCharge");
+            AssertNumeric(first["DurationMs"], "DurationMs");
+            Assert.AreEqual(JTokenType.String, first["Region"].Type);
+        }
+
+        ///
+        /// Validates that the DiagnosticsVerbosity field always has the value "Summary".
+        ///
+        [TestMethod]
+        public void Baseline_DiagnosticsVerbosityField_AlwaysSummary()
+        {
+            // No statistics are added: the field is checked on a summary of an empty trace.
+            using ITrace trace = Trace.GetRootTrace("ReadItemAsync");
+            string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192);
+            JObject parsed = JObject.Parse(summary);
+            Assert.AreEqual("Summary", parsed["Summary"]["DiagnosticsVerbosity"].Value<string>());
+        }
+
+        ///
+        /// Validates that the truncated summary has the correct Truncated and Message fields.
+        ///
+        [TestMethod]
+        public void Baseline_TruncatedMessage_Content()
+        {
+            using ITrace trace = Trace.GetRootTrace("ReadItemAsync");
+            DateTime fixedTime = new DateTime(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc);
+
+            // 20 regions x 10 requests each: intended to overflow the 512-byte budget
+            // passed to WriteSummary below so that the truncated form is produced.
+            for (int r = 0; r < 20; r++)
+            {
+                for (int i = 0; i < 10; i++)
+                {
+                    AddStoreResponseStatistic(trace, $"Region {r}", StatusCodes.TooManyRequests,
+                        SubStatusCodes.Unknown, 0.0, 5, fixedTime.AddMilliseconds(r * 1000 + i * 100));
+                }
+            }
+
+            string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 512);
+            JObject parsed = JObject.Parse(summary);
+            JObject summaryObj = (JObject)parsed["Summary"];
+
+            Assert.IsTrue(summaryObj["Truncated"].Value<bool>());
+            Assert.AreEqual(JTokenType.Boolean, summaryObj["Truncated"].Type);
+            string message = summaryObj["Message"].Value<string>();
+            Assert.IsTrue(message.Contains("truncated", StringComparison.OrdinalIgnoreCase),
+                $"Truncation message should mention 'truncated'. Actual: {message}");
+            Assert.IsTrue(message.Contains("Detailed", StringComparison.OrdinalIgnoreCase),
+                $"Truncation message should mention 'Detailed' mode. Actual: {message}");
+        }
+
+        #region Helpers
+
+        // Appends one synthetic StoreResponseStatistics entry to the trace's
+        // ClientSideRequestStatisticsTraceDatum (created on first use). The entry spans
+        // requestStartTime .. requestStartTime + durationMs and is always recorded as a
+        // Document Read. A null region is passed through unchanged; only the endpoint
+        // host name substitutes "unknown" for it.
+        private static void AddStoreResponseStatistic(
+            ITrace trace,
+            string region,
+            StatusCodes statusCode,
+            SubStatusCodes subStatusCode,
+            double requestCharge,
+            double durationMs,
+            DateTime requestStartTime)
+        {
+            ClientSideRequestStatisticsTraceDatum datum = GetOrCreateDatum(trace);
+
+            // Header values are wire-format text, so they are formatted with the invariant
+            // culture; otherwise a fractional charge would be written as "3,5" on machines
+            // whose current culture uses a decimal comma.
+            StoreResponse storeResponse = new StoreResponse();
+            storeResponse.Status = (int)statusCode;
+            storeResponse.Headers = new DictionaryNameValueCollection();
+            storeResponse.Headers[HttpConstants.HttpHeaders.RequestCharge] = requestCharge.ToString(System.Globalization.CultureInfo.InvariantCulture);
+            storeResponse.Headers[WFConstants.BackendHeaders.SubStatus] = ((int)subStatusCode).ToString(System.Globalization.CultureInfo.InvariantCulture);
+
+            ReferenceCountedDisposable<StoreResult> storeResultRef = StoreResult.CreateForTesting(storeResponse: storeResponse);
+
+            DateTime responseTime = requestStartTime.AddMilliseconds(durationMs);
+
+            StoreResponseStatistics stats = new StoreResponseStatistics(
+                requestStartTime: requestStartTime,
+                requestResponseTime: responseTime,
+                storeResult: storeResultRef.Target,
+                resourceType: ResourceType.Document,
+                operationType: OperationType.Read,
+                requestSessionToken: null,
+                locationEndpoint: new Uri("https://account-" + (region ?? "unknown").Replace(" ", "").ToLowerInvariant() + ".documents.azure.com"),
+                region: region);
+
+            // The datum keeps its store statistics in a private list, so the entry is
+            // injected through reflection. Fail with a readable message if the field is
+            // ever renamed instead of surfacing a NullReferenceException.
+            FieldInfo field = typeof(ClientSideRequestStatisticsTraceDatum)
+                .GetField("storeResponseStatistics", BindingFlags.NonPublic | BindingFlags.Instance);
+            Assert.IsNotNull(field, "Private field 'storeResponseStatistics' was not found on ClientSideRequestStatisticsTraceDatum");
+            List<StoreResponseStatistics> list = (List<StoreResponseStatistics>)field.GetValue(datum);
+            list.Add(stats);
+        }
+
+        // Returns the ClientSideRequestStatisticsTraceDatum stored on the trace under the
+        // "ClientSideStats" key, creating and registering a new one when none exists yet.
+        private static ClientSideRequestStatisticsTraceDatum GetOrCreateDatum(ITrace trace)
+        {
+            const string datumKey = "ClientSideStats";
+
+            if (trace.TryGetDatum(datumKey, out object existing)
+                && existing is ClientSideRequestStatisticsTraceDatum existingDatum)
+            {
+                return existingDatum;
+            }
+
+            ClientSideRequestStatisticsTraceDatum datum = new ClientSideRequestStatisticsTraceDatum(
+                DateTime.UtcNow,
+                trace);
+            trace.AddDatum(datumKey, datum);
+            return datum;
+        }
+
+        #endregion
+    }
+}
diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryWriterTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryWriterTests.cs
new file mode 100644
index 0000000000..39f03b39c7
--- /dev/null
+++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Diagnostics/DiagnosticsSummaryWriterTests.cs
@@ -0,0 +1,689 @@
+//------------------------------------------------------------
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos.Tests.Tracing +{ + using System; + using System.Collections.Generic; + using System.Net; + using System.Net.Http; + using System.Net.Http.Headers; + using System.Reflection; + using System.Linq; + using Microsoft.Azure.Cosmos.Diagnostics; + using Microsoft.Azure.Cosmos.Tracing; + using Microsoft.Azure.Cosmos.Tracing.TraceData; + using Microsoft.Azure.Documents; + using Microsoft.Azure.Documents.Collections; + using Microsoft.VisualStudio.TestTools.UnitTesting; + using Newtonsoft.Json.Linq; + using static Microsoft.Azure.Cosmos.Tracing.TraceData.ClientSideRequestStatisticsTraceDatum; + + [TestClass] + public class DiagnosticsSummaryWriterTests + { + [TestMethod] + public void DiagnosticsVerbosity_DefaultIsDetailed() + { + Assert.AreEqual(0, (int)DiagnosticsVerbosity.Detailed); + Assert.AreEqual(DiagnosticsVerbosity.Detailed, default(DiagnosticsVerbosity)); + } + + [TestMethod] + public void CosmosClientOptions_DiagnosticsVerbosity_DefaultValue() + { + CosmosClientOptions options = new CosmosClientOptions(); + Assert.AreEqual(DiagnosticsVerbosity.Detailed, options.DiagnosticsVerbosity); + } + + [TestMethod] + public void CosmosClientOptions_MaxSummarySizeBytes_DefaultValue() + { + CosmosClientOptions options = new CosmosClientOptions(); + Assert.AreEqual(8192, options.MaxDiagnosticsSummarySizeBytes); + } + + [TestMethod] + [ExpectedException(typeof(ArgumentOutOfRangeException))] + public void CosmosClientOptions_MaxSummarySizeBytes_Validation_TooSmall() + { + CosmosClientOptions options = new CosmosClientOptions + { + MaxDiagnosticsSummarySizeBytes = 2048 + }; + } + + [TestMethod] + public void CosmosClientOptions_MaxSummarySizeBytes_Validation_MinAllowed() + { + CosmosClientOptions options = new CosmosClientOptions + { + MaxDiagnosticsSummarySizeBytes = 4096 + }; + Assert.AreEqual(4096, options.MaxDiagnosticsSummarySizeBytes); + } + + [TestMethod] + public void 
ToString_Parameterless_AlwaysDetailed() + { + // Parameterless ToString() must always return detailed output + // regardless of any options setting. We verify structural equivalence + // (same keys in JSON) rather than exact string match since duration changes. + using ITrace trace = Trace.GetRootTrace("TestOperation"); + CosmosTraceDiagnostics diagnostics = new CosmosTraceDiagnostics(trace); + string detailed = diagnostics.ToString(); + string explicitDetailed = diagnostics.ToString(DiagnosticsVerbosity.Detailed); + + JObject parsedDefault = JObject.Parse(detailed); + JObject parsedExplicit = JObject.Parse(explicitDetailed); + + // Same structure: both have name, Summary, start datetime + Assert.AreEqual(parsedDefault["name"].ToString(), parsedExplicit["name"].ToString()); + Assert.AreEqual(parsedDefault["start datetime"].ToString(), parsedExplicit["start datetime"].ToString()); + } + + [TestMethod] + public void ToString_Summary_ProducesValidJson() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, DateTime.UtcNow); + + CosmosTraceDiagnostics diagnostics = new CosmosTraceDiagnostics(trace); + string summary = diagnostics.ToString(DiagnosticsVerbosity.Summary); + + JObject parsed = JObject.Parse(summary); + Assert.IsNotNull(parsed["Summary"], "Summary object should exist"); + Assert.AreEqual("Summary", parsed["Summary"]["DiagnosticsVerbosity"].ToString()); + } + + [TestMethod] + public void Summary_SingleRegion_SingleRequest() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, DateTime.UtcNow); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + + Assert.AreEqual(1, summaryObj["TotalRequestCount"].Value()); + 
Assert.IsNotNull(summaryObj["TotalRequestCharge"]); + + JArray regions = (JArray)summaryObj["RegionsSummary"]; + Assert.AreEqual(1, regions.Count); + + JObject region = (JObject)regions[0]; + Assert.AreEqual("West US 2", region["Region"].ToString()); + Assert.AreEqual(1, region["RequestCount"].Value()); + Assert.IsNotNull(region["First"]); + Assert.IsNull(region["Last"], "Last should be omitted when only 1 request"); + Assert.IsNull(region["AggregatedGroups"], "No aggregated groups for single request"); + } + + [TestMethod] + public void Summary_SingleRegion_TwoRequests() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + AddStoreResponseStatistic(trace, "East US", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0.0, 5, baseTime); + AddStoreResponseStatistic(trace, "East US", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 12, baseTime.AddSeconds(1)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + + Assert.AreEqual(2, summaryObj["TotalRequestCount"].Value()); + + JArray regions = (JArray)summaryObj["RegionsSummary"]; + JObject region = (JObject)regions[0]; + Assert.AreEqual(2, region["RequestCount"].Value()); + Assert.IsNotNull(region["First"]); + Assert.IsNotNull(region["Last"]); + Assert.AreEqual((int)StatusCodes.TooManyRequests, region["First"]["StatusCode"].Value()); + Assert.AreEqual((int)StatusCodes.Ok, region["Last"]["StatusCode"].Value()); + Assert.IsNull(region["AggregatedGroups"], "No middle entries for exactly 2 requests"); + } + + [TestMethod] + public void Summary_SingleRegion_ManyRetries_429() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + + // First request: 429 + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0.0, 5, baseTime); + + // 48 middle retries: all 429 + for 
(int i = 1; i <= 48; i++) + { + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0.0, 3 + i, baseTime.AddMilliseconds(i * 100)); + } + + // Last request: 200 OK + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 12, baseTime.AddSeconds(5)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + + Assert.AreEqual(50, summaryObj["TotalRequestCount"].Value()); + + JArray regions = (JArray)summaryObj["RegionsSummary"]; + Assert.AreEqual(1, regions.Count); + + JObject region = (JObject)regions[0]; + Assert.AreEqual(50, region["RequestCount"].Value()); + Assert.AreEqual((int)StatusCodes.TooManyRequests, region["First"]["StatusCode"].Value()); + Assert.AreEqual((int)StatusCodes.Ok, region["Last"]["StatusCode"].Value()); + + JArray groups = (JArray)region["AggregatedGroups"]; + Assert.AreEqual(1, groups.Count, "All middle entries are 429 so 1 aggregated group"); + + JObject group = (JObject)groups[0]; + Assert.AreEqual((int)StatusCodes.TooManyRequests, group["StatusCode"].Value()); + Assert.AreEqual(48, group["Count"].Value()); + Assert.IsTrue(group["MinDurationMs"].Value() > 0); + Assert.IsTrue(group["MaxDurationMs"].Value() >= group["MinDurationMs"].Value()); + Assert.IsTrue(group["P50DurationMs"].Value() > 0); + Assert.IsTrue(group["AvgDurationMs"].Value() > 0); + } + + [TestMethod] + public void Summary_MultiRegion_Failover() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + + // West US 2: 3 requests + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0.0, 5, baseTime); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0.0, 10, baseTime.AddMilliseconds(100)); + AddStoreResponseStatistic(trace, "West US 2", 
StatusCodes.ServiceUnavailable, SubStatusCodes.Unknown, 0.0, 15, baseTime.AddMilliseconds(200)); + + // East US 2: 2 requests + AddStoreResponseStatistic(trace, "East US 2", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0.0, 8, baseTime.AddMilliseconds(300)); + AddStoreResponseStatistic(trace, "East US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 12, baseTime.AddMilliseconds(400)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + + Assert.AreEqual(5, summaryObj["TotalRequestCount"].Value()); + Assert.AreEqual(2, ((JArray)summaryObj["RegionsSummary"]).Count); + + JObject westRegion = (JObject)summaryObj["RegionsSummary"][0]; + Assert.AreEqual("West US 2", westRegion["Region"].ToString()); + Assert.AreEqual(3, westRegion["RequestCount"].Value()); + + JObject eastRegion = (JObject)summaryObj["RegionsSummary"][1]; + Assert.AreEqual("East US 2", eastRegion["Region"].ToString()); + Assert.AreEqual(2, eastRegion["RequestCount"].Value()); + } + + [TestMethod] + public void Summary_MixedStatusCodes() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0.0, 5, baseTime); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0.0, 10, baseTime.AddMilliseconds(100)); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.ServiceUnavailable, SubStatusCodes.Unknown, 0.0, 20, baseTime.AddMilliseconds(200)); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.ServiceUnavailable, SubStatusCodes.Unknown, 0.0, 25, baseTime.AddMilliseconds(300)); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 12, baseTime.AddMilliseconds(400)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + 
JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + + JArray regions = (JArray)summaryObj["RegionsSummary"]; + JObject region = (JObject)regions[0]; + JArray groups = (JArray)region["AggregatedGroups"]; + + Assert.AreEqual(2, groups.Count, "Two distinct status codes in middle entries"); + } + + [TestMethod] + public void Summary_P50_OddCount() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + + // 5 requests total: first, 3 middle, last + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 1, baseTime); + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 10, baseTime.AddMilliseconds(100)); + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 20, baseTime.AddMilliseconds(200)); + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 30, baseTime.AddMilliseconds(300)); + AddStoreResponseStatistic(trace, "R1", StatusCodes.Ok, SubStatusCodes.Unknown, 5, 5, baseTime.AddMilliseconds(400)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JArray groups = (JArray)parsed["Summary"]["RegionsSummary"][0]["AggregatedGroups"]; + JObject group = (JObject)groups[0]; + + // Middle entries have durations: 10, 20, 30 (sorted) + // P50 of 3 items = index (3-1)/2 = 1 → 20 + Assert.AreEqual(10, group["MinDurationMs"].Value()); + Assert.AreEqual(30, group["MaxDurationMs"].Value()); + Assert.AreEqual(20, group["P50DurationMs"].Value()); + } + + [TestMethod] + public void Summary_P50_EvenCount() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + + // 6 requests total: first, 4 middle, last + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 1, baseTime); + 
AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 10, baseTime.AddMilliseconds(100)); + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 20, baseTime.AddMilliseconds(200)); + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 30, baseTime.AddMilliseconds(300)); + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 40, baseTime.AddMilliseconds(400)); + AddStoreResponseStatistic(trace, "R1", StatusCodes.Ok, SubStatusCodes.Unknown, 5, 5, baseTime.AddMilliseconds(500)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JArray groups = (JArray)parsed["Summary"]["RegionsSummary"][0]["AggregatedGroups"]; + JObject group = (JObject)groups[0]; + + // Middle entries have durations: 10, 20, 30, 40 (sorted) + // P50 of 4 items = index (4-1)/2 = 1 → 20 + Assert.AreEqual(10, group["MinDurationMs"].Value()); + Assert.AreEqual(40, group["MaxDurationMs"].Value()); + Assert.AreEqual(20, group["P50DurationMs"].Value()); + } + + [TestMethod] + public void Summary_P50_SingleItem() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + + // 3 requests total: first, 1 middle, last + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 1, baseTime); + AddStoreResponseStatistic(trace, "R1", StatusCodes.TooManyRequests, SubStatusCodes.Unknown, 0, 42, baseTime.AddMilliseconds(100)); + AddStoreResponseStatistic(trace, "R1", StatusCodes.Ok, SubStatusCodes.Unknown, 5, 5, baseTime.AddMilliseconds(200)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JArray groups = (JArray)parsed["Summary"]["RegionsSummary"][0]["AggregatedGroups"]; + JObject group = (JObject)groups[0]; + + // Single middle 
entry with duration 42 + Assert.AreEqual(1, group["Count"].Value()); + Assert.AreEqual(42, group["P50DurationMs"].Value()); + Assert.AreEqual(42, group["MinDurationMs"].Value()); + Assert.AreEqual(42, group["MaxDurationMs"].Value()); + } + + [TestMethod] + public void Summary_SizeEnforcement_UnderLimit() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + AddStoreResponseStatistic(trace, "R1", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, DateTime.UtcNow); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + int byteCount = System.Text.Encoding.UTF8.GetByteCount(summary); + + Assert.IsTrue(byteCount <= 8192, $"Summary size {byteCount} should be under limit"); + Assert.IsFalse(summary.Contains("Truncated"), "Should not be truncated"); + } + + [TestMethod] + public void Summary_SizeEnforcement_OverLimit_Truncated() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + + // Generate many requests across many regions to exceed a tiny limit + for (int r = 0; r < 20; r++) + { + for (int i = 0; i < 10; i++) + { + AddStoreResponseStatistic( + trace, + $"Region {r}", + StatusCodes.TooManyRequests, + SubStatusCodes.Unknown, + 0.0, + 5 + i, + baseTime.AddMilliseconds(r * 1000 + i * 100)); + } + } + + // Use a very small limit to force truncation + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 512); + JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + + Assert.IsTrue(summaryObj["Truncated"].Value(), "Should be truncated"); + Assert.IsNotNull(summaryObj["Message"]); + Assert.AreEqual(200, summaryObj["TotalRequestCount"].Value()); + Assert.IsNotNull(summaryObj["TotalRequestCharge"], "Truncated summary should include TotalRequestCharge"); + } + + [TestMethod] + public void Summary_EmptyTrace() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + 
JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + + Assert.AreEqual(0, summaryObj["TotalRequestCount"].Value()); + Assert.AreEqual(0, summaryObj["TotalRequestCharge"].Value()); + Assert.AreEqual(0, ((JArray)summaryObj["RegionsSummary"]).Count); + } + + [TestMethod] + public void Detailed_Mode_Unchanged() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, DateTime.UtcNow); + + CosmosTraceDiagnostics diagnostics = new CosmosTraceDiagnostics(trace); + string parameterless = diagnostics.ToString(); + string explicitDetailed = diagnostics.ToString(DiagnosticsVerbosity.Detailed); + + // Verify structural equivalence (duration may differ slightly due to timing) + JObject parsedDefault = JObject.Parse(parameterless); + JObject parsedExplicit = JObject.Parse(explicitDetailed); + + Assert.AreEqual(parsedDefault["name"].ToString(), parsedExplicit["name"].ToString()); + Assert.AreEqual(parsedDefault["start datetime"].ToString(), parsedExplicit["start datetime"].ToString()); + + // Detailed output should contain the full trace tree structure + Assert.IsNotNull(parsedDefault["name"]); + Assert.IsNotNull(parsedDefault["Summary"]); + } + + [TestMethod] + public void Summary_RegionOrdering_Deterministic() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + + // Add in chronological order: West first, then East + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, baseTime); + AddStoreResponseStatistic(trace, "East US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, baseTime.AddMilliseconds(100)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JArray regions = (JArray)parsed["Summary"]["RegionsSummary"]; + + // Regions should appear in the order they were first 
encountered chronologically + Assert.AreEqual("West US 2", regions[0]["Region"].ToString()); + Assert.AreEqual("East US 2", regions[1]["Region"].ToString()); + } + + [TestMethod] + public void Summary_NullRegion_GroupedAsUnknown() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + AddStoreResponseStatistic(trace, null, StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, DateTime.UtcNow); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JArray regions = (JArray)parsed["Summary"]["RegionsSummary"]; + + Assert.AreEqual("Unknown", regions[0]["Region"].ToString()); + } + + [TestMethod] + public void Summary_RequestEntryDetail_HasAllFields() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime requestTime = new DateTime(2026, 3, 17, 12, 0, 0, DateTimeKind.Utc); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, requestTime); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject first = (JObject)parsed["Summary"]["RegionsSummary"][0]["First"]; + + Assert.AreEqual((int)StatusCodes.Ok, first["StatusCode"].Value()); + Assert.IsNotNull(first["SubStatusCode"]); + Assert.IsNotNull(first["RequestCharge"]); + Assert.IsNotNull(first["DurationMs"]); + Assert.AreEqual("West US 2", first["Region"].ToString()); + Assert.IsNotNull(first["Endpoint"]); + Assert.IsNotNull(first["RequestStartTimeUtc"]); + Assert.IsNotNull(first["OperationType"]); + Assert.IsNotNull(first["ResourceType"]); + } + + [TestMethod] + public void CosmosTraceDiagnostics_SummaryCaching() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, DateTime.UtcNow); + + CosmosTraceDiagnostics diagnostics = new CosmosTraceDiagnostics(trace); + + // Call ToString(Summary) multiple times — should return 
same cached instance + string summary1 = diagnostics.ToString(DiagnosticsVerbosity.Summary); + string summary2 = diagnostics.ToString(DiagnosticsVerbosity.Summary); + + Assert.AreSame(summary1, summary2, "Summary should be cached via Lazy"); + } + + #region Helpers + + private static void AddStoreResponseStatistic( + ITrace trace, + string region, + StatusCodes statusCode, + SubStatusCodes subStatusCode, + double requestCharge, + double durationMs, + DateTime requestStartTime) + { + // Create or find existing ClientSideRequestStatisticsTraceDatum on the trace + ClientSideRequestStatisticsTraceDatum datum = GetOrCreateDatum(trace); + + // Create a StoreResult with the desired status code and request charge + StoreResponse storeResponse = new StoreResponse(); + storeResponse.Status = (int)statusCode; + storeResponse.Headers = new DictionaryNameValueCollection(); + storeResponse.Headers[HttpConstants.HttpHeaders.RequestCharge] = requestCharge.ToString(); + storeResponse.Headers[WFConstants.BackendHeaders.SubStatus] = ((int)subStatusCode).ToString(); + + ReferenceCountedDisposable storeResultRef = StoreResult.CreateForTesting(storeResponse: storeResponse); + + DateTime responseTime = requestStartTime.AddMilliseconds(durationMs); + + StoreResponseStatistics stats = new StoreResponseStatistics( + requestStartTime: requestStartTime, + requestResponseTime: responseTime, + storeResult: storeResultRef.Target, + resourceType: ResourceType.Document, + operationType: OperationType.Read, + requestSessionToken: null, + locationEndpoint: new Uri("https://account-" + (region ?? 
"unknown").Replace(" ", "").ToLower() + ".documents.azure.com"), + region: region); + + // Use reflection to add to the private storeResponseStatistics list + FieldInfo field = typeof(ClientSideRequestStatisticsTraceDatum) + .GetField("storeResponseStatistics", BindingFlags.NonPublic | BindingFlags.Instance); + List list = (List)field.GetValue(datum); + list.Add(stats); + } + + private static ClientSideRequestStatisticsTraceDatum GetOrCreateDatum(ITrace trace) + { + const string datumKey = "ClientSideStats"; + + if (trace.TryGetDatum(datumKey, out object existing) + && existing is ClientSideRequestStatisticsTraceDatum existingDatum) + { + return existingDatum; + } + + ClientSideRequestStatisticsTraceDatum datum = new ClientSideRequestStatisticsTraceDatum( + DateTime.UtcNow, + trace); + trace.AddDatum(datumKey, datum); + return datum; + } + + private static void AddHttpResponseStatistic( + ITrace trace, + string region, + HttpStatusCode statusCode, + int subStatusCode, + double requestCharge, + double durationMs, + DateTime requestStartTime) + { + ClientSideRequestStatisticsTraceDatum datum = GetOrCreateDatum(trace); + + HttpResponseMessage responseMessage = new HttpResponseMessage(statusCode); + responseMessage.Headers.Add(WFConstants.BackendHeaders.SubStatus, subStatusCode.ToString()); + responseMessage.Headers.Add(HttpConstants.HttpHeaders.RequestCharge, requestCharge.ToString()); + responseMessage.Headers.Add(HttpConstants.HttpHeaders.ActivityId, Guid.NewGuid().ToString()); + + DateTime requestEndTime = requestStartTime.AddMilliseconds(durationMs); + + HttpResponseStatistics httpStats = new HttpResponseStatistics( + requestStartTime: requestStartTime, + requestEndTime: requestEndTime, + requestUri: new Uri("https://account-" + (region ?? 
"unknown").Replace(" ", "").ToLower() + ".documents.azure.com"), + httpMethod: HttpMethod.Get, + resourceType: ResourceType.Document, + responseMessage: responseMessage, + exception: null, + region: region); + + FieldInfo field = typeof(ClientSideRequestStatisticsTraceDatum) + .GetField("httpResponseStatistics", BindingFlags.NonPublic | BindingFlags.Instance); + List list = (List)field.GetValue(datum); + list.Add(httpStats); + } + + #endregion + + #region Gateway Mode (HttpResponseStatistics) Tests + + [TestMethod] + public void Summary_GatewayMode_SingleRequest() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + + AddHttpResponseStatistic(trace, "West US 2", HttpStatusCode.OK, 0, 3.5, 15, baseTime); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + + Assert.AreEqual(1, summaryObj["TotalRequestCount"].Value()); + Assert.AreEqual(3.5, summaryObj["TotalRequestCharge"].Value()); + + JArray regions = (JArray)summaryObj["RegionsSummary"]; + Assert.AreEqual(1, regions.Count); + JObject region = (JObject)regions[0]; + Assert.AreEqual("West US 2", region["Region"].ToString()); + + JObject first = (JObject)region["First"]; + Assert.AreEqual(200, first["StatusCode"].Value()); + Assert.AreEqual(0, first["SubStatusCode"].Value()); + Assert.AreEqual(3.5, first["RequestCharge"].Value()); + } + + [TestMethod] + public void Summary_GatewayMode_SubStatusCodeExtraction() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + + AddHttpResponseStatistic(trace, "East US", HttpStatusCode.TooManyRequests, 3200, 0, 5, baseTime); + AddHttpResponseStatistic(trace, "East US", HttpStatusCode.OK, 0, 5.0, 12, baseTime.AddSeconds(1)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject region = 
(JObject)parsed["Summary"]["RegionsSummary"][0]; + + JObject first = (JObject)region["First"]; + Assert.AreEqual(429, first["StatusCode"].Value()); + Assert.AreEqual(3200, first["SubStatusCode"].Value()); + } + + [TestMethod] + public void Summary_MixedDirectAndGateway() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + DateTime baseTime = DateTime.UtcNow; + + // Direct mode request + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, baseTime); + + // Gateway mode request + AddHttpResponseStatistic(trace, "West US 2", HttpStatusCode.OK, 0, 3.0, 15, baseTime.AddMilliseconds(100)); + + string summary = DiagnosticsSummaryWriter.WriteSummary(trace, 8192); + JObject parsed = JObject.Parse(summary); + JObject summaryObj = (JObject)parsed["Summary"]; + + // Both Direct and Gateway entries should be collected + Assert.AreEqual(2, summaryObj["TotalRequestCount"].Value()); + Assert.IsNotNull(summaryObj["TotalRequestCharge"]); + + JArray regions = (JArray)summaryObj["RegionsSummary"]; + Assert.AreEqual(1, regions.Count, "Both entries are in the same region"); + Assert.AreEqual(2, regions[0]["RequestCount"].Value()); + } + + #endregion + + #region Edge Case Tests + + [TestMethod] + [ExpectedException(typeof(ArgumentNullException))] + public void WriteSummary_NullTrace_ThrowsArgumentNullException() + { + DiagnosticsSummaryWriter.WriteSummary(null, 8192); + } + + [TestMethod] + public void ToString_InvalidEnumValue_FallsBackToDetailed() + { + using ITrace trace = Trace.GetRootTrace("ReadItemAsync"); + AddStoreResponseStatistic(trace, "West US 2", StatusCodes.Ok, SubStatusCodes.Unknown, 5.0, 10, DateTime.UtcNow); + + CosmosTraceDiagnostics diagnostics = new CosmosTraceDiagnostics(trace); + + // Invalid enum value should fall back to detailed output (same as parameterless ToString) + string result = diagnostics.ToString((DiagnosticsVerbosity)99); + JObject parsed = JObject.Parse(result); + + 
Assert.IsNotNull(parsed["name"], "Invalid verbosity should produce detailed output with trace name"); + Assert.IsNull(parsed["Summary"]?["DiagnosticsVerbosity"], + "Should not contain Summary.DiagnosticsVerbosity since it is detailed output"); + } + + [TestMethod] + public void CosmosClientOptions_MaxSummarySizeBytes_CustomValuePropagated() + { + CosmosClientOptions options = new CosmosClientOptions + { + MaxDiagnosticsSummarySizeBytes = 16384 + }; + + Assert.AreEqual(16384, options.MaxDiagnosticsSummarySizeBytes); + } + + [TestMethod] + public void CosmosClientOptions_DiagnosticsVerbosity_CanBeSetToSummary() + { + CosmosClientOptions options = new CosmosClientOptions + { + DiagnosticsVerbosity = DiagnosticsVerbosity.Summary + }; + + Assert.AreEqual(DiagnosticsVerbosity.Summary, options.DiagnosticsVerbosity); + } + + #endregion + } +} diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Telemetry/OpenTelemetryRecorderTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Telemetry/OpenTelemetryRecorderTests.cs index 9640097210..8fdc8d94dd 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Telemetry/OpenTelemetryRecorderTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Telemetry/OpenTelemetryRecorderTests.cs @@ -184,12 +184,12 @@ public async Task CheckResponseCompatibility() private static readonly Func> ChangefeedResponseFunc = (Type type) => { ConstructorInfo constructorInfo = type - .GetConstructor(BindingFlags.NonPublic | BindingFlags.Public | BindingFlags.Instance, null, new Type[] { typeof(ITrace), typeof(ReadOnlyCollection), typeof(double) }, null); + .GetConstructor(BindingFlags.NonPublic | BindingFlags.Public | BindingFlags.Instance, null, new Type[] { typeof(ITrace), typeof(ReadOnlyCollection), typeof(double), typeof(int) }, null); if (constructorInfo != null) { return (FeedResponse)constructorInfo.Invoke( new object[] { - NoOpTrace.Singleton, new List().AsReadOnly(), 10 }); + 
NoOpTrace.Singleton, new List().AsReadOnly(), 10, CosmosClientOptions.DefaultMaxDiagnosticsSummarySizeBytes }); } return null; @@ -198,12 +198,12 @@ public async Task CheckResponseCompatibility() private static readonly Func> ChangeFeedEstimatorEmptyFeedResponseFunc = (Type type) => { ConstructorInfo constructorInfo = type - .GetConstructor(BindingFlags.NonPublic | BindingFlags.Public | BindingFlags.Instance, null, new Type[] { typeof(ITrace) }, null); + .GetConstructor(BindingFlags.NonPublic | BindingFlags.Public | BindingFlags.Instance, null, new Type[] { typeof(ITrace), typeof(int) }, null); if (constructorInfo != null) { return (FeedResponse)constructorInfo.Invoke( new object[] { - NoOpTrace.Singleton}); + NoOpTrace.Singleton, CosmosClientOptions.DefaultMaxDiagnosticsSummarySizeBytes }); } return null; diff --git a/openspec/changes/diagnostics-compaction/design.md b/openspec/changes/diagnostics-compaction/design.md new file mode 100644 index 0000000000..1024dd9b82 --- /dev/null +++ b/openspec/changes/diagnostics-compaction/design.md @@ -0,0 +1,109 @@ +# Diagnostics Compaction — Design + +## Summary Compaction Algorithm + +### Data Collection + +Walk the `ITrace` tree (same traversal as `SummaryDiagnostics.CollectSummaryFromTraceTree()`) to collect all `StoreResponseStatistics` and `HttpResponseStatistics` entries from every `ClientSideRequestStatisticsTraceDatum` in the trace hierarchy. + +### Region Grouping + +Group collected entries by `Region` (string). Entries with a null/empty region are grouped under `"Unknown"`. + +### Per-Region Summary + +For each region group (ordered chronologically by request start time): + +1. **First**: Full details of the chronologically first request +2. **Last**: Full details of the chronologically last request (omitted if only 1 request) +3. 
**Middle entries** (all except first and last): Group by `(StatusCode, SubStatusCode)`: + - **Count**: Number of requests in this group + - **TotalRequestCharge**: Sum of RU charges + - **MinDurationMs / MaxDurationMs / P50DurationMs / AvgDurationMs**: Latency statistics + +### Size Enforcement + +1. Serialize the summary JSON +2. If `serializedBytes <= MaxDiagnosticsSummarySizeBytes` → return as-is +3. If `serializedBytes > MaxDiagnosticsSummarySizeBytes` → return truncated output + +### Handling Both Direct and Gateway Requests + +Both `StoreResponseStatistics` (direct mode) and `HttpResponseStatistics` (gateway mode) are collected and treated uniformly in the summary. The aggregated groups include entries from both transport paths. A `"TransportType"` field (`"Direct"` / `"Gateway"`) is not emitted; it can be added to aggregated groups later if the two transport paths need to be distinguished (see Alternative 3). + +## Request Flow + +```mermaid +flowchart TD + A["ToString(DiagnosticsVerbosity)"] --> B{Verbosity?} + B -->|Detailed| C["Existing TraceJsonWriter path"] + B -->|Summary| D["DiagnosticsSummaryWriter"] + D --> E["Walk ITrace tree"] + E --> F["Collect StoreResponseStatistics\n+ HttpResponseStatistics"] + F --> G["Group by Region"] + G --> H["Per region:\nFirst + Last + Aggregated Middle"] + H --> I["Serialize to JSON"] + I --> J{Size <= Max?} + J -->|Yes| K["Return summary JSON"] + J -->|No| L["Return truncated JSON"] + C --> M["Return full trace JSON"] +``` + +## Files Created + +| File | Description | +|------|-------------| +| `Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsVerbosity.cs` | `DiagnosticsVerbosity` enum | +| `Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs` | Summary computation and JSON serialization logic | + +## Files Modified + +| File | Change | +|------|--------| +| `CosmosClientOptions.cs` | Add `DiagnosticsVerbosity` and `MaxDiagnosticsSummarySizeBytes` properties with validation | +| `CosmosClientBuilder.cs` | Add `WithDiagnosticsVerbosity()` and `
`WithMaxDiagnosticsSummarySizeBytes()` builder methods | +| `ConfigurationManager.cs` | Add environment variable constants for diagnostics verbosity and max summary size | +| `CosmosDiagnostics.cs` | Add `ToString(DiagnosticsVerbosity)` abstract overload | +| `CosmosTraceDiagnostics.cs` | Implement `ToString(DiagnosticsVerbosity)` overload with `Lazy` caching; delegate to `DiagnosticsSummaryWriter` when verbosity is `Summary` | +| `EncryptionCosmosDiagnostics.cs` | Implement `ToString(DiagnosticsVerbosity)` overload (`SDKPROJECTREF`-gated) with caching | +| `ContainerCore.cs` | Wire `MaxDiagnosticsSummarySizeBytes` from options | +| `ReadManyQueryHelper.cs` | Wire `MaxDiagnosticsSummarySizeBytes` from options | +| `CosmosLinqQuery.cs` | Wire `MaxDiagnosticsSummarySizeBytes` from options | +| `ChangeFeedEstimatorIterator.cs` | Wire `MaxDiagnosticsSummarySizeBytes` from options | + +## Contract/Baseline Updates + +| File | Change | +|------|--------| +| `ContractEnforcementTests.cs` baseline | Update public API contract for new enum and properties | + +## Alternatives Considered + +### Alternative 1: Emit summary alongside truncated trace tree +Instead of replacing the full trace, emit the summary _alongside_ the first + last children of the trace tree. + +**Pros:** Preserves some trace structure for tooling that parses it. +**Cons:** Larger output size; complex to implement; defeats the purpose of compaction. +**Decision:** Rejected — summary replaces the full trace. The `First` and `Last` entries in each region summary provide the detailed bookends. + +### Alternative 2: Per-request verbosity via RequestOptions +Add a `DiagnosticsVerbosity` property to `RequestOptions` for per-request control. + +**Pros:** More granular control. +**Cons:** Verbosity is a serialization concern, not a request concern. The `ToString(DiagnosticsVerbosity)` overload provides the same flexibility without complicating `RequestOptions`. +**Decision:** Deferred. 
Can be added later if needed. + +### Alternative 3: Transport type distinction in aggregated groups +Include a `TransportType` field (`"Direct"` / `"Gateway"`) in each aggregated group. + +**Pros:** Helps distinguish transport-specific issues. +**Cons:** Increases output size; `StatusCode/SubStatusCode` is usually sufficient. +**Decision:** Deferred. Can add later if customer feedback warrants it. + +## Key References + +- `Microsoft.Azure.Cosmos/src/Diagnostics/CosmosTraceDiagnostics.cs` — concrete diagnostics implementation +- `Microsoft.Azure.Cosmos/src/Diagnostics/DiagnosticsSummaryWriter.cs` — summary computation and JSON serialization +- `Microsoft.Azure.Cosmos/src/Tracing/TraceWriter.TraceJsonWriter.cs` — current trace serialization (detailed mode) +- `Microsoft.Azure.Cosmos/src/Tracing/TraceData/ClientSideRequestStatisticsTraceDatum.cs` — stats data (`StoreResponseStatistics`, `HttpResponseStatistics`) +- `Microsoft.Azure.Cosmos/src/Tracing/TraceData/PointOperationStatisticsTraceDatum.cs` — ActivityId source diff --git a/openspec/changes/diagnostics-compaction/proposal.md b/openspec/changes/diagnostics-compaction/proposal.md new file mode 100644 index 0000000000..b0db031a50 --- /dev/null +++ b/openspec/changes/diagnostics-compaction/proposal.md @@ -0,0 +1,71 @@ +# Diagnostics Compaction — Proposal + +## Problem + +`CosmosDiagnostics.ToString()` produces a JSON trace that grows **unboundedly** with retries. Each retry attempt creates a new child `ITrace` node containing a full `ClientSideRequestStatisticsTraceDatum` with complete `StoreResponseStatistics` and `HttpResponseStatistics` entries. In pathological scenarios (sustained 429 throttling, transient failures, cross-region failovers), a single operation's diagnostics can grow to hundreds of KB. + +**Impact:** +- **Log truncation** — monitoring systems (Application Insights, Azure Monitor, etc.) 
silently drop oversized log entries +- **Memory pressure** — large diagnostic strings increase GC overhead, especially at high throughput +- **Readability** — operators cannot quickly extract signal from noise when hundreds of identical retry entries are listed + +**Example scenario:** A point read that encounters 50 retries due to 429 throttling in West US 2, then fails over to East US 2 with 10 more retries, produces ~60 full `StoreResponseStatistics` entries in the trace tree. With summary mode, this compacts to: first request + last request + 1 aggregated group per region. + +## Proposed Approach + +Introduce a **`DiagnosticsVerbosity`** concept (modeled after [Azure/azure-sdk-for-rust#3592](https://github.com/Azure/azure-sdk-for-rust/pull/3592)) that controls how `CosmosDiagnostics.ToString()` serializes trace data: + +| Mode | Behavior | Use Case | +|------|----------|----------| +| **Detailed** (default) | Current behavior — full trace tree output | Debugging, development | +| **Summary** | Region-grouped compaction with first/last + aggregated middle | Production logging, size-constrained environments | + +**Key design principle:** The in-memory representation (`ITrace` tree, `ClientSideRequestStatisticsTraceDatum`) stays **unchanged**. Compaction only happens at **serialization time**, in the new `DiagnosticsSummaryWriter` path; the existing `TraceJsonWriter` path used by `Detailed` mode is not modified. This preserves full programmatic access to diagnostics data while reducing serialized output size. + +## SDK Area + +- **Primary:** Diagnostics +- **Secondary:** Client-config (new options properties) + +## Preview vs GA + +The `DiagnosticsVerbosity` enum and related options should ship as **GA** (non-preview) since it's an additive, backward-compatible feature with no impact when not opted into.
+ +## Backward Compatibility + +- **Default is `Detailed`** — no behavioral change for existing users +- **No breaking changes** — parameterless `ToString()` always returns the `Detailed` format; summary output is produced only when `Summary` is explicitly opted into +- **Programmatic API unchanged** — `GetContactedRegions()`, `GetFailedRequestCount()`, etc. continue to work from the full in-memory trace regardless of verbosity + +## Rollout Strategy + +1. Ship with `Detailed` as default in initial release +2. Document `Summary` mode in SDK documentation and changelog +3. Consider making `Summary` the default in a future major version after customer feedback + +## Non-Goals + +- Changing the in-memory `ITrace` tree structure +- Modifying the `Detailed` mode output format +- Adding new programmatic APIs beyond the `ToString(DiagnosticsVerbosity)` overload +- Per-request verbosity override via `RequestOptions` (can be added later) + +## Resolved Questions + +1. **Should `AggregatedGroups` include an `AvgDurationMs` field?** The Rust SDK only includes min/max/P50. Adding avg is cheap to compute but adds to the output size. _Decision: Include avg. It's a single field and provides useful signal._ + +2. **Should the summary include the `children` trace tree at all?** Currently proposed as replacing the entire trace output. An alternative is to emit the summary _alongside_ a truncated trace tree (e.g., first + last children only). _Decision: Summary replaces the full trace. The `First` and `Last` entries in each region summary provide the detailed bookends._ + +3. **Gateway vs Direct distinction in aggregated groups.** Should each `AggregatedGroup` indicate whether it's from Direct or Gateway transport? _Decision: Defer. The `StatusCode/SubStatusCode` combination is usually sufficient. Can add a `TransportType` field later if needed._ + +4. **Caching.** The Rust SDK caches serialized JSON per verbosity level via `OnceLock`. Should the .NET SDK cache the summary JSON? _Decision: Yes, use `Lazy<string>` or similar.
`ToString()` may be called multiple times (logging, telemetry, etc.)._ + +5. **Thread safety.** `CosmosDiagnostics.Verbosity` as a settable property on a potentially shared object needs consideration. _Decision: Use the `ToString(DiagnosticsVerbosity)` overload which avoids mutating state entirely. The property is set once from `CosmosClientOptions` during response creation and read during serialization._ + +## References + +- **Rust SDK PR:** [Azure/azure-sdk-for-rust#3592](https://github.com/Azure/azure-sdk-for-rust/pull/3592) — `DiagnosticsContext` with `Summary` and `Detailed` modes +- **Current .NET diagnostics:** `Microsoft.Azure.Cosmos/src/Diagnostics/` and `Microsoft.Azure.Cosmos/src/Tracing/` +- **Existing summary:** `SummaryDiagnostics.cs` — aggregates `(StatusCode, SubStatusCode)` counts (foundation to build on) +- **Trace tree:** `ITrace` → `Trace` with recursive children and `ClientSideRequestStatisticsTraceDatum` data +- **Related spec:** `openspec/specs/diagnostics-and-observability/spec.md` diff --git a/openspec/changes/diagnostics-compaction/tasks.md b/openspec/changes/diagnostics-compaction/tasks.md new file mode 100644 index 0000000000..6048a7aac1 --- /dev/null +++ b/openspec/changes/diagnostics-compaction/tasks.md @@ -0,0 +1,88 @@ +# Diagnostics Compaction — Tasks + +## Task 1: DiagnosticsVerbosity Enum & Options Plumbing + +**Scope:** Create the enum, add `DiagnosticsVerbosity` and `MaxDiagnosticsSummarySizeBytes` properties to `CosmosClientOptions`, add `ToString(DiagnosticsVerbosity)` abstract overload to `CosmosDiagnostics`, add environment variable support. + +**Acceptance:** `ToString(verbosity)` overloads compile and delegate correctly. Parameterless `ToString()` is unchanged (always `Detailed`). No behavioral change yet. 
+ +**Spec requirements:** Diagnostics Verbosity (default verbosity, parameterless ToString, environment variable configuration, code-level override, verbosity precedence) + +## Task 2: Summary Computation Engine + +**Scope:** Implement `DiagnosticsSummaryWriter` — the core logic that walks the trace tree, collects stats, groups by region, computes first/last/aggregated groups, and produces the summary JSON structure. + +**Acceptance:** Given an `ITrace` tree, produces the correct summary JSON. Unit-testable in isolation. + +**Spec requirements:** Summary mode region grouping, first/last preservation, single request region, aggregated groups, mixed Direct and Gateway, region ordering + +## Task 3: Summary Serialization Integration + +**Scope:** Implement `CosmosTraceDiagnostics.ToString(DiagnosticsVerbosity)`. When `Summary`, delegate to `DiagnosticsSummaryWriter`. Implement size enforcement and truncated output fallback. Implement caching. Parameterless `ToString()` remains unchanged. + +**Acceptance:** `ToString(DiagnosticsVerbosity.Summary)` returns compact summary JSON. `ToString()` (parameterless) continues to return full `Detailed` trace. + +**Spec requirements:** In-memory trace tree unchanged, size enforcement, size under limit, summary mode caching, Summary JSON Format, truncated output format + +## Task 4: Contract Updates & Public API Validation + +**Scope:** Update `ContractEnforcementTests` baselines for new public API surface. Ensure the new enum and properties appear in contracts. + +**Acceptance:** All contract tests pass. Public API is correctly documented. + +## Task 5: Unit Tests + +**Scope:** Comprehensive unit tests for the summary engine. 
+ +| Test | Description | Spec Requirement | +|------|-------------|------------------| +| `DiagnosticsVerbosity_DefaultIsDetailed` | Verify enum default | Default verbosity is Detailed | +| `CosmosClientOptions_DiagnosticsVerbosity_DefaultValue` | Verify options default | Default verbosity is Detailed | +| `CosmosClientOptions_MaxSummarySizeBytes_Validation` | Min 4096 enforced | MaxDiagnosticsSummarySizeBytes minimum validation | +| `CosmosClientOptions_DiagnosticsVerbosity_EnvVarFallback` | Env var populates options | Environment variable configuration | +| `CosmosClientOptions_DiagnosticsVerbosity_CodeOverridesEnvVar` | Code takes precedence | Code-level value overrides env var | +| `ToString_Overload_UsesSummary_WhenExplicit` | `ToString(Summary)` produces summary | Verbosity precedence | +| `Summary_SingleRegion_SingleRequest` | No deduplication, first only | Single request region | +| `Summary_SingleRegion_TwoRequests` | First + last, no middle | First/last preservation | +| `Summary_SingleRegion_ManyRetries_429` | First + last + 1 aggregated group | Aggregated groups | +| `Summary_MultiRegion_Failover` | Separate region summaries | Region grouping | +| `Summary_MixedStatusCodes` | Multiple aggregated groups per region | Aggregated groups | +| `Summary_DirectAndGateway_Combined` | Both transport types in summary | Mixed Direct and Gateway | +| `Summary_P50_OddCount` | Percentile on odd-sized collection | Aggregated groups | +| `Summary_P50_EvenCount` | Percentile on even-sized collection | Aggregated groups | +| `Summary_P50_SingleItem` | Percentile with 1 item | Aggregated groups | +| `Summary_SizeEnforcement_UnderLimit` | Summary fits within max size | Size under limit | +| `Summary_SizeEnforcement_OverLimit_Truncated` | Falls back to truncated output | Size enforcement | +| `Summary_EmptyTrace` | No requests produces minimal output | Region grouping | +| `Summary_RegionOrdering_Deterministic` | Regions sorted alphabetically | Region ordering | +| 
`Detailed_Mode_Unchanged` | Existing detailed output is byte-for-byte identical | Parameterless ToString | +| `ToString_Parameterless_AlwaysDetailed` | Parameterless always returns Detailed | Parameterless ToString | + +## Task 6: Integration Tests (Emulator) + +| Test | Description | Spec Requirement | +|------|-------------|------------------| +| `ReadItem_SummaryMode_ProducesValidJson` | Real read → summary JSON parses correctly | Summary JSON Format | +| `ReadItem_SummaryMode_SizeWithinLimit` | Summary output ≤ configured max bytes | Size under limit | +| `QueryItems_SummaryMode_MultipleRequests` | Query with continuations → summary compacts | Aggregated groups | +| `BulkOperations_SummaryMode_HighRetryCount` | Simulate throttling → verify compaction | Aggregated groups | +| `CrossRegion_SummaryMode_RegionGroups` | Multi-region → separate region summaries | Region grouping | + +## Task 7: Baseline / Golden-File Tests + +**Scope:** Create baseline JSON files for summary mode output (similar to existing `EndToEndTraceWriterBaselineTests`). Verify serialization stability across code changes. + +**Spec requirements:** Summary JSON Format, truncated output format + +## Task 8: Changelog & Documentation + +**Scope:** Update `changelog.md` with the new feature. Update `.github/copilot-instructions.md` if diagnostics verbosity affects AI assistant behavior. + +## Expected Size Reductions + +| Scenario | Detailed Size | Expected Summary Size | Reduction | +|----------|--------------|----------------------|-----------| +| 1 request, no retries | ~2 KB | ~1 KB | ~50% | +| 10 retries, same region | ~20 KB | ~2 KB | ~90% | +| 50 retries, 2 regions | ~100 KB | ~3 KB | ~97% | +| 100 retries, 3 regions | ~200 KB | ~4 KB | ~98% |