Skip to content
Closed
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions Microsoft.Azure.Cosmos/src/Diagnostics/CosmosTraceDiagnostics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ public CosmosTraceDiagnostics(ITrace trace)

public override string ToString()
{
if (this.Value is Tracing.Trace rootConcreteTrace)
{
rootConcreteTrace.SetWalkingStateRecursively();
return this.ToJsonString();
}

Comment thread
kirankumarkolli marked this conversation as resolved.
return this.ToJsonString();
}

Expand All @@ -50,16 +56,31 @@ public override TimeSpan GetClientElapsedTime()

public override IReadOnlyList<(string regionName, Uri uri)> GetContactedRegions()
{
if (this.Value is Tracing.Trace rootConcreteTrace)
{
rootConcreteTrace.SetWalkingStateRecursively();
}

return this.Value?.Summary?.RegionsContacted;
}

public override ServerSideCumulativeMetrics GetQueryMetrics()
{
if (this.Value is Tracing.Trace rootConcreteTrace)
{
rootConcreteTrace.SetWalkingStateRecursively();
}

return this.accumulatedMetrics.Value;
}

internal bool IsGoneExceptionHit()
{
if (this.Value is Tracing.Trace rootConcreteTrace)
{
rootConcreteTrace.SetWalkingStateRecursively();
}

return this.WalkTraceTreeForGoneException(this.Value);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
namespace Microsoft.Azure.Cosmos.Query.Core.Metrics
{
using System;
using System.Linq;
using Microsoft.Azure.Cosmos.Tracing;
using Microsoft.Azure.Cosmos.Tracing.TraceData;

Expand All @@ -17,6 +18,12 @@ public static void WalkTraceTreeForQueryMetrics(ITrace currentTrace, ServerSideM
return;
Comment thread
kirankumarkolli marked this conversation as resolved.
}

// Assert that walking state is set
if (currentTrace is Tracing.Trace concreteTrace)
{
System.Diagnostics.Debug.Assert(concreteTrace.IsBeingWalked, "SetWalkingStateRecursively should be set to true");
}

foreach (object datum in currentTrace.Data.Values)
{
if (datum is QueryMetricsTraceDatum queryMetricsTraceDatum)
Expand All @@ -41,6 +48,12 @@ private static void WalkTraceTreeForPartitionInfo(ITrace currentTrace, ServerSid
return;
}

// Assert that walking state is set
if (currentTrace is Tracing.Trace concreteTrace)
{
System.Diagnostics.Debug.Assert(concreteTrace.IsBeingWalked, "SetWalkingStateRecursively should be set to true");
}

foreach (Object datum in currentTrace.Data.Values)
{
if (datum is ClientSideRequestStatisticsTraceDatum clientSideRequestStatisticsTraceDatum)
Expand Down
71 changes: 67 additions & 4 deletions Microsoft.Azure.Cosmos/src/Tracing/Trace.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ internal sealed class Trace : ITrace
private readonly List<ITrace> children;
private readonly Lazy<Dictionary<string, object>> data;
private ValueStopwatch stopwatch;
private volatile bool isBeingWalked;

private Trace(
string name,
Expand Down Expand Up @@ -58,6 +59,8 @@ private Trace(

public IReadOnlyDictionary<string, object> Data => this.data.IsValueCreated ? this.data.Value : Trace.EmptyDictionary;

internal bool IsBeingWalked => this.isBeingWalked;
Comment thread
kirankumarkolli marked this conversation as resolved.
Outdated

public void Dispose()
{
this.stopwatch.Stop();
Expand All @@ -66,6 +69,11 @@ public void Dispose()
public ITrace StartChild(
string name)
{
if (this.isBeingWalked)
{
return NoOpTrace.Singleton;
}

return this.StartChild(
name,
level: TraceLevel.Verbose,
Expand All @@ -77,6 +85,11 @@ public ITrace StartChild(
TraceComponent component,
TraceLevel level)
{
if (this.isBeingWalked)
{
return NoOpTrace.Singleton;
}

if (this.Parent != null && !this.stopwatch.IsRunning)
{
return this.Parent.StartChild(name, component, level);
Expand All @@ -97,6 +110,11 @@ public void AddChild(ITrace child)
{
lock (this.children)
{
if (this.isBeingWalked)
{
return; // Ignore modifications while being walked
}

this.children.Add(child);
}
}
Expand Down Expand Up @@ -124,18 +142,63 @@ public static Trace GetRootTrace(

public void AddDatum(string key, TraceDatum traceDatum)
{
this.data.Value.Add(key, traceDatum);
this.Summary.UpdateRegionContacted(traceDatum);
lock (this.children)
{
if (this.isBeingWalked)
{
return; // Ignore modifications while being walked
}

this.data.Value.Add(key, traceDatum);
this.Summary.UpdateRegionContacted(traceDatum);
}
}

public void AddDatum(string key, object value)
{
this.data.Value.Add(key, value);
lock (this.children)
{
if (this.isBeingWalked)
{
return; // Ignore modifications while being walked
}

this.data.Value.Add(key, value);
}
}

public void AddOrUpdateDatum(string key, object value)
{
this.data.Value[key] = value;
lock (this.children)
{
if (this.isBeingWalked)
{
return; // Ignore modifications while being walked
}

this.data.Value[key] = value;
}
}

internal void SetWalkingStateRecursively()
{
lock (this.children)
{
if (this.isBeingWalked)
{
return; // Already set, return early
}

this.isBeingWalked = true;
Comment thread
kirankumarkolli marked this conversation as resolved.
Outdated

foreach (ITrace child in this.children)
{
if (child is Trace concreteChild)
{
concreteChild.SetWalkingStateRecursively();
}
}
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,15 @@ public SummaryDiagnostics(ITrace trace)

private void CollectSummaryFromTraceTree(ITrace currentTrace)
{
foreach (object datums in currentTrace.Data.Values)
// Assert that walking state is set
if (currentTrace is Trace concreteTrace)
{
if (datums is ClientSideRequestStatisticsTraceDatum clientSideRequestStatisticsTraceDatum)
System.Diagnostics.Debug.Assert(concreteTrace.IsBeingWalked, "SetWalkingStateRecursively should be set to true");
}

foreach (var datum in currentTrace.Data)
{
if (datum.Value is ClientSideRequestStatisticsTraceDatum clientSideRequestStatisticsTraceDatum)
{
this.AggregateStatsFromStoreResults(clientSideRequestStatisticsTraceDatum.StoreResponseStatisticsList);
this.AggregateGatewayStatistics(clientSideRequestStatisticsTraceDatum.HttpResponseStatisticsList);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ public static void WriteTrace(
writer.WriteFieldName("duration in milliseconds");
writer.WriteNumberValue(trace.Duration.TotalMilliseconds);

Comment thread
kirankumarkolli marked this conversation as resolved.
// Use direct enumeration protected by walking state
if (trace.Data.Any())
{
writer.WriteFieldName("data");
Expand All @@ -70,6 +71,7 @@ public static void WriteTrace(
writer.WriteObjectEnd();
}

// Use direct enumeration protected by walking state
if (trace.Children.Any())
{
writer.WriteFieldName("children");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ namespace Microsoft.Azure.Cosmos.Tests
using System.Net.Http;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Azure.Cosmos.Diagnostics;
using Microsoft.Azure.Cosmos.Tracing;
using Microsoft.Azure.Cosmos.Tracing.TraceData;
using Microsoft.Azure.Documents;
Expand Down Expand Up @@ -137,6 +138,78 @@ public async Task ValidateActivityIdWithSynchronizationContext()
}
}

[TestMethod]
public void ValidateConcurrentToStringCalls()
{
// Create a trace and simulate concurrent access while calling ToString()
using ITrace trace = Microsoft.Azure.Cosmos.Tracing.Trace.GetRootTrace("concurrency-test");

Exception caughtException = null;
const int numThreads = 10;
const int numIterations = 100;
using CountdownEvent countdown = new CountdownEvent(numThreads);

// Start multiple threads that will concurrently modify the trace
for (int i = 0; i < numThreads; i++)
{
int threadId = i;
Task.Run(() =>
{
try
{
for (int j = 0; j < numIterations; j++)
{
// Add children and data concurrently
using ITrace child = trace.StartChild($"child-{threadId}-{j}");
child.AddDatum($"key-{threadId}-{j}", $"value-{threadId}-{j}");

// Simulate some work
Thread.Sleep(1);
}
}
catch (Exception ex)
{
Interlocked.CompareExchange(ref caughtException, ex, null);
}
finally
{
countdown.Signal();
}
});
}

// Concurrently call ToString() which caused the original issue
Task toStringTask = Task.Run(() =>
{
try
{
for (int i = 0; i < numIterations; i++)
{
CosmosTraceDiagnostics diagnostics = new CosmosTraceDiagnostics(trace);
string diagnosticsString = diagnostics.ToString();
Assert.IsNotNull(diagnosticsString);

// Simulate some work
Thread.Sleep(1);
}
}
catch (Exception ex)
{
Interlocked.CompareExchange(ref caughtException, ex, null);
}
});

// Wait for all threads to complete
countdown.Wait(TimeSpan.FromSeconds(30));
toStringTask.Wait(TimeSpan.FromSeconds(30));

// Verify no exceptions occurred
if (caughtException != null)
{
Assert.Fail($"Concurrent access caused exception: {caughtException}");
}
}

private Task<Guid> ValidateActivityIdHelper()
{
Guid activityId = System.Diagnostics.Trace.CorrelationManager.ActivityId;
Expand Down