Skip to content

Commit 5c4a3f1

Browse files
Add crash/failure telemetry to MSBuild
Add CrashTelemetry data class and CrashTelemetryRecorder helper that capture rich exception information: exception type, inner exception type, stack trace hash (SHA-256 for bucketing without PII), top stack frame, HResult, exit type classification, criticality flag, MSBuild version, framework name, and host. CrashTelemetryRecorder centralizes recording and flushing logic used by all three crash telemetry emission points: 1. MSBuild.exe (XMake.Execute): - All catch blocks record crash telemetry via RecordCrashTelemetry - FlushCrashTelemetry in the finally block emits via TelemetryManager 2. API mode (BuildManager.EndBuild): - Catch block records crash telemetry for shutdown exceptions - _threadException (node crashes) is recorded before re-throwing - FlushCrashTelemetry emits via TelemetryManager 3. Unhandled exceptions (ExceptionHandling.UnhandledExceptionHandler): - RecordAndFlushCrashTelemetry immediately emits since process is dying All telemetry code is best-effort with catch-all guards to prevent secondary failures during crash handling. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 8dc8e37 commit 5c4a3f1

File tree

7 files changed

+415
-0
lines changed

7 files changed

+415
-0
lines changed

src/Build/BackEnd/BuildManager/BuildManager.cs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1096,6 +1096,7 @@ public void EndBuild()
10961096
catch (Exception e)
10971097
{
10981098
exceptionsThrownInEndBuild = true;
1099+
RecordCrashTelemetry(e, isUnhandled: false);
10991100

11001101
if (e is AggregateException ae && ae.InnerExceptions.Count == 1)
11011102
{
@@ -1180,6 +1181,13 @@ public void EndBuild()
11801181

11811182
MSBuildEventSource.Log.BuildStop();
11821183

1184+
if (_threadException is not null)
1185+
{
1186+
RecordCrashTelemetry(_threadException.SourceException, isUnhandled: true);
1187+
}
1188+
1189+
CrashTelemetryRecorder.FlushCrashTelemetry();
1190+
11831191
_threadException?.Throw();
11841192

11851193
if (BuildParameters.DumpOpportunisticInternStats)
@@ -1217,6 +1225,21 @@ private void EndBuildTelemetry()
12171225
includeTargetDetails: false));
12181226
}
12191227

1228+
/// <summary>
/// Records crash telemetry data for later emission via <see cref="CrashTelemetryRecorder.FlushCrashTelemetry"/>.
/// </summary>
/// <remarks>
/// Best-effort: this method never throws. It is invoked from exception-handling paths in
/// <c>EndBuild</c>, so a failure while gathering telemetry inputs must not replace the
/// exception that is currently being handled.
/// </remarks>
/// <param name="exception">The exception to describe in the crash telemetry.</param>
/// <param name="isUnhandled">
/// When <see langword="true"/>, the exit type is reported as "UnhandledException";
/// otherwise the short type name of <paramref name="exception"/> is used as the exit type.
/// </param>
private void RecordCrashTelemetry(Exception exception, bool isUnhandled)
{
    try
    {
        // The arguments below are evaluated before CrashTelemetryRecorder's own guard is
        // entered, so they need a guard of their own.
        CrashTelemetryRecorder.RecordCrashTelemetry(
            exception,
            isUnhandled ? "UnhandledException" : exception.GetType().Name,
            isUnhandled,
            ExceptionHandling.IsCriticalException(exception),
            ProjectCollection.Version?.ToString(),
            NativeMethodsShared.FrameworkName,
            _buildTelemetry?.BuildEngineHost);
    }
    catch
    {
        // Best effort: telemetry must never cause a secondary failure.
    }
}
1242+
12201243
/// <summary>
12211244
/// Convenience method. Submits a lone build request and blocks until results are available.
12221245
/// </summary>
Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System;
5+
using System.Collections.Generic;
6+
using System.Runtime.CompilerServices;
7+
using System.Security.Cryptography;
8+
using System.Text;
9+
10+
namespace Microsoft.Build.Framework.Telemetry;
11+
12+
/// <summary>
/// Telemetry data for MSBuild crashes and unhandled exceptions.
/// </summary>
/// <remarks>
/// Exception-derived fields are filled in by <see cref="PopulateFromException"/>; the remaining
/// fields are set by the caller. The data can be exported either as activity tags
/// (<see cref="GetActivityProperties"/>) or as string properties (<see cref="GetProperties"/>).
/// </remarks>
internal class CrashTelemetry : TelemetryBase, IActivityTelemetryDataHolder
{
    /// <summary>
    /// Upper bound on the number of entries <see cref="GetActivityProperties"/> can produce.
    /// </summary>
    private const int MaxPropertyCount = 12;

    /// <summary>
    /// Marker that separates the method signature from the source location in a stack frame
    /// rendered as "at Type.Method(args) in path:line N".
    /// </summary>
    private const string SourceLocationMarker = ") in ";

    public override string EventName => "crash";

    /// <summary>
    /// The full name of the exception type (e.g., "System.NullReferenceException").
    /// </summary>
    public string? ExceptionType { get; set; }

    /// <summary>
    /// Inner exception type, if any.
    /// </summary>
    public string? InnerExceptionType { get; set; }

    /// <summary>
    /// The exit type / category of the crash (e.g., "LoggerFailure", "Unexpected", "UnhandledException").
    /// </summary>
    public string? ExitType { get; set; }

    /// <summary>
    /// Whether the exception is classified as critical (OOM, StackOverflow, AccessViolation, etc.).
    /// </summary>
    public bool? IsCritical { get; set; }

    /// <summary>
    /// Whether the crash came from the unhandled exception handler (true) or a catch block (false).
    /// </summary>
    public bool IsUnhandled { get; set; }

    /// <summary>
    /// SHA-256 hash of the stack trace, for bucketing without sending PII.
    /// </summary>
    public string? StackHash { get; set; }

    /// <summary>
    /// The method at the top of the call stack where the exception originated.
    /// </summary>
    public string? StackTop { get; set; }

    /// <summary>
    /// The HResult from the exception, if available.
    /// </summary>
    public int? HResult { get; set; }

    /// <summary>
    /// Version of MSBuild.
    /// </summary>
    public string? BuildEngineVersion { get; set; }

    /// <summary>
    /// Framework name (.NET 10.0, .NET Framework 4.7.2, etc.).
    /// </summary>
    public string? BuildEngineFrameworkName { get; set; }

    /// <summary>
    /// Host in which MSBuild is running (VS, VSCode, CLI, etc.).
    /// </summary>
    public string? BuildEngineHost { get; set; }

    /// <summary>
    /// Timestamp when the crash occurred.
    /// </summary>
    public DateTime? CrashTimestamp { get; set; }

    /// <summary>
    /// Populates the exception-derived fields of this instance: exception type, inner exception
    /// type, HResult, stack hash and stack top. <see cref="CrashTimestamp"/> is set to the
    /// current UTC time.
    /// </summary>
    /// <param name="exception">The exception to describe. Must not be null.</param>
    public void PopulateFromException(Exception exception)
    {
        ExceptionType = exception.GetType().FullName;
        InnerExceptionType = exception.InnerException?.GetType().FullName;
        HResult = exception.HResult;
        CrashTimestamp = DateTime.UtcNow;
        StackHash = ComputeStackHash(exception);
        StackTop = ExtractStackTop(exception);
    }

    /// <summary>
    /// Create a list of properties sent to VS telemetry as activity tags.
    /// </summary>
    /// <returns>
    /// A dictionary keyed by property name that contains every non-null field. Values keep
    /// their native type, except <see cref="CrashTimestamp"/>, which is formatted with the
    /// round-trip ("O") format.
    /// </returns>
    public Dictionary<string, object> GetActivityProperties()
    {
        Dictionary<string, object> telemetryItems = new(MaxPropertyCount);

        AddIfNotNull(ExceptionType);
        AddIfNotNull(InnerExceptionType);
        AddIfNotNull(ExitType);
        AddIfNotNull(IsCritical);
        AddIfNotNull(IsUnhandled);
        AddIfNotNull(StackHash);
        AddIfNotNull(StackTop);
        AddIfNotNull(HResult);
        AddIfNotNull(BuildEngineVersion);
        AddIfNotNull(BuildEngineFrameworkName);
        AddIfNotNull(BuildEngineHost);

        if (CrashTimestamp.HasValue)
        {
            telemetryItems.Add(nameof(CrashTimestamp), CrashTimestamp.Value.ToString("O"));
        }

        return telemetryItems;

        // The key defaults to the text of the argument expression, i.e. the property name.
        void AddIfNotNull(object? value, [CallerArgumentExpression(nameof(value))] string key = "")
        {
            if (value is not null)
            {
                telemetryItems.Add(key, value);
            }
        }
    }

    /// <summary>
    /// Returns every non-null field as a string keyed by property name.
    /// </summary>
    public override IDictionary<string, string> GetProperties()
    {
        var properties = new Dictionary<string, string>(MaxPropertyCount);

        AddIfNotNull(ExceptionType);
        AddIfNotNull(InnerExceptionType);
        AddIfNotNull(ExitType);
        AddIfNotNull(IsCritical?.ToString(), nameof(IsCritical));
        AddIfNotNull(IsUnhandled.ToString(), nameof(IsUnhandled));
        AddIfNotNull(StackHash);
        AddIfNotNull(StackTop);

        // HRESULTs are usually negative; format with the invariant culture so the sign is
        // always ASCII '-' regardless of the current culture's negative sign.
        AddIfNotNull(HResult?.ToString(System.Globalization.CultureInfo.InvariantCulture), nameof(HResult));
        AddIfNotNull(BuildEngineVersion);
        AddIfNotNull(BuildEngineFrameworkName);
        AddIfNotNull(BuildEngineHost);
        AddIfNotNull(CrashTimestamp?.ToString("O"), nameof(CrashTimestamp));

        return properties;

        // The key defaults to the text of the argument expression, i.e. the property name.
        void AddIfNotNull(string? value, [CallerArgumentExpression(nameof(value))] string key = "")
        {
            if (value is not null)
            {
                properties[key] = value;
            }
        }
    }

    /// <summary>
    /// Computes a SHA-256 hash of the exception stack trace for bucketing without PII.
    /// </summary>
    /// <returns>
    /// The hash as an uppercase hexadecimal string, or <see langword="null"/> when the
    /// exception has no stack trace.
    /// </returns>
    private static string? ComputeStackHash(Exception exception)
    {
        string? stackTrace = exception.StackTrace;
        if (stackTrace is null)
        {
            return null;
        }

#if NET
        byte[] hashBytes = SHA256.HashData(Encoding.UTF8.GetBytes(stackTrace));
        return Convert.ToHexString(hashBytes);
#else
        using SHA256 sha256 = SHA256.Create();
        byte[] hashBytes = sha256.ComputeHash(Encoding.UTF8.GetBytes(stackTrace));
        StringBuilder sb = new(hashBytes.Length * 2);
        foreach (byte b in hashBytes)
        {
            sb.Append(b.ToString("X2"));
        }

        return sb.ToString();
#endif
    }

    /// <summary>
    /// Extracts the top frame of the stack trace to identify the crash location.
    /// </summary>
    /// <returns>
    /// The trimmed first line of the stack trace with any trailing source location
    /// (" in &lt;file&gt;:line N") removed, or <see langword="null"/> when the exception
    /// has no stack trace.
    /// </returns>
    private static string? ExtractStackTop(Exception exception)
    {
        string? stackTrace = exception.StackTrace;
        if (stackTrace is null)
        {
            return null;
        }

        // Get the first line of the stack trace (the top frame).
        int newLineIndex = stackTrace.IndexOf('\n');
        string topFrame = (newLineIndex >= 0 ? stackTrace.Substring(0, newLineIndex) : stackTrace).Trim();

        // When symbols are available the frame ends with " in <file path>:line N". The path can
        // contain user or machine names, so keep only the method signature.
        // NOTE(review): presumably the "in" keyword can be localized on .NET Framework; such
        // frames would pass through unchanged - confirm whether further sanitization is needed.
        int markerIndex = topFrame.IndexOf(SourceLocationMarker, StringComparison.Ordinal);
        return markerIndex >= 0
            ? topFrame.Substring(0, markerIndex + 1) // keep the closing ')'
            : topFrame;
    }
}
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System;
5+
using System.Runtime.CompilerServices;
6+
7+
namespace Microsoft.Build.Framework.Telemetry;
8+
9+
/// <summary>
/// Single entry point for capturing crash/failure telemetry and handing it to the telemetry manager.
/// Every public method swallows its own failures so that telemetry can never make a crash worse.
/// </summary>
internal static class CrashTelemetryRecorder
{
    /// <summary>
    /// Captures crash details and parks them in <see cref="KnownTelemetry.CrashTelemetry"/>
    /// until <see cref="FlushCrashTelemetry"/> emits them.
    /// </summary>
    /// <param name="exception">The exception that caused the crash.</param>
    /// <param name="exitType">Exit type classification (e.g. "LoggerFailure", "Unexpected").</param>
    /// <param name="isUnhandled">True if the exception was not caught by any catch block.</param>
    /// <param name="isCritical">Whether the exception is classified as critical (OOM, StackOverflow, etc.).</param>
    /// <param name="buildEngineVersion">MSBuild version string, if available.</param>
    /// <param name="buildEngineFrameworkName">Framework name, if available.</param>
    /// <param name="buildEngineHost">Host name (VS, VSCode, CLI, etc.), if available.</param>
    public static void RecordCrashTelemetry(
        Exception exception,
        string exitType,
        bool isUnhandled,
        bool isCritical,
        string? buildEngineVersion = null,
        string? buildEngineFrameworkName = null,
        string? buildEngineHost = null)
    {
        try
        {
            CrashTelemetry pending = CreateCrashTelemetry(exception, exitType, isUnhandled, isCritical);
            pending.BuildEngineVersion = buildEngineVersion;
            pending.BuildEngineFrameworkName = buildEngineFrameworkName;
            pending.BuildEngineHost = buildEngineHost;

            KnownTelemetry.CrashTelemetry = pending;
        }
        catch
        {
            // Best effort: telemetry must never cause a secondary failure.
        }
    }

    /// <summary>
    /// Captures crash details and emits them right away, initializing the telemetry manager first.
    /// Intended for situations where the process is about to terminate (e.g. the unhandled
    /// exception handler) and no later flush will happen.
    /// </summary>
    /// <param name="exception">The exception that caused the crash.</param>
    /// <param name="exitType">Exit type classification.</param>
    /// <param name="isUnhandled">True if the exception was not caught by any catch block.</param>
    /// <param name="isCritical">Whether the exception is classified as critical.</param>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public static void RecordAndFlushCrashTelemetry(
        Exception exception,
        string exitType,
        bool isUnhandled,
        bool isCritical)
    {
        try
        {
            CrashTelemetry snapshot = CreateCrashTelemetry(exception, exitType, isUnhandled, isCritical);

            TelemetryManager.Instance?.Initialize(isStandalone: false);

            EmitCrashActivity(snapshot);
        }
        catch
        {
            // Best effort: telemetry must never cause a secondary failure.
        }
    }

    /// <summary>
    /// Emits the crash telemetry previously stored by <see cref="RecordCrashTelemetry"/>, if any,
    /// and clears the stored value before emitting.
    /// Requires that TelemetryManager has already been initialized by the caller.
    /// </summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public static void FlushCrashTelemetry()
    {
        try
        {
            if (KnownTelemetry.CrashTelemetry is { } pending)
            {
                KnownTelemetry.CrashTelemetry = null;

                // Do not call TelemetryManager.Initialize here — the caller (Main or BuildManager)
                // is responsible for initialization. Calling Initialize from here would create a
                // VS telemetry session when tests call MSBuildApp.Execute() in-process, causing
                // environment variable side effects.
                EmitCrashActivity(pending);
            }
        }
        catch
        {
            // Best effort: telemetry must never cause a secondary failure.
        }
    }

    /// <summary>
    /// Starts a crash activity on the default activity source (when one is available), tags it
    /// with the supplied telemetry, and disposes the activity on exit.
    /// </summary>
    private static void EmitCrashActivity(CrashTelemetry crashTelemetry)
    {
        using IActivity? crashActivity = TelemetryManager.Instance
            ?.DefaultActivitySource
            ?.StartActivity(TelemetryConstants.Crash);

        crashActivity?.SetTags(crashTelemetry);
    }

    /// <summary>
    /// Builds a <see cref="CrashTelemetry"/> from the exception and the caller-supplied classification.
    /// </summary>
    private static CrashTelemetry CreateCrashTelemetry(
        Exception exception,
        string exitType,
        bool isUnhandled,
        bool isCritical)
    {
        CrashTelemetry result = new()
        {
            ExitType = exitType,
            IsCritical = isCritical,
            IsUnhandled = isUnhandled,
        };

        result.PopulateFromException(exception);
        return result;
    }
}

src/Framework/Telemetry/KnownTelemetry.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,10 @@ internal static class KnownTelemetry
2525
/// Describes if and how BuildCheck was used.
2626
/// </summary>
2727
public static BuildCheckTelemetry BuildCheckTelemetry { get; } = new BuildCheckTelemetry();
28+
29+
/// <summary>
30+
/// Partial Telemetry for crash/failure reporting.
31+
/// This is set when a crash occurs and emitted before the process exits.
32+
/// </summary>
33+
public static CrashTelemetry? CrashTelemetry { get; set; }
2834
}

src/Framework/Telemetry/TelemetryConstants.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,4 +47,9 @@ internal static class TelemetryConstants
4747
/// Name of the property for build activity.
4848
/// </summary>
4949
public const string Build = "Build";
50+
51+
/// <summary>
52+
/// Name of the crash/failure activity.
53+
/// </summary>
54+
public const string Crash = "Crash";
5055
}

0 commit comments

Comments
 (0)