Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions src/Build/BackEnd/BuildManager/BuildManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1096,6 +1096,7 @@ public void EndBuild()
catch (Exception e)
{
exceptionsThrownInEndBuild = true;
RecordCrashTelemetry(e, isUnhandled: false);

if (e is AggregateException ae && ae.InnerExceptions.Count == 1)
{
Expand Down Expand Up @@ -1180,6 +1181,13 @@ public void EndBuild()

MSBuildEventSource.Log.BuildStop();

if (_threadException is not null)
{
RecordCrashTelemetry(_threadException.SourceException, isUnhandled: true);
}

CrashTelemetryRecorder.FlushCrashTelemetry();

_threadException?.Throw();

if (BuildParameters.DumpOpportunisticInternStats)
Expand Down Expand Up @@ -1217,6 +1225,21 @@ private void EndBuildTelemetry()
includeTargetDetails: false));
}

/// <summary>
/// Captures details about <paramref name="exception"/> and hands them to
/// <see cref="CrashTelemetryRecorder.RecordCrashTelemetry"/>; the data is emitted later by
/// <see cref="CrashTelemetryRecorder.FlushCrashTelemetry"/>.
/// </summary>
/// <param name="exception">The exception to describe.</param>
/// <param name="isUnhandled">
/// When true the exit type is reported as "UnhandledException"; otherwise the short type name
/// of <paramref name="exception"/> is used.
/// </param>
private void RecordCrashTelemetry(Exception exception, bool isUnhandled)
{
    string exitType;
    if (isUnhandled)
    {
        exitType = "UnhandledException";
    }
    else
    {
        exitType = exception.GetType().Name;
    }

    CrashTelemetryRecorder.RecordCrashTelemetry(
        exception: exception,
        exitType: exitType,
        isUnhandled: isUnhandled,
        isCritical: ExceptionHandling.IsCriticalException(exception),
        buildEngineVersion: ProjectCollection.Version?.ToString(),
        buildEngineFrameworkName: NativeMethodsShared.FrameworkName,
        buildEngineHost: _buildTelemetry?.BuildEngineHost);
}

/// <summary>
/// Convenience method. Submits a lone build request and blocks until results are available.
/// </summary>
Expand Down
198 changes: 198 additions & 0 deletions src/Framework/Telemetry/CrashTelemetry.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Security.Cryptography;
using System.Text;

namespace Microsoft.Build.Framework.Telemetry;

/// <summary>
/// Telemetry data for MSBuild crashes and unhandled exceptions.
/// </summary>
internal class CrashTelemetry : TelemetryBase, IActivityTelemetryDataHolder
{
public override string EventName => "crash";

/// <summary>
/// The full name of the exception type (e.g., "System.NullReferenceException").
/// Filled in by <see cref="PopulateFromException"/>.
/// </summary>
public string? ExceptionType { get; set; }

/// <summary>
/// The full type name of the exception's immediate <see cref="Exception.InnerException"/>,
/// or null when there is no inner exception.
/// </summary>
public string? InnerExceptionType { get; set; }

/// <summary>
/// The exit type / category of the crash (e.g., "LoggerFailure", "Unexpected", "UnhandledException").
/// This is a free-form string chosen by the code that records the crash.
/// </summary>
public string? ExitType { get; set; }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typing this as a plain string seems a bit too liberal?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know!
It's version 1; until now we have had no insight into the crashes. Once the data is available and has been analyzed by the team, the corresponding adjustments can be made.


/// <summary>
/// Whether the exception is classified as critical (OOM, StackOverflow, AccessViolation, etc.).
/// Left null until the recording code supplies a value.
/// </summary>
public bool? IsCritical { get; set; }

/// <summary>
/// Whether the crash came from the unhandled exception handler (true) or a catch block (false).
/// </summary>
public bool IsUnhandled { get; set; }

/// <summary>
/// SHA-256 hash of the stack trace, for bucketing without sending PII.
/// Encoded as an uppercase hexadecimal string; null when the exception has no stack trace.
/// </summary>
public string? StackHash { get; set; }

/// <summary>
/// The method at the top of the call stack where the exception originated.
/// Derived from the first line of <see cref="Exception.StackTrace"/>; null when the exception has no stack trace.
/// </summary>
public string? StackTop { get; set; }

/// <summary>
/// The HResult from the exception, if available.
/// Copied from <see cref="Exception.HResult"/> by <see cref="PopulateFromException"/>.
/// </summary>
public int? HResult { get; set; }

/// <summary>
/// Version of MSBuild.
/// </summary>
public string? BuildEngineVersion { get; set; }

/// <summary>
/// Framework name (.NET 10.0, .NET Framework 4.7.2, etc.).
/// </summary>
public string? BuildEngineFrameworkName { get; set; }

/// <summary>
/// Host in which MSBuild is running (VS, VSCode, CLI, etc.).
/// </summary>
public string? BuildEngineHost { get; set; }

/// <summary>
/// Timestamp when the crash occurred.
/// Set to the current UTC time by <see cref="PopulateFromException"/> and emitted in round-trip ("O") format.
/// </summary>
public DateTime? CrashTimestamp { get; set; }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why? isn't this already included in the data field in the database on ingestion?


/// <summary>
/// Fills the exception-derived fields of this instance: type names, HResult, capture time (UTC),
/// stack hash and top stack frame.
/// </summary>
/// <param name="exception">The exception to read the details from.</param>
public void PopulateFromException(Exception exception)
{
    Type exceptionKind = exception.GetType();
    Exception? innerException = exception.InnerException;

    ExceptionType = exceptionKind.FullName;
    InnerExceptionType = innerException is null ? null : innerException.GetType().FullName;
    HResult = exception.HResult;
    CrashTimestamp = DateTime.UtcNow;

    // Both stack-derived values come from the same exception.StackTrace text.
    StackHash = ComputeStackHash(exception);
    StackTop = ExtractStackTop(exception);
}

/// <summary>
/// Create a list of properties sent to VS telemetry as activity tags.
/// </summary>
/// <remarks>
/// Properties whose value is null are omitted. Each key is the property name.
/// <see cref="IsUnhandled"/> is not nullable and is therefore always present.
/// <see cref="CrashTimestamp"/> is emitted as a round-trip ("O") formatted string.
/// </remarks>
/// <returns>A new dictionary containing the non-null telemetry values.</returns>
public Dictionary<string, object> GetActivityProperties()
{
    // Up to 12 entries can be added below (11 via AddIfNotNull plus the timestamp),
    // so size the dictionary for all of them up front.
    Dictionary<string, object> telemetryItems = new(12);

    AddIfNotNull(ExceptionType);
    AddIfNotNull(InnerExceptionType);
    AddIfNotNull(ExitType);
    AddIfNotNull(IsCritical);
    AddIfNotNull(IsUnhandled);
    AddIfNotNull(StackHash);
    AddIfNotNull(StackTop);
    AddIfNotNull(HResult);
    AddIfNotNull(BuildEngineVersion);
    AddIfNotNull(BuildEngineFrameworkName);
    AddIfNotNull(BuildEngineHost);

    if (CrashTimestamp.HasValue)
    {
        telemetryItems.Add(nameof(CrashTimestamp), CrashTimestamp.Value.ToString("O"));
    }

    return telemetryItems;

    // The key defaults to the text of the argument expression, i.e. the property name.
    void AddIfNotNull(object? value, [CallerArgumentExpression(nameof(value))] string key = "")
    {
        if (value is not null)
        {
            telemetryItems.Add(key, value);
        }
    }
}

/// <summary>
/// Builds the string-valued property bag for this telemetry event.
/// </summary>
/// <remarks>
/// Properties whose value is null are omitted. Each key is the property name.
/// All values are formatted culture-independently so that the emitted data does not vary
/// with the culture of the machine that crashed.
/// </remarks>
/// <returns>A new dictionary containing the non-null telemetry values as strings.</returns>
public override IDictionary<string, string> GetProperties()
{
    var properties = new Dictionary<string, string>();

    AddIfNotNull(ExceptionType);
    AddIfNotNull(InnerExceptionType);
    AddIfNotNull(ExitType);
    AddIfNotNull(IsCritical?.ToString(), nameof(IsCritical));
    AddIfNotNull(IsUnhandled.ToString(), nameof(IsUnhandled));
    AddIfNotNull(StackHash);
    AddIfNotNull(StackTop);

    // HRESULT failure codes are negative; format with the invariant culture so the minus sign
    // is always the ASCII '-' rather than a culture-specific negative sign.
    AddIfNotNull(HResult?.ToString(System.Globalization.CultureInfo.InvariantCulture), nameof(HResult));
    AddIfNotNull(BuildEngineVersion);
    AddIfNotNull(BuildEngineFrameworkName);
    AddIfNotNull(BuildEngineHost);
    AddIfNotNull(CrashTimestamp?.ToString("O"), nameof(CrashTimestamp));

    return properties;

    // The key defaults to the text of the argument expression; calls that pass a computed
    // expression supply the property name explicitly.
    void AddIfNotNull(string? value, [CallerArgumentExpression(nameof(value))] string key = "")
    {
        if (value is not null)
        {
            properties[key] = value;
        }
    }
}

/// <summary>
/// Hashes the exception's stack trace text with SHA-256 so crashes can be bucketed without PII.
/// </summary>
/// <param name="exception">The exception whose stack trace is hashed.</param>
/// <returns>
/// The hash of the UTF-8 encoded stack trace as uppercase hexadecimal,
/// or <see langword="null"/> when the exception has no stack trace.
/// </returns>
private static string? ComputeStackHash(Exception exception)
{
    if (exception.StackTrace is not string traceText)
    {
        return null;
    }

    byte[] traceBytes = Encoding.UTF8.GetBytes(traceText);

#if NET
    return Convert.ToHexString(SHA256.HashData(traceBytes));
#else
    using (SHA256 hasher = SHA256.Create())
    {
        // BitConverter produces "AB-CD-..." with uppercase digits; removing the dashes yields
        // the same text as formatting every byte with "X2".
        return BitConverter.ToString(hasher.ComputeHash(traceBytes)).Replace("-", string.Empty);
    }
#endif
}

/// <summary>
/// Extracts the top frame of the stack trace to identify the crash location.
/// </summary>
/// <remarks>
/// Only the method signature of the frame is kept. When source information is available the
/// runtime appends it to the frame (for example <c>in C:\Users\name\src\File.cs:line 42</c>);
/// that suffix can contain local paths and user names, so it is dropped before the value is
/// reported as telemetry.
/// </remarks>
/// <param name="exception">The exception whose stack trace is inspected.</param>
/// <returns>
/// The trimmed first frame up to and including the closing parenthesis of its parameter list,
/// the whole trimmed first line if no parameter list is found,
/// or <see langword="null"/> when the exception has no stack trace.
/// </returns>
private static string? ExtractStackTop(Exception exception)
{
    string? stackTrace = exception.StackTrace;
    if (stackTrace is null)
    {
        return null;
    }

    // Get the first line of the stack trace (the top frame).
    int newLineIndex = stackTrace.IndexOf('\n');
    string topFrame = (newLineIndex >= 0 ? stackTrace.Substring(0, newLineIndex) : stackTrace).Trim();

    // Cut right after the parameter list. Anchoring on the parentheses instead of the " in "
    // keyword keeps this independent of how the runtime words the source-location suffix.
    int openParenIndex = topFrame.IndexOf('(');
    int closeParenIndex = openParenIndex >= 0 ? topFrame.IndexOf(')', openParenIndex) : -1;

    return closeParenIndex >= 0 ? topFrame.Substring(0, closeParenIndex + 1) : topFrame;
}
}
121 changes: 121 additions & 0 deletions src/Framework/Telemetry/CrashTelemetryRecorder.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Runtime.CompilerServices;

namespace Microsoft.Build.Framework.Telemetry;

/// <summary>
/// Centralized helper for recording and flushing crash/failure telemetry.
/// All methods are best-effort and will never throw.
/// </summary>
internal static class CrashTelemetryRecorder
{
    /// <summary>
    /// Builds crash telemetry for <paramref name="exception"/> and stores it in
    /// <see cref="KnownTelemetry.CrashTelemetry"/> so that <see cref="FlushCrashTelemetry"/> can emit it later.
    /// Any previously stored, not yet flushed entry is replaced.
    /// </summary>
    /// <param name="exception">The exception that caused the crash.</param>
    /// <param name="exitType">Exit type classification (e.g. "LoggerFailure", "Unexpected").</param>
    /// <param name="isUnhandled">True if the exception was not caught by any catch block.</param>
    /// <param name="isCritical">Whether the exception is classified as critical (OOM, StackOverflow, etc.).</param>
    /// <param name="buildEngineVersion">MSBuild version string, if available.</param>
    /// <param name="buildEngineFrameworkName">Framework name, if available.</param>
    /// <param name="buildEngineHost">Host name (VS, VSCode, CLI, etc.), if available.</param>
    public static void RecordCrashTelemetry(
        Exception exception,
        string exitType,
        bool isUnhandled,
        bool isCritical,
        string? buildEngineVersion = null,
        string? buildEngineFrameworkName = null,
        string? buildEngineHost = null)
    {
        try
        {
            CrashTelemetry pending = CreateCrashTelemetry(exception, exitType, isUnhandled, isCritical);

            pending.BuildEngineVersion = buildEngineVersion;
            pending.BuildEngineFrameworkName = buildEngineFrameworkName;
            pending.BuildEngineHost = buildEngineHost;

            KnownTelemetry.CrashTelemetry = pending;
        }
        catch
        {
            // Best effort: telemetry must never cause a secondary failure.
        }
    }

    /// <summary>
    /// Records crash telemetry and immediately flushes it.
    /// Use when the process is about to terminate (e.g. unhandled exception handler).
    /// Unlike <see cref="FlushCrashTelemetry"/>, this initializes the telemetry manager itself.
    /// </summary>
    /// <param name="exception">The exception that caused the crash.</param>
    /// <param name="exitType">Exit type classification (e.g. "LoggerFailure", "Unexpected").</param>
    /// <param name="isUnhandled">True if the exception was not caught by any catch block.</param>
    /// <param name="isCritical">Whether the exception is classified as critical (OOM, StackOverflow, etc.).</param>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public static void RecordAndFlushCrashTelemetry(
        Exception exception,
        string exitType,
        bool isUnhandled,
        bool isCritical)
    {
        try
        {
            CrashTelemetry immediate = CreateCrashTelemetry(exception, exitType, isUnhandled, isCritical);

            TelemetryManager.Instance?.Initialize(isStandalone: false);

            EmitCrashActivity(immediate);
        }
        catch
        {
            // Best effort: telemetry must never cause a secondary failure.
        }
    }

    /// <summary>
    /// Flushes any pending crash telemetry via the telemetry manager.
    /// Requires that TelemetryManager has already been initialized by the caller.
    /// Does nothing when no crash telemetry has been recorded.
    /// </summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public static void FlushCrashTelemetry()
    {
        try
        {
            if (KnownTelemetry.CrashTelemetry is not { } pending)
            {
                return;
            }

            // Clear the slot before emitting so the same entry is not flushed twice.
            KnownTelemetry.CrashTelemetry = null;

            // Do not call TelemetryManager.Initialize here — the caller (Main or BuildManager)
            // is responsible for initialization. Calling Initialize from here would create a
            // VS telemetry session when tests call MSBuildApp.Execute() in-process, causing
            // environment variable side effects.
            EmitCrashActivity(pending);
        }
        catch
        {
            // Best effort: telemetry must never cause a secondary failure.
        }
    }

    /// <summary>
    /// Starts a <see cref="TelemetryConstants.Crash"/> activity on the default activity source,
    /// tags it with <paramref name="crashTelemetry"/> and disposes it.
    /// Does nothing when the telemetry manager, its activity source or the activity is unavailable.
    /// </summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    private static void EmitCrashActivity(CrashTelemetry crashTelemetry)
    {
        using IActivity? activity = TelemetryManager.Instance
            ?.DefaultActivitySource
            ?.StartActivity(TelemetryConstants.Crash);

        activity?.SetTags(crashTelemetry);
    }

    /// <summary>
    /// Creates a <see cref="CrashTelemetry"/> describing <paramref name="exception"/> with the given classification.
    /// </summary>
    private static CrashTelemetry CreateCrashTelemetry(
        Exception exception,
        string exitType,
        bool isUnhandled,
        bool isCritical)
    {
        CrashTelemetry telemetry = new()
        {
            ExitType = exitType,
            IsCritical = isCritical,
            IsUnhandled = isUnhandled,
        };

        telemetry.PopulateFromException(exception);
        return telemetry;
    }
}
6 changes: 6 additions & 0 deletions src/Framework/Telemetry/KnownTelemetry.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,10 @@ internal static class KnownTelemetry
/// Describes if and how BuildCheck was used.
/// </summary>
public static BuildCheckTelemetry BuildCheckTelemetry { get; } = new BuildCheckTelemetry();

/// <summary>
/// Pending telemetry for crash/failure reporting, or null when nothing is waiting to be emitted.
/// This is set when a crash occurs and emitted before the process exits;
/// <see cref="CrashTelemetryRecorder.FlushCrashTelemetry"/> resets it to null when it emits the data.
/// </summary>
public static CrashTelemetry? CrashTelemetry { get; set; }
}
5 changes: 5 additions & 0 deletions src/Framework/Telemetry/TelemetryConstants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,9 @@ internal static class TelemetryConstants
/// Name of the property for build activity.
/// </summary>
public const string Build = "Build";

/// <summary>
/// Name of the activity started when crash/failure telemetry is emitted.
/// </summary>
public const string Crash = "Crash";
}
Loading
Loading