diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs
index 4eb3cbac115d1f..6b78d7b077b394 100644
--- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs
+++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs
@@ -29,6 +29,176 @@ public partial class RegexGenerator
// Escapes the three characters that are markup-significant inside XML doc comment text.
// '&' must be replaced first so the ampersands introduced by the other two entities are not escaped again.
private static string EscapeXmlComment(string text) =>
    text.Replace("&", "&amp;").Replace("<", "&lt;").Replace(">", "&gt;");
+ /// <summary>
+ /// Returns <see langword="true"/> when the Unicode category of <paramref name="c"/> is Control,
+ /// OtherNotAssigned, ParagraphSeparator, LineSeparator or Surrogate; otherwise <see langword="false"/>.
+ /// </summary>
+ private static bool EscapingHelpsPatternReadability(char c)
+ {
+     UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(c);
+
+     return category == UnicodeCategory.Control
+         || category == UnicodeCategory.OtherNotAssigned
+         || category == UnicodeCategory.ParagraphSeparator
+         || category == UnicodeCategory.LineSeparator
+         || category == UnicodeCategory.Surrogate;
+ }
+
+ /// <summary>
+ /// Maps one of the characters \a, \f, \n, \r, \t, \v to the letter that follows the backslash in its escape sequence.
+ /// </summary>
+ /// <param name="c">The character to look up.</param>
+ /// <param name="e">The escape letter on success; '\0' otherwise.</param>
+ /// <returns><see langword="true"/> if <paramref name="c"/> is one of the six mapped characters.</returns>
+ private static bool TryGetEscapedPatternChar(char c, out char e)
+ {
+     // String escapes: https://learn.microsoft.com/en-us/dotnet/csharp/programming-guide/strings/#string-escape-sequences
+     // Regex escapes: https://learn.microsoft.com/en-us/dotnet/standard/base-types/character-escapes-in-regular-expressions#character-escapes-in-net
+     // Only escapes that denote the same character in both contexts are listed.
+     e = c switch
+     {
+         '\a' => 'a',
+         '\f' => 'f',
+         '\n' => 'n',
+         '\r' => 'r',
+         '\t' => 't',
+         '\v' => 'v',
+         _ => '\0',
+     };
+
+     // '\0' is never a mapped result, so it doubles as the "no escape" marker.
+     return e != '\0';
+ }
+
+ /// <summary>
+ /// Gets the predefined XML entity for a character that cannot appear literally in XML text
+ /// (ampersand, less-than, greater-than).
+ /// </summary>
+ /// <param name="c">The character to look up.</param>
+ /// <param name="e">The entity reference on success; <see langword="null"/> otherwise.</param>
+ /// <returns><see langword="true"/> if <paramref name="c"/> has to be written as an entity.</returns>
+ private static bool TryGetEscapedXmlText(char c, [NotNullWhen(true)] out string? e)
+ {
+     switch (c)
+     {
+         case '&':
+             e = "&amp;";
+             return true;
+         case '<':
+             e = "&lt;";
+             return true;
+         case '>':
+             e = "&gt;";
+             return true;
+         default:
+             e = null;
+             return false;
+     }
+ }
+
+ /// <summary>
+ /// Writes <paramref name="pattern"/> to <paramref name="writer"/> as "/// "-prefixed comment text,
+ /// replacing characters that are not valid XML characters or that are hard to read with regex escapes,
+ /// and writing XML-significant characters through <c>TryGetEscapedXmlText</c>.
+ /// </summary>
+ /// <param name="pattern">The regex pattern text to render.</param>
+ /// <param name="ignorePatternWhitespace">
+ /// Whether unescaped whitespace outside of a character class should be treated as ignored by the regex.
+ /// </param>
+ /// <param name="writer">The writer that receives the comment text; the output always ends with a line break.</param>
+ private static void WritePatternInXmlComment(string pattern, bool ignorePatternWhitespace, IndentedTextWriter writer)
+ {
+     writer.Write("/// ");
+
+     // Approximate tracking of "[...]": only unescaped brackets toggle it.
+     bool inCharClass = false;
+
+     // Number of consecutive backslashes immediately before the current character.
+     int backslashes = 0;
+
+     for (int i = 0; i < pattern.Length; i++)
+     {
+         char c = pattern[i];
+
+         if (c == ' ' || char.IsLetterOrDigit(c))
+         {
+             writer.Write(c);
+
+             backslashes = 0;
+         }
+         else if (!Xml.XmlConvert.IsXmlChar(c) || EscapingHelpsPatternReadability(c))
+         {
+             // For the behavior of IgnorePatternWhitespace,
+             // see https://learn.microsoft.com/en-us/dotnet/standard/base-types/regular-expression-options#ignore-white-space
+             // NOTE(review): char.IsWhiteSpace may be broader than the whitespace the regex parser skips - confirm.
+
+             // An odd number of preceding backslashes means the last one escapes this character.
+             bool escaped = inEscape(backslashes);
+
+             // A character escaped by a preceding backslash is significant to the regex even under
+             // IgnorePatternWhitespace, so it must be rendered as an escape instead of being treated as ignored.
+             if (!ignorePatternWhitespace || !char.IsWhiteSpace(c) || inCharClass || escaped)
+             {
+                 // We need to look back if any \ could change our \, when it follows an odd number of backslashes.
+                 // For example,
+                 //     @"\\\" + '\uFFFF'
+                 // In this case,
+                 //     the first \ escapes the second \, instructing the regex engine to match a backslash.
+                 //     The third \ becomes effectively nothing, because '\uFFFF' is not recognized as an escaped character.
+                 // So the existing trailing \ is reused as the start of our escape sequence.
+
+                 if (!escaped)
+                 {
+                     writer.Write('\\');
+                 }
+
+                 if (TryGetEscapedPatternChar(c, out char e))
+                 {
+                     writer.Write(e);
+                 }
+                 else
+                 {
+                     writer.Write($"u{(int)c:x4}");
+                 }
+             }
+             else if (SyntaxFacts.IsNewLine(c))
+             {
+                 // IgnorePatternWhitespace only recognizes '\n' as the new line char for # comments,
+                 // otherwise they're ignored anyway. To avoid confusion, we don't print them in the summary.
+
+                 if (c == '\n')
+                 {
+                     writer.WriteLine();
+                     writer.Write("/// ");
+                 }
+             }
+             else if (Xml.XmlConvert.IsXmlChar(c))
+             {
+                 writer.Write(c);
+             }
+             else
+             {
+                 // Illegal whitespace char for xml, ignored anyway.
+                 // To avoid confusion, we just print normal spaces for them in the summary.
+
+                 writer.Write(' ');
+             }
+
+             backslashes = 0;
+         }
+         else
+         {
+             if (TryGetEscapedXmlText(c, out string? entity))
+             {
+                 writer.Write(entity);
+             }
+             else
+             {
+                 writer.Write(c);
+             }
+
+             if (c == '[' && !inEscape(backslashes) && !inCharClass)
+             {
+                 inCharClass = true;
+             }
+             else if (c == ']' && !inEscape(backslashes) && inCharClass)
+             {
+                 inCharClass = false;
+             }
+             else if (c == '\\')
+             {
+                 backslashes++;
+             }
+             else
+             {
+                 backslashes = 0;
+             }
+         }
+     }
+
+     writer.WriteLine();
+
+     static bool inEscape(int backslashes) => backslashes % 2 != 0;
+ }
+
/// Emits the definition of the partial method. This method just delegates to the property cache on the generated Regex-derived type.
private static void EmitRegexPartialMethod(RegexMethod regexMethod, IndentedTextWriter writer)
{
@@ -58,7 +228,9 @@ private static void EmitRegexPartialMethod(RegexMethod regexMethod, IndentedText
// Emit the partial method definition.
writer.WriteLine($"/// ");
writer.WriteLine($"/// Pattern:
");
- writer.WriteLine($"/// {EscapeXmlComment(Literal(regexMethod.Pattern, quote: false))}
");
+ writer.WriteLine($"/// ");
+ WritePatternInXmlComment(regexMethod.Pattern, regexMethod.Options.HasFlag(RegexOptions.IgnorePatternWhitespace), writer);
+ writer.WriteLine($"///
");
if (regexMethod.Options != RegexOptions.None)
{
writer.WriteLine($"/// Options:
");
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs
index 181c978a376661..073730154940d8 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs
@@ -49,6 +49,1062 @@ static string Normalize(string code)
public static IEnumerable