diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs
index 4eb3cbac115d1f..6b78d7b077b394 100644
--- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs
+++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs
@@ -29,6 +29,208 @@ public partial class RegexGenerator
         private static string EscapeXmlComment(string text) =>
             text.Replace("&", "&amp;").Replace("<", "&lt;").Replace(">", "&gt;");
 
+        /// <summary>
+        /// Determines whether <paramref name="c"/> should be written as an escape sequence, rather than verbatim,
+        /// when a pattern is rendered into a generated doc comment.
+        /// </summary>
+        /// <param name="c">The pattern character to classify.</param>
+        /// <returns>
+        /// <see langword="true"/> if the Unicode category of <paramref name="c"/> is Control, OtherNotAssigned,
+        /// ParagraphSeparator, LineSeparator or Surrogate; otherwise, <see langword="false"/>.
+        /// </returns>
+        private static bool EscapingHelpsPatternReadability(char c)
+        {
+            switch (CharUnicodeInfo.GetUnicodeCategory(c))
+            {
+                case UnicodeCategory.Control:
+                case UnicodeCategory.OtherNotAssigned:
+                case UnicodeCategory.ParagraphSeparator:
+                case UnicodeCategory.LineSeparator:
+                case UnicodeCategory.Surrogate:
+                    return true;
+                default:
+                    return false;
+            }
+        }
+
+        /// <summary>
+        /// Gets the letter of the single-character escape sequence (for example 'n' for a line feed) that denotes
+        /// <paramref name="c"/> in both C# string literals and regular expressions.
+        /// </summary>
+        /// <param name="c">The character to look up.</param>
+        /// <param name="e">The letter that follows the backslash, or '\0' when there is no such escape.</param>
+        /// <returns><see langword="true"/> if <paramref name="c"/> is one of \a, \f, \n, \r, \t or \v; otherwise, <see langword="false"/>.</returns>
+        private static bool TryGetEscapedPatternChar(char c, out char e)
+        {
+            // For String, see https://learn.microsoft.com/en-us/dotnet/csharp/programming-guide/strings/#string-escape-sequences
+            // For Regex, see https://learn.microsoft.com/en-us/dotnet/standard/base-types/character-escapes-in-regular-expressions#character-escapes-in-net
+            // We pick only those that mean the same character in both contexts.
+            switch (c)
+            {
+                case '\a':
+                    e = 'a';
+                    return true;
+                case '\f':
+                    e = 'f';
+                    return true;
+                case '\n':
+                    e = 'n';
+                    return true;
+                case '\r':
+                    e = 'r';
+                    return true;
+                case '\t':
+                    e = 't';
+                    return true;
+                case '\v':
+                    e = 'v';
+                    return true;
+                default:
+                    e = '\0';
+                    return false;
+            }
+        }
+
+        /// <summary>Gets the XML entity that must replace <paramref name="c"/> inside XML text content.</summary>
+        /// <param name="c">The character to look up.</param>
+        /// <param name="e">The entity for '&amp;', '&lt;' or '&gt;'; otherwise <see langword="null"/>.</param>
+        /// <returns><see langword="true"/> if <paramref name="c"/> needs an entity; otherwise, <see langword="false"/>.</returns>
+        private static bool TryGetEscapedXmlText(char c, [NotNullWhen(true)] out string? e)
+        {
+            switch (c)
+            {
+                case '&':
+                    e = "&amp;";
+                    return true;
+                case '<':
+                    e = "&lt;";
+                    return true;
+                case '>':
+                    e = "&gt;";
+                    return true;
+                default:
+                    e = null;
+                    return false;
+            }
+        }
+
+        /// <summary>
+        /// Writes <paramref name="pattern"/> as one or more <c>///</c> doc-comment lines, escaping characters that are
+        /// invalid in XML or hard to read and replacing XML special characters with entities.
+        /// </summary>
+        /// <param name="pattern">The regular expression pattern to render.</param>
+        /// <param name="ignorePatternWhitespace">Whether the pattern uses RegexOptions.IgnorePatternWhitespace; if so, whitespace outside character classes is laid out in the comment instead of being escaped.</param>
+        /// <param name="writer">The writer that receives the comment text.</param>
+        private static void WritePatternInXmlComment(string pattern, bool ignorePatternWhitespace, IndentedTextWriter writer)
+        {
+            writer.Write("/// ");
+
+            // State carried across chars: whether we are inside [...] and the length of the backslash run just seen.
+            bool inCharClass = false;
+            int backslashes = 0;
+
+            for (int i = 0; i < pattern.Length; i++)
+            {
+                char c = pattern[i];
+
+                // Plain spaces, letters and digits are always written verbatim.
+                if (c == ' ' || char.IsLetterOrDigit(c))
+                {
+                    writer.Write(c);
+
+                    backslashes = 0;
+                }
+                else if (!Xml.XmlConvert.IsXmlChar(c) || EscapingHelpsPatternReadability(c))
+                {
+                    // For the behavior of IgnorePatternWhitespace,
+                    // see https://learn.microsoft.com/en-us/dotnet/standard/base-types/regular-expression-options#ignore-white-space
+
+                    if (!ignorePatternWhitespace || !char.IsWhiteSpace(c) || inCharClass)
+                    {
+                        // We need to look back to see whether a preceding \ already escapes this char, i.e. it follows an odd number of backslashes.
+                        // For example,
+                        //     @"\\\" + '\uFFFF'
+                        // In this case,
+                        // the first \ escapes the second \, instructs the regex engine to match a backslash.
+                        // The third \ on the left becomes effectively nothing, because '\uFFFF' is not recognized as an escaped character.
+
+                        if (!inEscape(backslashes))
+                        {
+                            writer.Write('\\');
+                        }
+
+                        if (TryGetEscapedPatternChar(c, out char e))
+                        {
+                            writer.Write(e);
+                        }
+                        else
+                        {
+                            writer.Write($"u{(int)c:x4}");
+                        }
+                    }
+                    else if (SyntaxFacts.IsNewLine(c))
+                    {
+                        // IgnorePatternWhitespace only recognizes '\n' as the new line char for # comments,
+                        // otherwise they're ignored anyway. To avoid confusion, we don't print them in the summary.
+
+                        if (c == '\n')
+                        {
+                            writer.WriteLine();
+                            writer.Write("/// ");
+                        }
+                    }
+                    else if (Xml.XmlConvert.IsXmlChar(c))
+                    {
+                        writer.Write(c);
+                    }
+                    else
+                    {
+                        // Illegal whitespace char for xml, ignored anyway.
+                        // To avoid confusion, we just print normal spaces for them in the summary.
+
+                        writer.Write(' ');
+                    }
+
+                    backslashes = 0;
+                }
+                else
+                {
+                    // Printable char: write it as is, substituting an XML entity where one is required.
+                    if (TryGetEscapedXmlText(c, out string? entity))
+                    {
+                        writer.Write(entity);
+                    }
+                    else
+                    {
+                        writer.Write(c);
+                    }
+
+                    // Keep track of character-class boundaries and of the run of backslashes that ends here.
+                    if (c == '[' && !inEscape(backslashes) && !inCharClass)
+                    {
+                        inCharClass = true;
+                    }
+                    else if (c == ']' && !inEscape(backslashes) && inCharClass)
+                    {
+                        inCharClass = false;
+                    }
+                    else if (c == '\\')
+                    {
+                        backslashes++;
+                    }
+                    else
+                    {
+                        backslashes = 0;
+                    }
+                }
+            }
+
+            writer.WriteLine();
+
+            // An odd-length run of backslashes means the char that follows it is escaped.
+            static bool inEscape(int backslashes) => backslashes % 2 != 0;
+        }
+
         /// Emits the definition of the partial method. This method just delegates to the property cache on the generated Regex-derived type.
         private static void EmitRegexPartialMethod(RegexMethod regexMethod, IndentedTextWriter writer)
         {
@@ -58,7 +260,9 @@ private static void EmitRegexPartialMethod(RegexMethod regexMethod, IndentedText
             // Emit the partial method definition.
             writer.WriteLine($"/// ");
             writer.WriteLine($"/// Pattern:
"); - writer.WriteLine($"/// {EscapeXmlComment(Literal(regexMethod.Pattern, quote: false))}
"); + writer.WriteLine($"/// "); + WritePatternInXmlComment(regexMethod.Pattern, regexMethod.Options.HasFlag(RegexOptions.IgnorePatternWhitespace), writer); + writer.WriteLine($"///
"); if (regexMethod.Options != RegexOptions.None) { writer.WriteLine($"/// Options:
"); diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs index 181c978a376661..073730154940d8 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs @@ -49,6 +49,1062 @@ static string Normalize(string code) public static IEnumerable ValidateExpectedOutput_MemberData() { + yield return new object[] + { + """ + using System.Text.RegularExpressions; + partial class C + { + [GeneratedRegex("[x\t] # match an x or a tab\ny # then match a y", RegexOptions.IgnorePatternWhitespace)] + public static partial Regex Valid(); + } + """, + + """ + // + #nullable enable + #pragma warning disable CS0162 // Unreachable code + #pragma warning disable CS0164 // Unreferenced label + #pragma warning disable CS0219 // Variable assigned but never used + + partial class C + { + /// + /// Pattern:
+ /// + /// [x\t] # match an x or a tab + /// y # then match a y + ///
+ /// Options:
+ /// RegexOptions.IgnorePatternWhitespace
+ /// Explanation:
+ /// + /// ○ Match a character in the set [\tx].
+ /// ○ Match 'y'.
+ ///
+ ///
+ [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "%VERSION%")] + public static partial global::System.Text.RegularExpressions.Regex Valid() => global::System.Text.RegularExpressions.Generated.Valid_0.Instance; + } + + namespace System.Text.RegularExpressions.Generated + { + using System; + using System.Buffers; + using System.CodeDom.Compiler; + using System.Collections; + using System.ComponentModel; + using System.Globalization; + using System.Runtime.CompilerServices; + using System.Text.RegularExpressions; + using System.Threading; + + /// Custom -derived type for the Valid method. + [GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "%VERSION%")] + [SkipLocalsInit] + file sealed class Valid_0 : Regex + { + /// Cached, thread-safe singleton instance. + internal static readonly Valid_0 Instance = new(); + + /// Initializes the instance. + private Valid_0() + { + base.pattern = "[x\t] # match an x or a tab\ny # then match a y"; + base.roptions = RegexOptions.IgnorePatternWhitespace; + ValidateMatchTimeout(Utilities.s_defaultTimeout); + base.internalMatchTimeout = Utilities.s_defaultTimeout; + base.factory = new RunnerFactory(); + base.capsize = 1; + } + + /// Provides a factory for creating instances to be used by methods on . + private sealed class RunnerFactory : RegexRunnerFactory + { + /// Creates an instance of a used by methods on . + protected override RegexRunner CreateInstance() => new Runner(); + + /// Provides the runner that contains the custom logic implementing the specified regular expression. + private sealed class Runner : RegexRunner + { + /// Scan the starting from base.runtextstart for the next match. + /// The text being scanned by the regular expression. + protected override void Scan(ReadOnlySpan inputSpan) + { + // Search until we can't find a valid starting position, we find a match, or we reach the end of the input. 
+ while (TryFindNextPossibleStartingPosition(inputSpan) && + !TryMatchAtCurrentPosition(inputSpan) && + base.runtextpos != inputSpan.Length) + { + base.runtextpos++; + if (Utilities.s_hasTimeout) + { + base.CheckTimeout(); + } + } + } + + /// Search starting from base.runtextpos for the next location a match could possibly start. + /// The text being scanned by the regular expression. + /// true if a possible match was found; false if no more matches are possible. + private bool TryFindNextPossibleStartingPosition(ReadOnlySpan inputSpan) + { + int pos = base.runtextpos; + + // Any possible match is at least 2 characters. + if (pos <= inputSpan.Length - 2) + { + // The pattern begins with a character in the set [\tx]. + // Find the next occurrence. If it can't be found, there's no match. + ReadOnlySpan span = inputSpan.Slice(pos); + for (int i = 0; i < span.Length - 1; i++) + { + int indexOfPos = span.Slice(i).IndexOfAny('\t', 'x'); + if (indexOfPos < 0) + { + goto NoMatchFound; + } + i += indexOfPos; + + // The primary set being searched for was found. 1 more set will be checked so as + // to minimize the number of places TryMatchAtCurrentPosition is run unnecessarily. + // Make sure it fits in the remainder of the input. + if ((uint)(i + 1) >= (uint)span.Length) + { + goto NoMatchFound; + } + + if ((span[i + 1] == 'y')) + { + base.runtextpos = pos + i; + return true; + } + } + } + + // No match found. + NoMatchFound: + base.runtextpos = inputSpan.Length; + return false; + } + + /// Determine whether at base.runtextpos is a match for the regular expression. + /// The text being scanned by the regular expression. + /// true if the regular expression matches at the current position; otherwise, false. 
+ private bool TryMatchAtCurrentPosition(ReadOnlySpan inputSpan) + { + int pos = base.runtextpos; + int matchStart = pos; + char ch; + ReadOnlySpan slice = inputSpan.Slice(pos); + + if ((uint)slice.Length < 2 || + (((ch = slice[0]) != '\t') & (ch != 'x')) || // Match a character in the set [\tx]. + slice[1] != 'y') // Match 'y'. + { + return false; // The input didn't match. + } + + // The input matched. + pos += 2; + base.runtextpos = pos; + base.Capture(0, matchStart, pos); + return true; + } + } + } + + } + + /// Helper methods used by generated -derived implementations. + [GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "%VERSION%")] + file static class Utilities + { + /// Default timeout value set in , or if none was set. + internal static readonly TimeSpan s_defaultTimeout = AppContext.GetData("REGEX_DEFAULT_MATCH_TIMEOUT") is TimeSpan timeout ? timeout : Regex.InfiniteMatchTimeout; + + /// Whether is non-infinite. + internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout; + } + } + """ + }; + + yield return new object[] + { + """ + using System.Text.RegularExpressions; + partial class C + { + [GeneratedRegex("[x ] # match an x or a space\r\ny # then match a y", RegexOptions.IgnorePatternWhitespace)] + public static partial Regex Valid(); + } + """, + + """ + // + #nullable enable + #pragma warning disable CS0162 // Unreachable code + #pragma warning disable CS0164 // Unreferenced label + #pragma warning disable CS0219 // Variable assigned but never used + + partial class C + { + /// + /// Pattern:
+ /// + /// [x ] # match an x or a space + /// y # then match a y + ///
+ /// Options:
+ /// RegexOptions.IgnorePatternWhitespace
+ /// Explanation:
+ /// + /// ○ Match a character in the set [ x].
+ /// ○ Match 'y'.
+ ///
+ ///
+ [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "%VERSION%")] + public static partial global::System.Text.RegularExpressions.Regex Valid() => global::System.Text.RegularExpressions.Generated.Valid_0.Instance; + } + + namespace System.Text.RegularExpressions.Generated + { + using System; + using System.Buffers; + using System.CodeDom.Compiler; + using System.Collections; + using System.ComponentModel; + using System.Globalization; + using System.Runtime.CompilerServices; + using System.Text.RegularExpressions; + using System.Threading; + + /// Custom -derived type for the Valid method. + [GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "%VERSION%")] + [SkipLocalsInit] + file sealed class Valid_0 : Regex + { + /// Cached, thread-safe singleton instance. + internal static readonly Valid_0 Instance = new(); + + /// Initializes the instance. + private Valid_0() + { + base.pattern = "[x ] # match an x or a space\r\ny # then match a y"; + base.roptions = RegexOptions.IgnorePatternWhitespace; + ValidateMatchTimeout(Utilities.s_defaultTimeout); + base.internalMatchTimeout = Utilities.s_defaultTimeout; + base.factory = new RunnerFactory(); + base.capsize = 1; + } + + /// Provides a factory for creating instances to be used by methods on . + private sealed class RunnerFactory : RegexRunnerFactory + { + /// Creates an instance of a used by methods on . + protected override RegexRunner CreateInstance() => new Runner(); + + /// Provides the runner that contains the custom logic implementing the specified regular expression. + private sealed class Runner : RegexRunner + { + /// Scan the starting from base.runtextstart for the next match. + /// The text being scanned by the regular expression. + protected override void Scan(ReadOnlySpan inputSpan) + { + // Search until we can't find a valid starting position, we find a match, or we reach the end of the input. 
+ while (TryFindNextPossibleStartingPosition(inputSpan) && + !TryMatchAtCurrentPosition(inputSpan) && + base.runtextpos != inputSpan.Length) + { + base.runtextpos++; + if (Utilities.s_hasTimeout) + { + base.CheckTimeout(); + } + } + } + + /// Search starting from base.runtextpos for the next location a match could possibly start. + /// The text being scanned by the regular expression. + /// true if a possible match was found; false if no more matches are possible. + private bool TryFindNextPossibleStartingPosition(ReadOnlySpan inputSpan) + { + int pos = base.runtextpos; + char ch; + + // Any possible match is at least 2 characters. + if (pos <= inputSpan.Length - 2) + { + // The pattern matches a character in the set y at index 1. + // Find the next occurrence. If it can't be found, there's no match. + ReadOnlySpan span = inputSpan.Slice(pos); + for (int i = 0; i < span.Length - 1; i++) + { + int indexOfPos = span.Slice(i + 1).IndexOf('y'); + if (indexOfPos < 0) + { + goto NoMatchFound; + } + i += indexOfPos; + + if ((((ch = span[i]) == ' ') | (ch == 'x'))) + { + base.runtextpos = pos + i; + return true; + } + } + } + + // No match found. + NoMatchFound: + base.runtextpos = inputSpan.Length; + return false; + } + + /// Determine whether at base.runtextpos is a match for the regular expression. + /// The text being scanned by the regular expression. + /// true if the regular expression matches at the current position; otherwise, false. + private bool TryMatchAtCurrentPosition(ReadOnlySpan inputSpan) + { + int pos = base.runtextpos; + int matchStart = pos; + char ch; + ReadOnlySpan slice = inputSpan.Slice(pos); + + if ((uint)slice.Length < 2 || + (((ch = slice[0]) != ' ') & (ch != 'x')) || // Match a character in the set [ x]. + slice[1] != 'y') // Match 'y'. + { + return false; // The input didn't match. + } + + // The input matched. 
+ pos += 2; + base.runtextpos = pos; + base.Capture(0, matchStart, pos); + return true; + } + } + } + + } + + /// Helper methods used by generated -derived implementations. + [GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "%VERSION%")] + file static class Utilities + { + /// Default timeout value set in , or if none was set. + internal static readonly TimeSpan s_defaultTimeout = AppContext.GetData("REGEX_DEFAULT_MATCH_TIMEOUT") is TimeSpan timeout ? timeout : Regex.InfiniteMatchTimeout; + + /// Whether is non-infinite. + internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout; + } + } + """ + }; + + yield return new object[] + { + """ + using System.Text.RegularExpressions; + partial class C + { + [GeneratedRegex("x # match an x\ny # then match a y", RegexOptions.IgnorePatternWhitespace)] + public static partial Regex Valid(); + } + """, + + """ + // + #nullable enable + #pragma warning disable CS0162 // Unreachable code + #pragma warning disable CS0164 // Unreferenced label + #pragma warning disable CS0219 // Variable assigned but never used + + partial class C + { + /// + /// Pattern:
+ /// + /// x # match an x + /// y # then match a y + ///
+ /// Options:
+ /// RegexOptions.IgnorePatternWhitespace
+ /// Explanation:
+ /// + /// ○ Match the string "xy".
+ ///
+ ///
+ [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "%VERSION%")] + public static partial global::System.Text.RegularExpressions.Regex Valid() => global::System.Text.RegularExpressions.Generated.Valid_0.Instance; + } + + namespace System.Text.RegularExpressions.Generated + { + using System; + using System.Buffers; + using System.CodeDom.Compiler; + using System.Collections; + using System.ComponentModel; + using System.Globalization; + using System.Runtime.CompilerServices; + using System.Text.RegularExpressions; + using System.Threading; + + /// Custom -derived type for the Valid method. + [GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "%VERSION%")] + [SkipLocalsInit] + file sealed class Valid_0 : Regex + { + /// Cached, thread-safe singleton instance. + internal static readonly Valid_0 Instance = new(); + + /// Initializes the instance. + private Valid_0() + { + base.pattern = "x # match an x\ny # then match a y"; + base.roptions = RegexOptions.IgnorePatternWhitespace; + ValidateMatchTimeout(Utilities.s_defaultTimeout); + base.internalMatchTimeout = Utilities.s_defaultTimeout; + base.factory = new RunnerFactory(); + base.capsize = 1; + } + + /// Provides a factory for creating instances to be used by methods on . + private sealed class RunnerFactory : RegexRunnerFactory + { + /// Creates an instance of a used by methods on . + protected override RegexRunner CreateInstance() => new Runner(); + + /// Provides the runner that contains the custom logic implementing the specified regular expression. + private sealed class Runner : RegexRunner + { + /// Scan the starting from base.runtextstart for the next match. + /// The text being scanned by the regular expression. + protected override void Scan(ReadOnlySpan inputSpan) + { + if (TryFindNextPossibleStartingPosition(inputSpan)) + { + // The search in TryFindNextPossibleStartingPosition performed the entire match. 
+ int start = base.runtextpos; + int end = base.runtextpos = start + 2; + base.Capture(0, start, end); + } + } + + /// Search starting from base.runtextpos for the next location a match could possibly start. + /// The text being scanned by the regular expression. + /// true if a possible match was found; false if no more matches are possible. + private bool TryFindNextPossibleStartingPosition(ReadOnlySpan inputSpan) + { + int pos = base.runtextpos; + + // Any possible match is at least 2 characters. + if (pos <= inputSpan.Length - 2) + { + // The pattern has the literal "xy" at the beginning of the pattern. Find the next occurrence. + // If it can't be found, there's no match. + int i = inputSpan.Slice(pos).IndexOfAny(Utilities.s_indexOfString_xy_Ordinal); + if (i >= 0) + { + base.runtextpos = pos + i; + return true; + } + } + + // No match found. + base.runtextpos = inputSpan.Length; + return false; + } + } + } + + } + + /// Helper methods used by generated -derived implementations. + [GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "%VERSION%")] + file static class Utilities + { + /// Default timeout value set in , or if none was set. + internal static readonly TimeSpan s_defaultTimeout = AppContext.GetData("REGEX_DEFAULT_MATCH_TIMEOUT") is TimeSpan timeout ? timeout : Regex.InfiniteMatchTimeout; + + /// Whether is non-infinite. + internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout; + + /// Supports searching for the string "xy". 
+ internal static readonly SearchValues s_indexOfString_xy_Ordinal = SearchValues.Create(["xy"], StringComparison.Ordinal); + } + } + """ + }; + + yield return new object[] + { + """ + using System.Text.RegularExpressions; + partial class C + { + [GeneratedRegex("\n")] + public static partial Regex Valid(); + } + """, + + """ + // + #nullable enable + #pragma warning disable CS0162 // Unreachable code + #pragma warning disable CS0164 // Unreferenced label + #pragma warning disable CS0219 // Variable assigned but never used + + partial class C + { + /// + /// Pattern:
+ /// + /// \n + ///
+ /// Explanation:
+ /// + /// ○ Match '\n'.
+ ///
+ ///
+ [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "%VERSION%")] + public static partial global::System.Text.RegularExpressions.Regex Valid() => global::System.Text.RegularExpressions.Generated.Valid_0.Instance; + } + + namespace System.Text.RegularExpressions.Generated + { + using System; + using System.Buffers; + using System.CodeDom.Compiler; + using System.Collections; + using System.ComponentModel; + using System.Globalization; + using System.Runtime.CompilerServices; + using System.Text.RegularExpressions; + using System.Threading; + + /// Custom -derived type for the Valid method. + [GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "%VERSION%")] + [SkipLocalsInit] + file sealed class Valid_0 : Regex + { + /// Cached, thread-safe singleton instance. + internal static readonly Valid_0 Instance = new(); + + /// Initializes the instance. + private Valid_0() + { + base.pattern = "\n"; + base.roptions = RegexOptions.None; + ValidateMatchTimeout(Utilities.s_defaultTimeout); + base.internalMatchTimeout = Utilities.s_defaultTimeout; + base.factory = new RunnerFactory(); + base.capsize = 1; + } + + /// Provides a factory for creating instances to be used by methods on . + private sealed class RunnerFactory : RegexRunnerFactory + { + /// Creates an instance of a used by methods on . + protected override RegexRunner CreateInstance() => new Runner(); + + /// Provides the runner that contains the custom logic implementing the specified regular expression. + private sealed class Runner : RegexRunner + { + /// Scan the starting from base.runtextstart for the next match. + /// The text being scanned by the regular expression. + protected override void Scan(ReadOnlySpan inputSpan) + { + if (TryFindNextPossibleStartingPosition(inputSpan)) + { + // The search in TryFindNextPossibleStartingPosition performed the entire match. 
+ int start = base.runtextpos; + int end = base.runtextpos = start + 1; + base.Capture(0, start, end); + } + } + + /// Search starting from base.runtextpos for the next location a match could possibly start. + /// The text being scanned by the regular expression. + /// true if a possible match was found; false if no more matches are possible. + private bool TryFindNextPossibleStartingPosition(ReadOnlySpan inputSpan) + { + int pos = base.runtextpos; + + // Empty matches aren't possible. + if ((uint)pos < (uint)inputSpan.Length) + { + // The pattern begins with a character in the set \n. + // Find the next occurrence. If it can't be found, there's no match. + int i = inputSpan.Slice(pos).IndexOf('\n'); + if (i >= 0) + { + base.runtextpos = pos + i; + return true; + } + } + + // No match found. + base.runtextpos = inputSpan.Length; + return false; + } + } + } + + } + + /// Helper methods used by generated -derived implementations. + [GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "%VERSION%")] + file static class Utilities + { + /// Default timeout value set in , or if none was set. + internal static readonly TimeSpan s_defaultTimeout = AppContext.GetData("REGEX_DEFAULT_MATCH_TIMEOUT") is TimeSpan timeout ? timeout : Regex.InfiniteMatchTimeout; + + /// Whether is non-infinite. + internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout; + } + } + """ + }; + + yield return new object[] + { + """ + using System.Text.RegularExpressions; + partial class C + { + [GeneratedRegex(@"\\\" + "\xFFFF")] + public static partial Regex Valid(); + } + """, + + """ + // + #nullable enable + #pragma warning disable CS0162 // Unreachable code + #pragma warning disable CS0164 // Unreferenced label + #pragma warning disable CS0219 // Variable assigned but never used + + partial class C + { + /// + /// Pattern:
+ /// + /// \\\uffff + ///
+ /// Explanation:
+ /// + /// ○ Match the string "\\\uffff".
+ ///
+ ///
+ [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "%VERSION%")] + public static partial global::System.Text.RegularExpressions.Regex Valid() => global::System.Text.RegularExpressions.Generated.Valid_0.Instance; + } + + namespace System.Text.RegularExpressions.Generated + { + using System; + using System.Buffers; + using System.CodeDom.Compiler; + using System.Collections; + using System.ComponentModel; + using System.Globalization; + using System.Runtime.CompilerServices; + using System.Text.RegularExpressions; + using System.Threading; + + /// Custom -derived type for the Valid method. + [GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "%VERSION%")] + [SkipLocalsInit] + file sealed class Valid_0 : Regex + { + /// Cached, thread-safe singleton instance. + internal static readonly Valid_0 Instance = new(); + + /// Initializes the instance. + private Valid_0() + { + base.pattern = "\\\\\\\uffff"; + base.roptions = RegexOptions.None; + ValidateMatchTimeout(Utilities.s_defaultTimeout); + base.internalMatchTimeout = Utilities.s_defaultTimeout; + base.factory = new RunnerFactory(); + base.capsize = 1; + } + + /// Provides a factory for creating instances to be used by methods on . + private sealed class RunnerFactory : RegexRunnerFactory + { + /// Creates an instance of a used by methods on . + protected override RegexRunner CreateInstance() => new Runner(); + + /// Provides the runner that contains the custom logic implementing the specified regular expression. + private sealed class Runner : RegexRunner + { + /// Scan the starting from base.runtextstart for the next match. + /// The text being scanned by the regular expression. + protected override void Scan(ReadOnlySpan inputSpan) + { + if (TryFindNextPossibleStartingPosition(inputSpan)) + { + // The search in TryFindNextPossibleStartingPosition performed the entire match. 
+ int start = base.runtextpos; + int end = base.runtextpos = start + 2; + base.Capture(0, start, end); + } + } + + /// Search starting from base.runtextpos for the next location a match could possibly start. + /// The text being scanned by the regular expression. + /// true if a possible match was found; false if no more matches are possible. + private bool TryFindNextPossibleStartingPosition(ReadOnlySpan inputSpan) + { + int pos = base.runtextpos; + + // Any possible match is at least 2 characters. + if (pos <= inputSpan.Length - 2) + { + // The pattern has the literal "\\\uffff" at the beginning of the pattern. Find the next occurrence. + // If it can't be found, there's no match. + int i = inputSpan.Slice(pos).IndexOfAny(Utilities.s_indexOfString_6D4536B5396DF4C9BDE2BA4B2B55651CDD8BD8CB135649680B3997EC2D00AE2B); + if (i >= 0) + { + base.runtextpos = pos + i; + return true; + } + } + + // No match found. + base.runtextpos = inputSpan.Length; + return false; + } + } + } + + } + + /// Helper methods used by generated -derived implementations. + [GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "%VERSION%")] + file static class Utilities + { + /// Default timeout value set in , or if none was set. + internal static readonly TimeSpan s_defaultTimeout = AppContext.GetData("REGEX_DEFAULT_MATCH_TIMEOUT") is TimeSpan timeout ? timeout : Regex.InfiniteMatchTimeout; + + /// Whether is non-infinite. + internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout; + + /// Supports searching for the string "\\\uffff". 
+ internal static readonly SearchValues s_indexOfString_6D4536B5396DF4C9BDE2BA4B2B55651CDD8BD8CB135649680B3997EC2D00AE2B = SearchValues.Create(["\\\uffff"], StringComparison.Ordinal); + } + } + """ + }; + + yield return new object[] + { + """ + using System.Text.RegularExpressions; + partial class C + { + [GeneratedRegex(@"\\" + "\0")] + public static partial Regex Valid(); + } + """, + + """ + // + #nullable enable + #pragma warning disable CS0162 // Unreachable code + #pragma warning disable CS0164 // Unreferenced label + #pragma warning disable CS0219 // Variable assigned but never used + + partial class C + { + /// + /// Pattern:
+ /// + /// \\\u0000 + ///
+ /// Explanation:
+ /// + /// ○ Match the string "\\\0".
+ ///
+ ///
+ [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "%VERSION%")] + public static partial global::System.Text.RegularExpressions.Regex Valid() => global::System.Text.RegularExpressions.Generated.Valid_0.Instance; + } + + namespace System.Text.RegularExpressions.Generated + { + using System; + using System.Buffers; + using System.CodeDom.Compiler; + using System.Collections; + using System.ComponentModel; + using System.Globalization; + using System.Runtime.CompilerServices; + using System.Text.RegularExpressions; + using System.Threading; + + /// Custom -derived type for the Valid method. + [GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "%VERSION%")] + [SkipLocalsInit] + file sealed class Valid_0 : Regex + { + /// Cached, thread-safe singleton instance. + internal static readonly Valid_0 Instance = new(); + + /// Initializes the instance. + private Valid_0() + { + base.pattern = "\\\\\0"; + base.roptions = RegexOptions.None; + ValidateMatchTimeout(Utilities.s_defaultTimeout); + base.internalMatchTimeout = Utilities.s_defaultTimeout; + base.factory = new RunnerFactory(); + base.capsize = 1; + } + + /// Provides a factory for creating instances to be used by methods on . + private sealed class RunnerFactory : RegexRunnerFactory + { + /// Creates an instance of a used by methods on . + protected override RegexRunner CreateInstance() => new Runner(); + + /// Provides the runner that contains the custom logic implementing the specified regular expression. + private sealed class Runner : RegexRunner + { + /// Scan the starting from base.runtextstart for the next match. + /// The text being scanned by the regular expression. + protected override void Scan(ReadOnlySpan inputSpan) + { + if (TryFindNextPossibleStartingPosition(inputSpan)) + { + // The search in TryFindNextPossibleStartingPosition performed the entire match. 
+ int start = base.runtextpos; + int end = base.runtextpos = start + 2; + base.Capture(0, start, end); + } + } + + /// Search starting from base.runtextpos for the next location a match could possibly start. + /// The text being scanned by the regular expression. + /// true if a possible match was found; false if no more matches are possible. + private bool TryFindNextPossibleStartingPosition(ReadOnlySpan inputSpan) + { + int pos = base.runtextpos; + + // Any possible match is at least 2 characters. + if (pos <= inputSpan.Length - 2) + { + // The pattern has the literal "\\\0" at the beginning of the pattern. Find the next occurrence. + // If it can't be found, there's no match. + int i = inputSpan.Slice(pos).IndexOfAny(Utilities.s_indexOfString_F5AD4E08A1DC241CCEB8A64C3AE3475B0ABA933EF3F444A8DD725FEEBB8F767B); + if (i >= 0) + { + base.runtextpos = pos + i; + return true; + } + } + + // No match found. + base.runtextpos = inputSpan.Length; + return false; + } + } + } + + } + + /// Helper methods used by generated -derived implementations. + [GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "%VERSION%")] + file static class Utilities + { + /// Default timeout value set in , or if none was set. + internal static readonly TimeSpan s_defaultTimeout = AppContext.GetData("REGEX_DEFAULT_MATCH_TIMEOUT") is TimeSpan timeout ? timeout : Regex.InfiniteMatchTimeout; + + /// Whether is non-infinite. + internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout; + + /// Supports searching for the string "\\\0". 
+ internal static readonly SearchValues s_indexOfString_F5AD4E08A1DC241CCEB8A64C3AE3475B0ABA933EF3F444A8DD725FEEBB8F767B = SearchValues.Create(["\\\0"], StringComparison.Ordinal); + } + } + """ + }; + + yield return new object[] + { + """ + using System.Text.RegularExpressions; + partial class C + { + [GeneratedRegex("\\d\0")] + public static partial Regex Valid(); + } + """, + + """ + // + #nullable enable + #pragma warning disable CS0162 // Unreachable code + #pragma warning disable CS0164 // Unreferenced label + #pragma warning disable CS0219 // Variable assigned but never used + + partial class C + { + /// + /// Pattern:
+ /// + /// \d\u0000 + ///
+ /// Explanation:
+ /// + /// ○ Match a Unicode digit.
+ /// ○ Match '\0'.
+ ///
+ ///
+ [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "%VERSION%")] + public static partial global::System.Text.RegularExpressions.Regex Valid() => global::System.Text.RegularExpressions.Generated.Valid_0.Instance; + } + + namespace System.Text.RegularExpressions.Generated + { + using System; + using System.Buffers; + using System.CodeDom.Compiler; + using System.Collections; + using System.ComponentModel; + using System.Globalization; + using System.Runtime.CompilerServices; + using System.Text.RegularExpressions; + using System.Threading; + + /// Custom -derived type for the Valid method. + [GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "%VERSION%")] + [SkipLocalsInit] + file sealed class Valid_0 : Regex + { + /// Cached, thread-safe singleton instance. + internal static readonly Valid_0 Instance = new(); + + /// Initializes the instance. + private Valid_0() + { + base.pattern = "\\d\0"; + base.roptions = RegexOptions.None; + ValidateMatchTimeout(Utilities.s_defaultTimeout); + base.internalMatchTimeout = Utilities.s_defaultTimeout; + base.factory = new RunnerFactory(); + base.capsize = 1; + } + + /// Provides a factory for creating instances to be used by methods on . + private sealed class RunnerFactory : RegexRunnerFactory + { + /// Creates an instance of a used by methods on . + protected override RegexRunner CreateInstance() => new Runner(); + + /// Provides the runner that contains the custom logic implementing the specified regular expression. + private sealed class Runner : RegexRunner + { + /// Scan the starting from base.runtextstart for the next match. + /// The text being scanned by the regular expression. + protected override void Scan(ReadOnlySpan inputSpan) + { + // Search until we can't find a valid starting position, we find a match, or we reach the end of the input. 
+ while (TryFindNextPossibleStartingPosition(inputSpan) && + !TryMatchAtCurrentPosition(inputSpan) && + base.runtextpos != inputSpan.Length) + { + base.runtextpos++; + if (Utilities.s_hasTimeout) + { + base.CheckTimeout(); + } + } + } + + /// Search starting from base.runtextpos for the next location a match could possibly start. + /// The text being scanned by the regular expression. + /// true if a possible match was found; false if no more matches are possible. + private bool TryFindNextPossibleStartingPosition(ReadOnlySpan inputSpan) + { + int pos = base.runtextpos; + + // Any possible match is at least 2 characters. + if (pos <= inputSpan.Length - 2) + { + // The pattern matches a character in the set \0 at index 1. + // Find the next occurrence. If it can't be found, there's no match. + ReadOnlySpan span = inputSpan.Slice(pos); + for (int i = 0; i < span.Length - 1; i++) + { + int indexOfPos = span.Slice(i + 1).IndexOf('\0'); + if (indexOfPos < 0) + { + goto NoMatchFound; + } + i += indexOfPos; + + if (char.IsDigit(span[i])) + { + base.runtextpos = pos + i; + return true; + } + } + } + + // No match found. + NoMatchFound: + base.runtextpos = inputSpan.Length; + return false; + } + + /// Determine whether at base.runtextpos is a match for the regular expression. + /// The text being scanned by the regular expression. + /// true if the regular expression matches at the current position; otherwise, false. + private bool TryMatchAtCurrentPosition(ReadOnlySpan inputSpan) + { + int pos = base.runtextpos; + int matchStart = pos; + ReadOnlySpan slice = inputSpan.Slice(pos); + + if ((uint)slice.Length < 2 || + !char.IsDigit(slice[0]) || // Match a Unicode digit. + slice[1] != '\0') // Match '\0'. + { + return false; // The input didn't match. + } + + // The input matched. + pos += 2; + base.runtextpos = pos; + base.Capture(0, matchStart, pos); + return true; + } + } + } + + } + + /// Helper methods used by generated -derived implementations. 
+ [GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "%VERSION%")] + file static class Utilities + { + /// Default timeout value set in , or if none was set. + internal static readonly TimeSpan s_defaultTimeout = AppContext.GetData("REGEX_DEFAULT_MATCH_TIMEOUT") is TimeSpan timeout ? timeout : Regex.InfiniteMatchTimeout; + + /// Whether is non-infinite. + internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout; + } + } + """ + }; + yield return new object[] { """ @@ -71,7 +1127,9 @@ partial class C { /// /// Pattern:
- /// ^(?<proto>\\w+)://[^/]+?(?<port>:\\d+)?/
+ /// + /// ^(?<proto>\w+)://[^/]+?(?<port>:\d+)?/ + ///
/// Explanation:
/// /// ○ Match if at the beginning of the string.
@@ -434,7 +1492,9 @@ partial class C { /// /// Pattern:
- /// href\\s*=\\s*(?:["'](?<1>[^"']*)["']|(?<1>[^>\\s]+))
+ /// + /// href\s*=\s*(?:["'](?<1>[^"']*)["']|(?<1>[^>\s]+)) + ///
/// Explanation:
/// /// ○ Match the string "href".
@@ -739,7 +1799,9 @@ partial class C { /// /// Pattern:
- /// [A-Za-z]+
+ /// + /// [A-Za-z]+ + ///
/// Explanation:
/// /// ○ Match a character in the set [A-Za-z] atomically at least once.