Skip to content

Commit d0ac02b

Browse files
committed
Compile adapted regex patterns on .NET 7+ only if they don't contain negative lookaround assertions
1 parent a5a6c4b commit d0ac02b

File tree

11 files changed

+291
-239
lines changed

11 files changed

+291
-239
lines changed

src/Acornima/Ast/INode.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,8 @@ public interface INode
1010
ref readonly Range RangeRef { get; }
1111
SourceLocation Location { get; }
1212
ref readonly SourceLocation LocationRef { get; }
13+
/// <remarks>
14+
/// The operation is not guaranteed to be thread-safe. If concurrent access or update is possible, the necessary synchronization is the caller's responsibility.
15+
/// </remarks>
1316
object? UserData { get; }
1417
}

src/Acornima/RegExpParseMode.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ public enum RegExpParseMode
3636
/// In case an invalid regular expression is encountered, <see cref="SyntaxErrorException"/> is thrown.<br/>
3737
/// In the case of a valid regular expression for which an equivalent <see cref="Regex"/> cannot be constructed, either <see cref="RegExpConversionErrorException"/> is thrown
3838
/// or a <see cref="Token"/> is created with the <see cref="Token.Value"/> property set to <see langword="null"/>, depending on the <see cref="TokenizerOptions.Tolerant"/> option.
39+
/// <para>
40+
/// Please note that adapted patterns containing negative lookaround assertions won't be compiled on .NET 7+ because of a <see href="https://github.com/dotnet/runtime/issues/97455">regression in .NET's regex compiler</see>.
41+
/// </para>
3942
/// </remarks>
4043
AdaptToCompiled,
4144
}

src/Acornima/Tokenizer.RegExpParser.cs

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ internal partial struct RegExpParser
4646
private const int SetRangeNotStarted = int.MaxValue;
4747
private const int SetRangeStartedWithCharClass = int.MaxValue - 1;
4848

49+
// Negative lookaround assertions don't work as expected under .NET 7 and .NET 8 when the regex is compiled
50+
// (see also https://github.com/dotnet/runtime/issues/97455).
51+
private static readonly bool s_canCompileNegativeLookaroundAssertions = typeof(Regex).Assembly.GetName().Version?.Major < 7;
52+
4953
internal static RegExpFlags ParseFlags(string value, int startIndex, Tokenizer tokenizer)
5054
{
5155
var flags = RegExpFlags.None;
@@ -175,7 +179,7 @@ public RegExpParseResult Parse()
175179
}
176180
}
177181

178-
var adaptedPattern = ParseCore(out var capturingGroups, out conversionError);
182+
var adaptedPattern = ParseCore(out var capturingGroups, out conversionError, out var canCompile);
179183
if (adaptedPattern is null)
180184
{
181185
// NOTE: ParseCore should return null
@@ -188,7 +192,7 @@ public RegExpParseResult Parse()
188192
Debug.Assert(conversionError is null);
189193
capturingGroups.TrimExcess();
190194

191-
var options = FlagsToOptions(_flags, compiled: _tokenizer._options._regExpParseMode == RegExpParseMode.AdaptToCompiled);
195+
var options = FlagsToOptions(_flags, compiled: _tokenizer._options._regExpParseMode == RegExpParseMode.AdaptToCompiled && canCompile);
192196
var matchTimeout = _tokenizer._options._regexTimeout;
193197

194198
try
@@ -202,7 +206,7 @@ public RegExpParseResult Parse()
202206
}
203207
}
204208

205-
internal string? ParseCore(out ArrayList<RegExpCapturingGroup> capturingGroups, out RegExpConversionError? conversionError)
209+
internal string? ParseCore(out ArrayList<RegExpCapturingGroup> capturingGroups, out RegExpConversionError? conversionError, out bool canCompile)
206210
{
207211
_tokenizer.AcquireStringBuilder(out var sb);
208212
try
@@ -234,9 +238,11 @@ public RegExpParseResult Parse()
234238
};
235239
context.SetFollowingQuantifierError(RegExpNothingToRepeat);
236240

237-
return (_flags & RegExpFlags.Unicode) != 0
241+
var adaptedPattern = (_flags & RegExpFlags.Unicode) != 0
238242
? ParsePattern(UnicodeMode.Instance, ref context, out conversionError)
239243
: ParsePattern(LegacyMode.Instance, ref context, out conversionError);
244+
canCompile = context.CanCompile;
245+
return adaptedPattern;
240246
}
241247
finally
242248
{
@@ -514,6 +520,10 @@ private void CheckBracesBalance(out ArrayList<RegExpCapturingGroup> capturingGro
514520
context.SetFollowingQuantifierError(RegExpNothingToRepeat);
515521
break;
516522
}
523+
else if (!s_canCompileNegativeLookaroundAssertions && groupType is RegExpGroupType.NegativeLookaheadAssertion or RegExpGroupType.NegativeLookbehindAssertion)
524+
{
525+
context.CanCompile = false;
526+
}
517527

518528
sb?.Append(_pattern, i, 1 + ((int)groupType >> 2));
519529
i += (int)groupType >> 2;
@@ -1166,6 +1176,7 @@ public ParsePatternContext(StringBuilder? sb, ReadOnlySpan<RegExpCapturingGroup>
11661176

11671177
CapturingGroups = capturingGroups;
11681178
CapturingGroupNames = capturingGroupNames;
1179+
CanCompile = true;
11691180
}
11701181

11711182
public int Index;
@@ -1225,6 +1236,8 @@ public void SetFollowingQuantifierError(string message, [CallerArgumentExpressio
12251236
// * Lone surrogates need special care too.
12261237
// We use the following list to build the adjusted character set.
12271238
public ArrayList<CodePointRange> UnicodeSet;
1239+
1240+
public bool CanCompile;
12281241
}
12291242

12301243
private interface IMode

test/Acornima.Tests/Acornima.Tests.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<Project Sdk="Microsoft.NET.Sdk">
22

33
<PropertyGroup>
4-
<TargetFrameworks>net8.0</TargetFrameworks>
4+
<TargetFrameworks>net6.0;net8.0</TargetFrameworks>
55
<TargetFrameworks Condition="'$(OS)' == 'Windows_NT'">$(TargetFrameworks);net462</TargetFrameworks>
66
<SignAssembly>true</SignAssembly>
77
<AssemblyOriginatorKeyFile>..\..\src\Karambolo.Public.snk</AssemblyOriginatorKeyFile>

test/Acornima.Tests/Fixtures.RegExp/Generator/Generator.csproj

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
<AssemblyName>Acornima.Tests</AssemblyName>
55
<OutputType>Exe</OutputType>
66
<TargetFramework>net8.0</TargetFramework>
7-
<Nullable>enable</Nullable>
7+
<SignAssembly>true</SignAssembly>
8+
<AssemblyOriginatorKeyFile>..\..\..\..\src\Karambolo.Public.snk</AssemblyOriginatorKeyFile>
89
</PropertyGroup>
910

1011
<ItemGroup>

test/Acornima.Tests/Fixtures.RegExp/Generator/Generator.sln

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ VisualStudioVersion = 17.6.33717.318
55
MinimumVisualStudioVersion = 10.0.40219.1
66
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Generator", "Generator.csproj", "{5FC6B784-BAA9-4BF6-8845-0D762D938816}"
77
EndProject
8-
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Acornima.Core", "..\..\..\..\src\Acornima.Core\Acornima.Core.csproj", "{C3607046-115D-45CE-8EA2-580D270DDC08}"
8+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Acornima.Core", "..\..\..\..\src\Acornima\Acornima.csproj", "{C3607046-115D-45CE-8EA2-580D270DDC08}"
99
EndProject
1010
Global
1111
GlobalSection(SolutionConfigurationPlatforms) = preSolution

test/Acornima.Tests/Fixtures.RegExp/Generator/Program.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ static string DecodeStringIfEscaped(string value) => JavaScriptString.IsStringLi
7474
var flags = DecodeStringIfEscaped(parts[1]);
7575

7676
var regexParser = new Tokenizer.RegExpParser(pattern, flags, tokenizerOptions);
77-
try { adaptedPattern = regexParser.ParseCore(out _, out _) ?? ")inconvertible("; }
77+
try { adaptedPattern = regexParser.ParseCore(out _, out _, out _) ?? ")inconvertible("; }
7878
catch (SyntaxErrorException) { adaptedPattern = ")syntax-error("; }
7979
var encodedDotnetPattern = JavaScriptString.Encode(adaptedPattern, addDoubleQuotes: false);
8080
if (adaptedPattern != encodedDotnetPattern)

0 commit comments

Comments
 (0)