stdlib: Fix O(n^2) algorithm in erl_eval:extended_parse_exprs/1

pguyot · pguyot · commit ea2976161a90 · 2025-11-01T17:04:58.000+01:00
`erl_eval:extended_parse_exprs/1` exhibited O(n^2) time complexity as the number of tokens passed to it grew, with both erts and AtomVM. The reason seems to be related to the `try <expr> of ... <recurse> catch _:_ -> <recurse otherwise> end.` pattern, where `<expr>` would usually fail. By replacing `<expr>` (in this case `unscannable/1`) with an expression that usually succeeds, the measured complexity is back to O(n), which is what it should be for this algorithm.

The script used to measure the complexity can be found here: https://gist.github.com/pguyot/1aa53791a819709f147e2ad55aadb279

With OTP 28.1.1:

```
=== Results Summary ===
Size | Tokens | Avg (ms) | Min (ms) | Max (ms) | StdDev | Ratio
-----|--------|----------|----------|----------|--------|-------
512  | 1026   | 4        | 3        | 4        | 1      | 0.31/0.5
1024 | 2050   | 13       | 12       | 13       | 0      | 0.27/0.5
2048 | 4098   | 49       | 48       | 49       | 1      | 0.26/0.5
4096 | 8194   | 188      | 187      | 189      | 1      | 0.25/0.5
8192 | 16386  | 739      | 736      | 743      | 2      | -

=== Complexity Analysis ===
Expected behavior for doubling size:
- O(n): 2x time
- O(n^2): 4x time
- O(n^3): 8x time

Size 512 -> 1024: time ratio 3.25 (between O(n) and O(n^2))
Size 1024 -> 2048: time ratio 3.77 (between O(n) and O(n^2))
Size 2048 -> 4096: time ratio 3.84 (between O(n) and O(n^2))
Size 4096 -> 8192: time ratio 3.93 (between O(n) and O(n^2))
```

With this change, it is both much faster and exhibits O(n) behavior:
```
=== Results Summary ===
Size | Tokens | Avg (ms) | Min (ms) | Max (ms) | StdDev | Ratio
-----|--------|----------|----------|----------|--------|-------
512  | 1026   | 1        | 1        | 1        | 0      | 0.50/0.5
1024 | 2050   | 2        | 1        | 3        | 1      | 0.50/0.5
2048 | 4098   | 4        | 3        | 5        | 0      | 0.50/0.5
4096 | 8194   | 8        | 7        | 10       | 1      | 0.50/0.5
8192 | 16386  | 16       | 14       | 19       | 2      | -

=== Complexity Analysis ===
Expected behavior for doubling size:
- O(n): 2x time
- O(n^2): 4x time
- O(n^3): 8x time

Size 512 -> 1024: time ratio 2.00 (approximately O(n))
Size 1024 -> 2048: time ratio 2.00 (approximately O(n))
Size 2048 -> 4096: time ratio 2.00 (approximately O(n))
Size 4096 -> 8192: time ratio 2.00 (approximately O(n))
```

Signed-off-by: Paul Guyot <pguyot@kallisys.net>
diff --git a/lib/stdlib/src/erl_eval.erl b/lib/stdlib/src/erl_eval.erl
@@ -2012,11 +2012,15 @@ tokens_fixup([T|Ts]=Ts0) ->
     end.
 
 token_fixup(Ts) ->
-    {AnnoL, NewTs, FixupTag} = unscannable(Ts),
-    String = lists:append([erl_anno:text(A) || A <- AnnoL]),
-    _ = validate_tag(FixupTag, String),
-    NewAnno = erl_anno:set_text(fixup_text(FixupTag), hd(AnnoL)),
-    {{string, NewAnno, String}, NewTs}.
+    case unscannable(Ts) of
+        {AnnoL, NewTs, FixupTag} ->
+            String = lists:append([erl_anno:text(A) || A <- AnnoL]),
+            _ = validate_tag(FixupTag, String),
+            NewAnno = erl_anno:set_text(fixup_text(FixupTag), hd(AnnoL)),
+            {{string, NewAnno, String}, NewTs};
+        false ->
+            {hd(Ts), tl(Ts)}
+    end.
 
 unscannable([{'#', A1}, {var, A2, 'Fun'}, {'<', A3}, {atom, A4, _},
              {'.', A5}, {float, A6, _}, {'>', A7}|Ts]) ->
@@ -2033,7 +2037,9 @@ unscannable([{'#', A1}, {var, A2, 'Port'}, {'<', A3}, {float, A4, _},
     {[A1, A2, A3, A4, A5], Ts, port};
 unscannable([{'#', A1}, {var, A2, 'Ref'}, {'<', A3}, {float, A4, _},
              {'.', A5}, {float, A6, _}, {'>', A7}|Ts]) ->
-    {[A1, A2, A3, A4, A5, A6, A7], Ts, reference}.
+    {[A1, A2, A3, A4, A5, A6, A7], Ts, reference};
+unscannable(_) ->
+    false.
 
 expr_fixup({string,A,S}=T) ->
     try string_fixup(A, S, T) of