Skip to content

Commit 027a4f5

Browse files
committed
unicode_util: Optimize gc_1
Postpone cons creation by making gc_1 take two arguments instead of one. This reduces move instructions.
1 parent bd27796 commit 027a4f5

File tree

1 file changed

+38
-36
lines changed

1 file changed

+38
-36
lines changed

lib/stdlib/uc_spec/gen_unicode_mod.escript

Lines changed: 38 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -797,8 +797,8 @@ gen_gc(Fd, GBP) ->
797797
" _ -> %% Keep the tail binary.\n"
798798
" case cp_no_bin(T1) of\n"
799799
" [CP2|_]=T3 when ?IS_LATIN1(CP2) -> [CP1|T3]; %% Asciii Fast path\n"
800-
" binary_found -> gc_1(T);\n"
801-
" T4 -> gc_1([CP1|T4])\n"
800+
" binary_found -> gc_1(T1, CP1);\n"
801+
" T4 -> gc_1(T4, CP1)\n"
802802
" end\n"
803803
" end;\n"
804804
"gc(<<>>) -> [];\n"
@@ -807,70 +807,72 @@ gen_gc(Fd, GBP) ->
807807
" case Rest of\n"
808808
" <<CP2/utf8, _/binary>> when CP2 < 256 -> %% Ascii Fast path\n"
809809
" [CP1|Rest];\n"
810-
" _ -> gc_1([CP1|Rest])\n"
810+
" _ -> gc_1(Rest, CP1)\n"
811811
" end;\n"
812-
" true -> gc_1([CP1|Rest])\n"
812+
" true -> gc_1(Rest, CP1)\n"
813813
" end;\n"
814-
"gc([CP|_]=T) when ?IS_CP(CP) -> gc_1(T);\n"
814+
"gc([CP|T]) when ?IS_CP(CP) -> gc_1(T,CP);\n"
815815
"gc(Str) ->\n"
816816
" case cp(Str) of\n"
817817
" {error,_}=Error -> Error;\n"
818818
" CPs -> gc(CPs)\n"
819819
" end.\n"
820820
),
821821

822-
GenExtP = fun(Range) -> io:format(Fd, "gc_1~s gc_ext_pict(R1,[CP]);\n", [gen_clause(Range)]) end,
823-
ExtendedPictographic0 = merge_ranges(maps:get(extended_pictographic,GBP)),
824-
%% Pick codepoints below 256 (some data knowledge here)
825-
{ExtendedPictographicLow,_ExtendedPictographicHigh} =
826-
lists:splitwith(fun({Start,undefined}) -> Start < 256 end,ExtendedPictographic0),
827822
io:put_chars(Fd,
828-
"\ngc_1([$\\r|R0] = R) ->\n"
829-
" case cp(R0) of % Don't break CRLF\n"
830-
" [$\\n|R1] -> [[$\\r,$\\n]|R1];\n"
831-
" _ -> R\n"
832-
" end;\n"),
833-
io:put_chars(Fd, "\n%% Handle control\n"),
834-
GenControl = fun(Range) -> io:format(Fd, "gc_1~s R0;\n", [gen_clause(Range)]) end,
823+
"""
824+
825+
%% gc_1
826+
gc_1(R0, $\r) ->
827+
case cp(R0) of % Don't break CRLF
828+
[$\n|R1] -> [[$\r,$\n]|R1];
829+
_ -> [$\r|R0]
830+
end;
831+
%% Handle control
832+
833+
"""),
834+
GenControl = fun(Range) -> io:format(Fd, "gc_1~s [CP|R0];\n", [gen_clause(Range)]) end,
835835
CRs0 = merge_ranges(maps:get(cr, GBP) ++ maps:get(lf, GBP) ++ maps:get(control, GBP), false),
836836
[R1,R2,R3|Crs] = CRs0,
837837
[GenControl(CP) || CP <- merge_ranges([R1,R2,R3], split), CP =/= {$\r, undefined}],
838838
%%GenControl(R1),GenControl(R2),GenControl(R3),
839839
io:put_chars(Fd, "\n%% Optimize Latin-1\n"),
840+
GenExtP = fun(Range) -> io:format(Fd, "gc_1~s gc_ext_pict(R0,[CP]);\n", [gen_clause(Range)]) end,
841+
ExtendedPictographic0 = merge_ranges(maps:get(extended_pictographic,GBP)),
842+
%% Pick codepoints below 256 (some data knowledge here)
843+
{ExtendedPictographicLow,_ExtendedPictographicHigh} =
844+
lists:splitwith(fun({Start,undefined}) -> Start < 256 end,ExtendedPictographic0),
840845
[GenExtP(CP) || CP <- merge_ranges(ExtendedPictographicLow)],
841846

842847
io:put_chars(Fd,
843-
"gc_1([CP|R]=R0) when ?IS_LATIN1(CP) ->\n"
844-
" case R of\n"
845-
" [CP2|_] when ?IS_LATIN1(CP2) -> R0;\n"
846-
" _ -> gc_extend(cp(R), R, CP)\n"
848+
"gc_1(R0,CP) when ?IS_LATIN1(CP) ->\n"
849+
" case R0 of\n"
850+
" [CP2|_] when ?IS_LATIN1(CP2) -> [CP|R0];\n"
851+
" _ -> gc_extend(cp(R0), R0, CP)\n"
847852
" end;\n"
848-
"gc_1([CP|_]) when not ?IS_CP(CP) ->\n"
853+
"gc_1(_, CP) when not ?IS_CP(CP) ->\n"
849854
" error({badarg,CP});\n"),
850855
io:put_chars(Fd, "\n%% Continue control\n"),
851856
[GenControl(CP) || CP <- merge_ranges(Crs)],
852-
%% One clause per CP
853-
%% CRs0 = merge_ranges(maps:get(cr, GBP) ++ maps:get(lf, GBP) ++ maps:get(control, GBP)),
854-
%% [GenControl(CP) || CP <- CRs0, CP =/= {$\r, undefined}],
855857

856858
io:put_chars(Fd, "\n%% Handle prepend\n"),
857-
GenPrepend = fun(Range) -> io:format(Fd, "gc_1~s gc_prepend(R1, CP);\n", [gen_clause(Range)]) end,
859+
GenPrepend = fun(Range) -> io:format(Fd, "gc_1~s gc_prepend(R0, CP);\n", [gen_clause(Range)]) end,
858860
[GenPrepend(CP) || CP <- merge_ranges(maps:get(prepend,GBP))],
859861

860862
io:put_chars(Fd, "\n%% Handle Hangul L\n"),
861-
GenHangulL = fun(Range) -> io:format(Fd, "gc_1~s gc_h_L(R1,[CP]);\n", [gen_clause(Range)]) end,
863+
GenHangulL = fun(Range) -> io:format(Fd, "gc_1~s gc_h_L(R0,[CP]);\n", [gen_clause(Range)]) end,
862864
[GenHangulL(CP) || CP <- merge_ranges(maps:get(l,GBP))],
863865
io:put_chars(Fd, "%% Handle Hangul V\n"),
864-
GenHangulV = fun(Range) -> io:format(Fd, "gc_1~s gc_h_V(R1,[CP]);\n", [gen_clause(Range)]) end,
866+
GenHangulV = fun(Range) -> io:format(Fd, "gc_1~s gc_h_V(R0,[CP]);\n", [gen_clause(Range)]) end,
865867
[GenHangulV(CP) || CP <- merge_ranges(maps:get(v,GBP))],
866868
io:put_chars(Fd, "%% Handle Hangul T\n"),
867-
GenHangulT = fun(Range) -> io:format(Fd, "gc_1~s gc_h_T(R1,[CP]);\n", [gen_clause(Range)]) end,
869+
GenHangulT = fun(Range) -> io:format(Fd, "gc_1~s gc_h_T(R0,[CP]);\n", [gen_clause(Range)]) end,
868870
[GenHangulT(CP) || CP <- merge_ranges(maps:get(t,GBP))],
869871
io:put_chars(Fd, "%% Handle Hangul LV and LVT special, since they are large\n"),
870-
io:put_chars(Fd, "gc_1([CP|_]=R0) when is_integer(CP, 44000, 56000) -> gc_h_lv_lvt(R0, R0, []);\n"),
872+
io:put_chars(Fd, "gc_1(R0,CP) when is_integer(CP, 44000, 56000) -> R=[CP|R0], gc_h_lv_lvt(R, R, []);\n"),
871873

872874
io:put_chars(Fd, "\n%% Handle Regional\n"),
873-
GenRegional = fun(Range) -> io:format(Fd, "gc_1~s gc_regional(R1,CP);\n", [gen_clause(Range)]) end,
875+
GenRegional = fun(Range) -> io:format(Fd, "gc_1~s gc_regional(R0,CP);\n", [gen_clause(Range)]) end,
874876
[GenRegional(CP) || CP <- merge_ranges(maps:get(regional_indicator,GBP))],
875877
%% io:put_chars(Fd, "%% Handle E_Base\n"),
876878
%% GenEBase = fun(Range) -> io:format(Fd, "gc_1~s gc_e_cont(R1,[CP]);\n", [gen_clause(Range)]) end,
@@ -884,7 +886,7 @@ gen_gc(Fd, GBP) ->
884886

885887
io:put_chars(Fd, "\n%% default clauses\n"),
886888
io:put_chars(Fd, """
887-
gc_1([CP|R]) ->
889+
gc_1(R,CP) ->
888890
case is_ext_pict(CP) of
889891
true -> gc_ext_pict(R, [CP]);
890892
false ->
@@ -901,11 +903,11 @@ gen_gc(Fd, GBP) ->
901903
io:put_chars(Fd,
902904
"gc_prepend(R00, CP0) ->\n"
903905
" case cp(R00) of\n"
904-
" [CP1|_] = R0 ->\n"
906+
" [CP1|R0] ->\n"
905907
" case is_control(CP1) of\n"
906908
" true -> [CP0|R00];\n"
907909
" false ->\n"
908-
" case gc_1(R0) of\n"
910+
" case gc_1(R0, CP1) of\n"
909911
" [GC|R1] when is_integer(GC) -> [[CP0,GC]|R1];\n"
910912
" [GC|R1] -> [[CP0|GC]|R1]\n"
911913
" end\n"
@@ -1386,9 +1388,9 @@ gen_width_table(Fd, WideChars) ->
13861388
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
13871389

13881390
gen_clause({R0, undefined}) ->
1389-
io_lib:format("([~w=CP|R1]=R0) ->", [R0]);
1391+
io_lib:format("(R0, ~w=CP) ->", [R0]);
13901392
gen_clause({R0, R1}) ->
1391-
io_lib:format("([CP|R1]=R0) when is_integer(CP, ~w, ~w) ->", [R0,R1]).
1393+
io_lib:format("(R0, CP) when is_integer(CP, ~w, ~w) ->", [R0,R1]).
13921394

13931395
gen_clause2({R0, undefined}) ->
13941396
io_lib:format("([~w=CP|R1], R0, Acc) ->", [R0]);

0 commit comments

Comments
 (0)