Skip to content

Commit 8df402d

Browse files
authored
Merge pull request #10137 from bjorng/bjorn/compiler/shortcut-succeeded
Optimize binary matching code
2 parents 6aa9f98 + e672410 commit 8df402d

File tree

1 file changed

+79
-1
lines changed

1 file changed

+79
-1
lines changed

lib/compiler/src/beam_ssa_dead.erl

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@ opt(Linear0) ->
6262
Blocks0 = maps:from_list(Linear0),
6363
St0 = #st{bs=Blocks0,us=Used,skippable=Skippable},
6464
St = shortcut_opt(St0),
65-
#st{bs=Blocks} = combine_eqs(St#st{us=#{}}),
65+
#st{bs=Blocks1} = combine_eqs(St#st{us=#{}}),
66+
Blocks = shortcut_failed_succeeded(Blocks1),
6667
opt_redundant_tests(Blocks).
6768

6869
%%%
@@ -860,6 +861,83 @@ eval_type_test_1({is_tagged_tuple,Sz,Tag}, Arg) ->
860861
eval_type_test_1(Test, Arg) ->
861862
erlang:Test(Arg).
862863

864+
%%%
865+
%%% Shortcut repeated failed `succeeded:guard` tests. Consider this
866+
%%% code:
867+
%%%
868+
%%% 0:
869+
%%% V1 = bs_start_match `new`, V0
870+
%%% B1 = succeeded:guard V1
871+
%%% br B1, ^Succ1, ^29
872+
%%% .
873+
%%% .
874+
%%% .
875+
%%% 29:
876+
%%% V2 = bs_start_match `new`, V0
877+
%%% B2 = succeeded:guard _V2
878+
%%% br B2, ^Succ2, ^36
879+
%%%
880+
%%% If the `bs_start_match` instruction in block 0 fails, the
881+
%%% `bs_start_match` instruction in block 29 will also fail.
882+
%%% Therefore, the `br` instruction can be modified to have use
883+
%%% the same failure label as in block 29:
884+
%%%
885+
%%% 0:
886+
%%% V1 = bs_start_match `new`, V0
887+
%%% B1 = succeeded:guard V1
888+
%%% br B1, ^Succ1, ^36
889+
%%% .
890+
%%% .
891+
%%% .
892+
%%% 29:
893+
%%% V2 = bs_start_match `new`, V0
894+
%%% B2 = succeeded:guard _V2
895+
%%% br B2, ^Succ2, ^36
896+
%%%
897+
%%% If the only reference to block 29 was from block 0, block 29 will
898+
%%% be removed, and if block Succ2 was only referenced from block 29
899+
%%% block Succ2 would also be removed, and so on. For some functions
900+
%%% with complex binary matching, that could result in many blocks
901+
%%% being removed. One example of such a function is
902+
%%% `ssl_gen_statem:read_application_dist_data/5`.
903+
%%%
904+
%%% The same idea works for any instruction that precedes
905+
%%% `succeeded:guard`.
906+
%%%
907+
908+
shortcut_failed_succeeded(Blocks) ->
909+
Ls = reverse(beam_ssa:rpo(Blocks)),
910+
shortcut_failed_succeeded(Ls, #{}, Blocks).
911+
912+
shortcut_failed_succeeded([L|Ls], FailMap0, Blocks0) ->
913+
Blk0 = map_get(L, Blocks0),
914+
case Blk0 of
915+
#b_blk{is=[#b_set{op=Op,dst=Result,args=Args},
916+
#b_set{op={succeeded,guard},dst=Bool,args=[Result]}],
917+
last=#b_br{bool=Bool,fail=Fail0}=Br}
918+
when Op =/= get_map_element ->
919+
%% We don't do this optimization for `get_map_element`
920+
%% because that would prevent multiple `get_map_element`
921+
%% instructions to be combined into a single
922+
%% `get_map_elements` BEAM instruction.
923+
Instr = {Op,Args},
924+
case FailMap0 of
925+
#{Fail0 := {Instr,Fail}} ->
926+
%% The code at label Fail0 will always transfer
927+
%% control to label Fail.
928+
Blk = Blk0#b_blk{last=Br#b_br{fail=Fail}},
929+
Blocks = Blocks0#{L := Blk},
930+
FailMap = FailMap0#{L => {Instr,Fail}},
931+
shortcut_failed_succeeded(Ls, FailMap, Blocks);
932+
#{} ->
933+
FailMap = FailMap0#{L => {Instr,Fail0}},
934+
shortcut_failed_succeeded(Ls, FailMap, Blocks0)
935+
end;
936+
#b_blk{} ->
937+
shortcut_failed_succeeded(Ls, FailMap0, Blocks0)
938+
end;
939+
shortcut_failed_succeeded([], _FailMap, Blocks) -> Blocks.
940+
863941
%%%
864942
%%% Combine bif:'=:=', is_boolean/1 tests, and switch instructions
865943
%%% to switch instructions.

0 commit comments

Comments
 (0)