Skip to content

Commit 8ae811b

Browse files
committed
lexer: do not lex decimal numbers when the parser won't accept them
A conflict arises between code positioning and decimal numbers (e.g., 2.2). To address this, the incremental API of Menhir is now employed. When the lexer identifies a decimal number but the parser is in a state where a decimal number isn't accepted, it returns a number-dot-number token sequence instead.
1 parent ad2615b commit 8ae811b

File tree

1 file changed

+78
-75
lines changed

1 file changed

+78
-75
lines changed

src/ecIo.ml

Lines changed: 78 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ open EcUtils
33

44
module P = EcParser
55
module L = Lexing
6+
module I = EcParser.MenhirInterpreter
67

78
(* -------------------------------------------------------------------- *)
89
let lexbuf_from_channel = fun name channel ->
@@ -16,142 +17,144 @@ let lexbuf_from_channel = fun name channel ->
1617
lexbuf
1718

1819
(* -------------------------------------------------------------------- *)
19-
let parserfun = fun () ->
20-
MenhirLib.Convert.Simplified.traditional2revised EcParser.prog
21-
2220
type 'a parser_t =
2321
(P.token * L.position * L.position, 'a) MenhirLib.Convert.revised
2422

2523
(* -------------------------------------------------------------------- *)
26-
let isbinop_fun = fun () ->
24+
let parserfun () : EcParsetree.prog parser_t =
25+
MenhirLib.Convert.Simplified.traditional2revised EcParser.prog
26+
27+
(* -------------------------------------------------------------------- *)
28+
let isbinop_fun () : unit parser_t =
2729
MenhirLib.Convert.Simplified.traditional2revised EcParser.is_binop
2830

29-
let isuniop_fun = fun () ->
31+
let isuniop_fun () : unit parser_t =
3032
MenhirLib.Convert.Simplified.traditional2revised EcParser.is_uniop
3133

3234
(* -------------------------------------------------------------------- *)
33-
type 'a ecreader_gr = {
35+
type ecreader_r = {
3436
(*---*) ecr_lexbuf : Lexing.lexbuf;
35-
(*---*) ecr_parser : 'a parser_t;
36-
mutable ecr_tokens : EcParser.token list;
3737
mutable ecr_atstart : bool;
38+
mutable ecr_tokens : EcParser.token list;
3839
}
3940

40-
type 'a ecreader_g = 'a ecreader_gr Disposable.t
41-
type ecreader = EcParsetree.prog ecreader_g
41+
type ecreader = ecreader_r Disposable.t
42+
43+
(* -------------------------------------------------------------------- *)
44+
let ecreader_of_lexbuf (lexbuf : L.lexbuf) : ecreader_r =
45+
{ ecr_lexbuf = lexbuf;
46+
ecr_atstart = true;
47+
ecr_tokens = []; }
4248

4349
(* -------------------------------------------------------------------- *)
44-
let lexbuf (reader : 'a ecreader_g) =
50+
let lexbuf (reader : ecreader) =
4551
(Disposable.get reader).ecr_lexbuf
4652

4753
(* -------------------------------------------------------------------- *)
48-
let from_channel ~name channel =
54+
let from_channel ~(name : string) (channel : in_channel) =
4955
let lexbuf = lexbuf_from_channel name channel in
50-
51-
Disposable.create
52-
{ ecr_lexbuf = lexbuf;
53-
ecr_parser = parserfun ();
54-
ecr_atstart = true;
55-
ecr_tokens = []; }
56+
Disposable.create
57+
(ecreader_of_lexbuf lexbuf)
5658

5759
(* -------------------------------------------------------------------- *)
58-
let from_file filename =
60+
let from_file (filename : string) =
5961
let channel = open_in filename in
60-
try
61-
let lexbuf = lexbuf_from_channel filename channel in
62-
63-
Disposable.create ~cb:(fun _ -> close_in channel)
64-
{ ecr_lexbuf = lexbuf;
65-
ecr_parser = parserfun ();
66-
ecr_atstart = true;
67-
ecr_tokens = []; }
62+
try
63+
let lexbuf = lexbuf_from_channel filename channel in
64+
Disposable.create
65+
~cb:(fun _ -> close_in channel)
66+
(ecreader_of_lexbuf lexbuf)
6867

69-
with
70-
| e ->
71-
(try close_in channel with _ -> ());
72-
raise e
68+
with e ->
69+
(try close_in channel with _ -> ());
70+
raise e
7371

7472
(* -------------------------------------------------------------------- *)
75-
let from_string data =
76-
let lexbuf = Lexing.from_string data in
77-
78-
Disposable.create
79-
{ ecr_lexbuf = lexbuf;
80-
ecr_parser = parserfun ();
81-
ecr_atstart = true;
82-
ecr_tokens = []; }
73+
let from_string (data : string) =
74+
Disposable.create
75+
(ecreader_of_lexbuf (Lexing.from_string data))
8376

8477
(* -------------------------------------------------------------------- *)
85-
let finalize (ecreader : 'a ecreader_g) =
78+
let finalize (ecreader : ecreader) =
8679
Disposable.dispose ecreader
8780

8881
(* -------------------------------------------------------------------- *)
89-
let lexer = fun ecreader ->
82+
let lexer ?(checkpoint : _ I.checkpoint option) (ecreader : ecreader_r) =
9083
let lexbuf = ecreader.ecr_lexbuf in
9184

9285
let isfinal = function
9386
| EcParser.FINAL _ -> true
9487
| _ -> false in
9588

96-
if ecreader.ecr_tokens = [] then
89+
if List.is_empty (ecreader.ecr_tokens) then
9790
ecreader.ecr_tokens <- EcLexer.main lexbuf;
9891

99-
match ecreader.ecr_tokens with
100-
| [] ->
101-
failwith "short-read-from-lexer"
92+
let token, queue = List.destruct ecreader.ecr_tokens in
93+
94+
let token, prequeue =
95+
match checkpoint, token with
96+
| Some checkpoint, P.DECIMAL (pre, (_, post)) ->
97+
if I.acceptable checkpoint token lexbuf.lex_curr_p then
98+
token, []
99+
else
100+
List.destruct P.[UINT pre; DOT; UINT post]
101+
| _ ->
102+
token, []
103+
in
102104

103-
| token :: queue -> begin
104-
ecreader.ecr_tokens <- queue;
105-
ecreader.ecr_atstart <- (isfinal token);
106-
(token, Lexing.lexeme_start_p lexbuf, Lexing.lexeme_end_p lexbuf)
107-
end
105+
ecreader.ecr_tokens <- prequeue @ queue;
106+
ecreader.ecr_atstart <- (isfinal token);
107+
(token, Lexing.lexeme_start_p lexbuf, Lexing.lexeme_end_p lexbuf)
108108

109109
(* -------------------------------------------------------------------- *)
110-
let drain (ecreader : 'a ecreader_g) =
110+
let drain (ecreader : ecreader) =
111111
let ecreader = Disposable.get ecreader in
112+
112113
let rec drain () =
113-
try
114-
match lexer ecreader with
115-
| (EcParser.FINAL _, _, _) -> ()
116-
| _ -> drain ()
117-
with EcLexer.LexicalError _ -> drain ()
114+
match lexer ecreader with
115+
| (EcParser.FINAL _, _, _) -> ()
116+
| _ | exception EcLexer.LexicalError _ -> drain ()
118117
in
119118
if not ecreader.ecr_atstart then
120119
drain ()
121120

122121
(* -------------------------------------------------------------------- *)
123-
let parse (ecreader : 'a ecreader_g) =
122+
let parse (ecreader : ecreader) =
124123
let ecreader = Disposable.get ecreader in
125-
ecreader.ecr_parser (fun () -> lexer ecreader)
124+
125+
let rec parse (checkpoint : EcParsetree.prog I.checkpoint) : EcParsetree.prog =
126+
match checkpoint with
127+
| Accepted pst ->
128+
pst
129+
130+
| InputNeeded _ ->
131+
parse (I.offer checkpoint (lexer ~checkpoint ecreader))
132+
133+
| Shifting _ | AboutToReduce _ | HandlingError _ ->
134+
parse (I.resume checkpoint)
135+
136+
| Rejected ->
137+
raise EcParser.Error
138+
139+
in parse (EcParser.Incremental.prog ecreader.ecr_lexbuf.lex_curr_p)
126140

127141
(* -------------------------------------------------------------------- *)
128-
let parseall (ecreader : 'a ecreader_g) =
142+
let parseall (ecreader : ecreader) =
129143
let rec aux acc =
130144
match EcLocation.unloc (parse ecreader) with
131145
| EcParsetree.P_Prog (commands, terminate) ->
132146
let acc = List.rev_append commands acc in
133-
if terminate then List.rev acc else aux acc
147+
if terminate then List.rev acc else aux acc
134148
| EcParsetree.P_Undo _ | EcParsetree.P_Exit ->
135149
assert false (* FIXME *)
136150
in
137151
aux []
138152

139153
(* -------------------------------------------------------------------- *)
140-
let lex_single_token name =
141-
try
142-
let ecr =
143-
{ ecr_lexbuf = Lexing.from_string name;
144-
ecr_parser = parserfun ();
145-
ecr_atstart = true;
146-
ecr_tokens = []; } in
147-
148-
let (token, _, _) = lexer ecr in
149-
150-
match lexer ecr with
151-
| (EcParser.EOF, _, _) -> Some token
152-
| _ -> None
153-
154-
with EcLexer.LexicalError _ -> None
154+
let lex_single_token (name : string) =
155+
match EcLexer.main (Lexing.from_string name) with
156+
| token :: _ -> Some token
157+
| _ | exception EcLexer.LexicalError _ -> None
155158

156159
(* -------------------------------------------------------------------- *)
157160
let is_sym_ident x =

0 commit comments

Comments
 (0)