Skip to content

Commit b73e199

Browse files
committed
tmp
1 parent ebe96fd commit b73e199

File tree

9 files changed

+82
-47
lines changed

9 files changed

+82
-47
lines changed

src/ast/types.ml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,13 @@ type _ indice =
3636

3737
let pp_id fmt id = pf fmt "$%s" id
3838

39-
let pp_name fmt s =
39+
let pp_name_inner fmt s =
4040
let pp_hex_char fmt c = pf fmt "\\%02x" (Char.code c) in
4141
let pp_char fmt = function
4242
| '\n' -> string fmt "\\n"
43-
| '\t' -> string fmt "\\t"
4443
| '\r' -> string fmt "\\r"
44+
| '\t' -> string fmt "\\t"
45+
| '\'' -> string fmt "\\'"
4546
| '\"' -> string fmt "\\\""
4647
| '\\' -> string fmt "\\\\"
4748
| c ->
@@ -53,10 +54,9 @@ let pp_name fmt s =
5354
| uc when 0x20 <= uc && uc < 0x7f -> pp_char fmt (Char.chr uc)
5455
| uc -> pf fmt "\\u{%02x}" uc
5556
in
56-
let pp_string fmt s =
57-
String.iter (fun c -> pp_unicode_char fmt (Char.code c)) s
58-
in
59-
pf fmt {|"%a"|} pp_string s
57+
String.iter (fun c -> pp_unicode_char fmt (Char.code c)) s
58+
59+
let pp_name fmt s = pf fmt {|"%a"|} pp_name_inner s
6060

6161
let pp_id_opt fmt = function None -> () | Some i -> pf fmt " %a" pp_id i
6262

src/parser/text_lexer.ml

Lines changed: 57 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ let mk_string buf s =
4949
| 't' -> '\t'
5050
| '\\' -> '\\'
5151
| '\'' -> '\''
52-
| '\"' -> '\"'
52+
| '"' -> '"'
5353
| 'u' ->
5454
let j = !i + 2 in
5555
begin
@@ -103,13 +103,13 @@ let frac = [%sedlex.regexp? num]
103103

104104
let float =
105105
[%sedlex.regexp?
106-
( Opt sign, num, '.', Opt frac
107-
| Opt sign, num, Opt ('.', Opt frac), ('e' | 'E'), Opt sign, num
108-
| Opt sign, "0x", hexnum, '.', Opt hexfrac
109-
| Opt sign, "0x", hexnum, Opt ('.', Opt hexfrac), ('p' | 'P'), Opt sign, num
110-
| Opt sign, "inf"
111-
| Opt sign, "nan"
112-
| Opt sign, "nan:", "0x", hexnum )]
106+
( Opt sign, num, '.', Opt frac
107+
| Opt sign, num, Opt ('.', Opt frac), ('e' | 'E'), Opt sign, num
108+
| Opt sign, "0x", hexnum, '.', Opt hexfrac
109+
| Opt sign, "0x", hexnum, Opt ('.', Opt hexfrac), ('p' | 'P'), Opt sign, num
110+
| Opt sign, "inf"
111+
| Opt sign, "nan"
112+
| Opt sign, "nan:", "0x", hexnum )]
113113

114114
let nat = [%sedlex.regexp? num | "0x", hexnum]
115115

@@ -119,15 +119,38 @@ let num = [%sedlex.regexp? float | int | nat]
119119

120120
let id_char =
121121
[%sedlex.regexp?
122-
( '0' .. '9'
123-
| 'a' .. 'z'
124-
| 'A' .. 'Z'
125-
| '!' | '#' | '$' | '%' | '&' | '\'' | '*' | '+' | '-' | '.' | '/' | ':'
126-
| '<' | '=' | '>' | '?' | '@' | '\\' | '^' | '_' | '`' | '|' | '~' )]
122+
( '0' .. '9'
123+
| 'a' .. 'z'
124+
| 'A' .. 'Z'
125+
| '!' | '#' | '$' | '%' | '&' | '\'' | '*' | '+' | '-' | '.' | '/' | ':'
126+
| '<' | '=' | '>' | '?' | '@' | '\\' | '^' | '_' | '`' | '|' | '~' )]
127127

128128
let string_elem = [%sedlex.regexp? Sub (any, "\"") | "\\\""]
129129

130-
let name = [%sedlex.regexp? "\"", Star string_elem, "\""]
130+
let utf8cont = [%sedlex.regexp? '\x80' .. '\xbf']
131+
132+
let utf8enc =
133+
[%sedlex.regexp?
134+
( '\xc2' .. '\xdf', utf8cont
135+
| '\xe0', '\xa0' .. '\xbf', utf8cont
136+
| '\xed', '\x80' .. '\x9f', utf8cont
137+
| ('\xe1' .. '\xec' | '\xee' .. '\xef'), utf8cont, utf8cont
138+
| '\xf0', '\x90' .. '\xbf', utf8cont, utf8cont
139+
| '\xf4', '\x80' .. '\x8f', utf8cont, utf8cont
140+
| '\xf1' .. '\xf3', utf8cont, utf8cont, utf8cont )]
141+
142+
let escape = [%sedlex.regexp? 'n' | 'r' | 't' | '\\' | '\'' | '"']
143+
144+
let character =
145+
[%sedlex.regexp?
146+
( Sub (any, ('"' | '\\' | '\x00' .. '\x1f' | '\x7f' .. '\xff'))
147+
| utf8enc
148+
| '\\', escape
149+
| '\\', hexdigit, hexdigit
150+
| '\\', "u{", hexnum, '}' )]
151+
152+
(* TODO: use character here instead of string_elem ? *)
153+
let name = [%sedlex.regexp? '"', Star string_elem, '"']
131154

132155
let operator =
133156
[%sedlex.regexp? Plus ('0' .. '9' | 'a' .. 'z' | '.' | '_' | ':'), Star name]
@@ -142,7 +165,7 @@ let bad_num = [%sedlex.regexp? num, Plus id]
142165

143166
let annot_atom =
144167
[%sedlex.regexp?
145-
Plus id_char | num | name | ',' | ';' | '[' | ']' | '{' | '}']
168+
Plus id_char | num | name | ',' | ';' | '[' | ']' | '{' | '}']
146169

147170
let keywords =
148171
let tbl = Hashtbl.create 512 in
@@ -449,33 +472,34 @@ let keywords =
449472
tbl
450473

451474
let rec token buf =
475+
(* Fmt.epr "LXM = %S@\n" (Utf8.lexeme buf); *)
452476
match%sedlex buf with
453477
| Plus any_blank -> token buf
454478
| bad_num | bad_id | bad_name -> unknown_operator buf
455479
| num -> NUM (Utf8.lexeme buf)
456-
| operator -> begin
480+
| operator ->
457481
let operator = Utf8.lexeme buf in
458482
match Hashtbl.find_opt keywords operator with
459483
| None -> unknown_operator buf
460484
| Some v -> v
461-
end
462-
(* comment *)
463-
| ";;" ->
464-
single_comment buf;
465-
token buf
466-
| "(;" ->
467-
comment buf;
485+
end
486+
(* comment *)
487+
| ";;" ->
488+
single_comment buf;
489+
token buf
490+
| "(;" ->
491+
comment buf;
492+
token buf
493+
(* custom annotation *)
494+
| "(@", name ->
495+
let annotid = Utf8.lexeme buf in
496+
let annotid = String.sub annotid 3 (String.length annotid - 4) in
497+
let annotid = mk_string buf annotid in
498+
if String.equal "" annotid then raise Empty_annotation_id
499+
else
500+
let items = Sexp.List (annot buf) in
501+
Annot.(record_annot annotid items);
468502
token buf
469-
(* custom annotation *)
470-
| "(@", name ->
471-
let annotid = Utf8.lexeme buf in
472-
let annotid = String.sub annotid 3 (String.length annotid - 4) in
473-
let annotid = mk_string buf annotid in
474-
if String.equal "" annotid then raise Empty_annotation_id
475-
else
476-
let items = Sexp.List (annot buf) in
477-
Annot.(record_annot annotid items);
478-
token buf
479503
| "(@", Plus id_char ->
480504
let annotid = Utf8.lexeme buf in
481505
let annotid = String.sub annotid 2 (String.length annotid - 2) in
@@ -501,7 +525,6 @@ let rec token buf =
501525
NAME name
502526
| "\"", Star string_elem -> raise Unclosed_string
503527
| eof -> EOF
504-
(* | "" -> EOF *)
505528
| any -> unknown_operator buf
506529
| _ -> unknown_operator buf
507530

src/script/script.ml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -198,8 +198,8 @@ let run ~no_exhaustion ~optimize script =
198198
link_state
199199
| Assert (Assert_malformed_quote (m, expected)) ->
200200
Log.debug0 "*** assert_malformed_quote@\n";
201-
(* TODO: use Parse.Text.Module.from_string instead *)
202-
let got = Parse.Text.Script.from_string m in
201+
let m = Fmt.str "%a" pp_name_inner m in
202+
let got = Parse.Text.Module.from_string m in
203203
let+ () =
204204
match got with
205205
| Error got -> check_error ~expected ~got
@@ -208,7 +208,6 @@ let run ~no_exhaustion ~optimize script =
208208
Compile.Text.until_binary ~unsafe ~rac:false ~srac:false m
209209
in
210210
check_error_result expected got
211-
| _ -> assert false
212211
in
213212
link_state
214213
| Assert (Assert_invalid_binary (m, expected)) ->

test/fmt/data.wat

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
(module
2+
(memory $m 1)
3+
(data $d (i32.const 0) "hello" "\n" "\\n" "\\" "\\'" "\'" "\\r" "\r" "\\t" "\t" "\\\"" "\"" "world" "!")
4+
)

test/fmt/dune

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
(deps
88
%{bin:owi}
99
print_simplified.exe
10+
data.wat
1011
done.wat
1112
m.wat
1213
locals.wat

test/fmt/print.t

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,9 @@ print simplified:
3232
)
3333
(start 1)
3434
)
35+
print data:
36+
$ owi fmt data.wat
37+
(module
38+
(memory $m 1)
39+
(data $d (memory 0) (offset i32.const 0) "hello\n\\n\\\\\'\'\\r\u{0d}\\t\t\\\"\"world!")
40+
)

test/script/passing.t

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@
3838
$ owi script --no-exhaustion passing/quickstart.wast
3939
$ owi script --no-exhaustion passing/relop.wast
4040
$ owi script --no-exhaustion passing/stringinitmsg.wast
41+
expected illegal escape but got (unknown operator unexpected character `"\""`)
42+
[54]
4143
$ owi script --no-exhaustion passing/type_abbreviations.wast
4244
$ owi script --no-exhaustion passing/typecheck3.wast
4345
$ owi script --no-exhaustion passing/typecheck4.wast

test/script/reference.t

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,8 @@
7474
$ owi script --no-exhaustion reference/memory_trap.wast
7575
$ owi script --no-exhaustion reference/memory.wast
7676
$ owi script --no-exhaustion reference/names.wast
77-
42
78-
123
77+
unknown operator unexpected character `"\""`
78+
[23]
7979
$ owi script --no-exhaustion reference/nop.wast
8080
$ owi script --no-exhaustion reference/ref_func.wast
8181
$ owi script --no-exhaustion reference/ref_is_null.wast

test/script/reference_opt.t

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,8 @@
7474
$ owi script --no-exhaustion --optimize reference/memory_trap.wast
7575
$ owi script --no-exhaustion --optimize reference/memory.wast
7676
$ owi script --no-exhaustion --optimize reference/names.wast
77-
42
78-
123
77+
unknown operator unexpected character `"\""`
78+
[23]
7979
$ owi script --no-exhaustion --optimize reference/nop.wast
8080
$ owi script --no-exhaustion --optimize reference/ref_func.wast
8181
$ owi script --no-exhaustion --optimize reference/ref_is_null.wast

0 commit comments

Comments
 (0)