Skip to content

Commit a91114f

Browse files
author
Nathan Rebours
committed
Fix pprintast with ocaml/ocaml#13604 and ocaml/ocaml#14279
This imports upstream patches to pprintast that were done post 5.2 and improves pprintast's handling of raw identifiers. Hopefully that should fix the issue that fstar folks ran into with the 0.36.2 release. Signed-off-by: Nathan Rebours <[email protected]>
1 parent 18bd96f commit a91114f

File tree

3 files changed

+129
-63
lines changed

3 files changed

+129
-63
lines changed

CHANGES.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ unreleased
77

88
### Other Changes
99

10+
- Fix a bug where some infix operators such as `mod` would be printed as
11+
raw identifiers by our `Pprintast`. (#601, @NathanReb)
12+
1013
- Fix 5.2 -> 5.3 migration of constants. Those used to always have a `none`
1114
location which can lead to unhelpful error messages.
1215
(#569, @NathanReb)

astlib/pprintast.ml

Lines changed: 125 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
[Ppat_constraint (p, typ)] in [value_binding] patterns as if they were encoded
3232
using the new [pvb_constraint] field instead of producing incorrect syntax as
3333
the compiler version does.
34+
- Added ocaml#13604 and ocaml#14279 to better handle raw identifiers
3435
*)
3536

3637
open Ast_502
@@ -90,11 +91,44 @@ let first_is c str = str <> "" && str.[0] = c
9091
let last_is c str = str <> "" && str.[String.length str - 1] = c
9192
let first_is_in cs str = str <> "" && List.mem str.[0] cs
9293

94+
(** The OCaml grammar generates [longident]s from five different rules:
95+
- module longident (a sequence of uppercase identifiers [A.B.C])
96+
- constructor longident, either
97+
- a module [longident]
98+
- [[]], [()], [true], [false]
99+
- an optional module [longident] followed by [(::)] ([A.B.(::)])
100+
- class longident, an optional module [longident] followed by a lowercase
101+
identifier.
102+
- value longident, an optional module [longident] followed by either:
103+
- a lowercase identifier ([A.x])
104+
- an operator (and in particular the [mod] keyword), ([A.(+), B.(mod)])
105+
- type [longident]: a tree of applications and projections of uppercase
106+
identifiers followed by a projection ending with a lowercase identifier
107+
(for ordinary types), or any identifier (for module types) (e.g
108+
[A.B(C.D(E.F).K)(G).X.Y.t]) All these [longident]s share a common core and
109+
optionally add some extensions. Unfortunately, these extensions intersect
110+
while having different escaping and parentheses rules depending on the
111+
kind of [longident]:
112+
- [true] or [false] can be either constructor [longident]s, or value, type
113+
or class [longident]s using the raw identifier syntax.
114+
- [mod] can be either an operator value [longident], or a class or type
115+
[longident] using the raw identifier syntax. Thus in order to print
116+
correctly [longident]s, we need to keep track of their kind using the
117+
context in which they appear. *)
118+
type longindent_kind =
119+
| Constr (** variant constructors *)
120+
| Type (** core types, module types, class types, and classes *)
121+
| Value (** values *)
122+
| Other (** modules, classes *)
123+
93124
(* which identifiers are in fact operators needing parentheses *)
94-
let needs_parens txt =
95-
let fix = fixity_of_string txt in
96-
is_infix fix || is_mixfix fix || is_kwdop fix
97-
|| first_is_in prefix_symbols txt
125+
let needs_parens ~kind txt =
126+
match kind with
127+
| Type -> false
128+
| Constr | Value | Other ->
129+
let fix = fixity_of_string txt in
130+
is_infix fix || is_mixfix fix || is_kwdop fix
131+
|| first_is_in prefix_symbols txt
98132

99133
(* some infixes need spaces around parens to avoid clashes with comment
100134
syntax *)
@@ -103,24 +137,48 @@ let needs_spaces txt = first_is '*' txt || last_is '*' txt
103137
(* Turn an arbitrary variable name into a valid OCaml identifier by adding \#
104138
in case it is a keyword, or parenthesis when it is an infix or prefix
105139
operator. *)
106-
let ident_of_name ppf txt =
140+
let ident_of_name ~kind ppf txt =
107141
let format : (_, _, _) format =
108-
if Keyword.is_keyword txt then "\\#%s"
109-
else if not (needs_parens txt) then "%s"
142+
if Keyword.is_keyword txt then
143+
match (kind, txt) with
144+
| Constr, ("true" | "false") -> "%s"
145+
| Value, s ->
146+
if List.mem s special_infix_strings then
147+
(* Special case for infix keywords [mod], [lsl] and friends *)
148+
"(%s)"
149+
else "\\#%s"
150+
| Type, _ | Constr, _ | Other, _ -> "\\#%s"
151+
else if not (needs_parens ~kind txt) then "%s"
110152
else if needs_spaces txt then "(@;%s@;)"
111153
else "(%s)"
112154
in
113155
fprintf ppf format txt
114156

115-
let ident_of_name_loc ppf s = ident_of_name ppf s.txt
116-
117-
let protect_longident ppf print_longident longprefix txt =
118-
if not (needs_parens txt) then
119-
fprintf ppf "%a.%a" print_longident longprefix ident_of_name txt
157+
let protect_longident ~kind ppf print_longident longprefix txt =
158+
if not (needs_parens ~kind txt) then
159+
fprintf ppf "%a.%a" print_longident longprefix (ident_of_name ~kind) txt
120160
else if needs_spaces txt then
121161
fprintf ppf "%a.(@;%s@;)" print_longident longprefix txt
122162
else fprintf ppf "%a.(%s)" print_longident longprefix txt
123163

164+
let rec any_longident ~kind f = function
165+
| Lident s -> ident_of_name ~kind f s
166+
| Ldot (y, s) -> protect_longident ~kind f (any_longident ~kind:Other) y s
167+
| Lapply (y, s) ->
168+
fprintf f "%a(%a)"
169+
(any_longident ~kind:Other)
170+
y
171+
(any_longident ~kind:Other)
172+
s
173+
174+
let value_longident ppf l = any_longident ~kind:Value ppf l
175+
let longident ppf l = any_longident ~kind:Other ppf l
176+
let constr ppf l = any_longident ~kind:Constr ppf l
177+
let type_longident ppf l = any_longident ~kind:Type ppf l
178+
let ident_of_value_name ppf i = ident_of_name ~kind:Value ppf i
179+
let ident_of_name ppf i = ident_of_name ~kind:Other ppf i
180+
let ident_of_name_loc ppf s = ident_of_name ppf s.txt
181+
124182
type space_formatter = (unit, Format.formatter, unit) format
125183

126184
let override = function Override -> "!" | Fresh -> ""
@@ -145,10 +203,10 @@ type construct =
145203

146204
let view_expr x =
147205
match x.pexp_desc with
148-
| Pexp_construct ({ txt = Lident "()"; _ }, _) -> `tuple
149-
| Pexp_construct ({ txt = Lident "true"; _ }, _) -> `btrue
150-
| Pexp_construct ({ txt = Lident "false"; _ }, _) -> `bfalse
151-
| Pexp_construct ({ txt = Lident "[]"; _ }, _) -> `nil
206+
| Pexp_construct ({ txt = Lident "()"; _ }, None) -> `tuple
207+
| Pexp_construct ({ txt = Lident "true"; _ }, None) -> `btrue
208+
| Pexp_construct ({ txt = Lident "false"; _ }, None) -> `bfalse
209+
| Pexp_construct ({ txt = Lident "[]"; _ }, None) -> `nil
152210
| Pexp_construct ({ txt = Lident "::"; _ }, Some _) ->
153211
let rec loop exp acc =
154212
match exp with
@@ -261,12 +319,8 @@ let paren :
261319
pp f ")")
262320
else fu f x
263321

264-
let rec longident f = function
265-
| Lident s -> ident_of_name f s
266-
| Ldot (y, s) -> protect_longident f longident y s
267-
| Lapply (y, s) -> pp f "%a(%a)" longident y longident s
268-
269-
let longident_loc f x = pp f "%a" longident x.txt
322+
let with_loc pr ppf x = pr ppf x.txt
323+
let longident_loc = with_loc longident
270324

271325
let constant f = function
272326
| Pconst_char i -> pp f "%C" i
@@ -358,7 +412,7 @@ and core_type1 ctxt f x =
358412
| [] -> ()
359413
| [ x ] -> pp f "%a@;" (core_type1 ctxt) x
360414
| _ -> list ~first:"(" ~last:")@;" (core_type ctxt) ~sep:",@;" f l)
361-
l longident_loc li
415+
l (with_loc type_longident) li
362416
| Ptyp_variant (l, closed, low) ->
363417
let first_is_inherit =
364418
match l with
@@ -416,16 +470,16 @@ and core_type1 ctxt f x =
416470
(*FIXME*)
417471
pp f "@[<hov2>%a#%a@]"
418472
(list (core_type ctxt) ~sep:"," ~first:"(" ~last:")")
419-
l longident_loc li
473+
l (with_loc type_longident) li
420474
| Ptyp_package (lid, cstrs) -> (
421475
let aux f (s, ct) =
422-
pp f "type %a@ =@ %a" longident_loc s (core_type ctxt) ct
476+
pp f "type %a@ =@ %a" (with_loc type_longident) s (core_type ctxt) ct
423477
in
424478
match cstrs with
425-
| [] -> pp f "@[<hov2>(module@ %a)@]" longident_loc lid
479+
| [] -> pp f "@[<hov2>(module@ %a)@]" (with_loc type_longident) lid
426480
| _ ->
427-
pp f "@[<hov2>(module@ %a@ with@ %a)@]" longident_loc lid
428-
(list aux ~sep:"@ and@ ") cstrs)
481+
pp f "@[<hov2>(module@ %a@ with@ %a)@]" (with_loc type_longident)
482+
lid (list aux ~sep:"@ and@ ") cstrs)
429483
| Ptyp_open (li, ct) ->
430484
pp f "@[<hov2>%a.(%a)@]" longident_loc li (core_type ctxt) ct
431485
| Ptyp_extension e -> extension ctxt f e
@@ -501,11 +555,11 @@ and simple_pattern ctxt (f : Format.formatter) (x : pattern) : unit =
501555
({ txt = Lident (("()" | "[]" | "true" | "false") as x); _ }, None) ->
502556
pp f "%s" x
503557
| Ppat_any -> pp f "_"
504-
| Ppat_var { txt; _ } -> ident_of_name f txt
558+
| Ppat_var { txt; _ } -> ident_of_value_name f txt
505559
| Ppat_array l -> pp f "@[<2>[|%a|]@]" (list (pattern1 ctxt) ~sep:";") l
506560
| Ppat_unpack { txt = None } -> pp f "(module@ _)@ "
507561
| Ppat_unpack { txt = Some s } -> pp f "(module@ %s)@ " s
508-
| Ppat_type li -> pp f "#%a" longident_loc li
562+
| Ppat_type li -> pp f "#%a" (with_loc type_longident) li
509563
| Ppat_record (l, closed) -> (
510564
let longident_x_pattern f (li, p) =
511565
match (li, p) with
@@ -811,7 +865,7 @@ and expression ctxt f x =
811865
in
812866
let lst = sequence_helper [] x in
813867
pp f "@[<hv>%a@]" (list (expression (under_semi ctxt)) ~sep:";@;") lst
814-
| Pexp_new li -> pp f "@[<hov2>new@ %a@]" longident_loc li
868+
| Pexp_new li -> pp f "@[<hov2>new@ %a@]" (with_loc type_longident) li
815869
| Pexp_setinstvar (s, e) ->
816870
pp f "@[<hov2>%a@ <-@ %a@]" ident_of_name s.txt (expression ctxt) e
817871
| Pexp_override l ->
@@ -882,9 +936,9 @@ and simple_expr ctxt f x =
882936
pp f "@[<hv0>[%a]@]"
883937
(list (expression (under_semi ctxt)) ~sep:";@;")
884938
xs
885-
| `simple x -> longident f x
939+
| `simple x -> constr f x
886940
| _ -> assert false)
887-
| Pexp_ident li -> longident_loc f li
941+
| Pexp_ident li -> with_loc value_longident f li
888942
(* (match view_fixity_of_exp x with *)
889943
(* |`Normal -> longident_loc f li *)
890944
(* | `Prefix _ | `Infix _ -> pp f "( %a )" longident_loc li) *)
@@ -1003,7 +1057,7 @@ and class_type ctxt f x =
10031057
match l with
10041058
| [] -> ()
10051059
| _ -> pp f "[%a]@ " (list (core_type ctxt) ~sep:",") l)
1006-
l longident_loc li (attributes ctxt) x.pcty_attributes
1060+
l (with_loc type_longident) li (attributes ctxt) x.pcty_attributes
10071061
| Pcty_arrow (l, co, cl) ->
10081062
pp f "@[<2>%a@;->@;%a@]" (* FIXME remove parens later *)
10091063
(type_with_label ctxt) (l, co) (class_type ctxt) cl
@@ -1054,7 +1108,7 @@ and class_field ctxt f x =
10541108
(core_type ctxt) ct (item_attributes ctxt) x.pcf_attributes
10551109
| Pcf_method (s, pf, Cfk_concrete (ovf, e)) ->
10561110
let bind e =
1057-
binding ctxt f
1111+
binding ~is_method:true ctxt f
10581112
{
10591113
pvb_pat =
10601114
{
@@ -1122,7 +1176,7 @@ and class_expr ctxt f x =
11221176
pp f "%a%a"
11231177
(fun f l ->
11241178
if l <> [] then pp f "[%a]@ " (list (core_type ctxt) ~sep:",") l)
1125-
l longident_loc li
1179+
l (with_loc type_longident) li
11261180
| Pcl_constraint (ce, ct) ->
11271181
pp f "(%a@ :@ %a)" (class_expr ctxt) ce (class_type ctxt) ct
11281182
| Pcl_extension e -> extension ctxt f e
@@ -1157,26 +1211,28 @@ and module_type ctxt f x =
11571211

11581212
and with_constraint ctxt f = function
11591213
| Pwith_type (li, ({ ptype_params = ls; _ } as td)) ->
1160-
pp f "type@ %a %a =@ %a" (type_params ctxt) ls longident_loc li
1161-
(type_declaration ctxt) td
1214+
pp f "type@ %a %a =@ %a" (type_params ctxt) ls (with_loc type_longident)
1215+
li (type_declaration ctxt) td
11621216
| Pwith_module (li, li2) ->
11631217
pp f "module %a =@ %a" longident_loc li longident_loc li2
11641218
| Pwith_modtype (li, mty) ->
1165-
pp f "module type %a =@ %a" longident_loc li (module_type ctxt) mty
1219+
pp f "module type %a =@ %a" (with_loc type_longident) li
1220+
(module_type ctxt) mty
11661221
| Pwith_typesubst (li, ({ ptype_params = ls; _ } as td)) ->
1167-
pp f "type@ %a %a :=@ %a" (type_params ctxt) ls longident_loc li
1168-
(type_declaration ctxt) td
1222+
pp f "type@ %a %a :=@ %a" (type_params ctxt) ls (with_loc type_longident)
1223+
li (type_declaration ctxt) td
11691224
| Pwith_modsubst (li, li2) ->
11701225
pp f "module %a :=@ %a" longident_loc li longident_loc li2
11711226
| Pwith_modtypesubst (li, mty) ->
1172-
pp f "module type %a :=@ %a" longident_loc li (module_type ctxt) mty
1227+
pp f "module type %a :=@ %a" (with_loc type_longident) li
1228+
(module_type ctxt) mty
11731229

11741230
and module_type1 ctxt f x =
11751231
if x.pmty_attributes <> [] then module_type ctxt f x
11761232
else
11771233
match x.pmty_desc with
1178-
| Pmty_ident li -> pp f "%a" longident_loc li
1179-
| Pmty_alias li -> pp f "(module %a)" longident_loc li
1234+
| Pmty_ident li -> pp f "%a" (with_loc type_longident) li
1235+
| Pmty_alias li -> pp f "(module %a)" (with_loc type_longident) li
11801236
| Pmty_signature s ->
11811237
pp f "@[<hv0>@[<hv2>sig@ %a@]@ end@]" (* "@[<hov>sig@ %a@ end@]" *)
11821238
(list (signature_item ctxt))
@@ -1242,7 +1298,7 @@ and signature_item ctxt f x : unit =
12421298
pp f "@[<hov2>include@ %a@]%a" (module_type ctxt) incl.pincl_mod
12431299
(item_attributes ctxt) incl.pincl_attributes
12441300
| Psig_modtype { pmtd_name = s; pmtd_type = md; pmtd_attributes = attrs } ->
1245-
pp f "@[<hov2>module@ type@ %s%a@]%a" s.txt
1301+
pp f "@[<hov2>module@ type@ %a%a@]%a" ident_of_name s.txt
12461302
(fun f md ->
12471303
match md with
12481304
| None -> ()
@@ -1329,7 +1385,8 @@ and payload ctxt f = function
13291385
expression ctxt f e
13301386

13311387
(* transform [f = fun g h -> ..] to [f g h = ... ] could be improved *)
1332-
and binding ctxt f { pvb_pat = p; pvb_expr = x; pvb_constraint = ct; _ } =
1388+
and binding ?(is_method = false) ctxt f
1389+
{ pvb_pat = p; pvb_expr = x; pvb_constraint = ct; _ } =
13331390
(* .pvb_attributes have already been printed by the caller, #bindings *)
13341391
let rec pp_print_pexp_function f x =
13351392
if x.pexp_attributes <> [] then pp f "=@;%a" (expression ctxt) x
@@ -1341,31 +1398,37 @@ and binding ctxt f { pvb_pat = p; pvb_expr = x; pvb_constraint = ct; _ } =
13411398
pp f "(type@ %a)@ %a" ident_of_name str.txt pp_print_pexp_function e
13421399
| _ -> pp f "=@;%a" (expression ctxt) x
13431400
in
1344-
match (ct, p) with
1345-
| ( None,
1346-
{
1347-
ppat_attributes = [];
1348-
ppat_desc =
1349-
Ppat_constraint
1350-
(({ ppat_desc = Ppat_var _; ppat_attributes = [] } as p), typ);
1351-
} )
1352-
| Some (Pvc_constraint { locally_abstract_univars = []; typ }), p ->
1401+
match ct with
1402+
| Some (Pvc_constraint { locally_abstract_univars = []; typ }) ->
13531403
pp f "%a@;:@;%a@;=@;%a" (simple_pattern ctxt) p (core_type ctxt) typ
13541404
(expression ctxt) x
1355-
| Some (Pvc_constraint { locally_abstract_univars = vars; typ }), _ ->
1405+
| Some (Pvc_constraint { locally_abstract_univars = vars; typ }) ->
13561406
pp f "%a@;: type@;%a.@;%a@;=@;%a" (simple_pattern ctxt) p
13571407
(list pp_print_string ~sep:"@;")
13581408
(List.map (fun x -> x.txt) vars)
13591409
(core_type ctxt) typ (expression ctxt) x
1360-
| Some (Pvc_coercion { ground = None; coercion }), _ ->
1410+
| Some (Pvc_coercion { ground = None; coercion }) ->
13611411
pp f "%a@;:>@;%a@;=@;%a" (simple_pattern ctxt) p (core_type ctxt) coercion
13621412
(expression ctxt) x
1363-
| Some (Pvc_coercion { ground = Some ground; coercion }), _ ->
1413+
| Some (Pvc_coercion { ground = Some ground; coercion }) ->
13641414
pp f "%a@;:%a@;:>@;%a@;=@;%a" (simple_pattern ctxt) p (core_type ctxt)
13651415
ground (core_type ctxt) coercion (expression ctxt) x
1366-
| None, { ppat_desc = Ppat_var _; ppat_attributes = [] } ->
1367-
pp f "%a@ %a" (simple_pattern ctxt) p pp_print_pexp_function x
1368-
| _, _ -> pp f "%a@;=@;%a" (pattern ctxt) p (expression ctxt) x
1416+
| None -> (
1417+
match p with
1418+
| {
1419+
ppat_attributes = [];
1420+
ppat_desc =
1421+
Ppat_constraint
1422+
(({ ppat_desc = Ppat_var _; ppat_attributes = [] } as p), typ);
1423+
} ->
1424+
pp f "%a@;:@;%a@;=@;%a" (simple_pattern ctxt) p (core_type ctxt) typ
1425+
(expression ctxt) x
1426+
| { ppat_desc = Ppat_var { txt; _ }; ppat_attributes = [] } ->
1427+
if is_method then
1428+
(* [mod] is valid pattern variable but not a valid method name *)
1429+
pp f "%a@ %a" ident_of_name txt pp_print_pexp_function x
1430+
else pp f "%a@ %a" (simple_pattern ctxt) p pp_print_pexp_function x
1431+
| _ -> pp f "%a@;=@;%a" (pattern ctxt) p (expression ctxt) x)
13691432

13701433
(* [in] is not printed *)
13711434
and bindings ctxt f (rf, l) =
@@ -1439,7 +1502,7 @@ and structure_item ctxt f x =
14391502
(module_expr ctxt) od.popen_expr (item_attributes ctxt)
14401503
od.popen_attributes
14411504
| Pstr_modtype { pmtd_name = s; pmtd_type = md; pmtd_attributes = attrs } ->
1442-
pp f "@[<hov2>module@ type@ %s%a@]%a" s.txt
1505+
pp f "@[<hov2>module@ type@ %a%a@]%a" ident_of_name s.txt
14431506
(fun f md ->
14441507
match md with
14451508
| None -> ()

test/pprintast/raw_identifiers/test.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,5 +38,5 @@ let stri =
3838

3939
Format.asprintf "%a" Pprintast.structure_item stri
4040
[%%expect{|
41-
- : string = "let f = \\#mod"
41+
- : string = "let f = (mod)"
4242
|}]

0 commit comments

Comments
 (0)