1212open Ocamlformat_parser_extended
1313include Parsetree
1414
15- let equal_core_type : core_type -> core_type -> bool = Poly. equal
16-
1715type use_file = toplevel_phrase list
1816
1917type repl_file = repl_phrase list
2018
19+ module Std_parsetree = Ocamlformat_parser_standard. Parsetree
20+
21+ type ('a, 'b) paired = {extended : 'a ; std : 'b }
22+
2123type 'a t =
22- | Structure : structure t
23- | Signature : signature t
24- | Use_file : use_file t
25- | Core_type : core_type t
26- | Module_type : module_type t
27- | Expression : expression t
28- | Pattern : pattern t
24+ | Structure : ( structure , Std_parsetree .structure ) paired t
25+ | Signature : ( signature , Std_parsetree .signature ) paired t
26+ | Use_file : ( use_file , Std_parsetree .toplevel_phrase list ) paired t
27+ | Core_type : ( core_type , Std_parsetree .core_type ) paired t
28+ | Module_type : ( module_type , Std_parsetree .module_type ) paired t
29+ | Expression : ( expression , Std_parsetree .expression ) paired t
30+ | Pattern : ( pattern , Std_parsetree .pattern ) paired t
2931 | Repl_file : repl_file t
3032 | Documentation : Ocamlformat_odoc_parser.Ast .t t
3133
@@ -42,17 +44,16 @@ let of_syntax = function
4244 | Repl_file -> Any Repl_file
4345 | Documentation -> Any Documentation
4446
45- let equal (type a ) (_ : a t ) : a -> a -> bool = Poly. equal
46-
4747let map (type a ) (x : a t ) (m : Ast_mapper.mapper ) : a -> a =
4848 match x with
49- | Structure -> m.structure m
50- | Signature -> m.signature m
51- | Use_file -> List. map ~f: (m.toplevel_phrase m)
52- | Core_type -> m.typ m
53- | Module_type -> m.module_type m
54- | Expression -> m.expr m
55- | Pattern -> m.pat m
49+ | Structure -> fun v -> {v with extended= m.structure m v.extended}
50+ | Signature -> fun v -> {v with extended= m.signature m v.extended}
51+ | Use_file ->
52+ fun v -> {v with extended= List. map ~f: (m.toplevel_phrase m) v.extended}
53+ | Core_type -> fun v -> {v with extended= m.typ m v.extended}
54+ | Module_type -> fun v -> {v with extended= m.module_type m v.extended}
55+ | Expression -> fun v -> {v with extended= m.expr m v.extended}
56+ | Pattern -> fun v -> {v with extended= m.pat m v.extended}
5657 | Repl_file -> List. map ~f: (m.repl_phrase m)
5758 | Documentation -> Fn. id
5859
@@ -346,23 +347,51 @@ module Parse = struct
346347
347348 let ast (type a ) (fg : a t ) ~ocaml_version ~preserve_beginend
348349 ~prefer_let_puns ~input_name str : a =
349- map fg
350- ( normalize_mapper ~ocaml_version ~preserve_beginend ~prefer_let_puns )
351- @@
350+ let nm =
351+ normalize_mapper ~ocaml_version ~preserve_beginend ~prefer_let_puns
352+ in
352353 let lexbuf = Lexing. from_string str in
353- let ocaml_version =
354+ let ocaml_version_pair =
354355 Some Ocaml_version. (major ocaml_version, minor ocaml_version)
355356 in
356357 Location. init_info lexbuf input_name ;
358+ let parse_std std_fg =
359+ (* Suppress warnings during raw std parse to avoid duplicate w50
360+ warnings — w50 handling is done separately by parse_ocaml *)
361+ Warning. with_warning_filter
362+ ~filter_warning: (fun _loc _warn -> false )
363+ ~filter_alert: (fun _loc _alert -> false )
364+ ~f: (fun () -> Std_ast.Parse. ast std_fg ~ocaml_version ~input_name str)
365+ in
366+ let paired normalize parse_ext std_fg =
367+ let extended =
368+ normalize nm (parse_ext ~ocaml_version: ocaml_version_pair lexbuf)
369+ in
370+ {extended; std= parse_std std_fg}
371+ in
357372 match fg with
358- | Structure -> Parse. implementation ~ocaml_version lexbuf
359- | Signature -> Parse. interface ~ocaml_version lexbuf
360- | Use_file -> Parse. use_file ~ocaml_version lexbuf
361- | Core_type -> Parse. core_type ~ocaml_version lexbuf
362- | Module_type -> Parse. module_type ~ocaml_version lexbuf
363- | Expression -> Parse. expression ~ocaml_version lexbuf
364- | Pattern -> Parse. pattern ~ocaml_version lexbuf
365- | Repl_file -> Toplevel_lexer. repl_file ~ocaml_version lexbuf
373+ | Structure ->
374+ paired
375+ (fun nm -> nm.structure nm)
376+ Parse. implementation Std_ast. Structure
377+ | Signature ->
378+ paired (fun nm -> nm.signature nm) Parse. interface Std_ast. Signature
379+ | Use_file ->
380+ paired
381+ (fun nm -> List. map ~f: (nm.toplevel_phrase nm))
382+ Parse. use_file Std_ast. Use_file
383+ | Core_type ->
384+ paired (fun nm -> nm.typ nm) Parse. core_type Std_ast. Core_type
385+ | Module_type ->
386+ paired
387+ (fun nm -> nm.module_type nm)
388+ Parse. module_type Std_ast. Module_type
389+ | Expression ->
390+ paired (fun nm -> nm.expr nm) Parse. expression Std_ast. Expression
391+ | Pattern -> paired (fun nm -> nm.pat nm) Parse. pattern Std_ast. Pattern
392+ | Repl_file ->
393+ List. map ~f: (nm.repl_phrase nm)
394+ (Toplevel_lexer. repl_file ~ocaml_version: ocaml_version_pair lexbuf)
366395 | Documentation ->
367396 let pos = (Location. curr lexbuf).loc_start in
368397 let pos = {pos with pos_fname= input_name} in
@@ -377,13 +406,13 @@ module Printast = struct
377406 let repl_file = Format. pp_print_list repl_phrase
378407
379408 let ast (type a ) : a t -> _ -> a -> _ = function
380- | Structure -> implementation
381- | Signature -> interface
382- | Use_file -> use_file
383- | Core_type -> core_type
384- | Module_type -> module_type
385- | Expression -> expression
386- | Pattern -> pattern
409+ | Structure -> fun fmt v -> implementation fmt v.extended
410+ | Signature -> fun fmt v -> interface fmt v.extended
411+ | Use_file -> fun fmt v -> use_file fmt v.extended
412+ | Core_type -> fun fmt v -> core_type fmt v.extended
413+ | Module_type -> fun fmt v -> module_type fmt v.extended
414+ | Expression -> fun fmt v -> expression fmt v.extended
415+ | Pattern -> fun fmt v -> pattern fmt v.extended
387416 | Repl_file -> repl_file
388417 | Documentation -> Docstring. dump
389418end
@@ -395,3 +424,216 @@ module Asttypes = struct
395424
396425 let is_recursive = function Recursive -> true | Nonrecursive -> false
397426end
427+
428+ type std_value = Std_value : 'a Std_ast .t * 'a -> std_value
429+
430+ let get_std (type a ) (fg : a t ) (v : a ) : std_value option =
431+ match fg with
432+ | Structure -> Some (Std_value (Structure , v.std))
433+ | Signature -> Some (Std_value (Signature , v.std))
434+ | Use_file -> Some (Std_value (Use_file , v.std))
435+ | Core_type -> Some (Std_value (Core_type , v.std))
436+ | Module_type -> Some (Std_value (Module_type , v.std))
437+ | Expression -> Some (Std_value (Expression , v.std))
438+ | Pattern -> Some (Std_value (Pattern , v.std))
439+ | Repl_file -> None
440+ | Documentation -> None
441+
442+ type std_pair = Std_pair : 'a Std_ast .t * 'a * 'a -> std_pair
443+
444+ let get_std_pair (type a ) (fg : a t ) (v1 : a ) (v2 : a ) : std_pair option =
445+ match fg with
446+ | Structure -> Some (Std_pair (Structure , v1.std, v2.std))
447+ | Signature -> Some (Std_pair (Signature , v1.std, v2.std))
448+ | Use_file -> Some (Std_pair (Use_file , v1.std, v2.std))
449+ | Core_type -> Some (Std_pair (Core_type , v1.std, v2.std))
450+ | Module_type -> Some (Std_pair (Module_type , v1.std, v2.std))
451+ | Expression -> Some (Std_pair (Expression , v1.std, v2.std))
452+ | Pattern -> Some (Std_pair (Pattern , v1.std, v2.std))
453+ | Repl_file -> None
454+ | Documentation -> None
455+
456+ let dump (type a ) (fg : a t ) fmt (v : a ) =
457+ match get_std fg v with
458+ | Some (Std_value (std_fg , std_v )) -> Std_ast.Printast. ast std_fg fmt std_v
459+ | None -> Printast. ast fg fmt v
460+
461+ let dump_normalized (type a ) (fg : a t ) ~normalize_code conf fmt (v : a ) =
462+ match get_std fg v with
463+ | Some (Std_value (std_fg , std_v )) ->
464+ Std_ast.Printast. ast std_fg fmt
465+ (Normalize_std_ast. ast std_fg ~normalize_code conf std_v)
466+ | None -> Printast. ast fg fmt v
467+
468+ type ast_check_result =
469+ | Ast_preserved
470+ | Docstrings_moved of Cmt .error list
471+ | Ast_changed
472+
473+ let equivalent (type a ) (fg : a t ) ~normalize_code conf (old_v : a )
474+ (new_v : a ) : ast_check_result =
475+ match get_std_pair fg old_v new_v with
476+ | None ->
477+ (* TODO: Repl_file and Documentation have no std AST, so we skip the
478+ equivalence check.
479+ - Repl_file: each toplevel phrase is OCaml code that could be
480+ validated individually by parsing it with the standard parser.
481+ - Documentation: OCaml code blocks inside .mld files are formatted
482+ but never validated for AST preservation. We should check each
483+ formatted code block by parsing it with the standard parser and
484+ comparing. *)
485+ Ast_preserved
486+ | Some (Std_pair (std_fg , old_std , new_std )) ->
487+ if
488+ Normalize_std_ast. equal std_fg ~normalize_code
489+ ~ignore_doc_comments: (not conf.Conf. opr_opts.comment_check.v)
490+ conf old_std new_std
491+ then Ast_preserved
492+ else if
493+ Normalize_std_ast. equal std_fg ~normalize_code
494+ ~ignore_doc_comments: true conf old_std new_std
495+ then
496+ Docstrings_moved
497+ (Normalize_std_ast. moved_docstrings ~normalize_code std_fg conf
498+ old_std new_std )
499+ else Ast_changed
500+
501+ module Parsed = struct
502+ type 'a t =
503+ {ast : 'a ; comments : Cmt .t list ; prefix : string ; source : Source .t }
504+ end
505+
506+ exception Warning50 of (Location. t * Warnings. t) list
507+
508+ module W = struct
509+ type t = int
510+
511+ let in_lexer : t list = [1 ; 2 ; 3 ; 14 ; 29 ]
512+
513+ let disable x = - abs x
514+
515+ let enable x = abs x
516+
517+ let to_string x =
518+ String. concat ~sep: " " (List. map ~f: (Format. sprintf " %+d" ) x)
519+ end
520+
521+ let tokens lexbuf =
522+ let rec loop acc =
523+ match Lexer. token_with_comments lexbuf with
524+ (* The location in lexbuf are invalid for comments *)
525+ | COMMENT (_ , loc ) as tok -> loop ((tok, loc) :: acc)
526+ | DOCSTRING ds as tok -> loop ((tok, Docstrings. docstring_loc ds) :: acc)
527+ | tok -> (
528+ let loc = Migrate_ast.Location. of_lexbuf lexbuf in
529+ let acc = (tok, loc) :: acc in
530+ match tok with EOF -> List. rev acc | _ -> loop acc )
531+ in
532+ loop []
533+
534+ let fresh_lexbuf source =
535+ let lexbuf = Lexing. from_string source in
536+ Location. init_info lexbuf ! Location. input_name ;
537+ let hash_bang =
538+ Lexer. skip_hash_bang lexbuf ;
539+ let len = lexbuf.lex_last_pos in
540+ String. sub source ~pos: 0 ~len
541+ in
542+ (lexbuf, hash_bang)
543+
544+ let split_hash_bang source =
545+ let lexbuf = Lexing. from_string source in
546+ Location. init_info lexbuf ! Location. input_name ;
547+ Lexer. skip_hash_bang lexbuf ;
548+ let len = lexbuf.lex_last_pos in
549+ let hash_bang = String. sub source ~pos: 0 ~len in
550+ let rest = String. sub source ~pos: len ~len: (String. length source - len) in
551+ (rest, hash_bang)
552+
553+ let collect_comments () =
554+ List. map (Lexer. comments () ) ~f: (function
555+ | `Comment txt , loc -> Cmt. create_comment txt loc
556+ | `Docstring txt , loc -> Cmt. create_docstring txt loc )
557+
558+ let parse_ocaml ?(disable_w50 = false ) ?(disable_deprecated = false ) fg
559+ (conf : Conf.t ) ~input_name ~source =
560+ let warnings =
561+ if conf.opr_opts.quiet.v then List. map ~f: W. disable W. in_lexer else []
562+ in
563+ let warnings = if disable_w50 then warnings else W. enable 50 :: warnings in
564+ ignore @@ Warnings. parse_options false (W. to_string warnings) ;
565+ let w50 = ref [] in
566+ let t =
567+ let source, hash_bang = split_hash_bang source in
568+ Warning. with_warning_filter
569+ ~filter_warning: (fun loc warn ->
570+ if
571+ Warning. is_unexpected_docstring warn
572+ && conf.opr_opts.comment_check.v
573+ then (
574+ w50 := (loc, warn) :: ! w50 ;
575+ false )
576+ else not conf.opr_opts.quiet.v )
577+ ~filter_alert: (fun _loc alert ->
578+ if Warning. is_deprecated_alert alert && disable_deprecated then false
579+ else not conf.opr_opts.quiet.v )
580+ ~f: (fun () ->
581+ let ocaml_version = conf.opr_opts.ocaml_version.v in
582+ let preserve_beginend =
583+ Poly. (conf.fmt_opts.exp_grouping.v = `Preserve )
584+ in
585+ let prefer_let_puns =
586+ match conf.fmt_opts.letop_punning.v with
587+ | `Always -> Some true
588+ | `Never -> Some false
589+ | `Preserve -> None
590+ in
591+ let ast =
592+ Parse. ast fg ~ocaml_version ~preserve_beginend ~prefer_let_puns
593+ ~input_name source
594+ in
595+ let comments = collect_comments () in
596+ Warnings. check_fatal () ;
597+ let tokens =
598+ let lexbuf, _ = fresh_lexbuf source in
599+ tokens lexbuf
600+ in
601+ let source = Source. create ~text: source ~tokens in
602+ {Parsed. ast; comments; prefix= hash_bang; source} )
603+ in
604+ match List. rev ! w50 with [] -> t | w50 -> raise (Warning50 w50)
605+
606+ let parse (type a ) ?disable_w50 ?disable_deprecated (fg : a t ) conf
607+ ~input_name ~source : a Parsed. t =
608+ match fg with
609+ | Documentation ->
610+ let pos = {Lexing. dummy_pos with pos_fname= input_name} in
611+ let ast = Docstring. parse_file pos source in
612+ { Parsed. ast
613+ ; comments= []
614+ ; prefix= " "
615+ ; source= Source. create ~text: source ~tokens: [] }
616+ | fg ->
617+ parse_ocaml ?disable_w50 ?disable_deprecated fg conf ~input_name
618+ ~source
619+
620+ (* * [is_repl_block x] returns whether [x] is a list of REPL phrases and
621+ outputs of the form:
622+
623+ {v
624+ # let this is = some phrase;;
625+ this is some output
626+ v} *)
627+ let is_repl_block x =
628+ String. length x > = 2 && Char. equal x.[0 ] '#' && Char. is_whitespace x.[1 ]
629+
630+ let parse_toplevel ?disable_w50 ?disable_deprecated (conf : Conf.t )
631+ ~input_name ~source =
632+ if is_repl_block source && conf.fmt_opts.parse_toplevel_phrases.v then
633+ Either. Second
634+ (parse ?disable_w50 ?disable_deprecated Repl_file conf ~input_name
635+ ~source )
636+ else
637+ First
638+ (parse ?disable_w50 ?disable_deprecated Use_file conf ~input_name
639+ ~source )
0 commit comments