Skip to content

Commit cb01918

Browse files
authored
Update the vendored odoc-parser (#2757)
Update to the new Odoc features: - @toc_status and @order_category - Structured code block tags syntax - Indentation of code and verbatim blocks Also backported formatting changes and code changes. * odoc-parser: Code block and verbatim blocks indentation change Odoc now strips the indentation from code and verbatim blocks. It doesn't use the indentation of the least indented line, like OCamlformat, but instead uses the indentation of the block opening. * odoc: Normalize `\\n` into newline In code blocks, newlines can be turned into `\\n` (or the opposite) when formatting string literals that need to break. Update the normalization function to avoid crashing when that happens. * odoc: Remove extra indentation in code blocks The updated Odoc parser considers that code blocks horizontally start at the opening bracket. It no longer uses the indentation of the least indented line for that. As a result, OCamlformat cannot indent code blocks without changing their content. The indentation is also removed in code blocks that are formatted, to avoid adding visible indentation in rendered documentation. * odoc: Don't fail for removed whitespaces in code blocks During normalisation, allow whitespaces to disappear from code blocks. This happens when code is formatted.
1 parent 25d912f commit cb01918

39 files changed

+991
-717
lines changed

CHANGES.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,13 @@ profile. This started with version 0.26.0.
66

77
## unreleased
88

9+
### Highlight
10+
11+
- \* Update Odoc's parser to 3.0 (#2757, @Julow)
12+
The indentation of code-blocks containing OCaml code is reduced by 2 to avoid
13+
changing the generated documentation. The indentation within code-blocks is
14+
now significant in Odoc and shows up in generated documentation.
15+
916
### Added
1017

1118
- Added option `letop-punning` (#2746, @WardBrian) to control whether

lib/Docstring.ml

Lines changed: 56 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,17 +37,55 @@ let is_tag_only =
3737

3838
type norm_conf = {normalize_code: string -> string}
3939

40-
let normalize_text s =
40+
(** Like [String.fields] but the separator can be of variable length.
41+
[is_sep start_index s] returns [0] if [s.[start_index]] is not the first
42+
char of a separator, or returns the length of the separator.
43+
[start_index] is always less than [String.length s]. *)
44+
let fields_pattern ?(empty = true) ~is_sep s =
45+
let module Sub = Astring.String.Sub in
46+
let len = String.length s in
47+
(* Add a sub at the front of [acc] while respecting [empty]. *)
48+
let sub' ~start ~stop acc =
49+
if start = stop && not empty then acc else Sub.v ~start ~stop s :: acc
50+
in
51+
let rec loop acc field_start i =
52+
if i = len then List.rev (sub' ~start:field_start ~stop:i acc)
53+
else
54+
match is_sep i s with
55+
| 0 -> loop acc field_start (i + 1)
56+
| sep_len ->
57+
let next = i + sep_len in
58+
loop (sub' ~start:field_start ~stop:i acc) next next
59+
in
60+
loop [] 0 0
61+
62+
let normalize_text_subs s =
4163
(* normalize consecutive whitespace chars to a single space *)
42-
String.concat ~sep:" "
43-
(List.filter ~f:(Fn.non String.is_empty)
44-
(String.split_on_chars s ~on:['\t'; '\n'; '\011'; '\012'; '\r'; ' ']) )
64+
let is_sep i s =
65+
match s.[i] with
66+
| '\t' | '\n' | '\011' | '\012' | '\r' | ' ' -> 1
67+
| '\\' when i + 1 < String.length s -> (
68+
match s.[i + 1] with 'n' -> 2 | _ -> 0 )
69+
| _ -> 0
70+
in
71+
fields_pattern ~empty:false ~is_sep s
72+
73+
let normalize_text s =
74+
let module Sub = Astring.String.Sub in
75+
let sep = Sub.v " " in
76+
Sub.concat ~sep (normalize_text_subs s) |> Sub.to_string
4577

4678
let list f fmt l =
47-
let pp_sep fmt () = Format.fprintf fmt "" in
79+
let pp_sep _ () = () in
4880
Format.pp_print_list ~pp_sep f fmt l
4981

50-
let str fmt s = Format.fprintf fmt "%s" (normalize_text s)
82+
let str_with_sep ~pp_sep fmt s =
83+
Format.pp_print_list ~pp_sep Astring.String.Sub.pp fmt
84+
(normalize_text_subs s)
85+
86+
let str fmt s =
87+
let pp_sep fmt () = Format.pp_print_string fmt " " in
88+
str_with_sep ~pp_sep fmt s
5189

5290
let ign_loc f fmt with_loc = f fmt with_loc.Odoc_parser.Loc.value
5391

@@ -105,17 +143,22 @@ let fmt_media_href fmt = function
105143
| `Reference s -> fpf fmt "Reference(%s)" s
106144
| `Link s -> fpf fmt "Link(%s)" s
107145

146+
let fmt_code_block_tag fmt = function
147+
| `Tag s -> fpf fmt "Tag(%a)" (ign_loc str) s
148+
| `Binding (a, b) ->
149+
fpf fmt "Binding(%a, %a)" (ign_loc str) a (ign_loc str) b
150+
108151
let rec odoc_nestable_block_element c fmt : Ast.nestable_block_element -> _ =
109152
function
110153
| `Paragraph elms -> fpf fmt "Paragraph(%a)" odoc_inline_elements elms
111154
| `Code_block (b : Ast.code_block) ->
112155
let fmt_metadata fmt (m : Ast.code_block_meta) =
113-
fpf fmt "(%a, %a)" (ign_loc str) m.language
114-
(option (ign_loc str))
156+
fpf fmt "(%a, %a)" (ign_loc str) m.language (list fmt_code_block_tag)
115157
m.tags
116158
in
117159
let fmt_content =
118-
ign_loc (fun fmt s -> str fmt (c.normalize_code s))
160+
let pp_sep _ () = () in
161+
ign_loc (fun fmt s -> str_with_sep ~pp_sep fmt (c.normalize_code s))
119162
in
120163
let fmt_output =
121164
option (list (ign_loc (odoc_nestable_block_element c)))
@@ -179,6 +222,10 @@ let odoc_tag c fmt : Ast.tag -> unit = function
179222
| `Hidden -> fpf fmt "Hidden"
180223
| `Children_order elems ->
181224
odoc_implicitly_ended_tag c fmt "Children_order" elems
225+
| `Toc_status txt ->
226+
fpf fmt "Toc_status(%a)" (odoc_nestable_block_elements c) txt
227+
| `Order_category txt ->
228+
fpf fmt "Order_category(%a)" (odoc_nestable_block_elements c) txt
182229
| `Short_title elems -> odoc_implicitly_ended_tag c fmt "Short_title" elems
183230

184231
let odoc_block_element c fmt = function

lib/Fmt_odoc.ml

Lines changed: 33 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
(**************************************************************************)
1111

1212
open Fmt
13+
open Ocamlformat_odoc_parser
1314
open Ocamlformat_odoc_parser.Ast
1415
module Loc = Ocamlformat_odoc_parser.Loc
1516

@@ -95,14 +96,23 @@ let rec drop_leading_spaces = function
9596

9697
let ign_loc ~f with_loc = f with_loc.Loc.value
9798

99+
(** Format the content of code and verbatim blocks. *)
100+
let fmt_multiline_text txt =
101+
let fmt_line ~first ~last:_ l =
102+
let l = String.rstrip l in
103+
if first then str l
104+
else if String.length l = 0 then str_as 0 "\n"
105+
else force_break $ str l
106+
in
107+
let lines = String.split_lines txt in
108+
vbox 0 (list_fl lines fmt_line)
109+
98110
let fmt_verbatim_block ~loc s =
99-
let force_break = loc.Loc.start.line < loc.end_.line in
100111
let content =
101-
(* Literal newline to avoid indentation *)
102-
if force_break then wrap (str "\n") force_newline (str s)
103-
else fits_breaks " " "\n" $ str s $ fits_breaks " " ~hint:(0, 0) ""
112+
let s, _warnings = Odoc_parser.verbatim_content loc s in
113+
fmt_multiline_text s
104114
in
105-
hvbox 0 (wrap (str "{v") (str "v}") content)
115+
hvbox 0 (str "{v" $ space_break $ content $ space_break $ str "v}")
106116

107117
let fmt_code_span ~wrap s =
108118
let s = escape_balanced_brackets s in
@@ -300,7 +310,7 @@ and fmt_nestable_block_element c (elm : nestable_block_element with_location)
300310
| `Paragraph elems ->
301311
hovbox 0
302312
(fmt_inline_elements c ~wrap:c.conf.fmt_opts.wrap_docstrings.v elems)
303-
| `Code_block code_block -> fmt_code_block c code_block
313+
| `Code_block code_block -> fmt_code_block ~loc:elm.location c code_block
304314
| `Math_block s -> fmt_math_block s
305315
| `Verbatim s -> fmt_verbatim_block ~loc:elm.location s
306316
| `Modules mods ->
@@ -431,13 +441,19 @@ and fmt_table c table =
431441
| Some light -> fmt_table_light c light
432442
| None -> fmt_table_heavy c table
433443

434-
and fmt_code_block c (b : code_block) =
444+
and fmt_code_block_tag = function
445+
| `Tag t -> ign_loc ~f:str t
446+
| `Binding (k, v) -> ign_loc ~f:str k $ str "=" $ ign_loc ~f:str v
447+
448+
and fmt_code_block c ~loc (b : code_block) =
435449
let content =
436-
let content = b.content.value in
450+
let content, _warnings =
451+
Odoc_parser.codeblock_content loc b.content.value
452+
in
437453
match b.meta with
438454
| Some {language= {value= "ocaml"; _}; _} | None -> (
439455
(* [offset] doesn't take into account code blocks nested into lists. *)
440-
match c.fmt_code c.conf ~offset:2 ~set_margin:true content with
456+
match c.fmt_code c.conf ~offset:0 ~set_margin:true content with
441457
| Ok formatted -> formatted |> Format_.asprintf "%a" Fmt.eval
442458
| Error (`Msg message) ->
443459
if
@@ -451,23 +467,13 @@ and fmt_code_block c (b : code_block) =
451467
content )
452468
| Some _ -> content
453469
in
454-
let fmt_line ~first ~last:_ l =
455-
let l = String.rstrip l in
456-
if first then str l
457-
else if String.length l = 0 then str_as 0 "\n"
458-
else force_break $ str l
459-
in
460-
let fmt_code s =
461-
let lines = String.split_lines s in
462-
vbox 0 (list_fl lines fmt_line)
463-
in
464470
let delim = opt b.delimiter str in
465471
let opening =
466472
let meta =
467473
opt b.meta (fun meta ->
468474
str "@"
469475
$ ign_loc ~f:str meta.language
470-
$ opt meta.tags (fun tags -> str " " $ ign_loc ~f:str tags) )
476+
$ list meta.tags noop (fun t -> char ' ' $ fmt_code_block_tag t) )
471477
in
472478
str "{" $ delim $ meta $ str "["
473479
in
@@ -481,9 +487,11 @@ and fmt_code_block c (b : code_block) =
481487
$ str "]}" )
482488
| None -> str "]" $ delim $ str "}"
483489
in
484-
hvbox 2
485-
( opening $ force_break $ fmt_code content $ break 1 ~-2
486-
$ output_or_closing )
490+
(* The content might contain an indentation when it was not formatted. *)
491+
hvbox 0
492+
( opening $ force_break
493+
$ fmt_multiline_text content
494+
$ space_break $ output_or_closing )
487495

488496
and fmt_nestable_block_elements c elems =
489497
list_block_elem c elems (fmt_nestable_block_element c)
@@ -518,6 +526,8 @@ let fmt_tag c : tag -> _ = function
518526
| `Hidden -> fmt_tag_args c "hidden"
519527
| `Canonical ref -> fmt_tag_args c "canonical" ~arg:(fmt_reference ref)
520528
| `Children_order txt -> fmt_tag_args c "children_order" ~txt
529+
| `Toc_status txt -> fmt_tag_args c "toc_status" ~txt
530+
| `Order_category txt -> fmt_tag_args c "order_category" ~txt
521531
| `Short_title txt -> fmt_tag_args c "short_title" ~txt
522532

523533
let fmt_block_element c elm =

test/passing/refs.ahrefs/doc.mld.ref

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ v}
6868
[foo.ml]
6969

7070
{[
71-
(** I'm foo, a page child to Doe *)
71+
(** I'm foo, a page child to Doe *)
7272
]}
7373

7474
{2 Compilation}
Lines changed: 15 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,26 @@
11
Block delimiters should be on their own line:
22

33
{[
4-
let x = 1
4+
let x = 1
55
]}
66

77
As of odoc 2.1, a block can carry metadata:
88

99
{@ocaml[
10-
let x = 2
10+
let x = 2
1111
]}
1212

1313
An OCaml block that should break:
1414

1515
{[
16-
let x = 2 in
17-
x + x
16+
let x = 2 in
17+
x + x
1818
]}
1919

2020
A toplevel phrase with no output:
2121

2222
{[
23-
# let x = 2
24-
and y = 3 in
25-
x + y
26-
;;
23+
# let x = 2 and y = 3 in x+y;;
2724
]}
2825

2926
A toplevel phrase with output:
@@ -38,10 +35,7 @@ Many toplevel phrases without output:
3835
{[
3936
# let x = 2;;
4037
# x + 2;;
41-
# let x = 2
42-
and y = 3 in
43-
x + y
44-
;;
38+
# let x = 2 and y = 3 in x+y;;
4539
]}
4640

4741
Many toplevel phrases with output:
@@ -51,10 +45,7 @@ Many toplevel phrases with output:
5145
val x : int = 2
5246
# x + 2;;
5347
- : int = 4
54-
# let x = 2
55-
and y = 3 in
56-
x + y
57-
;;
48+
# let x = 2 and y = 3 in x+y;;
5849
]}
5950

6051
Output are printed after a newline:
@@ -68,25 +59,22 @@ Output are printed after a newline:
6859
Excessive linebreaks are removed:
6960

7061
{[
71-
# let x = 2 in
72-
x + 1
73-
;;
62+
# let x = 2 in x+1;;
63+
7464
output
75-
# let y = 3 in
76-
y + 1
77-
;;
65+
66+
# let y = 3 in y+1;;
7867
]}
7968

8069
Linebreak after `#`:
8170

8271
{[
83-
# let x = 2 in
84-
x + 1
85-
;;
72+
#
73+
let x = 2 in x+1;;
8674
]}
8775

8876
Invalid toplevel phrase/ocaml block:
8977
{[
90-
- : int =
91-
4
78+
- : int =
79+
4
9280
]}

0 commit comments

Comments
 (0)