11module Code_block = struct
2- type metadata = {
3- language_tag : string Odoc_parser.Loc .with_location ;
4- labels : string Odoc_parser.Loc .with_location option ;
5- }
2+ type metadata = { language_tag : string ; labels : string option }
63
74 type t = {
8- location : Odoc_parser.Loc .span ;
95 metadata : metadata option ;
10- contents : string ;
6+ content : Location .t ; (* Location of the content *)
7+ code_block : Location .t ; (* Location of the enclosing code block *)
118 }
129end
1310
14- (* odoc-parser adjusts for the initial [** *)
15- let docstring_start_adjustment = String. length " (**"
16-
17- let drop_last lst =
18- match List. rev lst with
19- | [] -> None
20- | last :: rev_tl -> Some (List. rev rev_tl, last)
11+ (* Parse and extract code block metadata from an odoc formatted docstring.
2112
22- (* drop_first_and_last [1; 2; 3; 4] = Some (1, Some ([2; 3], 4)). *)
23- let drop_first_and_last = function
24- | [] -> None
25- | first :: tl -> Some (first, drop_last tl)
13+ Code blocks are the only thing we're interested in. This function parses
14+ the given text and extracts the metadata and enough location information
15+ from the code blocks be able to String.sub them out of the original text.
2616
27- let slice lines ~(start : Odoc_parser.Loc.point ) ~(end_ : Odoc_parser.Loc.point )
28- =
29- let lines_to_include =
30- Util.Array. slice lines ~from: (start.line - 1 ) ~to_: (end_.line - 1 )
31- |> Array. to_list
32- in
33- match drop_first_and_last lines_to_include with
34- | None -> " "
35- | Some (line , None) ->
36- String. sub line start.column (end_.column - start.column)
37- (* Imagine we were slicing the file from (Line 2, Column 3) to (Line 6, Column 7):
17+ [location] is the location of this docstring within the original file
18+ (ie, the location of the contents of the documentation comment). This is
19+ required so we can splice out the code blocks from the original file.
3820
39- 0123456789
40- 1 ----------
41- 2 ---[---
42- 3 ---------
43- 4 --
44- 5 ----------
45- 6 -------]--
46- 7 ----------
47- 8 ----------
21+ The results are prepended in reverse order onto [acc]. *)
22+ let extract_code_block_info acc ~(location : Lexing.position ) ~docstring =
23+ let module O = Odoc_parser in
24+ let parsed = O. parse_comment ~location ~text: docstring in
4825
49- The case below handles this multiline case, concatenating the included substrings
50- from lines 2-6 ([lines_to_include]). *)
51- | Some (first_line , Some (stripped , last_line )) ->
52- let first_line =
53- String. sub first_line start.column
54- (String. length first_line - start.column)
26+ (* If odoc-parser produced any warnings, we raise them as errors here *)
27+ List. iter
28+ (fun error -> failwith (O.Warning. to_string error))
29+ (O. warnings parsed);
30+
31+ (* Extract the useful info from what odoc has given us.
32+
33+ Note, we don't use the contents of the code block that odoc has handed us
34+ as that has been stripped and we need all the relevant whitespace.
35+ Fortunately the location info give us enough info to be able to extract
36+ the code from the original text, whitespace and all.
37+ *)
38+ let handle_code_block : O.Loc.span -> _ -> Code_block.t =
39+ let convert_loc (sp : O.Loc.span ) =
40+ Location.
41+ {
42+ loc_start = O. position_of_point parsed sp.start;
43+ loc_end = O. position_of_point parsed sp.end_;
44+ loc_ghost = false ;
45+ }
46+ in
47+ fun location (metadata , { O.Loc. location = span ; _ } ) ->
48+ let metadata =
49+ Option. map
50+ (fun (lang , labels ) ->
51+ let language_tag = O.Loc. value lang in
52+ let labels = Option. map O.Loc. value labels in
53+ Code_block. { language_tag; labels })
54+ metadata
5555 in
56- let last_line = String. sub last_line 0 end_.column in
57- String. concat " \n " ([ first_line ] @ stripped @ [ last_line ])
58-
59- let is_newline c = c = '\n'
60-
61- let find_nth_line s =
62- let max_index = String. length s - 1 in
63- let indexes_of_newlines =
64- s |> String. to_seqi
65- |> Seq. filter_map (fun (i , c ) ->
66- match is_newline c with true -> Some i | false -> None )
67- in
68- let indexes_of_line_starts =
69- indexes_of_newlines
70- |> Seq. filter_map (fun i ->
71- match i < max_index with true -> Some (i + 1 ) | false -> None )
56+ let content = convert_loc span in
57+ let code_block = convert_loc location in
58+ { metadata; content; code_block }
7259 in
73- (* first line always starts at index zero, even if there is no preceeding newline *)
74- let indexes = 0 :: List. of_seq indexes_of_line_starts in
75- fun nth ->
76- (* index starts at zero but lines go from 1 *)
77- List. nth_opt indexes (nth - 1 )
78-
79- let point_to_index offset_of_line_start (point : Odoc_parser.Loc.point ) =
80- let offset = offset_of_line_start point.line + point.column in
81- (* on line 1 odoc-parser adjusts by the start of the docstring, undo *)
82- match point.line with 1 -> offset - docstring_start_adjustment | _ -> offset
83-
84- let initial_line_number = 1
8560
86- let dislocate_point ~( location : Lexing.position )
87- ( point : Odoc_parser.Loc.point ) =
88- { point with line = point.line - location.pos_lnum + initial_line_number }
89-
90- let slice_location ~( location : Lexing.position ) offset_of_line_start
91- ( span : Odoc_parser.Loc.span ) s =
92- let start = dislocate_point ~location span.start in
93- let end_ = dislocate_point ~location span.end_ in
94- let start_index = point_to_index offset_of_line_start start in
95- let end_index = point_to_index offset_of_line_start end_ in
96- let len = end_index - start_index in
97- String. sub s start_index len
98-
99- let extract_code_blocks ~( location : Lexing.position ) ~ docstring =
100- let offset_in_string = find_nth_line docstring in
101- let offset_of_line_start nth =
102- match offset_in_string nth with
103- | None -> Fmt. failwith " Attempting to reach invalid line "
104- | Some offset -> offset
61+ (* Fold over the results from odoc-parser, recurse where necessary
62+ and extract the code block metadata *)
63+ let rec fold_fn acc ( elt : O.Ast.block_element O.Loc.with_location ) =
64+ match elt with
65+ | { O.Loc. value = `Code_block c ; location } ->
66+ handle_code_block location c :: acc
67+ | { O.Loc. value = `List ( _ , _ , lists ); _ } ->
68+ List. fold_left ( List. fold_left fold_fn) acc (lists :> O.Ast.t list )
69+ | { O.Loc. value = `Tag tag ; _ } -> (
70+ match tag with
71+ | `Deprecated blocks
72+ | `Param (_, blocks)
73+ | `Raise (_, blocks)
74+ | `Return blocks
75+ | `See (_, _, blocks)
76+ | `Before ( _ , blocks ) ->
77+ List. fold_left fold_fn acc (blocks :> O.Ast.t )
78+ | _ -> acc)
79+ | _ -> acc
10580 in
106- let rec acc blocks =
107- List. map
108- (fun block ->
109- match Odoc_parser.Loc. value block with
110- | `Code_block (metadata, { Odoc_parser.Loc. value = _; location = span })
111- ->
112- let metadata =
113- Option. map
114- (fun (language_tag , labels ) ->
115- Code_block. { language_tag; labels })
116- metadata
117- in
118- let contents =
119- slice_location ~location offset_of_line_start span docstring
120- in
121- [ { Code_block. location = block.location; metadata; contents } ]
122- | `List (_ , _ , lists ) -> List. map acc lists |> List. concat
123- | _ -> [] )
124- blocks
125- |> List. concat
126- in
127- let parsed = Odoc_parser. parse_comment ~location ~text: docstring in
128- List. iter
129- (fun error -> failwith (Odoc_parser.Warning. to_string error))
130- (Odoc_parser. warnings parsed);
131- List. map
132- (fun element ->
133- match element with
134- | { Odoc_parser.Loc. value = #Odoc_parser.Ast. nestable_block_element; _ }
135- as e ->
136- acc [ e ]
137- | { value = `Tag tag ; _ } -> (
138- match tag with
139- | `Deprecated blocks -> acc blocks
140- | `Param (_ , blocks ) -> acc blocks
141- | `Raise (_ , blocks ) -> acc blocks
142- | `Return blocks -> acc blocks
143- | `See (_ , _ , blocks ) -> acc blocks
144- | `Before (_ , blocks ) -> acc blocks
145- | _ -> [] )
146- | { value = `Heading _ ; _ } -> [] )
147- (Odoc_parser. ast parsed)
148- |> List. concat
149-
150- let docstrings lexbuf =
151- let rec loop list =
152- match Lexer. token_with_comments lexbuf with
153- | Parser. EOF -> list
154- | Parser. DOCSTRING docstring ->
155- let docstring =
156- ( Docstrings. docstring_body docstring,
157- Docstrings. docstring_loc docstring )
158- in
159- loop (docstring :: list )
160- | _ -> loop list
161- in
162- loop [] |> List. rev
163-
164- let convert_pos (p : Lexing.position ) (pt : Odoc_parser.Loc.point ) =
165- { p with pos_lnum = pt.line; pos_cnum = pt.column }
16681
167- let convert_loc (loc : Location.t ) (sp : Odoc_parser.Loc.span ) =
168- let loc_start = convert_pos loc.loc_start sp.start in
169- let loc_end = convert_pos loc.loc_end sp.end_ in
170- { loc with loc_start; loc_end }
82+ List. fold_left fold_fn acc (O. ast parsed)
17183
84+ (* This function handles string containing ocaml code. It parses it as ocaml
85+ via compiler-libs, then for each odoc-formatted comment it then parses
86+ that via odoc-parser. The end result is a list of metadata about the code
87+ blocks within the comments. The result is given as an in-order list of
88+ [Code_block.t] values. *)
17289let docstring_code_blocks str =
173- Lexer. handle_docstrings := true ;
174- Lexer. init () ;
175- List. map
176- (fun (docstring , (cmt_loc : Location.t )) ->
177- let location =
178- {
179- cmt_loc.loc_start with
180- pos_cnum = cmt_loc.loc_start.pos_cnum + docstring_start_adjustment;
181- }
90+ let initial_handle_docstrings = ! Lexer. handle_docstrings in
91+ Fun. protect
92+ ~finally: (fun () -> Lexer. handle_docstrings := initial_handle_docstrings)
93+ (fun () ->
94+ Lexer. handle_docstrings := true ;
95+ Lexer. init () ;
96+ let lexbuf = Lexing. from_string str in
97+ let rec loop list =
98+ match Lexer. token_with_comments lexbuf with
99+ | Parser. EOF -> list
100+ | Parser. DOCSTRING docstring ->
101+ let body = Docstrings. docstring_body docstring in
102+ let loc = Docstrings. docstring_loc docstring in
103+
104+ (* odoc-parser adjusts for the initial [** *)
105+ let adjustment = 3 (* String.length "(* *" *) in
106+
107+ let location =
108+ {
109+ loc.loc_start with
110+ pos_cnum = loc.loc_start.pos_cnum + adjustment;
111+ }
112+ in
113+ loop (extract_code_block_info list ~location ~docstring:body)
114+ | _ -> loop list
182115 in
183- let blocks = extract_code_blocks ~location ~docstring in
184- List. map
185- (fun (b : Code_block.t ) -> (b, convert_loc cmt_loc b.location))
186- blocks)
187- (docstrings (Lexing. from_string str))
188- |> List. concat
116+ loop [] |> List.rev)
189117
190- let make_block ~loc code_block =
118+ (* Given code block metadata and the original file, this function splices the
119+ contents of the code block from the original text and creates an Mdx
120+ Block.t, or reports the error (e.g., from invalid tags) *)
121+ let make_block code_block file_contents =
191122 let handle_header = function
192123 | Some Code_block.{ language_tag; labels } ->
193124 let open Util.Result.Infix in
194- let language_tag = Odoc_parser.Loc. value language_tag in
195125 let header = Block.Header.of_string language_tag in
196126 let* labels =
197127 match labels with
198128 | None -> Ok []
199129 | Some labels -> (
200- let labels = Odoc_parser.Loc. value labels |> String. trim in
201- match Label. of_string labels with
130+ match Label.of_string (String.trim labels) with
202131 | Ok labels -> Ok labels
203- | Error msgs -> Error ( List. hd msgs)
204- (* TODO: Report precise location *) )
132+ | Error msgs ->
133+ Error (List.hd msgs) (* TODO: Report precise location *) )
205134 in
206135 let language_label = Label.Language_tag language_tag in
207136 Ok (header, language_label :: labels)
@@ -212,47 +141,43 @@ let make_block ~loc code_block =
212141 match handle_header code_block.Code_block.metadata with
213142 | Error _ as e -> e
214143 | Ok (header, labels) ->
215- let contents = String. split_on_char '\n' code_block.contents in
216- Block. mk ~loc ~section: None ~labels ~header ~contents ~legacy_labels: false
217- ~errors: []
144+ let slice (loc : Location.t) =
145+ let start = loc.loc_start.pos_cnum in
146+ let len = loc.loc_end.pos_cnum - start in
147+ String.sub file_contents start len
148+ in
149+ let contents = slice code_block.content |> String.split_on_char '\n' in
150+ Block.mk ~loc:code_block.code_block ~section:None ~labels ~header
151+ ~contents ~legacy_labels:false ~errors:[]
218152
219153let parse_mli file_contents =
220154 (* Find the locations of the code blocks within [file_contents], then slice it up into
221155 [Text] and [Block] parts by using the starts and ends of those blocks as
222156 boundaries. *)
223157 let code_blocks = docstring_code_blocks file_contents in
224- let cursor = { Odoc_parser.Loc. line = 1 ; column = 0 } in
225- let lines = String. split_on_char '\n' file_contents |> Array. of_list in
226158 let cursor, tokens =
227159 List.fold_left
228- (fun (cursor , code_blocks ) (( code_block : Code_block.t ), loc ) ->
160+ (fun (cursor, code_blocks) (code_block : Code_block.t) ->
229161 let pre_text =
230162 Document.Text
231- (slice lines ~start: cursor ~end_: code_block.location.start)
163+ (String.sub file_contents cursor
164+ (code_block.code_block.loc_start.pos_cnum - cursor))
232165 in
233166 let block =
234- match make_block ~loc code_block with
167+ match make_block code_block file_contents with
235168 | Ok block -> Document.Block block
236169 | Error (`Msg msg) -> Fmt.failwith "Error creating block: %s" msg
237170 in
238- let cursor = code_block.location.end_ in
239171 (* append them in reverse order, since this is a fold_left *)
240172 let code_blocks = block :: pre_text :: code_blocks in
241- (cursor , code_blocks))
242- (cursor , [] ) code_blocks
173+ (code_block.code_block.loc_end.pos_cnum , code_blocks))
174+ (0 , []) code_blocks
243175 in
244176 let tokens = List.rev tokens in
245- let eof =
246- {
247- Odoc_parser.Loc. line = Array. length lines;
248- column = String. length lines.(Array. length lines - 1 );
249- }
250- in
251- let eof_is_beyond_location (loc : Odoc_parser.Loc.point ) =
252- eof.line > loc.line || (eof.line = loc.line && eof.column > loc.column)
253- in
254- if eof_is_beyond_location cursor then
255- let remainder = slice lines ~start: cursor ~end_: eof in
177+ if cursor < String.length file_contents then
178+ let remainder =
179+ String.sub file_contents cursor (String.length file_contents - cursor)
180+ in
256181 if not (String.equal remainder "") then tokens @ [ Text remainder ]
257182 else tokens
258183 else tokens
0 commit comments