Skip to content

Commit 2dffff5

Browse files
Merge pull request #422 from jonludlam/simplify-mli-parser
Simplify mli_parser
2 parents ea30f77 + 4c5ab4c commit 2dffff5

File tree

1 file changed

+122
-197
lines changed

1 file changed

+122
-197
lines changed

lib/mli_parser.ml

Lines changed: 122 additions & 197 deletions
Original file line numberDiff line numberDiff line change
@@ -1,207 +1,136 @@
11
module Code_block = struct
2-
type metadata = {
3-
language_tag : string Odoc_parser.Loc.with_location;
4-
labels : string Odoc_parser.Loc.with_location option;
5-
}
2+
type metadata = { language_tag : string; labels : string option }
63

74
type t = {
8-
location : Odoc_parser.Loc.span;
95
metadata : metadata option;
10-
contents : string;
6+
content : Location.t; (* Location of the content *)
7+
code_block : Location.t; (* Location of the enclosing code block *)
118
}
129
end
1310

14-
(* odoc-parser adjusts for the initial [** *)
15-
let docstring_start_adjustment = String.length "(**"
16-
17-
let drop_last lst =
18-
match List.rev lst with
19-
| [] -> None
20-
| last :: rev_tl -> Some (List.rev rev_tl, last)
11+
(* Parse and extract code block metadata from an odoc formatted docstring.
2112
22-
(* drop_first_and_last [1; 2; 3; 4] = Some (1, Some ([2; 3], 4)). *)
23-
let drop_first_and_last = function
24-
| [] -> None
25-
| first :: tl -> Some (first, drop_last tl)
13+
Code blocks are the only thing we're interested in. This function parses
14+
the given text and extracts the metadata and enough location information
15+
from the code blocks be able to String.sub them out of the original text.
2616
27-
let slice lines ~(start : Odoc_parser.Loc.point) ~(end_ : Odoc_parser.Loc.point)
28-
=
29-
let lines_to_include =
30-
Util.Array.slice lines ~from:(start.line - 1) ~to_:(end_.line - 1)
31-
|> Array.to_list
32-
in
33-
match drop_first_and_last lines_to_include with
34-
| None -> ""
35-
| Some (line, None) ->
36-
String.sub line start.column (end_.column - start.column)
37-
(* Imagine we were slicing the file from (Line 2, Column 3) to (Line 6, Column 7):
17+
[location] is the location of this docstring within the original file
18+
(ie, the location of the contents of the documentation comment). This is
19+
required so we can splice out the code blocks from the original file.
3820
39-
0123456789
40-
1 ----------
41-
2 ---[---
42-
3 ---------
43-
4 --
44-
5 ----------
45-
6 -------]--
46-
7 ----------
47-
8 ----------
21+
The results are prepended in reverse order onto [acc]. *)
22+
let extract_code_block_info acc ~(location : Lexing.position) ~docstring =
23+
let module O = Odoc_parser in
24+
let parsed = O.parse_comment ~location ~text:docstring in
4825

49-
The case below handles this multiline case, concatenating the included substrings
50-
from lines 2-6 ([lines_to_include]). *)
51-
| Some (first_line, Some (stripped, last_line)) ->
52-
let first_line =
53-
String.sub first_line start.column
54-
(String.length first_line - start.column)
26+
(* If odoc-parser produced any warnings, we raise them as errors here *)
27+
List.iter
28+
(fun error -> failwith (O.Warning.to_string error))
29+
(O.warnings parsed);
30+
31+
(* Extract the useful info from what odoc has given us.
32+
33+
Note, we don't use the contents of the code block that odoc has handed us
34+
as that has been stripped and we need all the relevant whitespace.
35+
Fortunately the location info give us enough info to be able to extract
36+
the code from the original text, whitespace and all.
37+
*)
38+
let handle_code_block : O.Loc.span -> _ -> Code_block.t =
39+
let convert_loc (sp : O.Loc.span) =
40+
Location.
41+
{
42+
loc_start = O.position_of_point parsed sp.start;
43+
loc_end = O.position_of_point parsed sp.end_;
44+
loc_ghost = false;
45+
}
46+
in
47+
fun location (metadata, { O.Loc.location = span; _ }) ->
48+
let metadata =
49+
Option.map
50+
(fun (lang, labels) ->
51+
let language_tag = O.Loc.value lang in
52+
let labels = Option.map O.Loc.value labels in
53+
Code_block.{ language_tag; labels })
54+
metadata
5555
in
56-
let last_line = String.sub last_line 0 end_.column in
57-
String.concat "\n" ([ first_line ] @ stripped @ [ last_line ])
58-
59-
let is_newline c = c = '\n'
60-
61-
let find_nth_line s =
62-
let max_index = String.length s - 1 in
63-
let indexes_of_newlines =
64-
s |> String.to_seqi
65-
|> Seq.filter_map (fun (i, c) ->
66-
match is_newline c with true -> Some i | false -> None)
67-
in
68-
let indexes_of_line_starts =
69-
indexes_of_newlines
70-
|> Seq.filter_map (fun i ->
71-
match i < max_index with true -> Some (i + 1) | false -> None)
56+
let content = convert_loc span in
57+
let code_block = convert_loc location in
58+
{ metadata; content; code_block }
7259
in
73-
(* first line always starts at index zero, even if there is no preceeding newline *)
74-
let indexes = 0 :: List.of_seq indexes_of_line_starts in
75-
fun nth ->
76-
(* index starts at zero but lines go from 1 *)
77-
List.nth_opt indexes (nth - 1)
78-
79-
let point_to_index offset_of_line_start (point : Odoc_parser.Loc.point) =
80-
let offset = offset_of_line_start point.line + point.column in
81-
(* on line 1 odoc-parser adjusts by the start of the docstring, undo *)
82-
match point.line with 1 -> offset - docstring_start_adjustment | _ -> offset
83-
84-
let initial_line_number = 1
8560

86-
let dislocate_point ~(location : Lexing.position)
87-
(point : Odoc_parser.Loc.point) =
88-
{ point with line = point.line - location.pos_lnum + initial_line_number }
89-
90-
let slice_location ~(location : Lexing.position) offset_of_line_start
91-
(span : Odoc_parser.Loc.span) s =
92-
let start = dislocate_point ~location span.start in
93-
let end_ = dislocate_point ~location span.end_ in
94-
let start_index = point_to_index offset_of_line_start start in
95-
let end_index = point_to_index offset_of_line_start end_ in
96-
let len = end_index - start_index in
97-
String.sub s start_index len
98-
99-
let extract_code_blocks ~(location : Lexing.position) ~docstring =
100-
let offset_in_string = find_nth_line docstring in
101-
let offset_of_line_start nth =
102-
match offset_in_string nth with
103-
| None -> Fmt.failwith "Attempting to reach invalid line"
104-
| Some offset -> offset
61+
(* Fold over the results from odoc-parser, recurse where necessary
62+
and extract the code block metadata *)
63+
let rec fold_fn acc (elt : O.Ast.block_element O.Loc.with_location) =
64+
match elt with
65+
| { O.Loc.value = `Code_block c; location } ->
66+
handle_code_block location c :: acc
67+
| { O.Loc.value = `List (_, _, lists); _ } ->
68+
List.fold_left (List.fold_left fold_fn) acc (lists :> O.Ast.t list)
69+
| { O.Loc.value = `Tag tag; _ } -> (
70+
match tag with
71+
| `Deprecated blocks
72+
| `Param (_, blocks)
73+
| `Raise (_, blocks)
74+
| `Return blocks
75+
| `See (_, _, blocks)
76+
| `Before (_, blocks) ->
77+
List.fold_left fold_fn acc (blocks :> O.Ast.t)
78+
| _ -> acc)
79+
| _ -> acc
10580
in
106-
let rec acc blocks =
107-
List.map
108-
(fun block ->
109-
match Odoc_parser.Loc.value block with
110-
| `Code_block (metadata, { Odoc_parser.Loc.value = _; location = span })
111-
->
112-
let metadata =
113-
Option.map
114-
(fun (language_tag, labels) ->
115-
Code_block.{ language_tag; labels })
116-
metadata
117-
in
118-
let contents =
119-
slice_location ~location offset_of_line_start span docstring
120-
in
121-
[ { Code_block.location = block.location; metadata; contents } ]
122-
| `List (_, _, lists) -> List.map acc lists |> List.concat
123-
| _ -> [])
124-
blocks
125-
|> List.concat
126-
in
127-
let parsed = Odoc_parser.parse_comment ~location ~text:docstring in
128-
List.iter
129-
(fun error -> failwith (Odoc_parser.Warning.to_string error))
130-
(Odoc_parser.warnings parsed);
131-
List.map
132-
(fun element ->
133-
match element with
134-
| { Odoc_parser.Loc.value = #Odoc_parser.Ast.nestable_block_element; _ }
135-
as e ->
136-
acc [ e ]
137-
| { value = `Tag tag; _ } -> (
138-
match tag with
139-
| `Deprecated blocks -> acc blocks
140-
| `Param (_, blocks) -> acc blocks
141-
| `Raise (_, blocks) -> acc blocks
142-
| `Return blocks -> acc blocks
143-
| `See (_, _, blocks) -> acc blocks
144-
| `Before (_, blocks) -> acc blocks
145-
| _ -> [])
146-
| { value = `Heading _; _ } -> [])
147-
(Odoc_parser.ast parsed)
148-
|> List.concat
149-
150-
let docstrings lexbuf =
151-
let rec loop list =
152-
match Lexer.token_with_comments lexbuf with
153-
| Parser.EOF -> list
154-
| Parser.DOCSTRING docstring ->
155-
let docstring =
156-
( Docstrings.docstring_body docstring,
157-
Docstrings.docstring_loc docstring )
158-
in
159-
loop (docstring :: list)
160-
| _ -> loop list
161-
in
162-
loop [] |> List.rev
163-
164-
let convert_pos (p : Lexing.position) (pt : Odoc_parser.Loc.point) =
165-
{ p with pos_lnum = pt.line; pos_cnum = pt.column }
16681

167-
let convert_loc (loc : Location.t) (sp : Odoc_parser.Loc.span) =
168-
let loc_start = convert_pos loc.loc_start sp.start in
169-
let loc_end = convert_pos loc.loc_end sp.end_ in
170-
{ loc with loc_start; loc_end }
82+
List.fold_left fold_fn acc (O.ast parsed)
17183

84+
(* This function handles string containing ocaml code. It parses it as ocaml
85+
via compiler-libs, then for each odoc-formatted comment it then parses
86+
that via odoc-parser. The end result is a list of metadata about the code
87+
blocks within the comments. The result is given as an in-order list of
88+
[Code_block.t] values. *)
17289
let docstring_code_blocks str =
173-
Lexer.handle_docstrings := true;
174-
Lexer.init ();
175-
List.map
176-
(fun (docstring, (cmt_loc : Location.t)) ->
177-
let location =
178-
{
179-
cmt_loc.loc_start with
180-
pos_cnum = cmt_loc.loc_start.pos_cnum + docstring_start_adjustment;
181-
}
90+
let initial_handle_docstrings = !Lexer.handle_docstrings in
91+
Fun.protect
92+
~finally:(fun () -> Lexer.handle_docstrings := initial_handle_docstrings)
93+
(fun () ->
94+
Lexer.handle_docstrings := true;
95+
Lexer.init ();
96+
let lexbuf = Lexing.from_string str in
97+
let rec loop list =
98+
match Lexer.token_with_comments lexbuf with
99+
| Parser.EOF -> list
100+
| Parser.DOCSTRING docstring ->
101+
let body = Docstrings.docstring_body docstring in
102+
let loc = Docstrings.docstring_loc docstring in
103+
104+
(* odoc-parser adjusts for the initial [** *)
105+
let adjustment = 3 (* String.length "(**" *) in
106+
107+
let location =
108+
{
109+
loc.loc_start with
110+
pos_cnum = loc.loc_start.pos_cnum + adjustment;
111+
}
112+
in
113+
loop (extract_code_block_info list ~location ~docstring:body)
114+
| _ -> loop list
182115
in
183-
let blocks = extract_code_blocks ~location ~docstring in
184-
List.map
185-
(fun (b : Code_block.t) -> (b, convert_loc cmt_loc b.location))
186-
blocks)
187-
(docstrings (Lexing.from_string str))
188-
|> List.concat
116+
loop [] |> List.rev)
189117
190-
let make_block ~loc code_block =
118+
(* Given code block metadata and the original file, this function splices the
119+
contents of the code block from the original text and creates an Mdx
120+
Block.t, or reports the error (e.g., from invalid tags) *)
121+
let make_block code_block file_contents =
191122
let handle_header = function
192123
| Some Code_block.{ language_tag; labels } ->
193124
let open Util.Result.Infix in
194-
let language_tag = Odoc_parser.Loc.value language_tag in
195125
let header = Block.Header.of_string language_tag in
196126
let* labels =
197127
match labels with
198128
| None -> Ok []
199129
| Some labels -> (
200-
let labels = Odoc_parser.Loc.value labels |> String.trim in
201-
match Label.of_string labels with
130+
match Label.of_string (String.trim labels) with
202131
| Ok labels -> Ok labels
203-
| Error msgs -> Error (List.hd msgs)
204-
(* TODO: Report precise location *))
132+
| Error msgs ->
133+
Error (List.hd msgs) (* TODO: Report precise location *))
205134
in
206135
let language_label = Label.Language_tag language_tag in
207136
Ok (header, language_label :: labels)
@@ -212,47 +141,43 @@ let make_block ~loc code_block =
212141
match handle_header code_block.Code_block.metadata with
213142
| Error _ as e -> e
214143
| Ok (header, labels) ->
215-
let contents = String.split_on_char '\n' code_block.contents in
216-
Block.mk ~loc ~section:None ~labels ~header ~contents ~legacy_labels:false
217-
~errors:[]
144+
let slice (loc : Location.t) =
145+
let start = loc.loc_start.pos_cnum in
146+
let len = loc.loc_end.pos_cnum - start in
147+
String.sub file_contents start len
148+
in
149+
let contents = slice code_block.content |> String.split_on_char '\n' in
150+
Block.mk ~loc:code_block.code_block ~section:None ~labels ~header
151+
~contents ~legacy_labels:false ~errors:[]
218152
219153
let parse_mli file_contents =
220154
(* Find the locations of the code blocks within [file_contents], then slice it up into
221155
[Text] and [Block] parts by using the starts and ends of those blocks as
222156
boundaries. *)
223157
let code_blocks = docstring_code_blocks file_contents in
224-
let cursor = { Odoc_parser.Loc.line = 1; column = 0 } in
225-
let lines = String.split_on_char '\n' file_contents |> Array.of_list in
226158
let cursor, tokens =
227159
List.fold_left
228-
(fun (cursor, code_blocks) ((code_block : Code_block.t), loc) ->
160+
(fun (cursor, code_blocks) (code_block : Code_block.t) ->
229161
let pre_text =
230162
Document.Text
231-
(slice lines ~start:cursor ~end_:code_block.location.start)
163+
(String.sub file_contents cursor
164+
(code_block.code_block.loc_start.pos_cnum - cursor))
232165
in
233166
let block =
234-
match make_block ~loc code_block with
167+
match make_block code_block file_contents with
235168
| Ok block -> Document.Block block
236169
| Error (`Msg msg) -> Fmt.failwith "Error creating block: %s" msg
237170
in
238-
let cursor = code_block.location.end_ in
239171
(* append them in reverse order, since this is a fold_left *)
240172
let code_blocks = block :: pre_text :: code_blocks in
241-
(cursor, code_blocks))
242-
(cursor, []) code_blocks
173+
(code_block.code_block.loc_end.pos_cnum, code_blocks))
174+
(0, []) code_blocks
243175
in
244176
let tokens = List.rev tokens in
245-
let eof =
246-
{
247-
Odoc_parser.Loc.line = Array.length lines;
248-
column = String.length lines.(Array.length lines - 1);
249-
}
250-
in
251-
let eof_is_beyond_location (loc : Odoc_parser.Loc.point) =
252-
eof.line > loc.line || (eof.line = loc.line && eof.column > loc.column)
253-
in
254-
if eof_is_beyond_location cursor then
255-
let remainder = slice lines ~start:cursor ~end_:eof in
177+
if cursor < String.length file_contents then
178+
let remainder =
179+
String.sub file_contents cursor (String.length file_contents - cursor)
180+
in
256181
if not (String.equal remainder "") then tokens @ [ Text remainder ]
257182
else tokens
258183
else tokens

0 commit comments

Comments
 (0)