forked from borglab/wrap
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathxml_parser.py
More file actions
304 lines (254 loc) · 13.9 KB
/
xml_parser.py
File metadata and controls
304 lines (254 loc) · 13.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
import os
import sys
from pathlib import Path
import xml.etree.ElementTree as ET
class XMLDocParser:
"""
Parses and extracts docs from Doxygen-generated XML.
"""
def __init__(self):
# Memory for overloaded functions with identical parameter name sets
self._memory = {}
# This is useful for investigating functions that cause problems for extract_docstring.
# Set this to true to have useful information for debugging this class, as in the CLI
# function at the bottom of this class.
self._verbose = False
def parse_xml(self, xml_file: str):
"""
Get the ElementTree of an XML file given the file name.
If an error occurs, prints a warning and returns None.
"""
try:
return ET.parse(xml_file)
except FileNotFoundError:
print(f"Warning: XML file '{xml_file}' not found.")
return None
except ET.ParseError:
print(f"Warning: Failed to parse XML file '{xml_file}'.")
return None
def extract_docstring(self, xml_folder: str, cpp_class: str,
cpp_method: str, method_args_names: 'list[str]'):
"""
Extract the docstrings for a C++ class's method from the Doxygen-generated XML.
Args:
xml_folder (str): The path to the folder that contains all of the Doxygen-generated XML.
cpp_class (str): The name of the C++ class that contains the function whose docstring is to be extracted.
cpp_method (str): The name of the C++ method whose docstring is to be extracted.
method_args_names (list): A list of the names of the cpp_method's parameters.
"""
self.print_if_verbose(f"Extracting docs for {cpp_class}.{cpp_method}")
# Get all of the member definitions in cpp_class with name cpp_method
maybe_member_defs = self.get_member_defs(xml_folder, cpp_class,
cpp_method)
# Filter member definitions which don't match the given argument names
member_defs, ignored_params = self.filter_member_defs(
maybe_member_defs, method_args_names)
# Find which member to get docs from, if there are multiple that match in name and args
documenting_index = self.determine_documenting_index(
cpp_class, cpp_method, method_args_names, member_defs)
# Extract the docs for the function that matches cpp_class.cpp_method(*method_args_names).
return self.get_formatted_docstring(member_defs[documenting_index],
ignored_params) if member_defs else ""
def get_member_defs(self, xml_folder: str, cpp_class: str,
cpp_method: str):
"""Get all of the member definitions in cpp_class with name cpp_method.
Args:
xml_folder (str): The folder containing the Doxygen XML documentation.
cpp_class (str): The name of the C++ class that contains the function whose docstring is to be extracted.
cpp_method (str): The name of the C++ method whose docstring is to be extracted.
Returns:
list: All of the member definitions in cpp_class with name cpp_method.
"""
xml_folder_path = Path(xml_folder)
# Create the path to the Doxygen XML index file.
xml_index_file = xml_folder_path / "index.xml"
# Parse the index file
index_tree = self.parse_xml(xml_index_file)
if not index_tree:
self.print_if_verbose(f"Index file {xml_index_file} was empty.")
return ""
index_root = index_tree.getroot()
# Find the compound with name == cpp_class
class_index = index_root.find(f"./*[name='{cpp_class}']")
if class_index is None:
self.print_if_verbose(
f"Could not extract docs for {cpp_class}.{cpp_method}; class not found in index file."
)
return ""
# Create the path to the file with the documentation for cpp_class.
xml_class_file = xml_folder_path / f"{class_index.attrib['refid']}.xml"
# Parse the class file
class_tree = self.parse_xml(xml_class_file)
if not class_tree:
self.print_if_verbose(f"Class file {xml_class_file} was empty.")
return ""
class_root = class_tree.getroot()
# Find the member(s) in cpp_class with name == cpp_method
maybe_member_defs = class_root.findall(
f"compounddef/sectiondef//*[name='{cpp_method}']")
return maybe_member_defs
def filter_member_defs(self, maybe_member_defs: list,
method_args_names: list):
"""
Remove member definitions which do not match the supplied argument names list.
Args:
maybe_member_defs (list): The list of all member definitions in the class which share the same name.
method_args_names (list): The list of argument names in the definition of the function whose documentation is desired.
Supplying the argument names allows for the filtering of overloaded functions with the same name but different arguments.
Returns:
tuple[list, list]: (the filtered member definitions, parameters which should be ignored because they are optional)
"""
member_defs = []
# Optional parameters we should ignore if we encounter them in the docstring
ignored_params = []
# Filter out the members which don't match the method_args_names
for maybe_member_def in maybe_member_defs:
self.print_if_verbose(
f"Investigating member_def with argstring {maybe_member_def.find('argsstring').text}"
)
# Find the number of required parameters and the number of total parameters from the
# Doxygen XML for this member_def
params = maybe_member_def.findall("param")
num_tot_params = len(params)
# Calculate required params by subtracting the number of optional params (params where defval is
# set--defval means default value) from the number of total params
num_req_params = num_tot_params - sum([
1 if param.find("defval") is not None else 0
for param in params
])
# If the number of parameters in method_args_names matches neither number, eliminate this member_def
# This is done because wrap generates a python wrapper function twice for every function with
# optional parameters: one with none of the optional parameters, and one with all of the optional
# parameters, required.
if len(method_args_names) != num_req_params and len(
method_args_names) != num_tot_params:
self.print_if_verbose(
f"Wrong number of parameters: got {len(method_args_names)}, expected required {num_req_params} or total {num_tot_params}."
)
continue
# If the parameter names don't match, eliminate this member_def
eliminate = False
for i, arg_name in enumerate(method_args_names):
# Try to find the name of the parameter in the XML
param_name = params[i].find(
"declname"
) # declname is the tag that usually contains the param name
# If we couldn't find the declname, try the defname (used uncommonly)
if param_name is None:
param_name = params[i].find("defname")
if param_name is None:
# Can't find the name for this parameter. This may be an unreachable statement but Doxygen is
# not well-documented enough to rely on a <declname> or a <defname> always being defined inside a <param>.
eliminate = True
continue
# Eliminate if any param name doesn't match the expected name
if arg_name != param_name.text:
eliminate = True
if eliminate:
self.print_if_verbose("Names didn't match.")
continue
# At this point, this member_def can be assumed to be the desired function (or is indistinguishable
# from it based on all of the reliable information we have--if this is the case, we need to rely on
# the _memory to give the correct docs for each.)
member_defs.append(maybe_member_def)
self.print_if_verbose("Confirmed as correct function.")
# Remember which parameters to ignore, if any
for i in range(len(method_args_names), num_tot_params):
ignored_params.append(params[i].find("declname").text)
return member_defs, ignored_params
def determine_documenting_index(self, cpp_class: str, cpp_method: str,
method_args_names: list,
member_defs: list):
"""
Determine which member definition to retrieve documentation from, if there are multiple.
Args:
cpp_class (str): The name of the C++ class that contains the function whose docstring is to be extracted.
cpp_method (str): The name of the C++ method whose docstring is to be extracted.
method_args_names (list): A list of the names of the cpp_method's parameters.
member_defs (list): All of the member definitions of cpp_class which match cpp_method in name
and whose arguments have the same names as method_args_names.
Returns:
int: The index indicating which member definition to document.
"""
# If there are multiple member defs that match the method args names,
# remember how many we've encountered already so that we can return
# the docs for the first one we haven't yet extracted.
# This is only relevant if there are overloaded functions where the
# parameter types are different but the parameter names are the same,
# e.g. foo(int bar) and foo(string bar). The parameter types cannot be
# relied on because they cannot be assumed to be the same between GTSAM
# implementation and pybind11 generated wrapper, e.g. OptionalJacobian
# in GTSAM becomes Eigen::Matrix in the pybind11 code.
documenting_index = 0
if len(member_defs) > 1:
function_key = f"{cpp_class}.{cpp_method}({','.join(method_args_names) if method_args_names else ''})"
if function_key in self._memory:
self._memory[function_key] += 1
documenting_index = self._memory[function_key]
else:
self._memory[function_key] = 0
return documenting_index
def get_formatted_docstring(self,
member_def: 'xml.etree.ElementTree.Element',
ignored_params: list):
"""Gets the formatted docstring for the supplied XML element representing a member definition.
Args:
member_def (xml.etree.ElementTree.Element): The member definition to document.
ignored_params (list): The optional parameters which should be ignored, if any.
Returns:
str: The formatted docstring.
"""
docstring = ""
brief_description = member_def.find(".//briefdescription")
detailed_description = member_def.find(".//detaileddescription")
# Add the brief description first, if it exists.
if brief_description is not None:
for para in brief_description.findall("para"):
docstring += "".join(t for t in para.itertext() if t.strip())
# Add the detailed description. This includes the parameter list and the return value.
if detailed_description is not None:
docstring += "\n"
# Add non-parameter detailed description
for element in list(detailed_description):
if element.tag == "para" and "parameterlist" not in [
e.tag for e in element
]:
docstring += "".join(
t for t in element.itertext() if t.strip()) + " "
# Add parameter docs
parameter_list = detailed_description.find(".//parameterlist")
if parameter_list is not None:
for i, parameter_item in enumerate(
parameter_list.findall(".//parameteritem")):
name = parameter_item.find(".//parametername").text
desc = parameter_item.find(
".//parameterdescription/para").text
if name not in ignored_params:
docstring += f"{name.strip() if name else f'[Parameter {i}]'}: {desc.strip() if desc else 'No description provided'}\n"
# Add return value docs
return_sect = detailed_description.find(".//simplesect")
if return_sect is not None and return_sect.attrib[
"kind"] == "return" and return_sect.find(
"para").text is not None:
docstring += f"Returns: {return_sect.find('para').text.strip()}"
return docstring.strip()
def print_if_verbose(self, text: str):
"""
Print text if the parser is in verbose mode.
"""
if self._verbose:
print(text)
if __name__ == "__main__":
if len(sys.argv) != 5:
print(
"Usage: python xml_parser.py <doxygen_xml_folder> <cpp_class> <cpp_method> <method_args_names (comma-separated)>"
)
else:
parser = XMLDocParser()
parser._verbose = True
xml_file = sys.argv[1]
extracted_doc = parser.extract_docstring(xml_file, sys.argv[2],
sys.argv[3],
sys.argv[4].split(","))
print()
print(extracted_doc.strip())