Commit | Line | Data |
---|---|---|
f9e62be7 DW |
1 | #!/usr/bin/env python |
2 | """Doxygen XML to SWIG docstring converter. | |
3 | ||
4 | Usage: | |
5 | ||
6 | doxy2swig.py [options] input.xml output.i | |
7 | ||
8 | Converts Doxygen generated XML files into a file containing docstrings | |
9 | that can be used by SWIG-1.3.x. Note that you need to get SWIG | |
10 | version > 1.3.23 or use Robin Dunn's docstring patch to be able to use | |
11 | the resulting output. | |
12 | ||
13 | input.xml is your doxygen generated XML file and output.i is where the | |
14 | output will be written (the file will be clobbered). | |
15 | ||
16 | """ | |
1cbfde94 | 17 | # |
f9e62be7 DW |
18 | # |
19 | # This code is implemented using Mark Pilgrim's code as a guideline: | |
20 | # http://www.faqs.org/docs/diveintopython/kgp_divein.html | |
21 | # | |
22 | # Author: Prabhu Ramachandran | |
23 | # License: BSD style | |
24 | # | |
25 | # Thanks: | |
26 | # Johan Hake: the include_function_definition feature | |
27 | # Bill Spotz: bug reports and testing. | |
28 | # Sebastian Henschel: Misc. enhancements. | |
29 | # | |
1cbfde94 | 30 | # |
f9e62be7 DW |
31 | |
32 | from xml.dom import minidom | |
33 | import re | |
34 | import textwrap | |
35 | import sys | |
f9e62be7 DW |
36 | import os.path |
37 | import optparse | |
38 | ||
39 | ||
def my_open_read(source):
    """Return a readable file object for `source`.

    `source` may be an object that already has a ``read`` attribute, in
    which case it is returned unchanged, or anything else (typically a
    filename), in which case it is passed to the builtin ``open`` in
    the default read mode.  The caller is responsible for closing the
    returned object.

    """
    if hasattr(source, "read"):
        return source
    return open(source)
45 | ||
1cbfde94 | 46 | |
f9e62be7 DW |
def my_open_write(dest):
    """Return a writable file object for `dest`.

    `dest` may be an object that already has a ``write`` attribute, in
    which case it is returned unchanged, or anything else (typically a
    filename), in which case it is opened with mode ``'w'`` (an
    existing file is truncated).  The caller is responsible for closing
    the returned object.

    """
    if hasattr(dest, "write"):
        return dest
    return open(dest, 'w')
52 | ||
53 | ||
1cbfde94 MZ |
class Doxy2SWIG:

    """Converts Doxygen generated XML files into a file containing
    docstrings that can be used by SWIG-1.3.x that have support for
    feature("docstring").  Once the data is parsed it is stored in
    self.pieces.

    The DOM is walked recursively: `parse` dispatches on the DOM node
    class (`parse_Document`, `parse_Element`, `parse_Text`,
    `parse_Comment`), and `parse_Element` dispatches on the tag name to
    a `do_<tagName>` method when one exists, falling back to
    `generic_parse` otherwise.

    """

    def __init__(self, src, include_function_definition=True, quiet=False):
        """Initialize the instance given a source object.  `src` can
        be a file or filename.  If you do not want to include function
        definitions from doxygen then set
        `include_function_definition` to `False`.  This is handy since
        this allows you to use the swig generated function definition
        using %feature("autodoc", [0,1]).

        If `quiet` is true, `do_doxygenindex` does not print the name
        of each file it parses.

        The source is parsed immediately and then closed, even when
        `src` was passed in as an already open file object.

        """
        f = my_open_read(src)
        # File-like objects that carry no ``name`` attribute (for
        # example an in-memory buffer) fall back to an empty name
        # instead of raising AttributeError.
        src_name = getattr(f, 'name', '')
        self.my_dir = os.path.dirname(src_name)
        try:
            self.xmldoc = minidom.parse(f).documentElement
        finally:
            # Close the handle even if the XML is malformed.
            f.close()

        self.pieces = []
        self.pieces.append('\n// File: %s\n' %
                           os.path.basename(src_name))

        # Matches a run of whitespace; used to detect blank text nodes.
        self.space_re = re.compile(r'\s+')
        # Matches whitespace between the opening quote of a %feature
        # directive and the first non-space character of its text.
        self.lead_spc = re.compile(r'^(%feature\S+\s+\S+\s*?)"\s+(\S)')
        # Set to 1 by `do_doxygenindex`; `write` then skips the final
        # clean-up because the pieces were already cleaned per file.
        self.multi = 0
        # Tag names that `parse_Element` drops without processing.
        self.ignores = ['inheritancegraph', 'param', 'listofallmembers',
                        'innerclass', 'name', 'declname', 'incdepgraph',
                        'invincdepgraph', 'programlisting', 'type',
                        'references', 'referencedby', 'location',
                        'collaborationgraph', 'reimplements',
                        'reimplementedby', 'derivedcompoundref',
                        'basecompoundref']
        #self.generics = []
        self.include_function_definition = include_function_definition
        if not include_function_definition:
            self.ignores.append('argsstring')

        self.quiet = quiet

    def generate(self):
        """Parses the file set in the initialization.  The resulting
        data is stored in `self.pieces`.

        """
        self.parse(self.xmldoc)

    def parse(self, node):
        """Parse a given node.  This function in turn calls the
        `parse_<nodeType>` functions which handle the respective
        nodes.

        The handler is looked up by the node's class name, so a node
        class without a matching `parse_<ClassName>` method raises
        AttributeError.

        """
        pm = getattr(self, "parse_%s" % node.__class__.__name__)
        pm(node)

    def parse_Document(self, node):
        """Parse a `DOCUMENT_NODE` by parsing its document element."""
        self.parse(node.documentElement)

    def parse_Text(self, node):
        """Parse a `TEXT_NODE` and add its wrapped text to the output.

        Every backslash is replaced by four backslashes and every
        double quote is preceded by a backslash before the text is
        added (NOTE(review): presumably so the text survives inside
        the quoted SWIG docstring -- confirm).  Text consisting only of
        whitespace is dropped.

        """
        txt = node.data
        txt = txt.replace('\\', r'\\\\')
        txt = txt.replace('"', r'\"')
        # ignore pure whitespace
        m = self.space_re.match(txt)
        if m and len(m.group()) == len(txt):
            pass
        else:
            self.add_text(textwrap.fill(txt, break_long_words=False))

    def parse_Element(self, node):
        """Parse an `ELEMENT_NODE`.  This calls specific
        `do_<tagName>` handlers for different elements.  If no handler
        is available the `generic_parse` method is called.  All
        tagNames specified in `self.ignores` are simply ignored.

        """
        name = node.tagName
        ignores = self.ignores
        if name in ignores:
            return
        attr = "do_%s" % name
        if hasattr(self, attr):
            handlerMethod = getattr(self, attr)
            handlerMethod(node)
        else:
            self.generic_parse(node)
            #if name not in self.generics: self.generics.append(name)

    def parse_Comment(self, node):
        """Parse a `COMMENT_NODE`.  This does nothing for now."""
        return

    def add_text(self, value):
        """Adds text corresponding to `value` into `self.pieces`.

        A list or tuple is added element by element; anything else is
        appended as a single piece.

        """
        if isinstance(value, (list, tuple)):
            self.pieces.extend(value)
        else:
            self.pieces.append(value)

    def get_specific_nodes(self, node, names):
        """Given a node and a sequence of strings in `names`, return a
        dictionary containing the names as keys and child
        `ELEMENT_NODEs`, that have a `tagName` equal to the name.

        Only direct children are considered.  When several children
        share a tag name, the last one wins.

        """
        nodes = [(x.tagName, x) for x in node.childNodes
                 if x.nodeType == x.ELEMENT_NODE and
                 x.tagName in names]
        return dict(nodes)

    def generic_parse(self, node, pad=0):
        """A Generic parser for arbitrary tags in a node.

        Parameters:

         - node:  A node in the DOM.
         - pad: `int` (default: 0)

           If 0 the node data is not padded with newlines.  If 1 it
           appends a newline after parsing the childNodes.  If 2 it
           pads before and after the nodes are processed.  Defaults to
           0.

        The trailing newline is only added when at least one piece was
        produced since this call started (for pad == 2 the leading
        newline itself counts).

        """
        npiece = 0
        if pad:
            npiece = len(self.pieces)
            if pad == 2:
                self.add_text('\n')
        for n in node.childNodes:
            self.parse(n)
        if pad:
            if len(self.pieces) > npiece:
                self.add_text('\n')

    def space_parse(self, node):
        """Add a single space and then parse the node generically."""
        self.add_text(' ')
        self.generic_parse(node)

    # Inline markup elements are all emitted as " " + their content.
    do_ref = space_parse
    do_emphasis = space_parse
    do_bold = space_parse
    do_computeroutput = space_parse
    do_formula = space_parse

    def do_compoundname(self, node):
        """Open a %feature("docstring") directive for the compound
        named by the text of `node`."""
        self.add_text('\n\n')
        data = node.firstChild.data
        self.add_text('%%feature("docstring") %s "\n' % data)

    def do_compounddef(self, node):
        """Handle a `compounddef` element according to its `kind`.

        For a class or struct, non-public compounds are skipped.
        Otherwise the compound name, brief description, detailed
        description and includes are emitted first (in that order),
        the docstring is closed, and then the remaining children are
        parsed.  For a file or namespace every `sectiondef` descendant
        is parsed.  Other kinds produce no output.

        """
        kind = node.attributes['kind'].value
        if kind in ('class', 'struct'):
            prot = node.attributes['prot'].value
            if prot != 'public':
                return
            names = ('compoundname', 'briefdescription',
                     'detaileddescription', 'includes')
            first = self.get_specific_nodes(node, names)
            for n in names:
                # `dict.has_key` no longer exists on Python 3; the
                # `in` operator works on both Python 2 and 3.
                if n in first:
                    self.parse(first[n])
            self.add_text(['";', '\n'])
            for n in node.childNodes:
                if n not in first.values():
                    self.parse(n)
        elif kind in ('file', 'namespace'):
            nodes = node.getElementsByTagName('sectiondef')
            for n in nodes:
                self.parse(n)

    def do_includes(self, node):
        """Emit a "C++ includes: " line followed by the node content."""
        self.add_text('C++ includes: ')
        self.generic_parse(node, pad=1)

    def do_parameterlist(self, node):
        """Emit a section heading for a parameter list and parse it.

        The heading is derived from the `kind` attribute: "Parameters"
        for `param`, "Exceptions" for `exception`, "Returns" for
        `retval`, the raw attribute value for anything else, and
        "unknown" when the attribute is absent.

        """
        text = 'unknown'
        for key, val in node.attributes.items():
            if key == 'kind':
                if val == 'param':
                    text = 'Parameters'
                elif val == 'exception':
                    text = 'Exceptions'
                elif val == 'retval':
                    text = 'Returns'
                else:
                    text = val
                break
        self.add_text(['\n', '\n', text, ':', '\n'])
        self.generic_parse(node, pad=1)

    def do_para(self, node):
        """Emit a paragraph: a newline, the content, a newline."""
        self.add_text('\n')
        self.generic_parse(node, pad=1)

    def do_parametername(self, node):
        """Emit a parameter name on its own line.

        Names containing "Exception" are emitted as is; all other
        names are followed by ": ".

        """
        self.add_text('\n')
        try:
            data = node.firstChild.data
        except AttributeError:  # perhaps a <ref> tag in it
            data = node.firstChild.firstChild.data
        if 'Exception' in data:
            self.add_text(data)
        else:
            self.add_text("%s: " % data)

    def do_parameterdefinition(self, node):
        """Parse a parameter definition followed by a newline."""
        self.generic_parse(node, pad=1)

    def do_detaileddescription(self, node):
        """Parse a detailed description followed by a newline."""
        self.generic_parse(node, pad=1)

    def do_briefdescription(self, node):
        """Parse a brief description followed by a newline."""
        self.generic_parse(node, pad=1)

    def do_memberdef(self, node):
        """Emit a %feature("docstring") directive for a public member.

        Non-public members, members whose name starts with "operator",
        members without a `definition` child, and variables/typedefs
        are skipped.  The directive is qualified with the enclosing
        namespace or class name where one can be found, and starts
        with the doxygen definition when
        `self.include_function_definition` is set.

        """
        prot = node.attributes['prot'].value
        kind = node.attributes['kind'].value
        # The first `compounddef` found below the great-grandparent
        # determines how the member name gets qualified.
        tmp = node.parentNode.parentNode.parentNode
        compdef = tmp.getElementsByTagName('compounddef')[0]
        cdef_kind = compdef.attributes['kind'].value

        if prot == 'public':
            first = self.get_specific_nodes(node, ('definition', 'name'))
            name = first['name'].firstChild.data
            if name.startswith('operator'):  # Don't handle operators yet.
                return

            if 'definition' not in first or \
                    kind in ['variable', 'typedef']:
                return

            if self.include_function_definition:
                defn = first['definition'].firstChild.data
            else:
                defn = ""
            self.add_text('\n')
            self.add_text('%feature("docstring") ')

            anc = node.parentNode.parentNode
            if cdef_kind in ('file', 'namespace'):
                ns_node = anc.getElementsByTagName('innernamespace')
                if not ns_node and cdef_kind == 'namespace':
                    ns_node = anc.getElementsByTagName('compoundname')
                if ns_node:
                    ns = ns_node[0].firstChild.data
                    self.add_text(' %s::%s "\n%s' % (ns, name, defn))
                else:
                    self.add_text(' %s "\n%s' % (name, defn))
            elif cdef_kind in ('class', 'struct'):
                # Get the full function name.
                anc_node = anc.getElementsByTagName('compoundname')
                cname = anc_node[0].firstChild.data
                self.add_text(' %s::%s "\n%s' % (cname, name, defn))

            # `name` and `definition` were consumed above; parse the
            # remaining children into the docstring body.
            for n in node.childNodes:
                if n not in first.values():
                    self.parse(n)
            self.add_text(['";', '\n'])

    def do_definition(self, node):
        """Emit the definition text as both directive name and first
        line of the docstring body."""
        data = node.firstChild.data
        self.add_text('%s "\n%s' % (data, data))

    def do_sectiondef(self, node):
        """Parse function sections; all other section kinds are
        skipped."""
        kind = node.attributes['kind'].value
        if kind in ('public-func', 'func', 'user-defined', ''):
            self.generic_parse(node)

    def do_header(self, node):
        """For a user defined section def a header field is present
        which should not be printed as such, so we comment it in the
        output."""
        data = node.firstChild.data
        self.add_text('\n/*\n %s \n*/\n' % data)
        # If the node two positions after the header is a
        # 'description' node then we should comment that out also and
        # remove it from the parent node's children.
        # NOTE(review): the offset of 2 presumably skips a whitespace
        # text node between the two elements -- confirm.
        parent = node.parentNode
        idx = parent.childNodes.index(node)
        # Index idx + 2 is only valid when the list is strictly longer
        # than idx + 2; the former `>=` test raised IndexError when
        # exactly one sibling followed the header.
        if len(parent.childNodes) > idx + 2:
            nd = parent.childNodes[idx + 2]
            if nd.nodeName == 'description':
                nd = parent.removeChild(nd)
                self.add_text('\n/*')
                self.generic_parse(nd)
                self.add_text('\n*/\n')

    def do_simplesect(self, node):
        """Handle a `simplesect` element according to its `kind`.

        `date`, `rcs` and `version` sections are dropped, `warning`
        sections are prefixed with "WARNING: ", `see` sections with
        "See: ", and everything else is parsed generically.

        """
        kind = node.attributes['kind'].value
        if kind in ('date', 'rcs', 'version'):
            pass
        elif kind == 'warning':
            self.add_text(['\n', 'WARNING: '])
            self.generic_parse(node)
        elif kind == 'see':
            self.add_text('\n')
            self.add_text('See: ')
            self.generic_parse(node)
        else:
            self.generic_parse(node)

    def do_argsstring(self, node):
        """Parse the argument string followed by a newline."""
        self.generic_parse(node, pad=1)

    def do_member(self, node):
        """Parse `member` entries that are functions whose `refid`
        starts with "namespace"; all others are skipped."""
        kind = node.attributes['kind'].value
        refid = node.attributes['refid'].value
        if kind == 'function' and refid.startswith('namespace'):
            self.generic_parse(node)

    def do_doxygenindex(self, node):
        """Process a doxygen index file.

        Every `compound` entry names another XML file
        (`<refid>.xml`), looked up first relative to the current
        directory and then relative to the directory of the index
        file.  Each file is converted with a fresh `Doxy2SWIG`
        instance and its cleaned pieces are appended to this
        instance's pieces.

        """
        self.multi = 1
        comps = node.getElementsByTagName('compound')
        for c in comps:
            refid = c.attributes['refid'].value
            fname = refid + '.xml'
            if not os.path.exists(fname):
                fname = os.path.join(self.my_dir, fname)
            if not self.quiet:
                print("parsing file: %s" % fname)
            p = Doxy2SWIG(fname, self.include_function_definition, self.quiet)
            p.generate()
            self.pieces.extend(self.clean_pieces(p.pieces))

    def write(self, fname):
        """Write the collected pieces to `fname`.

        `fname` can be a filename or an object with a `write` method.
        The destination is closed afterwards, even when it was passed
        in as an already open file object.

        """
        o = my_open_write(fname)
        try:
            if self.multi:
                # Pieces from an index were already cleaned per file.
                o.write("".join(self.pieces))
            else:
                o.write("".join(self.clean_pieces(self.pieces)))
        finally:
            o.close()

    def clean_pieces(self, pieces):
        """Cleans the list of strings given as `pieces`.  It replaces
        multiple newlines by a maximum of 2 and returns a new list.
        It also wraps the paragraphs nicely.

        """
        ret = []
        count = 0
        for i in pieces:
            if i == '\n':
                count = count + 1
            else:
                if i == '";':
                    # At most one newline before a closing quote.
                    if count:
                        ret.append('\n')
                elif count > 2:
                    ret.append('\n\n')
                elif count:
                    ret.append('\n' * count)
                count = 0
                ret.append(i)

        _data = "".join(ret)
        ret = []
        for i in _data.split('\n\n'):
            if i == 'Parameters:' or i == 'Exceptions:' or i == 'Returns:':
                # Underline section headings with dashes.
                ret.extend([i, '\n' + '-' * len(i), '\n\n'])
            elif i.find('// File:') > -1:  # leave comments alone.
                ret.extend([i, '\n'])
            else:
                _tmp = textwrap.fill(i.strip(), break_long_words=False)
                # Drop whitespace right after a directive's opening
                # quote.
                _tmp = self.lead_spc.sub(r'\1"\2', _tmp)
                ret.extend([_tmp, '\n\n'])
        return ret
427 | ||
428 | ||
def convert(input, output, include_function_definition=True, quiet=False):
    """Convert the doxygen XML in `input` and write the result to
    `output`.

    `input` and `output` are passed to `Doxy2SWIG` and
    `Doxy2SWIG.write` respectively, so each can be a filename or a
    file object.  `include_function_definition` and `quiet` are
    forwarded to `Doxy2SWIG` unchanged.

    """
    p = Doxy2SWIG(input, include_function_definition, quiet)
    p.generate()
    p.write(output)
433 | ||
1cbfde94 | 434 | |
f9e62be7 DW |
def main():
    """Command line entry point.

    Parses the options from ``sys.argv`` and runs `convert` on the two
    positional arguments (input XML file and output file).  Exits with
    a usage error unless exactly two positional arguments are given.

    """
    usage = __doc__
    parser = optparse.OptionParser(usage)
    parser.add_option("-n", '--no-function-definition',
                      action='store_true',
                      default=False,
                      dest='func_def',
                      help='do not include doxygen function definitions')
    parser.add_option("-q", '--quiet',
                      action='store_true',
                      default=False,
                      dest='quiet',
                      help='be quiet and minimize output')

    options, args = parser.parse_args()
    if len(args) != 2:
        # optparse already prefixes the message with "error: ", and
        # the check also fires when too many arguments are given.
        parser.error("expected exactly two arguments: input.xml output.i")

    # `func_def` is set by --no-function-definition, hence the negation.
    convert(args[0], args[1], not options.func_def, options.quiet)
1cbfde94 | 454 | |
f9e62be7 DW |
455 | |
# Run the converter only when executed as a script, not on import.
if __name__ == '__main__':
    main()