Commit | Line | Data |
---|---|---|
f9e62be7 DW |
1 | #!/usr/bin/env python |
2 | """Doxygen XML to SWIG docstring converter. | |
3 | ||
4 | Usage: | |
5 | ||
6 | doxy2swig.py [options] input.xml output.i | |
7 | ||
8 | Converts Doxygen generated XML files into a file containing docstrings | |
9 | that can be used by SWIG-1.3.x. Note that you need to get SWIG | |
10 | version > 1.3.23 or use Robin Dunn's docstring patch to be able to use | |
11 | the resulting output. | |
12 | ||
13 | input.xml is your doxygen generated XML file and output.i is where the | |
14 | output will be written (the file will be clobbered). | |
15 | ||
16 | """ | |
17 | ###################################################################### | |
18 | # | |
19 | # This code is implemented using Mark Pilgrim's code as a guideline: | |
20 | # http://www.faqs.org/docs/diveintopython/kgp_divein.html | |
21 | # | |
22 | # Author: Prabhu Ramachandran | |
23 | # License: BSD style | |
24 | # | |
25 | # Thanks: | |
26 | # Johan Hake: the include_function_definition feature | |
27 | # Bill Spotz: bug reports and testing. | |
28 | # Sebastian Henschel: Misc. enhancements. | |
29 | # | |
30 | ###################################################################### | |
31 | ||
32 | from xml.dom import minidom | |
33 | import re | |
34 | import textwrap | |
35 | import sys | |
f9e62be7 DW |
36 | import os.path |
37 | import optparse | |
38 | ||
39 | ||
def my_open_read(source):
    """Return a readable file object for `source`.

    `source` is returned unchanged when it already looks like a file
    (it has a `read` attribute); otherwise it is treated as a path
    and opened for reading.

    """
    if not hasattr(source, "read"):
        return open(source)
    return source
45 | ||
def my_open_write(dest):
    """Return a writable file object for `dest`.

    `dest` is returned unchanged when it already looks like a file
    (it has a `write` attribute); otherwise it is treated as a path
    and opened for writing, truncating any existing file.

    """
    if not hasattr(dest, "write"):
        return open(dest, 'w')
    return dest
51 | ||
52 | ||
class Doxy2SWIG:
    """Converts Doxygen generated XML files into a file containing
    docstrings that can be used by SWIG-1.3.x that have support for
    feature("docstring").  Once the data is parsed it is stored in
    self.pieces.

    """

    def __init__(self, src, include_function_definition=True, quiet=False):
        """Initialize the instance given a source object.  `src` can
        be a file or filename.  If you do not want to include function
        definitions from doxygen then set
        `include_function_definition` to `False`.  This is handy since
        this allows you to use the swig generated function definition
        using %feature("autodoc", [0,1]).

        If `quiet` is true, progress messages are not printed while
        the files referenced by an index document are parsed.

        """
        f = my_open_read(src)
        try:
            # File-like sources (e.g. StringIO) may have no usable name;
            # fall back to an empty name instead of failing.
            src_name = getattr(f, 'name', '')
            if not isinstance(src_name, str):
                src_name = ''
            self.my_dir = os.path.dirname(src_name)
            self.xmldoc = minidom.parse(f).documentElement
        finally:
            # Always release the handle, even when the XML is malformed.
            f.close()

        self.pieces = []
        self.pieces.append('\n// File: %s\n' % os.path.basename(src_name))

        self.space_re = re.compile(r'\s+')
        self.lead_spc = re.compile(r'^(%feature\S+\s+\S+\s*?)"\s+(\S)')
        self.multi = 0
        self.ignores = ['inheritancegraph', 'param', 'listofallmembers',
                        'innerclass', 'name', 'declname', 'incdepgraph',
                        'invincdepgraph', 'programlisting', 'type',
                        'references', 'referencedby', 'location',
                        'collaborationgraph', 'reimplements',
                        'reimplementedby', 'derivedcompoundref',
                        'basecompoundref']
        self.include_function_definition = include_function_definition
        if not include_function_definition:
            self.ignores.append('argsstring')

        self.quiet = quiet

    def generate(self):
        """Parses the file set in the initialization.  The resulting
        data is stored in `self.pieces`.

        """
        self.parse(self.xmldoc)

    def parse(self, node):
        """Parse a given node.  This function in turn calls the
        `parse_<nodeType>` functions which handle the respective
        nodes.

        """
        pm = getattr(self, "parse_%s" % node.__class__.__name__)
        pm(node)

    def parse_Document(self, node):
        """Parse a `DOCUMENT_NODE` by parsing its document element."""
        self.parse(node.documentElement)

    def parse_Text(self, node):
        """Parse a `TEXT_NODE`.  Backslashes and double quotes are
        escaped, and the wrapped text is added to `self.pieces` unless
        it consists purely of whitespace.

        """
        txt = node.data
        txt = txt.replace('\\', r'\\\\')
        txt = txt.replace('"', r'\"')
        # ignore pure whitespace
        m = self.space_re.match(txt)
        if m and len(m.group()) == len(txt):
            return
        self.add_text(textwrap.fill(txt, break_long_words=False))

    def parse_Element(self, node):
        """Parse an `ELEMENT_NODE`.  This calls specific
        `do_<tagName>` handlers for different elements.  If no handler
        is available the `generic_parse` method is called.  All
        tagNames specified in `self.ignores` are simply ignored.

        """
        name = node.tagName
        if name in self.ignores:
            return
        attr = "do_%s" % name
        if hasattr(self, attr):
            handlerMethod = getattr(self, attr)
            handlerMethod(node)
        else:
            self.generic_parse(node)

    def parse_Comment(self, node):
        """Parse a `COMMENT_NODE`.  This does nothing for now."""
        return

    def add_text(self, value):
        """Adds text corresponding to `value` into `self.pieces`.
        A list or tuple is added item by item; anything else is
        appended as a single piece.

        """
        if isinstance(value, (list, tuple)):
            self.pieces.extend(value)
        else:
            self.pieces.append(value)

    def get_specific_nodes(self, node, names):
        """Given a node and a sequence of strings in `names`, return a
        dictionary containing the names as keys and child
        `ELEMENT_NODEs`, that have a `tagName` equal to the name.  If
        several children share a tag name, the last one is kept.

        """
        return {x.tagName: x for x in node.childNodes
                if x.nodeType == x.ELEMENT_NODE and x.tagName in names}

    def generic_parse(self, node, pad=0):
        """A Generic parser for arbitrary tags in a node.

        Parameters:

         - node:  A node in the DOM.
         - pad: `int` (default: 0)

           If 0 the node data is not padded with newlines.  If 1 it
           appends a newline after parsing the childNodes.  If 2 it
           pads before and after the nodes are processed.  Defaults to
           0.

        """
        npiece = 0
        if pad:
            npiece = len(self.pieces)
            if pad == 2:
                self.add_text('\n')
        for n in node.childNodes:
            self.parse(n)
        if pad:
            # Only append the trailing newline if something was emitted.
            if len(self.pieces) > npiece:
                self.add_text('\n')

    def space_parse(self, node):
        """Add a single space and then parse the children of `node`."""
        self.add_text(' ')
        self.generic_parse(node)

    # Inline markup elements are all rendered as their text preceded by
    # a space.
    do_ref = space_parse
    do_emphasis = space_parse
    do_bold = space_parse
    do_computeroutput = space_parse
    do_formula = space_parse

    def do_compoundname(self, node):
        """Open a `%feature("docstring")` directive named after the
        text of the `compoundname` element.

        """
        self.add_text('\n\n')
        data = node.firstChild.data
        self.add_text('%%feature("docstring") %s "\n' % data)

    def do_compounddef(self, node):
        """Handle a `compounddef` element.

        For public classes and structs the name, descriptions and
        includes are emitted first as one docstring, followed by the
        remaining children.  Non-public classes and structs are
        skipped.  For files and namespaces only the `sectiondef`
        descendants are parsed.  Other kinds produce no output.

        """
        kind = node.attributes['kind'].value
        if kind in ('class', 'struct'):
            prot = node.attributes['prot'].value
            if prot != 'public':
                return
            names = ('compoundname', 'briefdescription',
                     'detaileddescription', 'includes')
            first = self.get_specific_nodes(node, names)
            for n in names:
                # `dict.has_key` does not exist on Python 3; use `in`.
                if n in first:
                    self.parse(first[n])
            self.add_text(['";', '\n'])
            handled = list(first.values())
            for n in node.childNodes:
                if n not in handled:
                    self.parse(n)
        elif kind in ('file', 'namespace'):
            for n in node.getElementsByTagName('sectiondef'):
                self.parse(n)

    def do_includes(self, node):
        """Emit the include name prefixed with 'C++ includes: '."""
        self.add_text('C++ includes: ')
        self.generic_parse(node, pad=1)

    def do_parameterlist(self, node):
        """Emit a section heading chosen from the `kind` attribute of
        the list, then parse the list itself.  The heading is
        'unknown' when there is no `kind` attribute.

        """
        headings = {'param': 'Parameters',
                    'exception': 'Exceptions',
                    'retval': 'Returns'}
        text = 'unknown'
        for key, val in node.attributes.items():
            if key == 'kind':
                # Unrecognised kinds are used verbatim as the heading.
                text = headings.get(val, val)
                break
        self.add_text(['\n', '\n', text, ':', '\n'])
        self.generic_parse(node, pad=1)

    def do_para(self, node):
        """Emit a paragraph surrounded by newlines."""
        self.add_text('\n')
        self.generic_parse(node, pad=1)

    def do_parametername(self, node):
        """Emit a parameter name followed by ': '.  Names containing
        'Exception' are emitted without the trailing colon.

        """
        self.add_text('\n')
        try:
            data = node.firstChild.data
        except AttributeError:  # perhaps a <ref> tag in it
            data = node.firstChild.firstChild.data
        if data.find('Exception') != -1:
            self.add_text(data)
        else:
            self.add_text("%s: " % data)

    def do_parameterdefinition(self, node):
        """Parse the children, appending a newline if any text resulted."""
        self.generic_parse(node, pad=1)

    def do_detaileddescription(self, node):
        """Parse the children, appending a newline if any text resulted."""
        self.generic_parse(node, pad=1)

    def do_briefdescription(self, node):
        """Parse the children, appending a newline if any text resulted."""
        self.generic_parse(node, pad=1)

    def do_memberdef(self, node):
        """Emit a `%feature("docstring")` directive for a public
        member.

        Operators, variables, typedefs and members without a
        `definition` child are skipped.  The member name is qualified
        with the enclosing namespace or class name where one is found.

        """
        prot = node.attributes['prot'].value
        kind = node.attributes['kind'].value
        tmp = node.parentNode.parentNode.parentNode
        compdef = tmp.getElementsByTagName('compounddef')[0]
        cdef_kind = compdef.attributes['kind'].value

        if prot != 'public':
            return

        first = self.get_specific_nodes(node, ('definition', 'name'))
        name = first['name'].firstChild.data
        if name[:8] == 'operator':  # Don't handle operators yet.
            return

        if 'definition' not in first or kind in ['variable', 'typedef']:
            return

        if self.include_function_definition:
            defn = first['definition'].firstChild.data
        else:
            defn = ""
        self.add_text('\n')
        self.add_text('%feature("docstring") ')

        anc = node.parentNode.parentNode
        if cdef_kind in ('file', 'namespace'):
            ns_node = anc.getElementsByTagName('innernamespace')
            if not ns_node and cdef_kind == 'namespace':
                ns_node = anc.getElementsByTagName('compoundname')
            if ns_node:
                ns = ns_node[0].firstChild.data
                self.add_text(' %s::%s "\n%s' % (ns, name, defn))
            else:
                self.add_text(' %s "\n%s' % (name, defn))
        elif cdef_kind in ('class', 'struct'):
            # Get the full function name.
            anc_node = anc.getElementsByTagName('compoundname')
            cname = anc_node[0].firstChild.data
            self.add_text(' %s::%s "\n%s' % (cname, name, defn))

        handled = list(first.values())
        for n in node.childNodes:
            if n not in handled:
                self.parse(n)
        self.add_text(['";', '\n'])

    def do_definition(self, node):
        """Emit the definition text as a directive name, then again as
        the first line of the docstring body.

        """
        data = node.firstChild.data
        self.add_text('%s "\n%s' % (data, data))

    def do_sectiondef(self, node):
        """Parse only sections whose kind is 'public-func', 'func',
        'user-defined' or empty; all other sections are skipped.

        """
        kind = node.attributes['kind'].value
        if kind in ('public-func', 'func', 'user-defined', ''):
            self.generic_parse(node)

    def do_header(self, node):
        """For a user defined section def a header field is present
        which should not be printed as such, so we comment it in the
        output."""
        data = node.firstChild.data
        self.add_text('\n/*\n %s \n*/\n' % data)
        # If the node two positions after the header is a
        # 'description' node then we should comment that out also and
        # remove it from the parent node's children.
        parent = node.parentNode
        idx = parent.childNodes.index(node)
        # Strictly greater: index idx + 2 must exist before it is read.
        if len(parent.childNodes) > idx + 2:
            nd = parent.childNodes[idx + 2]
            if nd.nodeName == 'description':
                nd = parent.removeChild(nd)
                self.add_text('\n/*')
                self.generic_parse(nd)
                self.add_text('\n*/\n')

    def do_simplesect(self, node):
        """Handle a `simplesect` element according to its kind:
        'date', 'rcs' and 'version' are dropped, 'warning' and 'see'
        get a label, and everything else is parsed as is.

        """
        kind = node.attributes['kind'].value
        if kind in ('date', 'rcs', 'version'):
            pass
        elif kind == 'warning':
            self.add_text(['\n', 'WARNING: '])
            self.generic_parse(node)
        elif kind == 'see':
            self.add_text('\n')
            self.add_text('See: ')
            self.generic_parse(node)
        else:
            self.generic_parse(node)

    def do_argsstring(self, node):
        """Parse the children, appending a newline if any text resulted."""
        self.generic_parse(node, pad=1)

    def do_member(self, node):
        """Parse only function members whose refid starts with
        'namespace'; all other members are skipped.

        """
        kind = node.attributes['kind'].value
        refid = node.attributes['refid'].value
        if kind == 'function' and refid[:9] == 'namespace':
            self.generic_parse(node)

    def do_doxygenindex(self, node):
        """Handle an index document: convert every `<refid>.xml` file
        it references and collect the cleaned pieces of each one.

        """
        self.multi = 1
        comps = node.getElementsByTagName('compound')
        for c in comps:
            refid = c.attributes['refid'].value
            fname = refid + '.xml'
            # Fall back to the directory of the index file itself.
            if not os.path.exists(fname):
                fname = os.path.join(self.my_dir, fname)
            if not self.quiet:
                print( "parsing file: %s"%fname )
            p = Doxy2SWIG(fname, self.include_function_definition, self.quiet)
            p.generate()
            self.pieces.extend(self.clean_pieces(p.pieces))

    def write(self, fname):
        """Write the generated text to `fname`, which can be a file or
        a filename.  The output is closed afterwards, including when
        writing fails.

        """
        o = my_open_write(fname)
        try:
            if self.multi:
                # Pieces from an index were already cleaned per file.
                o.write("".join(self.pieces))
            else:
                o.write("".join(self.clean_pieces(self.pieces)))
        finally:
            o.close()

    def clean_pieces(self, pieces):
        """Cleans the list of strings given as `pieces`.  It replaces
        multiple newlines by a maximum of 2 and returns a new list.
        It also wraps the paragraphs nicely.

        """
        ret = []
        count = 0
        for i in pieces:
            if i == '\n':
                count = count + 1
            else:
                if i == '";':
                    # At most one newline before a closing quote.
                    if count:
                        ret.append('\n')
                elif count > 2:
                    ret.append('\n\n')
                elif count:
                    ret.append('\n' * count)
                count = 0
                ret.append(i)

        _data = "".join(ret)
        ret = []
        for i in _data.split('\n\n'):
            if i == 'Parameters:' or i == 'Exceptions:' or i == 'Returns:':
                # Underline section headings.
                ret.extend([i, '\n' + '-' * len(i), '\n\n'])
            elif i.find('// File:') > -1:  # leave comments alone.
                ret.extend([i, '\n'])
            else:
                _tmp = textwrap.fill(i.strip(), break_long_words=False)
                _tmp = self.lead_spc.sub(r'\1"\2', _tmp)
                ret.extend([_tmp, '\n\n'])
        return ret
422 | ||
423 | ||
def convert(input, output, include_function_definition=True, quiet=False):
    """Convert the Doxygen XML in `input` and write the result to
    `output`.

    Both arguments are handed to `Doxy2SWIG`, as are
    `include_function_definition` and `quiet`.

    """
    converter = Doxy2SWIG(input,
                          include_function_definition=include_function_definition,
                          quiet=quiet)
    converter.generate()
    converter.write(output)
428 | ||
def main():
    """Command-line entry point.

    Parses the options, requires exactly two positional arguments
    (the input XML file and the output file) and runs `convert` on
    them.  Any other number of arguments is reported through
    `parser.error`.

    """
    usage = __doc__
    parser = optparse.OptionParser(usage)
    parser.add_option("-n", '--no-function-definition',
                      action='store_true',
                      default=False,
                      dest='func_def',
                      help='do not include doxygen function definitions')
    parser.add_option("-q", '--quiet',
                      action='store_true',
                      default=False,
                      dest='quiet',
                      help='be quiet and minimize output')

    options, args = parser.parse_args()
    if len(args) != 2:
        # parser.error() already prefixes the message with "error: ",
        # and the count can be too high as well as too low.
        parser.error("expected exactly two arguments: input.xml output.i")

    # The -n flag is stored as `func_def`, so it is inverted here.
    convert(args[0], args[1], not options.func_def, options.quiet)


if __name__ == '__main__':
    main()