Commit | Line | Data |
---|---|---|
f9e62be7 DW |
1 | #!/usr/bin/env python |
2 | """Doxygen XML to SWIG docstring converter. | |
3 | ||
4 | Usage: | |
5 | ||
6 | doxy2swig.py [options] input.xml output.i | |
7 | ||
8 | Converts Doxygen generated XML files into a file containing docstrings | |
9 | that can be used by SWIG-1.3.x. Note that you need to get SWIG | |
10 | version > 1.3.23 or use Robin Dunn's docstring patch to be able to use | |
11 | the resulting output. | |
12 | ||
13 | input.xml is your doxygen generated XML file and output.i is where the | |
14 | output will be written (the file will be clobbered). | |
15 | ||
16 | """ | |
17 | ###################################################################### | |
18 | # | |
19 | # This code is implemented using Mark Pilgrim's code as a guideline: | |
20 | # http://www.faqs.org/docs/diveintopython/kgp_divein.html | |
21 | # | |
22 | # Author: Prabhu Ramachandran | |
23 | # License: BSD style | |
24 | # | |
25 | # Thanks: | |
26 | # Johan Hake: the include_function_definition feature | |
27 | # Bill Spotz: bug reports and testing. | |
28 | # Sebastian Henschel: Misc. enhancements. | |
29 | # | |
30 | ###################################################################### | |
31 | ||
32 | from xml.dom import minidom | |
33 | import re | |
34 | import textwrap | |
35 | import sys | |
36 | import types | |
37 | import os.path | |
38 | import optparse | |
39 | ||
40 | ||
def my_open_read(source):
    """Return a readable object for `source`.

    If `source` already has a ``read`` attribute it is returned
    unchanged; otherwise it is passed to ``open()`` and the resulting
    file object (default mode) is returned.

    """
    if not hasattr(source, "read"):
        return open(source)
    return source
46 | ||
def my_open_write(dest):
    """Return a writable object for `dest`.

    If `dest` already has a ``write`` attribute it is returned
    unchanged; otherwise it is opened with mode ``'w'`` (truncating any
    existing file) and the resulting file object is returned.

    """
    if not hasattr(dest, "write"):
        return open(dest, 'w')
    return dest
52 | ||
53 | ||
class Doxy2SWIG:
    """Converts Doxygen generated XML files into a file containing
    docstrings that can be used by SWIG-1.3.x that have support for
    feature("docstring").  Once the data is parsed it is stored in
    self.pieces.

    """

    def __init__(self, src, include_function_definition=True, quiet=False):
        """Initialize the instance given a source object.  `src` can
        be a file or filename.  If you do not want to include function
        definitions from doxygen then set
        `include_function_definition` to `False`.  This is handy since
        this allows you to use the swig generated function definition
        using %feature("autodoc", [0,1]).

        If `quiet` is true, no progress message is printed while the
        files referenced by an index file are parsed.

        """
        f = my_open_read(src)
        # File-like sources are not guaranteed to carry a `name`; fall
        # back to an empty string instead of raising AttributeError.
        src_name = getattr(f, 'name', '')
        try:
            self.xmldoc = minidom.parse(f).documentElement
        finally:
            # Close the source even when the XML cannot be parsed.
            f.close()
        self.my_dir = os.path.dirname(src_name)

        self.pieces = []
        self.pieces.append('\n// File: %s\n' % os.path.basename(src_name))

        self.space_re = re.compile(r'\s+')
        self.lead_spc = re.compile(r'^(%feature\S+\s+\S+\s*?)"\s+(\S)')
        self.multi = 0
        # Tag names that parse_Element drops without descending.
        self.ignores = ['inheritancegraph', 'param', 'listofallmembers',
                        'innerclass', 'name', 'declname', 'incdepgraph',
                        'invincdepgraph', 'programlisting', 'type',
                        'references', 'referencedby', 'location',
                        'collaborationgraph', 'reimplements',
                        'reimplementedby', 'derivedcompoundref',
                        'basecompoundref']
        self.include_function_definition = include_function_definition
        if not include_function_definition:
            self.ignores.append('argsstring')

        self.quiet = quiet

    def generate(self):
        """Parses the file set in the initialization.  The resulting
        data is stored in `self.pieces`.

        """
        self.parse(self.xmldoc)

    def parse(self, node):
        """Parse a given node.  This function in turn calls the
        `parse_<nodeType>` functions which handle the respective
        nodes.  The method is looked up by the node's class name, so a
        node class without a matching method raises `AttributeError`.

        """
        handler = getattr(self, "parse_%s" % node.__class__.__name__)
        handler(node)

    def parse_Document(self, node):
        """Parse a document node by parsing its document element."""
        self.parse(node.documentElement)

    def parse_Text(self, node):
        """Parse a text node: escape backslashes and double quotes,
        then add the wrapped text to `self.pieces`.  Text consisting
        only of whitespace is ignored.

        """
        txt = node.data
        txt = txt.replace('\\', r'\\\\')
        txt = txt.replace('"', r'\"')
        # ignore pure whitespace
        m = self.space_re.match(txt)
        if m and m.end() == len(txt):
            return
        self.add_text(textwrap.fill(txt, break_long_words=False))

    def parse_Element(self, node):
        """Parse an `ELEMENT_NODE`.  This calls specific
        `do_<tagName>` handlers for different elements.  If no handler
        is available the `generic_parse` method is called.  All
        tagNames specified in `self.ignores` are simply ignored.

        """
        name = node.tagName
        if name in self.ignores:
            return
        handler = getattr(self, "do_%s" % name, None)
        if handler is None:
            self.generic_parse(node)
        else:
            handler(node)

    def parse_Comment(self, node):
        """Parse a `COMMENT_NODE`.  This does nothing for now."""
        return

    def add_text(self, value):
        """Adds text corresponding to `value` into `self.pieces`.

        A list or tuple is added item by item; any other value is
        appended as a single piece.

        """
        if isinstance(value, (list, tuple)):
            self.pieces.extend(value)
        else:
            self.pieces.append(value)

    def get_specific_nodes(self, node, names):
        """Given a node and a sequence of strings in `names`, return a
        dictionary containing the names as keys and child
        `ELEMENT_NODEs`, that have a `tagName` equal to the name.  If
        several children share a tag name the last one wins.

        """
        nodes = [(x.tagName, x) for x in node.childNodes
                 if x.nodeType == x.ELEMENT_NODE and x.tagName in names]
        return dict(nodes)

    def generic_parse(self, node, pad=0):
        """A Generic parser for arbitrary tags in a node.

        Parameters:

         - node:  A node in the DOM.
         - pad: `int` (default: 0)

           If 0 the node data is not padded with newlines.  If 1 it
           appends a newline after parsing the childNodes.  If 2 it
           pads before and after the nodes are processed.  Defaults to
           0.

        The trailing newline is only added when something was added to
        `self.pieces` since this method was entered.

        """
        npiece = len(self.pieces) if pad else 0
        if pad == 2:
            self.add_text('\n')
        for child in node.childNodes:
            self.parse(child)
        if pad and len(self.pieces) > npiece:
            self.add_text('\n')

    def space_parse(self, node):
        """Add a single space and then parse the node's children."""
        self.add_text(' ')
        self.generic_parse(node)

    # Inline markup elements are all handled the same way.
    do_ref = space_parse
    do_emphasis = space_parse
    do_bold = space_parse
    do_computeroutput = space_parse
    do_formula = space_parse

    def do_compoundname(self, node):
        """Open a docstring feature for the compound named by `node`."""
        self.add_text('\n\n')
        data = node.firstChild.data
        self.add_text('%%feature("docstring") %s "\n' % data)

    def do_compounddef(self, node):
        """Handle a `compounddef` element.

        Public classes and structs get a docstring built from their
        name, descriptions and includes, after which the remaining
        children are parsed.  For files and namespaces only the
        `sectiondef` descendants are parsed.  Other kinds, and
        non-public classes/structs, produce nothing.

        """
        kind = node.attributes['kind'].value
        if kind in ('class', 'struct'):
            prot = node.attributes['prot'].value
            if prot != 'public':
                return
            names = ('compoundname', 'briefdescription',
                     'detaileddescription', 'includes')
            first = self.get_specific_nodes(node, names)
            for n in names:
                if n in first:
                    self.parse(first[n])
            self.add_text(['";', '\n'])
            handled = list(first.values())
            for n in node.childNodes:
                if n not in handled:
                    self.parse(n)
        elif kind in ('file', 'namespace'):
            for n in node.getElementsByTagName('sectiondef'):
                self.parse(n)

    def do_includes(self, node):
        """Emit a 'C++ includes: ' line followed by the node contents."""
        self.add_text('C++ includes: ')
        self.generic_parse(node, pad=1)

    def do_parameterlist(self, node):
        """Emit a heading for a parameter list and parse its contents.

        The heading is derived from the `kind` attribute; kinds other
        than 'param', 'exception' and 'retval' are used verbatim, and
        'unknown' is used when the attribute is missing.

        """
        headings = {'param': 'Parameters',
                    'exception': 'Exceptions',
                    'retval': 'Returns'}
        text = 'unknown'
        if node.hasAttribute('kind'):
            val = node.getAttribute('kind')
            text = headings.get(val, val)
        self.add_text(['\n', '\n', text, ':', '\n'])
        self.generic_parse(node, pad=1)

    def do_para(self, node):
        """Start a new line and parse the paragraph contents."""
        self.add_text('\n')
        self.generic_parse(node, pad=1)

    def do_parametername(self, node):
        """Emit a parameter name on its own line.

        Names containing 'Exception' are emitted as they are; all other
        names are followed by ': '.

        """
        self.add_text('\n')
        try:
            data = node.firstChild.data
        except AttributeError:  # perhaps a <ref> tag in it
            data = node.firstChild.firstChild.data
        if 'Exception' in data:
            self.add_text(data)
        else:
            self.add_text("%s: " % data)

    def do_parameterdefinition(self, node):
        """Parse the node contents, padded with a trailing newline."""
        self.generic_parse(node, pad=1)

    def do_detaileddescription(self, node):
        """Parse the node contents, padded with a trailing newline."""
        self.generic_parse(node, pad=1)

    def do_briefdescription(self, node):
        """Parse the node contents, padded with a trailing newline."""
        self.generic_parse(node, pad=1)

    def do_memberdef(self, node):
        """Emit a docstring feature for a public member definition.

        Nothing is emitted for non-public members, for members whose
        name starts with 'operator', for variables and typedefs, or
        for members without a `definition` child.

        """
        prot = node.attributes['prot'].value
        kind = node.attributes['kind'].value
        tmp = node.parentNode.parentNode.parentNode
        compdef = tmp.getElementsByTagName('compounddef')[0]
        cdef_kind = compdef.attributes['kind'].value

        if prot != 'public':
            return

        first = self.get_specific_nodes(node, ('definition', 'name'))
        name = first['name'].firstChild.data
        if name[:8] == 'operator':  # Don't handle operators yet.
            return

        if 'definition' not in first or kind in ('variable', 'typedef'):
            return

        if self.include_function_definition:
            defn = first['definition'].firstChild.data
        else:
            defn = ""
        self.add_text('\n')
        self.add_text('%feature("docstring") ')

        anc = node.parentNode.parentNode
        if cdef_kind in ('file', 'namespace'):
            ns_node = anc.getElementsByTagName('innernamespace')
            if not ns_node and cdef_kind == 'namespace':
                ns_node = anc.getElementsByTagName('compoundname')
            if ns_node:
                ns = ns_node[0].firstChild.data
                self.add_text(' %s::%s "\n%s' % (ns, name, defn))
            else:
                self.add_text(' %s "\n%s' % (name, defn))
        elif cdef_kind in ('class', 'struct'):
            # Get the full function name.
            anc_node = anc.getElementsByTagName('compoundname')
            cname = anc_node[0].firstChild.data
            self.add_text(' %s::%s "\n%s' % (cname, name, defn))

        # The name and definition were consumed above; parse the rest.
        handled = list(first.values())
        for n in node.childNodes:
            if n not in handled:
                self.parse(n)
        self.add_text(['";', '\n'])

    def do_definition(self, node):
        """Emit the definition text as both feature target and first
        docstring line."""
        data = node.firstChild.data
        self.add_text('%s "\n%s' % (data, data))

    def do_sectiondef(self, node):
        """Parse only sections of kind 'public-func', 'func',
        'user-defined' or ''; all other sections are skipped."""
        kind = node.attributes['kind'].value
        if kind in ('public-func', 'func', 'user-defined', ''):
            self.generic_parse(node)

    def do_header(self, node):
        """For a user defined section def a header field is present
        which should not be printed as such, so we comment it in the
        output."""
        data = node.firstChild.data
        self.add_text('\n/*\n %s \n*/\n' % data)
        # If the node two positions after the header is a
        # 'description' node then we should comment that out also and
        # remove it from the parent node's children.
        parent = node.parentNode
        idx = parent.childNodes.index(node)
        # Strictly greater: index idx + 2 must exist before it is read.
        if len(parent.childNodes) > idx + 2:
            nd = parent.childNodes[idx + 2]
            if nd.nodeName == 'description':
                nd = parent.removeChild(nd)
                self.add_text('\n/*')
                self.generic_parse(nd)
                self.add_text('\n*/\n')

    def do_simplesect(self, node):
        """Handle a `simplesect` element according to its kind.

        'date', 'rcs' and 'version' sections are dropped, 'warning'
        and 'see' sections get a label, and everything else is parsed
        as it is.

        """
        kind = node.attributes['kind'].value
        if kind in ('date', 'rcs', 'version'):
            return
        if kind == 'warning':
            self.add_text(['\n', 'WARNING: '])
        elif kind == 'see':
            self.add_text('\n')
            self.add_text('See: ')
        self.generic_parse(node)

    def do_argsstring(self, node):
        """Parse the node contents, padded with a trailing newline."""
        self.generic_parse(node, pad=1)

    def do_member(self, node):
        """Parse only function members whose refid starts with
        'namespace'."""
        kind = node.attributes['kind'].value
        refid = node.attributes['refid'].value
        if kind == 'function' and refid[:9] == 'namespace':
            self.generic_parse(node)

    def do_doxygenindex(self, node):
        """Handle an index file: convert every referenced compound
        file and collect the cleaned results in `self.pieces`.

        Each `<refid>.xml` is looked up relative to the current
        directory first and then in the directory of the index file.

        """
        self.multi = 1
        for c in node.getElementsByTagName('compound'):
            refid = c.attributes['refid'].value
            fname = refid + '.xml'
            if not os.path.exists(fname):
                fname = os.path.join(self.my_dir, fname)
            if not self.quiet:
                # Single-argument call form prints the same text on
                # Python 2 and Python 3.
                print("parsing file: %s" % fname)
            p = Doxy2SWIG(fname, self.include_function_definition, self.quiet)
            p.generate()
            self.pieces.extend(self.clean_pieces(p.pieces))

    def write(self, fname):
        """Write the collected pieces to `fname`, which can be a
        file or filename.  The destination is closed afterwards.

        Pieces gathered from an index file were already cleaned when
        they were collected, so they are written as they are.

        """
        o = my_open_write(fname)
        try:
            if self.multi:
                o.write("".join(self.pieces))
            else:
                o.write("".join(self.clean_pieces(self.pieces)))
        finally:
            o.close()

    def clean_pieces(self, pieces):
        """Cleans the list of strings given as `pieces`.  It replaces
        multiple newlines by a maximum of 2 and returns a new list.
        It also wraps the paragraphs nicely.

        """
        collapsed = []
        newlines = 0
        for piece in pieces:
            if piece == '\n':
                newlines += 1
                continue
            if piece == '";':
                # At most one newline before the closing quote.
                if newlines:
                    collapsed.append('\n')
            elif newlines > 2:
                collapsed.append('\n\n')
            elif newlines:
                collapsed.append('\n' * newlines)
            newlines = 0
            collapsed.append(piece)

        ret = []
        for para in "".join(collapsed).split('\n\n'):
            if para in ('Parameters:', 'Exceptions:', 'Returns:'):
                # Underline section headings.
                ret.extend([para, '\n' + '-' * len(para), '\n\n'])
            elif '// File:' in para:  # leave comments alone.
                ret.extend([para, '\n'])
            else:
                wrapped = textwrap.fill(para.strip(), break_long_words=False)
                wrapped = self.lead_spc.sub(r'\1"\2', wrapped)
                ret.extend([wrapped, '\n\n'])
        return ret
423 | ||
424 | ||
def convert(input, output, include_function_definition=True, quiet=False):
    """Convert the Doxygen XML given as `input` and write the result
    to `output`.

    The remaining arguments are passed on to `Doxy2SWIG` unchanged.

    """
    converter = Doxy2SWIG(input, include_function_definition, quiet)
    converter.generate()
    converter.write(output)
429 | ||
def main(argv=None):
    """Command line entry point.

    Parses the options and the two positional arguments (input XML
    file and output file) and runs the conversion.  `argv` is the list
    of arguments to parse; when it is `None` the arguments are taken
    from the command line.  Exits through `parser.error` when the
    number of positional arguments is not exactly two.

    """
    usage = __doc__
    parser = optparse.OptionParser(usage)
    parser.add_option("-n", '--no-function-definition',
                      action='store_true',
                      default=False,
                      dest='func_def',
                      help='do not include doxygen function definitions')
    parser.add_option("-q", '--quiet',
                      action='store_true',
                      default=False,
                      dest='quiet',
                      help='be quiet and minimize output')

    options, args = parser.parse_args(argv)
    if len(args) != 2:
        # parser.error() adds its own "error: " prefix, and the count
        # can be wrong in either direction, so say what is expected.
        parser.error("expected exactly two arguments: input.xml output.i")

    # The option is a negative switch, so invert it for convert().
    convert(args[0], args[1], not options.func_def, options.quiet)
449 | ||
450 | ||
# Run the command line interface only when executed as a script.
if __name__ == "__main__":
    main()