| 1 | #!/usr/bin/env python |
| 2 | """Doxygen XML to SWIG docstring converter. |
| 3 | |
| 4 | Usage: |
| 5 | |
| 6 | doxy2swig.py [options] input.xml output.i |
| 7 | |
| 8 | Converts Doxygen generated XML files into a file containing docstrings |
| 9 | that can be used by SWIG-1.3.x. Note that you need to get SWIG |
| 10 | version > 1.3.23 or use Robin Dunn's docstring patch to be able to use |
| 11 | the resulting output. |
| 12 | |
| 13 | input.xml is your doxygen generated XML file and output.i is where the |
| 14 | output will be written (the file will be clobbered). |
| 15 | |
| 16 | """ |
| 17 | # |
| 18 | # |
| 19 | # This code is implemented using Mark Pilgrim's code as a guideline: |
| 20 | # http://www.faqs.org/docs/diveintopython/kgp_divein.html |
| 21 | # |
| 22 | # Author: Prabhu Ramachandran |
| 23 | # License: BSD style |
| 24 | # |
| 25 | # Thanks: |
| 26 | # Johan Hake: the include_function_definition feature |
| 27 | # Bill Spotz: bug reports and testing. |
| 28 | # Sebastian Henschel: Misc. enhancements. |
| 29 | # |
| 30 | # |
| 31 | |
| 32 | from xml.dom import minidom |
| 33 | import re |
| 34 | import textwrap |
| 35 | import sys |
| 36 | import os.path |
| 37 | import optparse |
| 38 | |
| 39 | |
def my_open_read(source):
    """Return a readable object for `source`.

    Anything that already exposes a `read` attribute is handed back
    unchanged; any other value is passed to the builtin `open()` (default
    read mode) and the resulting file object is returned.
    """
    return source if hasattr(source, "read") else open(source)
| 45 | |
| 46 | |
def my_open_write(dest):
    """Return a writable object for `dest`.

    Anything that already exposes a `write` attribute is handed back
    unchanged; any other value is opened with `open(dest, 'w')`, which
    truncates an existing file.
    """
    if not hasattr(dest, "write"):
        return open(dest, 'w')
    return dest
| 52 | |
| 53 | |
class Doxy2SWIG:

    """Converts Doxygen generated XML files into a file containing
    docstrings that can be used by SWIG-1.3.x that have support for
    feature("docstring").  Once the data is parsed it is stored in
    self.pieces.

    Dispatch works in two stages: `parse` picks a `parse_<NodeClass>`
    method from the DOM node's class name, and `parse_Element` then picks
    a `do_<tagName>` handler (falling back to `generic_parse`).

    """

    def __init__(self, src, include_function_definition=True, quiet=False):
        """Initialize the instance given a source object.  `src` can
        be a file or filename.  If you do not want to include function
        definitions from doxygen then set
        `include_function_definition` to `False`.  This is handy since
        this allows you to use the swig generated function definition
        using %feature("autodoc", [0,1]).

        If `quiet` is true, `do_doxygenindex` does not print the name of
        each sub-file it parses.

        """
        f = my_open_read(src)
        # Not every file-like object carries a `name` (io.StringIO does
        # not); fall back to an empty name instead of failing.
        src_name = getattr(f, 'name', '')
        try:
            self.xmldoc = minidom.parse(f).documentElement
        finally:
            # Close the handle even when the XML cannot be parsed.
            f.close()
        self.my_dir = os.path.dirname(src_name)

        self.pieces = []
        self.pieces.append('\n// File: %s\n' %
                           os.path.basename(src_name))

        self.space_re = re.compile(r'\s+')
        # Used by clean_pieces to drop whitespace that follows the opening
        # quote of a %feature directive.
        self.lead_spc = re.compile(r'^(%feature\S+\s+\S+\s*?)"\s+(\S)')
        # Set to 1 by do_doxygenindex; write() then skips the final
        # clean_pieces pass because sub-file pieces were already cleaned.
        self.multi = 0
        # Tag names that parse_Element drops without descending into them.
        self.ignores = ['inheritancegraph', 'param', 'listofallmembers',
                        'innerclass', 'name', 'declname', 'incdepgraph',
                        'invincdepgraph', 'programlisting', 'type',
                        'references', 'referencedby', 'location',
                        'collaborationgraph', 'reimplements',
                        'reimplementedby', 'derivedcompoundref',
                        'basecompoundref']
        self.include_function_definition = include_function_definition
        if not include_function_definition:
            self.ignores.append('argsstring')

        self.quiet = quiet

    def generate(self):
        """Parses the file set in the initialization.  The resulting
        data is stored in `self.pieces`.

        """
        self.parse(self.xmldoc)

    def parse(self, node):
        """Parse a given node.  This function in turn calls the
        `parse_<nodeType>` functions which handle the respective
        nodes.  The method name is built from the node's class name, so a
        node class without a matching method raises `AttributeError`.

        """
        pm = getattr(self, "parse_%s" % node.__class__.__name__)
        pm(node)

    def parse_Document(self, node):
        """Parse a document node by parsing its root element."""
        self.parse(node.documentElement)

    def parse_Text(self, node):
        """Parse a text node and add its wrapped, escaped text.

        Each backslash is replaced by four backslashes and each double
        quote is prefixed with a backslash.  Text consisting only of
        whitespace is dropped.

        """
        txt = node.data
        txt = txt.replace('\\', r'\\\\')
        txt = txt.replace('"', r'\"')
        # ignore pure whitespace
        m = self.space_re.match(txt)
        if m and len(m.group()) == len(txt):
            return
        self.add_text(textwrap.fill(txt, break_long_words=False))

    def parse_Element(self, node):
        """Parse an `ELEMENT_NODE`.  This calls specific
        `do_<tagName>` handlers for different elements.  If no handler
        is available the `generic_parse` method is called.  All
        tagNames specified in `self.ignores` are simply ignored.

        """
        name = node.tagName
        if name in self.ignores:
            return
        handler = getattr(self, "do_%s" % name, None)
        if handler is not None:
            handler(node)
        else:
            self.generic_parse(node)

    def parse_Comment(self, node):
        """Parse a `COMMENT_NODE`.  This does nothing for now."""
        return

    def add_text(self, value):
        """Adds text corresponding to `value` into `self.pieces`.

        A list or tuple is spliced in element by element; anything else
        is appended as a single piece.

        """
        if isinstance(value, (list, tuple)):
            self.pieces.extend(value)
        else:
            self.pieces.append(value)

    def get_specific_nodes(self, node, names):
        """Given a node and a sequence of strings in `names`, return a
        dictionary containing the names as keys and child
        `ELEMENT_NODEs`, that have a `tagName` equal to the name.

        When several children share a tag name the last one wins.

        """
        nodes = [(x.tagName, x) for x in node.childNodes
                 if x.nodeType == x.ELEMENT_NODE and
                 x.tagName in names]
        return dict(nodes)

    def generic_parse(self, node, pad=0):
        """A Generic parser for arbitrary tags in a node.

        Parameters:

         - node:  A node in the DOM.
         - pad: `int` (default: 0)

           If 0 the node data is not padded with newlines.  If 1 it
           appends a newline after parsing the childNodes.  If 2 it
           pads before and after the nodes are processed.  Defaults to
           0.

        The trailing newline is only added when something was emitted
        since the call started.

        """
        npiece = 0
        if pad:
            npiece = len(self.pieces)
            if pad == 2:
                self.add_text('\n')
        for n in node.childNodes:
            self.parse(n)
        if pad and len(self.pieces) > npiece:
            self.add_text('\n')

    def space_parse(self, node):
        """Emit a single space and then parse the node's children."""
        self.add_text(' ')
        self.generic_parse(node)

    # Inline markup elements are all handled by space_parse.
    do_ref = space_parse
    do_emphasis = space_parse
    do_bold = space_parse
    do_computeroutput = space_parse
    do_formula = space_parse

    def do_compoundname(self, node):
        """Open a `%feature("docstring")` directive for the compound."""
        self.add_text('\n\n')
        data = node.firstChild.data
        self.add_text('%%feature("docstring") %s "\n' % data)

    def do_compounddef(self, node):
        """Handle a `compounddef` element according to its `kind`.

        Public classes and structs get one docstring built from their
        name, brief/detailed description and includes, after which the
        remaining children are parsed.  For files and namespaces only the
        `sectiondef` descendants are parsed.  Other kinds, and non-public
        classes/structs, produce nothing.

        """
        kind = node.attributes['kind'].value
        if kind in ('class', 'struct'):
            prot = node.attributes['prot'].value
            if prot != 'public':
                return
            names = ('compoundname', 'briefdescription',
                     'detaileddescription', 'includes')
            first = self.get_specific_nodes(node, names)
            # Emit the header parts in the fixed order given by `names`.
            # (`in` replaces dict.has_key, which Python 3 removed.)
            for n in names:
                if n in first:
                    self.parse(first[n])
            self.add_text(['";', '\n'])
            handled = list(first.values())
            for n in node.childNodes:
                if n not in handled:
                    self.parse(n)
        elif kind in ('file', 'namespace'):
            nodes = node.getElementsByTagName('sectiondef')
            for n in nodes:
                self.parse(n)

    def do_includes(self, node):
        """Emit a 'C++ includes: ' line followed by the node's content."""
        self.add_text('C++ includes: ')
        self.generic_parse(node, pad=1)

    def do_parameterlist(self, node):
        """Emit a section title for a parameter list, then its content.

        The title comes from the `kind` attribute: 'param', 'exception'
        and 'retval' map to 'Parameters', 'Exceptions' and 'Returns'; any
        other value is used verbatim, and 'unknown' is used when the
        attribute is absent.

        """
        titles = {'param': 'Parameters',
                  'exception': 'Exceptions',
                  'retval': 'Returns'}
        text = 'unknown'
        if node.hasAttribute('kind'):
            kind = node.getAttribute('kind')
            text = titles.get(kind, kind)
        self.add_text(['\n', '\n', text, ':', '\n'])
        self.generic_parse(node, pad=1)

    def do_para(self, node):
        """Emit a paragraph: a newline, the content, a trailing newline."""
        self.add_text('\n')
        self.generic_parse(node, pad=1)

    def do_parametername(self, node):
        """Emit a parameter name on its own line.

        Names containing 'Exception' are written as-is; all others are
        followed by ': '.

        """
        self.add_text('\n')
        try:
            data = node.firstChild.data
        except AttributeError:  # perhaps a <ref> tag in it
            data = node.firstChild.firstChild.data
        if 'Exception' in data:
            self.add_text(data)
        else:
            self.add_text("%s: " % data)

    def do_parameterdefinition(self, node):
        """Emit the node's content followed by a newline."""
        self.generic_parse(node, pad=1)

    def do_detaileddescription(self, node):
        """Emit the node's content followed by a newline."""
        self.generic_parse(node, pad=1)

    def do_briefdescription(self, node):
        """Emit the node's content followed by a newline."""
        self.generic_parse(node, pad=1)

    def do_memberdef(self, node):
        """Emit a `%feature("docstring")` directive for a public member.

        Non-public members, members whose name starts with 'operator',
        members without a `definition` child, and variables/typedefs are
        skipped.  The name is qualified with the namespace or class name
        looked up from the enclosing compound.

        """
        prot = node.attributes['prot'].value
        kind = node.attributes['kind'].value
        tmp = node.parentNode.parentNode.parentNode
        compdef = tmp.getElementsByTagName('compounddef')[0]
        cdef_kind = compdef.attributes['kind'].value

        if prot != 'public':
            return

        first = self.get_specific_nodes(node, ('definition', 'name'))
        name = first['name'].firstChild.data
        if name[:8] == 'operator':  # Don't handle operators yet.
            return

        if 'definition' not in first or kind in ['variable', 'typedef']:
            return

        if self.include_function_definition:
            defn = first['definition'].firstChild.data
        else:
            defn = ""
        self.add_text('\n')
        self.add_text('%feature("docstring") ')

        anc = node.parentNode.parentNode
        if cdef_kind in ('file', 'namespace'):
            ns_node = anc.getElementsByTagName('innernamespace')
            if not ns_node and cdef_kind == 'namespace':
                ns_node = anc.getElementsByTagName('compoundname')
            if ns_node:
                ns = ns_node[0].firstChild.data
                self.add_text(' %s::%s "\n%s' % (ns, name, defn))
            else:
                self.add_text(' %s "\n%s' % (name, defn))
        elif cdef_kind in ('class', 'struct'):
            # Get the full function name.
            anc_node = anc.getElementsByTagName('compoundname')
            cname = anc_node[0].firstChild.data
            self.add_text(' %s::%s "\n%s' % (cname, name, defn))

        # The name/definition children were consumed above; parse the rest.
        handled = list(first.values())
        for n in node.childNodes:
            if n not in handled:
                self.parse(n)
        self.add_text(['";', '\n'])

    def do_definition(self, node):
        """Emit the definition text twice, separated by an opening quote
        and a newline."""
        data = node.firstChild.data
        self.add_text('%s "\n%s' % (data, data))

    def do_sectiondef(self, node):
        """Parse only sections of kind 'public-func', 'func',
        'user-defined' or ''; other sections are skipped."""
        kind = node.attributes['kind'].value
        if kind in ('public-func', 'func', 'user-defined', ''):
            self.generic_parse(node)

    def do_header(self, node):
        """For a user defined section def a header field is present
        which should not be printed as such, so we comment it in the
        output."""
        data = node.firstChild.data
        self.add_text('\n/*\n %s \n*/\n' % data)
        # If the node two positions after the header is a 'description'
        # node then we should comment that out also and remove it from
        # the parent node's children.
        parent = node.parentNode
        siblings = parent.childNodes
        idx = siblings.index(node)
        # Strictly greater: index idx + 2 only exists when there are at
        # least idx + 3 children.
        if len(siblings) > idx + 2:
            nd = siblings[idx + 2]
            if nd.nodeName == 'description':
                nd = parent.removeChild(nd)
                self.add_text('\n/*')
                self.generic_parse(nd)
                self.add_text('\n*/\n')

    def do_simplesect(self, node):
        """Emit a simple section, prefixed according to its `kind`.

        'date', 'rcs' and 'version' sections are dropped; 'warning' and
        'see' sections get a 'WARNING: ' / 'See: ' prefix; everything else
        is parsed without a prefix.

        """
        kind = node.attributes['kind'].value
        if kind in ('date', 'rcs', 'version'):
            return
        if kind == 'warning':
            self.add_text(['\n', 'WARNING: '])
        elif kind == 'see':
            self.add_text('\n')
            self.add_text('See: ')
        self.generic_parse(node)

    def do_argsstring(self, node):
        """Emit the node's content followed by a newline."""
        self.generic_parse(node, pad=1)

    def do_member(self, node):
        """Parse a `member` entry only when it is a function whose refid
        starts with 'namespace'."""
        kind = node.attributes['kind'].value
        refid = node.attributes['refid'].value
        if kind == 'function' and refid.startswith('namespace'):
            self.generic_parse(node)

    def do_doxygenindex(self, node):
        """Process an index file by converting every referenced compound.

        For each `compound` element the file `<refid>.xml` is looked up,
        first relative to the current directory and then relative to the
        directory of the index file, converted with a fresh `Doxy2SWIG`
        instance, and its cleaned pieces are appended to `self.pieces`.

        """
        self.multi = 1
        comps = node.getElementsByTagName('compound')
        for c in comps:
            refid = c.attributes['refid'].value
            fname = refid + '.xml'
            if not os.path.exists(fname):
                fname = os.path.join(self.my_dir, fname)
            if not self.quiet:
                print("parsing file: %s" % fname)
            p = Doxy2SWIG(fname, self.include_function_definition, self.quiet)
            p.generate()
            self.pieces.extend(self.clean_pieces(p.pieces))

    def write(self, fname):
        """Write the collected pieces to `fname` (a filename or an object
        with a `write` method) and close it.

        Pieces gathered through an index file were already cleaned per
        sub-file, so they are written as-is; otherwise they are passed
        through `clean_pieces` first.

        """
        o = my_open_write(fname)
        try:
            if self.multi:
                o.write("".join(self.pieces))
            else:
                o.write("".join(self.clean_pieces(self.pieces)))
        finally:
            # Close the handle even when cleaning or writing fails.
            o.close()

    def clean_pieces(self, pieces):
        """Cleans the list of strings given as `pieces`.  It replaces
        multiple newlines by a maximum of 2 and returns a new list.
        It also wraps the paragraphs nicely.

        """
        ret = []
        count = 0
        # First pass: collapse runs of '\n' pieces.  A run before the
        # closing '";' becomes one newline, a run longer than two becomes
        # two, and shorter runs are kept.
        for i in pieces:
            if i == '\n':
                count = count + 1
            else:
                if i == '";':
                    if count:
                        ret.append('\n')
                elif count > 2:
                    ret.append('\n\n')
                elif count:
                    ret.append('\n' * count)
                count = 0
                ret.append(i)

        # Second pass: re-wrap each blank-line separated paragraph.
        _data = "".join(ret)
        ret = []
        for i in _data.split('\n\n'):
            if i == 'Parameters:' or i == 'Exceptions:' or i == 'Returns:':
                # Underline section titles with dashes.
                ret.extend([i, '\n' + '-' * len(i), '\n\n'])
            elif i.find('// File:') > -1:  # leave comments alone.
                ret.extend([i, '\n'])
            else:
                _tmp = textwrap.fill(i.strip(), break_long_words=False)
                _tmp = self.lead_spc.sub(r'\1"\2', _tmp)
                ret.extend([_tmp, '\n\n'])
        return ret
| 427 | |
| 428 | |
def convert(input, output, include_function_definition=True, quiet=False):
    """Convert the Doxygen XML in `input` and write the result to `output`.

    Builds a `Doxy2SWIG` converter from the arguments, runs `generate()`
    on it, and then hands `output` to its `write()` method.
    """
    converter = Doxy2SWIG(
        input,
        include_function_definition=include_function_definition,
        quiet=quiet,
    )
    converter.generate()
    converter.write(output)
| 433 | |
| 434 | |
def main():
    """Command-line entry point: parse the options and run `convert`.

    Exactly two positional arguments are required (the input XML file and
    the output file).  `-n/--no-function-definition` turns off inclusion
    of doxygen function definitions and `-q/--quiet` is passed through as
    the `quiet` flag.  With any other number of positional arguments the
    parser reports an error and exits.
    """
    usage = __doc__
    parser = optparse.OptionParser(usage)
    parser.add_option("-n", '--no-function-definition',
                      action='store_true',
                      default=False,
                      dest='func_def',
                      help='do not include doxygen function definitions')
    parser.add_option("-q", '--quiet',
                      action='store_true',
                      default=False,
                      dest='quiet',
                      help='be quiet and minimize output')

    options, args = parser.parse_args()
    if len(args) != 2:
        # parser.error() already prefixes the message with "error: ", and
        # this branch is also reached when too many arguments are given.
        parser.error("expected exactly two arguments: input.xml output.i")

    # `func_def` is set by --no-function-definition, hence the negation.
    convert(args[0], args[1], not options.func_def, options.quiet)
| 454 | |
| 455 | |
| 456 | if __name__ == '__main__': |
| 457 | main() |