| 1 | #!/usr/bin/env python |
| 2 | # |
| 3 | # The software in this package is distributed under the GNU General |
| 4 | # Public License version 2 (with a special exception described below). |
| 5 | # |
| 6 | # A copy of GNU General Public License (GPL) is included in this distribution, |
| 7 | # in the file COPYING.GPL. |
| 8 | # |
| 9 | # As a special exception, if other files instantiate templates or use macros |
| 10 | # or inline functions from this file, or you compile this file and link it |
| 11 | # with other works to produce a work based on this file, this file |
| 12 | # does not by itself cause the resulting work to be covered |
| 13 | # by the GNU General Public License. |
| 14 | # |
| 15 | # However the source code for this file must still be made available |
| 16 | # in accordance with section (3) of the GNU General Public License. |
| 17 | # |
| 18 | # This exception does not invalidate any other reasons why a work based |
| 19 | # on this file might be covered by the GNU General Public License. |
| 20 | # |
| 21 | # Copyright (c) 2016-2018 Intra2net AG <info@intra2net.com> |
| 22 | |
| 23 | """ |
| 24 | Functionality for parsing cnfvars from strings. |
| 25 | |
| 26 | .. codeauthor:: Intra2net |
| 27 | |
| 28 | This module provides read and parse functionality for the Intra2net *CNF* |
| 29 | format from strings and by extension cnf files. |
| 30 | |
| 31 | The input string takes one round-trip through the parsers and will error out on |
| 32 | problematic lines. Thus, this module can also be used to syntax-check CNF data. |
| 33 | |
| 34 | Note that line numbers may be arbitrarily reassigned in the process. Of course, |
| 35 | parent references and the relative ordering of lines will be preserved in this |
| 36 | case. |
| 37 | |
| 38 | .. todo:: |
| 39 | Decide on some facility for automatic fixup of line number values. The |
| 40 | internal representation is recursive so line numbers are not needed to |
| 41 | establish a variable hierarchy. They might as well be omitted from the json |
| 42 | input and only added when writing cnf. For the time being a lack of the |
| 43 | "number" field is interpreted as an error. Though it should at least |
| 44 | optionally be possible to omit the numbers entirely and have a function add |
| 45 | them after the fact. This might as well be added to :py:func:`is_cnf` |
| 46 | though it would be counterintuitive to have a predicate mutate its |
| 47 | argument. So maybe it could return a second argument to indicate a valid |
| 48 | structure that needs fixup or something like that. |
| 49 | |
| 50 | .. note:: |
| 51 | Since the original *CNF* format assumes latin-1 encoded data pretty much |
| 52 | exclusively, we preserve the original encoding while parsing the file. |
| 53 | When assembling the data structures returned to the user, values are then |
| 54 | converted to strings so they can be used naturally at the Python end. |
| 55 | """ |
| 56 | |
| 57 | import functools |
| 58 | import sys |
| 59 | import json |
| 60 | import re |
| 61 | import io |
| 62 | |
| 63 | |
| 64 | ############################################################################### |
| 65 | # CONSTANTS |
| 66 | ############################################################################### |
| 67 | |
| 68 | |
| 69 | CNF_FIELD_MANDATORY = set ([ "varname", "data", "instance" ]) |
| 70 | CNF_FIELD_OPTIONAL = set ([ "parent", "children", "comment", "number" ]) |
| 71 | CNF_FIELD_KNOWN = CNF_FIELD_MANDATORY | CNF_FIELD_OPTIONAL |
| 72 | |
| 73 | grab_parent_pattern = re.compile(rb""" |
| 74 | ^ # match from start |
| 75 | \s* # optional spaces |
| 76 | \d+ # line number |
| 77 | \s+ # spaces |
| 78 | \((\d+)\) # parent |
| 79 | """, |
| 80 | re.VERBOSE) |
| 81 | |
| 82 | base_line_pattern = re.compile(rb""" |
| 83 | ^ # match from start |
| 84 | \s* # optional spaces |
| 85 | (\d+) # line number |
| 86 | \s+ # spaces |
| 87 | ([A-Z][A-Z0-9_]*) # varname |
| 88 | \s* # optional spaces |
| 89 | , # delimiter |
| 90 | \s* # optional spaces |
| 91 | (-1|\d+) # instance |
| 92 | \s* # optional spaces |
| 93 | : # delimiter |
| 94 | \s* # optional spaces |
| 95 | \"( # quoted string (data) |
| 96 | (?: \\\" # (of escaped dquote |
| 97 | |[^\"])* # or anything not a |
| 98 | )\" # literal quote) |
| 99 | \s* # optional spaces |
| 100 | ( # bgroup |
| 101 | \# # comment leader |
| 102 | \s* # optional spaces |
| 103 | .* # string (comment) |
| 104 | )? # egroup, optional |
| 105 | $ # eol |
| 106 | """, |
| 107 | re.VERBOSE) |
| 108 | |
| 109 | child_line_pattern = re.compile(rb""" |
| 110 | ^ # match from start |
| 111 | \s* # optional spaces |
| 112 | (\d+) # line number |
| 113 | \s+ # spaces |
| 114 | \((\d+)\) # parent |
| 115 | \s+ # spaces |
| 116 | ([A-Z][A-Z0-9_]*) # varname |
| 117 | \s* # optional spaces |
| 118 | , # delimiter |
| 119 | \s* # optional spaces |
| 120 | (-1|\d+) # instance |
| 121 | \s* # optional spaces |
| 122 | : # delimiter |
| 123 | \s* # optional spaces |
| 124 | \"([^\"]*)\" # quoted string (data) |
| 125 | \s* # optional spaces |
| 126 | ( # bgroup |
| 127 | \# # comment leader |
| 128 | \s* # optional spaces |
| 129 | .* # string (comment) |
| 130 | )? # egroup, optional |
| 131 | $ # eol |
| 132 | """, |
| 133 | re.VERBOSE) |
| 134 | |
| 135 | |
| 136 | ############################################################################### |
| 137 | # HELPERS |
| 138 | ############################################################################### |
| 139 | |
| 140 | |
| 141 | # |
| 142 | # Sadly, the Intranator is still stuck with one leg in the 90s. |
| 143 | # |
| 144 | def to_latin1(s): |
| 145 | """Take given unicode str and convert it to a latin1-encoded `bytes`.""" |
| 146 | return s.encode("latin-1") |
| 147 | |
| 148 | |
| 149 | def from_latin1(s): |
| 150 | """Take given latin1-encoded `bytes` value and convert it to `str`.""" |
| 151 | return s.decode("latin-1") |
| 152 | |
| 153 | |
| 154 | # |
| 155 | # Conversion functions |
| 156 | # |
| 157 | |
| 158 | def marshal_in_number(number): |
| 159 | return int(number) |
| 160 | |
| 161 | |
| 162 | def marshal_in_parent(parent): |
| 163 | return int(parent) |
| 164 | |
| 165 | |
| 166 | def marshal_in_instance(instance): |
| 167 | return int(instance) |
| 168 | |
| 169 | |
| 170 | def marshal_in_varname(varname): |
| 171 | return from_latin1(varname).lower() |
| 172 | |
| 173 | |
| 174 | def marshal_in_data(data): |
| 175 | return from_latin1(data).replace(r"\"", "\"") if data is not None else "" |
| 176 | |
| 177 | |
| 178 | def marshal_in_comment(comment): |
| 179 | return comment and from_latin1(comment[1:].strip()) or None |
| 180 | |
| 181 | |
| 182 | # |
| 183 | # Type checking |
| 184 | # |
| 185 | |
| 186 | def is_string(s): |
| 187 | return isinstance(s, str) |
| 188 | |
| 189 | |
| 190 | ############################################################################### |
| 191 | # EXCEPTIONS |
| 192 | ############################################################################### |
| 193 | |
| 194 | |
| 195 | class InvalidCNF(Exception): |
| 196 | |
| 197 | def __init__(self, msg): |
| 198 | self.msg = msg |
| 199 | |
| 200 | def __str__(self): |
| 201 | return "Malformed CNF_VAR: \"%s\"" % self.msg |
| 202 | |
| 203 | |
| 204 | class MalformedCNF(Exception): |
| 205 | |
| 206 | def __init__(self, msg): |
| 207 | self.msg = msg |
| 208 | |
| 209 | def __str__(self): |
| 210 | return "Malformed CNF file: \"%s\"" % self.msg |
| 211 | |
| 212 | |
| 213 | ############################################################################### |
| 214 | # VALIDATION |
| 215 | ############################################################################### |
| 216 | |
| 217 | |
| 218 | def is_valid(acc, |
| 219 | nested, |
| 220 | comment, |
| 221 | data, |
| 222 | instance, |
| 223 | number, |
| 224 | parent, |
| 225 | varname): |
| 226 | if varname is None: |
| 227 | raise InvalidCNF("CNF_VAR lacks a name.") |
| 228 | elif not is_string(varname): |
| 229 | raise InvalidCNF("Varname field of CNF_VAR \"%s\" is not a string." |
| 230 | % varname) |
| 231 | elif varname == "": |
| 232 | raise InvalidCNF("Varname field of CNF_VAR is the empty string.") |
| 233 | |
| 234 | if comment is not None: |
| 235 | if not is_string(comment): |
| 236 | raise InvalidCNF("Comment field of CNF_VAR \"%s\" is not a string." |
| 237 | % varname) |
| 238 | |
| 239 | if data is None: |
| 240 | raise InvalidCNF("Data field of CNF_VAR \"%s\" is empty." |
| 241 | % varname) |
| 242 | elif not is_string(data): |
| 243 | raise InvalidCNF("Data field of CNF_VAR \"%s\" is not a string." |
| 244 | % varname) |
| 245 | |
| 246 | if instance is None: |
| 247 | raise InvalidCNF("Instance field of CNF_VAR \"%s\" is empty." |
| 248 | % varname) |
| 249 | elif not isinstance(instance, int): |
| 250 | raise InvalidCNF("Instance field of CNF_VAR \"%s\" is not an integer." |
| 251 | % varname) |
| 252 | |
| 253 | if number is None: |
| 254 | raise InvalidCNF("Number field of CNF_VAR \"%s\" is empty." |
| 255 | % varname) |
| 256 | elif not isinstance(number, int): |
| 257 | raise InvalidCNF("Number field of CNF_VAR \"%s\" is not an integer." |
| 258 | % varname) |
| 259 | elif number < 1: |
| 260 | raise InvalidCNF("Number field of CNF_VAR \"%s\" must be positive, not %d." |
| 261 | % (varname, number)) |
| 262 | else: |
| 263 | other = acc.get(number, None) |
| 264 | if other is not None: # already in use |
| 265 | raise InvalidCNF("Number field of CNF_VAR \"%s\" already used by variable %s." |
| 266 | % (varname, other)) |
| 267 | acc[number] = varname |
| 268 | |
| 269 | if nested is True: |
| 270 | if parent is None: |
| 271 | raise InvalidCNF("Parent field of nested CNF_VAR \"%s\" is empty." |
| 272 | % varname) |
| 273 | elif not isinstance(parent, int): |
| 274 | raise InvalidCNF("Parent field of CNF_VAR \"%s\" is not an integer." |
| 275 | % varname) |
| 276 | else: |
| 277 | if parent is not None: |
| 278 | raise InvalidCNF("Flat CNF_VAR \"%s\" has nonsensical parent field \"%s\"." |
| 279 | % (varname, parent)) |
| 280 | return acc |
| 281 | |
| 282 | |
| 283 | def is_cnf(root): |
| 284 | """ |
| 285 | is_cnf -- Predicate testing "CNF_VAR-ness". Folds the :py:func:`is_valid` |
| 286 | predicate over the argument which can be either a well-formed CNF |
| 287 | dictionary or a list of CNF_VARs. |
| 288 | |
| 289 | :type root: cnfvar or cnf list |
| 290 | :rtype: bool |
| 291 | |
| 292 | Not that if it returns at all, ``is_cnf()`` returns ``True``. Any non |
| 293 | well-formed member of the argument will cause the predicate to bail out |
| 294 | with an exception during traversal. |
| 295 | """ |
| 296 | cnf = cnf_root(root) |
| 297 | if cnf is None: |
| 298 | raise InvalidCNF(root) |
| 299 | return walk_cnf(cnf, False, is_valid, {}) is not None |
| 300 | |
| 301 | |
| 302 | def is_cnf_var(obj): |
| 303 | """ |
| 304 | Check whether a dictionary is a valid CNF. |
| 305 | |
| 306 | :param dict obj: dictionary to check |
| 307 | :returns: True if the dictionary has all the mandatory fields and no |
| 308 | unknown fields, False otherwise |
| 309 | :rtype: bool |
| 310 | """ |
| 311 | assert isinstance (obj, dict) |
| 312 | |
| 313 | for f in CNF_FIELD_MANDATORY: |
| 314 | if obj.get(f, None) is None: |
| 315 | return False |
| 316 | |
| 317 | for f in obj: |
| 318 | if f not in CNF_FIELD_KNOWN: |
| 319 | return False |
| 320 | |
| 321 | return True |
| 322 | |
| 323 | |
| 324 | ############################################################################### |
| 325 | # DESERIALIZATION |
| 326 | ############################################################################### |
| 327 | # |
| 328 | # CNF reader |
| 329 | # |
| 330 | # Parsing usually starts from the `read_cnf`, which accepts a string containing |
| 331 | # the variables to parse in the same structure as returned by `get_cnf`. |
| 332 | # |
| 333 | # In the `prepare` function the string is split into lines, and a 3-element |
| 334 | # tuple is built. The first (named `current`) and second (named `next`) |
| 335 | # elements of this tuple are respectively the first and second non-empty lines |
| 336 | # of the input, while the third is a list of the remaining lines. This tuple is |
| 337 | # named `state` in the implementation below, and it is passed around during |
| 338 | # parsing. The `get` and `peek` functions are used to easily retrieve the |
| 339 | # `current` and `next` items from the "state". |
| 340 | # |
| 341 | # When we "advance" the state, we actually drop the "current" element, |
| 342 | # replacing it with the "next", while a new "next" is popped from the list of |
| 343 | # remaining lines. Parsing is done this way because we need to look ahead at |
| 344 | # the next line -- if it is a child it needs to be appended to the `children` |
| 345 | # property of the current line. |
| 346 | # |
| 347 | # Regular expressions are used to extract important information from the CNF |
| 348 | # lines. Finally, once parsing is completed, a dictionary is returned. The dict |
| 349 | # has the same structure as the serialized JSON output returned by |
| 350 | # `get_cnf -j`. |
| 351 | # |
| 352 | |
| 353 | |
| 354 | def read_cnf(data): |
| 355 | """ |
| 356 | Read cnf data from data bytes. |
| 357 | |
| 358 | :param data: raw data |
| 359 | :type data: str or bytes |
| 360 | :return: the parsed cnf data |
| 361 | :rtype: {str, {str, str or int}} |
| 362 | """ |
| 363 | if isinstance(data, str): |
| 364 | data = to_latin1(data) |
| 365 | state = prepare(data) |
| 366 | if state is None: |
| 367 | raise InvalidCNF("Empty input string.") |
| 368 | |
| 369 | cnf = parse_cnf_root(state) |
| 370 | if is_cnf(cnf) is False: |
| 371 | raise TypeError("Invalid CNF_VAR.") |
| 372 | return {"cnf": cnf} |
| 373 | |
| 374 | |
| 375 | def prepare(raw): |
| 376 | """ |
| 377 | Build 3-element iterable from a CNF string dump. |
| 378 | |
| 379 | :param raw: string content as returned by `get_cnf` |
| 380 | :type raw: bytes |
| 381 | :returns: 3-element tuple, where the first two elements are the first two |
| 382 | lines of the output and the third is a list containing the rest |
| 383 | of the lines in reverse. |
| 384 | :rtype: (str * str option * str list) option |
| 385 | """ |
| 386 | lines = raw.splitlines() |
| 387 | lines.reverse() |
| 388 | try: |
| 389 | first = lines.pop() |
| 390 | except IndexError: |
| 391 | return None |
| 392 | |
| 393 | try: |
| 394 | second = lines.pop() |
| 395 | except IndexError: |
| 396 | second = None |
| 397 | |
| 398 | first = first.strip() |
| 399 | if first == b"": |
| 400 | return advance((first, second, lines)) |
| 401 | |
| 402 | return (first, second, lines) |
| 403 | |
| 404 | |
| 405 | def advance(cns): |
| 406 | """ |
| 407 | Pop the next line from the stream, advancing the tuple. |
| 408 | |
| 409 | :param cns: a 3-element tuple containing two CNF lines and a list of the |
| 410 | remaining lines |
| 411 | :type cnd: (str, str, [str]) |
| 412 | :returns: a new tuple with a new item popped from the list of lines |
| 413 | :rtype cnd: (str, str, [str]) |
| 414 | """ |
| 415 | current, next, stream = cns |
| 416 | if next is None: # reached end of stream |
| 417 | return None |
| 418 | current = next |
| 419 | |
| 420 | try: |
| 421 | next = stream.pop() |
| 422 | next = next.strip() |
| 423 | except IndexError: |
| 424 | next = None |
| 425 | |
| 426 | if current == "": # skip blank lines |
| 427 | return advance((current, next, stream)) |
| 428 | return (current, next, stream) |
| 429 | |
| 430 | |
| 431 | def get(cns): |
| 432 | """ |
| 433 | Get the current line from the state without advancing it. |
| 434 | |
| 435 | :param cns: a 3-element tuple containing two CNF lines and a list of the |
| 436 | remaining lines |
| 437 | :type cnd: (str, str, [str]) |
| 438 | :returns: the CNF line stored as `current` |
| 439 | :rtype: str |
| 440 | """ |
| 441 | current, _, _ = cns |
| 442 | return current |
| 443 | |
| 444 | |
| 445 | def parse_cnf_root(state): |
| 446 | """ |
| 447 | Iterate over and parse a list of CNF lines. |
| 448 | |
| 449 | :param state: a 3-element tuple containing two lines and a list of the |
| 450 | remaining lines |
| 451 | :type state: (str, str, [str]) |
| 452 | :returns: a list of parsed CNF variables |
| 453 | :rtype: [dict] |
| 454 | |
| 455 | The function will parse the first element from the `state` tuple, then read |
| 456 | the next line to see if it is a child variable. If it is, it will be |
| 457 | appended to the last parsed CNF, otherwise top-level parsing is done |
| 458 | normally. |
| 459 | """ |
| 460 | lines = [] |
| 461 | current = get(state) |
| 462 | while state: |
| 463 | cnf_line = read_base_line(current) |
| 464 | if cnf_line is not None: |
| 465 | lines.append(cnf_line) |
| 466 | state = advance(state) |
| 467 | if state is None: # -> nothing left to do |
| 468 | break |
| 469 | current = get(state) |
| 470 | parent = get_parent(current) # peek at next line |
| 471 | if parent is not None: # -> recurse into children |
| 472 | (state, children, _parent) = parse_cnf_children(state, parent) |
| 473 | cnf_line["children"] = children |
| 474 | if state is None: |
| 475 | break |
| 476 | current = get(state) |
| 477 | else: |
| 478 | state = advance(state) |
| 479 | if state is None: |
| 480 | break |
| 481 | current = get(state) |
| 482 | return lines |
| 483 | |
| 484 | |
| 485 | def parse_cnf_children(state, parent): |
| 486 | """ |
| 487 | Read and parse child CNFs of a given parent until there is none left. |
| 488 | |
| 489 | :param state: a 3-element tuple containing two lines and a list of the |
| 490 | remaining lines |
| 491 | :type state: (str, str, [str]) |
| 492 | :param parent: id of the parent whose children we are looking for |
| 493 | :type parent: int |
| 494 | :returns: a 3-element tuple with the current state, a list of children of |
| 495 | the given parent and the parent ID |
| 496 | :rtype: (tuple, [str], int) |
| 497 | |
| 498 | The function will recursively parse child lines from the `state` tuple |
| 499 | until one of these conditions is satisfied: |
| 500 | |
| 501 | 1. the input is exhausted |
| 502 | 2. the next CNF line |
| 503 | 2.1. is a toplevel line |
| 504 | 2.2. is a child line whose parent has a lower parent number |
| 505 | |
| 506 | Conceptually, 2.1 is a very similar to 2.2 but due to the special status of |
| 507 | toplevel lines in CNF we need to handle them separately. |
| 508 | |
| 509 | Note that since nesting of CNF vars is achieved via parent line numbers, |
| 510 | lines with different parents could appear out of order. libcnffile will |
| 511 | happily parse those and still assign children to the specified parent: |
| 512 | |
| 513 | :: |
| 514 | # set_cnf <<THATSALL |
| 515 | 1 USER,1337: "l33t_h4x0r" |
| 516 | 2 (1) USER_GROUP_MEMBER_REF,0: "2" |
| 517 | 4 USER,1701: "picard" |
| 518 | 5 (4) USER_GROUP_MEMBER_REF,0: "2" |
| 519 | 6 (4) USER_PASSWORD,0: "engage" |
| 520 | 3 (1) USER_PASSWORD,0: "hacktheplanet" |
| 521 | THATSALL |
| 522 | # get_cnf user 1337 |
| 523 | 1 USER,1337: "l33t_h4x0r" |
| 524 | 2 (1) USER_GROUP_MEMBER_REF,0: "2" |
| 525 | 3 (1) USER_PASSWORD,0: "hacktheplanet" |
| 526 | # get_cnf user 1701 |
| 527 | 1 USER,1701: "picard" |
| 528 | 2 (1) USER_GROUP_MEMBER_REF,0: "2" |
| 529 | 3 (1) USER_PASSWORD,0: "engage" |
| 530 | |
| 531 | It is a limitation of ``cnfvar.py`` that it cannot parse CNF data |
| 532 | structured like the above example: child lists are only populated from |
| 533 | subsequent CNF vars using the parent number solely to track nesting levels. |
| 534 | The parser does not keep track of line numbers while traversing the input |
| 535 | so it doesn’t support retroactively assigning a child to anything else but |
| 536 | the immediate parent. |
| 537 | """ |
| 538 | lines = [] |
| 539 | current = get(state) |
| 540 | while True: |
| 541 | cnf_line = read_child_line(current) |
| 542 | if cnf_line is not None: |
| 543 | lines.append(cnf_line) |
| 544 | state = advance(state) |
| 545 | if state is None: |
| 546 | break |
| 547 | current = get(state) |
| 548 | new_parent = get_parent(current) |
| 549 | if new_parent is None: |
| 550 | # drop stack |
| 551 | return (state, lines, None) |
| 552 | if new_parent > parent: |
| 553 | # parent is further down in hierarchy -> new level |
| 554 | (state, children, new_parent) = \ |
| 555 | parse_cnf_children (state, new_parent) |
| 556 | cnf_line["children"] = children |
| 557 | if state is None: |
| 558 | break |
| 559 | current = get(state) |
| 560 | new_parent = get_parent(current) |
| 561 | if new_parent is None: |
| 562 | # drop stack |
| 563 | return (state, lines, None) |
| 564 | if new_parent < parent: |
| 565 | # parent is further up in hierarchy -> pop level |
| 566 | return (state, lines, new_parent) |
| 567 | # new_parent == parent -> continue parsing on same level |
| 568 | return (state, lines, parent) |
| 569 | |
| 570 | |
| 571 | def get_parent(line): |
| 572 | """ |
| 573 | Extract the ID of the parent for a given CNF line. |
| 574 | |
| 575 | :param str line: CNF line |
| 576 | :returns: parent ID or None if no parent is found |
| 577 | :rtype: int or None |
| 578 | """ |
| 579 | match = re.match(grab_parent_pattern, line) |
| 580 | if match is None: # -> no parent |
| 581 | return None |
| 582 | return int(match.groups()[0]) |
| 583 | |
| 584 | |
| 585 | def read_base_line(line): |
| 586 | """ |
| 587 | Turn one top-level CNF line into a dictionary. |
| 588 | |
| 589 | :param str line: CNF line |
| 590 | :rtype: {str: Any} |
| 591 | |
| 592 | This performs the necessary decoding on values to obtain proper Python |
| 593 | strings from 8-bit encoded CNF data. |
| 594 | |
| 595 | The function only operates on individual lines. Argument strings that |
| 596 | contain data for multiple lines – this includes child lines of the current |
| 597 | CNF var! – will trigger a parsing exception. |
| 598 | """ |
| 599 | if len(line.strip()) == 0: |
| 600 | return None # ignore empty lines |
| 601 | if line[0] == b"#": |
| 602 | return None # ignore comments |
| 603 | |
| 604 | match = re.match(base_line_pattern, line) |
| 605 | if match is None: |
| 606 | raise MalformedCNF("Syntax error in line \"\"\"%s\"\"\"" % line) |
| 607 | number, varname, instance, data, comment = match.groups() |
| 608 | return { |
| 609 | "number" : marshal_in_number (number), |
| 610 | "varname" : marshal_in_varname (varname), |
| 611 | "instance" : marshal_in_instance (instance), |
| 612 | "data" : marshal_in_data (data), |
| 613 | "comment" : marshal_in_comment (comment), |
| 614 | } |
| 615 | |
| 616 | |
| 617 | def read_child_line(line): |
| 618 | """ |
| 619 | Turn one child CNF line into a dictionary. |
| 620 | |
| 621 | :param str line: CNF line |
| 622 | :rtype: {str: Any} |
| 623 | |
| 624 | This function only operates on individual lines. If the argument string is |
| 625 | syntactically valid but contains input representing multiple CNF vars, a |
| 626 | parse error will be thrown. |
| 627 | """ |
| 628 | if len(line.strip()) == 0: |
| 629 | return None # ignore empty lines |
| 630 | if line[0] == "#": |
| 631 | return None # ignore comments |
| 632 | |
| 633 | match = re.match(child_line_pattern, line) |
| 634 | if match is None: |
| 635 | raise MalformedCNF("Syntax error in child line \"\"\"%s\"\"\"" |
| 636 | % from_latin1 (line)) |
| 637 | number, parent, varname, instance, data, comment = match.groups() |
| 638 | return { |
| 639 | "number" : marshal_in_number (number), |
| 640 | "parent" : marshal_in_parent (parent), |
| 641 | "varname" : marshal_in_varname (varname), |
| 642 | "instance" : marshal_in_instance (instance), |
| 643 | "data" : marshal_in_data (data), |
| 644 | "comment" : marshal_in_comment (comment), |
| 645 | } |
| 646 | |
| 647 | |
| 648 | ############################################################################### |
| 649 | # SERIALIZATION |
| 650 | ############################################################################### |
| 651 | |
| 652 | |
| 653 | def cnf_root(root): |
| 654 | """ |
| 655 | Extract a list of CNFs from a given structure. |
| 656 | |
| 657 | :param root: list of CNFs or a CNF dictionary |
| 658 | :type root: [dict] or dict |
| 659 | :raises: :py:class:`TypeError` if no CNFs can be extracted |
| 660 | :returns: list with one or more CNF objects |
| 661 | :rtype: [dict] |
| 662 | |
| 663 | Output varies depending on a few conditions: |
| 664 | - If `root` is a list, return it right away |
| 665 | - If `root` is a dict corresponding to a valid CNF value, return it wrapped |
| 666 | in a list |
| 667 | - If `root` is a dict with a `cnf` key containg a list (as the JSON |
| 668 | returned by `get_cnf -j`), return the value |
| 669 | - Otherwise, raise an error |
| 670 | """ |
| 671 | if isinstance(root, list): |
| 672 | return root |
| 673 | if not isinstance(root, dict): |
| 674 | raise TypeError( |
| 675 | "Expected dictionary of CNF_VARs, got %s." % type(root)) |
| 676 | if is_cnf_var(root): |
| 677 | return [root] |
| 678 | cnf = root.get("cnf", None) |
| 679 | if not isinstance(cnf, list): |
| 680 | raise TypeError("Expected list of CNF_VARs, got %s." % type(cnf)) |
| 681 | return cnf |
| 682 | |
| 683 | |
| 684 | ############################################################################### |
| 685 | # TRAVERSAL |
| 686 | ############################################################################### |
| 687 | |
| 688 | |
| 689 | def walk_cnf(cnf, nested, fun, acc): |
| 690 | """ |
| 691 | Depth-first traversal of a CNF tree. |
| 692 | |
| 693 | :type cnf: cnf list |
| 694 | :type nested: bool |
| 695 | :type fun: 'a -> bool -> (cnf stuff) -> 'a |
| 696 | :type acc: 'a |
| 697 | :rtype: 'a |
| 698 | |
| 699 | Executes ``fun`` recursively for each node in the tree. The function |
| 700 | receives the accumulator ``acc`` which can be of an arbitrary type as first |
| 701 | argument. The second argument is a flag indicating whether the current |
| 702 | CNF var is a child (if ``True``) or a parent var. CNF member fields are |
| 703 | passed via named optional arguments. |
| 704 | """ |
| 705 | for var in cnf: |
| 706 | acc = fun(acc, |
| 707 | nested, |
| 708 | comment=var.get("comment", None), |
| 709 | data=var.get("data", None), |
| 710 | instance=var.get("instance", None), |
| 711 | number=var.get("number", None), |
| 712 | parent=var.get("parent", None), |
| 713 | varname=var.get("varname", None)) |
| 714 | children = var.get("children", None) |
| 715 | if children is not None: |
| 716 | acc = walk_cnf(children, True, fun, acc) |
| 717 | return acc |