From f2eaac7ac436048ec9e38a26b4eae8bf42c91e95 Mon Sep 17 00:00:00 2001
From: Philipp Gesang
Date: Fri, 1 Dec 2017 11:57:50 +0100
Subject: [PATCH] fix stdio processing in cnfvar.py

Data not supplied from the Python end still suffers from encoding
issues, so fix those.
---
 src/cnfvar.py |   69 +++++++++++++++++++++++++++++++++++++++++--------------
 1 files changed, 50 insertions(+), 19 deletions(-)

diff --git a/src/cnfvar.py b/src/cnfvar.py
index fe29150..910a8b2 100644
--- a/src/cnfvar.py
+++ b/src/cnfvar.py
@@ -67,6 +67,14 @@ For example, the ::
 
     convert unicode back to latin1.
 
+notes on Python 3 conversion
+-------------------------------------------------------------------------------
+
+Since the original *CNF* format assumes latin-1 encoded data pretty much
+exclusively, we preserve the original encoding while parsing the file.
+When assembling the data structures returned to the user, values are then
+converted to strings so they can be used naturally at the Python end.
+
 implementation
 -------------------------------------------------------------------------------
 """
@@ -136,7 +144,7 @@ def walk_cnf(cnf, nested, fun, acc):
 #
 
 def is_string(s):
-    return isinstance(s, str) or isinstance(s, unicode)
+    return isinstance(s, str)
 
 
 def is_valid(acc,
@@ -288,7 +296,7 @@ def peek(cns):
     return next
 
 
-def get(cnf):
+def get(cns):
     current, _, _ = cns
     return current
 
@@ -301,7 +309,7 @@ class MalformedCNF(Exception):
     def __str__(self):
         return "Malformed CNF file: \"%s\"" % self.msg
 
-grab_parent_pattern = re.compile("""
+grab_parent_pattern = re.compile(b"""
     ^ # match from start
     \d+ # line number
     \s+ # spaces
@@ -316,7 +324,20 @@ def get_parent(line):
         return None
     return int(match.groups()[0])
 
-base_line_pattern = re.compile("""
+
+def marshal_in_number (number): return int (number)
+
+def marshal_in_parent (parent): return int (parent)
+
+def marshal_in_varname (varname): return from_latin1 (varname)
+
+def marshal_in_instance (instance): return int (instance)
+
+def marshal_in_data (data): return from_latin1 (data) if data is not None else ""
+
+def marshal_in_comment (comment): return comment and from_latin1(comment[1:].strip()) or None
+
+base_line_pattern = re.compile(b"""
     ^ # match from start
     \s* # optional spaces
     (\d+) # line number
@@ -352,14 +373,14 @@ def read_base_line(line):
         raise MalformedCNF("Syntax error in line \"\"\"%s\"\"\"" % line)
     number, varname, instance, data, comment = match.groups()
     return {
-        "number": int(number),
-        "varname": varname,
-        "instance": int(instance),
-        "data": data and from_latin1(data) or "",
-        "comment": comment and from_latin1(comment[1:].strip()) or None,
+        "number" : marshal_in_number (number),
+        "varname" : marshal_in_varname (varname),
+        "instance" : marshal_in_instance (instance),
+        "data" : marshal_in_data (data),
+        "comment" : marshal_in_comment (comment),
     }
 
-child_line_pattern = re.compile("""
+child_line_pattern = re.compile(b"""
     ^ # match from start
     \s* # optional spaces
     (\d+) # line number
@@ -397,12 +418,12 @@ def read_child_line(line):
         raise MalformedCNF("Syntax error in child line \"\"\"%s\"\"\"" % line)
     number, parent, varname, instance, data, comment = match.groups()
     return {
-        "number": int(number),
-        "parent": int(parent),
-        "varname": varname,
-        "instance": int(instance),
-        "data": data and from_latin1(data) or "",
-        "comment": comment and from_latin1(comment[1:].strip()) or None,
+        "number" : marshal_in_number (number),
+        "parent" : marshal_in_parent (parent),
+        "varname" : marshal_in_varname (varname),
+        "instance" : marshal_in_instance (instance),
+        "data" : marshal_in_data (data),
+        "comment" : marshal_in_comment (comment),
     }
 
 
@@ -648,6 +669,12 @@ def get_vars(cnf, data=None, instance=None):
     return {"cnf": []}
 
 
+def usage ():
+    print ("usage: cnfvar.py -" , file=sys.stderr)
+    print ("" , file=sys.stderr)
+    print (" Read CNF from stdin.", file=sys.stderr)
+    print ("" , file=sys.stderr)
+
 #
 # entry point for development
 #
@@ -657,12 +684,16 @@ def main(argv):
     if len(argv) > 1:
         first = argv[1]
         if first == "-":
-            cnf = read_cnf(sys.stdin.read())
+            cnf = read_cnf(sys.stdin.buffer.read())
             print_cnf(cnf)
+            return 0
         elif first == "test":
-            cnf = read_cnf(sys.stdin.read())
+            cnf = read_cnf(sys.stdin.buffer.read())
             cnff = get_vars(cnf, instance=2, data="FAX")
             print_cnf(cnff)
+            return 0
+    usage ()
+    return -1
 
 if __name__ == "__main__":
-    main(sys.argv)
+    sys.exit (main(sys.argv))
-- 
1.7.1