fix stdio processing in cnfvar.py
author Philipp Gesang <philipp.gesang@intra2net.com>
Fri, 1 Dec 2017 10:57:50 +0000 (11:57 +0100)
committer Philipp Gesang <philipp.gesang@intra2net.com>
Fri, 1 Dec 2017 10:57:55 +0000 (11:57 +0100)
Data not supplied from the Python end still suffers from encoding
issues, so fix those.

src/cnfvar.py

index fe29150..910a8b2 100644 (file)
@@ -67,6 +67,14 @@ For example, the ::
     convert unicode back to latin1.
 
 
+notes on Python 3 conversion
+-------------------------------------------------------------------------------
+
+Since the original *CNF* format assumes latin-1 encoded data pretty much
+exclusively, we preserve the original encoding while parsing the file.
+When assembling the data structures returned to the user, values are then
+converted to strings so they can be used naturally at the Python end.
+
 implementation
 -------------------------------------------------------------------------------
 """
@@ -136,7 +144,7 @@ def walk_cnf(cnf, nested, fun, acc):
 #
 
 def is_string(s):
-    return isinstance(s, str) or isinstance(s, unicode)
+    return isinstance(s, str)
 
 
 def is_valid(acc,
@@ -288,7 +296,7 @@ def peek(cns):
     return next
 
 
-def get(cnf):
+def get(cns):
     current, _, _ = cns
     return current
 
@@ -301,7 +309,7 @@ class MalformedCNF(Exception):
     def __str__(self):
         return "Malformed CNF file: \"%s\"" % self.msg
 
-grab_parent_pattern = re.compile("""
+grab_parent_pattern = re.compile(b"""
                                     ^            # match from start
                                     \d+          # line number
                                     \s+          # spaces
@@ -316,7 +324,20 @@ def get_parent(line):
         return None
     return int(match.groups()[0])
 
-base_line_pattern = re.compile("""
+
+def marshal_in_number   (number):   return int (number)
+
+def marshal_in_parent   (parent):   return int (parent)
+
+def marshal_in_varname  (varname):  return from_latin1 (varname)
+
+def marshal_in_instance (instance): return int (instance)
+
+def marshal_in_data     (data):     return from_latin1 (data) if data is not None else ""
+
+def marshal_in_comment  (comment):  return comment and from_latin1(comment[1:].strip()) or None
+
+base_line_pattern = re.compile(b"""
                                     ^                    # match from start
                                     \s*                  # optional spaces
                                     (\d+)                # line number
@@ -352,14 +373,14 @@ def read_base_line(line):
         raise MalformedCNF("Syntax error in line \"\"\"%s\"\"\"" % line)
     number, varname, instance, data, comment = match.groups()
     return {
-        "number": int(number),
-        "varname": varname,
-        "instance": int(instance),
-        "data": data and from_latin1(data) or "",
-        "comment": comment and from_latin1(comment[1:].strip()) or None,
+        "number"   : marshal_in_number   (number),
+        "varname"  : marshal_in_varname  (varname),
+        "instance" : marshal_in_instance (instance),
+        "data"     : marshal_in_data     (data),
+        "comment"  : marshal_in_comment  (comment),
     }
 
-child_line_pattern = re.compile("""
+child_line_pattern = re.compile(b"""
                                      ^                    # match from start
                                      \s*                  # optional spaces
                                      (\d+)                # line number
@@ -397,12 +418,12 @@ def read_child_line(line):
         raise MalformedCNF("Syntax error in child line \"\"\"%s\"\"\"" % line)
     number, parent, varname, instance, data, comment = match.groups()
     return {
-        "number": int(number),
-        "parent": int(parent),
-        "varname": varname,
-        "instance": int(instance),
-        "data": data and from_latin1(data) or "",
-        "comment": comment and from_latin1(comment[1:].strip()) or None,
+        "number"   : marshal_in_number   (number),
+        "parent"   : marshal_in_parent   (parent),
+        "varname"  : marshal_in_varname  (varname),
+        "instance" : marshal_in_instance (instance),
+        "data"     : marshal_in_data     (data),
+        "comment"  : marshal_in_comment  (comment),
     }
 
 
@@ -648,6 +669,12 @@ def get_vars(cnf, data=None, instance=None):
 
     return {"cnf": []}
 
+def usage ():
+    print ("usage: cnfvar.py -"      , file=sys.stderr)
+    print (""                        , file=sys.stderr)
+    print ("    Read CNF from stdin.", file=sys.stderr)
+    print (""                        , file=sys.stderr)
+
 #
 #                         entry point for development
 #
@@ -657,12 +684,16 @@ def main(argv):
     if len(argv) > 1:
         first = argv[1]
         if first == "-":
-            cnf = read_cnf(sys.stdin.read())
+            cnf = read_cnf(sys.stdin.buffer.read())
             print_cnf(cnf)
+            return 0
         elif first == "test":
-            cnf = read_cnf(sys.stdin.read())
+            cnf = read_cnf(sys.stdin.buffer.read())
             cnff = get_vars(cnf, instance=2, data="FAX")
             print_cnf(cnff)
+            return 0
+    usage ()
+    return -1
 
 if __name__ == "__main__":
-    main(sys.argv)
+    sys.exit (main(sys.argv))