From: Christian Herdtweck Date: Mon, 15 Jul 2019 11:04:03 +0000 (+0200) Subject: Parse email from bytes instead of text X-Git-Tag: v1.6~1^2~2 X-Git-Url: http://developer.intra2net.com/git/?a=commitdiff_plain;h=1d21262cc9765b2cf8192753fb148fb692a7de88;p=pyi2ncommon Parse email from bytes instead of text This allows usage of other encodings than the system encoding (e.g. latin1) --- diff --git a/src/mail_utils.py b/src/mail_utils.py index 9efc632..19adb76 100644 --- a/src/mail_utils.py +++ b/src/mail_utils.py @@ -42,7 +42,7 @@ from base64 import b64decode import re import logging from email.utils import parsedate_to_datetime -from email.parser import Parser +from email.parser import BytesParser from . import arnied_wrapper @@ -127,7 +127,7 @@ def parse_mail_file(file_name, headers_only=True, attachment_filenames=False): """ Parse given email file (e.g. a banned message). - This is basically a `email.parser.Parser().parse(...)` with given + This is basically a `email.parser.BytesParser().parse(...)` with given `headers_only`, that can handle BSMTP. As an extra bonus, you can just request headers plus the names of attached files. @@ -149,27 +149,27 @@ def parse_mail_file(file_name, headers_only=True, attachment_filenames=False): """ msg = None start_pos = 0 - with open(file_name, 'r') as read_handle: + with open(file_name, 'rb') as read_handle: line = read_handle.readline() - if line.startswith('EHLO'): + if line.startswith(b'EHLO'): # there is a smtp header. skip to its end - while line.strip() != 'DATA': + while line.strip() != b'DATA': line = read_handle.readline() # the rest is the email plus a trailing '.' (ignored by parser if # multipart) else: read_handle.seek(0) # forget we read the first line already start_pos = read_handle.tell() - msg = Parser().parse(read_handle, headersonly=headers_only) + msg = BytesParser().parse(read_handle, headersonly=headers_only) if not attachment_filenames: return msg # otherwise need to parse complete message to get attachment file names if headers_only: - with open(file_name, 'r') as read_handle: + with open(file_name, 'rb') as read_handle: read_handle.seek(start_pos) - full_msg = Parser().parse(read_handle, headersonly=False) + full_msg = BytesParser().parse(read_handle, headersonly=False) else: full_msg = msg filenames = [get_filename(part) for part in full_msg.walk()]