From b359b15cdb646834771647297d3b1a4aefa2992a Mon Sep 17 00:00:00 2001 From: Christian Herdtweck Date: Thu, 24 Jan 2019 16:43:08 +0100 Subject: [PATCH] Create new mail_utils option: get headers+attachments Often I parse and transfer big email messages when actually I only want the headers plus the attachment file names. Create an option for this in mail_utils.parse_mail_file. --- src/mail_utils.py | 42 ++++++++++++++++++++++++++++++++++-------- 1 files changed, 34 insertions(+), 8 deletions(-) diff --git a/src/mail_utils.py b/src/mail_utils.py index fde7a4b..75c4bb5 100644 --- a/src/mail_utils.py +++ b/src/mail_utils.py @@ -688,21 +688,32 @@ def create_users(usernames, config_file, params): log.info("%s users successfully created!", len(usernames)) -def parse_mail_file(file_name, headers_only=True): +def parse_mail_file(file_name, headers_only=True, attachment_filenames=False): """ Parse given email file (e.g. a banned message). - :param str file_name: file name for the email - :param bool headers_only: whether to parse only the email headers; set this - to False, e.g. if you want to check for - attachments using message.walk() - :returns: parsed email - :rtype: root message object (of class :py:class:`email.message.Message`) + This is basically an `email.parser.Parser().parse(...)` with given + `headers_only`, that can handle BSMTP. As an extra bonus, you can just + request headers plus the names of attached files. Removes the SMTP envelope surrounding the email if present. Only left-over might be a line with a '.' at end of non-multipart messages if `headers_only` is False. + + :param str file_name: file name for the email + :param bool headers_only: whether to parse only the email headers; set this + to False, e.g. if you want to check for + attachments using message.walk() + :param bool attachment_filenames: if you just want headers and names of + attached files, set `headers_only` and + this to True. 
+ :returns: either msg or 2-tuple `(msg, filenames)` if requested per arg + `attachment_filenames` + :rtype: :py:class:`email.message.Message` or + (:py:class:`email.message.Message`, (str)) """ + msg = None + start_pos = 0 with open(file_name, 'r') as read_handle: line = read_handle.readline() if line.startswith('EHLO'): @@ -713,7 +724,22 @@ def parse_mail_file(file_name, headers_only=True): # multipart) else: read_handle.seek(0) # forget we read the first line already - return Parser().parse(read_handle, headersonly=headers_only) + start_pos = read_handle.tell() + msg = Parser().parse(read_handle, headersonly=headers_only) + + if not attachment_filenames: + return msg + + # otherwise need to parse complete message to get attachment file names + if headers_only: + with open(file_name, 'r') as read_handle: + read_handle.seek(start_pos) + full_msg = Parser().parse(read_handle, headersonly=False) + else: + full_msg = msg + filenames = [get_filename(part) for part in full_msg.walk()] + return msg, tuple(filename for filename in filenames + if filename is not None) def get_user_mail(user, mailbox='INBOX', **kwargs): -- 1.7.1