import logging
from email.utils import parsedate_to_datetime
from email.parser import BytesParser
+from email import policy
from . import arnied_wrapper
log.info("%s users successfully created!", len(usernames))
-def parse_mail_file(file_name, headers_only=True, attachment_filenames=False):
+def parse_mail_file(file_name, headers_only=True, attachment_filenames=False,
+ raise_on_defect=False, new_message_type=False):
"""
Parse given email file (e.g. a banned message).
This is basically a `email.parser.BytesParser().parse(...)` with given
- `headers_only`, that can handle BSMTP. As an extra bonus, you can just
- request headers plus the names of attached files.
+ `headers_only` and policy selection, that can also handle BSMTP. As an
+ extra bonus, you can just request headers plus the names of attached files.
Removes the SMTP envelope surrounding the email if present. The only
left-over might be a line with a '.' at the end of non-multipart messages
if `headers_only` is False.
- :param str file_name: file name for the email
+ :param str file_name: path to the file that contains the email text
:param bool headers_only: whether to parse only the email headers; set this
to False, e.g. if you want to check for
attachments using message.walk()
:param bool attachment_filenames: if you just want headers and names of
attached files, set `headers_only` and
this to True.
+ :param bool raise_on_defect: whether to raise an error if the email parser
+ encounters a defect (email policy `strict`) or
+ just append the defect to the message's
+ `defects` attribute
+ :param bool new_message_type: if False (our default), return the older
+ :py:class:`email.message.Message` (policy
+ `compat32`); if True, return the newer
+ :py:class:`email.message.EmailMessage` type
+ (policy `default`). Big difference!
:returns: either msg or 2-tuple `(msg, filenames)` if requested per arg
`attachment_filenames`
:rtype: :py:class:`email.message.Message` or
- (:py:class:`email.message.Message`, (str))
+ (:py:class:`email.message.Message`, (str)) or
+ one of these two with :py:class:`email.message.EmailMessage`
"""
msg = None
start_pos = 0
+
+ if new_message_type:
+ mail_policy = policy.default
+ else:
+ mail_policy = policy.compat32
+ if raise_on_defect:
+ mail_policy += policy.strict
+
with open(file_name, 'rb') as read_handle:
line = read_handle.readline()
if line.startswith(b'EHLO'):
else:
read_handle.seek(0) # forget we read the first line already
start_pos = read_handle.tell()
- msg = BytesParser().parse(read_handle, headersonly=headers_only)
+ msg = BytesParser(policy=mail_policy).parse(read_handle,
+ headersonly=headers_only)
if not attachment_filenames:
return msg
if headers_only:
with open(file_name, 'rb') as read_handle:
read_handle.seek(start_pos)
- full_msg = BytesParser().parse(read_handle, headersonly=False)
+ full_msg = BytesParser(policy=mail_policy).parse(read_handle,
+ headersonly=False)
else:
full_msg = msg
filenames = [get_filename(part) for part in full_msg.walk()]
:param message: message part, e.g. from
:py:meth:`email.message.Message.walk`
- :type message: :py:class:`email.message.Message`
+ :type message: :py:class:`email.message.Message` or
+ :py:class:`email.message.EmailMessage`
:param failobj: object to return in case of failure (defaults to None)
:param bool do_unwrap: undo line-break inserted by mail-creator; may remove
whitespace from file name; only applies to ascii
# '5YWs5byA6K++6K6h5YiS6KGoLnhsc3gi?='
# This may be a re-implementation of email.utils.collapse_rfc2231_value()
- # as mentioned in email.message.Message.get_param()
+ # as mentioned in email.message.EmailMessage.get_param()
# The form is: "=?charset?encoding?encoded text?="
SPLIT_REGEX = '\r?\n *' # should be CRLF but some files miss the \r