[pyi2ncommon] / src / mail_utils.py

# This Python file uses the following encoding: utf-8

# The software in this package is distributed under the GNU General
# Public License version 2 (with a special exception described below).
#
# A copy of GNU General Public License (GPL) is included in this distribution,
# in the file COPYING.GPL.
#
# As a special exception, if other files instantiate templates or use macros
# or inline functions from this file, or you compile this file and link it
# with other works to produce a work based on this file, this file
# does not by itself cause the resulting work to be covered
# by the GNU General Public License.
#
# However the source code for this file must still be made available
# in accordance with section (3) of the GNU General Public License.
#
# This exception does not invalidate any other reasons why a work based
# on this file might be covered by the GNU General Public License.
#
# Copyright (c) 2016-2018 Intra2net AG <info@intra2net.com>

"""
Utilities for dealing with email.

.. seealso:: :py:mod:`pyi2ncommon.mail_validator`,
             :py:mod:`pyi2ncommon.imap_mailbox`

Copyright: Intra2net AG
"""

import logging
import os
import quopri
import re
from base64 import b64decode
from email import policy
from email.parser import BytesParser
from email.utils import parsedate_to_datetime

# outsourced source, import required for compatibility
from .imap_mailbox import ImapMailbox           # pylint: disable=unused-import
from .mail_validator import *                   # pylint: disable=unused-import
from .sysmisc import replace_file_regex

log = logging.getLogger('pyi2ncommon.mail_utils')


def prep_email_header(email_file, value, regex=None, criterion="envelopeto"):
    """
    Replace a header value in a provided email file (the file is rewritten).

    For criterion 'envelopeto' the work is delegated to
    :py:func:`replace_file_regex`. For criterion 'received' the file is read
    as text, every match of `regex` is replaced by `value` (exactly once, see
    :py:func:`re.sub`) and the result is written back to the same file.

    :param str email_file: file to use for the replacement
    :param str value: replacement text
    :param regex: regular expression to use when replacing a header value;
                  must be given for criterion 'received' since it is handed
                  directly to :py:func:`re.sub`
    :type regex: str or None
    :param str criterion: criterion to use for replacement, one
                          of 'envelopeto' or 'received'
    :raises: :py:class:`ValueError` if the choice of criterion is invalid

    .. todo:: In some cases this function is reusing arnied wrapper's cnf
              value preparation but for email headers.
    """
    if criterion == "envelopeto":
        logging.debug("Updating test emails' EnvelopeTo header")
        replace_file_regex(email_file, value, regex=regex)
    elif criterion == "received":
        logging.debug("Updating test emails' Received header")
        with open(email_file, "r") as file_handle:
            email_text = file_handle.read()
        # Substitute only once: running the same substitution a second time
        # would mangle the text whenever `value` itself matches `regex`.
        email_text = re.sub(regex, value, email_text)
        with open(email_file, "w") as file_handle:
            file_handle.write(email_text)
    else:
        raise ValueError("Invalid header preparation criterion '%s'"
                         % criterion)


def parse_mail_file(file_name, headers_only=True, attachment_filenames=False,
                    raise_on_defect=False, new_message_type=False):
    """
    Parse given email file (e.g. a banned message).

    This is basically a `email.parser.BytesParser().parse(...)` with given
    `headers_only` and policy selection, that can also handle BSMTP. As an
    extra bonus, you can just request headers plus the names of attached files.

    Removes the SMTP envelope surrounding the email if present: if the first
    line starts with ``EHLO``, everything up to and including the ``DATA``
    line is skipped. Only left-over might be a line with a '.' at end of
    non-multipart messages if `headers_only` is False.

    :param str file_name: path to the file that contains the email text
    :param bool headers_only: whether to parse only the email headers; set this
                              to False, e.g. if you want to check for
                              attachments using message.walk()
    :param bool attachment_filenames: if you just want headers and names of
                                      attached files, set `headers_only` and
                                      this to True.
    :param bool raise_on_defect: whether to raise an error if email parser
                                 encounters a defect or just add the defect to
                                 message's `defect` attribute
    :param bool new_message_type: whether to return the older
                                  :py:class:`email.message.Message` (policy
                                  `compat32`, our default), or the newer
                                  :py:class:`email.message.EmailMessage` type
                                  (policy `default`). Big difference!
    :returns: either msg or 2-tuple `(msg, filenames)` if requested per arg
              `attachment_filenames`
    :rtype: :py:class:`email.message.Message` or
             (:py:class:`email.message.Message`, (str)) or
             one of these two with :py:class:`email.message.EmailMessage`
    :raises: :py:class:`ValueError` if the file starts with an SMTP envelope
             (``EHLO``) that is never terminated by a ``DATA`` line
    """
    mail_policy = policy.default if new_message_type else policy.compat32
    if raise_on_defect:
        # Do not use `mail_policy + policy.strict`: `strict` carries settings
        # that only exist on EmailPolicy, so adding it to `compat32` raises a
        # TypeError. Cloning with the single flag works for both policies.
        mail_policy = mail_policy.clone(raise_on_defect=True)

    full_msg = None
    with open(file_name, 'rb') as read_handle:
        line = read_handle.readline()
        if line.startswith(b'EHLO'):
            # there is a smtp header. skip to its end
            while line.strip() != b'DATA':
                line = read_handle.readline()
                if not line:
                    # readline() returns b'' at end of file forever; without
                    # this check a truncated envelope would loop endlessly
                    raise ValueError('SMTP envelope in "{}" has no DATA line'
                                     .format(file_name))
            # the rest is the email plus a trailing '.' (ignored by parser if
            # multipart)
        else:
            read_handle.seek(0)  # forget we read the first line already
        start_pos = read_handle.tell()
        msg = BytesParser(policy=mail_policy).parse(read_handle,
                                                    headersonly=headers_only)

        if attachment_filenames and headers_only:
            # attachment names require the complete message structure, so
            # parse once more from the start of the actual email
            read_handle.seek(start_pos)
            full_msg = BytesParser(policy=mail_policy).parse(
                read_handle, headersonly=False)

    if not attachment_filenames:
        return msg

    if full_msg is None:
        full_msg = msg    # message was parsed completely in the first place
    filenames = (get_filename(part) for part in full_msg.walk())
    return msg, tuple(filename for filename in filenames
                      if filename is not None)


def parse_mail_date(message):
    """
    Convert the 'Date' header of the given message to a datetime.

    Thin convenience wrapper around
    :py:func:`email.utils.parsedate_to_datetime` that copes with a missing
    header.

    Not needed for the newer :py:class:`email.message.EmailMessage` since
    there the `Date` header is parsed automatically into a
    :py:class:`email.headerregistry.DateHeader`.

    :param message: Email message
    :type message: :py:class:`email.message.Message`
    :returns: datetime from Email "Date" header or None if the header is
              missing or empty
    :rtype: :py:class:`datetime.datetime` or None
    """
    raw_date = message.get('Date', '')
    return parsedate_to_datetime(raw_date) if raw_date else None


def get_user_mail_files(user, mailbox='INBOX'):
    """
    Yield the paths of all mail files in a folder of the given user.

    Works on local cyrus file system, not on imap server. Only directory
    entries whose name starts with digits followed by a dot are considered
    to be mails.

    :param str user: Name of user whose mailbox is analyzed
    :param str mailbox: name of mailbox to use, INBOX (default) for base
                        folder; each path component is modified using
                        :py:func:`cyrus_escape`
    :returns: nothing; but yields full path to messages on disc
    """
    # a leading "INBOX" is the user's base folder: "INBOX/sub/dir" -> "sub/dir"
    components = mailbox.split('/')
    if components[0].upper() == 'INBOX':
        components = components[1:]
    escaped = [cyrus_escape(component) for component in components]

    mail_dir = os.path.join('/datastore', 'imap-mails', 'user', user, *escaped)
    for entry in os.listdir(mail_dir):
        if re.match(r'\d+\.', entry):
            yield os.path.join(mail_dir, entry)


def get_user_mail(user, mailbox='INBOX', **kwargs):
    """
    Yield every mail in a folder of the given user together with its path.

    Each file found by :py:func:`get_user_mail_files` is parsed with
    :py:func:`parse_mail_file`.

    :param str user: see :py:func:`get_user_mail_files`
    :param str mailbox: see :py:func:`get_user_mail_files`
    :param dict kwargs: all other args are forwarded to
                        :py:func:`parse_mail_file`
    :returns: nothing; but yields 2-tuples (path, email_msg) where first is the
              full path to the message on disc, and the latter is the outcome
              of :py:func:`parse_mail_file` for that file
    """
    yield from ((mail_path, parse_mail_file(mail_path, **kwargs))
                for mail_path in get_user_mail_files(user, mailbox))


def get_message_text(filename, fallback_encoding='iso8859-1',
                     include_all_text=False):
    """
    Return the text of an email message file as string.

    Intended as complementary addition to get_user_mail, e.g. ::

        for filename, msg in get_user_mail(user):
            # rough filtering based on headers
            if msg['Subject'] != 'Expected Subject':
                continue
            # get message text for closer inspection
            text = get_message_text(filename)
            if 'Expected Text' not in text:
                continue
            ...

    Looks for the first `text/plain` part and decodes it with the charset
    given in the mail, or with the fallback encoding if there is none.
    Without any `text/plain` part, the first other `text/*` part is used;
    without any text part at all, the first non-multipart part is used.

    If include_all_text is True, all `text/*` parts are returned, with the
    `text/plain` ones coming first.

    :param str filename: complete path of message file in filesystem
    :param str fallback_encoding: Encoding of email text if none is specified
                                  in mail.
    :param bool include_all_text: include all `text/*` parts in returned text
    :returns: text(s) of message
    :rtype: [str] if include_all_text else str
    """
    def decode_part(part):
        """Decode the payload of one message part to str."""
        charset = part.get_content_charset(fallback_encoding)
        return part.get_payload(decode=True).decode(charset)

    msg = parse_mail_file(filename, headers_only=False)

    # sort the text parts into "plain" and "other"; decoding happens later so
    # parts that are not needed are never decoded
    plain_parts = []
    other_text_parts = []
    for part in msg.walk():
        content_type = part.get_content_type()
        if content_type == 'text/plain':
            plain_parts.append(part)
        elif content_type.startswith('text/'):
            other_text_parts.append(part)

    texts = [decode_part(part) for part in plain_parts]
    if texts and not include_all_text:
        return texts[0]

    texts.extend(decode_part(part) for part in other_text_parts)
    if texts:
        return texts if include_all_text else texts[0]

    # no "text/" part at all: descend to the very first leaf part
    first_leaf = msg
    while first_leaf.is_multipart():
        first_leaf = first_leaf.get_payload(0)
    leaf_text = decode_part(first_leaf)
    return [leaf_text, ] if include_all_text else leaf_text


def cyrus_escape(user_or_folder, keep_path=False, regex=False):
    """
    Translate a user or mailbox folder name into cyrus format.

    This is a hack that only performs these hard-coded replacements:

    * . --> ^
    * ü --> &APw-  ,  ö --> &APY-  ,  ä --> &AOQ-
      (if need more: this is modified utf-7)
    * inbox, INBOX, Inbox -->   (the empty string)
    * / --> .  (except if keep_path is True)

    A general modified utf-7-encoder/decoder would be nicer but python has
    none builtin (see https://bugs.python.org/issue5305) and an extra lib like
    https://bitbucket.org/mjs0/imapclient/ would be overkill. The input to
    this function is under our control, so this should suffice.

    :param str user_or_folder: name of the user or folder string to escape
    :param bool keep_path: do not replace '/' with '.' so can still use result
                           as path name
    :param bool regex: result is used in grep or other regex, so ^, &, . and $
                       are escaped again with a backslash
    :returns: escaped user or folder string
    :rtype: str

    .. seealso:: :py:func:`cyrus_unescape`
    """
    char_table = str.maketrans({'.': '^', 'ü': '&APw-', 'ä': '&AOQ-',
                                'ö': '&APY-'})
    escaped = user_or_folder.translate(char_table)

    # one spelling after the other, exactly in this order
    for inbox_spelling in ('inbox', 'INBOX', 'Inbox'):
        escaped = escaped.replace(inbox_spelling, '')

    if not keep_path:
        escaped = escaped.replace('/', '.')
    if not regex:
        return escaped

    regex_table = str.maketrans({special: '\\' + special
                                 for special in '^&.$'})
    return escaped.translate(regex_table)


def cyrus_unescape(user_or_folder):
    """
    Revert (some of) the replacements done by :py:func:`cyrus_escape`.

    The empty string becomes 'inbox', '.' becomes '/' and both '^' and the
    regex-escaped form of '^' (backslash followed by '^') become '.'.
    Nothing else is reverted.

    :param str user_or_folder: name of the user or folder string to unescape
    :returns: unescaped user or folder string
    :rtype: str
    """
    if user_or_folder == '':
        return 'inbox'

    unescaped = user_or_folder.replace('.', '/')
    # the backslash-escaped caret has to be handled before the plain one
    for caret in (r'\^', '^'):
        unescaped = unescaped.replace(caret, '.')
    return unescaped


def get_filename(message, failobj=None, do_unwrap=True):
    """
    Get filename of a message part, even if it is hidden in encoded words.

    For attachments whose complete `Content-Disposition` header is wrapped
    into MIME encoded words, :py:meth:`email.message.Message.get_filename`
    does not work. This function tries that first and if it finds nothing,
    decodes the `Content-Disposition` header (base64 and quoted-printable
    encoded words) and looks for ``attachment; filename=...`` in the result.
    The file name may or may not be enclosed in double quotes. If all fails,
    returns `failobj`.

    Only for ascii filenames: also unwraps file names if they are line-wrapped.
    But note that this may remove too much whitespace from the filename if
    line-wrapping happened in the same position as the filename's whitespace.
    To get the original, still wrapped version, set param `do_unwrap` to
    `False`.

    See also: https://en.wikipedia.org/wiki/MIME#Encoded-Word

    :param message: message part, e.g. from
                    :py:meth:`email.message.Message.walk`
    :type message: :py:class:`email.message.Message` or
                   :py:class:`email.message.EmailMessage`
    :param failobj: object to return in case of failure (defaults to None)
    :param bool do_unwrap: undo line-break inserted by mail-creator; may remove
                           whitespace from file name; only applies to ascii
                           file names
    :returns: either a string or failobj
    :raises: :py:class:`ValueError` if an encoded word uses an encoding other
             than 'b' or 'q'
    """
    # Try the old way and unwrap. A private sentinel is used instead of
    # `failobj` so that a str `failobj` is not mistaken for a found file name
    # (which would skip the Content-Disposition parsing below).
    missing = object()
    filename = message.get_filename(missing)

    if isinstance(filename, bytes) and not filename.startswith(b'=?') \
            and not filename.endswith(b'?='):
        filename = filename.decode('utf8')

    if isinstance(filename, str):
        if do_unwrap:
            return re.sub('[\\r\\n]+', '', filename)
        return filename

    if 'Content-Disposition' not in message:
        return failobj

    # try parsing content-disposition. e.g.:
    # attachment; filename="2018年度公开课计划表.xlsx"   -->
    # '=?utf-8?b?YXR0YWNobWVudDsgZmlsZW5hbWU9IjIwMTjlubTluqY=?=\r\n =?utf-8?b?'
    # '5YWs5byA6K++6K6h5YiS6KGoLnhsc3gi?='

    # This may be a re-implementation of email.utils.collapse_rfc2231_value()
    # as mentioned in email.message.EmailMessage.get_param()

    # The form is: "=?charset?encoding?encoded text?="
    SPLIT_REGEX = '\r?\n *'    # should be CRNL but some files miss the \r
    ENCODED_WORD_REGEX = r'\s*=\?([^?]+)\?([^?]+)\?(.*)\?=\s*$'
    # The quote group must always participate (possibly matching nothing):
    # a back-reference to an optional group that did not match can never
    # match in python, so unquoted file names would not be found otherwise.
    LINE_REGEX = r'attachment\s*;\s*filename=("?)(.+?)\1\s*$'
    decoded = []
    for word in re.split(SPLIT_REGEX, message['Content-Disposition']):
        match = re.match(ENCODED_WORD_REGEX, word)
        if not match:
            break      # not an encoded word; use what was decoded so far
        charset, encoding, data = match.groups()
        encoding_kind = encoding.lower()
        if encoding_kind == 'b':
            raw = b64decode(data)
        elif encoding_kind == 'q':
            # header=True: '_' stands for a space in Q-encoded words
            raw = quopri.decodestring(data, header=True)
        else:
            raise ValueError('not allowed according to wikipedia: "{}"'
                             .format(encoding))
        decoded.append(raw.decode(charset))

    match = re.match(LINE_REGEX, ''.join(decoded))
    if match:
        return match.group(2)
    return failobj
Commit	Line	Data
	1	# This Python file uses the following encoding: utf-8
	2
	3	# The software in this package is distributed under the GNU General
	4	# Public License version 2 (with a special exception described below).
	5	#
	6	# A copy of GNU General Public License (GPL) is included in this distribution,
	7	# in the file COPYING.GPL.
	8	#
	9	# As a special exception, if other files instantiate templates or use macros
	10	# or inline functions from this file, or you compile this file and link it
	11	# with other works to produce a work based on this file, this file
	12	# does not by itself cause the resulting work to be covered
	13	# by the GNU General Public License.
	14	#
	15	# However the source code for this file must still be made available
	16	# in accordance with section (3) of the GNU General Public License.
	17	#
	18	# This exception does not invalidate any other reasons why a work based
	19	# on this file might be covered by the GNU General Public License.
	20	#
	21	# Copyright (c) 2016-2018 Intra2net AG <info@intra2net.com>
	22
	23	"""
	24	Utilities for dealing with email.
	25
	26	.. seealso:: :py:mod:`pyi2ncommon.mail_validator`,
	27	:py:mod:`pyi2ncommon.imap_mailbox`
	28
	29	Copyright: Intra2net AG
	30	"""
	31
	32	from base64 import b64decode
	33	from email.utils import parsedate_to_datetime
	34	from email.parser import BytesParser
	35	from email import policy
	36
	37	# outsourced source, import required for compatibility
	38	from .imap_mailbox import ImapMailbox # pylint: disable=unused-import
	39	from .mail_validator import * # pylint: disable=unused-import
	40	from .sysmisc import replace_file_regex
	41
	42	log = logging.getLogger('pyi2ncommon.mail_utils')
	43
	44
	45	def prep_email_header(email_file, value, regex=None, criterion="envelopeto"):
	46	"""
	47	Replace value in a provided email file.
	48
	49	:param str email_file: file to use for the replacement
	50	:param str value: value to replace the first matched group with
	51	:param regex: regular expression to use when replacing a header value
	52	:type regex: str or None
	53	:param str criterion: criterion to use for replacement, one
	54	of 'envelopeto' or 'received'
	55	:raises: :py:class:`ValueError` if the choice of criterion is invalid
	56
	57	..todo:: In some cases this function is reusing arnied wrapper's cnf
	58	value preparation but for email headers.
	59	"""
	60	if criterion == "envelopeto":
	61	logging.debug("Updating test emails' EnvelopeTo header")
	62	replace_file_regex(email_file, value, regex=regex)
	63	elif criterion == "received":
	64	logging.debug("Updating test emails' Received header")
	65	with open(email_file, "r") as file_handle:
	66	email_text = file_handle.read()
	67	email_text = re.sub(regex, value, email_text)
	68	email_text = re.sub(regex, value, email_text)
	69	with open(email_file, "w") as file_handle:
	70	file_handle.write(email_text)
	71	else:
	72	raise ValueError("Invalid header preparation criterion '%s'"
	73	% criterion)
	74
	75
	76	def parse_mail_file(file_name, headers_only=True, attachment_filenames=False,
	77	raise_on_defect=False, new_message_type=False):
	78	"""
	79	Parse given email file (e.g. a banned message).
	80
	81	This is basically a `email.parser.BytesParser().parse(...)` with given
	82	`headers_only` and policy selection, that can also handle BSMTP. As an
	83	extra bonus, you can just request headers plus the names of attached files.
	84
	85	Removes the SMTP envelope surrounding the email if present. Only left-over
	86	might be a line with a '.' at end of non-multipart messages if
	87	`headers_only` is False.
	88
	89	:param str file_name: path to the file that contains the email text
	90	:param bool headers_only: whether to parse only the email headers; set this
	91	to False, e.g. if you want to check for
	92	attachments using message.walk()
	93	:param bool attachment_filenames: if you just want headers and names of
	94	attached files, set `headers_only` and
	95	this to True.
	96	:param bool raise_on_defect: whether to raise an error if email parser
	97	encounters a defect (email policy `strict`) or
	98	just add the defect to message's `defect`
	99	attribute
	100	:param bool new_message_type: whether to return the older
	101	:py:class:`email.message.Message` (policy
	102	`compat32`, our default), or the newer
	103	:py:class:`email.message.EmailMessage` type
	104	(policy `default`). Big difference!
	105	:returns: either msg or 2-tuple `(msg, filenames)` if requested per arg
	106	`attachment_filenames`
	107	:rtype: :py:class:`email.message.Message` or
	108	(:py:class:`email.message.Message`, (str)) or
	109	one of these two with :py:class:`email.message.EmailMessage`
	110	"""
	111	msg = None
	112	start_pos = 0
	113
	114	if new_message_type:
	115	mail_policy = policy.default
	116	else:
	117	mail_policy = policy.compat32
	118	if raise_on_defect:
	119	mail_policy += policy.strict
	120
	121	with open(file_name, 'rb') as read_handle:
	122	line = read_handle.readline()
	123	if line.startswith(b'EHLO'):
	124	# there is a smtp header. skip to its end
	125	while line.strip() != b'DATA':
	126	line = read_handle.readline()
	127	# the rest is the email plus a trailing '.' (ignored by parser if
	128	# multipart)
	129	else:
	130	read_handle.seek(0) # forget we read the first line already
	131	start_pos = read_handle.tell()
	132	msg = BytesParser(policy=mail_policy).parse(read_handle,
	133	headersonly=headers_only)
	134
	135	if not attachment_filenames:
	136	return msg
	137
	138	# otherwise need to parse complete message to get attachment file names
	139	if headers_only:
	140	with open(file_name, 'rb') as read_handle:
	141	read_handle.seek(start_pos)
	142	full_msg = BytesParser(policy=mail_policy).parse(read_handle,
	143	headersonly=False)
	144	else:
	145	full_msg = msg
	146	filenames = [get_filename(part) for part in full_msg.walk()]
	147	return msg, tuple(filename for filename in filenames
	148	if filename is not None)
	149
	150
	151	def parse_mail_date(message):
	152	"""
	153	Parse the 'Date' header of the given message.
	154
	155	Shortcut for :py:func:`email.utils.parsedate_to_datetime`.
	156
	157	This is no longer necessary for newer
	158	:py:class:`email.message.EmailMessage` since the `Date` Header is
	159	automatically parsed to a :py:class:`email.headerregistry.DateHeader`.
	160
	161	:param message: Email message
	162	:type message: :py:class:`email.message.Message`
	163	:returns: datetime from Email "Date" header or None if header not present
	164	:rtype: :py:class:`datetime.datetime` or None
	165	"""
	166	date_str = message.get('Date', '')
	167	if not date_str:
	168	return None
	169	return parsedate_to_datetime(date_str)
	170
	171
	172	def get_user_mail_files(user, mailbox='INBOX'):
	173	"""
	174	Iterate over mails in given folder of given user; yields file names.
	175
	176	Works on local cyrus file system, not on imap server.
	177
	178	:param str user: Name of user whose mailbox is analyzed
	179	:param str mailbox: name of mailbox to use, INBOX (default) for base
	180	folder; name is modified using :py:func:`cyrus_escape`
	181	:returns: nothing; but yields full path to messages on disc
	182	"""
	183	# base folder of user mail
	184	folder = os.path.join('/datastore', 'imap-mails', 'user', user)
	185
	186	# adapt paths like "INBOX/sub/dir" to "sub/dir"
	187	subdirs = mailbox.split('/')
	188	if subdirs[0].upper() == 'INBOX':
	189	subdirs = subdirs[1:]
	190	folder = os.path.join(folder,
	191	*(cyrus_escape(subdir) for subdir in subdirs))
	192
	193	for filename in os.listdir(folder):
	194	if not re.match(r'\d+\.', filename):
	195	continue
	196	full_path = os.path.join(folder, filename)
	197	yield full_path
	198
	199
	200	def get_user_mail(user, mailbox='INBOX', **kwargs):
	201	"""
	202	Iterate over mails in given folder of given user; yields parsed mails.
	203
	204	:param str user: see :py:func:`get_user_mail_files`
	205	:param str mailbox: see :py:func:`get_user_mail_files`
	206	:param dict kwargs: all other args are forwarded to
	207	:py:func:`parse_mail_file`
	208	:returns: nothing; but yields 2-tuples (path, email_msg) where first is the
	209	full path to the message on disc, and the latter is the outcome
	210	of :py:func:`parse_mail_file` for that file
	211	"""
	212	for full_path in get_user_mail_files(user, mailbox):
	213	yield full_path, parse_mail_file(full_path, **kwargs)
	214
	215
	216	def get_message_text(filename, fallback_encoding='iso8859-1',
	217	include_all_text=False):
	218	"""
	219	Extract message text as string from email message.
	220
	221	Intended as complementary addition to get_user_mail, e.g. ::
	222
	223	for filename, msg in get_user_mail(user):
	224	# rough filtering based on headers
	225	if msg['Subject'] != 'Expected Subject':
	226	continue
	227	# get message text for closer inspection
	228	text = get_message_text(filename)
	229	if 'Expected Text' not in text:
	230	continue
	231	...
	232
	233	Finds the first part in message that is of type `text/plain` and decodes it
	234	using encoding specified in mail or otherwise fallback encoding. If none
	235	found takes first part of type `text/*`, or otherwise just the first part.
	236
	237	If include_all_text is True, all `text/*` parts are included, with `text/plain`
	238	being the first.
	239
	240	:param str filename: complete path of message file in filesystem
	241	:param str fallback_encoding: Encoding of email text if none is specified
	242	in mail.
	243	:param bool include_all_text: include all `text/*` parts in returned text
	244	:returns: text(s) of message
	245	:rtype: [str] if include_all_text else str
	246	"""
	247	result = []
	248	msg = parse_mail_file(filename, headers_only=False)
	249	for part in msg.walk():
	250	if part.get_content_type() != 'text/plain':
	251	continue
	252	encoding = part.get_content_charset(fallback_encoding)
	253	result.append(part.get_payload(decode=True).decode(encoding))
	254
	255	if result and not include_all_text:
	256	return result[0]
	257
	258	# no text/plain found. Try only "text/":
	259	for part in msg.walk():
	260	cont_type = part.get_content_type()
	261	if cont_type.startswith('text/') and cont_type != 'text/plain':
	262	encoding = part.get_content_charset(fallback_encoding)
	263	result.append(part.get_payload(decode=True).decode(encoding))
	264
	265	if result:
	266	if not include_all_text:
	267	return result[0]
	268	return result
	269
	270	# no "text/" found. Just take first part
	271	while msg.is_multipart():
	272	msg = msg.get_payload(0)
	273
	274	encoding = msg.get_content_charset(fallback_encoding)
	275	if include_all_text:
	276	return [msg.get_payload(decode=True).decode(encoding), ]
	277	return msg.get_payload(decode=True).decode(encoding)
	278
	279
	280	def cyrus_escape(user_or_folder, keep_path=False, regex=False):
	281	"""
	282	Convert names of users or mailbox folders to cyrus format.
	283
	284	quite a hack, just does the following hard-coded replacements:
	285
	286	* . --> ^
	287	* / --> . (except if keep_path is True)
	288	* "u --> &APw- , "o --> &APY- , "a --> &AOQ-
	289	(if need more: this is modified utf-7)
	290	* inbox --> (the empty string)
	291
	292	Would like to use a general modified utf-7-encoder/decoder but python has
	293	none builtin (see https://bugs.python.org/issue5305) and an extra lib like
	294	https://bitbucket.org/mjs0/imapclient/ would be overkill. After all, we
	295	control the input to this function via params and this is enough umlaut-
	296	testing I think...
	297
	298	:param str user_or_folder: name of the user or folder string to escape
	299	:param bool keep_path: do not replace '/' with '.' so can still use result
	300	as path name
	301	:param bool regex: result is used in grep or other regex, so ^, . and & are
	302	escaped again with a backslash
	303	:returns: escaped user or folder string
	304	:rtype: str
	305
	306	.. seealso:: :py:func:`cyrus_unescape`
	307	"""
	308	temp = user_or_folder.replace('.', '^') \
	309	.replace('ü', '&APw-').replace('ä', '&AOQ-') \
	310	.replace('ö', '&APY-') \
	311	.replace('inbox', '').replace('INBOX', '').replace('Inbox', '')
	312	if not keep_path:
	313	temp = temp.replace('/', '.')
	314	if regex:
	315	return temp.replace('^', r'\^').replace('&', r'\&') \
	316	.replace('.', r'\.').replace('$', r'\$')
	317	return temp
	318
	319
	320	def cyrus_unescape(user_or_folder):
	321	"""
	322	Undo effects of :py:func:`cyrus_escape` (but not all of them).
	323
	324	:param str user_or_folder: name of the user or folder string to unescape
	325	:returns: unescaped user or folder string
	326	:rtype: str
	327	"""
	328	if user_or_folder == '':
	329	return 'inbox'
	330	return user_or_folder.replace('.', '/')\
	331	.replace(r'\^', '.').replace('^', '.')
	332
	333
	334	def get_filename(message, failobj=None, do_unwrap=True):
	335	"""
	336	Get filename of a message part, even if it is base64-encoded.
	337
	338	For attachments with base64-encoded file name, the
	339	:py:func:`email.message.Message.get_filename()` does not work. This
	340	function tries that first and if it fails tries to interprete the
	341	Content-Disposition of the message part. If all fails, returns `failobj`.
	342
	343	Only for ascii filenames: also unwraps file names if they are line-wrapped.
	344	But note that this may remove too much whitespace from the filename if
	345	line-wrapping happened in the same position as the filename's whitespace.
	346	To get unwrapped version, set param `do_unwrap` to `False`.
	347
	348	See also: https://en.wikipedia.org/wiki/MIME#Encoded-Word
	349
	350	:param message: message part, e.g. from
	351	:py:meth:`email.message.Message.walk`
	352	:type message: :py:class:`email.message.Message` or
	353	:py:class:`email.message.EmailMessage`
	354	:param failobj: object to return in case of failure (defaults to None)
	355	:param bool do_unwrap: undo line-break inserted by mail-creator; may remove
	356	whitespace from file name; only applies to ascii
	357	file names
	358	:returns: either a string or failobj
	359	"""
	360	# try the old way and unwrap
	361	filename = message.get_filename(failobj)
	362
	363	if isinstance(filename, bytes) and not filename.startswith(b'=?') \
	364	and not filename.endswith(b'?='):
	365	filename = filename.decode('utf8')
	366
	367	if isinstance(filename, str):
	368	if do_unwrap:
	369	return re.sub('[\\r\\n]+', '', filename)
	370	return filename
	371
	372	if 'Content-Disposition' not in message:
	373	return failobj
	374
	375	# try parsing content-disposition. e.g.:
	376	# attachment; filename="2018年度公开课计划表.xlsx" -->
	377	# '=?utf-8?b?YXR0YWNobWVudDsgZmlsZW5hbWU9IjIwMTjlubTluqY=?=\r\n =?utf-8?b?'
	378	# '5YWs5byA6K++6K6h5YiS6KGoLnhsc3gi?='
	379
	380	# This may be a re-implementation of email.utils.collapse_rfc2231_value()
	381	# as mentioned in email.message.EmailMessage.get_param()
	382
	383	# The form is: "=?charset?encoding?encoded text?="
	384	SPLIT_REGEX = '\r?\n *' # should be CRNL but some files miss the \r
	385	ENCODED_WORD_REGEX = r'\s=\?([^?]+)\?([^?]+)\?(.)\?=\s*$'
	386	LINE_REGEX = r'attachment\s;\sfilename=(")?(.+)\1\s*$'
	387	decoded = []
	388	for word in re.split(SPLIT_REGEX, message['Content-Disposition']):
	389	match = re.match(ENCODED_WORD_REGEX, word)
	390	if not match:
	391	break
	392	charset, encoding, data = match.groups()
	393	if encoding.lower() == 'b':
	394	temp = b64decode(data)
	395	elif encoding.lower() == 'q':
	396	raise NotImplementedError('use quopri.decodestring, handle _')
	397	else:
	398	raise ValueError('not allowed according to wikipedia: "{}"'
	399	.format(encoding))
	400	decoded.append(temp.decode(charset))
	401	decoded = u''.join(decoded)
	402
	403	match = re.match(LINE_REGEX, decoded)
	404	if match:
	405	return match.groups()[1]
	406	return failobj