import difflib
import socket
from inspect import currentframe
+from base64 import b64decode
import re
import subprocess
import logging
return 'inbox'
return user_or_folder.replace('.', '/')\
.replace(r'\^', '.').replace('^', '.')
+
+
def get_filename(message, failobj=None, do_unwrap=True):
    """
    Get filename of a message part, even if it is encoded as RFC 2047 word(s).

    For attachments whose whole Content-Disposition header is an encoded word,
    :py:func:`email.message.Message.get_filename()` does not work. This
    function tries that first and, if it yields nothing, tries to interpret
    the Content-Disposition of the message part itself. If all fails, returns
    `failobj`.

    Only for ascii filenames: also unwraps file names if they are line-wrapped.
    But note that this may remove too much whitespace from the filename if
    line-wrapping happened in the same position as the filename's whitespace.
    To get the original (still wrapped) version, set param `do_unwrap` to
    `False`.

    See also: https://en.wikipedia.org/wiki/MIME#Encoded-Word

    :param message: message part, e.g. from
                    :py:meth:`email.message.Message.walk`
    :type message: :py:class:`email.message.Message`
    :param failobj: object to return in case of failure (defaults to None)
    :param bool do_unwrap: undo line-break inserted by mail-creator; may remove
                           whitespace from file name; only applies to ascii
                           file names
    :returns: either a string or failobj
    :raises ValueError: if an encoded word uses an encoding other than B or Q
                        (errors from decoding the payload itself also
                        propagate to the caller)
    """
    # Local import: only the Q-encoding branch needs it.
    import quopri

    # Use a private sentinel instead of `failobj` here. Otherwise a *string*
    # failobj would be indistinguishable from a real file name and would be
    # returned before the Content-Disposition fallback is even attempted.
    missing = object()
    filename = message.get_filename(missing)

    # Plain bytes (i.e. not a complete "=?...?=" encoded word) can be decoded
    # directly. Something is an encoded word only if it has BOTH markers.
    if isinstance(filename, bytes) \
            and not (filename.startswith(b'=?') and filename.endswith(b'?=')):
        filename = filename.decode('utf8')

    if isinstance(filename, str):
        if do_unwrap:
            return re.sub('[\\r\\n]+', '', filename)
        return filename

    if 'Content-Disposition' not in message:
        return failobj

    # try parsing content-disposition. e.g.:
    # attachment; filename="2018年度公开课计划表.xlsx"   -->
    # '=?utf-8?b?YXR0YWNobWVudDsgZmlsZW5hbWU9IjIwMTjlubTluqY=?=\r\n =?utf-8?b?'
    # '5YWs5byA6K++6K6h5YiS6KGoLnhsc3gi?='

    # This may be a re-implementation of email.utils.collapse_rfc2231_value()
    # as mentioned in email.message.get_param()

    # The form is: "=?charset?encoding?encoded text?="
    split_regex = r'\r?\n *'    # should be CRNL but some files miss the \r
    encoded_word_regex = r'\s*=\?([^?]+)\?([^?]+)\?(.*)\?=\s*$'
    # The quote group always participates (it may match the empty string), so
    # the back-reference also works for unquoted file names.
    line_regex = r'attachment\s*;\s*filename=(?P<quote>"?)(?P<name>.+?)' \
                 r'(?P=quote)\s*$'

    # The header value may be a Header object rather than a plain string.
    header_value = str(message['Content-Disposition'])

    pieces = []
    for word in re.split(split_regex, header_value):
        match = re.match(encoded_word_regex, word)
        if not match:
            break       # stop at the first fragment that is no encoded word
        charset, encoding, data = match.groups()
        encoding = encoding.lower()
        if encoding == 'b':
            raw = b64decode(data)
        elif encoding == 'q':
            # header=True makes "_" decode to a space, as RFC 2047 requires
            raw = quopri.decodestring(data.encode('ascii'), header=True)
        else:
            raise ValueError('not allowed according to wikipedia: "{}"'
                             .format(encoding))
        pieces.append(raw.decode(charset))
    decoded = u''.join(pieces)

    # Disposition type and parameter names are case-insensitive.
    match = re.match(line_regex, decoded, re.IGNORECASE)
    if match:
        return match.group('name')
    return failobj