| 1 | ''' |
| 2 | The module contains the FileIterator class. |
| 3 | |
| 4 | Copyright (c) 2012 Intra2net AG |
| 5 | Author: Plamen Dimitrov and Thomas Jarosch |
| 6 | |
| 7 | This program is free software: you can redistribute it and/or modify |
| 8 | it under the terms of the GNU General Public License as published by |
| 9 | the Free Software Foundation, either version 3 of the License, or |
| 10 | (at your option) any later version. |
| 11 | |
| 12 | This program is distributed in the hope that it will be useful, |
| 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 | GNU General Public License for more details. |
| 16 | ''' |
| 17 | |
| 18 | import os |
| 19 | import re |
| 20 | import logging |
| 21 | |
# Raw strings are used so that regex escapes such as ``\.`` and ``\d`` reach
# the regex engine verbatim instead of being (invalid) string escapes.

# Name of a mail file: one or more digits followed by a single dot, e.g. "123."
MAIL_FILENAME = re.compile(r"^[0-9]+\.$")
# Line of the mailbox list dump:
#   <mailbox> TAB [<digit> SPACE] "default" (TAB | SPACE) <acl string>
# group 1 is the mailbox name, group 2 the remaining acl string.
MBOXFILE_LINE = re.compile(r"^(.*?)\t(?:\d )?default[\t ](.*)$")
# Leading entry of an acl string:
#   <user> (TAB | SPACE) <rights> TAB <rest of the acl string>
# group 1 is the user, group 2 the rights, group 3 what is left to parse.
ACL_STRING = re.compile(r"^(.*?)[\t ](.*?)\t(.*)$")
| 25 | |
class FileIterator:
    """Iterate through a directory tree of e-mail files.

    While the generator returned by :meth:`load_mails` is consumed, the
    instance records which mailboxes were visited:

    * ``created_mailboxes`` -- mailbox paths, appended when their directory
      is entered.
    * ``acl_mailboxes`` -- mailbox paths, appended when their directory has
      been processed completely.

    Acls parsed by :meth:`load_mailbox_list` are kept in ``_file_acls`` and
    should be read through :meth:`get_mailbox_acls`.
    """

    # class attributes
    # mailboxes created during file traversal
    created_mailboxes = None
    # mailboxes to update during file traversal
    acl_mailboxes = None
    # acls retrieved from a file
    _file_acls = None

    def __init__(self):
        """Start with empty mailbox lists and no loaded acls."""

        self.created_mailboxes = []
        self.acl_mailboxes = []
        self._file_acls = {}

    @classmethod
    def _message_read(cls, filename):
        """Return the complete content of the message file ``filename``.

        Raises:
            IOError: if the file cannot be opened or read. A warning is
                logged before the exception is re-raised.
        """

        try:
            with open(filename, "r") as msgfile:
                return msgfile.read()
        except IOError:
            logging.warning("Could not open the e-mail file %s", filename)
            raise

    def get_mailbox_acls(self, mailbox, original_user):
        """Get the acls loaded from a file for a given mailbox.

        Should be used for access of mailbox acls instead of the attribute.

        Args:
            mailbox: mailbox path using "/" as separator, e.g. "INBOX/Sent".
            original_user: user name substituted for the "INBOX" part.

        Returns:
            The dictionary of acls stored for the mailbox, or an empty
            dictionary if none were loaded for it.
        """

        # translate the mailbox to the internal stored format; only the first
        # "INBOX" is the namespace marker, later occurrences are part of a
        # folder name and must stay untouched
        internal_mailbox = mailbox.replace("INBOX", "user/" + original_user, 1)
        # literal dots are stored as "^" because "." is the internal separator
        internal_mailbox = internal_mailbox.replace(".", "^")
        internal_mailbox = internal_mailbox.replace("/", ".")
        logging.debug("Get acls for mailbox %s translated as %s", mailbox, internal_mailbox)

        # retrieve from the internal dictionary of file acls
        mb_acls = self._file_acls.get(internal_mailbox)
        if mb_acls is None:
            # no rights for the mailbox were found; this is only worth a
            # warning if an acl file was loaded at all
            if self._file_acls:
                logging.warning("Could not find the acls for mailbox %s", mailbox)
            mb_acls = {}

        return mb_acls

    def load_mailbox_list(self, mboxlistfile):
        """Load the list of mailboxes and acl rights for each from file.

        Every line matching ``MBOXFILE_LINE`` adds one entry to the internal
        acl dictionary, mapping the mailbox name to a ``{user: rights}``
        dictionary. Lines that do not match are skipped with a warning. If
        the file cannot be opened or read, a warning is logged and the
        entries collected so far are kept.
        """

        try:
            with open(mboxlistfile, 'r') as acl_file:
                for line in acl_file:
                    line_match = MBOXFILE_LINE.match(line)
                    if line_match is None:
                        logging.warning("Illegal line in mailbox list dump: %s", line)
                        continue
                    key, aclstr = line_match.groups()

                    # consume the acl string one (user, rights) pair at a
                    # time and build a dictionary of users and rights
                    acls = {}
                    while aclstr:
                        acl_match = ACL_STRING.match(aclstr)
                        if acl_match is None:
                            # keep the pairs parsed so far, drop the rest
                            logging.warning("Illegal acl string in mailbox list dump: %s", line)
                            break
                        user, rights, aclstr = acl_match.groups()
                        acls[user] = rights

                    self._file_acls[key] = acls
        except IOError:
            logging.warning("Could not open mboxlist file %s", mboxlistfile)

    def load_mails(self, filepath, mailpath):
        """Load all e-mails from a file hierarchy.

        This recursive generator yields one tuple
        ``(message, mailbox to store, internaldate)`` per mail file found,
        where ``internaldate`` is the modification time of the file as
        returned by ``os.path.getmtime``.

        Only non-empty regular files whose name matches ``MAIL_FILENAME`` are
        yielded. Each subdirectory is traversed as the mailbox
        ``mailpath + "/" + <name>`` with "^" in the name replaced by ".".

        Note:
            As a side effect the working directory of the process is changed
            to every directory that is entered (``os.chdir``).
        """

        logging.debug("Entered directory %s -> %s", filepath, mailpath)
        try:
            filepath = os.path.abspath(filepath)
            os.chdir(filepath)
            # listing is guarded too: a directory may be enterable but
            # not readable
            subpaths = os.listdir(filepath)
        except OSError:
            logging.warning("Can't open the directory %s", filepath)
            return
        # mark mailboxes that should be created
        self.created_mailboxes.append(mailpath)

        # os.listdir never returns "." or "..", so no filtering is needed
        for subpath in subpaths:
            new_filepath = os.path.join(filepath, subpath)

            # if path is file validate name and inject
            if os.path.isfile(new_filepath):
                if os.path.getsize(new_filepath) == 0:
                    logging.info("Skipping empty file %s", subpath)
                    continue
                if not MAIL_FILENAME.match(subpath):
                    continue
                logging.info("Injecting file %s", subpath)
                try:
                    message = self._message_read(new_filepath)
                    # suggest file modification date for internaldate
                    internaldate = os.path.getmtime(new_filepath)
                except (IOError, OSError):
                    logging.warning("Could not retrieve mail from the file %s", new_filepath)
                    continue
                # yield outside of the try block so that errors raised by
                # the consumer are not mistaken for read failures
                yield (message, mailpath, internaldate)

            # if path is directory do recursive call
            elif os.path.isdir(new_filepath):
                # cyrus ^ vs . storage replacement
                new_mailpath = mailpath + "/" + subpath.replace("^", ".")
                logging.debug("Inserting mails from directory %s into mailbox %s", new_filepath, new_mailpath)
                # the nested generator is entered by iterating over it
                for rcr in self.load_mails(new_filepath, new_mailpath):
                    yield rcr
                logging.debug("Done with directory %s and mailbox %s", new_filepath, new_mailpath)

        # mark mailboxes that need acl update
        self.acl_mailboxes.append(mailpath)