Initial submission of working tool
author Plamen Dimitrov <plamen.dimitrov@intra2net.com>
Thu, 21 Jun 2012 16:41:40 +0000 (18:41 +0200)
committer Plamen Dimitrov <plamen.dimitrov@intra2net.com>
Thu, 21 Jun 2012 16:41:40 +0000 (18:41 +0200)
confscript.cfg [new file with mode: 0644]
date_interpreter.py [new file with mode: 0644]
fix_imap_internaldate.py [new file with mode: 0644]
mail_iterator.py [new file with mode: 0644]
unit_tester.py [new file with mode: 0644]

diff --git a/confscript.cfg b/confscript.cfg
new file mode 100644 (file)
index 0000000..8b65219
--- /dev/null
@@ -0,0 +1,4 @@
+[basic_settings]
+log_level = 30
+imap_server = imap.company.com
+tolerance = 1800
diff --git a/date_interpreter.py b/date_interpreter.py
new file mode 100644 (file)
index 0000000..083fa1a
--- /dev/null
@@ -0,0 +1,103 @@
+'''
+date_interpreter.py - The module contains the DateInterpreter class.
+
+Copyright (c) 2012 Intra2net AG
+Author: Plamen Dimitrov
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Add '-t' argument when running fix_imap_internaldate.py for a test mode.
+For a detailed list of each message with a date conflict change
+the 'log_level' in the configuration file from '30' to '20'.
+'''
+
+import calendar
+import datetime
+import re
+import time
+
+# Regular expressions used to locate date stamps in server responses.
+#
+# RECEIVED_DATE finds "<day> <Mon> <year> <hh>:<mm>[:<ss>] [<+|-><hhmm>]"
+# anywhere in a header and captures, in this order: day, month abbreviation,
+# year (four digits or two digits), hour, minute, optional second (60 is
+# accepted) and the optional zone sign, zone hours and zone minutes.
+RECEIVED_DATE = re.compile(r'(0?[1-9]|[1-2][0-9]|3[01])\s+([A-Z][a-z][a-z])\s+'
+        r'(19[0-9]{2}|[2-9][0-9]{3}|[0-9]{2})\s+(2[0-3]|[0-1][0-9]):([0-5][0-9])(?::(60|[0-5][0-9]))?\s*'
+        r'(?:([-\+])([0-9]{2})([0-5][0-9]))*')
+# INTERNAL_DATE describes the "dd-Mon-yyyy hh:mm:ss +hhmm" layout with named
+# groups; it is not referenced by the DateInterpreter class below.
+INTERNAL_DATE = re.compile(r'(?P<day>[ 0123][0-9])-(?P<mon>[A-Z][a-z][a-z])-(?P<year>[0-9][0-9][0-9][0-9])'
+        r' (?P<hour>[0-9][0-9]):(?P<min>[0-9][0-9]):(?P<sec>[0-9][0-9])'
+        r' (?P<zonen>[-+])(?P<zoneh>[0-9][0-9])(?P<zonem>[0-9][0-9])')
+# CONTROL_SYMBOLS matches the line breaks and tabs of a folded header.
+CONTROL_SYMBOLS = re.compile(r'[\n\r\t]')
+
+class DateInterpreter:
+    """This class extracts dates from imap server responses and compares them.
+
+    Every datetime returned by this class is naive and expressed in the
+    local time zone of the machine running the script, which makes the
+    internal date and the received date directly comparable.
+    """
+
+    # English month abbreviations as they appear in mail headers; a fixed
+    # table keeps the parsing independent of the process locale.
+    _MONTHS = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
+
+    @classmethod
+    def extract_internal_date(cls, fetchresult):
+        """Extracts the internal date from INTERNALDATE, returns datetime.
+
+        fetchresult is a local time tuple, e.g. the value returned by
+        imaplib.Internaldate2tuple.
+        """
+        return datetime.datetime.fromtimestamp(time.mktime(fetchresult))
+
+    @classmethod
+    def extract_received_date(cls, fetchresult):
+        """Extracts the first date from RECEIVED, returns datetime.
+
+        fetchresult is the data part of a fetch response; the header bytes
+        are read from fetchresult[0][1]. A stamp without a numeric zone is
+        taken as +0000.
+
+        Returns a naive datetime in local time, or "" if no date stamp
+        with a valid month could be found.
+        """
+        # decode leniently: only the ASCII date stamp is of interest here
+        header = fetchresult[0][1].decode("utf-8", "replace")
+        # fold markers become blanks so that neighbouring tokens stay apart
+        header = CONTROL_SYMBOLS.sub(' ', header)
+
+        for candidate in RECEIVED_DATE.finditer(header):
+            (day, month_name, year_text, hours, minutes, seconds,
+             zonen, zoneh, zonem) = candidate.groups('')
+            # the pattern accepts any capitalised three letter word as month
+            if month_name in cls._MONTHS:
+                break
+        else:
+            return ""
+
+        year = int(year_text)
+        if len(year_text) == 2:
+            # obsolete two digit years: 00-49 mean 20xx, 50-99 mean 19xx
+            year += 2000 if year < 50 else 1900
+        month = cls._MONTHS.index(month_name) + 1
+
+        # zone offset east of UTC in seconds
+        zone = (int(zoneh or 0) * 60 + int(zonem or 0)) * 60
+        # the captured groups are str, so the sign is compared with a str
+        if zonen == '-':
+            zone = -zone
+
+        # read the stamp as if it was UTC, then take the zone offset off
+        # to get the real point in time as seconds since the epoch
+        time_tuple = (year, month, int(day), int(hours), int(minutes), int(seconds or 0))
+        utc = calendar.timegm(time_tuple) - zone
+        return datetime.datetime.fromtimestamp(utc)
+
+    @classmethod
+    def compare_dates(cls, date1, date2, tolerance=1800):
+        """Compares datetime objects for deviation given certain tolerance.
+
+        tolerance is given in seconds.
+        Returns 1 if there is a significant difference, 0 otherwise.
+        """
+        deviation = abs(date1 - date2)
+        return 1 if deviation.total_seconds() > tolerance else 0
diff --git a/fix_imap_internaldate.py b/fix_imap_internaldate.py
new file mode 100644 (file)
index 0000000..00e0f15
--- /dev/null
@@ -0,0 +1,113 @@
+'''
+fix_imap_internaldate.py - Fix the INTERNALDATE field on IMAP servers
+
+Copyright (c) 2012 Intra2net AG
+Author: Plamen Dimitrov
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Add '-t' argument when running the module for a test mode.
+For a detailed list of each message with a date conflict change
+the 'log_level' in the configuration file from '30' to '20'.
+'''
+
+import sys
+import csv
+import logging
+import configparser
+from date_interpreter import DateInterpreter
+from mail_iterator import MailIterator
+
+def _count_date_conflicts(session, mailbox, username, date_interp, tolerance, test_mode):
+    """Checks all messages of the selected mailbox, returns the number of date conflicts.
+
+    In test mode every conflict is only logged, otherwise the message is
+    replaced by a copy carrying the received date as internal date.
+    Messages that could not be fetched or updated are logged and skipped.
+    """
+    try:
+        mail_ids = session.fetch_messages()
+    except UserWarning as ex:
+        logging.error(ex)
+        return 0
+
+    conflicts = 0
+    for mid in mail_ids:
+        try:
+            fetched_internal_date = session.fetch_internal_date(mid)
+            internal_date = date_interp.extract_internal_date(fetched_internal_date)
+            fetched_received_date = session.fetch_received_date(mid)
+            received_date = date_interp.extract_received_date(fetched_received_date)
+        except UserWarning as ex:
+            logging.error(ex)
+            continue
+        if received_date == "":
+            logging.warning("No received date could be found in message uid: %s - mailbox: %s - user: %s.\n",
+                            mid.decode("utf-8"), mailbox.strip('"'), username)
+            continue
+        if not date_interp.compare_dates(received_date, internal_date, tolerance):
+            continue
+
+        if test_mode:
+            # show the text of the first Received header only; fall back to
+            # the whole response if the field name is spelled differently
+            header_parts = fetched_received_date[0][1].decode("utf-8", "replace").split("Received:")
+            first_header = header_parts[1] if len(header_parts) > 1 else header_parts[0]
+            logging.info("Date conflict found in message uid: %s - mailbox: %s - user: %s.\nInternal date %s is different from received date %s from RECEIVED header:\n%s.",
+                         mid.decode("utf-8"), mailbox.strip('"'), username,
+                         internal_date.strftime("%d %b %Y %H:%M:%S"),
+                         received_date.strftime("%d %b %Y %H:%M:%S"),
+                         first_header)
+        else:
+            try:
+                session.update_message(mid, mailbox, received_date)
+            except UserWarning as ex:
+                logging.error(ex)
+                continue
+        conflicts += 1
+    return conflicts
+
+def main():
+    """Iterates through csv list of users and their mailboxes
+
+    The users are read from 'userdata.csv' (columns 'username' and
+    'password'), the settings from the configuration file. With '-t' as
+    first command line argument the conflicts are only logged and no
+    message is changed. A summary per mailbox and per user is logged as
+    warning after each user.
+    """
+    test_mode = len(sys.argv) > 1 and sys.argv[1] == "-t"
+
+    config = load_configuration()
+    logging.basicConfig(filename='mailscript.log',
+                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+                        level=config.getint('basic_settings', 'log_level'))
+
+    date_interp = DateInterpreter()
+    server = config.get('basic_settings', 'imap_server')
+    tolerance = config.getint('basic_settings', 'tolerance')
+
+    # the context manager closes the user list even if the run is aborted
+    with open("userdata.csv", "r", newline='') as user_file:
+        for user in csv.DictReader(user_file, delimiter=','):
+            username = user['username']
+            try:
+                session = MailIterator(server, username, user['password'])
+            except UserWarning as ex:
+                logging.error(ex)
+                continue
+
+            # counted anew for every user, so that the summary of one user
+            # does not contain the mailboxes of the users handled before
+            total_per_box = {}
+            for mailbox in session:
+                conflicts = _count_date_conflicts(session, mailbox, username,
+                                                  date_interp, tolerance, test_mode)
+                if conflicts:
+                    box_name = mailbox.strip('"')
+                    total_per_box[box_name] = conflicts + total_per_box.get(box_name, 0)
+
+            for box_name, box_total in total_per_box.items():
+                logging.warning("Total date conflicts to be corrected in a mailbox %s are %s.",
+                                box_name, box_total)
+            logging.warning("Total date conflicts to be corrected for user %s are %s.\n",
+                            username, sum(total_per_box.values()))
+
+def load_configuration():
+    """Loads the script configuration from a file or creates such.
+
+    If 'confscript.cfg' cannot be read from the working directory, a new
+    file with a [basic_settings] section holding placeholder values is
+    written there; its imap_server entry has to be edited before use.
+
+    Returns the RawConfigParser holding the settings.
+    """
+    config = configparser.RawConfigParser()
+    # read() does not raise for a missing file, it skips it silently and
+    # returns the list of files that could be parsed
+    if not config.read('confscript.cfg'):
+        config.add_section('basic_settings')
+        config.set('basic_settings', 'log_level', str(logging.DEBUG))
+        config.set('basic_settings', 'imap_server', 'imap.company.com')
+        config.set('basic_settings', 'tolerance', '1800')
+        with open('confscript.cfg', 'w') as configfile:
+            config.write(configfile)
+    return config
+
+# start the tool only when the module is executed as a script
+if __name__ == "__main__":
+    main()
diff --git a/mail_iterator.py b/mail_iterator.py
new file mode 100644 (file)
index 0000000..b217f4a
--- /dev/null
@@ -0,0 +1,101 @@
+'''
+mail_iterator.py - The module contains the MailIterator class.
+
+Copyright (c) 2012 Intra2net AG
+Author: Plamen Dimitrov
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Add '-t' argument when running fix_imap_internaldate.py for a test mode.
+For a detailed list of each message with a date conflict change
+the 'log_level' in the configuration file from '30' to '20'.
+'''
+
+import imaplib
+import re
+import time
+
+# Splits one decoded entry of the LIST response into three named groups:
+# the parenthesised flags, the double quoted hierarchy delimiter and the
+# mailbox name (kept exactly as sent by the server, quotes included).
+# NOTE(review): an entry with an unquoted delimiter (e.g. NIL) would not
+# match this pattern - confirm whether the server can send such entries.
+LIST_RESP = re.compile(r'\((?P<flags>.*?)\) "(?P<delimiter>.*)" (?P<name>.*)')
+
+class MailIterator:
+    """This class communicates with the e-mail server.
+
+    Iterating over an instance selects one mailbox of the user after the
+    other and yields its name; the fetch and update methods then work on
+    the mailbox selected last. Failed server requests are reported as
+    UserWarning.
+    """
+    # raw entries of the LIST response, replaced per instance in __init__
+    mailboxes = []
+
+    def __init__(self, server, username, password):
+        """Creates a connection and a user session.
+
+        Raises UserWarning if the login is refused or the mailboxes
+        cannot be listed.
+        """
+        # defined first so that __del__ also works on a half built object
+        self.mail_con = None
+        self.mail_con = imaplib.IMAP4_SSL(server)
+        try:
+            result, data = self.mail_con.login(username, password)
+        except imaplib.IMAP4.error as ex:
+            # imaplib reports a refused login as exception, not as result
+            raise UserWarning("Could not log in as user " + username + ". " + str(ex))
+        if(result!="OK"):
+            raise UserWarning("Could not log in as user " + username + ". " + str(data))
+        result, self.mailboxes = self.mail_con.list()
+        if(result!="OK"):
+            raise UserWarning("Could not retrieve mailboxes for user " + username + ".")
+
+    def __del__(self):
+        """Closes the connection and the user session."""
+        connection = getattr(self, "mail_con", None)
+        if connection is None:
+            return
+        # best effort cleanup: closing fails if no mailbox is selected and
+        # both calls fail if the connection is already gone
+        try:
+            connection.close()
+        except (imaplib.IMAP4.error, OSError):
+            pass
+        try:
+            connection.logout()
+        except (imaplib.IMAP4.error, OSError):
+            pass
+
+    @staticmethod
+    def _uid_text(mid):
+        """Returns a message uid as str for the use in error messages."""
+        return mid.decode("utf-8") if isinstance(mid, bytes) else str(mid)
+
+    def __iter__(self):
+        """Iterates through the retrieved mailboxes.
+
+        Entries which cannot be parsed and mailboxes which cannot be
+        selected are left out, since none of the other methods could
+        work on them.
+        """
+        for mailbox in self.mailboxes:
+            if not isinstance(mailbox, bytes):
+                continue
+            parsed = LIST_RESP.match(mailbox.decode("utf-8"))
+            if parsed is None:
+                continue
+            name = parsed.group('name')
+            result, _ = self.mail_con.select(name)
+            if(result!="OK"):
+                continue
+            yield name
+
+    def fetch_messages(self):
+        """Fetches the messages from the current mailbox, return list of uids."""
+        result, data = self.mail_con.uid('search', None, "ALL")
+        if(result!="OK"):
+            raise UserWarning("Could not fetch messages.")
+        return data[0].split()
+
+    def fetch_internal_date(self, mid):
+        """Fetches the internal date of a message, returns a time tuple.
+
+        Raises UserWarning if the date cannot be fetched or parsed.
+        """
+        result, data = self.mail_con.uid('fetch', mid, '(INTERNALDATE)')
+        internal_date = None
+        # the response is [None] for a message that is gone meanwhile
+        if(result=="OK" and data and data[0] is not None):
+            internal_date = imaplib.Internaldate2tuple(data[0])
+        if internal_date is None:
+            raise UserWarning("Could not fetch the internal date of message " + self._uid_text(mid) + ".")
+        return internal_date
+
+    def fetch_received_date(self, mid):
+        """Fetches the received date of a message, returns bytes response.
+
+        Raises UserWarning if the header cannot be fetched.
+        """
+        result, data = self.mail_con.uid('fetch', mid, '(BODY.PEEK[HEADER.FIELDS (RECEIVED)])')
+        if(result!="OK" or not data or data[0] is None):
+            raise UserWarning("Could not fetch the received header of message " + self._uid_text(mid) + ".")
+        return data
+
+    def update_message(self, mid, mailbox, internal_date):
+        """Replaces a message with one with correct internal date.
+
+        The message is appended to mailbox again with its flags and with
+        internal_date (a naive datetime in local time) as internal date,
+        then the original is flagged as deleted and the mailbox expunged.
+        NOTE(review): the expunge also removes every other message of the
+        mailbox that is flagged as deleted - confirm this is acceptable.
+
+        Raises UserWarning if one of the steps fails.
+        """
+        uid_text = self._uid_text(mid)
+        internal_date_seconds = time.mktime(internal_date.timetuple())
+        internal_date_str = imaplib.Time2Internaldate(internal_date_seconds)
+
+        # the flags are read before the body and the body is only peeked
+        # at, so that an unread message is not turned into a read one
+        result, flag_data = self.mail_con.uid('fetch', mid, '(FLAGS)')
+        if(result!="OK" or not flag_data or flag_data[0] is None):
+            raise UserWarning("Could not retrieve the flags of the e-mail " + uid_text + ".")
+        result, data = self.mail_con.uid('fetch', mid, '(BODY.PEEK[])')
+        if(result!="OK" or not data or data[0] is None):
+            raise UserWarning("Could not retrieve the entire e-mail " + uid_text + ".")
+
+        # \Recent is maintained by the server and cannot be set by a client
+        parsed_flags = imaplib.ParseFlags(flag_data[0])
+        flags_str = " ".join(flag.decode("utf-8") for flag in parsed_flags
+                             if flag.lower() != b"\\recent")
+        result, _ = self.mail_con.append(mailbox, flags_str,
+                                         internal_date_str, data[0][1])
+        if(result!="OK"):
+            raise UserWarning("Could not replace the e-mail " + uid_text + ".")
+        result, _ = self.mail_con.uid('STORE', mid, '+FLAGS', r'(\Deleted)')
+        if(result!="OK"):
+            raise UserWarning("Could not delete the e-mail " + uid_text + ".")
+        self.mail_con.expunge()
diff --git a/unit_tester.py b/unit_tester.py
new file mode 100644 (file)
index 0000000..b7d240b
--- /dev/null
@@ -0,0 +1,70 @@
+'''
+unit_tester.py - The module contains the MailScriptTester class.
+
+Copyright (c) 2012 Intra2net AG
+Author: Plamen Dimitrov
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+'''
+
+import unittest
+import datetime
+import date_interpreter
+
+class MailScriptTester(unittest.TestCase):
+    """Unit tests for the date extraction and comparison of DateInterpreter."""
+
+    def setUp(self):
+        self.date_interp = date_interpreter.DateInterpreter()
+        # 18:24:35 +0100 is 17:24:35 UTC; the extracted dates are naive
+        # local times, so the expected value is converted the same way
+        # instead of assuming that the tests run in the +0100 zone
+        utc_date = datetime.datetime(2007, 12, 11, 17, 24, 35, tzinfo=datetime.timezone.utc)
+        self.true_date = utc_date.astimezone().replace(tzinfo=None)
+
+    def test_received_header1(self):
+        """Tests the date extraction with a leading day of the week."""
+        date = [[0, b"Tue, 11 Dec 2007 18:24:35 +0100"]]
+        extracted_date = self.date_interp.extract_received_date(date)
+        self.assertEqual(extracted_date, self.true_date, "Failed date format 1")
+
+    def test_received_header2(self):
+        """Tests the date extraction with a line break inside of the date."""
+        date = [[0, b"11 Dec 2007 \r\n18:24:35 +0100"]]
+        extracted_date = self.date_interp.extract_received_date(date)
+        self.assertEqual(extracted_date, self.true_date, "Failed date format 2")
+
+    def test_received_header3(self):
+        """Tests the date extraction with a plain date and numeric zone."""
+        date = [[0, b"11 Dec 2007 18:24:35 +0100"]]
+        extracted_date = self.date_interp.extract_received_date(date)
+        self.assertEqual(extracted_date, self.true_date, "Failed date format 3")
+
+    def test_received_header4(self):
+        """Tests the date extraction without any time zone."""
+        date = [[0, b"11 Dec 2007 18:24:35"]]
+        extracted_date = self.date_interp.extract_received_date(date)
+        #should not be equal because of time zone assumption
+        self.assertNotEqual(extracted_date, self.true_date, "Failed date format 4")
+
+    def test_received_header5(self):
+        """Tests the date extraction with a time zone given by name."""
+        date = [[0, b"11 Dec 2007 18:24:35 GMT"]]
+        extracted_date = self.date_interp.extract_received_date(date)
+        #should not be equal because of time zone assumption
+        self.assertNotEqual(extracted_date, self.true_date, "Failed date format 5")
+
+    def test_compare_dates(self):
+        """Tests the date comparison method."""
+        later_date = self.true_date + datetime.timedelta(minutes=10)
+        #is difference of 10 mins significant if tolerance is 9 mins
+        self.assertTrue(bool(self.date_interp.compare_dates(self.true_date, later_date, 9*60)), "Failed at comparison test")
+        #is difference of 10 mins significant if tolerance is 11 mins
+        self.assertFalse(bool(self.date_interp.compare_dates(self.true_date, later_date, 11*60)), "Failed at comparison test")
+
+# run all tests of this module when it is executed as a script
+if __name__ == '__main__':
+    unittest.main()