# Recovered from a whitespace-collapsed patch:
#   From: Plamen Dimitrov
#   Date: Thu, 21 Jun 2012 16:41:40 +0000 (+0200)
#   Subject: Initial submission of working tool
#   X-Git-Url: http://developer.intra2net.com/git/?p=imap-fix-internaldate;a=commitdiff_plain;h=c9da760ab064de55ea97d4056536c73cf9d441ad
#
# confscript.cfg (new file added by the same patch):
#   [basic_settings]
#   log_level = 30
#   imap_server = imap.company.com
#   tolerance = 1800
#
# ---- date_interpreter.py (new file) ----
'''
date_interpreter.py - The module contains the DateInterpreter class.

Copyright (c) 2012 Intra2net AG
Author: Plamen Dimitrov

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

Add '-t' argument when running the module for a test mode.
For a detailed list of each message with a date conflict change
the 'log_level' in the configuration file from '30' to '20'.
'''

import calendar
import datetime
import re
import time

#reg expressions
RECEIVED_DATE = re.compile(r'(0?[1-9]|[1-2][0-9]|3[01])\s+([A-Z][a-z][a-z])\s+'
        r'(19[0-9]{2}|[2-9][0-9]{3}|[0-9]{2})\s+(2[0-3]|[0-1][0-9]):([0-5][0-9])(?::(60|[0-5][0-9]))?\s*'
        r'(?:([-\+])([0-9]{2})([0-5][0-9]))*')
# The group names were lost when the patch was extracted; they are restored
# here following the INTERNALDATE pattern used by the standard imaplib module.
INTERNAL_DATE = re.compile(r'(?P<day>[ 0123][0-9])-(?P<mon>[A-Z][a-z][a-z])-(?P<year>[0-9][0-9][0-9][0-9])'
        r' (?P<hour>[0-9][0-9]):(?P<min>[0-9][0-9]):(?P<sec>[0-9][0-9])'
        r' (?P<zonen>[-+])(?P<zoneh>[0-9][0-9])(?P<zonem>[0-9][0-9])')
CONTROL_SYMBOLS = re.compile(r'[\n\r\t]')

# English month abbreviations as they appear in mail headers.  A fixed table
# keeps the parsing independent of the process locale.
_MONTHS = {name: number for number, name in enumerate(
    ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'), start=1)}


class DateInterpreter:
    """This class extracts dates from imap server responses and compares them.

    All returned datetimes are naive and expressed in the local time zone of
    the running process, so results of both extract methods are comparable.
    """

    @classmethod
    def extract_internal_date(cls, fetchresult):
        """Convert an INTERNALDATE time tuple into a datetime.

        fetchresult -- local time tuple as produced by
                       imaplib.Internaldate2tuple().

        Returns a naive local datetime.
        Raises UserWarning if no time tuple was supplied (None).
        """
        if fetchresult is None:
            raise UserWarning("Could not interpret the internal date.")
        return datetime.datetime.fromtimestamp(time.mktime(fetchresult))

    @classmethod
    def extract_received_date(cls, fetchresult):
        """Extract the first usable date from a fetched RECEIVED header.

        fetchresult -- fetch response where fetchresult[0][1] holds the raw
                       header bytes.

        Returns a naive local datetime, or "" when no date could be found
        (callers compare against "").  A date without a zone is treated as
        UTC.
        """
        try:
            raw_header = fetchresult[0][1]
        except (IndexError, KeyError, TypeError):
            return ""
        if isinstance(raw_header, bytes):
            # headers are not guaranteed to be valid UTF-8
            header = raw_header.decode("utf-8", "replace")
        elif isinstance(raw_header, str):
            header = raw_header
        else:
            return ""

        # Folded header lines: turn the control characters into spaces so
        # that tokens separated only by a line fold do not get glued together.
        header = CONTROL_SYMBOLS.sub(' ', header)
        for match in RECEIVED_DATE.finditer(header):
            converted = cls._to_local_datetime(match.groups(default=""))
            if converted is not None:
                return converted
        return ""

    @classmethod
    def _to_local_datetime(cls, fields):
        """Turn the nine RECEIVED_DATE groups into a naive local datetime.

        Returns None if the groups do not form a valid date.
        """
        day, month_name, year_text, hour, minute, second, zonen, zoneh, zonem = fields
        month = _MONTHS.get(month_name)
        if month is None:
            # three letters that merely look like a month
            return None

        year = int(year_text)
        if len(year_text) == 2:
            # obsolete two digit years: 00-49 -> 20xx, 50-99 -> 19xx
            year += 2000 if year < 50 else 1900

        # offset of the header's zone from UTC in seconds
        offset = (int(zoneh or 0) * 60 + int(zonem or 0)) * 60
        if zonen == '-':
            offset = -offset

        try:
            # Interpret the fields as UTC, subtract the zone offset to get
            # the real instant and let fromtimestamp() localise it.
            utc_seconds = calendar.timegm((year, month, int(day), int(hour),
                                           int(minute), int(second or 0), 0, 0, 0))
            return datetime.datetime.fromtimestamp(utc_seconds - offset)
        except (ValueError, OverflowError, OSError):
            return None

    @classmethod
    def compare_dates(cls, date1, date2, tolerance=1800):
        """Compare two datetime objects for deviation given a tolerance.

        tolerance -- allowed difference in seconds.

        Returns 1 if the dates differ by more than the tolerance, else 0.
        """
        deviation = abs(date1 - date2)
        if deviation.total_seconds() > tolerance:
            return 1
        return 0
'''
fix_imap_internaldate.py - Fix the INTERNALDATE field on IMAP servers

Copyright (c) 2012 Intra2net AG
Author: Plamen Dimitrov

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

Add '-t' argument when running the module for a test mode.
For a detailed list of each message with a date conflict change
the 'log_level' in the configuration file from '30' to '20'.
'''

import sys
import csv
import logging
import configparser
from date_interpreter import DateInterpreter
from mail_iterator import MailIterator


def main():
    """Iterates through csv list of users and their mailboxes.

    Reads 'userdata.csv' (columns 'username' and 'password'), checks every
    message of every mailbox for a conflict between INTERNALDATE and the
    first RECEIVED date and, unless started with '-t' (test mode), replaces
    conflicting messages.  Results are written to 'mailscript.log'.
    """
    test_mode = 1 if (len(sys.argv) > 1 and sys.argv[1] == "-t") else 0

    config = load_configuration()
    logging.basicConfig(filename='mailscript.log',
                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                        level=config.getint('basic_settings', 'log_level'))

    date_interp = DateInterpreter()
    server = config.get('basic_settings', 'imap_server')
    tolerance = config.getint('basic_settings', 'tolerance')

    # the context manager makes sure the credentials file gets closed
    with open("userdata.csv", "r", newline="") as user_file:
        for user in csv.DictReader(user_file, delimiter=','):
            conflicts = _process_user(user, server, tolerance, date_interp, test_mode)
            if conflicts is not None:
                _log_totals(user['username'], conflicts)


def _process_user(user, server, tolerance, date_interp, test_mode):
    """Checks all mailboxes of one user, returns {mailbox: conflict count}.

    Returns None if no session could be opened for the user.  The counts are
    kept per user, so totals never include mailboxes of previous users.
    """
    try:
        session = MailIterator(server, user['username'], user['password'])
    except UserWarning as ex:
        logging.error(ex)
        return None

    conflicts = {}
    for mailbox in session:
        box_name = mailbox.strip('"')
        try:
            mail_ids = session.fetch_messages()
        except UserWarning as ex:
            logging.error(ex)
            continue
        for mid in mail_ids:
            try:
                fetched_internal_date = session.fetch_internal_date(mid)
                internal_date = date_interp.extract_internal_date(fetched_internal_date)
                fetched_received_date = session.fetch_received_date(mid)
                received_date = date_interp.extract_received_date(fetched_received_date)
            except UserWarning as ex:
                logging.error(ex)
                continue
            if received_date == "":
                logging.warning("No received date could be found in message uid: %s - mailbox: %s - user: %s.\n",
                                mid.decode("utf-8"), box_name, user['username'])
                continue
            if not date_interp.compare_dates(received_date, internal_date, tolerance):
                continue

            if test_mode == 0:
                try:
                    session.update_message(mid, mailbox, received_date)
                except UserWarning as ex:
                    logging.error(ex)
                    continue
            else:
                logging.info("Date conflict found in message uid: %s - mailbox: %s - user: %s.\nInternal date %s is different from received date %s from RECEIVED header:\n%s.",
                             mid.decode("utf-8"), box_name, user['username'],
                             internal_date.strftime("%d %b %Y %H:%M:%S"),
                             received_date.strftime("%d %b %Y %H:%M:%S"),
                             _first_received_header(fetched_received_date))
            #count total emails for every mailbox of this user
            conflicts[box_name] = 1 + conflicts.get(box_name, 0)
    return conflicts


def _first_received_header(fetched_received_date):
    """Returns the text of the first RECEIVED header for log output."""
    text = fetched_received_date[0][1].decode("utf-8", "replace")
    parts = text.split("Received:")
    # header names are case-insensitive, so the exact marker may be missing;
    # fall back to the complete text instead of failing on the index
    return parts[1] if len(parts) > 1 else text


def _log_totals(username, conflicts):
    """Logs the conflict count per mailbox and the total for one user."""
    for box_name, count in conflicts.items():
        logging.warning("Total date conflicts to be corrected in a mailbox %s are %s.",
                        box_name, count)
    logging.warning("Total date conflicts to be corrected for user %s are %s.\n",
                    username, sum(conflicts.values()))


def load_configuration():
    """Loads the script configuration from a file or creates such.

    Returns a RawConfigParser holding the section 'basic_settings' with the
    options 'log_level', 'imap_server' and 'tolerance'.  If 'confscript.cfg'
    cannot be read, a file with default values is written; its
    'imap_server' is a placeholder that has to be edited.
    """
    config = configparser.RawConfigParser()
    # read() does not raise for a missing file, it returns the list of files
    # it managed to parse - an empty list means there is no configuration
    if not config.read('confscript.cfg'):
        config.add_section('basic_settings')
        config.set('basic_settings', 'log_level', str(logging.DEBUG))
        config.set('basic_settings', 'imap_server', 'imap.company.com')
        config.set('basic_settings', 'tolerance', '1800')
        with open('confscript.cfg', 'w') as configfile:
            config.write(configfile)
    return config


if __name__ == "__main__":
    main()
'''
mail_iterator.py - The module contains the MailIterator class.

Copyright (c) 2012 Intra2net AG
Author: Plamen Dimitrov

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

Add '-t' argument when running the module for a test mode.
For a detailed list of each message with a date conflict change
the 'log_level' in the configuration file from '30' to '20'.
'''

import imaplib
import re
import time

# The group names were lost when the patch was extracted; restored so that
# the pattern compiles again.  Groups: flags, hierarchy delimiter, name.
LIST_RESP = re.compile(r'\((?P<flags>.*?)\) "(?P<delimiter>.*)" (?P<name>.*)')


class MailIterator:
    """This class communicates with the e-mail server.

    Iterating over an instance selects each mailbox of the user in turn and
    yields its name; the fetch/update methods then work on the mailbox that
    is currently selected.  All failures are reported as UserWarning.
    """
    # class level fallbacks so that __del__ and __iter__ also work on an
    # instance whose __init__ did not run to completion
    mailboxes = []
    mail_con = None

    def __init__(self, server, username, password):
        """Creates a connection and a user session.

        Raises UserWarning if connecting, logging in or listing the
        mailboxes fails.
        """
        try:
            self.mail_con = imaplib.IMAP4_SSL(server)
        except (imaplib.IMAP4.error, OSError) as ex:
            raise UserWarning("Could not connect to server " + server + ". " + str(ex)) from ex
        try:
            # imaplib reports a rejected login by raising, not by the result
            result, data = self.mail_con.login(username, password)
        except imaplib.IMAP4.error as ex:
            raise UserWarning("Could not log in as user " + username + ". " + str(ex)) from ex
        if result != "OK":
            raise UserWarning("Could not log in as user " + username + ". " + str(data))
        try:
            result, self.mailboxes = self.mail_con.list()
        except (imaplib.IMAP4.error, OSError) as ex:
            raise UserWarning("Could not retrieve mailboxes for user " + username + ".") from ex
        if result != "OK":
            raise UserWarning("Could not retrieve mailboxes for user " + username + ".")

    def __del__(self):
        """Closes the connection and the user session."""
        con = self.mail_con
        if con is None:
            return
        try:
            # CLOSE is only legal while a mailbox is selected
            if getattr(con, 'state', None) == 'SELECTED':
                con.close()
            con.logout()
        except (imaplib.IMAP4.error, OSError):
            # best effort: nothing sensible can be done in a destructor
            pass

    def __iter__(self):
        """Iterates through the retrieved mailboxes.

        Yields the (possibly quoted) name of every mailbox that could be
        selected.  Entries that cannot be parsed, mailboxes flagged
        \\Noselect and mailboxes the server refuses to select are skipped.
        """
        for entry in self.mailboxes:
            if not isinstance(entry, bytes):
                # names sent as IMAP literals arrive as tuples - not handled
                continue
            match = LIST_RESP.match(entry.decode("utf-8", "replace"))
            if match is None:
                continue
            if '\\noselect' in match.group('flags').lower():
                continue
            name = match.group('name')
            try:
                result, _ = self.mail_con.select(name)
            except (imaplib.IMAP4.error, OSError):
                continue
            if result != "OK":
                continue
            yield name

    @staticmethod
    def _mid_text(mid):
        """Returns a message uid (bytes or str) as text for messages."""
        if isinstance(mid, bytes):
            return mid.decode("utf-8", "replace")
        return str(mid)

    def _uid(self, failure, command, *args):
        """Runs a UID command, returns its data or raises UserWarning(failure)."""
        try:
            result, data = self.mail_con.uid(command, *args)
        except (imaplib.IMAP4.error, OSError) as ex:
            raise UserWarning(failure + " " + str(ex)) from ex
        if result != "OK":
            raise UserWarning(failure)
        return data

    def fetch_messages(self):
        """Fetches the messages from the current mailbox, return list of uids."""
        data = self._uid("Could not fetch messages.", 'search', None, "ALL")
        #an empty mailbox yields an empty list
        if not data or not data[0]:
            return []
        return data[0].split()

    def fetch_internal_date(self, mid):
        """Fetches the internal date of a message, returns a time tuple."""
        failure = "Could not fetch the internal date of message " + self._mid_text(mid) + "."
        data = self._uid(failure, 'fetch', mid, '(INTERNALDATE)')
        if not data or data[0] is None:
            raise UserWarning(failure)
        internal_date = imaplib.Internaldate2tuple(data[0])
        if internal_date is None:
            raise UserWarning(failure)
        return internal_date

    def fetch_received_date(self, mid):
        """Fetches the received date of a message, returns bytes response."""
        failure = "Could not fetch the received header of message " + self._mid_text(mid) + "."
        data = self._uid(failure, 'fetch', mid, '(BODY.PEEK[HEADER.FIELDS (RECEIVED)])')
        if not data or not isinstance(data[0], tuple):
            raise UserWarning(failure)
        return data

    def update_message(self, mid, mailbox, internal_date):
        """Replaces a message with one with correct internal date.

        mid           -- uid of the message in the selected mailbox.
        mailbox       -- name of the selected mailbox (as yielded by iteration).
        internal_date -- naive local datetime to use as new INTERNALDATE.

        The message is appended with its old flags and the new date, then
        the old copy is flagged \\Deleted and the mailbox is expunged.
        Raises UserWarning if any step fails.
        NOTE: EXPUNGE also removes other messages of the mailbox that were
        already flagged \\Deleted.
        """
        mid_text = self._mid_text(mid)
        internal_date_seconds = time.mktime(internal_date.timetuple())
        internal_date_str = imaplib.Time2Internaldate(internal_date_seconds)

        # Read the flags before the body and fetch the body with PEEK:
        # a plain RFC822 fetch sets \Seen and unread mail would be copied
        # as read.
        flags_data = self._uid("Could not retrieve the flags of e-mail " + mid_text + ".",
                               'fetch', mid, '(FLAGS)')
        parsed_flags = imaplib.ParseFlags(flags_data[0]) if flags_data and flags_data[0] else ()
        # \Recent is maintained by the server and must not be set by a client
        flags_str = " ".join(flag.decode("utf-8") for flag in parsed_flags
                             if flag.lower() != b'\\recent')

        failure = "Could not retrieve the entire e-mail " + mid_text + "."
        data = self._uid(failure, 'fetch', mid, '(BODY.PEEK[])')
        if not data or not isinstance(data[0], tuple):
            raise UserWarning(failure)

        try:
            result, _ = self.mail_con.append(mailbox, flags_str,
                                             internal_date_str, data[0][1])
        except (imaplib.IMAP4.error, OSError) as ex:
            raise UserWarning("Could not replace the e-mail " + mid_text + ". " + str(ex)) from ex
        if result != "OK":
            raise UserWarning("Could not replace the e-mail " + mid_text + ".")

        # only remove the original after the copy has been stored
        self._uid("Could not delete the e-mail " + mid_text + ".",
                  'STORE', mid, '+FLAGS', r'(\Deleted)')
        try:
            self.mail_con.expunge()
        except (imaplib.IMAP4.error, OSError) as ex:
            raise UserWarning("Could not delete the e-mail " + mid_text + ". " + str(ex)) from ex
'''
unit_tester.py - The module contains the MailScriptTester class.

Copyright (c) 2012 Intra2net AG
Author: Plamen Dimitrov

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
'''

import calendar
import unittest
import datetime
import date_interpreter


class MailScriptTester(unittest.TestCase):
    """Tests the date extraction and comparison of DateInterpreter."""

    def setUp(self):
        self.date_interp = date_interpreter.DateInterpreter()
        # 18:24:35 +0100 is 17:24:35 UTC.  The interpreter returns naive
        # local datetimes, so the expectation is derived from the UTC
        # instant instead of hard coding the wall clock of one time zone.
        self.true_date = datetime.datetime.fromtimestamp(
            calendar.timegm((2007, 12, 11, 17, 24, 35, 0, 0, 0)))

    def test_received_header1(self):
        """Tests the date extraction with a leading week day."""
        date = [[0, b"Tue, 11 Dec 2007 18:24:35 +0100"]]
        extracted_date = self.date_interp.extract_received_date(date)
        self.assertEqual(extracted_date, self.true_date, "Failed date format 1")

    def test_received_header2(self):
        """Tests the date extraction with a folded header line."""
        date = [[0, b"11 Dec 2007 \r\n18:24:35 +0100"]]
        extracted_date = self.date_interp.extract_received_date(date)
        self.assertEqual(extracted_date, self.true_date, "Failed date format 2")

    def test_received_header3(self):
        """Tests the date extraction without a week day."""
        date = [[0, b"11 Dec 2007 18:24:35 +0100"]]
        extracted_date = self.date_interp.extract_received_date(date)
        self.assertEqual(extracted_date, self.true_date, "Failed date format 3")

    def test_received_header4(self):
        """Tests the date extraction without a time zone."""
        date = [[0, b"11 Dec 2007 18:24:35"]]
        extracted_date = self.date_interp.extract_received_date(date)
        #should not be equal because of time zone assumption
        self.assertNotEqual(extracted_date, self.true_date, "Failed date format 4")

    def test_received_header5(self):
        """Tests the date extraction with a named time zone."""
        date = [[0, b"11 Dec 2007 18:24:35 GMT"]]
        extracted_date = self.date_interp.extract_received_date(date)
        #should not be equal because of time zone assumption
        self.assertNotEqual(extracted_date, self.true_date, "Failed date format 5")

    def test_compare_dates(self):
        """Tests the date comparison method."""
        earlier = datetime.datetime(2007, 12, 11, 18, 24, 35)
        later = datetime.datetime(2007, 12, 11, 18, 34, 35)
        #is difference of 10 mins significant if tolerance is 9 mins
        self.assertTrue(bool(self.date_interp.compare_dates(earlier, later, 9*60)), "Failed at comparison test")
        #is difference of 10 mins significant if tolerance is 11 mins
        self.assertFalse(bool(self.date_interp.compare_dates(earlier, later, 11*60)), "Failed at comparison test")


if __name__ == '__main__':
    unittest.main()