From 8a9d4c89a337f84eaae05ebeeaf1017590258095 Mon Sep 17 00:00:00 2001 From: Plamen Dimitrov Date: Wed, 27 Jun 2012 13:58:07 +0200 Subject: [PATCH] Headers encoding corrected and cache version validation added --- caching_data.py | 8 +++- date_interpreter.py | 100 ---------------------------------------------- fix_imap_internaldate.py | 16 ++++---- mail_date_parser.py | 100 ++++++++++++++++++++++++++++++++++++++++++++++ mail_iterator.py | 8 ++-- mailbox_state.py | 1 - 6 files changed, 118 insertions(+), 115 deletions(-) delete mode 100644 date_interpreter.py create mode 100644 mail_date_parser.py diff --git a/caching_data.py b/caching_data.py index 17ecf5b..cde1a19 100644 --- a/caching_data.py +++ b/caching_data.py @@ -20,6 +20,7 @@ import logging from mailbox_state import MailboxState CACHE_FILENAME = "message_cache.dat" +CACHE_VERSION = 1 class CachingData: """This class is responsible for the caching of data.""" @@ -36,10 +37,14 @@ class CachingData: try: cachefile = open(CACHE_FILENAME, 'rb') self.version, self.data = pickle.load(cachefile) + if(self.version != CACHE_VERSION): + logging.warning("Cache file has version %s and the script version is %s.", + self.version, CACHE_VERSION) + raise IOError logging.info("Cache version %s", self.version) logging.debug("%s users found.", len(self.data)) except IOError: - self.version = 0 + self.version = CACHE_VERSION self.data = {} with open(CACHE_FILENAME, 'wb') as cachefile: pickle.dump((0, self.data), cachefile) @@ -70,7 +75,6 @@ class CachingData: return # serialize in file - self.version += 1 pickle.dump((self.version, self.data), cachefile) logging.debug("%s users stored.", len(self.data)) cachefile.close() diff --git a/date_interpreter.py b/date_interpreter.py deleted file mode 100644 index 6b0ba0c..0000000 --- a/date_interpreter.py +++ /dev/null @@ -1,100 +0,0 @@ -''' -date_interpreter.py - The module contains the MailIterator class.
- -Copyright (c) 2012 Intra2net AG -Author: Plamen Dimitrov - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. -''' - -import datetime, time -import re -import logging - -#reg expressions -RECEIVED_DATE = re.compile(r'(0?[1-9]|[1-2][0-9]|3[01])\s+([A-Z][a-z][a-z])\s+' - r'(19[0-9]{2}|[2-9][0-9]{3}|[0-9]{2})\s+(2[0-3]|[0-1][0-9]):([0-5][0-9])(?::(60|[0-5][0-9]))?\s*' - r'(?:([-\+])([0-9]{2})([0-5][0-9]))*') -INTERNAL_DATE = re.compile(r'(?P[ 0123][0-9])-(?P[A-Z][a-z][a-z])-(?P[0-9][0-9][0-9][0-9])' - r' (?P[0-9][0-9]):(?P[0-9][0-9]):(?P[0-9][0-9])' - r' (?P[-+])(?P[0-9][0-9])(?P[0-9][0-9])') -CONTROL_SYMBOLS = re.compile(r'[\n\r\t]') - -class DateInterpreter: - """This class extracts dates from imap server responses and compares them. 
- This class contains only static methods.""" - - def __init__(self): - return - - @classmethod - def extract_internal_date(cls, fetchresult): - """Extracts the internal date from INTERNALDATE, returns datetime.""" - return datetime.datetime.fromtimestamp(time.mktime(fetchresult)) - - @classmethod - def extract_received_date(cls, fetchresult): - """Extracts the first date from RECEIVED, returns datetime.""" - fetchresult = CONTROL_SYMBOLS.sub('', fetchresult[0][1].decode("utf-8")) - received_dates = RECEIVED_DATE.findall(fetchresult) - if(len(received_dates)==0): - return "" - else: received_date = received_dates[0] - logging.debug("Retrieved date %s from header %s.", received_date, fetchresult) - month = datetime.datetime.strptime(received_date[1],'%b').month - - if(received_date[3]!=""): - hours = int(received_date[3]) - else: hours = 0 - if(received_date[4]!=""): - minutes = int(received_date[4]) - else: minutes = 0 - if(received_date[5]!=""): - seconds = int(received_date[5]) - else: seconds = 0 - - if(received_date[6]!=""): - zonen = received_date[6] - else: zonen = b'+' - if(received_date[7]!=""): - zoneh = int(received_date[7]) - else: zoneh = 0 - if(received_date[8]!=""): - zonem = int(received_date[8]) - else: zonem = 0 - # subtract time zone to get unified time - zone = (zoneh * 60 + zonem) * 60 - if(zonen == b'-'): - zone = -zone - - time_tuple = (int(received_date[2]), month, int(received_date[0]), hours, minutes, seconds, -1, -1, -1) - #'mktime' assumes arg in local timezone, so add timezone/altzone - utc = time.mktime(time_tuple) - #adjust to DST - if(time.daylight and time.localtime(utc)[-1]): - zone = zone + time.altzone - else: - zone = zone + time.timezone - - received_time_tuple = time.localtime(utc - zone) - converted_received_date = datetime.datetime.fromtimestamp(time.mktime(received_time_tuple)) - return converted_received_date - - @classmethod - def compare_dates(cls, date1, date2, tolerance=1800): - """Compares datetime objects for 
deviation given certain tolerance.""" - """Returns 1 if there is a significant difference.""" - logging.debug("Comparing dates %s <> %s.", date1, date2) - timedelta = abs(date1 - date2) - if(timedelta.total_seconds()>tolerance): - return True - else: - return False diff --git a/fix_imap_internaldate.py b/fix_imap_internaldate.py index 30a3a80..5984a74 100644 --- a/fix_imap_internaldate.py +++ b/fix_imap_internaldate.py @@ -19,7 +19,7 @@ import sys import csv import argparse, configparser import logging -from date_interpreter import DateInterpreter +from mail_date_parser import MailDateParser from mail_iterator import MailIterator from caching_data import CachingData @@ -51,7 +51,7 @@ def main(): config = load_configuration() prepare_logger(config) - date_interp = DateInterpreter() + date_parser = MailDateParser() caching_data = CachingData() logging.warning("Cache version %s loaded.", caching_data.version) user_reader = csv.DictReader(open("userdata.csv", "r"), delimiter=',') @@ -79,18 +79,18 @@ def main(): for mid in new_ids: try: fetched_internal_date = session.fetch_internal_date(mid) - internal_date = date_interp.extract_internal_date(fetched_internal_date) + internal_date = date_parser.extract_internal_date(fetched_internal_date) fetched_received_date = session.fetch_received_date(mid) - received_date = date_interp.extract_received_date(fetched_received_date) + received_date = date_parser.extract_received_date(fetched_received_date) if(received_date==""): logging.debug("No received date could be found in message uid: %s - mailbox: %s - user: %s.", - mid, box.name, box.owner) + mid.decode('iso-8859-1'), box.name, box.owner) box.no_received_field += 1 continue except UserWarning as ex: logging.error(ex) continue - if(date_interp.compare_dates(received_date, internal_date, tolerance)): + if(date_parser.compare_dates(received_date, internal_date, tolerance)): #print(received_date, internal_date) if(test_mode==0): try: @@ -100,10 +100,10 @@ def main(): continue 
else: logging.info("Date conflict found in message uid: %s - mailbox: %s - user: %s.\nInternal date %s is different from received date %s from RECEIVED header:\n%s.", - mid, box.name, box.owner, + mid.decode('iso-8859-1'), box.name, box.owner, internal_date.strftime("%d %b %Y %H:%M:%S"), received_date.strftime("%d %b %Y %H:%M:%S"), - fetched_received_date[0][1].decide("utf-8").split("Received:")[1]) + fetched_received_date.split("Received:")[1]) # count total emails for every user and mailbox box.date_conflicts += 1 # if all messages were successfully fixed confirm caching diff --git a/mail_date_parser.py b/mail_date_parser.py new file mode 100644 index 0000000..d957d23 --- /dev/null +++ b/mail_date_parser.py @@ -0,0 +1,100 @@ +''' +mail_date_parser.py - The module contains the MailDateParser class. + +Copyright (c) 2012 Intra2net AG +Author: Plamen Dimitrov + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details.
+''' + +import datetime, time +import re +import logging + +#reg expressions +RECEIVED_DATE = re.compile(r'(0?[1-9]|[1-2][0-9]|3[01])\s+([A-Z][a-z][a-z])\s+' + r'(19[0-9]{2}|[2-9][0-9]{3}|[0-9]{2})\s+(2[0-3]|[0-1][0-9]):([0-5][0-9])(?::(60|[0-5][0-9]))?\s*' + r'(?:([-\+])([0-9]{2})([0-5][0-9]))*') +INTERNAL_DATE = re.compile(r'(?P<day>[ 0123][0-9])-(?P<mon>[A-Z][a-z][a-z])-(?P<year>[0-9][0-9][0-9][0-9])' + r' (?P<hour>[0-9][0-9]):(?P<min>[0-9][0-9]):(?P<sec>[0-9][0-9])' + r' (?P<zonen>[-+])(?P<zoneh>[0-9][0-9])(?P<zonem>[0-9][0-9])') +CONTROL_SYMBOLS = re.compile(r'[\n\r\t]') + +class MailDateParser: + """This class extracts dates from imap server responses and compares them. + This class contains only static methods.""" + + def __init__(self): + return + + @classmethod + def extract_internal_date(cls, fetchresult): + """Extracts the internal date from INTERNALDATE, returns datetime.""" + return datetime.datetime.fromtimestamp(time.mktime(fetchresult)) + + @classmethod + def extract_received_date(cls, fetchresult): + """Extracts the first date from RECEIVED, returns datetime.""" + fetchresult = CONTROL_SYMBOLS.sub('', fetchresult) + received_dates = RECEIVED_DATE.findall(fetchresult) + if(len(received_dates)==0): + return "" + else: received_date = received_dates[0] + logging.debug("Retrieved date %s from header %s.", received_date, fetchresult) + month = datetime.datetime.strptime(received_date[1],'%b').month + + if(received_date[3]!=""): + hours = int(received_date[3]) + else: hours = 0 + if(received_date[4]!=""): + minutes = int(received_date[4]) + else: minutes = 0 + if(received_date[5]!=""): + seconds = int(received_date[5]) + else: seconds = 0 + + if(received_date[6]!=""): + zonen = received_date[6] + else: zonen = '+' + if(received_date[7]!=""): + zoneh = int(received_date[7]) + else: zoneh = 0 + if(received_date[8]!=""): + zonem = int(received_date[8]) + else: zonem = 0 + # subtract time zone to get unified time + zone = (zoneh * 60 + zonem) * 60 + if(zonen == '-'): + zone = -zone + + time_tuple = 
(int(received_date[2]), month, int(received_date[0]), hours, minutes, seconds, -1, -1, -1) + #'mktime' assumes arg in local timezone, so add timezone/altzone + utc = time.mktime(time_tuple) + #adjust to DST + if(time.daylight and time.localtime(utc)[-1]): + zone = zone + time.altzone + else: + zone = zone + time.timezone + + received_time_tuple = time.localtime(utc - zone) + converted_received_date = datetime.datetime.fromtimestamp(time.mktime(received_time_tuple)) + return converted_received_date + + @classmethod + def compare_dates(cls, date1, date2, tolerance=1800): + """Compares datetime objects for deviation given certain tolerance.""" + """Returns 1 if there is a significant difference.""" + logging.debug("Comparing dates %s <> %s.", date1, date2) + timedelta = abs(date1 - date2) + if(timedelta.total_seconds()>tolerance): + return True + else: + return False diff --git a/mail_iterator.py b/mail_iterator.py index 0a4b5df..e4da7d4 100644 --- a/mail_iterator.py +++ b/mail_iterator.py @@ -51,11 +51,11 @@ class MailIterator: """Iterates through all mailboxes, returns (uidval,name).""" for mailbox in self.mailboxes: logging.debug("Checking mailbox %s.", mailbox) - mailbox = MAILBOX_RESP.match(mailbox.decode("utf-8")).groups() + mailbox = MAILBOX_RESP.match(mailbox.decode('iso-8859-1')).groups() result, data = self.mail_con.status(mailbox[2], '(UIDVALIDITY)') if(result!="OK"): raise UserWarning("Could not retrieve mailbox uidvalidity.") - uidval = UIDVAL_RESP.match(data[0].decode("utf-8")).groups() + uidval = UIDVAL_RESP.match(data[0].decode('iso-8859-1')).groups() logging.debug("Extracted mailbox info is %s %s.", data[0], uidval) self.mail_con.select(mailbox[2]) yield (mailbox[2], uidval[1]) @@ -81,7 +81,7 @@ class MailIterator: result, data = self.mail_con.uid('fetch', mid, '(BODY.PEEK[HEADER.FIELDS (RECEIVED)])') if(result!="OK"): raise UserWarning("Could not fetch the received header of message" + mid + ".") - return data + return data[0][1].decode('iso-8859-1') 
def update_message(self, mid, mailbox, internal_date): """Replaces a message with one with correct internal date.""" @@ -94,7 +94,7 @@ class MailIterator: fetched_flags = self.mail_con.uid('fetch', mid, '(FLAGS)')[1][0] parsed_flags = imaplib.ParseFlags(fetched_flags) - flags_str = " ".join(flag.decode("utf-8") for flag in parsed_flags) + flags_str = " ".join(flag.decode('iso-8859-1') for flag in parsed_flags) result, data = self.mail_con.append(mailbox, flags_str, internal_date_str, data[0][1]) logging.debug("Adding corrected copy of the message reponse: %s %s", result, data) diff --git a/mailbox_state.py b/mailbox_state.py index 12e5c78..e38c584 100644 --- a/mailbox_state.py +++ b/mailbox_state.py @@ -14,7 +14,6 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. ''' -import logging class MailboxState: """This class is responsible for containing and updating a mailbox data.""" -- 1.7.1