From: Plamen Dimitrov Date: Wed, 27 Jun 2012 11:58:07 +0000 (+0200) Subject: Headers encoding corrected and cache version validation added X-Git-Url: http://developer.intra2net.com/git/?a=commitdiff_plain;h=8a9d4c89a337f84eaae05ebeeaf1017590258095;p=imap-fix-internaldate Headers encoding corrected and cache version validation added --- diff --git a/caching_data.py b/caching_data.py index 17ecf5b..cde1a19 100644 --- a/caching_data.py +++ b/caching_data.py @@ -20,6 +20,7 @@ import logging from mailbox_state import MailboxState CACHE_FILENAME = "message_cache.dat" +CACHE_VERSION = 1 class CachingData: """This class is responsible for the caching of data.""" @@ -36,10 +37,14 @@ class CachingData: try: cachefile = open(CACHE_FILENAME, 'rb') self.version, self.data = pickle.load(cachefile) + if(self.version != CACHE_VERSION): + logging.warning("Cache file has version %s and the script version is %s.", + self.version, self.data) + raise IOError logging.info("Cache version %s", self.version) logging.debug("%s users found.", len(self.data)) except IOError: - self.version = 0 + self.version = CACHE_VERSION self.data = {} with open(CACHE_FILENAME, 'wb') as cachefile: pickle.dump((0, self.data), cachefile) @@ -70,7 +75,6 @@ class CachingData: return # serialize in file - self.version += 1 pickle.dump((self.version, self.data), cachefile) logging.debug("%s users stored.", len(self.data)) cachefile.close() diff --git a/fix_imap_internaldate.py b/fix_imap_internaldate.py index 30a3a80..5984a74 100644 --- a/fix_imap_internaldate.py +++ b/fix_imap_internaldate.py @@ -19,7 +19,7 @@ import sys import csv import argparse, configparser import logging -from date_interpreter import DateInterpreter +from mail_date_parser import MailDateParser from mail_iterator import MailIterator from caching_data import CachingData @@ -51,7 +51,7 @@ def main(): config = load_configuration() prepare_logger(config) - date_interp = DateInterpreter() + date_parser = MailDateParser() caching_data = CachingData() logging.warning("Cache version %s loaded.", caching_data.version) user_reader = csv.DictReader(open("userdata.csv", "r"), delimiter=',') @@ -79,18 +79,18 @@ def main(): for mid in new_ids: try: fetched_internal_date = session.fetch_internal_date(mid) - internal_date = date_interp.extract_internal_date(fetched_internal_date) + internal_date = date_parser.extract_internal_date(fetched_internal_date) fetched_received_date = session.fetch_received_date(mid) - received_date = date_interp.extract_received_date(fetched_received_date) + received_date = date_parser.extract_received_date(fetched_received_date) if(received_date==""): logging.debug("No received date could be found in message uid: %s - mailbox: %s - user: %s.", - mid, box.name, box.owner) + mid.decode('iso-8859-1'), box.name, box.owner) box.no_received_field += 1 continue except UserWarning as ex: logging.error(ex) continue - if(date_interp.compare_dates(received_date, internal_date, tolerance)): + if(date_parser.compare_dates(received_date, internal_date, tolerance)): #print(received_date, internal_date) if(test_mode==0): try: @@ -100,10 +100,10 @@ def main(): continue else: logging.info("Date conflict found in message uid: %s - mailbox: %s - user: %s.\nInternal date %s is different from received date %s from RECEIVED header:\n%s.", - mid, box.name, box.owner, + mid.decode('iso-8859-1'), box.name, box.owner, internal_date.strftime("%d %b %Y %H:%M:%S"), received_date.strftime("%d %b %Y %H:%M:%S"), - fetched_received_date[0][1].decide("utf-8").split("Received:")[1]) + fetched_received_date[0][1].decode('iso-8859-1').split("Received:")[1]) # count total emails for every user and mailbox box.date_conflicts += 1 # if all messages were successfully fixed confirm caching diff --git a/date_interpreter.py b/mail_date_parser.py similarity index 95% rename from date_interpreter.py rename to mail_date_parser.py index 6b0ba0c..d957d23 100644 --- a/date_interpreter.py +++ b/mail_date_parser.py @@ -1,5 +1,5 @@ ''' -date_interpreter.py - The module contains the MailIterator class. +mail_date_parser.py - The module contains the MailDateParser class. Copyright (c) 2012 Intra2net AG Author: Plamen Dimitrov @@ -28,7 +28,7 @@ INTERNAL_DATE = re.compile(r'(?P[ 0123][0-9])-(?P[A-Z][a-z][a-z])-(?P< r' (?P[-+])(?P[0-9][0-9])(?P[0-9][0-9])') CONTROL_SYMBOLS = re.compile(r'[\n\r\t]') -class DateInterpreter: +class MailDateParser: """This class extracts dates from imap server responses and compares them. This class contains only static methods.""" @@ -43,7 +43,7 @@ class DateInterpreter: @classmethod def extract_received_date(cls, fetchresult): """Extracts the first date from RECEIVED, returns datetime.""" - fetchresult = CONTROL_SYMBOLS.sub('', fetchresult[0][1].decode("utf-8")) + fetchresult = CONTROL_SYMBOLS.sub('', fetchresult) received_dates = RECEIVED_DATE.findall(fetchresult) if(len(received_dates)==0): return "" diff --git a/mail_iterator.py b/mail_iterator.py index 0a4b5df..e4da7d4 100644 --- a/mail_iterator.py +++ b/mail_iterator.py @@ -51,11 +51,11 @@ class MailIterator: """Iterates through all mailboxes, returns (uidval,name).""" for mailbox in self.mailboxes: logging.debug("Checking mailbox %s.", mailbox) - mailbox = MAILBOX_RESP.match(mailbox.decode("utf-8")).groups() + mailbox = MAILBOX_RESP.match(mailbox.decode('iso-8859-1')).groups() result, data = self.mail_con.status(mailbox[2], '(UIDVALIDITY)') if(result!="OK"): raise UserWarning("Could not retrieve mailbox uidvalidity.") - uidval = UIDVAL_RESP.match(data[0].decode("utf-8")).groups() + uidval = UIDVAL_RESP.match(data[0].decode('iso-8859-1')).groups() logging.debug("Extracted mailbox info is %s %s.", data[0], uidval) self.mail_con.select(mailbox[2]) yield (mailbox[2], uidval[1]) @@ -81,7 +81,7 @@ class MailIterator: result, data = self.mail_con.uid('fetch', mid, '(BODY.PEEK[HEADER.FIELDS (RECEIVED)])') if(result!="OK"): raise UserWarning("Could not fetch the received header of message" + mid + ".") - return data + return data[0][1].decode('iso-8859-1') def update_message(self, mid, mailbox, internal_date): """Replaces a message with one with correct internal date.""" @@ -94,7 +94,7 @@ class MailIterator: fetched_flags = self.mail_con.uid('fetch', mid, '(FLAGS)')[1][0] parsed_flags = imaplib.ParseFlags(fetched_flags) - flags_str = " ".join(flag.decode("utf-8") for flag in parsed_flags) + flags_str = " ".join(flag.decode('iso-8859-1') for flag in parsed_flags) result, data = self.mail_con.append(mailbox, flags_str, internal_date_str, data[0][1]) logging.debug("Adding corrected copy of the message reponse: %s %s", result, data) diff --git a/mailbox_state.py b/mailbox_state.py index 12e5c78..e38c584 100644 --- a/mailbox_state.py +++ b/mailbox_state.py @@ -14,7 +14,6 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. ''' -import logging class MailboxState: """This class is responsible for containing and updating a mailbox data."""