Headers encoding corrected and cache version validation added
author Plamen Dimitrov <plamen.dimitrov@intra2net.com>
Wed, 27 Jun 2012 11:58:07 +0000 (13:58 +0200)
committer Plamen Dimitrov <plamen.dimitrov@intra2net.com>
Wed, 27 Jun 2012 12:01:10 +0000 (14:01 +0200)
caching_data.py
fix_imap_internaldate.py
mail_date_parser.py [moved from date_interpreter.py with 95% similarity]
mail_iterator.py
mailbox_state.py

index 17ecf5b..cde1a19 100644 (file)
@@ -20,6 +20,7 @@ import logging
 from mailbox_state import MailboxState
 
 CACHE_FILENAME = "message_cache.dat"
+CACHE_VERSION = 1
 
 class CachingData:
     """This class is responsible for the caching of data."""
@@ -36,10 +37,14 @@ class CachingData:
         try:
             cachefile = open(CACHE_FILENAME, 'rb')
             self.version, self.data = pickle.load(cachefile)
+            if(self.version != CACHE_VERSION):
+                logging.warning("Cache file has version %s and the script version is %s.",
+                                self.version, self.data)
+                raise IOError
             logging.info("Cache version %s", self.version)
             logging.debug("%s users found.", len(self.data))
         except IOError:
-            self.version = 0
+            self.version = CACHE_VERSION
             self.data = {}
             with open(CACHE_FILENAME, 'wb') as cachefile:
                 pickle.dump((0, self.data), cachefile)
@@ -70,7 +75,6 @@ class CachingData:
                 return
             
             # serialize in file
-            self.version += 1
             pickle.dump((self.version, self.data), cachefile)
             logging.debug("%s users stored.", len(self.data))
             cachefile.close()
index 30a3a80..5984a74 100644 (file)
@@ -19,7 +19,7 @@ import sys
 import csv
 import argparse, configparser
 import logging
-from date_interpreter import DateInterpreter
+from mail_date_parser import MailDateParser
 from mail_iterator import MailIterator
 from caching_data import CachingData
 
@@ -51,7 +51,7 @@ def main():
     config = load_configuration()
     prepare_logger(config)
 
-    date_interp = DateInterpreter()
+    date_parser = MailDateParser()
     caching_data = CachingData()
     logging.warning("Cache version %s loaded.", caching_data.version)
     user_reader = csv.DictReader(open("userdata.csv", "r"), delimiter=',')
@@ -79,18 +79,18 @@ def main():
             for mid in new_ids:
                 try:
                     fetched_internal_date = session.fetch_internal_date(mid)
-                    internal_date = date_interp.extract_internal_date(fetched_internal_date)
+                    internal_date = date_parser.extract_internal_date(fetched_internal_date)
                     fetched_received_date = session.fetch_received_date(mid)
-                    received_date = date_interp.extract_received_date(fetched_received_date)
+                    received_date = date_parser.extract_received_date(fetched_received_date)
                     if(received_date==""):
                         logging.debug("No received date could be found in message uid: %s - mailbox: %s - user: %s.",
-                                        mid, box.name, box.owner)
+                                        mid.decode('iso-8859-1'), box.name, box.owner)
                         box.no_received_field += 1
                         continue
                 except UserWarning as ex:
                     logging.error(ex)
                     continue
-                if(date_interp.compare_dates(received_date, internal_date, tolerance)):
+                if(date_parser.compare_dates(received_date, internal_date, tolerance)):
                     #print(received_date, internal_date)
                     if(test_mode==0):
                         try:
@@ -100,10 +100,10 @@ def main():
                             continue
                     else:
                         logging.info("Date conflict found in message uid: %s - mailbox: %s - user: %s.\nInternal date %s is different from received date %s from RECEIVED header:\n%s.",
-                                        mid, box.name, box.owner,
+                                        mid.decode('iso-8859-1'), box.name, box.owner,
                                         internal_date.strftime("%d %b %Y %H:%M:%S"),
                                         received_date.strftime("%d %b %Y %H:%M:%S"),
-                                        fetched_received_date[0][1].decide("utf-8").split("Received:")[1])
+                                        fetched_received_date[0][1].decode('iso-8859-1').split("Received:")[1])
                     # count total emails for every user and mailbox
                     box.date_conflicts += 1
             # if all messages were successfully fixed confirm caching
similarity index 95%
rename from date_interpreter.py
rename to mail_date_parser.py
index 6b0ba0c..d957d23 100644 (file)
@@ -1,5 +1,5 @@
 '''
-date_interpreter.py - The module contains the MailIterator class.
+mail_date_parser.py - The module contains the MailDateParser class.
 
 Copyright (c) 2012 Intra2net AG
 Author: Plamen Dimitrov
@@ -28,7 +28,7 @@ INTERNAL_DATE = re.compile(r'(?P<day>[ 0123][0-9])-(?P<mon>[A-Z][a-z][a-z])-(?P<
         r' (?P<zonen>[-+])(?P<zoneh>[0-9][0-9])(?P<zonem>[0-9][0-9])')
 CONTROL_SYMBOLS = re.compile(r'[\n\r\t]')
 
-class DateInterpreter:
+class MailDateParser:
     """This class extracts dates from imap server responses and compares them.
     This class contains only static methods."""
 
@@ -43,7 +43,7 @@ class DateInterpreter:
     @classmethod
     def extract_received_date(cls, fetchresult):
         """Extracts the first date from RECEIVED, returns datetime."""
-        fetchresult = CONTROL_SYMBOLS.sub('', fetchresult[0][1].decode("utf-8"))
+        fetchresult = CONTROL_SYMBOLS.sub('', fetchresult)
         received_dates = RECEIVED_DATE.findall(fetchresult)
         if(len(received_dates)==0):
             return ""
index 0a4b5df..e4da7d4 100644 (file)
@@ -51,11 +51,11 @@ class MailIterator:
         """Iterates through all mailboxes, returns (uidval,name)."""
         for mailbox in self.mailboxes:
             logging.debug("Checking mailbox %s.", mailbox)
-            mailbox = MAILBOX_RESP.match(mailbox.decode("utf-8")).groups()
+            mailbox = MAILBOX_RESP.match(mailbox.decode('iso-8859-1')).groups()
             result, data = self.mail_con.status(mailbox[2], '(UIDVALIDITY)')
             if(result!="OK"):
                 raise UserWarning("Could not retrieve mailbox uidvalidity.")
-            uidval = UIDVAL_RESP.match(data[0].decode("utf-8")).groups()
+            uidval = UIDVAL_RESP.match(data[0].decode('iso-8859-1')).groups()
             logging.debug("Extracted mailbox info is %s %s.", data[0], uidval)
             self.mail_con.select(mailbox[2])
             yield (mailbox[2], uidval[1])
@@ -81,7 +81,7 @@ class MailIterator:
         result, data = self.mail_con.uid('fetch', mid, '(BODY.PEEK[HEADER.FIELDS (RECEIVED)])')
         if(result!="OK"):
             raise UserWarning("Could not fetch the received header of message" + mid + ".")
-        return data
+        return data[0][1].decode('iso-8859-1')
 
     def update_message(self, mid, mailbox, internal_date):
         """Replaces a message with one with correct internal date."""
@@ -94,7 +94,7 @@ class MailIterator:
 
         fetched_flags = self.mail_con.uid('fetch', mid, '(FLAGS)')[1][0]
         parsed_flags = imaplib.ParseFlags(fetched_flags)
-        flags_str = " ".join(flag.decode("utf-8") for flag in parsed_flags)
+        flags_str = " ".join(flag.decode('iso-8859-1') for flag in parsed_flags)
         result, data = self.mail_con.append(mailbox, flags_str,
                                             internal_date_str, data[0][1])
         logging.debug("Adding corrected copy of the message reponse: %s %s", result, data)
index 12e5c78..e38c584 100644 (file)
@@ -14,7 +14,6 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 '''
-import logging
 
 class MailboxState:
     """This class is responsible for containing and updating a mailbox data."""