Improved logging and changed tolerance to minutes
author Plamen Dimitrov <plamen.dimitrov@intra2net.com>
Wed, 27 Jun 2012 10:19:02 +0000 (12:19 +0200)
committer Plamen Dimitrov <plamen.dimitrov@intra2net.com>
Wed, 27 Jun 2012 10:19:02 +0000 (12:19 +0200)
.gitignore
README
caching_data.py
confscript.cfg
date_interpreter.py
fix_imap_internaldate.py
mail_iterator.py
mailbox_state.py

index f5182fb..f2423ba 100644 (file)
 build/
 bin/
 
+.metadata/
+.project
+.pydevproject
+__pycache__/
+
 # kdevelop
 .kdev4
 
diff --git a/README b/README
index 2909b53..e3dd2d0 100644 (file)
--- a/README
+++ b/README
@@ -14,5 +14,9 @@ Features:
 
 *** TO DOCUMENT ***
 - Prerequisites
+python version 3 or later
 - How to invoke
+necessary files
 - Invocation on Windows
+
+For more information, please contact opensource@intra2net.com
\ No newline at end of file
index e7c5f9e..220aaa0 100644 (file)
@@ -16,9 +16,10 @@ GNU General Public License for more details.
 '''
 import os, tempfile
 import pickle
+import logging
 from mailbox_state import MailboxState
 
-CACHING_FILENAME = "caching_data.dat"
+CACHE_FILENAME = "message_cache.dat"
 
 class CachingData:
     """This class is responsible for the caching of data."""
@@ -31,56 +32,61 @@ class CachingData:
     data = None
 
     def __init__(self):
+        # open the cache file, or create one and initialize the data if it is not found
         try:
-            cachingfile = open(CACHING_FILENAME, 'rb')
-            self.version, self.data = pickle.load(cachingfile)
-            print("Cache version", self.version)
-            print(len(self.data), "users found.")
+            cachefile = open(CACHE_FILENAME, 'rb')
+            self.version, self.data = pickle.load(cachefile)
+            logging.info("Cache version %s", self.version)
+            logging.debug("%s users found.", len(self.data))
         except IOError:
             self.version = 0
             self.data = {}
-            with open(CACHING_FILENAME, 'wb') as cachingfile:
-                pickle.dump((0, self.data), cachingfile)
+            with open(CACHE_FILENAME, 'wb') as cachefile:
+                pickle.dump((0, self.data), cachefile)
 
     def __del__(self):
         # create temporary file first
-        location = os.path.dirname(CACHING_FILENAME)    
+        location = os.path.dirname(CACHE_FILENAME)    
         file_descriptor, tmpname = tempfile.mkstemp(dir=location)
-        cachingfile = os.fdopen(file_descriptor, 'wb')
-
-        # prepare data based on a save flag
-        saved_data = {}
-        for user in self.data:
-            saved_data[user] = {}
-            for box_key in self.data[user]:
-                if(self.data[user][box_key].needs_save):
-                    saved_data[user][box_key] = self.data[user][box_key]
-                    print(saved_data[user][box_key].name, "will be saved.")
-            if(len(saved_data[user])==0):
-                del saved_data[user]
-                print(user, "will not be saved.")
-        self.data = saved_data
-
-        # avoid test mode or cases where nothing needs saving
-        if(len(saved_data)==0):
-            os.unlink(tmpname)
-            return
-        # serialize in file
-        self.version += 1
-        pickle.dump((self.version, self.data), cachingfile)
-        print(len(self.data), "users stored.")
-        cachingfile.close()
-        os.rename(tmpname, CACHING_FILENAME)
+        try:
+            cachefile = os.fdopen(file_descriptor, 'wb')
+    
+            # prepare data based on a save flag
+            saved_data = {}
+            for user in self.data:
+                saved_data[user] = {}
+                for box_key in self.data[user]:
+                    if(self.data[user][box_key].needs_save):
+                        saved_data[user][box_key] = self.data[user][box_key]
+                        logging.debug("The mailbox %s will be saved.", saved_data[user][box_key].name)
+                if(len(saved_data[user])==0):
+                    del saved_data[user]
+                    logging.debug("The user %s will not be saved.", user)
+            self.data = saved_data
+    
+            # avoid test mode or cases where nothing needs saving
+            if(len(saved_data)==0):
+                os.unlink(tmpname)
+                return
+            
+            # serialize in file
+            self.version += 1
+            pickle.dump((self.version, self.data), cachefile)
+            logging.debug("%s users stored.", len(self.data))
+            cachefile.close()
+            os.rename(tmpname, CACHE_FILENAME)
+        except:
+            os.unlink(tmpname)           
 
     def retrieve_cached_mailbox(self, name, uidvalidity, user):
         """Retrieve a cached mailbox or create it."""
         box_key = name.strip('"') + uidvalidity
         if(user not in self.data):
             self.data[user] = {}
-            #print(user, "created.")
+            logging.debug("New user %s cached.", user)
         if(box_key not in self.data[user]):
             self.data[user][box_key] = MailboxState(name, uidvalidity, user)
-            #print(box_key, "created.")
+            logging.debug("New mailbox %s cached.", box_key)
         return self.data[user][box_key]
     
     def report_date_conflicts(self):
index 8b65219..edbb7ae 100644 (file)
@@ -1,4 +1,6 @@
 [basic_settings]
-log_level = 30
+file_log_level = 20
+console_log_level = 20
 imap_server = imap.company.com
-tolerance = 1800
+tolerance = 30
+
index f2eeedc..6b0ba0c 100644 (file)
@@ -17,6 +17,7 @@ GNU General Public License for more details.
 
 import datetime, time
 import re
+import logging
 
 #reg expressions
 RECEIVED_DATE = re.compile(r'(0?[1-9]|[1-2][0-9]|3[01])\s+([A-Z][a-z][a-z])\s+'
@@ -47,7 +48,7 @@ class DateInterpreter:
         if(len(received_dates)==0):
             return ""
         else: received_date = received_dates[0]
-        #print("Retrieved date ", received_date, " from header ", fetchresult)
+        logging.debug("Retrieved date %s from header %s.", received_date, fetchresult)
         month = datetime.datetime.strptime(received_date[1],'%b').month
 
         if(received_date[3]!=""):
@@ -91,7 +92,7 @@ class DateInterpreter:
     def compare_dates(cls, date1, date2, tolerance=1800):
         """Compares datetime objects for deviation given certain tolerance."""
         """Returns 1 if there is a significant difference."""
-        #print(date1, "<>", date2)
+        logging.debug("Comparing dates %s <> %s.", date1, date2)
         timedelta = abs(date1 - date2)
         if(timedelta.total_seconds()>tolerance):
             return True
index bd95732..dd4799c 100644 (file)
@@ -17,13 +17,24 @@ GNU General Public License for more details.
 
 import sys
 import csv
-import logging, configparser
+import argparse, configparser
+import logging
 from date_interpreter import DateInterpreter
 from mail_iterator import MailIterator
 from caching_data import CachingData
 
 def main():
     """Iterates through csv list of users and their mailboxes"""
+    
+    """parser = argparse.ArgumentParser(description='Fix the INTERNALDATE field on IMAP servers.')
+    parser.add_argument('--h', metavar='N', type=int, nargs='+',
+                       help='an integer for the accumulator')
+    parser.add_argument('--u', dest='accumulate', type=bool,
+                       const=sum, default=max,
+                       help='sum the integers (default: find the max)')
+    args = parser.parse_args()
+    print(args.accumulate(args.integers))"""
+
     if(len(sys.argv) > 1):
         if(sys.argv[1]=="--h"):
             print("The default mode of the script is test mode."
@@ -36,18 +47,19 @@ def main():
     else:
         test_mode = True
 
+    # config and logging setup
     config = load_configuration()
-    logging.basicConfig(filename='fix_imap_internaldate.log',
-                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-                        level=config.getint('basic_settings', 'log_level'))
+    prepare_logger(config)
 
     date_interp = DateInterpreter()
     caching_data = CachingData()
-    logging.warning("Cache version %s loaded.\n\n", caching_data.version)
+    logging.warning("Cache version %s loaded.", caching_data.version)
     user_reader = csv.DictReader(open("userdata.csv", "r"), delimiter=',')
-
+    
+    # server name is stored in the config
     server = config.get('basic_settings', 'imap_server')
-    tolerance = config.getint('basic_settings', 'tolerance')
+    # tolerance is configured in minutes; convert it to seconds here
+    tolerance = config.getint('basic_settings', 'tolerance') * 60
 
     for user in user_reader:
         try:
@@ -60,7 +72,7 @@ def main():
                 box = caching_data.retrieve_cached_mailbox(mailbox[0], mailbox[1], user['username'])
                 mail_ids = session.fetch_messages()
                 new_ids = box.synchronize(mail_ids)
-                logging.warning("%s non-cached messages found out of %s in %s.\n", len(new_ids), len(mail_ids), box.name)
+                logging.warning("%s non-cached messages found out of %s in %s.", len(new_ids), len(mail_ids), box.name)
             except UserWarning as ex:
                 logging.error(ex)
                 continue
@@ -71,8 +83,8 @@ def main():
                     fetched_received_date = session.fetch_received_date(mid)
                     received_date = date_interp.extract_received_date(fetched_received_date)
                     if(received_date==""):
-                        logging.info("No received date could be found in message uid: %s - mailbox: %s - user: %s.\n",
-                                        mid.decode("utf-8"), box.name, box.owner)
+                        logging.info("No received date could be found in message uid: %s - mailbox: %s - user: %s.",
+                                        mid, box.name, box.owner)
                         box.no_received_field += 1
                         continue
                 except UserWarning as ex:
@@ -88,31 +100,53 @@ def main():
                             continue
                     else:
                         logging.info("Date conflict found in message uid: %s - mailbox: %s - user: %s.\nInternal date %s is different from received date %s from RECEIVED header:\n%s.",
-                                        mid.decode("utf-8"), box.name, box.owner,
+                                        mid, box.name, box.owner,
                                         internal_date.strftime("%d %b %Y %H:%M:%S"),
                                         received_date.strftime("%d %b %Y %H:%M:%S"),
-                                        fetched_received_date[0][1].decode("utf-8").split("Received:")[1])
+                                        fetched_received_date[0][1].decide("utf-8").split("Received:")[1])
                     # count total emails for every user and mailbox
                     box.date_conflicts += 1
             # if all messages were successfully fixed confirm caching
             if(not test_mode):
                 box.confirm_change()
-        
+
         # final report on date conflicts
         caching_data.report_date_conflicts()
 
 def load_configuration():
     """Loads the script configuration from a file or creates such."""
     config = configparser.RawConfigParser()    
-    try:
-        config.read('confscript.cfg')
-    except IOError:
+    success = config.read('confscript.cfg')
+    if(len(success)==0):
         config.add_section('basic_settings')
-        config.set('basic_settings', 'log_level', logging.DEBUG)
+        config.set('basic_settings', 'file_log_level', logging.INFO)
+        config.set('basic_settings', 'console_log_level', logging.INFO)
+        config.set('basic_settings', 'imap_server', 'imap.company.com')
+        config.set('basic_settings', 'tolerance', 30)
         #config.set('Basic settings', 'bool', 'true')
         with open('confscript.cfg', 'w') as configfile:
             config.write(configfile)
     return config
 
+def prepare_logger(config):
+    """Sets up the logging functionality"""
+    
+    # reset the log
+    with open('fix_imap_internaldate.log', 'w'):
+        pass
+    
+    # add basic configuration
+    logging.basicConfig(filename='fix_imap_internaldate.log',
+                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+                        level=config.getint('basic_settings', 'file_log_level'))
+    
+    # add a handler for a console output
+    console = logging.StreamHandler()
+    console.setLevel(config.getint('basic_settings', 'console_log_level'))
+    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    console.setFormatter(formatter)
+    logging.getLogger('').addHandler(console)
+    return
+
 if(__name__ == "__main__"):
     main()
index be7964d..0a4b5df 100644 (file)
@@ -18,6 +18,7 @@ GNU General Public License for more details.
 import imaplib
 import re
 import time
+import logging
 
 MAILBOX_RESP = re.compile(r'\((?P<flags>.*?)\) "(?P<delimiter>.*)" (?P<name>.*)')
 UIDVAL_RESP = re.compile(r'(?P<name>.*) \(UIDVALIDITY (?P<uidval>.*)\)')
@@ -49,13 +50,13 @@ class MailIterator:
     def __iter__(self):
         """Iterates through all mailboxes, returns (uidval,name)."""
         for mailbox in self.mailboxes:
-            #print("Checking mailbox ", mailbox)
+            logging.debug("Checking mailbox %s.", mailbox)
             mailbox = MAILBOX_RESP.match(mailbox.decode("utf-8")).groups()
             result, data = self.mail_con.status(mailbox[2], '(UIDVALIDITY)')
             if(result!="OK"):
                 raise UserWarning("Could not retrieve mailbox uidvalidity.")
             uidval = UIDVAL_RESP.match(data[0].decode("utf-8")).groups()
-            #print(data[0], uidval)
+            logging.debug("Extracted mailbox info is %s %s.", data[0], uidval)
             self.mail_con.select(mailbox[2])
             yield (mailbox[2], uidval[1])
 
@@ -64,7 +65,6 @@ class MailIterator:
         result, data = self.mail_con.uid('search', None, "ALL")
         if(result!="OK"):
             raise UserWarning("Could not fetch messages.")
-        #print("E-mail list for user ", row['username'], " is ", data[0])
         mailid_list = data[0].split()
         return mailid_list
 
@@ -90,18 +90,19 @@ class MailIterator:
         result, data = self.mail_con.uid('fetch', mid, '(RFC822)')
         if(result!="OK"):
             raise UserWarning("Could not retrieve the entire e-mail" + mid + ".")
-        #print("Entire e-mail is: ", data[0][1])
+        #logging.debug("Entire e-mail is: %s", data[0][1])
 
         fetched_flags = self.mail_con.uid('fetch', mid, '(FLAGS)')[1][0]
         parsed_flags = imaplib.ParseFlags(fetched_flags)
         flags_str = " ".join(flag.decode("utf-8") for flag in parsed_flags)
         result, data = self.mail_con.append(mailbox, flags_str,
                                             internal_date_str, data[0][1])
-        #print(result, data)
+        logging.debug("Adding corrected copy of the message reponse: %s %s", result, data)
         if(result!="OK"):
             raise UserWarning("Could not replace the e-mail" + mid + ".")
         else:
             result, data = self.mail_con.uid('STORE', mid, '+FLAGS', r'(\Deleted)')
+            logging.debug("Removing old copy of the message reponse: %s %s", result, data)
             if(result!="OK"):
                 raise UserWarning("Could not delete the e-mail" + mid + ".")
             else: self.mail_con.expunge()
index e2bd86a..12e5c78 100644 (file)
@@ -14,6 +14,7 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 '''
+import logging
 
 class MailboxState:
     """This class is responsible for containing and updating a mailbox data."""
@@ -59,7 +60,6 @@ class MailboxState:
         del changed_dict['needs_save']
         del changed_dict['date_conflicts']
         del changed_dict['no_received_field']
-        #print("pickling preparation complete")
         return changed_dict
 
     def __setstate__(self, dict):
@@ -76,7 +76,6 @@ class MailboxState:
         
         self.key = dict["key"]
         
-        #print("unpickling preparation complete")
         return    
 
     def __str__(self):
@@ -93,10 +92,8 @@ class MailboxState:
             for uid in list_ids:
                 try:
                     self.uids.index(uid)
-                    #print("found", uid, self.key)
                 except ValueError:
                     new_ids.append(uid)
-                    #print("new", uid, self.key)
         # update this mailbox potential uids
         self.uids = list_ids
         return new_ids
@@ -104,5 +101,4 @@ class MailboxState:
     def confirm_change(self):
         """Confirm the chages to the cached mailbox."""
         self.needs_save = True
-        #print(self.owner, self.key, "committed.") 
         return