From 8fe4e3ff6e5a94d6abd076a33616a8873a6fa11a Mon Sep 17 00:00:00 2001 From: Plamen Dimitrov Date: Wed, 27 Jun 2012 12:19:02 +0200 Subject: [PATCH] Improved logging and changed tolerance to minutes --- .gitignore | 5 +++ README | 4 ++ caching_data.py | 76 +++++++++++++++++++++++++--------------------- confscript.cfg | 6 ++- date_interpreter.py | 5 ++- fix_imap_internaldate.py | 68 +++++++++++++++++++++++++++++++---------- mail_iterator.py | 11 ++++--- mailbox_state.py | 6 +--- 8 files changed, 115 insertions(+), 66 deletions(-) diff --git a/.gitignore b/.gitignore index f5182fb..f2423ba 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,11 @@ build/ bin/ +.metadata/ +.project +.pydevproject +__pycache__/ + # kdevelop .kdev4 diff --git a/README b/README index 2909b53..e3dd2d0 100644 --- a/README +++ b/README @@ -14,5 +14,9 @@ Features: *** TO DOCUMENT *** - Prerequisites +python version 3 or later - How to invoke +necessary files - Invocation on Windows + +For more information please contact opensource@intra2net.com \ No newline at end of file diff --git a/caching_data.py b/caching_data.py index e7c5f9e..220aaa0 100644 --- a/caching_data.py +++ b/caching_data.py @@ -16,9 +16,10 @@ GNU General Public License for more details. 
''' import os, tempfile import pickle +import logging from mailbox_state import MailboxState -CACHING_FILENAME = "caching_data.dat" +CACHE_FILENAME = "message_cache.dat" class CachingData: """This class is responsible for the caching of data.""" @@ -31,56 +32,61 @@ class CachingData: data = None def __init__(self): + # open data file or create one and initialize date if not found try: - cachingfile = open(CACHING_FILENAME, 'rb') - self.version, self.data = pickle.load(cachingfile) - print("Cache version", self.version) - print(len(self.data), "users found.") + cachefile = open(CACHE_FILENAME, 'rb') + self.version, self.data = pickle.load(cachefile) + logging.info("Cache version %s", self.version) + logging.debug("%s users found.", len(self.data)) except IOError: self.version = 0 self.data = {} - with open(CACHING_FILENAME, 'wb') as cachingfile: - pickle.dump((0, self.data), cachingfile) + with open(CACHE_FILENAME, 'wb') as cachefile: + pickle.dump((0, self.data), cachefile) def __del__(self): # create temporary file first - location = os.path.dirname(CACHING_FILENAME) + location = os.path.dirname(CACHE_FILENAME) file_descriptor, tmpname = tempfile.mkstemp(dir=location) - cachingfile = os.fdopen(file_descriptor, 'wb') - - # prepare data based on a save flag - saved_data = {} - for user in self.data: - saved_data[user] = {} - for box_key in self.data[user]: - if(self.data[user][box_key].needs_save): - saved_data[user][box_key] = self.data[user][box_key] - print(saved_data[user][box_key].name, "will be saved.") - if(len(saved_data[user])==0): - del saved_data[user] - print(user, "will not be saved.") - self.data = saved_data - - # avoid test mode or cases where nothing needs saving - if(len(saved_data)==0): - os.unlink(tmpname) - return - # serialize in file - self.version += 1 - pickle.dump((self.version, self.data), cachingfile) - print(len(self.data), "users stored.") - cachingfile.close() - os.rename(tmpname, CACHING_FILENAME) + try: + cachefile = 
os.fdopen(file_descriptor, 'wb') + + # prepare data based on a save flag + saved_data = {} + for user in self.data: + saved_data[user] = {} + for box_key in self.data[user]: + if(self.data[user][box_key].needs_save): + saved_data[user][box_key] = self.data[user][box_key] + logging.debug("The mailbox %s will be saved.", saved_data[user][box_key].name) + if(len(saved_data[user])==0): + del saved_data[user] + logging.debug("The user %s will not be saved.", user) + self.data = saved_data + + # avoid test mode or cases where nothing needs saving + if(len(saved_data)==0): + os.unlink(tmpname) + return + + # serialize in file + self.version += 1 + pickle.dump((self.version, self.data), cachefile) + logging.debug("%s users stored.", len(self.data)) + cachefile.close() + os.rename(tmpname, CACHE_FILENAME) + except: + os.unlink(tmpname) def retrieve_cached_mailbox(self, name, uidvalidity, user): """Retrieve a cached mailbox or create it.""" box_key = name.strip('"') + uidvalidity if(user not in self.data): self.data[user] = {} - #print(user, "created.") + logging.debug("New user %s cached.", user) if(box_key not in self.data[user]): self.data[user][box_key] = MailboxState(name, uidvalidity, user) - #print(box_key, "created.") + logging.debug("New mailbox %s cached.", box_key) return self.data[user][box_key] def report_date_conflicts(self): diff --git a/confscript.cfg b/confscript.cfg index 8b65219..edbb7ae 100644 --- a/confscript.cfg +++ b/confscript.cfg @@ -1,4 +1,6 @@ [basic_settings] -log_level = 30 +file_log_level = 20 +console_log_level = 20 imap_server = imap.company.com -tolerance = 1800 +tolerance = 30 + diff --git a/date_interpreter.py b/date_interpreter.py index f2eeedc..6b0ba0c 100644 --- a/date_interpreter.py +++ b/date_interpreter.py @@ -17,6 +17,7 @@ GNU General Public License for more details. 
import datetime, time import re +import logging #reg expressions RECEIVED_DATE = re.compile(r'(0?[1-9]|[1-2][0-9]|3[01])\s+([A-Z][a-z][a-z])\s+' @@ -47,7 +48,7 @@ class DateInterpreter: if(len(received_dates)==0): return "" else: received_date = received_dates[0] - #print("Retrieved date ", received_date, " from header ", fetchresult) + logging.debug("Retrieved date %s from header %s.", received_date, fetchresult) month = datetime.datetime.strptime(received_date[1],'%b').month if(received_date[3]!=""): @@ -91,7 +92,7 @@ class DateInterpreter: def compare_dates(cls, date1, date2, tolerance=1800): """Compares datetime objects for deviation given certain tolerance.""" """Returns 1 if there is a significant difference.""" - #print(date1, "<>", date2) + logging.debug("Comparing dates %s <> %s.", date1, date2) timedelta = abs(date1 - date2) if(timedelta.total_seconds()>tolerance): return True diff --git a/fix_imap_internaldate.py b/fix_imap_internaldate.py index bd95732..dd4799c 100644 --- a/fix_imap_internaldate.py +++ b/fix_imap_internaldate.py @@ -17,13 +17,24 @@ GNU General Public License for more details. import sys import csv -import logging, configparser +import argparse, configparser +import logging from date_interpreter import DateInterpreter from mail_iterator import MailIterator from caching_data import CachingData def main(): """Iterates through csv list of users and their mailboxes""" + + """parser = argparse.ArgumentParser(description='Fix the INTERNALDATE field on IMAP servers.') + parser.add_argument('--h', metavar='N', type=int, nargs='+', + help='an integer for the accumulator') + parser.add_argument('--u', dest='accumulate', type=bool, + const=sum, default=max, + help='sum the integers (default: find the max)') + args = parser.parse_args() + print(args.accumulate(args.integers))""" + if(len(sys.argv) > 1): if(sys.argv[1]=="--h"): print("The default mode of the script is test mode." 
@@ -36,18 +47,19 @@ def main(): else: test_mode = True + # config and logging setup config = load_configuration() - logging.basicConfig(filename='fix_imap_internaldate.log', - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', - level=config.getint('basic_settings', 'log_level')) + prepare_logger(config) date_interp = DateInterpreter() caching_data = CachingData() - logging.warning("Cache version %s loaded.\n\n", caching_data.version) + logging.warning("Cache version %s loaded.", caching_data.version) user_reader = csv.DictReader(open("userdata.csv", "r"), delimiter=',') - + + # server name is stored in the config server = config.get('basic_settings', 'imap_server') - tolerance = config.getint('basic_settings', 'tolerance') + # tolerance is now in seconds + tolerance = config.getint('basic_settings', 'tolerance') * 60 for user in user_reader: try: @@ -60,7 +72,7 @@ def main(): box = caching_data.retrieve_cached_mailbox(mailbox[0], mailbox[1], user['username']) mail_ids = session.fetch_messages() new_ids = box.synchronize(mail_ids) - logging.warning("%s non-cached messages found out of %s in %s.\n", len(new_ids), len(mail_ids), box.name) + logging.warning("%s non-cached messages found out of %s in %s.", len(new_ids), len(mail_ids), box.name) except UserWarning as ex: logging.error(ex) continue @@ -71,8 +83,8 @@ def main(): fetched_received_date = session.fetch_received_date(mid) received_date = date_interp.extract_received_date(fetched_received_date) if(received_date==""): - logging.info("No received date could be found in message uid: %s - mailbox: %s - user: %s.\n", - mid.decode("utf-8"), box.name, box.owner) + logging.info("No received date could be found in message uid: %s - mailbox: %s - user: %s.", + mid, box.name, box.owner) box.no_received_field += 1 continue except UserWarning as ex: @@ -88,31 +100,53 @@ def main(): continue else: logging.info("Date conflict found in message uid: %s - mailbox: %s - user: %s.\nInternal date %s is different from 
received date %s from RECEIVED header:\n%s.", - mid.decode("utf-8"), box.name, box.owner, + mid, box.name, box.owner, internal_date.strftime("%d %b %Y %H:%M:%S"), received_date.strftime("%d %b %Y %H:%M:%S"), - fetched_received_date[0][1].decode("utf-8").split("Received:")[1]) + fetched_received_date[0][1].decode("utf-8").split("Received:")[1]) # count total emails for every user and mailbox box.date_conflicts += 1 # if all messages were successfully fixed confirm caching if(not test_mode): box.confirm_change() - + # final report on date conflicts caching_data.report_date_conflicts() def load_configuration(): """Loads the script configuration from a file or creates such.""" config = configparser.RawConfigParser() - try: - config.read('confscript.cfg') - except IOError: + success = config.read('confscript.cfg') + if(len(success)==0): config.add_section('basic_settings') - config.set('basic_settings', 'log_level', logging.DEBUG) + config.set('basic_settings', 'file_log_level', logging.INFO) + config.set('basic_settings', 'console_log_level', logging.INFO) + config.set('basic_settings', 'imap_server', 'imap.company.com') + config.set('basic_settings', 'tolerance', 30) #config.set('Basic settings', 'bool', 'true') with open('confscript.cfg', 'w') as configfile: config.write(configfile) return config +def prepare_logger(config): + """Sets up the logging functionality""" + + # reset the log + with open('fix_imap_internaldate.log', 'w'): + pass + + # add basic configuration + logging.basicConfig(filename='fix_imap_internaldate.log', + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + level=config.getint('basic_settings', 'file_log_level')) + + # add a handler for a console output + console = logging.StreamHandler() + console.setLevel(config.getint('basic_settings', 'console_log_level')) + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + console.setFormatter(formatter) + logging.getLogger('').addHandler(console) + return 
+ if(__name__ == "__main__"): main() diff --git a/mail_iterator.py b/mail_iterator.py index be7964d..0a4b5df 100644 --- a/mail_iterator.py +++ b/mail_iterator.py @@ -18,6 +18,7 @@ GNU General Public License for more details. import imaplib import re import time +import logging MAILBOX_RESP = re.compile(r'\((?P.*?)\) "(?P.*)" (?P.*)') UIDVAL_RESP = re.compile(r'(?P.*) \(UIDVALIDITY (?P.*)\)') @@ -49,13 +50,13 @@ class MailIterator: def __iter__(self): """Iterates through all mailboxes, returns (uidval,name).""" for mailbox in self.mailboxes: - #print("Checking mailbox ", mailbox) + logging.debug("Checking mailbox %s.", mailbox) mailbox = MAILBOX_RESP.match(mailbox.decode("utf-8")).groups() result, data = self.mail_con.status(mailbox[2], '(UIDVALIDITY)') if(result!="OK"): raise UserWarning("Could not retrieve mailbox uidvalidity.") uidval = UIDVAL_RESP.match(data[0].decode("utf-8")).groups() - #print(data[0], uidval) + logging.debug("Extracted mailbox info is %s %s.", data[0], uidval) self.mail_con.select(mailbox[2]) yield (mailbox[2], uidval[1]) @@ -64,7 +65,6 @@ class MailIterator: result, data = self.mail_con.uid('search', None, "ALL") if(result!="OK"): raise UserWarning("Could not fetch messages.") - #print("E-mail list for user ", row['username'], " is ", data[0]) mailid_list = data[0].split() return mailid_list @@ -90,18 +90,19 @@ class MailIterator: result, data = self.mail_con.uid('fetch', mid, '(RFC822)') if(result!="OK"): raise UserWarning("Could not retrieve the entire e-mail" + mid + ".") - #print("Entire e-mail is: ", data[0][1]) + #logging.debug("Entire e-mail is: %s", data[0][1]) fetched_flags = self.mail_con.uid('fetch', mid, '(FLAGS)')[1][0] parsed_flags = imaplib.ParseFlags(fetched_flags) flags_str = " ".join(flag.decode("utf-8") for flag in parsed_flags) result, data = self.mail_con.append(mailbox, flags_str, internal_date_str, data[0][1]) - #print(result, data) + logging.debug("Adding corrected copy of the message reponse: %s %s", result, data) 
if(result!="OK"): raise UserWarning("Could not replace the e-mail" + mid + ".") else: result, data = self.mail_con.uid('STORE', mid, '+FLAGS', r'(\Deleted)') + logging.debug("Removing old copy of the message reponse: %s %s", result, data) if(result!="OK"): raise UserWarning("Could not delete the e-mail" + mid + ".") else: self.mail_con.expunge() diff --git a/mailbox_state.py b/mailbox_state.py index e2bd86a..12e5c78 100644 --- a/mailbox_state.py +++ b/mailbox_state.py @@ -14,6 +14,7 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. ''' +import logging class MailboxState: """This class is responsible for containing and updating a mailbox data.""" @@ -59,7 +60,6 @@ class MailboxState: del changed_dict['needs_save'] del changed_dict['date_conflicts'] del changed_dict['no_received_field'] - #print("pickling preparation complete") return changed_dict def __setstate__(self, dict): @@ -76,7 +76,6 @@ class MailboxState: self.key = dict["key"] - #print("unpickling preparation complete") return def __str__(self): @@ -93,10 +92,8 @@ class MailboxState: for uid in list_ids: try: self.uids.index(uid) - #print("found", uid, self.key) except ValueError: new_ids.append(uid) - #print("new", uid, self.key) # update this mailbox potential uids self.uids = list_ids return new_ids @@ -104,5 +101,4 @@ class MailboxState: def confirm_change(self): """Confirm the chages to the cached mailbox.""" self.needs_save = True - #print(self.owner, self.key, "committed.") return -- 1.7.1