MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
'''
-
+import os, tempfile
import pickle
+from mailbox_state import MailboxState
+
+CACHING_FILENAME = "caching_data.dat"
class CachingData:
"""This class is responsible for the caching of data."""
+
+ # class attributes
+ # integer for version of the cache
+ version = None
+ # dictionary of usernames as keys and dictionaries as values
+ # the second dictionaries have unique mailbox keys and mailboxes as values
+ data = None
def __init__(self):
+        """Load the cache version and data from the cache file.
+
+        If the cache file cannot be opened (IOError), the version is reset
+        to 0, the data to an empty dictionary, and a fresh cache file
+        holding that empty state is written.
+        """
try:
- cachingfile = open('caching_data.pkl', 'rb')
- self.data = pickle.load(cachingfile)
- #print(len(self.data), "users found.")
- self.save_flag = {}
- for user in self.data:
- self.save_flag[user] = {}
- for uid_key in self.data[user]:
- self.save_flag[user][uid_key] = False
+            # the context manager closes the cache file even when unpickling fails
+            with open(CACHING_FILENAME, 'rb') as cachingfile:
+                self.version, self.data = pickle.load(cachingfile)
+            print("Cache version", self.version)
+            print(len(self.data), "users found.")
except IOError:
+            self.version = 0
self.data = {}
- self.save_flag = {}
- with open('caching_data.pkl', 'wb') as cachingfile:
- pickle.dump(self.data, cachingfile)
+            with open(CACHING_FILENAME, 'wb') as cachingfile:
+                pickle.dump((0, self.data), cachingfile)
def __del__(self):
- with open('caching_data.pkl', 'wb') as cachingfile:
- # prepare data based on a save flag
- for user in self.save_flag:
- for uid_key in self.save_flag[user]:
- if(not self.save_flag[user][uid_key]):
- del self.data[user][uid_key]
- #print(uidvalidity, "deleted from cache.")
- if(len(self.data[user])==0):
- del self.data[user]
- #print(user, "deleted from cache.")
-
- # serialize in file
- pickle.dump(self.data, cachingfile)
-
- #print(len(self.data), "users stored.")
-
- def _cache_new_mailbox(self, username, uid_key):
- """Store the mailbox as integer uidvalidity"""
- if(username not in self.data):
- self.data[username] = {}
- self.save_flag[username] = {}
- #print(username, "created.")
- if(uid_key not in self.data[username]):
- self.data[username][uid_key] = []
- self.save_flag[username][uid_key] = False
- #print(uid_key, "created.")
- return
-
- def sync_cached_mailbox(self, username, uid_key, list_ids):
- """Adds new messages to the cache and returns a list of them.
- Confirm the changes to a mailbox to finally save it."""
- new_ids = []
+        """Store the confirmed mailboxes when the cache object is destroyed.
+
+        Only mailboxes whose needs_save flag is set are kept. If none
+        remain, the existing cache file is left untouched. Otherwise the
+        version is incremented and the data is pickled into a temporary
+        file which is then renamed to the cache file name.
+        """
- if(username not in self.data or \
- uid_key not in self.data[username]):
- self._cache_new_mailbox(username, uid_key)
- new_ids = list_ids
- else:
- for uid in list_ids:
- try:
- self.data[username][uid_key].index(uid)
- #print("found", uid, uid_key)
- except ValueError:
- #print("new", uid, uid_key)
- new_ids.append(uid)
+        # prepare data based on a save flag
+        saved_data = {}
+        for user, boxes in self.data.items():
+            kept = {}
+            for box_key, box in boxes.items():
+                if box.needs_save:
+                    kept[box_key] = box
+                    print(box.name, "will be saved.")
+            if kept:
+                saved_data[user] = kept
+            else:
+                print(user, "will not be saved.")
+        self.data = saved_data
- # update cached_mailbox
- self.data[username][uid_key] = list_ids
+        # avoid test mode or cases where nothing needs saving;
+        # returning before the temporary file exists leaves nothing to clean up
+        if not saved_data:
+            return
+
+        # create the temporary file next to the cache file; dirname() is
+        # empty for a bare file name, so fall back to the current directory
+        location = os.path.dirname(CACHING_FILENAME) or os.curdir
+        file_descriptor, tmpname = tempfile.mkstemp(dir=location)
+        try:
+            # serialize in file; the context manager closes the descriptor
+            with os.fdopen(file_descriptor, 'wb') as cachingfile:
+                pickle.dump((self.version + 1, self.data), cachingfile)
+        except Exception:
+            # do not leave a partial temporary file behind
+            os.unlink(tmpname)
+            raise
+        self.version += 1
+        print(len(self.data), "users stored.")
+        # NOTE(review): os.rename raises on Windows when the target exists;
+        # os.replace (Python 3.3+) would be portable - confirm supported versions
+        os.rename(tmpname, CACHING_FILENAME)
- return new_ids
-
- def commit_cached_mailbox(self, username, uid_key):
- """Confirm the chages to the cached mailbox."""
- self.save_flag[username][uid_key] = True
- #print(username, uid_key, "committed.")
+    def retrieve_cached_mailbox(self, name, uidvalidity, user):
+        """Return the cached mailbox state of a user, creating it if missing.
+
+        The mailbox is looked up under the key built from the mailbox name
+        without double quotes followed by its uidvalidity.
+        """
+        mailbox_id = name.strip('"') + uidvalidity
+        user_boxes = self.data.setdefault(user, {})
+        try:
+            return user_boxes[mailbox_id]
+        except KeyError:
+            # first time this mailbox is seen for the user
+            state = MailboxState(name, uidvalidity, user)
+            user_boxes[mailbox_id] = state
+            return state
+
+    def report_date_conflicts(self):
+        """Write a date conflicts report to the file 'conflict_stats.txt'.
+
+        For every cached mailbox the number of date conflicts and the
+        number of messages with a missing received header are written,
+        followed by the totals of both counters for the owning user.
+        """
+        with open("conflict_stats.txt", 'w') as statsfile:
+            for user, boxes in self.data.items():
+                total_conflicts = 0
+                total_missing = 0
+                for box in boxes.values():
+                    total_conflicts += box.date_conflicts
+                    total_missing += box.no_received_field
+                    statsfile.write("Total date conflicts to be corrected in a mailbox {0} are {1}.\n"\
+                                    .format(box.name, box.date_conflicts))
+                    statsfile.write("Total missing received headers in a mailbox {0} are {1}.\n"\
+                                    .format(box.name, box.no_received_field))
+                # each per-user total is reported under its own label; the
+                # conflicts line previously printed the missing-header total
+                statsfile.write("Total date conflicts to be corrected for user {0} are {1}.\n"\
+                                .format(user, total_conflicts))
+                statsfile.write("Total missing received headers for user {0} are {1}.\n\n"\
+                                .format(user, total_missing))
return
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
-Add '-t' argument when running the module for a test mode.
-For a detailed list of each message with a date conflict change
-the 'log_level' in the configuration file from '30' to '20'.
'''
-import datetime
+import datetime, time
import re
-import time
#reg expressions
RECEIVED_DATE = re.compile(r'(0?[1-9]|[1-2][0-9]|3[01])\s+([A-Z][a-z][a-z])\s+'
CONTROL_SYMBOLS = re.compile(r'[\n\r\t]')
class DateInterpreter:
- """This class extracts dates from imap server responses and compares them."""
+ """This class extracts dates from imap server responses and compares them.
+ This class contains only static methods."""
def __init__(self):
return
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
-Add '-t' argument when running the module for a test mode.
-For a detailed list of each message with a date conflict change
-the 'log_level' in the configuration file from '30' to '20'.
'''
import sys
import csv
-import logging
-import configparser
+import logging, configparser
from date_interpreter import DateInterpreter
from mail_iterator import MailIterator
from caching_data import CachingData
def main():
"""Iterates through csv list of users and their mailboxes"""
- if (len(sys.argv) > 1 and sys.argv[1]=="-t"):
- test_mode = 1
+ if(len(sys.argv) > 1):
+ if(sys.argv[1]=="--h"):
+ print("The default mode of the script is test mode."
+ "Add '--u' argument to exit to modify messages."
+ "For a detailed list of each message with a date conflict change"
+ "change the 'log_level' in the configuration file from '30' to '20'.")
+ return
+ if(sys.argv[1]=="--u"):
+ test_mode = False
else:
- test_mode = 0
+ test_mode = True
config = load_configuration()
- logging.basicConfig(filename='mailscript.log',
+ logging.basicConfig(filename='fix_imap_internaldate.log',
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=config.getint('basic_settings', 'log_level'))
date_interp = DateInterpreter()
- cashing_data = CachingData()
- logging.warning(cashing_data)
+ caching_data = CachingData()
+ logging.warning("Cache version %s loaded.\n\n", caching_data.version)
user_reader = csv.DictReader(open("userdata.csv", "r"), delimiter=',')
server = config.get('basic_settings', 'imap_server')
tolerance = config.getint('basic_settings', 'tolerance')
- total_per_box = {}
for user in user_reader:
try:
continue
for mailbox in session:
try:
- #special key to ensure better mailbox uniqueness
- mailbox_key = mailbox[0].strip('"') + mailbox[1]
+ box = caching_data.retrieve_cached_mailbox(mailbox[0], mailbox[1], user['username'])
mail_ids = session.fetch_messages()
- new_ids = cashing_data.sync_cached_mailbox(user['username'], mailbox_key, mail_ids)
- #print(len(new_ids), "new out of", len(mail_ids), "in", mailbox)
+ new_ids = box.synchronize(mail_ids)
+ logging.warning("%s non-cached messages found out of %s in %s.\n", len(new_ids), len(mail_ids), box.name)
except UserWarning as ex:
logging.error(ex)
continue
fetched_received_date = session.fetch_received_date(mid)
received_date = date_interp.extract_received_date(fetched_received_date)
if(received_date==""):
- logging.warning("No received date could be found in message uid: %s - mailbox: %s - user: %s.\n",
- mid.decode("utf-8"), mailbox[0], user['username'])
+ logging.info("No received date could be found in message uid: %s - mailbox: %s - user: %s.\n",
+ mid.decode("utf-8"), box.name, box.owner)
+ box.no_received_field += 1
continue
except UserWarning as ex:
logging.error(ex)
#print(received_date, internal_date)
if(test_mode==0):
try:
- session.update_message(mid, mailbox[0], received_date)
+ session.update_message(mid, box.name, received_date)
except UserWarning as ex:
logging.error(ex)
continue
else:
logging.info("Date conflict found in message uid: %s - mailbox: %s - user: %s.\nInternal date %s is different from received date %s from RECEIVED header:\n%s.",
- mid.decode("utf-8"), mailbox[0], user['username'],
+ mid.decode("utf-8"), box.name, box.owner,
internal_date.strftime("%d %b %Y %H:%M:%S"),
received_date.strftime("%d %b %Y %H:%M:%S"),
fetched_received_date[0][1].decode("utf-8").split("Received:")[1])
# count total emails for every user and mailbox
- user_key = user['username']+'|'+mailbox[0].strip('"')
- total_per_box[user_key] = 1 + total_per_box.get(user_key, 0)
+ box.date_conflicts += 1
# if all messages were successfully fixed confirm caching
- cashing_data.commit_cached_mailbox(user['username'], mailbox_key)
+ if(not test_mode):
+ box.confirm_change()
+
# final report on date conflicts
- total_per_user = 0
- for warning in total_per_box:
- total_per_user += total_per_box[warning]
- logging.warning("Total date conflicts to be corrected in a mailbox %s are %s.",
- warning.split('|')[1], total_per_box[warning])
- logging.warning("Total date conflicts to be corrected for user %s are %s.\n",
- user['username'], total_per_user)
+ caching_data.report_date_conflicts()
def load_configuration():
"""Loads the script configuration from a file or creates such."""
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
-Add '-t' argument when running the module for a test mode.
-For a detailed list of each message with a date conflict change
-the 'log_level' in the configuration file from '30' to '20'.
'''
import imaplib
class MailIterator:
"""This class communicates with the e-mail server."""
+ # class attributes
+ # IMAP4_SSL for connection with an IMAP server
+ mail_con = None
+ # list of tuples (uidvalidity, mailboxname) for the retrieved mailboxes
+ mailboxes = None
+
def __init__(self, server, username, password):
"""Creates a connection and a user session."""
self.mail_con = imaplib.IMAP4_SSL(server)
--- /dev/null
+'''
+mailbox_state.py - The module contains the MailboxState class.
+
+Copyright (c) 2012 Intra2net AG
+Author: Plamen Dimitrov
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+'''
+
+class MailboxState:
+    """This class is responsible for containing and updating a mailbox data.
+
+    The name, uidvalidity, owner, uids and key attributes are pickled;
+    needs_save, date_conflicts and no_received_field are dropped on
+    pickling and reset on unpickling.
+    """
+
+    # class attributes
+    # string with quotation marks for the mailbox name
+    name = None
+    # string for the mailbox uidvalidity
+    uidvalidity = None
+    # string for user owning the mailbox
+    owner = None
+    # list of bytes for last cached mail uids
+    uids = None
+    # boolean flag for committing state changes
+    needs_save = None
+    # integer for found date conflicts
+    date_conflicts = None
+    # integer for found messages with missing received headers
+    no_received_field = None
+    # unique key for a mailbox
+    key = None
+
+    def __init__(self, name, uidvalidity, owner):
+        """Create an empty, unconfirmed state for a mailbox.
+
+        name -- mailbox name, possibly wrapped in double quotes
+        uidvalidity -- string with the mailbox uidvalidity
+        owner -- user owning the mailbox
+        """
+        self.name = name
+        self.uidvalidity = uidvalidity
+        self.owner = owner
+
+        self.uids = []
+        self.needs_save = False
+
+        self.date_conflicts = 0
+        self.no_received_field = 0
+
+        #special key to ensure better mailbox uniqueness
+        self.key = self.name.strip('"') + self.uidvalidity
+
+    def __getstate__(self):
+        """Prepares the MailboxState instance for pickling.
+
+        Returns a copy of the instance dictionary without the attributes
+        that are reset on unpickling."""
+        state = self.__dict__.copy()
+        # remove the following attributes for pickling
+        for transient in ('needs_save', 'date_conflicts', 'no_received_field'):
+            state.pop(transient, None)
+        return state
+
+    def __setstate__(self, state):
+        """Prepares the MailboxState instance for unpickling.
+
+        state -- dictionary as produced by __getstate__; the parameter no
+        longer shadows the builtin dict (pickle passes it positionally)."""
+        self.name = state["name"]
+        self.uidvalidity = state["uidvalidity"]
+        self.owner = state["owner"]
+
+        self.uids = state["uids"]
+        self.needs_save = False
+
+        self.date_conflicts = 0
+        self.no_received_field = 0
+
+        self.key = state["key"]
+
+    def __str__(self):
+        """Makes the class printable."""
+        return self.key
+
+    def synchronize(self, list_ids):
+        """Adds new messages to the cache and returns a list of them.
+        Confirm the changes to a mailbox to finally save it.
+
+        list_ids -- uids currently present in the mailbox (hashable, e.g. bytes)
+
+        Returns a new list with the uids of list_ids that were not cached
+        before, in their original order."""
+        # a set gives constant-time membership tests instead of list.index
+        known = set(self.uids)
+        new_ids = [uid for uid in list_ids if uid not in known]
+        # update this mailbox potential uids; store a copy so that later
+        # changes to the caller's list do not alter the cached state
+        self.uids = list(list_ids)
+        return new_ids
+
+    def confirm_change(self):
+        """Confirm the changes to the cached mailbox."""
+        self.needs_save = True
'''
import unittest
-import datetime
-import date_interpreter
+import datetime, date_interpreter
class MailScriptTester(unittest.TestCase):
+ # class attributes
+ # DateInterpreter instance testing the DateInterpreter methods
+ date_interp = None
+ # datetime for comparison with extracted datetimes and assertions
+ true_date = None
+
def setUp(self):
self.date_interp = date_interpreter.DateInterpreter()
self.true_date = datetime.datetime(2007, 12, 11, 18, 24, 35)
- def test_received_header1(self):
+ def test_received_date_extraction1(self):
"""Tests the date extraction method."""
date = [[0, b"Tue, 11 Dec 2007 18:24:35 +0100"]]
extracted_date = self.date_interp.extract_received_date(date)
self.assertEqual(extracted_date, self.true_date, "Failed date format 1")
- def test_received_header2(self):
+ def test_received_date_extraction2(self):
"""Tests the date extraction method."""
date = [[0, b"11 Dec 2007 \r\n18:24:35 +0100"]]
extracted_date = self.date_interp.extract_received_date(date)
self.assertEqual(extracted_date, self.true_date, "Failed date format 2")
return
- def test_received_header3(self):
+ def test_received_date_extraction3(self):
"""Tests the date extraction method."""
date = [[0, b"11 Dec 2007 18:24:35 +0100"]]
extracted_date = self.date_interp.extract_received_date(date)
self.assertEqual(extracted_date, self.true_date, "Failed date format 3")
- def test_received_header4(self):
+ def test_received_date_extraction4(self):
"""Tests the date extraction method."""
date = [[0, b"11 Dec 2007 18:24:35"]]
extracted_date = self.date_interp.extract_received_date(date)
#should not be equal because of time zone assumption
self.assertNotEqual(extracted_date, self.true_date, "Failed date format 4")
- def test_received_header5(self):
+ def test_received_date_extraction5(self):
"""Tests the received date extraction method."""
date = [[0, b"11 Dec 2007 18:24:35 GMT"]]
extracted_date = self.date_interp.extract_received_date(date)
#should not be equal because of time zone assumption
self.assertNotEqual(extracted_date, self.true_date, "Failed date format 5")
+ def test_received_date_extraction6(self):
+ """Tests the received date extraction method on a chain of
+ concatenated Received headers holding several dates."""
+ # NOTE(review): the adjacent literals join "18:24:35" and "+0100"
+ # without a space, and the last literal ends with a stray '"]]' that
+ # looks like paste residue - confirm both are intended
+ date = [[0, b'Received: from intranator.m.i2n ([unix socket])'
+ b'by intranator.m.i2n with LMTPA; Tue, 11 Dec 2007 18:24:35'
+ b'+0100Received: from localhost (intranator.m.i2n [127.0.0.1])'
+ b'by localhost (Postfix) with ESMTP id 895812AC54for <intra2net_thomas@intranator.m.i2n>;'
+ b'Sun, 13 Mar 2011 18:47:18 +0100 (CET)Received: from re04.intra2net.com '
+ b'(re04.intra2net.com [82.165.46.26])(using TLSv1 with cipher ADH-AES256-SHA '
+ b'(256/256 bits))(No client certificate requested)by intranator.m.i2n (Postfix) with '
+ b'ESMTPS id 28DB92AC53for <thomas.jarosch@intra2net.com>; Sun, 13 Mar 2011 18:47:15 +0100 '
+ b'(CET)Received: from postfix.charite.de (postfix.charite.de [141.42.206.35])(using TLSv1 '
+ b'with cipher ADH-AES256-SHA (256/256 bits))(No client certificate requested)by '
+ b're04.intra2net.com (Postfix) with ESMTP id C054A3010Afor <thomas.jarosch@intra2net.com>; '
+ b'Sun, 13 Mar 2011 18:47:14 +0100 (CET)Received: from localhost (localhost [127.0.0.1])by '
+ b'de.postfix.org (Postfix) with ESMTP id 7FCCFF7879for <thomas.jarosch@intra2net.com>; '
+ b'Sun, 13 Mar 2011 18:47:14 +0100 (CET)Received: from de.postfix.org ([127.0.0.1])by '
+ b'localhost (de.postfix.org [127.0.0.1]) (amavisd-new, port 10026)with LMTP id '
+ b'YSXF-vf3+6E1 for <thomas.jarosch@intra2net.com>;Sun, 13 Mar 2011 18:47:14 +0100 (CET)'
+ b'Received: from de.postfix.org (localhost [127.0.0.1])by de.postfix.org (Postfix) with '
+ b'ESMTP id 3C3123DF1Efor <thomas.jarosch@intra2net.com>; Sun, 13 Mar 2011 18:46:33 +0100 '
+ b'(CET)Received: from localhost (localhost [127.0.0.1])by de.postfix.org (Postfix) with '
+ b'ESMTP id AB6CE3DBD2for <amavis-users@amavis.org>; Sun, 13 Mar 2011 18:45:57 +0100 (CET)'
+ b'Received: from de.postfix.org ([127.0.0.1])by localhost (de.postfix.org [127.0.0.1]) '
+ b'(amavisd-new, port 10024)with ESMTP id mBYiZO8wREeS for <amavis-users@amavis.org>;Sun, '
+ b'13 Mar 2011 18:45:56 +0100 (CET)Received: from mail.inetmsg.com (mail.inetmsg.com '
+ b'[173.10.94.185])by de.postfix.org (Postfix) with ESMTPSfor <amavis-users@amavis.org>; '
+ b'Sun, 13 Mar 2011 18:45:55 +0100 (CET)Received: from [192.168.1.107] (fw1.inetmsg.com '
+ b'[10.20.30.253])(using TLSv1 with cipher DHE-RSA-CAMELLIA256-SHA (256/256 bits))'
+ b'(No client certificate requested)by mail.inetmsg.com (INetMsg Mail Service) with ESMTPSA '
+ b'id 0B95326CD1for <amavis-users@amavis.org>; Sun, 13 Mar 2011 10:45:41 -0700 (PDT)"]]']]
+ extracted_date = self.date_interp.extract_received_date(date)
+ # should be equal: the assertion expects the 11 Dec 2007 18:24:35 date of
+ # the first Received entry, not one of the later 13 Mar 2011 dates
+ self.assertEqual(extracted_date, self.true_date, "Failed date format 6")
+
def test_compare_dates(self):
"""Tests the date comparison method."""
self.true_date2 = datetime.datetime(2007, 12, 11, 18, 34, 35)