43f9c5b4d25d9cf900213ba10f87ca698ccf8736
[imap-fix-internaldate] / src / fix_imap_internaldate.py
1 '''
2 fix_imap_internaldate.py - Fix the INTERNALDATE field on IMAP servers
3
4 Copyright (c) 2012 Intra2net AG
5 Author: Plamen Dimitrov and Thomas Jarosch
6
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 '''
17
18 import sys
19 import csv
20 import argparse
21
22 try:
23     import configparser
24 except ImportError:
25     import ConfigParser as configparser
26
27 import logging
28 from mail_date_parser import MailDateParser
29 from mail_iterator import MailIterator
30 from caching_data import CachingData
31
# Configuration file read by load_configuration(); a default one is written
# there when no file could be read.
CONFIG_FILENAME = "fix_imap_internaldate.cfg"
# Log file that prepare_logger() truncates and then attaches to the logging setup.
LOG_FILENAME = "fix_imap_internaldate.log"
# CSV file iterated by synchronize_csv(); each row is accessed through the
# 'username' and 'password' columns.
CSV_FILENAME = "userdata.csv"
35
def main():
    """Interprets command arguments and initializes configuration and logger.
        Then begins mail synchronization.

    The command line is parsed before anything else so that "--help" or an
    invalid option exits without creating a default config file or
    truncating the previous log file.

    Without "-u/--update" the script runs in test mode and passes
    test_mode=True to synchronize_csv(). A KeyboardInterrupt raised during
    synchronization is logged and the function still finishes normally.
    """

    # parse arguments
    parser = argparse.ArgumentParser(description="Fix the INTERNALDATE field on IMAP servers. "
                                                "Small tool to fix the IMAP internaldate "
                                                "in case it's too much off compared to the last date "
                                                "stored in the received lines.")
    parser.add_argument('-u', '--update', dest='test_mode', action='store_false',
                        default=True, help='update all e-mails and exit test mode')
    # parse_args() may exit the process (usage errors, --help), so it has to
    # run before the side effects of the config and logging setup below
    args = parser.parse_args()

    # config and logging setup
    config = load_configuration()
    prepare_logger(config)
    if args.test_mode:
        logging.info("Testing mode initiated. No message will be modified on the server.")
    else:
        logging.info("Update mode initiated. Messages will be modified.")

    # proceed to main functionality
    try:
        synchronize_csv(config, args.test_mode)
    except KeyboardInterrupt:
        logging.info("Script was interrupted by the user.")

    logging.info("All done. Exiting.")
65
def load_configuration():
    """Loads the script configuration from a file or creates such.

    If CONFIG_FILENAME cannot be read, a default configuration is built,
    written to CONFIG_FILENAME and used for this run.

    Every option the script reads later is validated here with the same
    typed getter that is used later on, so a malformed value is reported
    here instead of failing further down the line.

    Returns the populated RawConfigParser instance.
    Exits the process with status 1 if a required section or option is
    missing or a value has the wrong type.
    """
    config = configparser.RawConfigParser()
    files_read = config.read(CONFIG_FILENAME)

    # if no file is found create a default one
    if not files_read:
        if not config.has_section('basic_settings'):
            config.add_section('basic_settings')
        # only string values are fully supported when a parser is written to a file
        config.set('basic_settings', 'file_log_level', str(logging.INFO))
        config.set('basic_settings', 'console_log_level', str(logging.INFO))
        config.set('basic_settings', 'imap_server', 'imap.company.com')
        config.set('basic_settings', 'tolerance_mins', '30')
        config.set('basic_settings', 'skip_shared_folders', "True")
        config.set('basic_settings', 'fallback_to_date_header', "False")
        with open(CONFIG_FILENAME, 'w') as configfile:
            config.write(configfile)
            configfile.write("# 0 NOTSET, 10 DEBUG, 20 INFO, 30 WARNING, 40 ERROR, 50 CRITICAL\n")
        print("Created initial config %s" % CONFIG_FILENAME)

    try:
        # the log levels are read with getint() by prepare_logger(), so they
        # have to be checked as integers here as well
        config.getint('basic_settings', 'file_log_level')
        config.getint('basic_settings', 'console_log_level')
        config.get('basic_settings', 'imap_server')
        config.getint('basic_settings', 'tolerance_mins')
        config.getboolean('basic_settings', 'skip_shared_folders')
        config.getboolean('basic_settings', 'fallback_to_date_header')
    except (configparser.NoSectionError, configparser.NoOptionError, ValueError) as ex:
        print("Could not read config file '%s': %s." % (CONFIG_FILENAME, ex))
        print("Please change or remove the config file.")
        # non-zero status so that callers can tell the run failed
        sys.exit(1)

    return config
99
def prepare_logger(config):
    """Sets up logging to the log file and to the console.

    The log file is emptied first, then the root logger is configured to
    write to it with the level from 'file_log_level'. A second handler
    using the level from 'console_log_level' is attached to the root logger
    for console output. Both outputs share the same record format.
    """
    record_layout = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

    # start every run with an empty log file
    open(LOG_FILENAME, 'w').close()

    # file logging through the basic configuration
    file_level = config.getint('basic_settings', 'file_log_level')
    logging.basicConfig(filename=LOG_FILENAME, format=record_layout, level=file_level)

    # additional handler for the console output
    console_handler = logging.StreamHandler()
    console_handler.setLevel(config.getint('basic_settings', 'console_log_level'))
    console_handler.setFormatter(logging.Formatter(record_layout))
    logging.getLogger('').addHandler(console_handler)
119
def synchronize_csv(config, test_mode):
    """Iterates through csv list of users and synchronizes their messages.

    config    -- parsed configuration; the options 'imap_server',
                 'tolerance_mins', 'skip_shared_folders' and
                 'fallback_to_date_header' of section 'basic_settings' are read
    test_mode -- if true, date conflicts are only logged and counted;
                 no message is updated and no caching change is confirmed

    The user list is read completely from CSV_FILENAME (columns 'username'
    and 'password') and the file is closed before any mail session is opened.

    A UserWarning raised while opening a session, preparing a mailbox,
    fetching dates or updating a message is logged as an error and only the
    affected user, mailbox or message is skipped.
    """

    # initialize loop permanent data
    fallback_to_date_header = config.getboolean('basic_settings', 'fallback_to_date_header')
    caching_data = CachingData(fallback_to_date_header)
    date_parser = MailDateParser()
    server = config.get('basic_settings', 'imap_server')
    # the tolerance is configured in minutes but passed on in seconds
    tolerance = config.getint('basic_settings', 'tolerance_mins') * 60
    skip_shared_folders = config.getboolean('basic_settings', 'skip_shared_folders')

    # read the users from the csv data; the context manager makes sure
    # the file handle is released again
    with open(CSV_FILENAME, "r") as csv_file:
        users = list(csv.DictReader(csv_file, delimiter=','))

    # iterate through the users in the csv data
    for user in users:
        try:
            session = MailIterator(server, user['username'], user['password'], skip_shared_folders)
        except UserWarning as ex:
            logging.error(ex)
            continue
        for mailbox in session:
            try:
                box = caching_data.retrieve_cached_mailbox(mailbox[0], mailbox[1], user['username'])
                mail_ids = session.fetch_messages()
                new_ids = box.synchronize(mail_ids, tolerance)
                logging.info("%s new messages out of %s found in %s.", len(new_ids), len(mail_ids), box.name)
            except UserWarning as ex:
                logging.error(ex)
                continue
            for mid in new_ids:
                try:
                    fetched_internal_date = session.fetch_internal_date(mid)
                    internal_date = date_parser.extract_internal_date(fetched_internal_date)
                    fetched_correct_date = session.fetch_received_date(mid)
                    correct_date = date_parser.extract_received_date(fetched_correct_date)
                    # check for empty received headers
                    if correct_date == "":
                        logging.debug("No received date could be found in message uid: %s - mailbox: %s - user: %s.",
                                        mid.decode('iso-8859-1'), box.name, box.owner)
                        box.no_received_field += 1
                        # correct these messages if required and override received_date from basic date
                        if fallback_to_date_header:
                            fetched_correct_date = session.fetch_basic_date(mid)
                            correct_date = date_parser.extract_received_date(fetched_correct_date)
                            if correct_date == "":
                                logging.debug("No fallback date header could be found in message uid: %s - mailbox: %s - user: %s.",
                                                mid.decode('iso-8859-1'), box.name, box.owner)

                        if correct_date == "":
                            # skip synchronization for this message
                            continue
                    else:
                        # preserve only the first received line as fetched if everything is ok
                        fetched_correct_date = fetched_correct_date.split("Received:")[1]
                except UserWarning as ex:
                    logging.error(ex)
                    continue
                # a true result is treated as a date conflict for this message
                if date_parser.compare_dates(correct_date, internal_date, tolerance):
                    logging.warning("Date conflict found in message uid: %s - mailbox: %s - user: %s.\nInternal date %s is different from extracted date %s from header:\n%s.",
                                    mid.decode('iso-8859-1'), box.name, box.owner,
                                    internal_date.strftime("%d %b %Y %H:%M:%S"),
                                    correct_date.strftime("%d %b %Y %H:%M:%S"),
                                    fetched_correct_date)
                    if not test_mode:
                        try:
                            session.update_message(mid, box.name, correct_date)
                        except UserWarning as ex:
                            # a failed update is not counted as a conflict
                            logging.error(ex)
                            continue

                    # count total emails for every user and mailbox
                    box.date_conflicts += 1

            # confirm caching once the mailbox has been processed in update mode
            # NOTE(review): this also runs when single updates failed above - confirm intended
            if not test_mode:
                box.confirm_change()

    # final report on date conflicts
    caching_data.report_conflicts()
198
# start the tool only when this file is executed as a script
if __name__ == "__main__":
    main()