Basic refactoring and Tom's recommendations
[imap-fix-internaldate] / caching_data.py
index ea3fb8c..e7c5f9e 100644
@@ -14,82 +14,90 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 '''
-
+import os, tempfile
 import pickle
+from mailbox_state import MailboxState
+
+CACHING_FILENAME = "caching_data.dat"
 
 class CachingData:
     """This class is responsible for the caching of data."""
+    
+    # class attributes
+    # integer for version of the cache
+    version = None
+    # dictionary with usernames as keys and dictionaries as values;
+    # the inner dictionaries map unique mailbox keys to MailboxState objects
+    data = None
 
     def __init__(self):
         try:
-            cachingfile = open('caching_data.pkl', 'rb')
-            self.data = pickle.load(cachingfile)
-            #print(len(self.data), "users found.")
-            self.save_flag = {}
-            for user in self.data:
-                self.save_flag[user] = {}
-                for uid_key in self.data[user]:
-                    self.save_flag[user][uid_key] = False
+            with open(CACHING_FILENAME, 'rb') as cachingfile:
+                self.version, self.data = pickle.load(cachingfile)
+            print("Cache version", self.version)
+            print(len(self.data), "users found.")
         except IOError:
+            self.version = 0
             self.data = {}
-            self.save_flag = {}
-            with open('caching_data.pkl', 'wb') as cachingfile:
-                pickle.dump(self.data, cachingfile)
+            with open(CACHING_FILENAME, 'wb') as cachingfile:
+                pickle.dump((self.version, self.data), cachingfile)
 
     def __del__(self):
-        with open('caching_data.pkl', 'wb') as cachingfile:
-            # prepare data based on a save flag
-            for user in self.save_flag:
-                for uid_key in self.save_flag[user]:
-                    if(not self.save_flag[user][uid_key]):
-                        del self.data[user][uid_key]
-                        #print(uidvalidity, "deleted from cache.")
-                if(len(self.data[user])==0):
-                    del self.data[user]
-                    #print(user, "deleted from cache.")
-
-            # serialize in file
-            pickle.dump(self.data, cachingfile)
-
-        #print(len(self.data), "users stored.")
-    
-    def _cache_new_mailbox(self, username, uid_key):
-        """Store the mailbox as integer uidvalidity"""
-        if(username not in self.data):
-            self.data[username] = {}
-            self.save_flag[username] = {}
-            #print(username, "created.")
-        if(uid_key not in self.data[username]):
-            self.data[username][uid_key] = []
-            self.save_flag[username][uid_key] = False
-            #print(uid_key, "created.")
-        return
-
-    def sync_cached_mailbox(self, username, uid_key, list_ids):
-        """Adds new messages to the cache and returns a list of them.
-        Confirm the changes to a mailbox to finally save it."""
-        new_ids = []
+        # create temporary file first
+        location = os.path.dirname(os.path.abspath(CACHING_FILENAME))
+        file_descriptor, tmpname = tempfile.mkstemp(dir=location)
+        cachingfile = os.fdopen(file_descriptor, 'wb')
 
-        if(username not in self.data or \
-           uid_key not in self.data[username]):
-            self._cache_new_mailbox(username, uid_key)
-            new_ids = list_ids
-        else:
-            for uid in list_ids:
-                try:
-                    self.data[username][uid_key].index(uid)
-                    #print("found", uid, uid_key)
-                except ValueError:
-                    #print("new", uid, uid_key)
-                    new_ids.append(uid)
+        # keep only the mailboxes flagged as needing a save
+        saved_data = {}
+        for user in self.data:
+            saved_data[user] = {}
+            for box_key in self.data[user]:
+                if(self.data[user][box_key].needs_save):
+                    saved_data[user][box_key] = self.data[user][box_key]
+                    print(saved_data[user][box_key].name, "will be saved.")
+            if(len(saved_data[user])==0):
+                del saved_data[user]
+                print(user, "will not be saved.")
+        self.data = saved_data
 
-        # update cached_mailbox
-        self.data[username][uid_key] = list_ids
+        # skip writing in test mode or when nothing needs saving
+        if(len(saved_data)==0):
+            os.unlink(tmpname)
+            return
+        # serialize the cache to the temporary file
+        self.version += 1
+        pickle.dump((self.version, self.data), cachingfile)
+        print(len(self.data), "users stored.")
+        cachingfile.close()
+        os.rename(tmpname, CACHING_FILENAME)
 
-        return new_ids
-
-    def commit_cached_mailbox(self, username, uid_key):
-        """Confirm the chages to the cached mailbox."""
-        self.save_flag[username][uid_key] = True
-        #print(username, uid_key, "committed.") 
+    def retrieve_cached_mailbox(self, name, uidvalidity, user):
+        """Retrieve a cached mailbox or create it."""
+        box_key = name.strip('"') + uidvalidity
+        if(user not in self.data):
+            self.data[user] = {}
+            #print(user, "created.")
+        if(box_key not in self.data[user]):
+            self.data[user][box_key] = MailboxState(name, uidvalidity, user)
+            #print(box_key, "created.")
+        return self.data[user][box_key]
+    
+    def report_date_conflicts(self):
+        """Write a date conflicts report in a file."""
+        with open("conflict_stats.txt", 'w') as statsfile:
+            owner_total_conflicts = {}
+            owner_total_missing = {}
+            for user in self.data:
+                owner_total_conflicts[user] = 0
+                owner_total_missing[user] = 0
+                for box_key in self.data[user]:
+                    owner_total_conflicts[user] += self.data[user][box_key].date_conflicts
+                    owner_total_missing[user] += self.data[user][box_key].no_received_field
+                    statsfile.write("Total date conflicts to be corrected in a mailbox {0} are {1}.\n"\
+                                    .format(self.data[user][box_key].name, self.data[user][box_key].date_conflicts))
+                    statsfile.write("Total missing received headers in a mailbox {0} are {1}.\n"\
+                                    .format(self.data[user][box_key].name, self.data[user][box_key].no_received_field))
+                statsfile.write("Total date conflicts to be corrected for user {0} are {1}.\n\n"\
+                                .format(user, owner_total_missing[user]))
         return
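For orientation, a rough usage sketch of the refactored class follows. It is not part of the patch: the mailbox name, UIDVALIDITY string, and username are invented, and it assumes that MailboxState (defined in mailbox_state.py, not shown here) accepts (name, uidvalidity, user) in its constructor and initialises the attributes this diff reads (name, needs_save, date_conflicts, no_received_field).

from caching_data import CachingData

# Load the pickled cache, creating an empty one on the first run.
caching_data = CachingData()

# Look up or create the cached state for one mailbox; the key is the
# unquoted mailbox name concatenated with its UIDVALIDITY string.
box = caching_data.retrieve_cached_mailbox('"INBOX"', "1359130000", "alice")

# ... the migration code updates the MailboxState here ...
# Only mailboxes whose needs_save attribute is true survive __del__
# (assumption: callers set it, much as the old commit_cached_mailbox() did).
box.needs_save = True

# Write conflict_stats.txt with per-mailbox and per-user totals.
caching_data.report_date_conflicts()

# Dropping the last reference runs __del__, which pickles (version, data)
# to a temporary file and renames it over caching_data.dat.
del caching_data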