From: Thomas Jarosch Date: Thu, 23 Jun 2016 08:08:16 +0000 (+0200) Subject: Implement cache for pwd.getpwuid() and grp.getgrgid() X-Git-Tag: v2.2~20 X-Git-Url: http://developer.intra2net.com/git/?a=commitdiff_plain;h=9ef1fb87c299841d91266bb9499d27f9bf0cfc8f;p=python-delta-tar Implement cache for pwd.getpwuid() and grp.getgrgid() Those functions always parse /etc/passwd (respectively /etc/group), and we look up the owner for each file we back up. This change is only relevant when creating full backups. The speedup with ~1,000,000 emails is 11%. --- diff --git a/deltatar/tarfile.py b/deltatar/tarfile.py index f63b6ad..8ff3d9d 100644 --- a/deltatar/tarfile.py +++ b/deltatar/tarfile.py @@ -1747,6 +1747,9 @@ class TarFile(object): # if you manage lots of files and don't want # to have high memory usage + cache_uid2user = {} # cache to avoid getpwuid calls. It always parses /etc/passwd. + cache_gid2group = {} # same cache for groups + def __init__(self, name=None, mode="r", fileobj=None, format=None, tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, errors="surrogateescape", pax_headers=None, debug=None, @@ -2256,15 +2259,27 @@ class TarFile(object): tarinfo.type = type tarinfo.linkname = linkname if pwd: - try: - tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0] - except KeyError: - pass + if tarinfo.uid in self.cache_uid2user: + tarinfo.uname = self.cache_uid2user[tarinfo.uid] + else: + try: + tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0] + self.cache_uid2user[tarinfo.uid] = tarinfo.uname + except KeyError: + # remember user does not exist: + # same default value as in tarinfo class + self.cache_uid2user[tarinfo.uid] = "" if grp: - try: - tarinfo.gname = grp.getgrgid(tarinfo.gid)[0] - except KeyError: - pass + if tarinfo.gid in self.cache_gid2group: + tarinfo.gname = self.cache_gid2group[tarinfo.gid] + else: + try: + tarinfo.gname = grp.getgrgid(tarinfo.gid)[0] + self.cache_gid2group[tarinfo.gid] = tarinfo.gname + except KeyError: + # remember group does not exist: + # same default value 
as in tarinfo class + self.cache_gid2group[tarinfo.gid] = "" if type in (CHRTYPE, BLKTYPE): if hasattr(os, "major") and hasattr(os, "minor"):