From a793ee30b81fc348b653e0e4c9f00d33969003e7 Mon Sep 17 00:00:00 2001
From: Philipp Gesang
Date: Tue, 29 Aug 2017 14:45:31 +0200
Subject: [PATCH] lift block alignment requirement for tar archive rescue

It is unlikely that damaged archives have correctly aligned tar
headers. Thus we need to check, for each header-like section, whether
it contains the right magic and whether the checksum matches. Objects
without a correct checksum (which spans the better part of the header)
are discarded, similar to what file(1) does.

While at it, skip inputs shorter than one block (an empty file cannot
be mapped), ignore header candidates that are truncated by EOF, and
release the mapping once the scan is done.
---
Note: leading whitespace of context lines was reconstructed; use
"git apply --ignore-whitespace" if a hunk fails to match.

 deltatar/tarfile.py     |   37 ++++++++++++++++++++++++++++---------
 testing/test_recover.py |    2 +-
 2 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/deltatar/tarfile.py b/deltatar/tarfile.py
index b2de2d5..4987d7d 100644
--- a/deltatar/tarfile.py
+++ b/deltatar/tarfile.py
@@ -3384,6 +3384,8 @@ TAR_FMT_HDR = (# See tar(5):
 # valid, though the header is not truly POSIX conforming. */
 #
 #
+# Byte offset of the magic field within a tar header block, see tar(5).
+TAR_HDR_OFF_MAGIC = 257
 TAR_FMT_OLDGNU_MAGIC = b"ustar "
 
 def read_gnu_tar_hdr (data):
@@ -3445,6 +3447,18 @@ def read_gnu_tar_hdr (data):
     }
 
 
+def tar_hdr_check_chksum (data):
+    """
+    Test whether *data* parses as a GNU tar header whose stored checksum
+    is among the checksums calculated over the block.
+    """
+    hdr = read_gnu_tar_hdr (data)
+    if hdr is None:
+        return False
+    s = calc_chksums (data)
+    return nti (hdr ["checksum"]) in s
+
+
 def readable_tar_objects_offsets (ifd):
     """
     Traverse blocks in file, trying to extract tar headers.
@@ -3452,14 +3466,19 @@ def readable_tar_objects_offsets (ifd):
-    pos = 0
     offsets = []
 
-    while True:
-        blk = os.read (ifd, BLOCKSIZE)
-        if len (blk) != BLOCKSIZE:
-            break
-        hdr = read_gnu_tar_hdr (blk)
-        if hdr is not None:
-            offsets.append (pos)
-        pos += BLOCKSIZE
+    # An empty file cannot be mapped, and anything shorter than one block
+    # cannot hold a complete header anyway.
+    if os.fstat (ifd).st_size < BLOCKSIZE:
+        return offsets
+
+    with mmap.mmap (ifd, 0, mmap.MAP_SHARED, mmap.PROT_READ) as mm:
+        pos = mm.find (TAR_FMT_OLDGNU_MAGIC, TAR_HDR_OFF_MAGIC)
+        while pos != -1:
+            off = pos - TAR_HDR_OFF_MAGIC
+            blk = mm [off : off + BLOCKSIZE]
+            # A magic found close to EOF yields a truncated block.
+            if len (blk) == BLOCKSIZE and tar_hdr_check_chksum (blk):
+                offsets.append (off)
+            pos = mm.find (TAR_FMT_OLDGNU_MAGIC, pos + 1)
 
     return offsets
 
diff --git a/testing/test_recover.py b/testing/test_recover.py
index f2147dd..6e8ab99 100644
--- a/testing/test_recover.py
+++ b/testing/test_recover.py
@@ -156,7 +156,7 @@ def corrupt_leading_garbage (_, fname, compress, encrypt):
     assert os.lseek (infd, 0, os.SEEK_SET) == 0
     outfd = os.open (os.path.dirname (aname), os.O_WRONLY | os.O_TMPFILE,
                      stat.S_IRUSR | stat.S_IWUSR)
-    junk = os.urandom (512) # tar block sized
+    junk = os.urandom (42)
 
     # write new file with garbage prepended
     done = 0
-- 
1.7.1