Closing the tarfile when an unreadable object is encountered
causes the stream to be reopened for the next read. Otherwise,
the corrupt object stays buffered and tarfile would continue
to seek inside the bad data.
# seek tarfile if needed
offset = file_data.get('offset', -1)
if index_data['tarobj']:
- member = index_data['tarobj'].__iter__().__next__()
+ try:
+ member = index_data['tarobj'].__iter__().__next__()
+ except tarfile.ReadError as exn:
+ # Possibly corrupted archive; may still be recoverable
+ # if offsets did not change.
+ index_data['tarobj'].close()
+ index_data['tarobj'] = None
+ raise
if not member or member.path != file_data['path']:
# force a seek and reopen
index_data['tarobj'].close()