From 155602ef11834102cfe7819aea2ca54da01ecf80 Mon Sep 17 00:00:00 2001 From: Thomas Jarosch Date: Thu, 23 Jun 2016 12:31:11 +0200 Subject: [PATCH] Rename design document so pylint3 doesn't pick it up --- docs/design.py | 254 ------------------------------------------- docs/design_python_code.txt | 254 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 254 insertions(+), 254 deletions(-) delete mode 100644 docs/design.py create mode 100644 docs/design_python_code.txt diff --git a/docs/design.py b/docs/design.py deleted file mode 100644 index 070bfef..0000000 --- a/docs/design.py +++ /dev/null @@ -1,254 +0,0 @@ -''' -Backup Files Index format: - * one line per file so that it can be parsed line by line - * it will contain one line per file in the directory, even if the file didn't - change. This way we can restore a diff backup without needing previous diffs. - - -{"type": "python-delta-tar-index", version: "1" } -{"type": "BEGIN-FILE-LIST"} -{"type": "directory", "path": value, "mode": value, "mtime": value, "ctime": value, "uid": value, "gid": value, "inode": value, "size": value, "volume": 0, "offset": 0} -{"type": "file", "path": value, "mode": value, "mtime": value, "ctime": value, "uid": value, "gid": value, "inode": value, "size": value, "volume": 0, "offset": 0} -{"type": "file", "path": value, "mode": value, "mtime": value, "ctime": value, "uid": value, "gid": value, "inode": value, "size": value, "volume": 0, "offset": 56464} -[...] -{"type": "file", "path": value, "mode": value, "mtime": value, "ctime": value, "uid": value, "gid": value, "inode": value, "size": value, "volume": 1, "offset": 0} -{"type": "END-FILE-LIST"} -{"type": "file-list-checksum", "checksum": "4327847432743278943278942" } -(future additional fields) - -This is an extensible format. The first line indicates that this is a -python-delta-tar-index, and the version. Then there's the file list and the -checksum of the file list. 
After that, nothing else is currently defined but -new extra fields could be defined in the future. - -The items inside of the file list are usually of type "directory" or "file": - * The "path" field of a directory points to the relative path to the backup - directory, for example "mbox/m/marina" if the backup dir is "/var/mail/". - The complete restore path would be "/var/mail/mbox/m/marina". - * The "path" field of a file points to the filename of the file in the - previous directory marker. For example "marina.dat" could be inside - "mbox/m/marina/" and the complete restore path could be - "/var/mail/mbox/m/marina/marina.dat". - * When a file is going to be removed, it will be prepended with "del:/" and - the file will have no offset set. - -DeltaTar proposed backup directory structure is quite simple: - -backups/ -├── backup-2013-07-22-0200/ -│   ├── bfull-2013-07-22-0200.index -│   ├── bfull-2013-07-22-0200-001.tar.gz.aes128 -│   ├── bfull-2013-07-22-0200-002.tar.gz.aes128 -│   └── bfull-2013-07-22-0200-003.tar.gz.aes128 -├── backup-2013-07-22-1400/ -│   ├── bdiff-2013-07-22-1400.index -│   ├── bdiff-2013-07-22-1400-001.tar.gz.aes128 -└── backup-2013-07-23-0200/ -│   ├── bdiff-2013-07-23-0200.index -│   ├── bdiff-2013-07-23-0200-001.tar.gz.aes128 -│   ├── bdiff-2013-07-23-0200-002.tar.gz.aes128 - -''' - - -class DeltaTar(object): - ''' - Backup class used to create backups - ''' - - def __init__(self, excluded_files=[], included_files=[], - filter_func=None, mode="tar", password=None, logger=None, - index_encrypted=True, index_name_func=None, - volume_name_func=None): - ''' - Constructor. Configures the diff engine. - - Parameters: - - excluded_files: list of files to exclude in the index. It can - contain python regular expressions. - - - included_files: list of files to include in the index. It can - contain python regular expressions. 
If empty, all files in the source - path will be backed up, but of the list is set then only the files - include in the list will be backed up. - - - filter_func: custom filter of files to be backed up. Unused by - default. The function receives a file path and must return a boolean. - - - mode: Mode in which the delta will be created. Accepts the same modes - as our tarfile python library. - - - password: used together with aes modes to encrypt and decrypt backups. - - - logger: python logger object. Not required. - - - index_encrypted: whether the index should be encrypted or not. Only - makes sense to set it as True if mode includes aes128 or aes256. - - - index_name_func: function that sets a custom name for the index file. This - function receives the backup_path and if it's a full backup as arguments - and must return the name of the corresponding index file. Optional, - DeltaTar gives index files a "backup.index" name by default. - - - volume_name_func: function that defines the name of tar volumes. It - receives the backup_path, if it's a full backup and the volume number, - and must return the name for the corresponding volume name. Optional, - DeltaTar has default names for tar volumes. - ''' - pass - - def create_full_backup(self, source_path, backup_path, - max_volume_size=None): - ''' - Creates a full backup. - - Parameters: - - source_path: source path to the directory to back up. - - backup_path: path where the back up will be stored. Backup path will - be created if not existent. - - max_volume_size: maximum volume size. Used to split the backup in - volumes. Optional (won't split in volumes by default). - ''' - pass - - def create_diff_backup(self, source_path, backup_path, previous_index_path, - max_volume_size=None): - ''' - Creates a backup. - - Parameters: - - source_path: source path to the directory to back up. - - backup_path: path where the back up will be stored. Backup path will - be created if not existent. 
- - previous_index_path: index of the previous backup, needed to know - which files changed since then. - - max_volume_size: maximum volume size in megabytes (MB). Used to split - the backup in volumes. Optional (won't split in volumes by default). - - restore_callback: callback function to be called during restore. - This is passed to the helper and gets called for every file. - ''' - pass - - def restore_backup(self, target_path, backup_indexes_paths=[], - backup_tar_path=None, restore_callback=None): - ''' - Restores a backup. - - Parameters: - - backup_path: path where the back up will is stored. - - target_path: path to restore. - - backup_indexes_paths: path to backup indexes, in descending date order. - The indexes indicate the location of their respective backup volumes, - and multiple indexes are needed to be able to restore diff backups. - Note that this is an optional parameter: if not suplied, it will - try to restore directly from backup_tar_path. - - backup_tar_path: path to the backup tar file. Used as an alternative - to backup_indexes_paths to restore directly from a tar file without - using any file index. If it's a multivol tarfile, volume_name_func - will be called. 
- ''' - pass - - -class TestDeltaTar(UnitTest): - ''' - This is an example of how DeltaTar class could be used - ''' - def test_create(self): - import os - from deltatar import DeltaTar - - def index_name_func(backup_path, is_full): - prefix = "bfull" if is_full else "bdiff" - # get the name and remove backup- - basename = os.path.basename(backup_path)[7:] - - return "%s-%s.index" % (prefix, basename) - - def volume_name_func(backup_path, is_full, volume_number): - ''' - Handles the new volumes - ''' - prefix = "bfull" if is_full else "bdiff" - # get the name and remove backup- - basename = os.path.basename(backup_path)[7:] - - return "%s-%s-%03d.tar.gz.aes128" % (prefix, basename, volume_number) - - - # constructor of DeltaTar class allows to set the configuration - deltatar = DeltaTar( - # these options are the same as in tarfile: - mode="tar#gz.aes128", - max_volume_size=100, # 100MB - index_name_func=index_name_func, # optional - volume_name_func=volume_name_func # optional - ) - - # create first backup - deltatar.create_full_backup( - source_path="/path/to/important/dir", - backup_path="/var/backups/backup-2013-07-22-0200") - - # here: change some files - - # create second backup - deltatar.create_diff_backup( - source_path="/path/to/important/dir", - backup_path="/var/backups/backup-2013-07-22-1400", - previous_index_path="/var/backups/backup-2013-07-22-0200/bfull-2013-07-22-0200.index") - - # restore backup in another dir. it will restore last version - deltatar.restore_backup(target_path="/path/to/second/dir", - backup_indexes_paths=[ - "/var/backups/backup-2013-07-22-1400/bfull-2013-07-22-1400.index", - "/var/backups/backup-2013-07-22-0200/bfull-2013-07-22-0200.index" - ]) - -''' - - -Each step will include a comprehensive list of unit tests for the developed -features, pydoc documentation and email updates/reviews. - -1. Initial simple implementation of full backup (7 hours, already done) - - * It must be able to create a full backup and restore it. 
- * It will create the file index but will only use backup_tar_path option to - restore (no index). - * It will support the options: mode, password, index_encrypted, - index_name_func, volume_name_func, max_volume_size. The other options will be - ignored. - -2. Restore from file index a full backup (5 hours) - - * It'll be able to read a file index and restore a backup from it. - * It'll also support the logger option. - -3. Include and exclude filters (5 hours) - - * It'll support the include_files, exclude_files and filter_func for both - creating and restoring full backups. - -4. Create diff backup (8 hours) - - * It'll support to create a diff backup upon an existing full backup. This will - be implemented in a performant way, we'll take a look at duplicity for ideas. - * It'll be able to restore a diff backup without using the index, just applying - the delta-tar. - -5. Restore diff backup (10 hours) - - * It'll be able to restore a diff backup using the index, applying an efficient - restore algorithm. - -6. Polishing and corner cases (12 hours) - - * Review the existing features looking for possible bugs. Implement missing - corner cases and unit tests, for example support for diff backup chains. - * Benchmark agains other tools like duplicity in different scenarios to check - that our performance is good. - -Total estimation: 47 hours - -''' \ No newline at end of file diff --git a/docs/design_python_code.txt b/docs/design_python_code.txt new file mode 100644 index 0000000..070bfef --- /dev/null +++ b/docs/design_python_code.txt @@ -0,0 +1,254 @@ +''' +Backup Files Index format: + * one line per file so that it can be parsed line by line + * it will contain one line per file in the directory, even if the file didn't + change. This way we can restore a diff backup without needing previous diffs. 
+ + +{"type": "python-delta-tar-index", "version": "1" } +{"type": "BEGIN-FILE-LIST"} +{"type": "directory", "path": value, "mode": value, "mtime": value, "ctime": value, "uid": value, "gid": value, "inode": value, "size": value, "volume": 0, "offset": 0} +{"type": "file", "path": value, "mode": value, "mtime": value, "ctime": value, "uid": value, "gid": value, "inode": value, "size": value, "volume": 0, "offset": 0} +{"type": "file", "path": value, "mode": value, "mtime": value, "ctime": value, "uid": value, "gid": value, "inode": value, "size": value, "volume": 0, "offset": 56464} +[...] +{"type": "file", "path": value, "mode": value, "mtime": value, "ctime": value, "uid": value, "gid": value, "inode": value, "size": value, "volume": 1, "offset": 0} +{"type": "END-FILE-LIST"} +{"type": "file-list-checksum", "checksum": "4327847432743278943278942" } +(future additional fields) + +This is an extensible format. The first line indicates that this is a +python-delta-tar-index, and the version. Then there's the file list and the +checksum of the file list. After that, nothing else is currently defined but +new extra fields could be defined in the future. + +The items inside of the file list are usually of type "directory" or "file": + * The "path" field of a directory holds its path relative to the backup + directory, for example "mbox/m/marina" if the backup dir is "/var/mail/". + The complete restore path would be "/var/mail/mbox/m/marina". + * The "path" field of a file holds the filename of the file in the + previous directory marker. For example "marina.dat" could be inside + "mbox/m/marina/" and the complete restore path could be + "/var/mail/mbox/m/marina/marina.dat". + * When a file is going to be removed, it will be prefixed with "del:/" and + the file will have no offset set. 
+ +DeltaTar proposed backup directory structure is quite simple: + +backups/ +├── backup-2013-07-22-0200/ +│   ├── bfull-2013-07-22-0200.index +│   ├── bfull-2013-07-22-0200-001.tar.gz.aes128 +│   ├── bfull-2013-07-22-0200-002.tar.gz.aes128 +│   └── bfull-2013-07-22-0200-003.tar.gz.aes128 +├── backup-2013-07-22-1400/ +│   ├── bdiff-2013-07-22-1400.index +│   ├── bdiff-2013-07-22-1400-001.tar.gz.aes128 +└── backup-2013-07-23-0200/ +│   ├── bdiff-2013-07-23-0200.index +│   ├── bdiff-2013-07-23-0200-001.tar.gz.aes128 +│   ├── bdiff-2013-07-23-0200-002.tar.gz.aes128 + +''' + + +class DeltaTar(object): + ''' + Backup class used to create backups + ''' + + def __init__(self, excluded_files=[], included_files=[], + filter_func=None, mode="tar", password=None, logger=None, + index_encrypted=True, index_name_func=None, + volume_name_func=None): + ''' + Constructor. Configures the diff engine. + + Parameters: + - excluded_files: list of files to exclude in the index. It can + contain python regular expressions. + + - included_files: list of files to include in the index. It can + contain python regular expressions. If empty, all files in the source + path will be backed up, but of the list is set then only the files + include in the list will be backed up. + + - filter_func: custom filter of files to be backed up. Unused by + default. The function receives a file path and must return a boolean. + + - mode: Mode in which the delta will be created. Accepts the same modes + as our tarfile python library. + + - password: used together with aes modes to encrypt and decrypt backups. + + - logger: python logger object. Not required. + + - index_encrypted: whether the index should be encrypted or not. Only + makes sense to set it as True if mode includes aes128 or aes256. + + - index_name_func: function that sets a custom name for the index file. 
This + function receives the backup_path and whether it's a full backup as arguments + and must return the name of the corresponding index file. Optional, + DeltaTar gives index files a "backup.index" name by default. + + - volume_name_func: function that defines the name of tar volumes. It + receives the backup_path, whether it's a full backup and the volume number, + and must return the name of the corresponding volume. Optional, + DeltaTar has default names for tar volumes. + ''' + pass + + def create_full_backup(self, source_path, backup_path, + max_volume_size=None): + ''' + Creates a full backup. + + Parameters: + - source_path: source path to the directory to back up. + - backup_path: path where the backup will be stored. Backup path will + be created if it does not exist. + - max_volume_size: maximum volume size. Used to split the backup in + volumes. Optional (won't split in volumes by default). + ''' + pass + + def create_diff_backup(self, source_path, backup_path, previous_index_path, + max_volume_size=None): + ''' + Creates a diff backup. + + Parameters: + - source_path: source path to the directory to back up. + - backup_path: path where the backup will be stored. Backup path will + be created if it does not exist. + - previous_index_path: index of the previous backup, needed to know + which files changed since then. + - max_volume_size: maximum volume size in megabytes (MB). Used to split + the backup in volumes. Optional (won't split in volumes by default). + - restore_callback: callback function to be called during restore. + This is passed to the helper and gets called for every file. + ''' + pass + + def restore_backup(self, target_path, backup_indexes_paths=[], + backup_tar_path=None, restore_callback=None): + ''' + Restores a backup. + + Parameters: + - backup_path: path where the backup is stored. + - target_path: path to restore. + - backup_indexes_paths: path to backup indexes, in descending date order. 
+ The indexes indicate the location of their respective backup volumes, + and multiple indexes are needed to be able to restore diff backups. + Note that this is an optional parameter: if not supplied, it will + try to restore directly from backup_tar_path. + - backup_tar_path: path to the backup tar file. Used as an alternative + to backup_indexes_paths to restore directly from a tar file without + using any file index. If it's a multivol tarfile, volume_name_func + will be called. + ''' + pass + + +class TestDeltaTar(UnitTest): + ''' + This is an example of how the DeltaTar class could be used + ''' + def test_create(self): + import os + from deltatar import DeltaTar + + def index_name_func(backup_path, is_full): + prefix = "bfull" if is_full else "bdiff" + # get the name and remove backup- + basename = os.path.basename(backup_path)[7:] + + return "%s-%s.index" % (prefix, basename) + + def volume_name_func(backup_path, is_full, volume_number): + ''' + Handles the new volumes + ''' + prefix = "bfull" if is_full else "bdiff" + # get the name and remove backup- + basename = os.path.basename(backup_path)[7:] + + return "%s-%s-%03d.tar.gz.aes128" % (prefix, basename, volume_number) + + + # constructor of DeltaTar class allows setting the configuration + deltatar = DeltaTar( + # these options are the same as in tarfile: + mode="tar#gz.aes128", + max_volume_size=100, # 100MB + index_name_func=index_name_func, # optional + volume_name_func=volume_name_func # optional + ) + + # create first backup + deltatar.create_full_backup( + source_path="/path/to/important/dir", + backup_path="/var/backups/backup-2013-07-22-0200") + + # here: change some files + + # create second backup + deltatar.create_diff_backup( + source_path="/path/to/important/dir", + backup_path="/var/backups/backup-2013-07-22-1400", + previous_index_path="/var/backups/backup-2013-07-22-0200/bfull-2013-07-22-0200.index") + + # restore backup in another dir. 
it will restore the last version + deltatar.restore_backup(target_path="/path/to/second/dir", + backup_indexes_paths=[ + "/var/backups/backup-2013-07-22-1400/bfull-2013-07-22-1400.index", + "/var/backups/backup-2013-07-22-0200/bfull-2013-07-22-0200.index" + ]) + +''' + + +Each step will include a comprehensive list of unit tests for the developed +features, pydoc documentation and email updates/reviews. + +1. Initial simple implementation of full backup (7 hours, already done) + + * It must be able to create a full backup and restore it. + * It will create the file index but will only use the backup_tar_path option to + restore (no index). + * It will support the options: mode, password, index_encrypted, + index_name_func, volume_name_func, max_volume_size. The other options will be + ignored. + +2. Restore a full backup from the file index (5 hours) + + * It'll be able to read a file index and restore a backup from it. + * It'll also support the logger option. + +3. Include and exclude filters (5 hours) + + * It'll support the included_files, excluded_files and filter_func for both + creating and restoring full backups. + +4. Create diff backup (8 hours) + + * It'll support creating a diff backup on top of an existing full backup. This will + be implemented in a performant way; we'll take a look at duplicity for ideas. + * It'll be able to restore a diff backup without using the index, just applying + the delta-tar. + +5. Restore diff backup (10 hours) + + * It'll be able to restore a diff backup using the index, applying an efficient + restore algorithm. + +6. Polishing and corner cases (12 hours) + + * Review the existing features looking for possible bugs. Implement missing + corner cases and unit tests, for example support for diff backup chains. + * Benchmark against other tools like duplicity in different scenarios to check + that our performance is good. + +Total estimation: 47 hours + +''' \ No newline at end of file -- 1.7.1