Fixed encryption pad treatment using random pad
[python-delta-tar] / filesplit.py
CommitLineData
3759f796
ERE
1#!/usr/bin/env python
2
866c42e6
DGM
3# Copyright (C) 2013 Intra2net AG
4#
5# This program is free software: you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation, either version 3 of the License, or
8# (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13# GNU General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18
3759f796
ERE
19import argparse
20
BUFSIZE = 16 * 1024


def split_file(separator, prefix, input_file, new_file_func=None):
    '''
    Split input_file into several output files at each occurrence of
    separator. The separator is a plain substring (not a regular
    expression) and is kept at the beginning of the file it starts.

    The input is read in chunks of BUFSIZE, so the whole file is never held
    in memory. The result does not depend on where chunk boundaries fall.

    Parameters:
      separator      non-empty str or bytes. A str separator makes the input
                     (and the default outputs) be opened in text mode, a
                     bytes separator in binary mode.
      prefix         passed to new_file_func; with the default
                     new_file_func, output files are named prefix + "0",
                     prefix + "1", ...
      input_file     path of the file to split.
      new_file_func  optional callable (prefix, index) returning an object
                     with write() and close(). Called once for index 0
                     before the input is opened, then once per split.

    A separator starts a new output file only if the current output already
    holds some non-separator data after its last separator. A separator at
    the very start of the input, or one directly following another
    separator, is therefore written to the current output file.

    Raises ValueError if separator is empty.
    '''
    sep_len = len(separator)
    if sep_len == 0:
        raise ValueError("empty separator")

    binary = isinstance(separator, bytes)

    if new_file_func is None:
        write_mode = 'wb' if binary else 'w'

        def new_file_func(prefix, i):
            return open(prefix + str(i), write_mode)

    # The longest suffix of the buffer that could still turn into a separator
    # once the next chunk arrives is sep_len - 1 characters long.
    keep = sep_len - 1

    i = 0
    # Empty value of the same type as the separator (str or bytes).
    buf = separator[:0]
    # True while the current output holds no data after its last separator
    # (or nothing at all); a separator found in that state does not start a
    # new file.
    only_separators = True

    output = new_file_func(prefix, i)
    try:
        with open(input_file, 'rb' if binary else 'r') as f:
            while True:
                chunk = f.read(BUFSIZE)
                at_eof = not chunk
                buf += chunk

                # Handle every complete separator currently in the buffer.
                pos = 0
                idx = buf.find(separator, pos)
                while idx != -1:
                    if idx > pos:
                        output.write(buf[pos:idx])
                        only_separators = False

                    if not only_separators:
                        output.close()
                        # Cleared so that the finally clause does not close
                        # the old file again if new_file_func raises.
                        output = None
                        i += 1
                        output = new_file_func(prefix, i)

                    output.write(separator)
                    only_separators = True
                    pos = idx + sep_len
                    idx = buf.find(separator, pos)

                buf = buf[pos:]

                if at_eof:
                    # Nothing more can complete a partial separator.
                    if buf:
                        output.write(buf)
                    break

                # Flush what can no longer be part of a separator and carry
                # the tail over to the next chunk.
                flushable = len(buf) - keep
                if flushable > 0:
                    output.write(buf[:flushable])
                    only_separators = False
                    buf = buf[flushable:]
    finally:
        if output is not None:
            output.close()
76
if __name__ == "__main__":
    # Command-line entry point: take the separator, the output prefix and the
    # input path from the arguments and split the input file accordingly.
    cli = argparse.ArgumentParser()

    cli.add_argument("-s", "--separator", required=True,
                     help="string for the separator")
    cli.add_argument("-p", "--prefix", required=True,
                     help="prefix for split files")
    cli.add_argument("input_file", help="input file")

    options = cli.parse_args()
    split_file(options.separator, options.prefix, options.input_file)