adding rescue file splitter and some unit tests using it
[python-delta-tar] / filesplit.py
CommitLineData
3759f796
ERE
1#!/usr/bin/env python
2
3import argparse
4
# Default number of characters read from the input file per iteration.
BUFSIZE = 16 * 1024


def split_file(separator, prefix, input_file, bufsize=None):
    '''
    Split input_file into numbered pieces at every occurrence of separator.

    Pieces are written to prefix + "0", prefix + "1", ... The separator is a
    literal string (not a regular expression) and is kept at the beginning
    of the piece it starts. A new piece is started only when some
    non-separator data has been written since the last separator, so:

     - a separator at the very start of the input stays in piece 0 instead
       of leaving an empty piece behind
     - consecutive separators end up together at the start of one piece

    The input is processed in chunks so the whole file is never held in
    memory. Both input and output files are opened in text mode.

    separator  -- non-empty string marking the start of a new piece
    prefix     -- path prefix of the output files; the piece number is
                  appended to it
    input_file -- path of the file to split
    bufsize    -- number of characters to read per iteration; defaults to
                  the module-level BUFSIZE

    Raises Exception if separator is empty and ValueError if bufsize is
    not positive.
    '''
    sep_len = len(separator)
    if sep_len == 0:
        raise Exception("empty separator")
    if bufsize is None:
        bufsize = BUFSIZE
    if bufsize < 1:
        raise ValueError("bufsize must be a positive integer")

    # A separator cut in half by a chunk boundary leaves at most
    # sep_len - 1 of its characters at the end of the buffer, so that is
    # all that has to be carried over to the next iteration.
    holdback = sep_len - 1
    file_index = 0
    # True while nothing but separators has been written since the current
    # piece was started (or since the last separator). Tracking this
    # explicitly makes the split decision independent of where the chunk
    # boundaries happen to fall.
    at_boundary = True
    buf = ""

    output = open(prefix + str(file_index), 'w')
    try:
        with open(input_file, 'r') as f:
            while True:
                chunk = f.read(bufsize)
                buf += chunk
                start = 0

                # emit every complete separator currently in the buffer
                idx = buf.find(separator, start)
                while idx >= 0:
                    if idx > start:
                        output.write(buf[start:idx])
                        at_boundary = False
                    if not at_boundary:
                        output.close()
                        file_index += 1
                        output = open(prefix + str(file_index), 'w')
                    output.write(separator)
                    at_boundary = True
                    start = idx + sep_len
                    idx = buf.find(separator, start)

                if not chunk:
                    # end of input: nothing more can complete a partial
                    # separator, so flush whatever is left and stop
                    output.write(buf[start:])
                    break

                # write everything that cannot be part of a separator
                # straddling this chunk and the next one, keep the rest
                cut = max(start, len(buf) - holdback)
                if cut > start:
                    output.write(buf[start:cut])
                    at_boundary = False
                buf = buf[cut:]
    finally:
        # close the current piece even if reading or writing failed
        output.close()
57
if __name__ == "__main__":
    # Command-line entry point: read the separator, the output prefix and
    # the input path from the arguments and hand them to split_file().
    cli = argparse.ArgumentParser()

    # both options are mandatory; declare them from a small table
    required_options = (
        ("-s", "--separator", "string for the separator"),
        ("-p", "--prefix", "prefix for split files"),
    )
    for short_flag, long_flag, description in required_options:
        cli.add_argument(short_flag, long_flag, required=True,
                         help=description)
    cli.add_argument("input_file", help="input file")

    options = cli.parse_args()
    split_file(
        separator=options.separator,
        prefix=options.prefix,
        input_file=options.input_file,
    )