def split_file(separator, prefix, input_file, new_file_func=None,
               bufsize=None):
    """Split a text file into numbered pieces at each occurrence of a separator.

    The input is streamed in chunks so the whole file never has to be held
    in memory. Every occurrence of ``separator`` starts a new output, and the
    separator itself is written at the beginning of that new output. The one
    exception is a separator found while the current output is still empty
    (e.g. the input starts with the separator): it is written to the current
    output instead of leaving an empty piece behind.

    Args:
        separator: Non-empty string that marks the start of a new piece.
        prefix: Passed to ``new_file_func``; with the default factory the
            outputs are named ``prefix + "0"``, ``prefix + "1"``, ...
        input_file: Path of the text file to split.
        new_file_func: Optional callable ``(prefix, i)`` returning a writable
            object with ``write`` and ``close`` methods. Defaults to opening
            ``prefix + str(i)`` in text write mode.
        bufsize: Number of characters read per chunk. Defaults to the
            module-level ``BUFSIZE`` constant.

    Raises:
        ValueError: If ``separator`` is empty or ``bufsize`` is not positive.
    """
    sep_len = len(separator)
    if sep_len == 0:
        raise ValueError("empty separator")

    if new_file_func is None:
        def new_file_func(prefix, i):
            return open(prefix + str(i), 'w')

    if bufsize is None:
        bufsize = BUFSIZE
    if bufsize < 1:
        raise ValueError("bufsize must be a positive integer")

    # A separator may straddle two chunks, so the last ``sep_len - 1``
    # characters of the unprocessed data are held back until more input (or
    # end of file) shows whether they begin a separator.
    holdback = sep_len - 1

    i = 0
    pending = ""
    has_content = False  # True once anything was written to ``output``
    output = new_file_func(prefix, i)
    try:
        with open(input_file, 'r') as f:
            while True:
                chunk = f.read(bufsize)
                if not chunk:
                    break
                pending += chunk

                # Walk over every complete separator with a cursor instead of
                # re-slicing the buffer after each match.
                pos = 0
                while True:
                    idx = pending.find(separator, pos)
                    if idx == -1:
                        break
                    if idx > pos:
                        output.write(pending[pos:idx])
                        has_content = True
                    if has_content:
                        # Finish the current piece and start the next one.
                        output.close()
                        output = None
                        i += 1
                        output = new_file_func(prefix, i)
                    output.write(separator)
                    has_content = True
                    pos = idx + sep_len

                # Everything except the held-back tail cannot be part of a
                # separator any more and is safe to write out.
                tail_start = max(pos, len(pending) - holdback)
                if tail_start > pos:
                    output.write(pending[pos:tail_start])
                    has_content = True
                pending = pending[tail_start:]

        # End of input: the held-back tail is plain content after all.
        if pending:
            output.write(pending)
    finally:
        # Always release the current output, even when reading or writing
        # failed part-way through.
        if output is not None:
            output.close()
if __name__ == "__main__":
    # Command-line entry point: parse the separator, the output prefix and
    # the input path, then hand them to split_file().
    cli = argparse.ArgumentParser()

    # (flags, keyword options) for each argument, registered in order.
    argument_specs = (
        (("-s", "--separator"),
         {"required": True, "help": "string for the separator"}),
        (("-p", "--prefix"),
         {"required": True, "help": "prefix for split files"}),
        (("input_file",),
         {"help": "input file"}),
    )
    for flags, options in argument_specs:
        cli.add_argument(*flags, **options)

    parsed = cli.parse_args()
    split_file(
        separator=parsed.separator,
        prefix=parsed.prefix,
        input_file=parsed.input_file,
    )