def split_file(separator, prefix, input_file, bufsize=None):
    """Split a text file into numbered pieces at every separator occurrence.

    The separator is matched as a plain substring (not as a regular
    expression).  Output files are named ``prefix + "0"``, ``prefix + "1"``
    and so on.  Each separator is written at the beginning of the piece it
    starts.  A new piece is started at a separator whenever the current
    piece already has content, so an input that begins with the separator
    does not produce an empty first piece.

    Args:
        separator: Non-empty string marking where a new piece begins.
        prefix: Path prefix for the output files; the piece number is
            appended to it.
        input_file: Path of the text file to split.
        bufsize: Number of characters to read per chunk.  When ``None``
            the module-level ``BUFSIZE`` is used.

    Raises:
        ValueError: If ``separator`` is empty or ``bufsize`` is smaller
            than 1.
    """
    sep_len = len(separator)
    if sep_len == 0:
        raise ValueError("empty separator")
    if bufsize is None:
        # Only consulted when the caller gives no explicit chunk size.
        bufsize = BUFSIZE
    if bufsize < 1:
        raise ValueError("bufsize must be a positive integer")

    # A separator may straddle two reads.  Holding back the last
    # ``sep_len - 1`` characters of every chunk guarantees that such a
    # separator is seen whole once the next chunk has been appended.
    keep = sep_len - 1

    piece = 0
    piece_is_empty = True
    buf = ""
    output = open(prefix + str(piece), 'w')
    try:
        # Buffered search: the whole input file is never held in memory.
        with open(input_file, 'r') as f:
            while True:
                chunk = f.read(bufsize)
                at_eof = not chunk
                buf += chunk

                # Split on every complete separator currently in the buffer.
                pos = 0
                idx = buf.find(separator)
                while idx != -1:
                    if idx > pos:
                        output.write(buf[pos:idx])
                        piece_is_empty = False
                    if not piece_is_empty:
                        # Current piece has content: finish it and start
                        # the next one with this separator.
                        output.close()
                        piece += 1
                        output = open(prefix + str(piece), 'w')
                    output.write(separator)
                    piece_is_empty = False
                    pos = idx + sep_len
                    idx = buf.find(separator, pos)

                if at_eof:
                    # Nothing more can complete a separator; flush the rest.
                    output.write(buf[pos:])
                    break

                # Write everything that can no longer be part of a
                # separator and carry the remaining tail into the next read.
                safe_end = max(pos, len(buf) - keep)
                if safe_end > pos:
                    output.write(buf[pos:safe_end])
                    piece_is_empty = False
                buf = buf[safe_end:]
    finally:
        # Closing an already closed file is a no-op, so this is safe even
        # if opening the next piece failed.
        output.close()
if __name__ == "__main__":
    # Command-line entry point: read the separator, the output prefix and
    # the input file from the arguments, then split the file.
    cli = argparse.ArgumentParser()

    # Both options are mandatory and share the same shape.
    for short_flag, long_flag, description in (
        ("-s", "--separator", "string for the separator"),
        ("-p", "--prefix", "prefix for split files"),
    ):
        cli.add_argument(short_flag, long_flag, required=True,
                         help=description)
    cli.add_argument("input_file", help="input file")

    options = cli.parse_args()
    split_file(
        separator=options.separator,
        prefix=options.prefix,
        input_file=options.input_file,
    )