3 # Copyright (C) 2013 Intra2net AG
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published
7 # by the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see
17 # <http://www.gnu.org/licenses/lgpl-3.0.html>
def split_file(separator, prefix, input_file, new_file_func=None,
               bufsize=None):
    """
    Split a text file into numbered pieces at each occurrence of a separator.

    The separator is kept and written at the beginning of the piece it
    starts.  Pieces are numbered from 0.  A separator starts a new piece
    only if the current piece already holds (or is about to receive) data
    other than separators; consequently a separator at the very beginning
    of the input, or one that directly follows another separator, stays in
    the current piece.  The result does not depend on how the input happens
    to be cut into read chunks.

    Parameters:
        separator: non-empty string marking the start of each new piece.
        prefix: passed to new_file_func; with the default factory the
            pieces are written to ``prefix + str(i)``.
        input_file: path of the file to split (opened in text mode).
        new_file_func: optional callable ``(prefix, i)`` returning an object
            with ``write(str)`` and ``close()``.
        bufsize: number of characters read per iteration.  Defaults to the
            module-level ``BUFSIZE`` if one is defined, else 16 KiB.

    Raises:
        ValueError: if separator is empty or bufsize is not positive.
    """
    sep_len = len(separator)
    if sep_len == 0:
        raise ValueError("empty separator")

    if new_file_func is None:
        def new_file_func(prefix, i):
            return open(prefix + str(i), 'w')

    if bufsize is None:
        # BUFSIZE is a module constant defined outside this function; fall
        # back to a sane default so the function also works on its own.
        bufsize = globals().get("BUFSIZE", 16 * 1024)
    if bufsize <= 0:
        raise ValueError("bufsize must be positive")

    # An incomplete separator at the end of the buffer can be at most
    # sep_len - 1 characters long, so that many characters are always held
    # back until more input (or end of file) is seen.
    carry_len = sep_len - 1

    i = 0
    buf = ""
    # True once the current piece contains something besides separators.
    piece_has_data = False
    output = new_file_func(prefix, i)
    try:
        # buffered search. we try not to have the whole input file in
        # memory, as it's not needed
        with open(input_file, 'r') as f:
            while True:
                chunk = f.read(bufsize)
                buf += chunk

                # split using the separator
                idx = buf.find(separator)
                while idx != -1:
                    if idx > 0 or piece_has_data:
                        if idx > 0:
                            output.write(buf[:idx])
                        output.close()
                        # Avoid a second close() if the factory raises.
                        output = None
                        i += 1
                        output = new_file_func(prefix, i)
                        piece_has_data = False
                    output.write(separator)
                    buf = buf[idx + sep_len:]
                    idx = buf.find(separator)

                if not chunk:
                    # End of input: whatever is left cannot become a
                    # separator any more, so it belongs to this piece.
                    if buf:
                        output.write(buf)
                    break

                # Write everything that can no longer be part of a
                # separator; keep the tail for the next iteration.
                safe_len = len(buf) - carry_len
                if safe_len > 0:
                    output.write(buf[:safe_len])
                    buf = buf[safe_len:]
                    piece_has_data = True
    finally:
        if output is not None:
            output.close()
if __name__ == "__main__":
    # Command-line entry point: two mandatory options plus the path of the
    # file to split, handed straight to split_file().
    cli = argparse.ArgumentParser()

    required_options = (
        (("-s", "--separator"), "string for the separator"),
        (("-p", "--prefix"), "prefix for split files"),
    )
    for flags, help_text in required_options:
        cli.add_argument(*flags, required=True, help=help_text)
    cli.add_argument("input_file", help="input file")

    options = cli.parse_args()
    split_file(options.separator, options.prefix, options.input_file)