bpo-32713: Fix tarfile.itn for large/negative float values. (GH-5434)
[python-delta-tar] / filesplit.py
CommitLineData
6b2fa38f 1#!/usr/bin/env python3
3759f796 2
866c42e6
DGM
3# Copyright (C) 2013 Intra2net AG
4#
494b38aa
DGM
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU Lesser General Public License as published
7# by the Free Software Foundation; either version 3 of the License, or
866c42e6
DGM
8# (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
494b38aa 13# GNU Lesser General Public License for more details.
866c42e6
DGM
14#
15# You should have received a copy of the GNU General Public License
494b38aa
DGM
16# along with this program. If not, see
17# <http://www.gnu.org/licenses/lgpl-3.0.html>
866c42e6
DGM
18
19
3759f796
ERE
20import argparse
21
BUFSIZE = 16 * 1024


def split_file(separator, prefix, input_file, new_file_func=None):
    '''
    Split input_file into numbered pieces at every occurrence of separator.

    The separator is a literal byte sequence (not a regular expression). Each
    occurrence starts a new output and is written at the beginning of that
    output. The only exception is a separator found at the very start of the
    input: it stays in output number 0, so no empty leading piece is created.

    separator     -- non-empty bytes object to search for
    prefix        -- passed to new_file_func; with the default factory it is
                     the path prefix of the outputs (prefix + "0", "1", ...)
    input_file    -- path of the file to split, opened in binary mode
    new_file_func -- optional callable (prefix, index) returning a writable
                     binary file object; defaults to opening
                     prefix + str(index) in 'wb' mode

    Raises ValueError (a subclass of Exception) if separator is empty.
    '''
    sep_len = len(separator)
    if sep_len == 0:
        raise ValueError("empty separator")

    if new_file_func is None:
        new_file_func = lambda prefix, i: open(prefix + str(i), 'wb')

    # A separator may straddle two reads, so the last sep_len - 1 bytes of
    # every non-final buffer are held back until the next read arrives.
    keep = sep_len - 1

    index = 0
    output = new_file_func(prefix, index)
    output_empty = True  # nothing written to the current output yet
    pending = b""

    try:
        # buffered search: the whole input file is never held in memory
        with open(input_file, 'rb') as f:
            while True:
                chunk = f.read(BUFSIZE)
                pending += chunk

                # split on every complete separator present in the buffer
                start = 0
                while True:
                    idx = pending.find(separator, start)
                    if idx < 0:
                        break

                    if idx > start:
                        output.write(pending[start:idx])
                        output_empty = False

                    # Start a new output unless the current one is still
                    # empty (i.e. the input itself begins with the separator).
                    if not output_empty:
                        output.close()
                        index += 1
                        output = new_file_func(prefix, index)

                    output.write(separator)
                    output_empty = False
                    start = idx + sep_len

                if not chunk:
                    # end of input: flush whatever is left, nothing to keep
                    if start < len(pending):
                        output.write(pending[start:])
                    break

                # flush everything except the tail that could still be the
                # beginning of a separator completed by the next read
                flush_end = max(start, len(pending) - keep)
                if flush_end > start:
                    output.write(pending[start:flush_end])
                    output_empty = False
                pending = pending[flush_end:]
    finally:
        output.close()
77
def chunk_file(input_file, output_file, from_pos, to_pos):
    '''
    Copy the bytes in the range [from_pos, to_pos) of input_file to
    output_file.

    input_file  -- path of the file to read, opened in binary mode
    output_file -- path of the file to write; created or truncated
    from_pos    -- byte offset at which copying starts
    to_pos      -- byte offset at which copying stops (exclusive)

    If the range extends past the end of the input, only the bytes that
    exist are copied. The whole range is read into memory in one go.
    '''
    # context managers guarantee both handles are closed even when opening
    # the output, seeking, reading or writing fails
    with open(input_file, 'rb') as ifl, open(output_file, 'wb') as ofl:
        ifl.seek(from_pos)
        ofl.write(ifl.read(to_pos - from_pos))
86
3759f796
ERE
if __name__ == "__main__":
    # Command line entry point with two modes:
    #  * chunk mode: when both --from-pos and --to-pos are given (>= 0), copy
    #    that byte range of the input file to --output;
    #  * split mode (default): split the input file at every --separator into
    #    files named --prefix followed by a running number.
    parser = argparse.ArgumentParser()

    parser.add_argument("-s", "--separator", help="string for the separator")
    parser.add_argument("-p", "--prefix", help="prefix for split files")
    parser.add_argument("input_file", help="input file")
    parser.add_argument("-f", "--from-pos", type=int, default=-1)
    parser.add_argument("-t", "--to-pos", type=int, default=-1)
    parser.add_argument("-o", "--output", default=None)

    args = parser.parse_args()
    if args.from_pos > -1 and args.to_pos > -1:
        # report a missing option as a usage error instead of a traceback
        if args.output is None:
            parser.error("--output is required with --from-pos and --to-pos")
        chunk_file(input_file=args.input_file, output_file=args.output,
                   from_pos=args.from_pos, to_pos=args.to_pos)
    else:
        if args.separator is None:
            parser.error("--separator is required when splitting")
        if args.prefix is None:
            parser.error("--prefix is required when splitting")
        # surrogateescape turns undecodable command line bytes back into the
        # original raw bytes
        split_file(separator=args.separator.encode('UTF-8', errors="surrogateescape"),
                   prefix=args.prefix, input_file=args.input_file)