Clean up, remove compat with py < 3.6
[pyi2ncommon] / src / log_read.py
# The software in this package is distributed under the GNU General
# Public License version 2 (with a special exception described below).
#
# A copy of GNU General Public License (GPL) is included in this distribution,
# in the file COPYING.GPL.
#
# As a special exception, if other files instantiate templates or use macros
# or inline functions from this file, or you compile this file and link it
# with other works to produce a work based on this file, this file
# does not by itself cause the resulting work to be covered
# by the GNU General Public License.
#
# However the source code for this file must still be made available
# in accordance with section (3) of the GNU General Public License.
#
# This exception does not invalidate any other reasons why a work based
# on this file might be covered by the GNU General Public License.
#
# Copyright (c) 2016-2018 Intra2net AG <info@intra2net.com>

"""

SUMMARY
------------------------------------------------------
Iterative reading of log files, similar to shell command `tail -f`.

Copyright: Intra2net AG


CONTENTS
------------------------------------------------------

Basic Functionality (class :py:class:`IterativeReader`):
Runs stat in a loop to find out whether the file size has changed. Then reads
the new data and forwards it.

.. todo:: Want to also use lsof to find out whether file/pipe/socket was
          closed, so we can return from the read loop.

:py:class:`LineReader` takes output of :py:class:`IterativeReader` and returns
it line-wise, as is normal for log files.

:py:class:`LogParser` takes those lines and tries to parse them into fields
like date, time, module name, urgency and message.

.. todo:: auto-detect log line layout


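EXAMPLE
------------------------------------------------------
A minimal usage sketch (not taken from this module's tests; the log file name
and the import path `pyi2ncommon.log_read` are assumptions). Like `tail -f`,
the loop below does not terminate on its own::

    from pyi2ncommon.log_read import LineReader

    with open('/var/log/messages') as log_file:
        for description, line, idx in LineReader(log_file):
            print('{0}: {1}'.format(description, line))

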
INTERFACE
------------------------------------------------------

"""

import os
import os.path
import re
from warnings import warn
import logging
from .iter_helpers import zip_longest
from .type_helpers import is_str_or_byte, is_file_obj


class LogReadWarning(UserWarning):
    """Warnings issued by classes in this module."""
    pass


def true_func(_):
    """Replacement for :py:func:`check_is_used`. Returns `True` always."""
    return True


def check_is_used(file_handle):
    """
    Check whether file is being written to.

    To be implemented, e.g. using lsof.

    If beneficial, the python file object could also easily be supplied as an
    additional arg.

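    One conceivable approach (an untested sketch, not implemented behaviour;
    assumes Linux /proc and an available `lsof` binary)::

        import os
        import subprocess

        def check_is_used_sketch(file_handle):
            # resolve the descriptor to a path (Linux-specific assumption)
            path = os.readlink('/proc/self/fd/{0}'.format(file_handle))
            try:
                output = subprocess.check_output(['lsof', '-F', 'a', path],
                                                 universal_newlines=True)
            except subprocess.CalledProcessError:
                return False     # lsof found no process using the file
            # 'a' field lines carry the access mode; 'w' or 'u' mean some
            # process has the file open for writing
            return any(line[1:].strip() in ('w', 'u')
                       for line in output.splitlines()
                       if line.startswith('a'))
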
    :param int file_handle: OS-level file descriptor
    """
    raise NotImplementedError(file_handle)


#: counter for unknown sources in :py:func:`create_description`
_create_description_unknown_counter = 0


def create_description(file_obj, file_handle):
    """
    Create some description for given file-like object / file descriptor.

    :param file_obj: file-like object
    :param int file_handle: os-level file descriptor
    :returns: Short description for file-like object
    :rtype: string
    """
    global _create_description_unknown_counter

    try:
        desc = file_obj.name
        if desc:
            return desc
    except AttributeError:
        pass

    if file_handle is not None:
        return 'file{0}'.format(file_handle)
    else:
        _create_description_unknown_counter += 1
        return 'unknown{0}'.format(_create_description_unknown_counter)


#: error message for IterativeReader constructor
_STR_ERR = 'not accepting file name "{0}" since cannot guarantee closing ' \
           'files --> use with open(file_name)!'


class IterativeReader:
    """
    Read continuously from a given file.

    Uses `os.stat(file_obj.fileno()).st_size` as measure of whether the file
    has changed; always reads as much data as possible.

    Does not care about closing files, so does not accept file names.

    This is the base for class :py:class:`LineReader` that just has to
    implement a different :py:meth:`prepare_result` method.
    """

    def __init__(self, sources, descs=None, return_when_done=False):
        """
        Create a reader; do some basic checks on args.

        :param sources: iterable over sources. Sources can be opened file
                        objects or read-opened os-level file descriptors.
                        Calling code has to ensure they are closed properly,
                        so best use this within a "with open(file_name) as
                        file_handle:"-context. If sources is a single file
                        obj/descriptor, both source and desc will be converted
                        to lists of length 1
        :param descs: can be anything of same length as sources. If sources is
                      a single source, then descs is also converted to a list
                      of length 1. If not given (i.e. None), will use
                      :py:func:`create_description` to guess descriptions
        :param bool return_when_done: ignore file_handle if no-one is writing
                                      to it any more. Return from iterator
                                      when all watched files are done (not
                                      implemented yet)
        :raises: OSError when testing fstat on source
        """
        if not sources:
            raise ValueError('need at least some source!')
        elif is_str_or_byte(sources):
            raise ValueError(_STR_ERR.format(sources))
        elif is_file_obj(sources) or isinstance(sources, int):
            source_input = [sources, ]
            desc_input = [descs, ]
        else:
            source_input = sources     # assume some iterable
            desc_input = descs

        # now divide sources into os-level file descriptors for os.fstat,
        # and file objects for read()
        self.file_objs = []
        self.file_handles = []         # file descriptOR, not descriptION
        for source in source_input:
            if is_file_obj(source):
                self.file_objs.append(source)
                self.file_handles.append(source.fileno())
            elif isinstance(source, int):
                self.file_objs.append(os.fdopen(source))
                self.file_handles.append(source)
            elif is_str_or_byte(source):
                raise ValueError(_STR_ERR.format(source))
            else:
                raise ValueError('source {0} is neither file obj nor file '
                                 'descriptor!'.format(source))

            # try to fstat the new file descriptor just for testing
            os.fstat(self.file_handles[-1])

        # guess descriptions if not given
        if not desc_input:
            self.descriptions = [create_description(obj, file_handle)
                                 for obj, file_handle
                                 in zip(self.file_objs, self.file_handles)]
        else:
            try:
                if len(desc_input) != len(self.file_objs):
                    raise ValueError('need same number of sources and '
                                     'descriptions!')
            except TypeError:
                pass   # desc_input is generator or so

            self.descriptions = []
            for obj, file_handle, description in \
                    zip_longest(self.file_objs, self.file_handles, desc_input):
                if obj is None:
                    raise ValueError('more descriptions than sources!')
                elif description is None:
                    self.descriptions.append(create_description(obj,
                                                                file_handle))
                else:
                    self.descriptions.append(description)

        self.last_sizes = [0 for _ in self.file_objs]
        self.ignore = [False for _ in self.file_objs]

        if return_when_done:
            self.is_used_func = check_is_used
        else:
            self.is_used_func = true_func

        for obj, file_handle, description in \
                zip(self.file_objs, self.file_handles, self.descriptions):
            logging.debug('log_read initialized with file descriptor {0}, '
                          'file obj {1}, description "{2}"'
                          .format(file_handle, obj, description))

    def n_sources(self):
        """Return number of sources given to constructor."""
        return len(self.file_objs)

    def n_active_sources(self):
        """Return number of sources we are actually watching."""
        return len(self.ignore) - sum(self.ignore)

    def __iter__(self):
        """
        Continue reading from sources, yield results.

        Yields the result of :py:meth:`prepare_result`, which depends on which
        subclass you called this function from.
        """
        while True:
            for idx, (obj, file_handle, description, last_size, do_ignore) in \
                    enumerate(zip(self.file_objs, self.file_handles,
                                  self.descriptions, self.last_sizes,
                                  self.ignore)):
                if do_ignore:
                    continue

                # get new file size
                new_size = os.fstat(file_handle).st_size

                # compare to old size
                if new_size == last_size:
                    if not self.is_used_func(file_handle):
                        warn('no one is writing to {0} / {1} -- '
                             'stop watching it!'
                             .format(file_handle, description),
                             category=LogReadWarning)
                        self.ignore[idx] = True
                else:
                    if new_size < last_size:  # happened at start of some tests
                        warn('{0} / {1} has become smaller ({2} --> {3})! '
                             .format(obj, description, last_size, new_size)
                             + 'Maybe you are reading from a half-initialized '
                             + 'file?',
                             category=LogReadWarning)
                    try:
                        new_data = obj.read()
                    except OSError as ose:    # includes IOErrors
                        warn('io error reading from {0} / {1}: {2}'
                             .format(obj, description, ose),
                             category=LogReadWarning)
                        new_data = str(ose)
                    except UnicodeDecodeError as ude:
                        warn('unicode error reading from {0} / {1}: {2}'
                             .format(obj, description, ude),
                             category=LogReadWarning)
                        new_data = str(ude)

                    # post-processing
                    to_yield = self.prepare_result(description, new_data, idx)
                    for result in to_yield:
                        yield result

                    # prepare next iteration
                    self.last_sizes[idx] = new_size

    def prepare_result(self, description, data, idx):
        """
        From raw new data create some yield-able results.

        Intended for overwriting in subclasses.

        This function is called from __iter__ for each chunk of new data that
        becomes available. It has to return some iterable whose entries are
        yielded from iteration over objects of this class.

        This base implementation just returns its input in a list, so new data
        is yielded from __iter__ as-is. Subclass implementations can also
        yield tuples.

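        A hypothetical subclass (sketch only; the name is made up) could, for
        example, yield one result per whitespace-separated word instead of
        one per read chunk::

            class WordReader(IterativeReader):
                def prepare_result(self, description, data, idx):
                    # one tuple per word; __iter__ yields them one by one
                    return [(description, word, idx)
                            for word in data.split()]
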
        :param str description: Description of source of lines, one of
                                :py:data:`self.descriptions`
        :param str data: Text data read from source
        :param idx: Index of data source
        :returns: [(description, data, idx)], same as input
        :rtype: [(str, str, int)]
        """
        return [(description, data, idx), ]


#: characters to `rstrip()` from end of complete lines
LINE_SPLITTERS = '\n\r'


class LineReader(IterativeReader):
    """
    An :py:class:`IterativeReader` that returns new data line-wise.

    This means buffering partial line data.
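
    Illustration of the buffering (a sketch, not output from the test suite;
    `chr(10)` is just a newline character)::

        import os
        with open(os.devnull) as handle:
            reader = LineReader(handle)
            print(reader.prepare_result('desc', 'full line' + chr(10)
                                        + 'partial li', 0))
            # --> [('desc', 'full line', 0)]; 'partial li' stays buffered
            print(reader.prepare_result('desc', 'ne' + chr(10), 0))
            # --> [('desc', 'partial line', 0)]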
    """

    def __init__(self, *args, **kwargs):
        """Create an :py:class:`IterativeReader` and buffers for sources."""
        super().__init__(*args, **kwargs)
        self.line_buffers = ['' for _ in range(self.n_sources())]

    def prepare_result(self, description, new_data, idx):
        """
        Take raw new data and split it into lines.

        If a line is not complete, then buffer it.

        Args: see super class method :py:meth:`IterativeReader.prepare_result`
        :returns: list of 3-tuples `(description, line, idx)` where
                  `description` and `idx` are same as args, and `line` is
                  without trailing newline characters
        :rtype: [(str, str, int)]
        """
        all_data = self.line_buffers[idx] + new_data
        self.line_buffers[idx] = ''
        result = []
        should_be_no_new_lines = False
        for line in all_data.splitlines(True):
            if line[-1] in LINE_SPLITTERS:
                result.append((description, line.rstrip(LINE_SPLITTERS), idx))
            elif should_be_no_new_lines:
                # self-check
                raise ValueError('Programming error: something went wrong '
                                 'with line splitting/buffering.')
            else:
                self.line_buffers[idx] = line
                should_be_no_new_lines = True   # (this should be the last)

        return result


class LogParser(LineReader):
    """
    Takes lines from :py:class:`LineReader` and parses their contents.

    Requires a pattern for log lines; auto-detection is not implemented yet.

    Iteration returns the `re.match` result or -- if matching failed -- the
    original raw line.
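
    A possible pattern for syslog-like lines (a made-up sketch; the path and
    the regexp are illustrative assumptions, not shipped defaults)::

        pattern = (r'(?P<timestamp>[A-Z][a-z]{2} +[0-9]+ [0-9:]+) '
                   r'(?P<host>[^ ]+) (?P<module>[^:]+): (?P<message>.*)')
        with open('/var/log/messages') as log_file:
            for description, line, idx in LogParser(log_file, pattern):
                if not isinstance(line, str):    # i.e. the line matched
                    print(line.group('module'), line.group('message'))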
    """

    def __init__(self, log_file, pattern=None):
        """
        Create a LogParser.

        :param log_file: open file object of log to parse (required!); as in
                         :py:class:`IterativeReader`, file names are not
                         accepted
        :param pattern: regexp to split log lines; None (default) to return
                        lines as they are
        :type pattern: str or None (default)
        """
        super().__init__(log_file)

        self.pattern = pattern

    def prepare_result(self, *args):
        """
        Try parsing lines.

        Args: see super class method :py:meth:`IterativeReader.prepare_result`
        :returns: list of 3-tuples `(description, line, idx)` where
                  `description` and `idx` are same as input args and `line` is
                  either a :py:class:`re.Match` if the line matched
                  :py:data:`self.pattern` or just str if it did not match
        :rtype: [(str, :py:class:`re.Match` OR str, int)]
        """
        # let super class split data into lines
        result = []
        for description, raw_line, idx in super().prepare_result(*args):
            if self.pattern is None:
                # no pattern given: return lines as they are
                result.append((description, raw_line, idx))
                continue
            match = re.match(self.pattern, raw_line)
            if match:
                result.append((description, match, idx))
            else:
                result.append((description, raw_line, idx))
        return result