# The software in this package is distributed under the GNU General
# Public License version 2 (with a special exception described below).
#
# A copy of GNU General Public License (GPL) is included in this distribution,
# in the file COPYING.GPL.
#
# As a special exception, if other files instantiate templates or use macros
# or inline functions from this file, or you compile this file and link it
# with other works to produce a work based on this file, this file
# does not by itself cause the resulting work to be covered
# by the GNU General Public License.
#
# However the source code for this file must still be made available
# in accordance with section (3) of the GNU General Public License.
#
# This exception does not invalidate any other reasons why a work based
# on this file might be covered by the GNU General Public License.
#
# Copyright (c) 2016-2018 Intra2net AG <info@intra2net.com>
"""
------------------------------------------------------
Iterative reading of log files, similar to shell command `tail -f`.

Copyright: Intra2net AG

------------------------------------------------------

Basic Functionality (class :py:class:`IterativeReader`):
Runs stat in a loop to find out whether the file size has changed, then reads
the new data and forwards it.

.. todo:: Want to also use lsof to find out whether file/pipe/socket was
          closed, so we can automatically return from the read loop.

:py:class:`LineReader` takes the output of :py:class:`IterativeReader` and
returns it line-wise, as is normal for log files.

:py:class:`LogParser` takes those lines and tries to parse them into fields
like date, time, module name, urgency and message.

.. todo:: auto-detect log line layout
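
A rough usage sketch for the `tail -f`-like case (``my_log_path`` is just a
placeholder name here, not something this module provides)::

    with open(my_log_path, 'rt') as file_handle:
        for description, line, _ in LineReader(file_handle,
                                               keep_watching=True):
            print('{0}: {1}'.format(description, line))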

------------------------------------------------------
"""

import os
import re
import logging
from warnings import warn

from .iter_helpers import zip_longest
from .type_helpers import is_str_or_byte, is_file_obj


class LogReadWarning(UserWarning):
    """Warnings issued by classes in this module."""


def true_func(_file_handle):
    """Replacement for :py:func:`check_is_used`. Returns `True` always."""
    return True


def false_func(_file_handle):
    """Replacement for :py:func:`check_is_used`. Returns `False` always."""
    return False


def check_is_used(file_handle):
    """
    Check whether file is being written to.

    To be implemented, e.g. using lsof.

    If beneficial, the python file object could also easily be supplied as
    an arg.

    :param int file_handle: OS-level file descriptor
    """
    raise NotImplementedError(file_handle)
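

# The lsof-based check mentioned above is not implemented yet; the following
# is only a rough sketch of one possible shape (it assumes Linux /proc and an
# available ``lsof`` binary, neither of which this module otherwise requires)
# and is not used by the classes below.
def _check_is_used_lsof_sketch(file_handle):
    """Guess whether some process has the file open for writing."""
    import subprocess
    # resolve the descriptor back to a path (Linux-specific)
    path = os.readlink('/proc/self/fd/{0}'.format(file_handle))
    try:
        output = subprocess.check_output(['lsof', '-Fa', '--', path],
                                         universal_newlines=True)
    except subprocess.CalledProcessError:
        return False    # lsof found no process using the file
    # lsof -F prints one field per line; access-mode lines start with 'a'
    # and contain 'w' or 'u' when a process may write to the file
    return any(line.startswith('a') and ('w' in line or 'u' in line)
               for line in output.splitlines())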


#: counter for unknown sources in :py:func:`create_description`
_create_description_unknown_counter = 0


def create_description(file_obj, file_handle):
    """
    Create some description for given file-like object / file descriptor.

    :param file_obj: file-like object
    :param int file_handle: OS-level file descriptor
    :returns: Short description for file-like object
    :rtype: str
    """
    global _create_description_unknown_counter

    try:
        name = file_obj.name
        if name:
            return name
    except AttributeError:
        pass

    if file_handle is not None:
        return 'file{0}'.format(file_handle)

    _create_description_unknown_counter += 1
    return 'unknown{0}'.format(_create_description_unknown_counter)


#: error message for IterativeReader constructor
_STR_ERR = 'not accepting file name "{0}" since cannot guarantee closing ' \
           'files --> use with open(file_name)!'


class IterativeReader(object):
    """
    Read continuously from a given file.

    Uses `os.stat(file_obj.fileno()).st_size` as the measure of whether the
    file has changed or not; always reads as much data as possible.

    Does not care about closing files, so it does not accept file names.

    This is the base for class :py:class:`LineReader` that just has to
    implement a different :py:meth:`prepare_result` method.
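
    A minimal sketch of direct use (``some_log_path`` is a placeholder; with
    the default ``keep_watching=False``, iteration ends once the file stops
    growing)::

        with open(some_log_path, 'rt') as file_handle:
            for description, new_data, idx in IterativeReader(file_handle):
                print(new_data, end='')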
    """

    def __init__(self, sources, descs=None, keep_watching=False):
        """
        Create a reader and do some basic checks on args.

        :param sources: iterable over sources. Sources can be opened file
                        objects or read-opened os-level file descriptors.
                        Calling code has to ensure they are closed properly, so
                        best use this within a "with open(file_name) as
                        file_handle:"-context. If sources is a single file
                        obj/descriptor, both source and desc will be converted
                        to lists of length 1.
        :param descs: can be anything of same length as sources. If sources is
                      a single source, then descs is also converted to a list
                      of length 1. If not given (i.e. None), will use
                      :py:func:`create_description` to guess descriptions.
        :param bool keep_watching: keep watching a file that is not changing
                                   in size. Need to manually tell whether the
                                   file is being written to or not since
                                   auto-detect is not implemented yet.
        :raises: OSError when testing fstat on a source
        """
        if not sources:
            raise ValueError('need at least some source!')
        elif is_str_or_byte(sources):
            raise ValueError(_STR_ERR.format(sources))
        elif is_file_obj(sources) or isinstance(sources, int):
            source_input = [sources, ]
            desc_input = [descs, ]
        else:
            source_input = sources   # assume some iterable
            desc_input = descs

        # now divide sources into os-level file descriptors for os.fstat,
        # and file objects for read()
        self.file_objs = []
        self.file_handles = []          # file descriptOR, not descriptION
        for source in source_input:
            if is_file_obj(source):
                self.file_objs.append(source)
                self.file_handles.append(source.fileno())
            elif isinstance(source, int):
                self.file_objs.append(os.fdopen(source))
                self.file_handles.append(source)
            elif is_str_or_byte(source):
                raise ValueError(_STR_ERR.format(source))
            else:
                raise ValueError('source {0} is neither file obj nor file '
                                 'descriptor!'.format(source))

            # try to fstat the new file descriptor just for testing
            os.fstat(self.file_handles[-1])

        # guess descriptions if not given
        if not descs:
            self.descriptions = [create_description(obj, file_handle)
                                 for obj, file_handle
                                 in zip(self.file_objs, self.file_handles)]
        else:
            try:
                if len(desc_input) != len(self.file_objs):
                    raise ValueError('need same number of sources and '
                                     'descriptions!')
            except TypeError:
                pass   # desc_input is a generator or so

            self.descriptions = []
            for obj, file_handle, description in \
                    zip_longest(self.file_objs, self.file_handles, desc_input):
                if obj is None:
                    raise ValueError('more descriptions than sources!')
                elif description is None:
                    self.descriptions.append(create_description(obj,
                                                                 file_handle))
                else:
                    self.descriptions.append(description)

        self.last_sizes = [0 for _ in self.file_objs]
        self.ignore = [False for _ in self.file_objs]

        if keep_watching:
            self.is_used_func = true_func
        else:
            self.is_used_func = false_func
        # use some day: self.is_used_func = check_is_used

        for obj, file_handle, description in \
                zip(self.file_objs, self.file_handles, self.descriptions):
            logging.debug('log_read initialized with file descriptor {0}, '
                          'file obj {1}, description "{2}"'
                          .format(file_handle, obj, description))
230 """Return number of sources given to constructor."""
231 return len(self.file_objs)
233 def n_active_sources(self):
234 """Return number of sources we are actually watching."""
235 return len(self.ignore) - sum(self.ignore)

    def __iter__(self):
        """
        Continue reading from sources, yield results.

        Yields results of :py:meth:`prepare_result`, which depend on the
        subclass you called this function from.
        """
        while not all(self.ignore):
            for idx, (obj, file_handle, description, last_size, do_ignore) in \
                    enumerate(zip(self.file_objs, self.file_handles,
                                  self.descriptions, self.last_sizes,
                                  self.ignore)):
                if do_ignore:
                    continue

                # get current file size
                new_size = os.fstat(file_handle).st_size

                # compare to old size
                if new_size == last_size:
                    if not self.is_used_func(file_handle):
                        self.ignore[idx] = True
                else:
                    if new_size < last_size:  # happened at start of some tests
                        warn('{0} / {1} has become smaller ({2} --> {3})! '
                             .format(obj, description, last_size, new_size)
                             + 'Maybe you are reading from a half-initialized '
                             'file?',
                             category=LogReadWarning)

                    try:
                        new_data = obj.read()
                    except OSError as ose:    # includes IOErrors
                        warn('io error reading from {0} / {1}: {2}'
                             .format(obj, description, ose),
                             category=LogReadWarning)
                        new_data = str(ose)
                    except UnicodeDecodeError as ude:
                        warn('unicode error reading from {0} / {1}: {2}'
                             .format(obj, description, ude),
                             category=LogReadWarning)
                        new_data = str(ude)

                    # post-process the raw data and yield the results
                    to_yield = self.prepare_result(description, new_data, idx)
                    for result in to_yield:
                        yield result

                # prepare next iteration
                self.last_sizes[idx] = new_size

    def prepare_result(self, description, data, idx):
        """
        From raw new data create some yield-able results.

        Intended for overwriting in subclasses.

        This function is called from __iter__ for each new data that becomes
        available. It has to return some iterable whose entries are yielded
        from iteration over objects of this class.

        This base implementation just returns its input in a list, so new data
        is yielded from __iter__ as-is. Subclass implementations can also yield
        multiple results per chunk of new data, or none at all.

        :param str description: Description of source of lines, one of
                                :py:data:`self.descriptions`
        :param str data: Text data read from source
        :param idx: Index of data source
        :returns: [(description, data, idx)], same as input
        :rtype: [(str, str, int)]
        """
        return [(description, data, idx), ]


#: characters to `rstrip()` from the end of complete lines
LINE_SPLITTERS = '\n\r'


class LineReader(IterativeReader):
    """
    An :py:class:`IterativeReader` that returns new data line-wise.

    This means buffering partial line data.
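
    For example, feeding data in two chunks (just a sketch of the buffering
    behaviour; ``reader`` is an assumed instance, and these calls are normally
    made by :py:meth:`IterativeReader.__iter__`)::

        reader.prepare_result('desc', 'complete line\\npartial', 0)
        # --> [('desc', 'complete line', 0)]; 'partial' stays buffered
        reader.prepare_result('desc', ' rest\\n', 0)
        # --> [('desc', 'partial rest', 0)]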
    """

    def __init__(self, *args, **kwargs):
        """Create an :py:class:`IterativeReader` and buffers for sources."""
        super(LineReader, self).__init__(*args, **kwargs)
        self.line_buffers = ['' for _ in range(self.n_sources())]

    def prepare_result(self, description, new_data, idx):
        """
        Take raw new data and split it into lines.

        If a line is not complete, then buffer it.

        Args: see super class method :py:meth:`IterativeReader.prepare_result`
        :returns: list of 3-tuples `(description, line, idx)` where
                  `description` and `idx` are same as args, and `line` is
                  without trailing newline characters
        :rtype: [(str, str, int)]
        """
        all_data = self.line_buffers[idx] + new_data
        self.line_buffers[idx] = ''
        result = []
        should_be_no_new_lines = False
        for line in all_data.splitlines(True):
            if line[-1] in LINE_SPLITTERS:
                result.append((description, line.rstrip(LINE_SPLITTERS), idx))
            elif should_be_no_new_lines:
                raise ValueError('Programming error: something went wrong '
                                 'with line splitting/buffering.')
            else:
                self.line_buffers[idx] = line
                should_be_no_new_lines = True   # (this should be the last)

        return result


class LogParser(LineReader):
    """
    Takes lines from :py:class:`LineReader` and parses their contents.

    Requires a pattern for log lines; auto-detection is not implemented yet.

    Iteration returns a :py:class:`re.Match` object or -- if matching
    failed -- the original raw line. Usage recommendation::

        with open(log_file_name, 'rt') as file_handle:
            for _, data, _ in log_read.LogParser(file_handle, pattern=my_pattern):
                try:
                    line_parts = data.groupdict()
                except AttributeError:   # no groupdict --> could not parse
                    print(f'Failed to parse line {data}')
                    continue
                ...do stuff with line_parts...
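
    A pattern for simple timestamped lines might look as follows (only an
    illustration of the named groups used with ``groupdict()``, not a pattern
    shipped with this module; real log formats will differ)::

        my_pattern = (r'(?P<date>[0-9-]+) (?P<time>[0-9:]+) '
                      r'(?P<module>[^:]+): (?P<message>.*)')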
    """

    def __init__(self, log_file, pattern=None):
        """
        Create a LogParser.

        :param log_file: open file object to read from (not a file name; see
                         :py:class:`IterativeReader`)
        :param pattern: regexp to split log lines; None (default) to return
                        lines unchanged
        :type pattern: str or None (default)
        """
        super(LogParser, self).__init__(log_file)

        self.pattern = pattern

    def prepare_result(self, *args):
        """
        Try to match lines against :py:data:`self.pattern`.

        Args: see super class method :py:meth:`IterativeReader.prepare_result`
        :returns: 3-tuples `(description, line, idx)` where `description` and
                  `idx` are same as input args and `line` is either a
                  :py:class:`re.Match` if line matched :py:data:`self.pattern`
                  or just a str if the line did not match
        :rtype: [(str, :py:class:`re.Match` OR str, int)]
        """
        if self.pattern is None:
            # no pattern given --> return lines as they are
            return super(LogParser, self).prepare_result(*args)

        # let super class split data into lines
        result = []
        for description, raw_line, idx in \
                super(LogParser, self).prepare_result(*args):
            matches = re.match(self.pattern, raw_line)
            if matches:
                result.append((description, matches, idx))
            else:
                result.append((description, raw_line, idx))

        return result