1 # The software in this package is distributed under the GNU General
2 # Public License version 2 (with a special exception described below).
4 # A copy of GNU General Public License (GPL) is included in this distribution,
5 # in the file COPYING.GPL.
7 # As a special exception, if other files instantiate templates or use macros
8 # or inline functions from this file, or you compile this file and link it
9 # with other works to produce a work based on this file, this file
10 # does not by itself cause the resulting work to be covered
11 # by the GNU General Public License.
13 # However the source code for this file must still be made available
14 # in accordance with section (3) of the GNU General Public License.
16 # This exception does not invalidate any other reasons why a work based
17 # on this file might be covered by the GNU General Public License.
19 # Copyright (c) 2016-2018 Intra2net AG <info@intra2net.com>
24 ------------------------------------------------------
25 Iterative reading of log files, similar to shell command `tail -f`.
27 Copyright: Intra2net AG
31 ------------------------------------------------------
33 Basic Functionality (class :py:class:`IterativeReader`):
34 Runs stat in a loop to find out whether file size has changed. Then reads the
35 new data and forwards that
37 .. todo:: Want to also use lsof to find out whether file/pipe/socket was
38 closed, so can automatically return from read loop.
40 :py:class:`LineReader` takes output of :py:class:`IterativeReader` and returns
41 it line-wise as is normal for log files
43 :py:class:`LogParser` takes those lines and tries to parse them into fields
44 like date, time, module name, urgency and message.
46 .. todo:: auto-detect log line layout
50 ------------------------------------------------------
import logging
import os
import re
from contextlib import contextmanager
from warnings import warn

from .iter_helpers import zip_longest
from .type_helpers import is_str_or_byte, is_file_obj
class LogReadWarning(UserWarning):
    """Warning category used for all warnings raised in this module."""
def true_func(unused_argument):
    """
    Replacement for :py:func:`check_is_used`. Returns `True` always.

    Used as `is_used_func` when caller wants to keep watching a file that
    does not change in size (see `IterativeReader(keep_watching=True)`).

    :param unused_argument: ignored; accepted only to match the
                            :py:func:`check_is_used` signature
    :returns: True
    """
    # NOTE(review): the original `def` line was lost in extraction; the
    # one-argument signature is reconstructed from the call site
    # `self.is_used_func(file_handle)` in IterativeReader.__iter__.
    return True
def false_func(unused_argument):
    """
    Replacement for :py:func:`check_is_used`. Returns `False` always.

    Used as `is_used_func` when caller wants to stop watching a file as soon
    as its size stops changing (see `IterativeReader(keep_watching=False)`).

    :param unused_argument: ignored; accepted only to match the
                            :py:func:`check_is_used` signature
    :returns: False
    """
    # NOTE(review): signature reconstructed like for true_func above.
    return False
def check_is_used(file_handle):
    """
    Check whether file is being written to.

    To be implemented, e.g. using lsof.

    If beneficial could also easily supply python file object as arg.

    :param int file_handle: OS-level file descriptor
    :raises NotImplementedError: always, since this is not implemented yet
    """
    # the argument is passed along so the (future) caller can see which
    # descriptor triggered the failure
    raise NotImplementedError(file_handle)
#: counter for unknown sources in :py:func:`create_description`
_create_description_unknown_counter = 0


def create_description(file_obj, file_handle):
    """
    Create some description for given file-like object / file descriptor.

    Prefers the object's `name` attribute; falls back to the numeric file
    descriptor ("fileN"); as a last resort invents a unique "unknownN" label
    using module-level counter :py:data:`_create_description_unknown_counter`.

    :param file_obj: file-like object
    :param int file_handle: os-level file descriptor; may be None
    :returns: Short description for file-like object
    :rtype: str
    """
    global _create_description_unknown_counter

    try:
        # most file objects expose the path they were opened from
        file_name = file_obj.name
        if file_name:
            return file_name
    except AttributeError:
        pass  # source has no name attribute (e.g. plain descriptor, socket)

    if file_handle is not None:
        return 'file{0}'.format(file_handle)
    else:
        _create_description_unknown_counter += 1
        return 'unknown{0}'.format(_create_description_unknown_counter)
120 #: error message for IterativeReader constructor
121 _STR_ERR = 'not accepting file name "{0}" since cannot guarantee closing ' \
122 'files --> use with open(file_name)!'
class IterativeReader(object):
    """
    Read continuously from a given file.

    Use `os.stat(file_obj.fileno()).st_size` as measure whether file has
    changed or not; Always reads as much data as possible.

    Does not care about closing files, so does not accept file names.

    This is the base for class :py:class:`LineReader` that just has to
    implement a different :py:meth:`prepare_result` method.
    """

    def __init__(self, sources, descs=None, keep_watching=False):
        """
        Create a reader; do some basic checks on args.

        :param sources: iterable over sources. Sources can be opened file
                        objects or read-opened os-level file descriptors.
                        Calling code has to ensure they are closed properly,
                        so best use this within a "with open(file_name) as
                        file_handle:"-context. If sources is a single file
                        obj/descriptor, both source and desc will be
                        converted to lists of length 1
        :param descs: can be anything of same length as sources. If sources
                      is a single source, then descs is also converted to a
                      list of length 1. If not given (i.e. None), will use
                      :py:func:`create_description` to guess descriptions
        :param bool keep_watching: keep watching file that is not changing in
                                   size. Need to manually tell whether file
                                   is being written to or not since
                                   auto-detect is not implemented yet.
        :raises: OSError when testing fstat on source
        """
        # normalize input to lists; reject plain file names outright since we
        # could not guarantee closing them again (see _STR_ERR)
        if not sources:
            raise ValueError('need at least some source!')
        elif is_str_or_byte(sources):
            raise ValueError(_STR_ERR.format(sources))
        elif is_file_obj(sources) or isinstance(sources, int):
            source_input = [sources, ]
            desc_input = [descs, ]
        else:
            source_input = sources  # assume some iterable
            desc_input = descs

        # now divide sources into os-level file descriptors for os.fstat,
        # and file objects for read()
        self.file_objs = []
        self.file_handles = []  # file descriptOR, not descriptION
        for source in source_input:
            if is_file_obj(source):
                self.file_objs.append(source)
                self.file_handles.append(source.fileno())
            elif isinstance(source, int):
                # os-level descriptor: wrap in file obj for read()
                self.file_objs.append(os.fdopen(source))
                self.file_handles.append(source)
            elif is_str_or_byte(source):
                raise ValueError(_STR_ERR.format(source))
            else:
                raise ValueError('source {0} is neither file obj nor file '
                                 'descriptor!'.format(source))

            # try to fstat the new file descriptor just for testing
            # (raises OSError right away for closed/invalid descriptors)
            os.fstat(self.file_handles[-1])

        # guess descriptions if not given
        if not desc_input:
            self.descriptions = [create_description(obj, file_handle)
                                 for obj, file_handle
                                 in zip(self.file_objs, self.file_handles)]
        else:
            try:
                if len(desc_input) != len(self.file_objs):
                    raise ValueError('need same number of sources and '
                                     'descriptions!')
            except TypeError:
                pass  # desc_input is generator or so

            self.descriptions = []
            for obj, file_handle, description in \
                    zip_longest(self.file_objs, self.file_handles,
                                desc_input):
                if obj is None:
                    raise ValueError('more descriptions than sources!')
                elif description is None:
                    # fill gaps in given descriptions with guessed ones
                    self.descriptions.append(create_description(obj,
                                                                file_handle))
                else:
                    self.descriptions.append(description)

        # last file size seen per source; start at 0 so first iteration
        # reads the file from the beginning
        self.last_sizes = [0 for _ in self.file_objs]
        # per-source flag: True means "stop watching this source"
        self.ignore = [False for _ in self.file_objs]

        if keep_watching:
            self.is_used_func = true_func
        else:
            self.is_used_func = false_func
        # use some day: self.is_used_func = check_is_used

        for obj, file_handle, description in \
                zip(self.file_objs, self.file_handles, self.descriptions):
            logging.debug('log_read initialized with file descriptor {0}, '
                          'file obj {1}, description "{2}"'
                          .format(file_handle, obj, description))

    def n_sources(self):
        """Return number of sources given to constructor."""
        return len(self.file_objs)

    def n_active_sources(self):
        """Return number of sources we are actually watching."""
        return len(self.ignore) - sum(self.ignore)

    def __iter__(self):
        """
        Continue reading from sources, yield results.

        yields result of :py:meth:`prepare_result`, which depends on what
        subclass you called this function from.
        """
        while True:
            if all(self.ignore):
                # nothing left to watch --> stop iterating
                break
            for idx, (obj, file_handle, description, last_size, do_ignore) \
                    in enumerate(zip(self.file_objs, self.file_handles,
                                     self.descriptions, self.last_sizes,
                                     self.ignore)):
                if do_ignore:
                    continue

                # cheap change detection: stat instead of read
                new_size = os.fstat(file_handle).st_size

                # compare to old size
                if new_size == last_size:
                    # unchanged; stop watching unless told to keep going
                    if not self.is_used_func(file_handle):
                        self.ignore[idx] = True
                else:
                    if new_size < last_size:  # happened at start of some tests
                        warn('{0} / {1} has become smaller ({2} --> {3})! '
                             .format(obj, description, last_size, new_size)
                             + 'Maybe you are reading from a half-initialized '
                             + 'file?',
                             category=LogReadWarning)
                    try:
                        new_data = obj.read()
                    except OSError as ose:  # includes IOErrors
                        warn('io error reading from {0} / {1}: {2})'
                             .format(obj, description, ose),
                             category=LogReadWarning)
                        # forward error text instead of data; keep watching
                        new_data = str(ose)
                    except UnicodeDecodeError as ude:
                        warn('unicode error reading from {0} / {1}: {2}'
                             .format(obj, description, ude),
                             category=LogReadWarning)
                        new_data = str(ude)

                    # post-processing by subclass (line splitting, parsing...)
                    for result in self.prepare_result(description, new_data,
                                                      idx):
                        yield result

                    # prepare next iteration
                    self.last_sizes[idx] = new_size

    def prepare_result(self, description, data, idx):
        """
        From raw new data create some yield-able results.

        Intended for overwriting in subclasses.

        This function is called from __iter__ for each new data that becomes
        available. It has to provide results which are forwarded to caller.

        This base implementation just yields its input, so new data is
        yielded from `__iter__` as-is.

        :param str description: Description of source of lines, one of
                                :py:data:`self.descriptions`
        :param str data: Text data read from source
        :param idx: Index of data source
        :returns: nothing but yields `(description, data, idx)`, same as input
        """
        yield description, data, idx
#: characters to `rstrip()` from end of complete lines
#: NOTE(review): str.splitlines(True) used in LineReader also splits on
#: other separators (e.g. \x0b, \u2028); assumed absent from watched logs --
#: confirm if logs may contain such characters
LINE_SPLITTERS = '\n\r'
class LineReader(IterativeReader):
    """
    An :py:class:`IterativeReader` that returns new data line-wise.

    This means buffering partial line data.
    """

    def __init__(self, *args, **kwargs):
        """Create an :py:class:`IterativeReader` and buffers for sources."""
        super(LineReader, self).__init__(*args, **kwargs)
        # one partial-line buffer per source, indexed like file_objs
        self.line_buffers = ['' for _ in range(self.n_sources())]

    def prepare_result(self, description, new_data, idx):
        """
        Take raw new data and split it into lines.

        If line is not complete, then buffer it.

        Args: see super class method :py:meth:`IterativeReader.prepare_result`
        :returns: list of 3-tuples `(description, line, idx)` where
                  `description` and `idx` are same as args, and `line` is
                  without trailing newline characters
        :rtype: [(str, str, int)]
        """
        # prepend whatever was left over from the previous read
        all_data = self.line_buffers[idx] + new_data
        self.line_buffers[idx] = ''
        should_be_no_new_lines = False
        # keepends=True so we can tell complete lines from a trailing partial
        for line in all_data.splitlines(True):
            if line[-1] in LINE_SPLITTERS:
                yield description, line.rstrip(LINE_SPLITTERS), idx
            elif should_be_no_new_lines:
                # only the very last chunk may lack a line terminator
                raise ValueError('Programming error: something went wrong with '
                                 'line splitting/buffering.')
            else:
                # incomplete last line: keep for next round
                self.line_buffers[idx] = line
                should_be_no_new_lines = True  # (this should be the last)
class LogParser(LineReader):
    """
    Takes lines from :py:class:`LineReader` and parses their contents.

    Requires a pattern for log lines, auto-detection is not implemented yet.

    Iteration returns :py:class:`re.match` result or -- if matching failed --
    None. The latest unparsed line is available as `self.last_unparsed_line`.
    Usage recommendation:

        with open(log_file_name, 'rt') as file_handle:
            parser = log_read.LogParser(file_handle, pattern=my_pattern)
            for _, data, _ in parser:
                if data is None:
                    print(f'Failed to parse line {parser.last_unparsed_line}')
                    continue
                line_parts = data.groupdict()
                ...do stuff with line_parts...
    """

    def __init__(self, log_file, pattern=None):
        """
        Create a LogParser.

        :param str log_file: name of log file to parse (required!)
        :param pattern: regexp to split log lines; None (default) to return
                        lines as they are
        :type pattern: str or None (default)
        """
        super(LogParser, self).__init__(log_file)

        self.pattern = pattern
        self.last_unparsed_line = ''

    def prepare_result(self, *args):
        """
        Try parsing lines.

        Args: see super class method :py:meth:`IterativeReader.prepare_result`
        :returns: 3-tuples `(description, line, idx)` where `description` and
                  `idx` are same as input args and `line` is either a
                  :py:class:`re.Match` if line matched :py:data:`self.pattern`
                  or just str if line did not match.
        :rtype: [(str, :py:class:`re.Match` OR str, int)]
        """
        # let super class split data into lines
        for description, raw_line, idx in \
                super(LogParser, self).prepare_result(*args):
            if self.pattern is None:
                # BUGFIX: docstring promises raw lines for pattern=None, but
                # re.match(None, ...) would raise TypeError; pass line through
                yield description, raw_line, idx
                continue
            matches = re.match(self.pattern, raw_line)
            if matches:
                yield description, matches, idx
            else:
                # remember line for caller's error reporting
                self.last_unparsed_line = raw_line
                yield description, None, idx

    @classmethod
    @contextmanager
    def create_for(cls, filename, *args, **kwargs):
        """
        Open single file, yield LogParser. Ensures file is closed afterwards.

        This allows opening file and creation LogParser for it to one line:

            with LogParser.create_for('/var/log/messages', SYS_LOG_PATTERN) \
                    as parser:
                for _, matches, _ in parser:
                    if matches:
                        print(matches.groupdict())
                    else:
                        print(f'UNPARSED: {parser.last_unparsed_line}')

        :param str filename: something that :py:meth:`open` accepts
        :param args: Forwarded to constructor
        :param kwargs: Forwarded to constructor
        """
        with open(filename) as file_handle:
            yield cls(file_handle, *args, **kwargs)