1 # The software in this package is distributed under the GNU General
2 # Public License version 2 (with a special exception described below).
4 # A copy of GNU General Public License (GPL) is included in this distribution,
5 # in the file COPYING.GPL.
7 # As a special exception, if other files instantiate templates or use macros
8 # or inline functions from this file, or you compile this file and link it
9 # with other works to produce a work based on this file, this file
10 # does not by itself cause the resulting work to be covered
11 # by the GNU General Public License.
13 # However the source code for this file must still be made available
14 # in accordance with section (3) of the GNU General Public License.
16 # This exception does not invalidate any other reasons why a work based
17 # on this file might be covered by the GNU General Public License.
19 # Copyright (c) 2016-2018 Intra2net AG <info@intra2net.com>
24 ------------------------------------------------------
25 Iterative reading of log files, similar to shell command `tail -f`.
27 Copyright: Intra2net AG
31 ------------------------------------------------------
33 Basic Functionality (class :py:class:`IterativeReader`):
34 Runs stat in a loop to find out whether file size has changed. Then reads the
35 new data and forwards that
37 .. todo:: Want to also use lsof to find out whether file/pipe/socket was
38 closed, so can return from read loop
40 :py:class:`LineReader` takes output of :py:class:`IterativeReader` and returns
41 it line-wise as is normal for log files
43 :py:class:`LogParser` takes those lines and tries to parse them into fields
44 like date, time, module name, urgency and message.
46 .. todo:: auto-detect log line layout
50 ------------------------------------------------------
57 from warnings import warn
59 from .iter_helpers import zip_longest
60 from .type_helpers import is_str_or_byte, is_file_obj
# Dedicated warning category: lets callers silence or promote warnings
# from this module via warnings.filterwarnings(..., category=LogReadWarning)
# without affecting other UserWarnings.
63 class LogReadWarning(UserWarning):
64 """Warnings issued by classes in this module."""
69 """Replacement for :py:func:`check_is_used`. Returns `True` always."""
# NOTE(review): this excerpt elides several original lines (among them the
# docstring triple-quote delimiters); code lines below are kept verbatim.
73 def check_is_used(file_handle):
75 Check whether file is being written to.
77 To be implemented, e.g. using lsof.
79 If beneficial could also easily supply python file object as arg.
81 :param int file_handle: OS-level file descriptor
# Deliberately unimplemented placeholder.  IterativeReader assigns either
# this function or true_func as self.is_used_func, so selecting this one
# makes iteration raise as soon as a watched file stops growing.  Passing
# file_handle keeps the resulting NotImplementedError informative.
83 raise NotImplementedError(file_handle)
86 #: counter for unknown sources in :py:func:`create_description`
# Module-level state, bumped via `global` inside create_description so each
# source without a usable name/descriptor gets a distinct 'unknownN' tag.
87 _create_description_unknown_counter = 0
90 def create_description(file_obj, file_handle):
92 Create some description for given file-like object / file descriptor.
94 :param file_obj: file-like object
95 :param int file_handle: os-level file descriptor
96 :returns: Short description for file-like object
99 global _create_description_unknown_counter
# NOTE(review): original lines 100-104 are elided here -- presumably a
# try-block that attempts to use an attribute of file_obj (its name) as the
# description, with this handler catching its absence; confirm against the
# full source.
105 except AttributeError:
# Fallback 1: derive a description from the numeric descriptor.
108 if file_handle is not None:
109 return 'file{0}'.format(file_handle)
# (elided: the `else:` line presumably sits between these two statements)
# Fallback 2: last resort -- number the unknown sources with the module
# global.  Plain int increment; not guarded for concurrent callers --
# fine for the single-threaded use this module appears to assume.
111 _create_description_unknown_counter += 1
112 return 'unknown{0}'.format(_create_description_unknown_counter)
115 #: error message for IterativeReader constructor
# Formatted with the offending source via .format() at the two raise sites
# in IterativeReader.__init__; explains why plain file *names* are rejected
# (this module never closes files, so callers must own the open/close).
116 _STR_ERR = 'not accepting file name "{0}" since cannot guarantee closing ' \
117 'files --> use with open(file_name)!'
120 class IterativeReader(object):
# NOTE(review): this is a sampled listing -- gaps in the embedded source
# numbers (121, 123, 131-132, 153-154, 161, ...) mark lines elided from
# view, including docstring delimiters and several if/else/try/while
# lines.  Code lines below are kept byte-identical; only review comments
# are added, hedged wherever they depend on elided context.
122 Read continuously from a given file.
124 Use `os.stat(file_obj.fileno()).st_size` as measure whether file has
125 changed or not; Always reads as much data as possible.
127 Does not care about closing files, so does not accept file names.
129 This is the base for class :py:class:`LineReader` that just has to
130 implement a different :py:meth:`prepare_result` method.
133 def __init__(self, sources, descs=None, return_when_done=False):
135 Create a reader; do some basic checks on args.
137 :param sources: iterable over sources. Sources can be opened file
138 objects or read-opened os-level file descriptors.
139 Calling code has to ensure they are closed properly, so
140 best use this within a "with open(file_name) as
141 file_handle:"-context. If sources is a single file
142 obj/descriptor, both source and desc will be converted
144 :param descs: can be anything of same length as sources. If sources is
145 a single source, then descs is also converted to a list
146 of length 1. If not given (i.e. None), will use
147 :py:func:`create_description` to guess descriptions
148 :param bool return_when_done: ignore file_handle if no-one is writing
149 to it any more. Return from iterator when
150 all watched files are done (not
152 :raises: OSError when testing fstat on source
# (elided: the guard testing for empty/None sources presumably precedes
# this raise -- confirm against full source)
155 raise ValueError('need at least some source!')
156 elif is_str_or_byte(sources):
157 raise ValueError(_STR_ERR.format(sources))
# Single source given: wrap source and desc into length-1 lists so the
# rest of the constructor can treat everything uniformly.
158 elif is_file_obj(sources) or isinstance(sources, int):
159 source_input = [sources, ]
160 desc_input = [descs, ]
# (elided: the `else:` line and the matching `desc_input = descs`)
162 source_input = sources # assume some iterable
165 # now divide sources into os-level file descriptors for os.fstat,
166 # and file objects for read()
# (elided: `self.file_objs = []` presumably initialized just above --
# it is appended to below)
168 self.file_handles = [] # file descriptOR, not descriptION
169 for source in source_input:
170 if is_file_obj(source):
171 self.file_objs.append(source)
172 self.file_handles.append(source.fileno())
173 elif isinstance(source, int):
# Raw descriptor: wrap in a file object for read(); note the fdopen'd
# object is never closed by this class (documented policy above).
174 self.file_objs.append(os.fdopen(source))
175 self.file_handles.append(source)
176 elif is_str_or_byte(source):
177 raise ValueError(_STR_ERR.format(source))
# (elided: `else:` line before this catch-all rejection)
179 raise ValueError('source {0} is neither file obj nor file '
182 # try to fstat the new file descriptor just for testing
# Fail fast: a bad descriptor raises OSError here, per the :raises: doc.
183 os.fstat(self.file_handles[-1])
185 # guess descriptions if not given
# (elided: the `if descs is None:`-style guard presumably selects between
# this comprehension and the explicit-descs branch below)
187 self.descriptions = [create_description(obj, file_handle)
189 in zip(self.file_objs, self.file_handles)]
192 if len(desc_input) != len(self.file_objs):
193 raise ValueError('need same number of sources and '
# len() failed on desc_input (e.g. a generator): fall through and let
# zip_longest below detect any length mismatch instead.
196 pass # desc_input is generator or so
198 self.descriptions = []
199 for obj, file_handle, description in \
200 zip_longest(self.file_objs, self.file_handles, desc_input):
# (elided: the condition -- presumably obj/file_handle exhausted while
# descriptions remain -- that triggers this raise)
202 raise ValueError('more descriptions than sources!')
203 elif description is None:
# Per-entry fallback: missing description slots are auto-guessed.
204 self.descriptions.append(create_description(obj,
207 self.descriptions.append(description)
# Per-source bookkeeping, parallel to file_objs: last seen st_size and
# an "ignore this source from now on" flag used by __iter__.
209 self.last_sizes = [0 for _ in self.file_objs]
210 self.ignore = [False for _ in self.file_objs]
# (elided: the `if return_when_done:`/`else:` lines selecting between
# these two assignments -- confirm).  NOTE(review): check_is_used raises
# NotImplementedError, so the return_when_done=True path cannot currently
# work end-to-end.
213 self.is_used_func = check_is_used
215 self.is_used_func = true_func
217 for obj, file_handle, description in \
218 zip(self.file_objs, self.file_handles, self.descriptions):
219 logging.debug('log_read initialized with file descriptor {0}, '
220 'file obj {1}, description "{2}"'
221 .format(file_handle, obj, description))
# (elided: the `def n_sources(self):` line presumably precedes this
# docstring -- LineReader calls self.n_sources())
224 """Return number of sources given to constructor."""
225 return len(self.file_objs)
227 def n_active_sources(self):
228 """Return number of sources we are actually watching."""
# ignore is a list of bools; sum() counts the ignored ones.
229 return len(self.ignore) - sum(self.ignore)
233 Continue reading from sources, yield results.
235 yields result of :py:meth:`prepare_result`, which depends on what sub
236 class you called this function from.
# (elided: the enclosing `while True:`-style loop header and, between the
# enumerate below and the fstat, the lines skipping sources flagged in
# self.ignore -- confirm against full source)
239 for idx, (obj, file_handle, description, last_size, do_ignore) in \
240 enumerate(zip(self.file_objs, self.file_handles,
241 self.descriptions, self.last_sizes,
# Poll via fstat on the descriptor: cheap size check before reading.
247 new_size = os.fstat(file_handle).st_size
249 # compare to old size
250 if new_size == last_size:
# Unchanged size: ask is_used_func whether anyone still writes here;
# if not, warn once and drop the source from future iterations.
251 if not self.is_used_func(file_handle):
252 warn('no one is writing to {0} / {1} -- '
254 .format(file_handle, description),
255 category=LogReadWarning)
256 self.ignore[idx] = True
# Shrinking file: warn but keep going (read() below yields whatever
# is past the current offset).
258 if new_size < last_size: # happened at start of some tests
259 warn('{0} / {1} has become smaller ({2} --> {3})! '
260 .format(obj, description, last_size, new_size)
261 + 'Maybe you are reading from a half-initialized '
263 category=LogReadWarning)
265 new_data = obj.read()
266 except OSError as ose: # includes IOErrors
# NOTE(review): stray ')' at the end of this message text -- cosmetic
# defect in the warning string (not fixable in a doc-only pass).
267 warn('io error reading from {0} / {1}: {2})'
268 .format(obj, description, ose),
269 category=LogReadWarning)
271 except UnicodeDecodeError as ude:
272 warn('unicode error reading from {0} / {1}: {2}'
273 .format(obj, description, ude),
274 category=LogReadWarning)
# Hand raw data to the (possibly overridden) prepare_result and yield
# each entry of its result unchanged.
278 to_yield = self.prepare_result(description, new_data, idx)
279 for result in to_yield:
282 # prepare next iteration
283 self.last_sizes[idx] = new_size
285 def prepare_result(self, description, data, idx):
287 From raw new data create some yield-able results.
289 Intended for overwriting in sub-classes.
291 This function is called from __iter__ for each new data that becomes
292 available. It has to return some iterable whose entries are yielded
293 from iteration over objects of this class.
295 The result must be an iterable of objects, which are yielded as-is, so
298 This base implementation just returns its input in a list, so new data
299 is yielded from __iter__ as-is.
301 Subclass implementations can also yield tuples.
303 :param str description: Description of source of lines, one of
304 :py:data:`self.descriptions`
305 :param str new_data: Text data read from source
306 :param idx: Index of data source
307 :returns: [(description, data)], same as input
310 return [(description, data), ]
313 #: characters to `rstrip()` from end of complete lines
# Used by LineReader.prepare_result both to recognize a complete line
# (last char in this set) and to strip its terminator; covers '\n', '\r'
# and combinations like '\r\n'.
314 LINE_SPLITTERS = '\n\r'
317 class LineReader(IterativeReader):
319 An :py:class:`IterativeReader` that returns new data line-wise.
321 This means buffering partial line data.
324 def __init__(self, *args, **kwargs):
325 """Create an :py:class:`IterativeReader` and buffers for sources."""
326 super(LineReader, self).__init__(*args, **kwargs)
# One str buffer per source, holding that source's trailing partial
# (not yet newline-terminated) line between reads.
327 self.line_buffers = ['' for _ in range(self.n_sources())]
329 def prepare_result(self, description, new_data, idx):
331 Take raw new data and split it into lines.
333 If line is not complete, then buffer it.
335 Args: see super class method :py:meth:`IterativeReader.prepare_result`
336 :returns: list of 2-tuples `(description, line)` where
337 `description` is same as arg, and `line` is
338 without trailing newline characters
# Prepend whatever partial line was buffered for this source last time,
# then clear the buffer (it is re-filled below if a fragment remains).
341 all_data = self.line_buffers[idx] + new_data
342 self.line_buffers[idx] = ''
# (elided: `result = []` presumably initialized here -- it is appended
# to below and is this method's return value)
344 should_be_no_new_lines = False
# splitlines(True) keeps each line's terminator, so `line` is never
# empty and line[-1] is safe to index.
345 for line in all_data.splitlines(True):
346 if line[-1] in LINE_SPLITTERS:
347 result.append((description, line.rstrip(LINE_SPLITTERS)))
# splitlines also breaks on characters NOT in LINE_SPLITTERS (\v, \f,
# \x85, \u2028, ...); finding a "complete" line whose terminator we
# cannot strip after the final fragment means our two line notions
# disagree -- fail loudly rather than corrupt the buffering.
348 elif should_be_no_new_lines:
349 raise ValueError('line splitters are not compatible with'
# (elided: `else:` -- the unterminated final fragment is re-buffered
# for the next round of data from this source)
352 self.line_buffers[idx] = line
353 should_be_no_new_lines = True # (this should be the last)
# (elided: `return result` presumably closes this method)
358 class LogParser(LineReader):
360 Takes lines from :py:class:`LineReader` and parses their contents.
362 Requires a pattern for log lines, auto-detection is not implemented yet.
364 Iteration returns re.match result or -- if matching failed -- the original
368 def __init__(self, log_file, pattern=None):
# NOTE(review): docstring says file *name*, but the inherited
# IterativeReader constructor rejects str sources (_STR_ERR); presumably
# an open file object is expected here -- confirm against callers.
372 :param str log_file: name of log file to parse (required!)
373 :param pattern: regexp to split log lines; None (default) to return
375 :type pattern: str or None (default)
377 super(LogParser, self).__init__(log_file)
# Stored for prepare_result, which re.match()es each complete line
# against it.
379 self.pattern = pattern
381 def prepare_result(self, *args):
385 Args: see super class method :py:meth:`IterativeReader.prepare_result`
386 :returns: either a :py:class:`re.Match` if line matched
387 :py:data:`self.pattern` or just str if line did not match.
388 :rtype: :py:class:`re.Match` OR str
390 # let super class split data into lines
391 for _, raw_line in super(LogParser, self).prepare_result(*args):
392 result = re.match(self.pattern, raw_line)