# The software in this package is distributed under the GNU General
# Public License version 2 (with a special exception described below).
#
# A copy of GNU General Public License (GPL) is included in this distribution,
# in the file COPYING.GPL.
#
# As a special exception, if other files instantiate templates or use macros
# or inline functions from this file, or you compile this file and link it
# with other works to produce a work based on this file, this file
# does not by itself cause the resulting work to be covered
# by the GNU General Public License.
#
# However the source code for this file must still be made available
# in accordance with section (3) of the GNU General Public License.
#
# This exception does not invalidate any other reasons why a work based
# on this file might be covered by the GNU General Public License.
#
# Copyright (c) 2016-2018 Intra2net AG <info@intra2net.com>

"""

SUMMARY
------------------------------------------------------
Iterative reading of log files, similar to shell command `tail -f`.

Copyright: Intra2net AG


CONTENTS
------------------------------------------------------

Basic Functionality (class :py:class:`IterativeReader`):
Runs stat in a loop to find out whether the file size has changed, then reads
the new data and forwards it.

.. todo:: Want to also use lsof to find out whether file/pipe/socket was
          closed, so we can return from the read loop.

:py:class:`LineReader` takes the output of :py:class:`IterativeReader` and
returns it line-wise, as is normal for log files.

:py:class:`LogParser` takes those lines and tries to parse them into fields
like date, time, module name, urgency and message.

.. todo:: auto-detect log line layout
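
A rough usage sketch (the log file name is made up, and this assumes the
package is importable as ``pyi2ncommon``)::

    from pyi2ncommon.log_read import LineReader

    # loops forever, like `tail -f`
    with open('/var/log/syslog') as log_file:
        for description, line, source_idx in LineReader(log_file):
            print('{0}: {1}'.format(description, line))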


INTERFACE
------------------------------------------------------

"""

import os
import os.path
import re
from warnings import warn
import logging
from .iter_helpers import zip_longest
from .type_helpers import is_str_or_byte, is_file_obj


class LogReadWarning(UserWarning):
    """Warnings issued by classes in this module."""
    pass


def true_func(_):
    """Replacement for :py:func:`check_is_used`. Returns `True` always."""
    return True


def check_is_used(file_handle):
    """
    Check whether file is being written to.

    To be implemented, e.g. using lsof.

    If beneficial, could also easily accept a python file object as arg.

    :param int file_handle: OS-level file descriptor
    """
    raise NotImplementedError(file_handle)


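# Hedged sketch (function name and approach are illustrative assumptions, not
# part of this module's API) of one way check_is_used() could be implemented
# on Linux: resolve the descriptor to a path via /proc, then ask `lsof` which
# processes have that path open. Requires /proc and an installed lsof binary;
# note that "some other process has the file open" is a weaker test than
# "someone is still writing to it".
def _check_is_used_lsof_sketch(file_handle):
    """Return True if a process other than ours has the file open (sketch)."""
    import subprocess

    # on Linux, /proc/self/fd/<fd> is a symlink to the underlying path
    path = os.readlink('/proc/self/fd/{0}'.format(file_handle))

    # `lsof -t PATH` prints just the PIDs of processes that have PATH open
    # (and nothing at all if there are none)
    proc = subprocess.Popen(['lsof', '-t', path], stdout=subprocess.PIPE)
    output, _ = proc.communicate()

    pids = set(int(pid) for pid in output.decode().split())
    pids.discard(os.getpid())  # we have the file open ourselves, of course
    return bool(pids)

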
e2e13005 86#: counter for unknown sources in :py:func:`create_description`
e7d49180
CH
87_create_description_unknown_counter = 0
88
acc472a4 89
3e0b3965 90def create_description(file_obj, file_handle):
e2e13005
CH
91 """
92 Create some description for given file-like object / file descriptor.
e7d49180
CH
93
94 :param file_obj: file-like object
3e0b3965 95 :param int file_handle: os-level file descriptor
e2e13005
CH
96 :returns: Short description for file-like object
97 :rtype: string
e7d49180 98 """
e7d49180
CH
99 global _create_description_unknown_counter
100
101 try:
102 desc = file_obj.name
103 if desc:
104 return desc
105 except AttributeError:
106 pass
107
3e0b3965
CH
108 if file_handle is not None:
109 return 'file{0}'.format(file_handle)
e7d49180
CH
110 else:
111 _create_description_unknown_counter += 1
112 return 'unknown{0}'.format(_create_description_unknown_counter)
113
114
115#: error message for IterativeReader constructor
116_STR_ERR = 'not accepting file name "{0}" since cannot guarantee closing ' \
117 'files --> use with open(file_name)!'
118
119
879f0150 120class IterativeReader(object):
e2e13005
CH
121 """
122 Read continuously from a given file.
e7d49180 123
e2e13005
CH
124 Use `os.stat(file_obj.fileno()).st_size` as measure whether file has
125 changed or not; Always reads as much data as possible.
e7d49180 126
e2e13005 127 Does not care about closing files, so does not accept file names.
e7d49180
CH
128
129 This is the base for class :py:class:`LineReader` that just has to
e2e13005 130 implement a different :py:meth:`prepare_result` method.
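
    A rough sketch of watching several sources at once (file names are made
    up for illustration)::

        with open('/var/log/mail.log') as mail_log:
            with open('/var/log/syslog') as syslog:
                reader = IterativeReader([mail_log, syslog],
                                         descs=['mail', 'sys'])
                for description, new_data, idx in reader:
                    print('got data from {0}: {1!r}'.format(description,
                                                            new_data))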
    """

    def __init__(self, sources, descs=None, return_when_done=False):
        """
        Create a reader; do some basic checks on args.

        :param sources: iterable over sources. Sources can be opened file
                        objects or read-opened os-level file descriptors.
                        Calling code has to ensure they are closed properly, so
                        best use this within a "with open(file_name) as
                        file_handle:"-context. If sources is a single file
                        obj/descriptor, both source and desc will be converted
                        to lists of length 1
        :param descs: can be anything of same length as sources. If sources is
                      a single source, then descs is also converted to a list
                      of length 1. If not given (i.e. None), will use
                      :py:func:`create_description` to guess descriptions
        :param bool return_when_done: ignore file_handle if no-one is writing
                                      to it any more. Return from iterator when
                                      all watched files are done (not
                                      implemented yet)
        :raises: OSError when testing fstat on source
        """
        if not sources:
            raise ValueError('need at least some source!')
        elif is_str_or_byte(sources):
            raise ValueError(_STR_ERR.format(sources))
        elif is_file_obj(sources) or isinstance(sources, int):
            source_input = [sources, ]
            desc_input = [descs, ]
        else:
            source_input = sources  # assume some iterable
            desc_input = descs

        # now divide sources into os-level file descriptors for os.fstat,
        # and file objects for read()
        self.file_objs = []
        self.file_handles = []  # file descriptOR, not descriptION
        for source in source_input:
            if is_file_obj(source):
                self.file_objs.append(source)
                self.file_handles.append(source.fileno())
            elif isinstance(source, int):
                self.file_objs.append(os.fdopen(source))
                self.file_handles.append(source)
            elif is_str_or_byte(source):
                raise ValueError(_STR_ERR.format(source))
            else:
                raise ValueError('source {0} is neither file obj nor file '
                                 'descriptor!'.format(source))

            # try to fstat the new file descriptor just for testing
            os.fstat(self.file_handles[-1])

        # guess descriptions if not given
        if not desc_input:
            self.descriptions = [create_description(obj, file_handle)
                                 for obj, file_handle
                                 in zip(self.file_objs, self.file_handles)]
        else:
            try:
                if len(desc_input) != len(self.file_objs):
                    raise ValueError('need same number of sources and '
                                     'descriptions!')
            except TypeError:
                pass  # desc_input is generator or so

            self.descriptions = []
            for obj, file_handle, description in \
                    zip_longest(self.file_objs, self.file_handles, desc_input):
                if obj is None:
                    raise ValueError('more descriptions than sources!')
                elif description is None:
                    self.descriptions.append(create_description(obj,
                                                                file_handle))
                else:
                    self.descriptions.append(description)

        self.last_sizes = [0 for _ in self.file_objs]
        self.ignore = [False for _ in self.file_objs]

        if return_when_done:
            self.is_used_func = check_is_used
        else:
            self.is_used_func = true_func

        for obj, file_handle, description in \
                zip(self.file_objs, self.file_handles, self.descriptions):
            logging.debug('log_read initialized with file descriptor {0}, '
                          'file obj {1}, description "{2}"'
                          .format(file_handle, obj, description))

    def n_sources(self):
        """Return number of sources given to constructor."""
        return len(self.file_objs)

    def n_active_sources(self):
        """Return number of sources we are actually watching."""
        return len(self.ignore) - sum(self.ignore)

    def __iter__(self):
        """
        Continue reading from sources, yield results.

        Yields the results of :py:meth:`prepare_result`, which depend on which
        subclass you called this function from.
        """
        while True:
            for idx, (obj, file_handle, description, last_size, do_ignore) in \
                    enumerate(zip(self.file_objs, self.file_handles,
                                  self.descriptions, self.last_sizes,
                                  self.ignore)):
                if do_ignore:
                    continue

                # get new file size
                new_size = os.fstat(file_handle).st_size

                # compare to old size
                if new_size == last_size:
                    if not self.is_used_func(file_handle):
                        warn('no one is writing to {0} / {1} -- '
                             'stop watching it!'
                             .format(file_handle, description),
                             category=LogReadWarning)
                        self.ignore[idx] = True
                else:
                    if new_size < last_size:  # happened at start of some tests
                        warn('{0} / {1} has become smaller ({2} --> {3})! '
                             .format(obj, description, last_size, new_size)
                             + 'Maybe you are reading from a half-initialized '
                             + 'file?',
                             category=LogReadWarning)
                    try:
                        new_data = obj.read()
                    except OSError as ose:  # includes IOErrors
                        warn('io error reading from {0} / {1}: {2}'
                             .format(obj, description, ose),
                             category=LogReadWarning)
                        new_data = str(ose)
                    except UnicodeDecodeError as ude:
                        warn('unicode error reading from {0} / {1}: {2}'
                             .format(obj, description, ude),
                             category=LogReadWarning)
                        new_data = str(ude)

                    # post-processing
                    to_yield = self.prepare_result(description, new_data, idx)
                    for result in to_yield:
                        yield result

                    # prepare next iteration
                    self.last_sizes[idx] = new_size

    def prepare_result(self, description, data, idx):
        """
        From raw new data create some yield-able results.

        Intended for overwriting in subclasses.

        This function is called from __iter__ for each new piece of data that
        becomes available. It has to return some iterable whose entries are
        yielded from iteration over objects of this class.

        The result must be an iterable of objects, which are yielded as-is, so
        they can have any form.

        This base implementation just returns its input in a list, so new data
        is yielded from __iter__ as-is.

        Subclass implementations can also yield tuples.

        :param str description: Description of source of lines, one of
                                :py:data:`self.descriptions`
        :param str data: Text data read from source
        :param idx: Index of data source
        :returns: list of one tuple `[(description, data, idx)]`, same as input
        :rtype: [(str, str, int)]
        """
        return [(description, data, idx), ]


#: characters to `rstrip()` from end of complete lines
LINE_SPLITTERS = '\n\r'


class LineReader(IterativeReader):
    """
    An :py:class:`IterativeReader` that returns new data line-wise.

    This means buffering partial line data.
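
    A small sketch of the buffering (the file name is made up, and calling
    :py:meth:`prepare_result` directly is shown only to illustrate what
    happens internally)::

        import os

        with open('test.log') as log_file:
            reader = LineReader(log_file)
            # last line of the chunk is incomplete: the complete first line
            # is returned, the trailing part is kept in the buffer
            reader.prepare_result('test.log', 'done' + os.linesep + 'part', 0)
            # --> [('test.log', 'done', 0)]
            # when the rest of the line arrives, the buffered part is
            # prepended and the now-complete line is returned
            reader.prepare_result('test.log', 'ial line' + os.linesep, 0)
            # --> [('test.log', 'partial line', 0)]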
    """

    def __init__(self, *args, **kwargs):
        """Create an :py:class:`IterativeReader` and buffers for sources."""
        super(LineReader, self).__init__(*args, **kwargs)
        self.line_buffers = ['' for _ in range(self.n_sources())]

    def prepare_result(self, description, new_data, idx):
        """
        Take raw new data and split it into lines.

        If the last line is not complete, then buffer it.

        Args: see super class method :py:meth:`IterativeReader.prepare_result`

        :returns: list of 3-tuples `(description, line, idx)` where
                  `description` and `idx` are the same as the args, and `line`
                  is without trailing newline characters
        :rtype: [(str, str, int)]
        """
        all_data = self.line_buffers[idx] + new_data
        self.line_buffers[idx] = ''
        result = []
        should_be_no_new_lines = False
        for line in all_data.splitlines(True):
            if line[-1] in LINE_SPLITTERS:
                result.append((description, line.rstrip(LINE_SPLITTERS), idx))
            elif should_be_no_new_lines:
                # self-check
                raise ValueError('Programming error: something went wrong with '
                                 'line splitting/buffering.')
            else:
                self.line_buffers[idx] = line
                should_be_no_new_lines = True  # (this should be the last)

        return result


class LogParser(LineReader):
    """
    Takes lines from :py:class:`LineReader` and parses their contents.

    Requires a pattern for log lines; auto-detection is not implemented yet.

    Iteration returns a re.match result or -- if matching failed -- the
    original raw line.
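
    A hedged sketch of a possible pattern (made up for illustration, not
    shipped with this class) for syslog-style lines such as
    ``Jun  3 10:15:01 myhost CRON[1234]: some message``::

        pattern = ('(?P<timestamp>[A-Z][a-z]{2} +[0-9]+ [0-9:]+) '
                   '(?P<host>[^ ]+) (?P<procname>[^:]+): (?P<message>.*)')

        with open('/var/log/syslog') as log_file:
            for description, line, idx in LogParser(log_file, pattern):
                if not isinstance(line, str):  # it is a match object
                    print(line.group('message'))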
    """

    def __init__(self, log_file, pattern=None):
        """
        Create a LogParser.

        :param log_file: open log file object to parse (required!); file names
                         are not accepted, see :py:class:`IterativeReader`
        :param pattern: regexp to split log lines; None (default) to return
                        lines as they are
        :type pattern: str or None (default)
        """
        super(LogParser, self).__init__(log_file)

        self.pattern = pattern

    def prepare_result(self, *args):
        """
        Try parsing lines.

        Args: see super class method :py:meth:`IterativeReader.prepare_result`

        :returns: list of 3-tuples `(description, line, idx)` where
                  `description` and `idx` are the same as the input args and
                  `line` is either a :py:class:`re.Match` if the line matched
                  :py:data:`self.pattern`, or just the str if it did not match
                  (or no pattern was given)
        :rtype: [(str, :py:class:`re.Match` OR str, int)]
        """
        results = []
        # let the super class split data into lines
        for description, raw_line, idx in \
                super(LogParser, self).prepare_result(*args):
            match = None
            if self.pattern is not None:
                match = re.match(self.pattern, raw_line)
            if match:
                results.append((description, match, idx))
            else:
                results.append((description, raw_line, idx))
        return results