Commit | Line | Data |
---|---|---|
3237d2a6 CH |
1 | # The software in this package is distributed under the GNU General |
2 | # Public License version 2 (with a special exception described below). | |
3 | # | |
4 | # A copy of GNU General Public License (GPL) is included in this distribution, | |
5 | # in the file COPYING.GPL. | |
6 | # | |
7 | # As a special exception, if other files instantiate templates or use macros | |
8 | # or inline functions from this file, or you compile this file and link it | |
9 | # with other works to produce a work based on this file, this file | |
10 | # does not by itself cause the resulting work to be covered | |
11 | # by the GNU General Public License. | |
12 | # | |
13 | # However the source code for this file must still be made available | |
14 | # in accordance with section (3) of the GNU General Public License. | |
15 | # | |
16 | # This exception does not invalidate any other reasons why a work based | |
17 | # on this file might be covered by the GNU General Public License. | |
f365f614 CH |
18 | # |
19 | # Copyright (c) 2016-2018 Intra2net AG <info@intra2net.com> | |
3237d2a6 | 20 | |
e2e13005 CH |
21 | """ |
22 | ||
23 | SUMMARY | |
24 | ------------------------------------------------------ | |
25 | Iterative reading of log files, similar to shell command `tail -f`. | |
26 | ||
27 | Copyright: Intra2net AG | |
28 | ||
29 | ||
30 | CONTENTS | |
31 | ------------------------------------------------------ | |
e7d49180 | 32 | |
7c362208 | 33 | Basic Functionality (class :py:class:`IterativeReader`): |
e7d49180 CH |
34 | Runs stat in a loop to find out whether file size has changed. Then reads the |
35 | new data and forwards that | |
36 | ||
e2e13005 | 37 | .. todo:: Want to also use lsof to find out whether file/pipe/socket was |
545f3458 | 38 | closed, so can automatically return from read loop. |
e7d49180 CH |
39 | |
40 | :py:class:`LineReader` takes output of :py:class:`IterativeReader` and returns | |
41 | it line-wise as is normal for log files | |
42 | ||
43 | :py:class:`LogParser` takes those lines and tries to parse them into fields | |
44 | like date, time, module name, urgency and message. | |
45 | ||
2713b352 | 46 | .. todo:: auto-detect log line layout |
e2e13005 CH |
47 | |
48 | ||
49 | INTERFACE | |
50 | ------------------------------------------------------ | |
51 | ||
e7d49180 | 52 | """ |
e7d49180 | 53 | import os |
215a12fd | 54 | import os.path |
445afb23 | 55 | import re |
e7d49180 | 56 | from warnings import warn |
77a07d09 | 57 | import logging |
a51235e2 | 58 | from contextlib import contextmanager |
1242b1cf | 59 | from .iter_helpers import zip_longest |
1242b1cf | 60 | from .type_helpers import is_str_or_byte, is_file_obj |
e7d49180 CH |
61 | |
62 | ||
63 | class LogReadWarning(UserWarning): | |
e2e13005 | 64 | """Warnings issued by classes in this module.""" |
e7d49180 CH |
65 | pass |
66 | ||
67 | ||
e2e13005 CH |
68 | def true_func(_): |
69 | """Replacement for :py:func:`check_is_used`. Returns `True` always.""" | |
e7d49180 CH |
70 | return True |
71 | ||
72 | ||
545f3458 CH |
73 | def false_func(_): |
74 | """Replacement for :py:func:`check_is_used`. Returns `False` always.""" | |
75 | return False | |
76 | ||
77 | ||
e2e13005 CH |
78 | def check_is_used(file_handle): |
79 | """ | |
80 | Check whether file is being written to. | |
81 | ||
82 | To be implemented, e.g. using lsof. | |
e7d49180 | 83 | |
e2e13005 CH |
84 | If beneficial could also easily supply python file object as arg. |
85 | ||
86 | :param int file_handle: OS-level file descriptor | |
e7d49180 | 87 | """ |
e2e13005 | 88 | raise NotImplementedError(file_handle) |
e7d49180 CH |
89 | |
90 | ||
e2e13005 | 91 | #: counter for unknown sources in :py:func:`create_description` |
e7d49180 CH |
92 | _create_description_unknown_counter = 0 |
93 | ||
acc472a4 | 94 | |
3e0b3965 | 95 | def create_description(file_obj, file_handle): |
e2e13005 CH |
96 | """ |
97 | Create some description for given file-like object / file descriptor. | |
e7d49180 CH |
98 | |
99 | :param file_obj: file-like object | |
3e0b3965 | 100 | :param int file_handle: os-level file descriptor |
e2e13005 CH |
101 | :returns: Short description for file-like object |
102 | :rtype: string | |
e7d49180 | 103 | """ |
e7d49180 CH |
104 | global _create_description_unknown_counter |
105 | ||
106 | try: | |
107 | desc = file_obj.name | |
108 | if desc: | |
109 | return desc | |
110 | except AttributeError: | |
111 | pass | |
112 | ||
3e0b3965 CH |
113 | if file_handle is not None: |
114 | return 'file{0}'.format(file_handle) | |
e7d49180 CH |
115 | else: |
116 | _create_description_unknown_counter += 1 | |
117 | return 'unknown{0}'.format(_create_description_unknown_counter) | |
118 | ||
119 | ||
120 | #: error message for IterativeReader constructor | |
121 | _STR_ERR = 'not accepting file name "{0}" since cannot guarantee closing ' \ | |
122 | 'files --> use with open(file_name)!' | |
123 | ||
124 | ||
879f0150 | 125 | class IterativeReader(object): |
e2e13005 CH |
126 | """ |
127 | Read continuously from a given file. | |
e7d49180 | 128 | |
e2e13005 CH |
129 | Use `os.stat(file_obj.fileno()).st_size` as measure whether file has |
130 | changed or not; Always reads as much data as possible. | |
e7d49180 | 131 | |
e2e13005 | 132 | Does not care about closing files, so does not accept file names. |
e7d49180 CH |
133 | |
134 | This is the base for class :py:class:`LineReader` that just has to | |
e2e13005 | 135 | implement a different :py:meth:`prepare_result` method. |
e7d49180 CH |
136 | """ |
137 | ||
545f3458 | 138 | def __init__(self, sources, descs=None, keep_watching=False): |
e2e13005 CH |
139 | """ |
140 | Create a reader; do some basic checks on args. | |
e7d49180 CH |
141 | |
142 | :param sources: iterable over sources. Sources can be opened file | |
143 | objects or read-opened os-level file descriptors. | |
144 | Calling code has to ensure they are closed properly, so | |
145 | best use this within a "with open(file_name) as | |
146 | file_handle:"-context. If sources is a single file | |
147 | obj/descriptor, both source and desc will be converted | |
148 | to lists of length 1 | |
149 | :param descs: can be anything of same length as sources. If sources is | |
150 | a single source, then descs is also converted to a list | |
151 | of length 1. If not given (i.e. None), will use | |
152 | :py:func:`create_description` to guess descriptions | |
545f3458 CH |
153 | :param bool keep_watching: keep watching file that is not changing in |
154 | size. Need to manually tell whether file | |
155 | is being written to or not since auto-detect | |
156 | is not implemented yet. | |
e7d49180 CH |
157 | :raises: OSError when testing fstat on source |
158 | """ | |
159 | if not sources: | |
160 | raise ValueError('need at least some source!') | |
161 | elif is_str_or_byte(sources): | |
162 | raise ValueError(_STR_ERR.format(sources)) | |
163 | elif is_file_obj(sources) or isinstance(sources, int): | |
164 | source_input = [sources, ] | |
165 | desc_input = [descs, ] | |
166 | else: | |
167 | source_input = sources # assume some iterable | |
168 | desc_input = descs | |
169 | ||
170 | # now divide sources into os-level file descriptors for os.fstat, | |
171 | # and file objects for read() | |
172 | self.file_objs = [] | |
3e0b3965 | 173 | self.file_handles = [] # file descriptOR, not descriptION |
e7d49180 CH |
174 | for source in source_input: |
175 | if is_file_obj(source): | |
176 | self.file_objs.append(source) | |
3e0b3965 | 177 | self.file_handles.append(source.fileno()) |
e7d49180 CH |
178 | elif isinstance(source, int): |
179 | self.file_objs.append(os.fdopen(source)) | |
3e0b3965 | 180 | self.file_handles.append(source) |
e7d49180 CH |
181 | elif is_str_or_byte(source): |
182 | raise ValueError(_STR_ERR.format(source)) | |
183 | else: | |
184 | raise ValueError('source {0} is neither file obj nor file ' | |
185 | 'descriptor!') | |
186 | ||
d910eba5 | 187 | # try to fstat the new file descriptor just for testing |
3e0b3965 | 188 | os.fstat(self.file_handles[-1]) |
e7d49180 CH |
189 | |
190 | # guess descriptions if not given | |
191 | if not desc_input: | |
3e0b3965 CH |
192 | self.descriptions = [create_description(obj, file_handle) |
193 | for obj, file_handle | |
194 | in zip(self.file_objs, self.file_handles)] | |
e7d49180 CH |
195 | else: |
196 | try: | |
197 | if len(desc_input) != len(self.file_objs): | |
198 | raise ValueError('need same number of sources and ' | |
199 | 'descriptions!') | |
200 | except TypeError: | |
201 | pass # desc_input is generator or so | |
202 | ||
203 | self.descriptions = [] | |
3e0b3965 CH |
204 | for obj, file_handle, description in \ |
205 | zip_longest(self.file_objs, self.file_handles, desc_input): | |
e7d49180 CH |
206 | if obj is None: |
207 | raise ValueError('more descriptions than sources!') | |
208 | elif description is None: | |
209 | self.descriptions.append(create_description(obj, | |
3e0b3965 | 210 | file_handle)) |
e7d49180 CH |
211 | else: |
212 | self.descriptions.append(description) | |
213 | ||
214 | self.last_sizes = [0 for _ in self.file_objs] | |
215 | self.ignore = [False for _ in self.file_objs] | |
216 | ||
545f3458 | 217 | if keep_watching: |
3e0b3965 | 218 | self.is_used_func = true_func |
545f3458 CH |
219 | else: |
220 | self.is_used_func = false_func | |
221 | # use some day: self.is_used_func = check_is_used | |
e7d49180 | 222 | |
3e0b3965 CH |
223 | for obj, file_handle, description in \ |
224 | zip(self.file_objs, self.file_handles, self.descriptions): | |
77a07d09 CH |
225 | logging.debug('log_read initialized with file descriptor {0}, ' |
226 | 'file obj {1}, description "{2}"' | |
3e0b3965 | 227 | .format(file_handle, obj, description)) |
e7d49180 CH |
228 | |
229 | def n_sources(self): | |
e2e13005 | 230 | """Return number of sources given to constructor.""" |
e7d49180 CH |
231 | return len(self.file_objs) |
232 | ||
233 | def n_active_sources(self): | |
e2e13005 | 234 | """Return number of sources we are actually watching.""" |
e7d49180 CH |
235 | return len(self.ignore) - sum(self.ignore) |
236 | ||
237 | def __iter__(self): | |
e2e13005 CH |
238 | """ |
239 | Continue reading from sources, yield results. | |
240 | ||
44353454 CH |
241 | yields result of :py:meth:`prepare_result`, which depends on what |
242 | subclass you called this function from. | |
e2e13005 | 243 | """ |
e7d49180 | 244 | while True: |
545f3458 CH |
245 | if all(self.ignore): |
246 | break | |
247 | ||
3e0b3965 CH |
248 | for idx, (obj, file_handle, description, last_size, do_ignore) in \ |
249 | enumerate(zip(self.file_objs, self.file_handles, | |
e7d49180 CH |
250 | self.descriptions, self.last_sizes, |
251 | self.ignore)): | |
e7d49180 CH |
252 | if do_ignore: |
253 | continue | |
254 | ||
255 | # get new file size | |
3e0b3965 | 256 | new_size = os.fstat(file_handle).st_size |
e7d49180 CH |
257 | |
258 | # compare to old size | |
259 | if new_size == last_size: | |
3e0b3965 | 260 | if not self.is_used_func(file_handle): |
acc472a4 | 261 | self.ignore[idx] = True |
9f2fbfa7 | 262 | else: |
ea8b01a3 CH |
263 | if new_size < last_size: # happened at start of some tests |
264 | warn('{0} / {1} has become smaller ({2} --> {3})! ' | |
265 | .format(obj, description, last_size, new_size) | |
266 | + 'Maybe you are reading from a half-initialized ' | |
267 | + 'file?', | |
9f2fbfa7 | 268 | category=LogReadWarning) |
e7d49180 CH |
269 | try: |
270 | new_data = obj.read() | |
271 | except OSError as ose: # includes IOErrors | |
272 | warn('io error reading from {0} / {1}: {2})' | |
273 | .format(obj, description, ose), | |
274 | category=LogReadWarning) | |
9451b2ce CH |
275 | new_data = str(ose) |
276 | except UnicodeDecodeError as ude: | |
277 | warn('unicode error reading from {0} / {1}: {2}' | |
278 | .format(obj, description, ude), | |
279 | category=LogReadWarning) | |
280 | new_data = str(ude) | |
e7d49180 CH |
281 | |
282 | # post-processing | |
44353454 | 283 | for result in self.prepare_result(description, new_data, idx): |
e7d49180 CH |
284 | yield result |
285 | ||
286 | # prepare next iteration | |
287 | self.last_sizes[idx] = new_size | |
288 | ||
289 | def prepare_result(self, description, data, idx): | |
e2e13005 CH |
290 | """ |
291 | From raw new data create some yield-able results. | |
e7d49180 | 292 | |
7628bc48 | 293 | Intended for overwriting in subclasses. |
e7d49180 | 294 | |
e2e13005 | 295 | This function is called from __iter__ for each new data that becomes |
44353454 | 296 | available. It has to provide results which are forwarded to caller. |
e7d49180 | 297 | |
44353454 CH |
298 | This base implementation just yields its input, so new data is yielded |
299 | from `__iter__` as-is. | |
e2e13005 CH |
300 | |
301 | :param str description: Description of source of lines, one of | |
302 | :py:data:`self.descriptions` | |
7628bc48 | 303 | :param str data: Text data read from source |
e2e13005 | 304 | :param idx: Index of data source |
44353454 | 305 | :returns: nothing but yields [(description, data, idx], same as input |
e7d49180 | 306 | """ |
44353454 | 307 | yield description, data, idx |
e7d49180 CH |
308 | |
309 | ||
e2e13005 | 310 | #: characters to `rstrip()` from end of complete lines |
e7d49180 CH |
311 | LINE_SPLITTERS = '\n\r' |
312 | ||
acc472a4 | 313 | |
e7d49180 | 314 | class LineReader(IterativeReader): |
e2e13005 CH |
315 | """ |
316 | An :py:class:`IterativeReader` that returns new data line-wise. | |
edd68a74 | 317 | |
e2e13005 | 318 | This means buffering partial line data. |
e7d49180 CH |
319 | """ |
320 | ||
321 | def __init__(self, *args, **kwargs): | |
e2e13005 | 322 | """Create an :py:class:`IterativeReader and buffers for sources.""" |
01fe1580 | 323 | super(LineReader, self).__init__(*args, **kwargs) |
d910eba5 | 324 | self.line_buffers = ['' for _ in range(self.n_sources())] |
e7d49180 | 325 | |
d910eba5 | 326 | def prepare_result(self, description, new_data, idx): |
e2e13005 CH |
327 | """ |
328 | Take raw new data and split it into lines. | |
e7d49180 | 329 | |
e2e13005 | 330 | If line is not complete, then buffer it. |
e7d49180 | 331 | |
e2e13005 | 332 | Args: see super class method :py:meth:`IterativeReader.prepare_result` |
aef38def CH |
333 | :returns: list of 3-tuples `(description, line, idx)` where |
334 | `description` and `idx` are same as args, and `line` is | |
e2e13005 | 335 | without trailing newline characters |
aef38def | 336 | :rtype: [(str, str, int)] |
e7d49180 | 337 | """ |
e7d49180 | 338 | all_data = self.line_buffers[idx] + new_data |
d910eba5 | 339 | self.line_buffers[idx] = '' |
e7d49180 | 340 | should_be_no_new_lines = False |
879f0150 | 341 | for line in all_data.splitlines(True): |
e7d49180 | 342 | if line[-1] in LINE_SPLITTERS: |
44353454 | 343 | yield description, line.rstrip(LINE_SPLITTERS), idx |
e7d49180 | 344 | elif should_be_no_new_lines: |
37288ebb CH |
345 | # self-check |
346 | raise ValueError('Programming error: something went wrong with ' | |
347 | 'line splitting/buffering.') | |
e7d49180 CH |
348 | else: |
349 | self.line_buffers[idx] = line | |
350 | should_be_no_new_lines = True # (this should be the last) | |
351 | ||
e7d49180 | 352 | |
445afb23 | 353 | class LogParser(LineReader): |
e2e13005 CH |
354 | """ |
355 | Takes lines from :py:class:`LineReader` and parses their contents. | |
445afb23 | 356 | |
e2e13005 | 357 | Requires a pattern for log lines, auto-detection is not implemented yet. |
445afb23 | 358 | |
545f3458 | 359 | Iteration returns :py:class:`re.match` result or -- if matching failed -- |
44353454 CH |
360 | None. The latest unparsed line is available as `self.last_unparsed_line`. |
361 | Usage recommendation: | |
545f3458 CH |
362 | |
363 | with open(log_file_name, 'rt') as file_handle: | |
44353454 CH |
364 | parser = log_read.LogParser(file_handle, pattern=my_pattern): |
365 | for _, data, _ in parser: | |
366 | if data is None: | |
367 | print(f'Failed to parse line {parser.last_unparsed_line}') | |
545f3458 | 368 | continue |
44353454 | 369 | line_parts = data.groupdict() |
545f3458 | 370 | ...do stuff with line_parts... |
445afb23 CH |
371 | """ |
372 | ||
373 | def __init__(self, log_file, pattern=None): | |
e2e13005 CH |
374 | """ |
375 | Create a LogParser. | |
445afb23 CH |
376 | |
377 | :param str log_file: name of log file to parse (required!) | |
378 | :param pattern: regexp to split log lines; None (default) to return | |
379 | line as they are | |
e2e13005 | 380 | :type pattern: str or None (default) |
445afb23 CH |
381 | """ |
382 | super(LogParser, self).__init__(log_file) | |
383 | ||
384 | self.pattern = pattern | |
44353454 | 385 | self.last_unparsed_line = '' |
445afb23 CH |
386 | |
387 | def prepare_result(self, *args): | |
e2e13005 CH |
388 | """ |
389 | Try parsing lines. | |
390 | ||
391 | Args: see super class method :py:meth:`IterativeReader.prepare_result` | |
aef38def CH |
392 | :returns: 3-tuples `(description, line, idx)` where `description` and |
393 | `idx` are same as input args and `line` is either a | |
394 | :py:class:`re.Match` if line matched :py:data:`self.pattern` | |
395 | or just str if line did not match. | |
396 | :rtype: [(str, :py:class:`re.Match` OR str, int)] | |
e2e13005 | 397 | """ |
445afb23 | 398 | # let super class split data into lines |
aef38def CH |
399 | for description, raw_line, idx in \ |
400 | super(LogParser, self).prepare_result(*args): | |
1a537f05 CH |
401 | matches = re.match(self.pattern, raw_line) |
402 | if matches: | |
44353454 | 403 | yield description, matches, idx |
445afb23 | 404 | else: |
44353454 CH |
405 | self.last_unparsed_line = raw_line |
406 | yield description, None, idx | |
a51235e2 CH |
407 | |
408 | @classmethod | |
409 | @contextmanager | |
410 | def create_for(cls, filename, *args, **kwargs): | |
411 | """ | |
412 | Open single file, yield LogParser. Ensures file is closed afterwards. | |
413 | ||
414 | This allows opening file and creation LogParser for it to one line: | |
415 | ||
416 | with LogParser.create_for('/var/log/messages', SYS_LOG_PATTERN) as parser: | |
417 | for _, matches, _ in parser: | |
418 | try: | |
419 | print(matches.groupdict()) | |
420 | except Exception: | |
421 | print(f'UNPARSED: {parser.last_unparsed_line}') | |
422 | ||
423 | :param str filename: something that :py:meth:`open` accepts | |
424 | :param args: Forwarded to constructor | |
425 | :param kwargs: Forwarded to constructor | |
426 | """ | |
427 | with open(filename) as file_handle: | |
428 | yield cls(file_handle, *args, **kwargs) |