Commit | Line | Data |
---|---|---|
3237d2a6 CH |
1 | # The software in this package is distributed under the GNU General |
2 | # Public License version 2 (with a special exception described below). | |
3 | # | |
4 | # A copy of GNU General Public License (GPL) is included in this distribution, | |
5 | # in the file COPYING.GPL. | |
6 | # | |
7 | # As a special exception, if other files instantiate templates or use macros | |
8 | # or inline functions from this file, or you compile this file and link it | |
9 | # with other works to produce a work based on this file, this file | |
10 | # does not by itself cause the resulting work to be covered | |
11 | # by the GNU General Public License. | |
12 | # | |
13 | # However the source code for this file must still be made available | |
14 | # in accordance with section (3) of the GNU General Public License. | |
15 | # | |
16 | # This exception does not invalidate any other reasons why a work based | |
17 | # on this file might be covered by the GNU General Public License. | |
f365f614 CH |
18 | # |
19 | # Copyright (c) 2016-2018 Intra2net AG <info@intra2net.com> | |
3237d2a6 | 20 | |
e2e13005 CH |
21 | """ |
22 | ||
23 | SUMMARY | |
24 | ------------------------------------------------------ | |
25 | Iterative reading of log files, similar to shell command `tail -f`. | |
26 | ||
27 | Copyright: Intra2net AG | |
28 | ||
29 | ||
30 | CONTENTS | |
31 | ------------------------------------------------------ | |
e7d49180 | 32 | |
7c362208 | 33 | Basic Functionality (class :py:class:`IterativeReader`): |
e7d49180 CH |
34 | Runs stat in a loop to find out whether file size has changed. Then reads the |
35 | new data and yields it. | |
36 | ||
e2e13005 CH |
37 | .. todo:: Want to also use lsof to find out whether file/pipe/socket was |
38 | closed, so can return from read loop | |
e7d49180 CH |
39 | |
40 | :py:class:`LineReader` takes output of :py:class:`IterativeReader` and returns | |
41 | it line-wise as is normal for log files | |
42 | ||
43 | :py:class:`LogParser` takes those lines and tries to parse them into fields | |
44 | like date, time, module name, urgency and message. | |
45 | ||
2713b352 | 46 | .. todo:: auto-detect log line layout |
e2e13005 CH |
47 | |
48 | ||
49 | INTERFACE | |
50 | ------------------------------------------------------ | |
51 | ||
e7d49180 CH |
52 | """ |
53 | ||
54 | import os | |
215a12fd | 55 | import os.path |
445afb23 | 56 | import re |
e7d49180 | 57 | from warnings import warn |
77a07d09 | 58 | import logging |
1242b1cf | 59 | from .iter_helpers import zip_longest |
1242b1cf | 60 | from .type_helpers import is_str_or_byte, is_file_obj |
e7d49180 CH |
61 | |
62 | ||
class LogReadWarning(UserWarning):
    """Warning category used for all warnings emitted by this module."""
66 | ||
67 | ||
e2e13005 CH |
def true_func(_):
    """
    Stand-in for :py:func:`check_is_used` that accepts and ignores one arg.

    :param _: ignored
    :returns: `True`, regardless of the argument
    :rtype: bool
    """
    return True
71 | ||
72 | ||
e2e13005 CH |
def check_is_used(file_handle):
    """
    Tell whether someone is still writing to the given file.

    Not implemented yet (idea: ask lsof); calling it always raises. Taking a
    python file object instead of a descriptor would be an easy alternative
    if that turns out to be more useful.

    :param int file_handle: OS-level file descriptor
    :raises: NotImplementedError always, with `file_handle` as its argument
    """
    raise NotImplementedError(file_handle)
e7d49180 CH |
84 | |
85 | ||
#: counter for unknown sources in :py:func:`create_description`
_create_description_unknown_counter = 0


def create_description(file_obj, file_handle):
    """
    Create some description for given file-like object / file descriptor.

    Preference order:

    1. the non-empty `name` attribute of `file_obj`, returned unchanged;
    2. `'file<N>'` where N is `file_handle` or, if that is `None`, the integer
       that `file_obj.name` holds (objects created by `os.fdopen` report
       their descriptor number as name);
    3. `'unknown<K>'` with a module-wide running counter K.

    :param file_obj: file-like object
    :param int file_handle: os-level file descriptor; may be `None`
    :returns: Short description for file-like object
    :rtype: string
    """
    global _create_description_unknown_counter

    name = getattr(file_obj, 'name', None)
    if isinstance(name, int) and not isinstance(name, bool):
        # name is a descriptor number, not text: render it like a descriptor
        # so callers always get a string (also covers descriptor 0)
        if file_handle is None:
            file_handle = name
    elif name:
        return name

    if file_handle is not None:
        return 'file{0}'.format(file_handle)

    _create_description_unknown_counter += 1
    return 'unknown{0}'.format(_create_description_unknown_counter)
113 | ||
114 | ||
#: error message for IterativeReader constructor
_STR_ERR = 'not accepting file name "{0}" since cannot guarantee closing ' \
           'files --> use with open(file_name)!'


class IterativeReader(object):
    """
    Read continuously from a given file.

    Use `os.fstat(file_handle).st_size` as measure whether file has changed
    or not; always reads as much data as possible.

    Does not care about closing files, so does not accept file names.

    This is the base for class :py:class:`LineReader` that just has to
    implement a different :py:meth:`prepare_result` method.
    """

    def __init__(self, sources, descs=None, return_when_done=False):
        """
        Create a reader; do some basic checks on args.

        :param sources: iterable over sources. Sources can be opened file
                        objects or read-opened os-level file descriptors.
                        Calling code has to ensure they are closed properly, so
                        best use this within a "with open(file_name) as
                        file_handle:"-context. If sources is a single file
                        obj/descriptor, both source and desc will be converted
                        to lists of length 1
        :param descs: can be anything of same length as sources. If sources is
                      a single source, then descs is also converted to a list
                      of length 1. If not given (i.e. None), will use
                      :py:func:`create_description` to guess descriptions
        :param bool return_when_done: ignore file_handle if no-one is writing
                                      to it any more. Return from iterator when
                                      all watched files are done. Selects
                                      :py:func:`check_is_used`, which is not
                                      implemented yet and raises
        :raises: OSError when testing fstat on source
        :raises: ValueError if sources is empty or a file name, if a source is
                 neither file object nor descriptor or if the number of
                 descriptions does not match the number of sources
        """
        # NOTE: a bare descriptor 0 is falsy and therefore rejected here, too
        if not sources:
            raise ValueError('need at least some source!')
        elif is_str_or_byte(sources):
            raise ValueError(_STR_ERR.format(sources))
        elif is_file_obj(sources) or isinstance(sources, int):
            source_input = [sources, ]
            desc_input = [descs, ]
        else:
            source_input = sources      # assume some iterable
            desc_input = descs

        # now divide sources into os-level file descriptors for os.fstat,
        # and file objects for read()
        self.file_objs = []
        self.file_handles = []      # file descriptOR, not descriptION
        for source in source_input:
            if is_file_obj(source):
                self.file_objs.append(source)
                self.file_handles.append(source.fileno())
            elif isinstance(source, int):
                self.file_objs.append(os.fdopen(source))
                self.file_handles.append(source)
            elif is_str_or_byte(source):
                raise ValueError(_STR_ERR.format(source))
            else:
                raise ValueError('source {0} is neither file obj nor file '
                                 'descriptor!'.format(source))

            # try to fstat the new file descriptor just for testing
            os.fstat(self.file_handles[-1])

        # guess descriptions if not given
        if not desc_input:
            self.descriptions = [create_description(obj, file_handle)
                                 for obj, file_handle
                                 in zip(self.file_objs, self.file_handles)]
        else:
            try:
                if len(desc_input) != len(self.file_objs):
                    raise ValueError('need same number of sources and '
                                     'descriptions!')
            except TypeError:
                pass  # desc_input is generator or so

            self.descriptions = []
            for obj, file_handle, description in \
                    zip_longest(self.file_objs, self.file_handles, desc_input):
                if obj is None:
                    raise ValueError('more descriptions than sources!')
                elif description is None:
                    # fill gaps (or a too short desc_input) with a guess
                    self.descriptions.append(create_description(obj,
                                                                file_handle))
                else:
                    self.descriptions.append(description)

        # per-source state: size seen in last iteration, and whether the
        # source has been dropped from watching
        self.last_sizes = [0 for _ in self.file_objs]
        self.ignore = [False for _ in self.file_objs]

        if return_when_done:
            self.is_used_func = check_is_used
        else:
            self.is_used_func = true_func

        for obj, file_handle, description in \
                zip(self.file_objs, self.file_handles, self.descriptions):
            logging.debug('log_read initialized with file descriptor {0}, '
                          'file obj {1}, description "{2}"'
                          .format(file_handle, obj, description))

    def n_sources(self):
        """Return number of sources given to constructor."""
        return len(self.file_objs)

    def n_active_sources(self):
        """Return number of sources we are actually watching."""
        return len(self.ignore) - sum(self.ignore)

    def __iter__(self):
        """
        Continue reading from sources, yield results.

        Yields result of :py:meth:`prepare_result`, which depends on what sub
        class you called this function from.

        Returns once every source is marked as ignored. The loop polls
        without sleeping between rounds.
        """
        while True:
            if all(self.ignore):
                return      # nothing left to watch

            for idx, (obj, file_handle, description, last_size, do_ignore) in \
                    enumerate(zip(self.file_objs, self.file_handles,
                                  self.descriptions, self.last_sizes,
                                  self.ignore)):
                if do_ignore:
                    continue

                # get new file size
                new_size = os.fstat(file_handle).st_size

                # compare to old size
                if new_size == last_size:
                    if not self.is_used_func(file_handle):
                        warn('no one is writing to {0} / {1} -- '
                             'stop watching it!'
                             .format(file_handle, description),
                             category=LogReadWarning)
                        self.ignore[idx] = True
                else:
                    if new_size < last_size:  # happened at start of some tests
                        warn('{0} / {1} has become smaller ({2} --> {3})! '
                             .format(obj, description, last_size, new_size)
                             + 'Maybe you are reading from a half-initialized '
                             + 'file?',
                             category=LogReadWarning)
                    try:
                        new_data = obj.read()
                    except OSError as ose:    # includes IOErrors
                        warn('io error reading from {0} / {1}: {2}'
                             .format(obj, description, ose),
                             category=LogReadWarning)
                        # forward the error text in place of the data
                        new_data = str(ose)
                    except UnicodeDecodeError as ude:
                        warn('unicode error reading from {0} / {1}: {2}'
                             .format(obj, description, ude),
                             category=LogReadWarning)
                        new_data = str(ude)

                    # post-processing
                    to_yield = self.prepare_result(description, new_data, idx)
                    for result in to_yield:
                        yield result

                    # prepare next iteration
                    self.last_sizes[idx] = new_size

    def prepare_result(self, description, data, idx):
        """
        From raw new data create some yield-able results.

        Intended for overwriting in sub-classes.

        This function is called from __iter__ for each new data that becomes
        available. It has to return some iterable whose entries are yielded
        from iteration over objects of this class.

        The result must be an iterable of objects, which are yielded as-is, so
        can have any form.

        This base implementation just returns its input in a list, so new data
        is yielded from __iter__ as-is.

        Subclass implementations can also yield tuples.

        :param str description: Description of source of lines, one of
                                :py:data:`self.descriptions`
        :param str data: Text data read from source
        :param idx: Index of data source
        :returns: [(description, data, idx)], same as input
        :rtype: [(str, str, int)]
        """
        return [(description, data, idx), ]
e7d49180 CH |
311 | |
312 | ||
#: characters that terminate a line; `rstrip()`ed from end of complete lines
LINE_SPLITTERS = '\n\r'


class LineReader(IterativeReader):
    """
    An :py:class:`IterativeReader` that returns new data line-wise.

    This means buffering partial line data.
    """

    def __init__(self, *args, **kwargs):
        """Create an :py:class:`IterativeReader` and buffers for sources."""
        super(LineReader, self).__init__(*args, **kwargs)
        # one buffer per source for data after the last line terminator
        self.line_buffers = ['' for _ in range(self.n_sources())]

    def prepare_result(self, description, new_data, idx):
        """
        Take raw new data and split it into lines.

        If line is not complete, then buffer it.

        Only the characters in :py:data:`LINE_SPLITTERS` end a line. Other
        boundaries that `str.splitlines` recognizes (form feed, vertical tab,
        unicode line/paragraph separator, ...) stay inside the line.

        Args: see super class method :py:meth:`IterativeReader.prepare_result`
        :returns: list of 3-tuples `(description, line, idx)` where
                  `description` and `idx` are same as args, and `line` is
                  without trailing newline characters
        :rtype: [(str, str, int)]
        """
        all_data = self.line_buffers[idx] + new_data
        result = []
        pending = ''     # text since the last real line terminator
        for piece in all_data.splitlines(True):
            pending += piece
            if piece[-1] in LINE_SPLITTERS:
                result.append((description, pending.rstrip(LINE_SPLITTERS),
                               idx))
                pending = ''
            # else: splitlines cut at some other boundary or this is the
            # incomplete tail --> keep collecting

        # whatever is left is an incomplete line; keep it for the next call
        self.line_buffers[idx] = pending
        return result
357 | ||
358 | ||
class LogParser(LineReader):
    """
    Takes lines from :py:class:`LineReader` and parses their contents.

    Requires a pattern for log lines, auto-detection is not implemented yet.

    Iteration returns re.match result or -- if matching failed -- the original
    raw line.
    """

    def __init__(self, log_file, pattern=None):
        """
        Create a LogParser.

        :param log_file: source to parse (required!); handed unchanged to the
                         :py:class:`LineReader` constructor, so it has to be
                         an open file object or file descriptor, not a file
                         name
        :param pattern: regexp to split log lines; None (default) to return
                        line as they are
        :type pattern: str or None (default)
        """
        super(LogParser, self).__init__(log_file)

        self.pattern = pattern

    def prepare_result(self, *args):
        """
        Try parsing lines.

        Args: see super class method :py:meth:`IterativeReader.prepare_result`
        :returns: list of 3-tuples `(description, line, idx)` where
                  `description` and `idx` are same as input args and `line` is
                  either a :py:class:`re.Match` if line matched
                  :py:data:`self.pattern` or just str if line did not match
                  or no pattern was given.
        :rtype: [(str, :py:class:`re.Match` OR str, int)]
        """
        # let super class split data into lines
        lines = super(LogParser, self).prepare_result(*args)
        if self.pattern is None:
            return lines      # nothing to match against: lines as they are

        result = []
        for description, raw_line, idx in lines:
            match = re.match(self.pattern, raw_line)
            if match:
                result.append((description, match, idx))
            else:
                result.append((description, raw_line, idx))
        return result