| 1 | # The software in this package is distributed under the GNU General |
| 2 | # Public License version 2 (with a special exception described below). |
| 3 | # |
| 4 | # A copy of GNU General Public License (GPL) is included in this distribution, |
| 5 | # in the file COPYING.GPL. |
| 6 | # |
| 7 | # As a special exception, if other files instantiate templates or use macros |
| 8 | # or inline functions from this file, or you compile this file and link it |
| 9 | # with other works to produce a work based on this file, this file |
| 10 | # does not by itself cause the resulting work to be covered |
| 11 | # by the GNU General Public License. |
| 12 | # |
| 13 | # However the source code for this file must still be made available |
| 14 | # in accordance with section (3) of the GNU General Public License. |
| 15 | # |
| 16 | # This exception does not invalidate any other reasons why a work based |
| 17 | # on this file might be covered by the GNU General Public License. |
| 18 | # |
| 19 | # Copyright (c) 2016-2018 Intra2net AG <info@intra2net.com> |
| 20 | |
| 21 | """ |
| 22 | Iterative reading of log files, similar to shell command `tail -f`. |
| 23 | |
| 24 | Copyright: Intra2net AG |
| 25 | |
| 26 | Basic Functionality (class :py:class:`IterativeReader`): |
Runs stat in a loop to find out whether the file size has changed, then reads
the new data and forwards it.
| 29 | |
.. todo:: Want to also use lsof to find out whether file/pipe/socket was
   closed, so we can automatically return from the read loop.
| 32 | |
:py:class:`LineReader` takes the output of :py:class:`IterativeReader` and
returns it line-wise, as is usual for log files.
| 35 | |
| 36 | :py:class:`LogParser` takes those lines and tries to parse them into fields |
| 37 | like date, time, module name, urgency and message. |
| 38 | |
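Example of following a growing log file line by line (a minimal sketch;
``my_log.txt`` is a placeholder name)::

    with open('my_log.txt', 'rt') as file_handle:
        for description, line, _ in LineReader(file_handle):
            print('{0}: {1}'.format(description, line))
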
| 39 | .. todo:: auto-detect log line layout |
| 40 | """ |
| 41 | import os |
| 42 | import os.path |
| 43 | import re |
| 44 | from warnings import warn |
| 45 | import logging |
| 46 | from contextlib import contextmanager |
| 47 | from .iter_helpers import zip_longest |
| 48 | from .type_helpers import is_str_or_byte, is_file_obj |
| 49 | |
| 50 | |
| 51 | class LogReadWarning(UserWarning): |
| 52 | """Warnings issued by classes in this module.""" |
| 53 | pass |
| 54 | |
| 55 | |
| 56 | def true_func(_): |
| 57 | """Replacement for :py:func:`check_is_used`. Returns `True` always.""" |
| 58 | return True |
| 59 | |
| 60 | |
| 61 | def false_func(_): |
| 62 | """Replacement for :py:func:`check_is_used`. Returns `False` always.""" |
| 63 | return False |
| 64 | |
| 65 | |
| 66 | def check_is_used(file_handle): |
| 67 | """ |
| 68 | Check whether file is being written to. |
| 69 | |
| 70 | To be implemented, e.g. using lsof. |
| 71 | |
    If beneficial, the Python file object could also be supplied as an
    argument.
| 73 | |
| 74 | :param int file_handle: OS-level file descriptor |
| 75 | """ |
| 76 | raise NotImplementedError(file_handle) |
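

def _check_is_used_lsof_sketch(file_handle):
    """
    Sketch of a possible :py:func:`check_is_used` implementation (Linux only).

    Not used anywhere yet; kept only as a starting point. Assumes ``lsof`` is
    installed and that the descriptor refers to a regular file. Returns True
    if any process other than our own has the file open, which approximates
    "is still being written to".

    :param int file_handle: OS-level file descriptor
    """
    import subprocess   # local import since this is only a sketch

    # resolve the descriptor to a path via /proc (Linux-specific)
    path = os.readlink('/proc/self/fd/{0}'.format(file_handle))
    try:
        # -t: terse output, print only PIDs of processes using the file
        output = subprocess.check_output(['lsof', '-t', path])
    except subprocess.CalledProcessError:
        return False   # non-zero exit status: no process has the file open
    pids = set(int(pid) for pid in output.decode().split())
    pids.discard(os.getpid())   # we hold the descriptor ourselves
    return bool(pids)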
| 77 | |
| 78 | |
| 79 | #: counter for unknown sources in :py:func:`create_description` |
| 80 | _create_description_unknown_counter = 0 |
| 81 | |
| 82 | |
| 83 | def create_description(file_obj, file_handle): |
| 84 | """ |
| 85 | Create some description for given file-like object / file descriptor. |
| 86 | |
| 87 | :param file_obj: file-like object |
| 88 | :param int file_handle: os-level file descriptor |
| 89 | :returns: Short description for file-like object |
    :rtype: str
| 91 | """ |
| 92 | global _create_description_unknown_counter |
| 93 | |
| 94 | try: |
| 95 | desc = file_obj.name |
| 96 | if desc: |
| 97 | return desc |
| 98 | except AttributeError: |
| 99 | pass |
| 100 | |
| 101 | if file_handle is not None: |
| 102 | return 'file{0}'.format(file_handle) |
| 103 | else: |
| 104 | _create_description_unknown_counter += 1 |
| 105 | return 'unknown{0}'.format(_create_description_unknown_counter) |
| 106 | |
| 107 | |
| 108 | #: error message for IterativeReader constructor |
_STR_ERR = 'not accepting file name "{0}" since we cannot guarantee closing ' \
           'files --> use "with open(file_name)" instead!'
| 111 | |
| 112 | |
| 113 | class IterativeReader(object): |
| 114 | """ |
| 115 | Read continuously from a given file. |
| 116 | |
    Uses `os.fstat(file_obj.fileno()).st_size` to determine whether a file
    has changed; always reads as much data as possible.
| 119 | |
| 120 | Does not care about closing files, so does not accept file names. |
| 121 | |
| 122 | This is the base for class :py:class:`LineReader` that just has to |
| 123 | implement a different :py:meth:`prepare_result` method. |
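
    A short usage sketch (``some_file.txt`` is a placeholder; iteration ends
    once the file stops growing since `keep_watching` defaults to False)::

        with open('some_file.txt', 'rt') as file_handle:
            for description, new_data, idx in IterativeReader(file_handle):
                print(new_data)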
| 124 | """ |
| 125 | |
| 126 | def __init__(self, sources, descs=None, keep_watching=False): |
| 127 | """ |
| 128 | Create a reader; do some basic checks on args. |
| 129 | |
| 130 | :param sources: iterable over sources. Sources can be opened file |
| 131 | objects or read-opened os-level file descriptors. |
| 132 | Calling code has to ensure they are closed properly, so |
| 133 | best use this within a "with open(file_name) as |
| 134 | file_handle:"-context. If sources is a single file |
| 135 | obj/descriptor, both source and desc will be converted |
| 136 | to lists of length 1 |
| 137 | :param descs: can be anything of same length as sources. If sources is |
| 138 | a single source, then descs is also converted to a list |
| 139 | of length 1. If not given (i.e. None), will use |
| 140 | :py:func:`create_description` to guess descriptions |
| 141 | :param bool keep_watching: keep watching file that is not changing in |
| 142 | size. Need to manually tell whether file |
| 143 | is being written to or not since auto-detect |
| 144 | is not implemented yet. |
| 145 | :raises: OSError when testing fstat on source |
| 146 | """ |
| 147 | if not sources: |
| 148 | raise ValueError('need at least some source!') |
| 149 | elif is_str_or_byte(sources): |
| 150 | raise ValueError(_STR_ERR.format(sources)) |
| 151 | elif is_file_obj(sources) or isinstance(sources, int): |
| 152 | source_input = [sources, ] |
| 153 | desc_input = [descs, ] |
| 154 | else: |
| 155 | source_input = sources # assume some iterable |
| 156 | desc_input = descs |
| 157 | |
| 158 | # now divide sources into os-level file descriptors for os.fstat, |
| 159 | # and file objects for read() |
| 160 | self.file_objs = [] |
| 161 | self.file_handles = [] # file descriptOR, not descriptION |
| 162 | for source in source_input: |
| 163 | if is_file_obj(source): |
| 164 | self.file_objs.append(source) |
| 165 | self.file_handles.append(source.fileno()) |
| 166 | elif isinstance(source, int): |
| 167 | self.file_objs.append(os.fdopen(source)) |
| 168 | self.file_handles.append(source) |
| 169 | elif is_str_or_byte(source): |
| 170 | raise ValueError(_STR_ERR.format(source)) |
| 171 | else: |
                raise ValueError('source {0} is neither file obj nor file '
                                 'descriptor!'.format(source))
| 174 | |
| 175 | # try to fstat the new file descriptor just for testing |
| 176 | os.fstat(self.file_handles[-1]) |
| 177 | |
| 178 | # guess descriptions if not given |
| 179 | if not desc_input: |
| 180 | self.descriptions = [create_description(obj, file_handle) |
| 181 | for obj, file_handle |
| 182 | in zip(self.file_objs, self.file_handles)] |
| 183 | else: |
| 184 | try: |
| 185 | if len(desc_input) != len(self.file_objs): |
| 186 | raise ValueError('need same number of sources and ' |
| 187 | 'descriptions!') |
| 188 | except TypeError: |
                pass  # desc_input is a generator or similar
| 190 | |
| 191 | self.descriptions = [] |
| 192 | for obj, file_handle, description in \ |
| 193 | zip_longest(self.file_objs, self.file_handles, desc_input): |
| 194 | if obj is None: |
| 195 | raise ValueError('more descriptions than sources!') |
| 196 | elif description is None: |
| 197 | self.descriptions.append(create_description(obj, |
| 198 | file_handle)) |
| 199 | else: |
| 200 | self.descriptions.append(description) |
| 201 | |
| 202 | self.last_sizes = [0 for _ in self.file_objs] |
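        # per-source flag, set to True in __iter__ once a source stops
        # growing and is_used_func says it is no longer being written to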
| 203 | self.ignore = [False for _ in self.file_objs] |
| 204 | |
| 205 | if keep_watching: |
| 206 | self.is_used_func = true_func |
| 207 | else: |
| 208 | self.is_used_func = false_func |
| 209 | # use some day: self.is_used_func = check_is_used |
| 210 | |
| 211 | for obj, file_handle, description in \ |
| 212 | zip(self.file_objs, self.file_handles, self.descriptions): |
| 213 | logging.debug('log_read initialized with file descriptor {0}, ' |
| 214 | 'file obj {1}, description "{2}"' |
| 215 | .format(file_handle, obj, description)) |
| 216 | |
| 217 | def n_sources(self): |
| 218 | """Return number of sources given to constructor.""" |
| 219 | return len(self.file_objs) |
| 220 | |
| 221 | def n_active_sources(self): |
| 222 | """Return number of sources we are actually watching.""" |
| 223 | return len(self.ignore) - sum(self.ignore) |
| 224 | |
| 225 | def __iter__(self): |
| 226 | """ |
| 227 | Continue reading from sources, yield results. |
| 228 | |
        Yields results of :py:meth:`prepare_result`, which depend on the
        subclass this method is called from.
| 231 | """ |
| 232 | while True: |
| 233 | if all(self.ignore): |
| 234 | break |
| 235 | |
| 236 | for idx, (obj, file_handle, description, last_size, do_ignore) in \ |
| 237 | enumerate(zip(self.file_objs, self.file_handles, |
| 238 | self.descriptions, self.last_sizes, |
| 239 | self.ignore)): |
| 240 | if do_ignore: |
| 241 | continue |
| 242 | |
| 243 | # get new file size |
| 244 | new_size = os.fstat(file_handle).st_size |
| 245 | |
| 246 | # compare to old size |
| 247 | if new_size == last_size: |
| 248 | if not self.is_used_func(file_handle): |
| 249 | self.ignore[idx] = True |
| 250 | else: |
| 251 | if new_size < last_size: # happened at start of some tests |
| 252 | warn('{0} / {1} has become smaller ({2} --> {3})! ' |
| 253 | .format(obj, description, last_size, new_size) |
| 254 | + 'Maybe you are reading from a half-initialized ' |
| 255 | + 'file?', |
| 256 | category=LogReadWarning) |
| 257 | try: |
| 258 | new_data = obj.read() |
| 259 | except OSError as ose: # includes IOErrors |
                        warn('I/O error reading from {0} / {1}: {2}'
| 261 | .format(obj, description, ose), |
| 262 | category=LogReadWarning) |
| 263 | new_data = str(ose) |
| 264 | except UnicodeDecodeError as ude: |
| 265 | warn('unicode error reading from {0} / {1}: {2}' |
| 266 | .format(obj, description, ude), |
| 267 | category=LogReadWarning) |
| 268 | new_data = str(ude) |
| 269 | |
| 270 | # post-processing |
| 271 | for result in self.prepare_result(description, new_data, idx): |
| 272 | yield result |
| 273 | |
| 274 | # prepare next iteration |
| 275 | self.last_sizes[idx] = new_size |
| 276 | |
| 277 | def prepare_result(self, description, data, idx): |
| 278 | """ |
| 279 | From raw new data create some yield-able results. |
| 280 | |
| 281 | Intended for overwriting in subclasses. |
| 282 | |
        This function is called from `__iter__` for each chunk of new data
        that becomes available. It has to provide results which are forwarded
        to the caller.
| 285 | |
| 286 | This base implementation just yields its input, so new data is yielded |
| 287 | from `__iter__` as-is. |
| 288 | |
| 289 | :param str description: Description of source of lines, one of |
| 290 | :py:data:`self.descriptions` |
| 291 | :param str data: Text data read from source |
| 292 | :param idx: Index of data source |
| 293 | :returns: nothing but yields [(description, data, idx)], same as input |
| 294 | """ |
| 295 | yield description, data, idx |
| 296 | |
| 297 | |
| 298 | #: characters to `rstrip()` from end of complete lines |
| 299 | LINE_SPLITTERS = '\n\r' |
| 300 | |
| 301 | |
| 302 | class LineReader(IterativeReader): |
| 303 | """ |
| 304 | An :py:class:`IterativeReader` that returns new data line-wise. |
| 305 | |
| 306 | This means buffering partial line data. |
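
    For example, if one read yields ``'a\\nb'`` and the next read yields
    ``'c\\n'``, iteration produces the lines ``'a'`` and then ``'bc'``.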
| 307 | """ |
| 308 | |
| 309 | def __init__(self, *args, **kwargs): |
| 310 | """Create an :py:class:`IterativeReader and buffers for sources.""" |
| 311 | super(LineReader, self).__init__(*args, **kwargs) |
| 312 | self.line_buffers = ['' for _ in range(self.n_sources())] |
| 313 | |
| 314 | def prepare_result(self, description, new_data, idx): |
| 315 | """ |
| 316 | Take raw new data and split it into lines. |
| 317 | |
| 318 | If line is not complete, then buffer it. |
| 319 | |
| 320 | Args: see super class method :py:meth:`IterativeReader.prepare_result` |
| 321 | |
        :returns: nothing but yields 3-tuples `(description, line, idx)` where
                  `description` and `idx` are same as args, and `line` is
                  without trailing newline characters
        :rtype: [(str, str, int)]
| 326 | """ |
| 327 | all_data = self.line_buffers[idx] + new_data |
| 328 | self.line_buffers[idx] = '' |
| 329 | should_be_no_new_lines = False |
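        # splitlines(True) keeps line terminators, so a complete line ends in
        # one of LINE_SPLITTERS; at most the last chunk is a partial line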
| 330 | for line in all_data.splitlines(True): |
| 331 | if line[-1] in LINE_SPLITTERS: |
| 332 | yield description, line.rstrip(LINE_SPLITTERS), idx |
| 333 | elif should_be_no_new_lines: |
| 334 | # self-check |
| 335 | raise ValueError('Programming error: something went wrong with ' |
| 336 | 'line splitting/buffering.') |
| 337 | else: |
| 338 | self.line_buffers[idx] = line |
| 339 | should_be_no_new_lines = True # (this should be the last) |
| 340 | |
| 341 | |
| 342 | class LogParser(LineReader): |
| 343 | """ |
| 344 | Takes lines from :py:class:`LineReader` and parses their contents. |
| 345 | |
    Requires a pattern for log lines; auto-detection is not implemented yet.
| 347 | |
    Iteration returns the :py:func:`re.match` result or None if matching
    failed. The latest unparsed line is available as
    `self.last_unparsed_line`.
| 350 | Usage recommendation:: |
| 351 | |
| 352 | with open(log_file_name, 'rt') as file_handle: |
            parser = log_read.LogParser(file_handle, pattern=my_pattern)
| 354 | for _, data, _ in parser: |
| 355 | if data is None: |
| 356 | print(f'Failed to parse line {parser.last_unparsed_line}') |
| 357 | continue |
| 358 | line_parts = data.groupdict() |
| 359 | ...do stuff with line_parts... |
| 360 | """ |
| 361 | |
| 362 | def __init__(self, log_file, pattern=None): |
| 363 | """ |
| 364 | Create a LogParser. |
| 365 | |
| 366 | :param log_file: source of log lines to parse |
| 367 | :type log_file: see arg `sources` of constructor of :py:class:`IterativeReader` |
        :param pattern: regexp to parse log lines; None (default) to return
                        lines as they are
| 370 | :type pattern: str or None (default) |
| 371 | """ |
| 372 | super(LogParser, self).__init__(log_file) |
| 373 | |
| 374 | self.pattern = pattern |
| 375 | self.last_unparsed_line = '' |
| 376 | |
| 377 | def prepare_result(self, *args): |
| 378 | """ |
| 379 | Try parsing lines. |
| 380 | |
| 381 | Args: see super class method :py:meth:`IterativeReader.prepare_result` |
| 382 | |
        :returns: 3-tuples `(description, line, idx)` where `description` and
                  `idx` are same as input args and `line` is a
                  :py:class:`re.Match` if the line matched
                  :py:data:`self.pattern`, the raw line if
                  :py:data:`self.pattern` is None, or None if the line did
                  not match (it is then available as
                  `self.last_unparsed_line`)
        :rtype: [(str, :py:class:`re.Match` OR str OR None, int)]
| 388 | """ |
        # let super class split data into lines
        for description, raw_line, idx in \
                super(LogParser, self).prepare_result(*args):
            if self.pattern is None:
                # no pattern given --> return lines as they are
                yield description, raw_line, idx
                continue
            matches = re.match(self.pattern, raw_line)
            if matches:
                yield description, matches, idx
            else:
                self.last_unparsed_line = raw_line
                yield description, None, idx
| 398 | |
| 399 | @classmethod |
| 400 | @contextmanager |
| 401 | def create_for(cls, filename, *args, **kwargs): |
| 402 | """ |
        Open a single file and yield a LogParser; close the file afterwards.
| 404 | |
        This allows opening a file and creating a LogParser for it in one
        line::
| 406 | |
| 407 | with LogParser.create_for('/var/log/messages', SYS_LOG_PATTERN) as parser: |
| 408 | for _, matches, _ in parser: |
| 409 | try: |
| 410 | print(matches.groupdict()) |
                    except AttributeError:  # matches is None if line unparsed
| 412 | print(f'UNPARSED: {parser.last_unparsed_line}') |
| 413 | |
        :param str filename: something that :py:func:`open` accepts
| 415 | :param args: Forwarded to constructor |
| 416 | :param kwargs: Forwarded to constructor |
| 417 | """ |
| 418 | with open(filename) as file_handle: |
| 419 | yield cls(file_handle, *args, **kwargs) |
| 420 | |
| 421 | |
| 422 | ################################################################################ |
| 423 | # PATTERNS FOR FREQUENT LOG FILES |
| 424 | ################################################################################ |
| 425 | |
# pattern for squid proxy logs; group names are best guesses
| 427 | PROXY_LOG_PATTERN = \ |
| 428 | r'\s*(?P<timestamp>\d+\.\d+\.\d+\s+\d+:\d+:\d+|\d+\.\d+)\s+(?P<size1>\d+)\s+' \ |
| 429 | + r'(?P<ip>\d+\.\d+\.\d+\.\d+)\s+(?P<status_text>[A-Z_]+)/(?P<status_code>\d+)\s+' \ |
| 430 | + r'(?P<size2>\d+)\s+(?P<command>\S+)\s+(?P<url>\S+)\s+(?P<user>\S+)\s+' \ |
| 431 | + r'(?P<action>[A-Z_]+)/(?P<origin>\S+)\s+(?P<mimetype>\S+)\s+(?P<unknown>.*)\s*' |
| 432 | |
# pattern for linux system logs (usually "messages" or "syslog", also "maillog")
| 434 | SYS_LOG_PATTERN = \ |
| 435 | r'\s*(?P<timestamp>\w{3} +\d{1,2} \d{2}:\d{2}:\d{2}) (?P<hostname>\S+) ' \ |
| 436 | + r'(?P<procname>[^\[\]:]+)(?:\[(?P<pid>\d+)\])?: (?P<message>.*)' |