From: Christian Herdtweck Date: Wed, 18 May 2022 11:48:50 +0000 (+0200) Subject: Return matches or None from LogParser iteration X-Git-Tag: v1.7.1~2^2~2 X-Git-Url: http://developer.intra2net.com/git/?p=pyi2ncommon;a=commitdiff_plain;h=443534547fe50cab598d6850b2e879a0e9a3ab7b Return matches or None from LogParser iteration Yielding match-object or string is bad style. Iteration over LogParser now yields either match-object or None. If None is returned (i.e. parsing line failed), then the problematic input is saved in LogParser object's attribute last_unparsed_line (until next matching fails). To make this work we need to run prepare_result on every single line, so prepare_result cannot return multiple lines at once. Therefore, made prepare_result also a generator (which makes more sense, anyway). --- diff --git a/src/log_read.py b/src/log_read.py index 5ece416..e50e046 100644 --- a/src/log_read.py +++ b/src/log_read.py @@ -238,8 +238,8 @@ class IterativeReader(object): """ Continue reading from sources, yield results. - yields result of :py:meth:`prepare_result`, which depends on what sub - class you called this function from. + yields result of :py:meth:`prepare_result`, which depends on what + subclass you called this function from. """ while True: if all(self.ignore): @@ -280,8 +280,7 @@ class IterativeReader(object): new_data = str(ude) # post-processing - to_yield = self.prepare_result(description, new_data, idx) - for result in to_yield: + for result in self.prepare_result(description, new_data, idx): yield result # prepare next iteration @@ -294,21 +293,18 @@ class IterativeReader(object): Intended for overwriting in subclasses. This function is called from __iter__ for each new data that becomes - available. It has to return some iterable whose entries are yielded - from iteration over objects of this class. + available. It has to provide results which are forwarded to caller. 
- This base implementation just returns its input in a list, so new data - is yielded from __iter__ as-is. Subclass implementations can also yield - tuples. + This base implementation just yields its input, so new data is yielded + from `__iter__` as-is. :param str description: Description of source of lines, one of :py:data:`self.descriptions` :param str data: Text data read from source :param idx: Index of data source - :returns: [(description, data, idx], same as input - :rtype [(str, str, int)] + :returns: nothing, but yields (description, data, idx), same as input """ - return [(description, data, idx), ] + yield description, data, idx #: characters to `rstrip()` from end of complete lines @@ -341,11 +337,10 @@ class LineReader(IterativeReader): """ all_data = self.line_buffers[idx] + new_data self.line_buffers[idx] = '' - result = [] should_be_no_new_lines = False for line in all_data.splitlines(True): if line[-1] in LINE_SPLITTERS: - result.append((description, line.rstrip(LINE_SPLITTERS), idx)) + yield description, line.rstrip(LINE_SPLITTERS), idx elif should_be_no_new_lines: # self-check raise ValueError('Programming error: something went wrong with ' @@ -354,8 +349,6 @@ class LineReader(IterativeReader): self.line_buffers[idx] = line should_be_no_new_lines = True # (this should be the last) - return result - class LogParser(LineReader): """ @@ -364,15 +357,16 @@ class LogParser(LineReader): Requires a pattern for log lines, auto-detection is not implemented yet. Iteration returns :py:class:`re.match` result or -- if matching failed -- - the original raw line. Usage recommendation: + None. The latest unparsed line is available as `self.last_unparsed_line`. 
+ Usage recommendation: with open(log_file_name, 'rt') as file_handle: - for _, data, _ in log_read.LogParser(file_handle, pattern=my_pattern): - try: - line_parts = data.groupdict() - except AttributeError: # no groupdict --> could not parse - print(f'Failed to parse line {data}') + parser = log_read.LogParser(file_handle, pattern=my_pattern) + for _, data, _ in parser: + if data is None: + print(f'Failed to parse line {parser.last_unparsed_line}') continue + line_parts = data.groupdict() ...do stuff with line_parts... """ @@ -388,6 +382,7 @@ class LogParser(LineReader): super(LogParser, self).__init__(log_file) self.pattern = pattern + self.last_unparsed_line = '' def prepare_result(self, *args): """ @@ -401,12 +396,11 @@ class LogParser(LineReader): :rtype: [(str, :py:class:`re.Match` OR str, int)] """ # let super class split data into lines - result = [] for description, raw_line, idx in \ super(LogParser, self).prepare_result(*args): matches = re.match(self.pattern, raw_line) if matches: - result.append((description, matches, idx)) + yield description, matches, idx else: - result.append((description, raw_line, idx)) - return result + self.last_unparsed_line = raw_line + yield description, None, idx