From: Christian Herdtweck Date: Fri, 16 Jan 2015 17:01:13 +0000 (+0100) Subject: use logging for all output, improve understanding of connd, improved process terminat... X-Git-Url: http://developer.intra2net.com/git/?a=commitdiff_plain;h=3daedf04d4b0aaefb2fc73134aedd4cf19f0998e;p=pingcheck use logging for all output, improve understanding of connd, improved process termination, simplify code --- diff --git a/test/connd_state.py b/test/connd_state.py new file mode 100755 index 0000000..0d0b903 --- /dev/null +++ b/test/connd_state.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python3 + +""" Representation for connd state as returned by tell-connd --status + +Christian Herdtweck, Intra2net, January 2015 +(c) Intra2net AG 2015 +""" + +# Version History +# 16/01/15 Christian Herdtweck: started creation + +import subprocess +from re import match as regexp +from os import EX_OK + +tell_connd_binary = '/usr/intranator/bin/tell-connd' +timeout = 1 + +class ConndState: + """ representation of connd's status as returned by tell-connd --status """ + + online_mode = None + default_provider = None + subsys_online = None + subsys_offline = None + subsys_disabled = None + connections = None + actions = None + online_ips = None + connected_vpns = None + + def __str__(self): + return '[ConndState: {0} (default {1}), {2} conn\'s, {3} ips, {4} vpns ]'.format(\ + self.online_mode, self.default_provider, len(self.connections), \ + len(self.online_ips), len(self.connected_vpns)) + + + @staticmethod + def run_tell_connd(): + """ run tell-connd --status, return output iterator """ + try: + output = subprocess.check_output([tell_connd_binary, '--status'], \ + stderr=subprocess.STDOUT, universal_newlines=True, shell=False, timeout=timeout) + return output.splitlines(), EX_OK + except subprocess.CalledProcessError as cpe: # non-zero return status + warn('tell-connd exited with status {0}'.format(cpe.returncode)) + return cpe.output.splitlines(), cpe.returncode + except subprocess.TimeoutExpired as te: + warn('tell-connd timed out after {0}s. Returning -1'.format(te.timeout)) + return te.output.splitlines(), -1 + #end: ConndState.run_tell_connd + + + @staticmethod + def get_state(): + + state = ConndState() + + all_lines, err_code = ConndState.run_tell_connd() + output = iter(all_lines) + + # first section + line = next(output).strip() + state.online_mode = regexp('online mode\s*:\s*(.+)$', line).groups()[0] + line = next(output).strip() + state.default_provider = regexp('default provider\s*:\s*(.*)$', \ + line).groups()[0] + if len(state.default_provider) == 0: + state.default_provider = None + line = next(output).strip() + assert( len(line) == 0 ) + + # subsys + line = next(output).strip() + assert( line == 'subsys' ) + line = next(output).strip() + state.subsys_online = regexp( 'online\s*:\s*(.*)$', line).groups()[0].split() + line = next(output).strip() + state.subsys_offline = regexp( 'offline\s*:\s*(.*)$', line).groups()[0].split() + line = next(output).strip() + state.subsys_disabled = regexp('disabled\s*:\s*(.*)$', line).groups()[0].split() + line = next(output).strip() + assert( len(line) == 0 ) + + # connection map + state.connections = [] + line = next(output).strip() + assert( line == 'connection map:' ) + expect_new = True + for line in output: + line = line.strip() + if len(line) == 0: + continue + if expect_new: + if line == 'end of connection map': + break + conn_name, conn_info = regexp('\[\s*(.+)\s*\]\s*:\s*\(\s*(.*)\s*\)', line).groups() + expect_new = False + else: + conn_actions = regexp('actions\s*:\s*\[\s*(.+)\s*\]', line) + state.connections.append( (conn_name, conn_info, conn_actions) ) + expect_new = True + #end: for lines + assert( expect_new ) + line = next(output).strip() + assert( len(line) == 0 ) + + # actions + line = next(output).strip() + state.actions = regexp('actions\s*:\s*(.*)', line).groups()[0].split() + if len(state.actions) == 1 and state.actions[0].strip() == '-': + state.actions = [] + line = next(output).strip() + assert( len(line) == 0 ) + + # online IPs + line = next(output).strip() + state.online_ips = regexp('list of online ips\s*:\s*(.*)', \ + line).groups()[0].split() + if len(state.online_ips) == 1 and state.online_ips[0].strip() == 'NONE': + state.online_ips = [] + line = next(output).strip() + assert( len(line) == 0 ) + + # VPNs + state.connected_vpns = [] + line = next(output).strip() + assert( line == 'vpns connected:' ) + for line in output: + line = line.strip() + if len(line) == 0: + continue + elif line == 'end of list of connected vpns': + break + else: + state.connected_vpns.append(line) + #end: for lines + + # done + line = next(output).strip() + assert( len(line) == 0 ) + line = next(output).strip() + assert( line == 'Done.' ) + + return state + #end: ConndState.get_state + +#end: class ConndState + + +def test(): + state = ConndState.get_state() + print(state) + print(state.subsys_online) + print(state.subsys_offline) + print(state.subsys_disabled) + +def main(): + """ Main function, called when running file as script; runs test() + """ + test() +#end: function main + + +if __name__ == '__main__': + main() + +# (created using vim -- the world's best text editor) + diff --git a/test/long_term_test.py b/test/long_term_test.py index 06415fe..cac9a44 100755 --- a/test/long_term_test.py +++ b/test/long_term_test.py @@ -15,24 +15,30 @@ Christian Herdtweck, Intra2net, January 2015 # 15/01/15 Christian Herdtweck: started creation import subprocess -from traceback import print_exc +import sys import select from re import match as regexp from time import sleep from datetime import timedelta, datetime as dt from warnings import warn from random import randint +import logging +from os.path import expanduser, dirname, isdir +from os import mkdir, EX_OK + +logger_base = logging.getLogger('long_term_test') ######################################################################## ### CONSTANTS ######################################################################## -version = '1.0' +version = '1.1' #default_pingcheck_bin = '/usr/intranator/bin/pingcheck' default_pingcheck_bin = '/root/git/pingcheck/build/src/pingcheck' default_tell_connd_bin = '/usr/intranator/bin/tell-connd' -default_config_file = 'test.conf' +default_output_prefix = './long_term_test/{0:%d%m%y_%H%M}' +# for parsing log messages LEVEL_DEBUG = 'dbug' LEVEL_INFO = 'info' LEVEL_WARN = 'warn' @@ -46,36 +52,86 @@ text_to_level['WARN'] = LEVEL_WARN text_to_level['NOTE'] = LEVEL_NOTICE text_to_level['ERR'] = LEVEL_ERROR +# duration in which to remain online/offline MIN_DURATION_SAME_STATE = 10 # [seconds] MAX_DURATION_SAME_STATE = 50 # [seconds] ######################################################################## -### TESTER +#### for config file ######################################################################## -class ParserError(Exception): - def __init__(self, line): - super(ParserError, self).__init__(\ - 'ParserError: line "{0}" did not match!'.format(line)) - -class ParserError_NotUnique(Exception): - def __init__(self, line): - super(ParserError_NotUnique, self).__init__(\ - 'ParserError: no unique match for line "{0}"!'.format(line)) - -class PingProc(subprocess.Popen): - """ wrapper around pingcheck binary """ +PING_INTERVAL = 3 # [seconds] +config_file_pattern = 'long_term_test_{0:%d%m%y_%H%M%S}.conf' +conf_start = """# config file for pingcheck testing - def __init__(self, binary, conf_file): - super(PingProc, self).__init__(\ - [binary, '--log-level', 'INFO', '--log-output', 'TERMINAL', '--config-file', conf_file], \ - stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) - print('started pingcheck with PID {0}'.format(self.pid)) +default-source-network-interface=eth1 +nameserver=127.0.0.1 +hosts-down-limit={hosts_down_limit:d} +ping-fail-limit=25 +status-notifier-cmd=echo host status is ${status} +link-up-interval=2 +""" +conf_host = """ +[host] +name={0} +interval={1:d} +ping-protocol=ICMPv4 +port=80 +source-network-interface=default +""" +host_list = (\ + 'intranator.m.i2n', \ + 'intraroute.m.i2n', \ + 'websync.m.i2n', \ + 'aqua.m.i2n', \ + 'intratdsl.m.i2n', \ + 'beholder.m.i2n', \ + 'eagle.m.i2n', \ + 'osdist.m.i2n', \ + 'tokyo.m.i2n', \ + 'blizzard.m.i2n', \ + 'phonehelp.m.i2n', \ + 'virt-dmz.m.i2n', \ + 'virt01.m.i2n', \ + 'virt02.m.i2n', \ + 'tornado.n.i2n', \ + 'auth.m.i2n', \ + 'forum.intra2net.com', \ + 'update.intranator.com', \ + 'demo-intranator.intra2net.com', \ + 'intra2net.com', \ + 'partner.intra2net.com', \ + 'demo-securitygateway.intra2net.com', \ + 'demo-networksecurity.intra2net.com', \ + ) + +def create_conf_file(file_prefix): + """ create config file using constants above """ + + file_name = file_prefix + '_config.conf' + + n_hosts = len(host_list) + + with open(file_name, 'wt') as file_handle: + file_handle.write(conf_start.format(hosts_down_limit=int(n_hosts * 0.7), status='{status}')) + for host in host_list: + file_handle.write(conf_host.format(host, PING_INTERVAL)) + + return file_name +#end: create_conf_file -#end: PingProc +######################################################################## +### PARSER +######################################################################## class ProcOutputParser: + """ parser for output from pingcheck process via pipes + + uses select and polling + should never silently swallow log output, main function parse should yield all lines + or emit errors + """ _out = None _err = None @@ -118,29 +174,17 @@ class ProcOutputParser: if not line: continue + # if reach here, we have a new line of text - yield src, dt.now(), line.strip() + yield src, line.strip() #end: ProcOutputParser._read - def _parse_line(self, line): - match = regexp(self._regexp, line) - if match is None: - raise ParserError(line) - text_time, text_level, message = match.groups() - try: - return text_time, text_to_level[text_level], message - except KeyError: - warn('unknown level: {0}'.format(text_level)) - return text_time, text_level, message - def parse(self): - for src, timestamp, line in self._read(): - if src == self.SRC_ERR: - yield src, timestamp, self._parse_line(line) - elif src == self.SRC_OUT and regexp("RECV BUF \(\s*\d+\):[\d \|]+", line): - pass - else: - warn('Unexpected line and/or source: {0}, "{1}"'.format(src, line)) + for src, line in self._read(): + try: + yield src, regexp(self._regexp, line).groups() + except: + yield src, line def __iter__(self): return self.parse() @@ -164,6 +208,10 @@ class ProcOutputParser: #end: class ProcOutputParser +######################################################################## +### TESTER +######################################################################## + class Tester: """ """ @@ -176,8 +224,11 @@ class Tester: _tell_connd_bin = None _prev_state_changes = None _fails = None + _log = None + _log_ping = None + max_fails = None - def __init__(self, line_iter, tell_connd_bin): + def __init__(self, line_iter, tell_connd_bin, max_fails): self._line_iter = line_iter self._last_request = {} self._last_reply = {} @@ -186,32 +237,60 @@ class Tester: self._tell_connd_bin = tell_connd_bin self._prev_state_changes = [] self._fails = [] + self.max_fails = max_fails + + self._log = logger_base.getChild('Tester') + self._log_ping = {} + self._log_ping[ProcOutputParser.SRC_OUT] = logger_base.getChild('pingOut') + self._log_ping[ProcOutputParser.SRC_ERR] = logger_base.getChild('pingErr') def test(self): - self._handle_change_state() + # preparations + current_state = self._is_online() + while current_state is None: + sleep(1) + current_state = self._is_online() + self._log.info('Start with state online = {0}'.format(current_state)) + + # start looping over output from observed process + for src, text_info in self._line_iter: + if isinstance(text_info, str): + # ProcOutputParser could not parse + msg = 'unparsed: {0}'.format(text_info) + if text_info.startswith("RECV BUF ("): + level = LEVEL_DEBUG + else: + level = LEVEL_WARN + else: + text_time, level, message = text_info + # first of all: write to text log so nothing is lost + msg = '{0} {1} (orig time {2})'.format(level, message, text_time) - for src, timestamp, (text_time, level, message) in self._line_iter: - # always print warnings and errors if level in (LEVEL_WARN, LEVEL_ERROR): + # most common warning/error is only logged at debug level if message.endswith('could not be resolved.'): + self._log_ping[src].debug(msg) continue if message.endswith('fail sending ping data. Network is unreachable'): + self._log_ping[src].debug(msg) continue - else: - self._print('{0}: {1}'.format(level, message)) + else: # unexpected errors and warnings from pingcheck are logged at info level + self._log_ping[src].info(msg) continue - if self._handle_new_ip(message): - continue + else: # normal messages are logged at debug level + self._log_ping[src].debug(msg) + + # now deal with message contents if self._handle_request(message): continue if self._handle_reply(message): continue if self._handle_change_state(): continue - - #self._print('ignoring message "{0}"'.format(message)) - #end: for all lines from log + if self._handle_new_ip(message): + continue + #end: for all lines from pingcheck output #end: Tester.test @@ -221,7 +300,7 @@ class Tester: matches = regexp('Host is already an IP: (\d+\.\d+\.\d+\.\d+)', message) if matches: ip = matches.groups()[0] - self._print('remember new IP {0}'.format(ip)) + self._log.info('remember new IP {0}'.format(ip)) self._last_request[ip] = dt.now() self._last_reply[ip] = dt.now() self._n_requests[ip] = 0 @@ -229,45 +308,31 @@ class Tester: return True return False + def _handle_request(self, message): matches = regexp('(\d+\.\d+\.\d+\.\d+): sending ping', message) if matches is not None: ip = matches.groups()[0] - self._print('pinging IP {0}'.format(ip)) + self._log.info('pinging IP {0}'.format(ip)) self._last_request[ip] = dt.now() self._n_requests[ip] += 1 return True return False + def _handle_reply(self, message): matches = regexp('(\d+\.\d+\.\d+\.\d+): Received reply', message) if matches is not None: ip = matches.groups()[0] - self._print('received reply from {0}'.format(ip)) + self._log.info('received reply from {0}'.format(ip)) self._last_reply[ip] = dt.now() self._n_replies[ip] += 1 return True return False - def _handle_change_state(self): - if self._next_state_change is None: - # called at start of test() - n_secs = randint(MIN_DURATION_SAME_STATE, MAX_DURATION_SAME_STATE) - self._next_state_change = dt.now() + timedelta(seconds=n_secs) - current_state = self._is_online() - if current_state is not None: - self._print('Start with state online = {0}'.format(current_state)) - self._prev_state_changes.append((current_state, dt.now())) - self._print('schedule next state change for {0:%H:%M:%S} (in {1}s)'.format(\ - self._next_state_change, n_secs)) - - elif dt.now() > self._next_state_change: - - # check if there were too many fails; if so, stop - if len(self._fails) > 100: - self._fails.append(("WARN", dt.now(), "Too many failurs/warnings!")) - raise ValueError("Too many failures/warnings!") + def _handle_change_state(self): + if dt.now() > self._next_state_change: is_online = self._is_online() if is_online is None: return False # try again later @@ -277,16 +342,22 @@ class Tester: else: new_val = True if self._set_online(new_val): - self._prev_state_changes.append((new_val, dt.now())) - self._next_state_change = dt.now() + timedelta(\ - seconds=randint(MIN_DURATION_SAME_STATE, MAX_DURATION_SAME_STATE)) - self._print('schedule next state change for {0:%H:%M:%S}'.format(self._next_state_change)) + self._schedule_next_state_change(is_online) return True # if call to tell_connd fails, try again later return False #end: _handle_change_state + def _schedule_next_state_change(self, current_state): + self._prev_state_changes.append((current_state, dt.now())) + n_secs = randint(MIN_DURATION_SAME_STATE, MAX_DURATION_SAME_STATE) + self._next_state_change = dt.now() + timedelta(seconds=n_secs) + self._log.info('schedule next state change for {0:%H:%M:%S} (in {1:d}s)'.format(\ + self._next_state_change, n_secs)) + #end: Tester._schedule_next_state_change + + def _is_online(self): """ determine state from tell-connd --status @@ -297,18 +368,14 @@ class Tester: output = subprocess.check_output([self._tell_connd_bin, '--status'], \ universal_newlines=True) except subprocess.CalledProcessError as cpe: # non-zero exit status - text = 'Non-zero exit status asking tell-connd for its --status: {0}'.format(cpe.returncode) - warn(text) - self._fails.append(("WARN", dt.now(), text)) + self._warn('Non-zero exit status asking tell-connd for its --status: {0}'.format(cpe.returncode)) return None except subprocess.TimeoutExpired as te: - text = 'Timeout expired asking tell-connd for its --status. Output was {0}'.format(te.output) - warn(text) - self._fails.append(("WARN", dt.now(), text)) + self._warn('Timeout expired asking tell-connd for its --status. Output was {0}'.format(te.output)) return None except: - self._print('Trouble asking tell-connd for its --status:') - print_exc() + self._info('Trouble asking tell-connd for its --status:') + self._warn(sys.exc_info()) return None line_iter = output.splitlines() @@ -318,23 +385,19 @@ class Tester: if matches: matches = matches.groups() if len(matches) != 1: - raise ValueError("Could not interpret online mode from \"{0}\"".format(line)) + self._warn('Could not interpret online mode from \"{0}\"'.format(line)) else: online_mode = matches[0] break if online_mode is None: - text = 'Could not determine online mode of connd! Is connd running?!' - warn(text) - self._fails.append(("WARN", dt.now(), text)) + self._warn('Could not determine online mode of connd! Is connd running?!') return None elif online_mode in ('always online', ): return True elif online_mode in ('offline', 'always offline', ): return False else: - text = 'Unknown online mode: {0}'.format(online_mode) - warn(text) - self._fails.append(("WARN", dt.now(), text)) + self._warn('Unknown online mode: {0}'.format(online_mode)) return None #end: Tester._is_online @@ -343,26 +406,24 @@ class Tester: def _set_online(self, new_val): """ run tell-connd --online/offline P1 """ if new_val: - self._print('going online') + self._log.info('going online') args = [self._tell_connd_bin, '--online', 'P1'] else: - self._print('going offline') + self._log.info('going offline') args = [self._tell_connd_bin, '--offline'] try: return_code = subprocess.call(args, stderr=subprocess.STDOUT, timeout=1) if return_code == 0: return True else: - self._print('tell-connd returned {0} --> online state probably unchanged!'.format(return_code)) + self._log.info('tell-connd returned {0} --> online state probably unchanged!'.format(return_code)) return False except subprocess.TimeoutExpired as te: - text = 'Timeout expired running {0}. Output was {1}'.format(args, te.output) - warn(text) - self._fails.append(("WARN", dt.now(), text)) + self._warn('Timeout expired running {0}. Output was {1}'.format(args, te.output)) return False except: - self._print('Trouble calling tell-connd:') - print_exc() + self._log.info('Trouble calling tell-connd:') + self._warn(sys.exc_info()) return False #end: Tester._set_online @@ -381,37 +442,61 @@ class Tester: # never been online --> nothing to check yet return - self._print('checking whether all hosts were pinged between {0:%H:%M:%S} and now ({1:%H:%M:%S})'.format(\ + self._log.info('checking whether all hosts were pinged between {0:%H:%M:%S} and now ({1:%H:%M:%S})'.format(\ time_went_online, time_go_offline)) for ip, reply_time in self._last_reply.items(): if reply_time < time_went_online: - text = 'Did not ping host {0} between {1:%H:%M:%S} and now ({2:%H:%M:%S})!'.format(\ - ip, time_went_online, time_go_offline) - warn(text) + self._log.warn('Did not ping host {0} between {1:%H:%M:%S} and now ({2:%H:%M:%S})!'.format(\ + ip, time_went_online, time_go_offline)) self._fails.append((ip, time_went_online, time_go_offline)) - + self._log.info('remembered as failure number {0} / {1}'.format(\ + len(self._fails), self.max_fails)) + + # check if there were too many fails; if so, stop + if len(self._fails) > self.max_fails: + self._fails.append(("WARN", dt.now(), "Too many failurs/warnings!")) + raise ValueError("Too many failures/warnings!") + #end: if not pinged recently + #end: for all known ips #end: Tester._ensure_all_pinged - def _print(self, message): - now = dt.now() - if self._next_state_change is not None: - print('{0:%H:%M:%S};next{1:+5.1f}s;{2} fails: {3}'.format(now, \ - (now - self._next_state_change).total_seconds(), len(self._fails), message)) + def _warn(self, message): + + if isinstance(message, str): + self._log.warning(message) else: - print('{0:%H:%M:%S};next-?????;{1} fails: {2}'.format(now, len(self._fails), message)) + self._log.formatException(sys.exc_info()) + self._fails.append(("WARN", dt.now(), message)) + self._log.info('remembered as failure number {0} / {1}'.format(\ + len(self._fails), self.max_fails)) + # check if there were too many fails; if so, stop + if len(self._fails) > self.max_fails: + self._fails.append(("WARN", dt.now(), "Too many failurs/warnings!")) + raise ValueError("Too many failures/warnings!") + #end: Tester._warn - def print_failures(self): + + #def _print(self, message): + # now = dt.now() + # if self._next_state_change is not None: + # print('{0:%H:%M:%S};next{1:+5.1f}s;{2} fails: {3}'.format(now, \ + # (now - self._next_state_change).total_seconds(), len(self._fails), message)) + # else: + # print('{0:%H:%M:%S};next-?????;{1} fails: {2}'.format(now, len(self._fails), message)) + + + def print_failures(self, log): if len(self._fails) == 0: - print('No fails! :-)') + log.info('No fails! :-)') else: - print('Recorded the following failures and warnings:') + log.info('Recorded the following failures and warnings:') for ip, time_went_online, time_go_offline in self._fails: if ip == 'WARN': - print('Warning at {0:%H:%M:%S}: {1}'.format(time_went_online, time_go_offline)) + log.info('Warning at {0:%H:%M:%S}: {1}'.format(time_went_online, time_go_offline)) else: - print('Failed to ping {0} between {1:%H:%M:%S} and {2:%H:%M:%S} ({3}s)'.format(\ + log.info('Failed to ping {0} between {1:%H:%M:%S} and {2:%H:%M:%S} ({3}s)'.format(\ ip, time_went_online, time_go_offline, \ (time_go_offline-time_went_online).total_seconds())) #end: class Tester @@ -435,67 +520,169 @@ def parse_args(): default=default_tell_connd_bin, \ help='path to tell-connd binary') - parser.add_argument('--config_file', '-c', metavar='PATH', \ - default=default_config_file, \ - help='path to config file for pingcheck') - - # default + # output + parser.add_argument('--output_prefix', '-o', metavar='PATH', \ + default=default_output_prefix, \ + help='path and/or file prefix to config and log files; ~=home, 0=prog start datetime; ' \ + + 'default="%(default)s"') + parser.add_argument('--log_to_console', '-c', metavar='0|1', \ + choices=(0, 1), type=int, default=1, \ + help='print all output to console? default=%(default)s') + parser.add_argument('--log_level_console', metavar='LEVEL', \ + type=int, default=logging.INFO, \ + help='log level for console output; debug={0}, info={1}, warn={2}; default: %(default)s'.format(\ + logging.DEBUG, logging.INFO, logging.WARN)) + parser.add_argument('--log_to_file', '-f', metavar='0|1', \ + choices=(0, 1), type=int, default=1, \ + help='print all output to log file? default=%(default)s') + parser.add_argument('--log_level_file', metavar='LEVEL', \ + type=int, default=logging.DEBUG, \ + help='log level for console output; debug={0}, info={1}, warn={2}; default: %(default)s'.format(\ + logging.DEBUG, logging.INFO, logging.WARN)) + parser.add_argument('--log_file_suffix', '-l', metavar='SUFF.IX', \ + type=str, default='.log', \ + help='2nd part of log file name after --output_prefix; default="%(default)s"') + + parser.add_argument('--max_fails', '-m', metavar='N', type=int, default=100, \ + help='max number of failures/warnings before test will end; default: %(default)s') parser.add_argument('--version', action='version', version='%(prog)s {0}'.format(version)) - return parser.parse_args() + + # parse + opts = parser.parse_args() + opts.output_prefix = expanduser(opts.output_prefix).format(dt.now()) + return opts #end: parse_args +def setup_logging(opts): + """ create main logger and handlers for base logger """ + + # create logger for main + log = logger_base.getChild('main') + + # remove all existing handlers + for old_handler in logger_base.handlers: + print('removing handler from base') + logger_base.removeHandler(old_handler) + for old_handler in log.handlers: + print('removing handler from log') + log.removeHandler(old_handler) + + logger_base.setLevel(logging.DEBUG) + + # create handlers and format for base logger + log_file = None + if opts.log_to_console: + handler = logging.StreamHandler(sys.stdout) + handler.setLevel(opts.log_level_console) + formatter = logging.Formatter('%(asctime)s %(levelname)s %(name)s %(message)s', \ + datefmt='%d.%m,%H:%M:%S') + handler.setFormatter(formatter) + logger_base.addHandler(handler) + + if opts.log_to_file: + log_file = opts.output_prefix + opts.log_file_suffix + handler = logging.FileHandler(log_file) + handler.setLevel(opts.log_level_file) + formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s', \ + datefmt='%d.%m,%H:%M:%S') + handler.setFormatter(formatter) + logger_base.addHandler(handler) + + # log info + log.info('Log for {0} in {1} created on {2}'.format(__name__, __file__, dt.now())) + if opts.log_to_console: + log.info('logging to console at level {0}'.format(opts.log_level_console)) + else: + log.info('not logging to console') + if opts.log_to_file: + log.info('logging to file {1} at level {0}'.format(opts.log_level_file, log_file)) + else: + log.info('not logging to file') + + # return log_file only if makes sense to talk about it in main + if opts.log_to_console: + return log, log_file + else: + return log, None +#end: setup_logging + + def main(): - """ Main function, called when running file as script; currently raises a NotImplementedError + """ parses args, create a Pinger and Parser and Tester and runs them; cleans up """ opts = parse_args() + base_dir = dirname(opts.output_prefix + "_test") + if not isdir(base_dir): + mkdir(base_dir) + + log, log_file = setup_logging(opts) + pinger = None return_code = None tester = None try: - pinger = PingProc(opts.pingcheck_binary, opts.config_file) + config_file = create_conf_file(opts.output_prefix) + log.info('created config file {0}'.format(config_file)) + + # pingcheck subprocess + pinger = subprocess.Popen([opts.pingcheck_binary, '--log-level', 'DEBUG', \ + '--log-output', 'TERMINAL', '--config-file', config_file], \ + stdout=subprocess.PIPE, stderr=subprocess.PIPE, \ + universal_newlines=True) + log.info('started pingcheck with PID {0}'.format(pinger.pid)) + parser = ProcOutputParser(pinger.stdout, pinger.stderr) line_iter = iter(parser) - tester = Tester(line_iter, opts.tell_connd_binary) + tester = Tester(line_iter, opts.tell_connd_binary, opts.max_fails) tester.test() except KeyboardInterrupt: - print('stopped by user') + log.info('stopped by user') except: - if tester is not None: - tester.print_failures() raise finally: if pinger is None: - print('failed to create pinger') + log.warn('Error before pinger was created') return_code = 254 else: + log.info("Testing ended") + if tester is not None: + tester.print_failures(log) + try: - print('telling pinger to terminate') + log.info('telling pinger to terminate') pinger.terminate() - return_code = pinger.wait(3) - print('terminated with return code {0}'.format(return_code)) + #return_code = pinger.wait(3) may block! + outs, errs = pinger.communicate(timeout=3) + return_code = pinger.returncode # may still be None + log.info('terminated with return code {0}'.format(return_code)) except subprocess.TimeoutExpired as te: - print('Timeout expired waiting for pinger to terminate: {0}'.format(te)) + log.warn('Timeout expired waiting for pinger to terminate: {0}'.format(te)) return_code = 255 # a.k.a. -1 except: - print('trouble terminating pinger:') - print_exc() + log.warn('trouble terminating pinger:') + log.formatException(sys.exc_info()) if return_code is None: # kill it try: + log.info('killing pinger the hard way') pinger.kill() - outs, errs = pinger.communicate() + outs, errs = pinger.communicate(timeout=3) except: - print('trouble killing pinger:') - print_exc() + log.warn('trouble killing pinger:') + log.formatException(sys.exc_info()) #end: if have pinger #end: global clean-up if tester is not None: - tester.print_failures() + tester.print_failures(log) + + log.info('Will stop now') + if log_file is not None: + log.info('might find more log output in {0}'.format(log_file)) return return_code @@ -504,6 +691,7 @@ def main(): if __name__ == '__main__': main() + logging.shutdown() # (created using vim -- the world's best text editor)