From 0feda3c561f0dacf743c86ed9f5df22788dde2eb Mon Sep 17 00:00:00 2001 From: Christian Herdtweck Date: Wed, 22 Jul 2015 10:02:38 +0200 Subject: [PATCH] moved tests --- test/connd_state.py | 324 ----------------- test/long_term_test.py | 914 ------------------------------------------------ test/strftimedelta.py | 103 ------ 3 files changed, 0 insertions(+), 1341 deletions(-) delete mode 100755 test/connd_state.py delete mode 100755 test/long_term_test.py delete mode 100644 test/strftimedelta.py diff --git a/test/connd_state.py b/test/connd_state.py deleted file mode 100755 index 51d7034..0000000 --- a/test/connd_state.py +++ /dev/null @@ -1,324 +0,0 @@ -#!/usr/bin/env python - -""" Representation for connd state as returned by tell-connd --status - -Christian Herdtweck, Intra2net, January 2015 -(c) Intra2net AG 2015 -""" - -from __future__ import print_function -import subprocess -from re import match as regexp -from os import EX_OK - -# constants -DEFAULT_TELL_CONND_BINARY = '/usr/intranator/bin/tell-connd' -TIMEOUT = 1 - -ONLINE_STATE_ALWAYS_ONLINE = 'always online' -ONLINE_STATE_ALWAYS_OFFLINE = 'always offline' -ONLINE_STATE_DIAL_ON_COMMAND = 'dial on command' -ONLINE_STATE_DIAL_ON_DEMAND = 'dial on demand' - -SUBSYS_DNS = 'dns' -SUBSYS_DYNDNS = 'dyndns' -SUBSYS_MAIL = 'mail' -SUBSYS_NTP = 'ntp' -SUBSYS_SOCKS = 'socks' -SUBSYS_VPN = 'vpn' -SUBSYS_WEBPROXY = 'webproxy' -SUBSYS_PINGCHECK = 'pingcheck' -SUBSYS_IPONLINE = 'iponline' -ALL_SUBSYS = (SUBSYS_DNS, SUBSYS_DYNDNS, SUBSYS_MAIL, SUBSYS_NTP, - SUBSYS_SOCKS, SUBSYS_VPN, SUBSYS_WEBPROXY, SUBSYS_PINGCHECK, - SUBSYS_IPONLINE) - -ALL_STATES = (ONLINE_STATE_DIAL_ON_DEMAND, ONLINE_STATE_DIAL_ON_COMMAND, - ONLINE_STATE_ALWAYS_OFFLINE, ONLINE_STATE_ALWAYS_ONLINE) - - -class ConndState(object): - """ representation of connd's status as returned by tell-connd --status """ - - online_mode = None - default_provider = None - subsys_online = None - subsys_offline = None - subsys_disabled = None - connections = None - actions = None - online_ips = None - connected_vpns = None - log_level = None - log_file = None - - def __str__(self): - return \ - '[ConndState: {0} (default {1}), {2} conn\'s, {3} ips, {4} vpns ]'\ - .format(self.online_mode, self.default_provider, - len(self.connections), len(self.online_ips), - len(self.connected_vpns)) - - def complete_str(self): - """ return a string representating the complete state """ - - # general - parts = [ - 'ConndState: online mode = "{0}" (default provider: {1})\n' - .format(self.online_mode, self.default_provider), ] - - # subsys - # ' connctns: (repeated here for correct aligning) - parts.append(' subsys: online: ') - if self.subsys_online: - for subsys in self.subsys_online: - parts.append(subsys + ' ') - else: - parts.append('None ') - parts.append('; offline: ') - if self.subsys_offline: - for subsys in self.subsys_offline: - parts.append(subsys + ' ') - else: - parts.append('None ') - parts.append('; disabled: ') - if self.subsys_disabled: - for subsys in self.subsys_disabled: - parts.append(subsys + ' ') - else: - parts.append('None') - parts.append('\n') - - # connections - parts.append(' conns: ') - if self.connections: - name, info, actions = self.connections[0] - parts.append('{0}: {1}, {2}\n'.format(name, info, actions)) - else: - parts.append('None\n') - for name, info, actions in self.connections[1:]: - # ' connctns: (repeated here for correct aligning) - parts.append(' {0}: {1}, {2}\n'.format(name, info, - actions)) - - # actions - # ' connctns: (repeated here for correct aligning) - parts.append(' actions: ') - if self.actions: - parts.append(self.actions[0] + '\n') - else: - parts.append('None\n') - for action in self.actions[1:]: - # ' connctns: (repeated here for correct aligning) - parts.append(' {0}\n'.format(action)) - - # online IPs - # ' connctns: (repeated here for correct aligning) - parts.append(' IPs: ') - if self.online_ips: - parts.append(self.online_ips[0]) - for curr_ip in self.online_ips[1:]: - parts.append(', {0}'.format(curr_ip)) - else: - parts.append('None') - parts.append('\n') - - # VPNs - # ' connctns: (repeated here for correct aligning) - parts.append(' VPNs: ') - if self.connected_vpns: - parts.append(self.connected_vpns[0]) - for vpn in self.connected_vpns[1:]: - parts.append(', {0}'.format(vpn)) - else: - parts.append('None') - parts.append('\n') - - # log level and target: - # ' connctns: (repeated here for correct aligning) - parts.append(' Log: level {0}'.format(self.log_level)) - if self.log_file: - parts.append(' to {0}'.format(self.log_file)) - parts.append('\n') - - return ''.join(parts) - # end: ConndState.complete_str - - @staticmethod - def run_tell_connd(tell_connd_binary=DEFAULT_TELL_CONND_BINARY): - """ run tell-connd --status, return output iterator and return code - - catches all it can, so should usually return (output, return_code) - where output = [line1, line2, ...] - if return_code != 0, output's first line(s) is error message - """ - try: - output = subprocess.check_output( - [tell_connd_binary, '--status'], stderr=subprocess.STDOUT, - universal_newlines=True, shell=False, timeout=TIMEOUT) - return EX_OK, output.splitlines() - except subprocess.CalledProcessError as cpe: # non-zero return status - output = [ - 'tell-connd exited with status {0}'.format(cpe.returncode), ] - output.extend(cpe.output.splitlines()) - return cpe.returncode, output - # not python-2-compatible: - # except subprocess.TimeoutExpired as texp: - # output = [ - # 'tell-connd timed out after {0}s. Returning -1'.format( - # texp.timeout), ] - # output.extend(te.output.splitlines()) - # return -1, output - except Exception as exp: - output = [str(exp), ] - return -1, output - # end: ConndState.run_tell_connd - - @staticmethod - def get_state(tell_connd_binary=DEFAULT_TELL_CONND_BINARY): - """ get actual state from tell-connd --status - - returns (err_code, output_lines) if something goes wrong running - binary; raises assertion if output from tell-connd does not match - expected format - """ - - state = ConndState() - - err_code, all_lines = ConndState.run_tell_connd(tell_connd_binary) - if err_code != EX_OK: - return err_code, all_lines - - output = iter(all_lines) - - # first section - line = next(output).strip() - state.online_mode = regexp('online mode\s*:\s*(.+)$', line).groups()[0] - assert state.online_mode in ALL_STATES, \ - 'unexpected online mode: {0}'.format(state.online_mode) - - line = next(output).strip() - state.default_provider = regexp('default provider\s*:\s*(.*)$', - line).groups()[0] - if len(state.default_provider) == 0: - state.default_provider = None - line = next(output).strip() - assert len(line) == 0, 'expected empty line, but got {0}'.format(line) - - # subsys - line = next(output).strip() - assert line == 'subsys', 'expected subsys but got {0}'.format(line) - line = next(output).strip() - state.subsys_online = regexp('online\s*:\s*(.*)$', line)\ - .groups()[0].split() - for subsys in state.subsys_online: - assert subsys in ALL_SUBSYS, \ - 'unexpected subsys: {0}'.format(subsys) - line = next(output).strip() - state.subsys_offline = regexp('offline\s*:\s*(.*)$', line)\ - .groups()[0].split() - for subsys in state.subsys_offline: - assert subsys in ALL_SUBSYS, \ - 'unexpected subsys: {0}'.format(subsys) - line = next(output).strip() - state.subsys_disabled = regexp('disabled\s*:\s*(.*)$', line)\ - .groups()[0].split() - for subsys in state.subsys_disabled: - assert subsys in ALL_SUBSYS, \ - 'unexpected subsys: {0}'.format(subsys) - line = next(output).strip() - assert len(line) == 0, 'expected empty line, but got {0}'.format(line) - - # connection map - state.connections = [] - line = next(output).strip() - assert line == 'connection map:', \ - 'expected connection map but got {0}'.format(line) - expect_new = True - for line in output: - line = line.strip() - if len(line) == 0: - continue - if expect_new: - if line == 'end of connection map': - break - conn_name, conn_info = regexp( - '\[\s*(.+)\s*\]\s*:\s*\(\s*(.*)\s*\)', line).groups() - expect_new = False - else: - conn_actions = regexp('actions\s*:\s*\[\s*(.+)\s*\]', line)\ - .groups() - state.connections.append((conn_name, conn_info, conn_actions)) - expect_new = True - # end: for lines - assert expect_new - line = next(output).strip() - assert len(line) == 0, 'expected empty line, but got {0}'.format(line) - - # actions - line = next(output).strip() - state.actions = regexp('actions\s*:\s*(.*)', line).groups()[0].split() - if len(state.actions) == 1 and state.actions[0].strip() == '-': - state.actions = [] - line = next(output).strip() - assert len(line) == 0, 'expected empty line, but got {0}'.format(line) - - # online IPs - line = next(output).strip() - state.online_ips = regexp('list of online ips\s*:\s*(.*)', line)\ - .groups()[0].split() - if len(state.online_ips) == 1 \ - and state.online_ips[0].strip() == 'NONE': - state.online_ips = [] - line = next(output).strip() - assert len(line) == 0, 'expected empty line, but got {0}'.format(line) - - # VPNs - state.connected_vpns = [] - line = next(output).strip() - assert line == 'vpns connected:', \ - 'expected vpns connected, got {0}'.format(line) - for line in output: - line = line.strip() - if len(line) == 0: - continue - elif line == 'end of list of connected vpns': - break - else: - state.connected_vpns.append(line) - # end: for lines - line = next(output).strip() - assert len(line) == 0, 'expected empty line, but got {0}'.format(line) - - # log level - line = next(output).strip() - state.log_level, state.log_file = \ - regexp('Logging with level (.+)(?:\s+to\s+(.+))?', line).groups() - - # done - line = next(output).strip() - assert len(line) == 0, 'expected empty line, but got {0}'.format(line) - line = next(output).strip() - assert line == 'Done.', 'expect Done but got {0}'.format(line) - - return state - # end: ConndState.get_state - -# end: class ConndState - - -def test(): - """ get state and print it """ - state = ConndState.get_state() - print(state) - print(state.complete_str()) - - -def main(): - """ Main function, called when running file as script; runs test() """ - test() -# end: function main - - -if __name__ == '__main__': - main() diff --git a/test/long_term_test.py b/test/long_term_test.py deleted file mode 100755 index 01b1d92..0000000 --- a/test/long_term_test.py +++ /dev/null @@ -1,914 +0,0 @@ -#!/usr/bin/env python3 - -""" A more long-term testing of pingcheck - -Runs pingcheck parsing its log output; goes online and offline repeatedly to see if - pinging stays correct - -see also: connd/stresstest_pingcheck.py - -Christian Herdtweck, Intra2net, January 2015 -(c) Intra2net AG 2015 -""" - -# Version History -# 15/01/15 Christian Herdtweck: started creation -# 21/1/15 ch: running 3rd long-term test, seems to work nicely now - -import subprocess -import sys -import select -from re import match as regexp -from time import sleep -from datetime import timedelta, datetime as dt -from warnings import warn -from random import randint -import logging -from os.path import expanduser, dirname, isdir -from os import mkdir, EX_OK -from connd_state import ConndState -#import signal -import pickle -from strftimedelta import strftimedelta - -logger_base = logging.getLogger('long_term_test') - -######################################################################## -### CONSTANTS -######################################################################## - -version = '1.1' -default_pingcheck_bin = '/usr/intranator/bin/pingcheck' -default_tell_connd_bin = '/usr/intranator/bin/tell-connd' -default_output_prefix = './long_term_test/{0:%d%m%y_%H%M}' - -# for parsing log messages -LEVEL_DEBUG = 'dbug' -LEVEL_INFO = 'info' -LEVEL_WARN = 'warn' -LEVEL_NOTICE = 'note' -LEVEL_ERROR = 'err ' - -text_to_level = {} -text_to_level['INFO'] = LEVEL_INFO -text_to_level['DBUG'] = LEVEL_DEBUG -text_to_level['WARN'] = LEVEL_WARN -text_to_level['NOTE'] = LEVEL_NOTICE -text_to_level['ERR'] = LEVEL_ERROR - -# duration in which to remain online/offline -MIN_DURATION_SAME_STATE = 10 # [seconds] -MAX_DURATION_SAME_STATE = 50 # [seconds] - -######################################################################## -#### for config file -######################################################################## - -PING_INTERVAL = 3 # [seconds] -config_file_pattern = 'long_term_test_{0:%d%m%y_%H%M%S}.conf' -conf_start = """# config file for pingcheck testing - -default-source-network-interface=eth1 -nameserver=127.0.0.1 -hosts-down-limit={hosts_down_limit:d} -ping-fail-limit=25 -status-notifier-cmd=echo host status is ${status} -link-up-interval=2 -ping-reply-timeout=3 -""" -conf_host = """ -[host] -name={0} -interval={1:d} -ping-protocol=ICMPv4 -port=80 -source-network-interface=default -""" -host_list = (\ - 'intranator.m.i2n', \ - 'intraroute.m.i2n', \ - 'websync.m.i2n', \ - 'aqua.m.i2n', \ - 'intratdsl.m.i2n', \ - 'beholder.m.i2n', \ - 'eagle.m.i2n', \ - 'osdist.m.i2n', \ - 'tokyo.m.i2n', \ - 'blizzard.m.i2n', \ - 'phonehelp.m.i2n', \ - 'virt-dmz.m.i2n', \ - 'virt01.m.i2n', \ - 'virt02.m.i2n', \ - 'tornado.n.i2n', \ - 'auth.m.i2n', \ - 'forum.intra2net.com', \ - 'update.intranator.com', \ - 'demo-intranator.intra2net.com', \ - 'intra2net.com', \ - #'partner.intra2net.com', has same IP as intra2net.com --> trouble in Tester._host_for_ip \ - 'demo-securitygateway.intra2net.com', \ - 'demo-networksecurity.intra2net.com', \ - ) - -def create_conf_file(file_prefix): - """ create config file using constants above """ - - file_name = file_prefix + '_config.conf' - - n_hosts = len(host_list) - - with open(file_name, 'wt') as file_handle: - file_handle.write(conf_start.format(hosts_down_limit=int(n_hosts * 0.7), status='{status}')) - for host in host_list: - file_handle.write(conf_host.format(host, PING_INTERVAL)) - - return file_name, n_hosts -#end: create_conf_file - - -######################################################################## -### Exceptions -######################################################################## - -def flags_to_str(flags): - text_parts = [] - if flags & select.POLLIN: - text_parts.append('IN') - if flags & select.POLLOUT: - text_parts.append('OUT') - if flags & select.POLLPRI: - text_parts.append('PRI') - if flags & select.POLLERR: - text_parts.append('ERR') - if flags & select.POLLHUP: - text_parts.append('HUP') - if flags & select.POLLNVAL: - text_parts.append('INVALID') - return ','.join(text_parts) - -class PollFlagsException(Exception): - def __init__(self, flags, proc_name): - super(PollFlagsException, self).__init__('Unexpected flags from polling {0}: {1}'.format(\ - proc_name, flags_to_str(flags))) - -class PollHupException(Exception): - def __init__(self, proc_name): - super(PollHupException, self).__init__(\ - 'Received HUP from polling, {0} was probably killed!'.format(proc_name)) - -class TooManyWarnings(Exception): - def __init__(self): - super(TooManyWarnings, self).__init__('Too many warnings / failures!') - -######################################################################## -### PARSER -######################################################################## - -class ProcOutputParser: - """ parser for output from pingcheck process via pipes - - uses select and polling - should never silently swallow log output, main function parse should yield all lines - or emit errors - """ - - _out = None - _err = None - _poller = None - _flags = select.POLLIN | select.POLLPRI | select.POLLHUP # | select.POLLERR - _regexp = '(.+)\s+(INFO|ERR|WARN|DBUG|NOTE)\s+pingcheck\[\d+\]: (.+)\s*' - _out_fileno = None - _err_fileno = None - - SRC_OUT = 'stdout' - SRC_ERR = 'stderr' - SRC_UNK = 'unknown' - - def __init__(self, pipe_out, pipe_err): - self._out = pipe_out - self._err = pipe_err - self._poller = select.poll() - self._poller.register(pipe_out, self._flags) - self._poller.register(pipe_err, self._flags) - self._out_fileno = pipe_out.fileno() - self._err_fileno = pipe_err.fileno() - - def _read(self): - while True: - result = self._poller.poll(10) - for file_no, flags in result: - if not (flags & self._flags): - raise PollFlagsException(flags, 'pingcheck') - - if flags & select.POLLHUP: - raise PollHupException('pingcheck') - - if file_no == self._out_fileno: - src = self.SRC_OUT - line = self._out.readline() - elif file_no == self._err_fileno: - src = self.SRC_ERR - line = self._err.readline() - else: - src = self.SRC_UNK - line = '' - - if not line: - continue - # if reach here, we have a new line of text - - yield src, line.strip() - #end: ProcOutputParser._read - - def parse(self): - for src, line in self._read(): - try: - timestamp, level, message = regexp(self._regexp, line).groups() - yield src, (timestamp, text_to_level[level], message) - except: - yield src, line - - def __iter__(self): - return self.parse() - -#end: class ProcOutputParser - -######################################################################## -### TESTER -######################################################################## - -class Tester: - """ """ - - _line_iter = None - _last_request = None - _last_reply = None - _n_requests = None - _n_replies = None - _host_for_ip = None - _next_state_change = None - _tell_connd_bin = None - _prev_state_changes = None - _fails = None - _log = None - _log_ping = None - max_fails = None - - def __init__(self, line_iter, tell_connd_bin, max_fails): - self._line_iter = line_iter - self._last_request = {} - self._last_reply = {} - self._n_requests = {} - self._n_replies = {} - self._host_for_ip = {} - self._tell_connd_bin = tell_connd_bin - self._prev_state_changes = [] - self._fails = [] - self.max_fails = max_fails + 2 # for _test_the_tester - - self._log = logger_base.getChild('Tester') - self._log_ping = {} - self._log_ping[ProcOutputParser.SRC_OUT] = logger_base.getChild('pingOut') - self._log_ping[ProcOutputParser.SRC_ERR] = logger_base.getChild('pingErr') - - - def test(self): - # preparations - current_state = self._is_online() - while current_state is None: - sleep(1) - current_state = self._is_online() - self._log.info('Start with state online = {0}'.format(current_state)) - self._prev_state_changes.append((current_state, dt.now())) - self._schedule_next_state_change() - - self._test_the_tester() - - # start looping over output from observed process - for src, text_info in self._line_iter: - - # first of all: write to pinger log so nothing is lost - if isinstance(text_info, str): # ProcOutputParser could not parse - msg = 'unparsed: {0}'.format(text_info) - if text_info.startswith("RECV BUF ("): - self._log_ping[src].debug(msg) - else: - self._log_ping[src].warning(msg) - continue # no need to parse message - else: - text_time, level, message = text_info - msg = '{0} {1}'.format(level, message) - - if level in (LEVEL_WARN, LEVEL_ERROR, LEVEL_NOTICE): - # most common warning/error is only logged at debug level - if message.endswith('could not be resolved.'): - self._log_ping[src].debug(msg) - continue - elif message.endswith('fail sending ping data. Network is unreachable'): - self._log_ping[src].debug(msg) - continue - elif message.endswith('fail sending ping data. Operation not permitted'): - self._log_ping[src].debug(msg) - continue - else: # unexpected errors and warnings from pingcheck are logged at info level - self._log_ping[src].info(msg) - # no continue --> try analyze - else: # normal messages are logged at debug level - self._log_ping[src].debug(msg) - # no continue --> try analyze - - # now deal with message contents - if self._handle_request(message): - continue - if self._handle_reply(message): - continue - if self._handle_change_state(): - continue - if self._handle_new_ip(message): - continue - #end: for all lines from pingcheck output - #end: Tester.test - - def _test_the_tester(self): - self._warn("this is a test warning without exception information") - try: - raise NotImplementedError('Test Exception') - except: - self._warn("this is a test warning including exception information", exception=True) - self._log.info('you should have seen 2 failures above -- only be alarmed if not!') - - - def _handle_new_ip(self, message): - # "Have IP 11.22.33.44 [61963s] for host.domain.com" - matches = regexp('Have IP (\d+\.\d+\.\d+\.\d+) \[\d+s\] for (.+)', message) - if matches: - ip,host_name = matches.groups() - else: - matches = regexp('Host is already an IP: (\d+\.\d+\.\d+\.\d+)', message) - if matches: - host_name = ip = matches.groups()[0] - else: - return False - # now have host_name and ip - - self._log.info('remember new IP {0} for {1}'.format(ip, host_name)) - self._host_for_ip[ip] = host_name - self._last_request[host_name] = dt.now() - self._last_reply[host_name] = dt.now() - self._n_requests[host_name] = 0 - self._n_replies[host_name] = 0 - return True - - - def _handle_request(self, message): - matches = regexp('(\d+\.\d+\.\d+\.\d+): sending ping', message) - if matches is not None: - ip = matches.groups()[0] - host_name = self._host_for_ip[ip] - self._log.info('pinging host {0} ({1})'.format(host_name, ip)) - self._last_request[host_name] = dt.now() - self._n_requests[host_name] += 1 - return True - return False - - - def _handle_reply(self, message): - matches = regexp('(\d+\.\d+\.\d+\.\d+): Received reply', message) - if matches is not None: - ip = matches.groups()[0] - host_name = self._host_for_ip[ip] - self._log.info('received reply from {0} ({1})'.format(host_name, ip)) - self._last_reply[host_name] = dt.now() - self._n_replies[host_name] += 1 - return True - return False - - - def _handle_change_state(self): - if dt.now() < self._next_state_change: - return False - - is_online = self._is_online() - if is_online is None: - return False # try again later - - expect_online = self._prev_state_changes[-1][0] - if expect_online != is_online: - self._warn('Expect to be online={0} since {1} but are online={2}!'.format(\ - expect_online, self._prev_state_changes[-1][1], is_online)) - - if is_online: - self._ensure_all_pinged() - new_val = False - else: - new_val = True - if self._set_online(new_val): - self._schedule_next_state_change() - return True - # if call to tell_connd fails, try again later - #end: _handle_change_state - - - def _schedule_next_state_change(self): - n_secs = randint(MIN_DURATION_SAME_STATE, MAX_DURATION_SAME_STATE) - self._next_state_change = dt.now() + timedelta(seconds=n_secs) - self._log.info('schedule next state change for {0:%H:%M:%S} (in {1:d}s)'.format(\ - self._next_state_change, n_secs)) - #end: Tester._schedule_next_state_change - - - def _is_online(self): - """ determine state using ConndState """ - state = ConndState.get_state(self._tell_connd_bin) - if not isinstance(state, ConndState): - err_code, output_lines = state - self._warn('Non-zero exit status asking tell-connd for its --status: {0}'.format(\ - err_code, output_lines[0])) - return None - - return len(state.connections) > 0 - #end: Tester._is_online - - - def _set_online(self, new_val): - """ run tell-connd --online/offline P1 - - returns true if afterwards are in requested state - """ - if new_val == self._is_online(): - self._warn('Want to set online state to current state!') - return True - if new_val: - self._log.info('going online') - args = [self._tell_connd_bin, '--online', 'P1'] - else: - self._log.info('going offline') - args = [self._tell_connd_bin, '--offline'] - - try: - return_code = subprocess.call(args, stderr=subprocess.STDOUT, timeout=1) - if return_code == 0: - self._prev_state_changes.append((new_val, dt.now())) - return True - else: - self._log.info('tell-connd returned {0} --> online state probably unchanged!'.format(\ - return_code)) - return False - except subprocess.TimeoutExpired as te: - self._warn('Timeout expired running {0}. Output was {1}'.format(args, te.output)) - return False - except: - self._warn('Trouble calling tell-connd', exception=True) - return False - #end: Tester._set_online - - - def _ensure_all_pinged(self): - """ called before going offline; check if all hosts were successfully pinged """ - # determine when we last went online - time_went_online = None - time_go_offline = dt.now() - - for went_online, change_time in reversed(self._prev_state_changes): - if went_online: - time_went_online = change_time - break - if time_went_online is None: - # never been online --> nothing to check yet - return - - self._log.info('checking whether all hosts were pinged between ' - + '{0:%H:%M:%S} and now ({1:%H:%M:%S})'.format(\ - time_went_online, time_go_offline)) - for host_name, reply_time in self._last_reply.items(): - if reply_time < time_went_online: - self._log.warning(\ - 'Did not ping host {0} between {1:%H:%M:%S} and now ({2:%H:%M:%S})!'.format(\ - host_name, time_went_online, time_go_offline)) - self._fails.append((host_name, time_went_online, time_go_offline)) - self._log.info('remembered as failure number {0} / {1}'.format(\ - len(self._fails), self.max_fails)) - - # check if there were too many fails; if so, stop - if len(self._fails) > self.max_fails: - self._fails.append(("WARN", dt.now(), "Too many failurs/warnings!")) - raise TooManyWarnings() - #end: if not pinged recently - #end: for all known ips - #end: Tester._ensure_all_pinged - - - def _warn(self, message, exception=False): - - if exception: - self._log.exception(message, exc_info=True) - else: - self._log.warning(message) - self._fails.append(("WARN", dt.now(), message)) - self._log.info('remembered as failure number {0} / {1}'.format(\ - len(self._fails), self.max_fails)) - - # check if there were too many fails; if so, stop - if len(self._fails) > self.max_fails: - self._fails.append(("WARN", dt.now(), "Too many failurs/warnings!")) - raise TooManyWarnings() - #end: Tester._warn - - - def print_failures(self, log): - if len(self._fails) == 0: - log.info('No warnings nor failures since {0:%d.%m.%y, %H:%M} :-)'.format(\ - self._prev_state_changes[0][1])) - else: - log.info('Recorded the following failures and warnings:') - for host_name, time_went_online, time_go_offline in self._fails: - if host_name == 'WARN': - log.info('Warning at {0:%H:%M:%S}: {1}'.format(time_went_online, time_go_offline)) - else: - log.info('Failed to ping {0} between {1:%H:%M:%S} and {2:%H:%M:%S} ({3:.2f}s)'\ - .format(host_name, time_went_online, time_go_offline, \ - (time_go_offline-time_went_online).total_seconds())) - - #end: Tester.print_failures - - def save_state_and_history(self, save_file): - """ save all other data for later inspection using show_saved_data """ - try: - with open(save_file, 'wb') as file_handle: - self._log.info('saving state and history to {0}'.format(save_file)) - pickle.dump( (self._last_request, self._last_reply, self._n_requests, self._n_replies, \ - self._host_for_ip, self._tell_connd_bin, self._prev_state_changes, \ - self._fails, self.max_fails, save_file, dt.now()), \ - file_handle) - return True - except Exception as e: - self._log.exception("Failed to save!", exc_info=True) - return False - #end: Tester.save_state_and_history - - - @staticmethod - def show_saved_data(save_file): - """ print info on data saved by save_state_and_history """ - - # load - save_date = None - with open(save_file, 'rb') as file_handle: - print('loading state and history from {0}'.format(save_file)) - last_request, last_reply, n_requests, n_replies, host_for_ip, tell_connd_bin, \ - state_changes, fails, max_fails, save_file_again, save_date \ - = pickle.load(file_handle) - if save_date is None: - print('Failed to read state and history from {0}'.format(save_file)) - return 1 - - # simple stats - first_change = state_changes[0][1] - last_change = state_changes[-1][1] - print('Performed {0} state changes between {1:%d.%m,%H:%M} and {2:%d.%m,%H:%M}' \ - .format(len(state_changes), first_change, last_change), end='') - print(' ({0} --> one every {1} on average)'.format( \ - strftimedelta( last_change - first_change ), \ - strftimedelta((last_change - first_change) / len(state_changes)) )) - print('Used {0} IPs for {1} hosts'.format(len(host_for_ip), len(last_request))) - print('Overall, recorded {0} warnings/failures (max={1})'.format(len(fails), max_fails)) - - # sort fails into warnings and fail per host and fail per time - warnings = [] - n_test_warnings = 0 - fail_per_host = {} - fail_per_time = {} - n_fails = 0 - for host, time_online, time_offline in fails: - if host == 'WARN': - # time_online = time, time_offline=message - if 'test warning' in time_offline: - n_test_warnings += 1 - else: - warnings.append((time_online, time_offline)) - else: - n_fails += 1 - try: - fail_per_host[host].append(time_online) - except KeyError: - fail_per_host[host] = [time_online,] - duration = time_offline - time_online - try: - fail_per_time[time_online, duration].append(host) - except KeyError: - fail_per_time[time_online, duration] = [host, ] - - # stats per host - for host, n_req in n_requests.items(): - print(host + ' (', end='') - print(','.join( ip for ip, host_for_ip in host_for_ip.items() if host==host_for_ip ), end='') - print('): \treplied to {1} of {0} pings ({2:.1f}%); '.format(\ - n_req, n_replies[host], 100. * n_replies[host]/n_req), end='') - if host in fail_per_host: - print('failed to ping at ', end='') - print(','.join( '{0:%H:%M},'.format(fail_time) for fail_time in fail_per_host[host] )) - else: - print('replied in each test interval') - - # stats per time - if fail_per_time: - print("Ping failures per time (overall {0} fails in {1} intervals):".format(\ - n_fails, len(fail_per_time))) - for (start, duration), host_list in fail_per_time.items(): - print('{0:%d.%m,%H:%M} + {1}: '.format(start, strftimedelta(duration)), end='') - print(','.join(host_list)) - - # warnings - if warnings: - if n_test_warnings: - print("Warnings (not showing {0} test warnings):".format(n_test_warnings)) - else: - print("Warnings:") - for time, message in warnings: - print("{0:%d.%m,%H:%M}: {1}".format(time, message)) - elif n_test_warnings: - print('Only warnings were {0} for testing'.format(n_test_warnings)) - - # done - return 0 - #end: static Tester.show_saved_data - -#end: class Tester - -######################################################################## -### MAIN -######################################################################## - -def test_version(pingcheck_binary): - """ run pingcheck --version; we want > 0.1 for proper IP parsing - - no try-catch here, also intended as test-run for pingcheck binary - """ - - output = subprocess.check_output([pingcheck_binary, '--version'], universal_newlines=True) - match = regexp('\s*pingcheck version (\d+\.\d+)\s*', output) - if match is None: - raise ValueError('Could not determine pingcheck version from output of {0} --version!'\ - .format(pingcheck_binary)) - version = float(match.groups()[0]) - if version < 0.2: - raise ValueError('Pingcheck version is {0} but require version > 0.1!'.format(version)) - -#end: test_version - - -def parse_args(): - """ parse command line arguments and set defaults """ - from argparse import ArgumentParser - parser = ArgumentParser(description="Long-term testing of pingcheck: goes online and offline " \ - + "using tell-connd and checks log for correct output", \ - epilog="(c) Christian Herdtweck, Intra2net AG, 2015") - - # option to show saved data - parser.add_argument('--show-saved-data', '-s', metavar='SAVE_FILE.pickle', \ - default='', type=str, \ - help='Do not run test but instead print info saved by earlier test run') - - # binaries - parser.add_argument('--pingcheck-binary', '-p', metavar='PATH', \ - default=default_pingcheck_bin, \ - help='path to pingcheck binary') - parser.add_argument('--tell-connd-binary', '-t', metavar='PATH', \ - default=default_tell_connd_bin, \ - help='path to tell-connd binary') - - # output - parser.add_argument('--output-prefix', '-o', metavar='PATH', \ - default=default_output_prefix, \ - help='path and/or file prefix to config and log files; ~=home, 0=prog start datetime; ' \ - + 'default="%(default)s"') - parser.add_argument('--log-to-console', '-c', metavar='0|1', \ - choices=(0, 1), type=int, default=1, \ - help='print all output to console? default=%(default)s') - parser.add_argument('--log-level-console', metavar='LEVEL', \ - type=int, default=logging.INFO, \ - help='log level for console output; ' \ - + 'debug={0}, info={1}, warn={2}; default: %(default)s'.format(\ - logging.DEBUG, logging.INFO, logging.WARN)) - parser.add_argument('--log-to-file', '-f', metavar='0|1', \ - choices=(0, 1), type=int, default=1, \ - help='print all output to log file? default=%(default)s') - parser.add_argument('--log-level-file', metavar='LEVEL', \ - type=int, default=logging.DEBUG, \ - help='log level for console output; ' \ - + 'debug={0}, info={1}, warn={2}; default: %(default)s'.format(\ - logging.DEBUG, logging.INFO, logging.WARN)) - parser.add_argument('--log-file-suffix', '-l', metavar='SUFF.IX', \ - type=str, default='.log', \ - help='2nd part of log file name after --output_prefix; default="%(default)s"') - parser.add_argument('--pinger-log-level', metavar='LEVEL', \ - type=str, default='INFO', \ - help='log level given to pingchecker, DEBUG might quickly fill log; default=%(default)s') - - - parser.add_argument('--max-fails-per-host', '-m', metavar='N', type=int, default=10, \ - help='max number of failures/warnings per host before test will end; default: %(default)s') - parser.add_argument('--version', action='version', version='%(prog)s {0}'.format(version)) - - # parse - opts = parser.parse_args() - opts.output_prefix = expanduser(opts.output_prefix).format(dt.now()) - return opts -#end: parse_args - - -def setup_logging(opts): - """ create main logger and handlers for base logger """ - - # create logger for main - log = logger_base.getChild('main') - - # remove all existing handlers - for old_handler in logger_base.handlers: - print('removing handler from base') - logger_base.removeHandler(old_handler) - for old_handler in log.handlers: - print('removing handler from log') - log.removeHandler(old_handler) - - logger_base.setLevel(logging.DEBUG) - - # create handlers and format for base logger - log_file = None - formatter = logging.Formatter('%(asctime)s %(name)s %(levelname)s %(message)s', \ - datefmt='%d.%m,%H:%M:%S') - if opts.log_to_console: - handler = logging.StreamHandler(sys.stdout) - handler.setLevel(opts.log_level_console) - handler.setFormatter(formatter) - logger_base.addHandler(handler) - - if opts.log_to_file: - log_file = opts.output_prefix + opts.log_file_suffix - handler = logging.FileHandler(log_file) - handler.setLevel(opts.log_level_file) - handler.setFormatter(formatter) - logger_base.addHandler(handler) - - # log info - log.info('Log for {0} in {1} created on {2}'.format(__name__, __file__, dt.now())) - if opts.log_to_console: - log.info('logging to console at level {0}'.format(opts.log_level_console)) - else: - log.info('not logging to console') - if opts.log_to_file: - log.info('logging to file {1} at level {0}'.format(opts.log_level_file, log_file)) - else: - log.info('not logging to file') - - # return log_file only if makes sense to talk about it in main - if opts.log_to_console: - return log, log_file - else: - return log, None -#end: setup_logging - - -def handle_sigchild(signum, curr_frame, log, cleanup_args): - log.error('Received SIGCHLD') - log.info('with args {0} and {1}'.format(signum, curr_frame)) - - cleanup(*cleanup_args) - - -def cleanup(log, pinger, tester, line_iter, save_file): - """ the stuff from finally clause in main loop; also called from handle_sigchild """ - - return_code = 255 - - log.info('-' * 72) - if pinger is None: - log.warning('Error before pinger was created') - return_code = 254 - else: - log.info("Cleaning up") - - try: - log.info('telling pinger to terminate') - pinger.terminate() - - for line_idx, line in enumerate(line_iter): - log.info('left-over output: {0}'.format(line)) - if line_idx > 100: - log.warning('ignore further left-over output') - break - - #return_code = pinger.wait(3) can block! - outs, errs = pinger.communicate(timeout=3) - for line in outs.splitlines(): - log.info('left-over output: {0}'.format(line)) - for line in errs.splitlines(): - log.info('left-over output: {0}'.format(line)) - - return_code = pinger.returncode # may still be None - log.info('pinger terminated with return code {0}'.format(return_code)) - except subprocess.TimeoutExpired as te: - log.warning('Timeout expired waiting for pinger to terminate: {0}'.format(te)) - return_code = 253 - except: - log.exception('trouble terminating pinger', exc_info=True) - - if return_code is None: - for line_idx, line in enumerate(line_iter): - log.info('left-over output: {0}'.format(line)) - if line_idx > 100: - log.warning('ignore further left-over output') - break - - # kill it - try: - log.info('killing pinger the hard way') - pinger.kill() - outs, errs = pinger.communicate(timeout=3) - for line in outs.splitlines(): - log.info('left-over output: {0}'.format(line)) - for line in errs.splitlines(): - log.info('left-over output: {0}'.format(line)) - - except: - log.exception('trouble killing pinger', exc_info=True) - - if tester is not None: - tester.save_state_and_history(save_file) - tester.print_failures(log) - #end: if have pinger - - # reset original signal handler - #signal.signal(signal.SIGCHLD, signal.SIG_DFL) - - return return_code -#end: cleanup - - -def main(): - """ parses args, create a Pinger and Parser and Tester and runs them; cleans up - """ - - opts = parse_args() - - if opts.show_saved_data: - return Tester.show_saved_data(opts.show_saved_data) - - base_dir = dirname(opts.output_prefix + "_test") - if not isdir(base_dir): - mkdir(base_dir) - - log, log_file = setup_logging(opts) - - pinger = None - return_code = 255 - tester = None - line_iter = None - try: - config_file, n_hosts = create_conf_file(opts.output_prefix) - save_file = opts.output_prefix + '_save.pickle' - log.info('created config file {0}'.format(config_file)) - - # test version - test_version(opts.pingcheck_binary) - - #signal.signal(signal.SIGCHLD, \ - # lambda signum, curr_frame: handle_sigchild(signum, curr_frame, log, \ - # (log, pinger, tester, line_iter, save_file) ) - - # pingcheck subprocess - pinger = subprocess.Popen([opts.pingcheck_binary, '--log-level', opts.pinger_log_level, \ - '--log-output', 'TERMINAL', '--config-file', config_file], \ - stdout=subprocess.PIPE, stderr=subprocess.PIPE, \ - universal_newlines=True) - log.info('started pingcheck with PID {0}'.format(pinger.pid)) - - parser = ProcOutputParser(pinger.stdout, pinger.stderr) - line_iter = iter(parser) - tester = Tester(line_iter, opts.tell_connd_binary, max(10, opts.max_fails_per_host*n_hosts)) - - tester.test() - except KeyboardInterrupt: - log.info('stopped by user') - except: - log.exception('Caught an exception in main loop', exc_info=True) - log.warning('Re-raise') - raise - finally: - return_code = cleanup(log, pinger, tester, line_iter, save_file) - - log.info('Will stop now') - if log_file is not None: - log.info('might find more log output in {0}'.format(log_file)) - log.info('-' * 72) - #end: global clean-up - - return return_code - -#end: function main - - -if __name__ == '__main__': - main() - logging.shutdown() - diff --git a/test/strftimedelta.py b/test/strftimedelta.py deleted file mode 100644 index ef08055..0000000 --- a/test/strftimedelta.py +++ /dev/null @@ -1,103 +0,0 @@ -#!/usr/bin/env python3 - -""" give a nice string representation for a duration - -Christian Herdtweck, Intra2net, November 2014 -Licensed under GPL -""" - -from datetime import timedelta - -# Version History -# 24/11/14 Christian Herdtweck: started creation by copying strftimedelta - - -def strftimedelta(delta, max_parts=2, only_up_to_hours=False): - """ nice and length-adjustable text representation of a timespan - - will output the most significant non-zero time units, e.g. - '2h23m50s' or '3d50s12ms' (skipping 0 minutes), or '3y63d12h' - or for max_parts=2: '2h23m' / '3d50s' / '3y63d' - - Assumes that each year has 365 days, every day 24h, every hour 60 mins, ... - not accounting for leap years / leap days, changes of timezones, etc - - @delta:timedelta or anything that can be converted to float; - if numeric, this is interpreted as number of seconds - @param max_parts: precision (i.e., length) of output (default: 2) - set to None if always want precision down to microseconds - @param only_up_to_hours: set to True to never give number of days/years - (e.g. to get '40h' instead of '1d16h') - default: False - - returns a string - """ - - units = '\u00B5s', 'ms', 's', 'm', 'h', 'd', 'y' - - if (max_parts is None) or (max_parts < 1): - max_parts = 100 - - # get number of seconds and sign - if isinstance(delta, timedelta): - seconds = delta.total_seconds() - else: - seconds = float(delta) - negative = False - if seconds < 0: - negative = True - seconds = abs(seconds) - - # split sub-second part - seconds, second_part = divmod(seconds, 1) - milliseconds, milli_part = divmod(second_part*1000, 1) - microseconds = int(round(milli_part*1000)) - seconds = int(seconds) # convert to int here --> all other variables will be int - vals = [microseconds, int(milliseconds), ] - - # split longer units - minutes = hours = days = years = 0 - if seconds > 0: - minutes, seconds = divmod(seconds, 60) - if minutes > 0: - hours, minutes = divmod(minutes, 60) - if hours > 0 and not only_up_to_hours: - days, hours = divmod(hours, 24) - if days > 0: - years, days = divmod(days, 365) - vals.append(seconds) - vals.append(minutes) - vals.append(hours) - vals.append(days) - vals.append(years) - - # build string - str_parts = [] - if negative: - str_parts.append('-') - - n_used = 0 - for val,unit in zip( reversed(vals), reversed(units) ): - if val == 0: - continue - str_parts.append(str(val)) - str_parts.append(unit) - n_used += 1 - if n_used >= max_parts: - break - return ''.join(str_parts) - -#end: strftimedelta - - - -def main(): - """ Main function, called when running file as script; currently raises a NotImplementedError - """ - raise NotImplementedError('nothing to run here -- only a lib!') -#end: function main - - -if __name__ == '__main__': - main() - -- 1.7.1