3 """ A more long-term testing of pingcheck
5 Runs pingcheck parsing its log output; goes online and offline repeatedly to see if
8 see also: connd/stresstest_pingcheck.py
10 Christian Herdtweck, Intra2net, January 2015
15 # 15/01/15 Christian Herdtweck: started creation
16 # 21/1/15 ch: running 3rd long-term test, seems to work nicely now
21 from re import match as regexp
22 from time import sleep
23 from datetime import timedelta, datetime as dt
24 from warnings import warn
25 from random import randint
27 from os.path import expanduser, dirname, isdir
28 from os import mkdir, EX_OK
29 from connd_state import ConndState
32 from strftimedelta import strftimedelta
34 logger_base = logging.getLogger('long_term_test')
36 ########################################################################
38 ########################################################################
41 default_pingcheck_bin = '/usr/intranator/bin/pingcheck'
42 default_tell_connd_bin = '/usr/intranator/bin/tell-connd'
43 default_output_prefix = './long_term_test/{0:%d%m%y_%H%M}'
45 # for parsing log messages
53 text_to_level['INFO'] = LEVEL_INFO
54 text_to_level['DBUG'] = LEVEL_DEBUG
55 text_to_level['WARN'] = LEVEL_WARN
56 text_to_level['NOTE'] = LEVEL_NOTICE
57 text_to_level['ERR'] = LEVEL_ERROR
59 # duration in which to remain online/offline
60 MIN_DURATION_SAME_STATE = 10 # [seconds]
61 MAX_DURATION_SAME_STATE = 50 # [seconds]
63 ########################################################################
65 ########################################################################
67 PING_INTERVAL = 3 # [seconds]
68 config_file_pattern = 'long_term_test_{0:%d%m%y_%H%M%S}.conf'
69 conf_start = """# config file for pingcheck testing
71 default-source-network-interface=eth1
73 hosts-down-limit={hosts_down_limit:d}
75 status-notifier-cmd=echo host status is ${status}
85 source-network-interface=default
104 'forum.intra2net.com', \
105 'update.intranator.com', \
106 'demo-intranator.intra2net.com', \
108 #'partner.intra2net.com', has same IP as intra2net.com --> trouble in Tester._host_for_ip \
109 'demo-securitygateway.intra2net.com', \
110 'demo-networksecurity.intra2net.com', \
113 def create_conf_file(file_prefix):
114 """ create config file using constants above """
116 file_name = file_prefix + '_config.conf'
118 n_hosts = len(host_list)
120 with open(file_name, 'wt') as file_handle:
121 file_handle.write(conf_start.format(hosts_down_limit=int(n_hosts * 0.7), status='{status}'))
122 for host in host_list:
123 file_handle.write(conf_host.format(host, PING_INTERVAL))
125 return file_name, n_hosts
126 #end: create_conf_file
129 ########################################################################
131 ########################################################################
133 def flags_to_str(flags):
135 if flags & select.POLLIN:
136 text_parts.append('IN')
137 if flags & select.POLLOUT:
138 text_parts.append('OUT')
139 if flags & select.POLLPRI:
140 text_parts.append('PRI')
141 if flags & select.POLLERR:
142 text_parts.append('ERR')
143 if flags & select.POLLHUP:
144 text_parts.append('HUP')
145 if flags & select.POLLNVAL:
146 text_parts.append('INVALID')
147 return ','.join(text_parts)
149 class PollFlagsException(Exception):
150 def __init__(self, flags, proc_name):
151 super(PollFlagsException, self).__init__('Unexpected flags from polling {0}: {1}'.format(\
152 proc_name, flags_to_str(flags)))
154 class PollHupException(Exception):
155 def __init__(self, proc_name):
156 super(PollHupException, self).__init__(\
157 'Received HUP from polling, {0} was probably killed!'.format(proc_name))
159 class TooManyWarnings(Exception):
161 super(TooManyWarnings, self).__init__('Too many warnings / failures!')
163 ########################################################################
165 ########################################################################
167 class ProcOutputParser:
168 """ parser for output from pingcheck process via pipes
170 uses select and polling
171 should never silently swallow log output, main function parse should yield all lines
178 _flags = select.POLLIN | select.POLLPRI | select.POLLHUP # | select.POLLERR
179 _regexp = '(.+)\s+(INFO|ERR|WARN|DBUG|NOTE)\s+pingcheck\[\d+\]: (.+)\s*'
187 def __init__(self, pipe_out, pipe_err):
190 self._poller = select.poll()
191 self._poller.register(pipe_out, self._flags)
192 self._poller.register(pipe_err, self._flags)
193 self._out_fileno = pipe_out.fileno()
194 self._err_fileno = pipe_err.fileno()
198 result = self._poller.poll(10)
199 for file_no, flags in result:
200 if not (flags & self._flags):
201 raise PollFlagsException(flags, 'pingcheck')
203 if flags & select.POLLHUP:
204 raise PollHupException('pingcheck')
206 if file_no == self._out_fileno:
208 line = self._out.readline()
209 elif file_no == self._err_fileno:
211 line = self._err.readline()
218 # if reach here, we have a new line of text
220 yield src, line.strip()
221 #end: ProcOutputParser._read
224 for src, line in self._read():
226 timestamp, level, message = regexp(self._regexp, line).groups()
227 yield src, (timestamp, text_to_level[level], message)
234 #end: class ProcOutputParser
236 ########################################################################
238 ########################################################################
249 _next_state_change = None
250 _tell_connd_bin = None
251 _prev_state_changes = None
257 def __init__(self, line_iter, tell_connd_bin, max_fails):
258 self._line_iter = line_iter
259 self._last_request = {}
260 self._last_reply = {}
261 self._n_requests = {}
263 self._host_for_ip = {}
264 self._tell_connd_bin = tell_connd_bin
265 self._prev_state_changes = []
267 self.max_fails = max_fails + 2 # for _test_the_tester
269 self._log = logger_base.getChild('Tester')
271 self._log_ping[ProcOutputParser.SRC_OUT] = logger_base.getChild('pingOut')
272 self._log_ping[ProcOutputParser.SRC_ERR] = logger_base.getChild('pingErr')
277 current_state = self._is_online()
278 while current_state is None:
280 current_state = self._is_online()
281 self._log.info('Start with state online = {0}'.format(current_state))
282 self._prev_state_changes.append((current_state, dt.now()))
283 self._schedule_next_state_change()
285 self._test_the_tester()
287 # start looping over output from observed process
288 for src, text_info in self._line_iter:
290 # first of all: write to pinger log so nothing is lost
291 if isinstance(text_info, str): # ProcOutputParser could not parse
292 msg = 'unparsed: {0}'.format(text_info)
293 if text_info.startswith("RECV BUF ("):
294 self._log_ping[src].debug(msg)
296 self._log_ping[src].warning(msg)
297 continue # no need to parse message
299 text_time, level, message = text_info
300 msg = '{0} {1}'.format(level, message)
302 if level in (LEVEL_WARN, LEVEL_ERROR, LEVEL_NOTICE):
303 # most common warning/error is only logged at debug level
304 if message.endswith('could not be resolved.'):
305 self._log_ping[src].debug(msg)
307 elif message.endswith('fail sending ping data. Network is unreachable'):
308 self._log_ping[src].debug(msg)
310 elif message.endswith('fail sending ping data. Operation not permitted'):
311 self._log_ping[src].debug(msg)
313 else: # unexpected errors and warnings from pingcheck are logged at info level
314 self._log_ping[src].info(msg)
315 # no continue --> try analyze
316 else: # normal messages are logged at debug level
317 self._log_ping[src].debug(msg)
318 # no continue --> try analyze
320 # now deal with message contents
321 if self._handle_request(message):
323 if self._handle_reply(message):
325 if self._handle_change_state():
327 if self._handle_new_ip(message):
329 #end: for all lines from pingcheck output
332 def _test_the_tester(self):
333 self._warn("this is a test warning without exception information")
335 raise NotImplementedError('Test Exception')
337 self._warn("this is a test warning including exception information", exception=True)
338 self._log.info('you should have seen 2 failures above -- only be alarmed if not!')
341 def _handle_new_ip(self, message):
342 # "Have IP 11.22.33.44 [61963s] for host.domain.com"
343 matches = regexp('Have IP (\d+\.\d+\.\d+\.\d+) \[\d+s\] for (.+)', message)
345 ip,host_name = matches.groups()
347 matches = regexp('Host is already an IP: (\d+\.\d+\.\d+\.\d+)', message)
349 host_name = ip = matches.groups()[0]
352 # now have host_name and ip
354 self._log.info('remember new IP {0} for {1}'.format(ip, host_name))
355 self._host_for_ip[ip] = host_name
356 self._last_request[host_name] = dt.now()
357 self._last_reply[host_name] = dt.now()
358 self._n_requests[host_name] = 0
359 self._n_replies[host_name] = 0
363 def _handle_request(self, message):
364 matches = regexp('(\d+\.\d+\.\d+\.\d+): sending ping', message)
365 if matches is not None:
366 ip = matches.groups()[0]
367 host_name = self._host_for_ip[ip]
368 self._log.info('pinging host {0} ({1})'.format(host_name, ip))
369 self._last_request[host_name] = dt.now()
370 self._n_requests[host_name] += 1
375 def _handle_reply(self, message):
376 matches = regexp('(\d+\.\d+\.\d+\.\d+): Received reply', message)
377 if matches is not None:
378 ip = matches.groups()[0]
379 host_name = self._host_for_ip[ip]
380 self._log.info('received reply from {0} ({1})'.format(host_name, ip))
381 self._last_reply[host_name] = dt.now()
382 self._n_replies[host_name] += 1
387 def _handle_change_state(self):
388 if dt.now() < self._next_state_change:
391 is_online = self._is_online()
392 if is_online is None:
393 return False # try again later
395 expect_online = self._prev_state_changes[-1][0]
396 if expect_online != is_online:
397 self._warn('Expect to be online={0} since {1} but are online={2}!'.format(\
398 expect_online, self._prev_state_changes[-1][1], is_online))
401 self._ensure_all_pinged()
405 if self._set_online(new_val):
406 self._schedule_next_state_change()
408 # if call to tell_connd fails, try again later
409 #end: _handle_change_state
412 def _schedule_next_state_change(self):
413 n_secs = randint(MIN_DURATION_SAME_STATE, MAX_DURATION_SAME_STATE)
414 self._next_state_change = dt.now() + timedelta(seconds=n_secs)
415 self._log.info('schedule next state change for {0:%H:%M:%S} (in {1:d}s)'.format(\
416 self._next_state_change, n_secs))
417 #end: Tester._schedule_next_state_change
420 def _is_online(self):
421 """ determine state using ConndState """
422 state = ConndState.get_state(self._tell_connd_bin)
423 if not isinstance(state, ConndState):
424 err_code, output_lines = state
425 self._warn('Non-zero exit status asking tell-connd for its --status: {0}'.format(\
426 err_code, output_lines[0]))
429 return len(state.connections) > 0
430 #end: Tester._is_online
433 def _set_online(self, new_val):
434 """ run tell-connd --online/offline P1
436 returns true if afterwards are in requested state
438 if new_val == self._is_online():
439 self._warn('Want to set online state to current state!')
442 self._log.info('going online')
443 args = [self._tell_connd_bin, '--online', 'P1']
445 self._log.info('going offline')
446 args = [self._tell_connd_bin, '--offline']
449 return_code = subprocess.call(args, stderr=subprocess.STDOUT, timeout=1)
451 self._prev_state_changes.append((new_val, dt.now()))
454 self._log.info('tell-connd returned {0} --> online state probably unchanged!'.format(\
457 except subprocess.TimeoutExpired as te:
458 self._warn('Timeout expired running {0}. Output was {1}'.format(args, te.output))
461 self._warn('Trouble calling tell-connd', exception=True)
463 #end: Tester._set_online
466 def _ensure_all_pinged(self):
467 """ called before going offline; check if all hosts were successfully pinged """
468 # determine when we last went online
469 time_went_online = None
470 time_go_offline = dt.now()
472 for went_online, change_time in reversed(self._prev_state_changes):
474 time_went_online = change_time
476 if time_went_online is None:
477 # never been online --> nothing to check yet
480 self._log.info('checking whether all hosts were pinged between '
481 + '{0:%H:%M:%S} and now ({1:%H:%M:%S})'.format(\
482 time_went_online, time_go_offline))
483 for host_name, reply_time in self._last_reply.items():
484 if reply_time < time_went_online:
486 'Did not ping host {0} between {1:%H:%M:%S} and now ({2:%H:%M:%S})!'.format(\
487 host_name, time_went_online, time_go_offline))
488 self._fails.append((host_name, time_went_online, time_go_offline))
489 self._log.info('remembered as failure number {0} / {1}'.format(\
490 len(self._fails), self.max_fails))
492 # check if there were too many fails; if so, stop
493 if len(self._fails) > self.max_fails:
494 self._fails.append(("WARN", dt.now(), "Too many failurs/warnings!"))
495 raise TooManyWarnings()
496 #end: if not pinged recently
497 #end: for all known ips
498 #end: Tester._ensure_all_pinged
501 def _warn(self, message, exception=False):
504 self._log.exception(message, exc_info=True)
506 self._log.warning(message)
507 self._fails.append(("WARN", dt.now(), message))
508 self._log.info('remembered as failure number {0} / {1}'.format(\
509 len(self._fails), self.max_fails))
511 # check if there were too many fails; if so, stop
512 if len(self._fails) > self.max_fails:
513 self._fails.append(("WARN", dt.now(), "Too many failurs/warnings!"))
514 raise TooManyWarnings()
518 def print_failures(self, log):
519 if len(self._fails) == 0:
520 log.info('No warnings nor failures since {0:%d.%m.%y, %H:%M} :-)'.format(\
521 self._prev_state_changes[0][1]))
523 log.info('Recorded the following failures and warnings:')
524 for host_name, time_went_online, time_go_offline in self._fails:
525 if host_name == 'WARN':
526 log.info('Warning at {0:%H:%M:%S}: {1}'.format(time_went_online, time_go_offline))
528 log.info('Failed to ping {0} between {1:%H:%M:%S} and {2:%H:%M:%S} ({3:.2f}s)'\
529 .format(host_name, time_went_online, time_go_offline, \
530 (time_go_offline-time_went_online).total_seconds()))
532 #end: Tester.print_failures
534 def save_state_and_history(self, save_file):
535 """ save all other data for later inspection using show_saved_data """
537 with open(save_file, 'wb') as file_handle:
538 self._log.info('saving state and history to {0}'.format(save_file))
539 pickle.dump( (self._last_request, self._last_reply, self._n_requests, self._n_replies, \
540 self._host_for_ip, self._tell_connd_bin, self._prev_state_changes, \
541 self._fails, self.max_fails, save_file, dt.now()), \
544 except Exception as e:
545 self._log.exception("Failed to save!", exc_info=True)
547 #end: Tester.save_state_and_history
551 def show_saved_data(save_file):
552 """ print info on data saved by save_state_and_history """
556 with open(save_file, 'rb') as file_handle:
557 print('loading state and history from {0}'.format(save_file))
558 last_request, last_reply, n_requests, n_replies, host_for_ip, tell_connd_bin, \
559 state_changes, fails, max_fails, save_file_again, save_date \
560 = pickle.load(file_handle)
561 if save_date is None:
562 print('Failed to read state and history from {0}'.format(save_file))
566 first_change = state_changes[0][1]
567 last_change = state_changes[-1][1]
568 print('Performed {0} state changes between {1:%d.%m,%H:%M} and {2:%d.%m,%H:%M}' \
569 .format(len(state_changes), first_change, last_change), end='')
570 print(' ({0} --> one every {1} on average)'.format( \
571 strftimedelta( last_change - first_change ), \
572 strftimedelta((last_change - first_change) / len(state_changes)) ))
573 print('Used {0} IPs for {1} hosts'.format(len(host_for_ip), len(last_request)))
574 print('Overall, recorded {0} warnings/failures (max={1})'.format(len(fails), max_fails))
576 # sort fails into warnings and fail per host and fail per time
582 for host, time_online, time_offline in fails:
584 # time_online = time, time_offline=message
585 if 'test warning' in time_offline:
588 warnings.append((time_online, time_offline))
592 fail_per_host[host].append(time_online)
594 fail_per_host[host] = [time_online,]
595 duration = time_offline - time_online
597 fail_per_time[time_online, duration].append(host)
599 fail_per_time[time_online, duration] = [host, ]
602 for host, n_req in n_requests.items():
603 print(host + ' (', end='')
604 print(','.join( ip for ip, host_for_ip in host_for_ip.items() if host==host_for_ip ), end='')
605 print('): \treplied to {1} of {0} pings ({2:.1f}%); '.format(\
606 n_req, n_replies[host], 100. * n_replies[host]/n_req), end='')
607 if host in fail_per_host:
608 print('failed to ping at ', end='')
609 print(','.join( '{0:%H:%M},'.format(fail_time) for fail_time in fail_per_host[host] ))
611 print('replied in each test interval')
615 print("Ping failures per time (overall {0} fails in {1} intervals):".format(\
616 n_fails, len(fail_per_time)))
617 for (start, duration), host_list in fail_per_time.items():
618 print('{0:%d.%m,%H:%M} + {1}: '.format(start, strftimedelta(duration)), end='')
619 print(','.join(host_list))
624 print("Warnings (not showing {0} test warnings):".format(n_test_warnings))
627 for time, message in warnings:
628 print("{0:%d.%m,%H:%M}: {1}".format(time, message))
629 elif n_test_warnings:
630 print('Only warnings were {0} for testing'.format(n_test_warnings))
634 #end: static Tester.show_saved_data
638 ########################################################################
640 ########################################################################
642 def test_version(pingcheck_binary):
643 """ run pingcheck --version; we want > 0.1 for proper IP parsing
645 no try-catch here, also intended as test-run for pingcheck binary
648 output = subprocess.check_output([pingcheck_binary, '--version'], universal_newlines=True)
649 match = regexp('\s*pingcheck version (\d+\.\d+)\s*', output)
651 raise ValueError('Could not determine pingcheck version from output of {0} --version!'\
652 .format(pingcheck_binary))
653 version = float(match.groups()[0])
655 raise ValueError('Pingcheck version is {0} but require version > 0.1!'.format(version))
661 """ parse command line arguments and set defaults """
662 from argparse import ArgumentParser
663 parser = ArgumentParser(description="Long-term testing of pingcheck: goes online and offline " \
664 + "using tell-connd and checks log for correct output", \
665 epilog="(c) Christian Herdtweck, Intra2net AG, 2015")
667 # option to show saved data
668 parser.add_argument('--show-saved-data', '-s', metavar='SAVE_FILE.pickle', \
669 default='', type=str, \
670 help='Do not run test but instead print info saved by earlier test run')
673 parser.add_argument('--pingcheck-binary', '-p', metavar='PATH', \
674 default=default_pingcheck_bin, \
675 help='path to pingcheck binary')
676 parser.add_argument('--tell-connd-binary', '-t', metavar='PATH', \
677 default=default_tell_connd_bin, \
678 help='path to tell-connd binary')
681 parser.add_argument('--output-prefix', '-o', metavar='PATH', \
682 default=default_output_prefix, \
683 help='path and/or file prefix to config and log files; ~=home, 0=prog start datetime; ' \
684 + 'default="%(default)s"')
685 parser.add_argument('--log-to-console', '-c', metavar='0|1', \
686 choices=(0, 1), type=int, default=1, \
687 help='print all output to console? default=%(default)s')
688 parser.add_argument('--log-level-console', metavar='LEVEL', \
689 type=int, default=logging.INFO, \
690 help='log level for console output; ' \
691 + 'debug={0}, info={1}, warn={2}; default: %(default)s'.format(\
692 logging.DEBUG, logging.INFO, logging.WARN))
693 parser.add_argument('--log-to-file', '-f', metavar='0|1', \
694 choices=(0, 1), type=int, default=1, \
695 help='print all output to log file? default=%(default)s')
696 parser.add_argument('--log-level-file', metavar='LEVEL', \
697 type=int, default=logging.DEBUG, \
698 help='log level for console output; ' \
699 + 'debug={0}, info={1}, warn={2}; default: %(default)s'.format(\
700 logging.DEBUG, logging.INFO, logging.WARN))
701 parser.add_argument('--log-file-suffix', '-l', metavar='SUFF.IX', \
702 type=str, default='.log', \
703 help='2nd part of log file name after --output_prefix; default="%(default)s"')
704 parser.add_argument('--pinger-log-level', metavar='LEVEL', \
705 type=str, default='INFO', \
706 help='log level given to pingchecker, DEBUG might quickly fill log; default=%(default)s')
709 parser.add_argument('--max-fails-per-host', '-m', metavar='N', type=int, default=10, \
710 help='max number of failures/warnings per host before test will end; default: %(default)s')
711 parser.add_argument('--version', action='version', version='%(prog)s {0}'.format(version))
714 opts = parser.parse_args()
715 opts.output_prefix = expanduser(opts.output_prefix).format(dt.now())
720 def setup_logging(opts):
721 """ create main logger and handlers for base logger """
723 # create logger for main
724 log = logger_base.getChild('main')
726 # remove all existing handlers
727 for old_handler in logger_base.handlers:
728 print('removing handler from base')
729 logger_base.removeHandler(old_handler)
730 for old_handler in log.handlers:
731 print('removing handler from log')
732 log.removeHandler(old_handler)
734 logger_base.setLevel(logging.DEBUG)
736 # create handlers and format for base logger
738 formatter = logging.Formatter('%(asctime)s %(name)s %(levelname)s %(message)s', \
739 datefmt='%d.%m,%H:%M:%S')
740 if opts.log_to_console:
741 handler = logging.StreamHandler(sys.stdout)
742 handler.setLevel(opts.log_level_console)
743 handler.setFormatter(formatter)
744 logger_base.addHandler(handler)
747 log_file = opts.output_prefix + opts.log_file_suffix
748 handler = logging.FileHandler(log_file)
749 handler.setLevel(opts.log_level_file)
750 handler.setFormatter(formatter)
751 logger_base.addHandler(handler)
754 log.info('Log for {0} in {1} created on {2}'.format(__name__, __file__, dt.now()))
755 if opts.log_to_console:
756 log.info('logging to console at level {0}'.format(opts.log_level_console))
758 log.info('not logging to console')
760 log.info('logging to file {1} at level {0}'.format(opts.log_level_file, log_file))
762 log.info('not logging to file')
764 # return log_file only if makes sense to talk about it in main
765 if opts.log_to_console:
772 def handle_sigchild(signum, curr_frame, log, cleanup_args):
773 log.error('Received SIGCHLD')
774 log.info('with args {0} and {1}'.format(signum, curr_frame))
776 cleanup(*cleanup_args)
779 def cleanup(log, pinger, tester, line_iter, save_file):
780 """ the stuff from finally clause in main loop; also called from handle_sigchild """
786 log.warning('Error before pinger was created')
789 log.info("Cleaning up")
792 log.info('telling pinger to terminate')
795 for line_idx, line in enumerate(line_iter):
796 log.info('left-over output: {0}'.format(line))
798 log.warning('ignore further left-over output')
801 #return_code = pinger.wait(3) can block!
802 outs, errs = pinger.communicate(timeout=3)
803 for line in outs.splitlines():
804 log.info('left-over output: {0}'.format(line))
805 for line in errs.splitlines():
806 log.info('left-over output: {0}'.format(line))
808 return_code = pinger.returncode # may still be None
809 log.info('pinger terminated with return code {0}'.format(return_code))
810 except subprocess.TimeoutExpired as te:
811 log.warning('Timeout expired waiting for pinger to terminate: {0}'.format(te))
814 log.exception('trouble terminating pinger', exc_info=True)
816 if return_code is None:
817 for line_idx, line in enumerate(line_iter):
818 log.info('left-over output: {0}'.format(line))
820 log.warning('ignore further left-over output')
825 log.info('killing pinger the hard way')
827 outs, errs = pinger.communicate(timeout=3)
828 for line in outs.splitlines():
829 log.info('left-over output: {0}'.format(line))
830 for line in errs.splitlines():
831 log.info('left-over output: {0}'.format(line))
834 log.exception('trouble killing pinger', exc_info=True)
836 if tester is not None:
837 tester.save_state_and_history(save_file)
838 tester.print_failures(log)
841 # reset original signal handler
842 #signal.signal(signal.SIGCHLD, signal.SIG_DFL)
849 """ parses args, create a Pinger and Parser and Tester and runs them; cleans up
854 if opts.show_saved_data:
855 return Tester.show_saved_data(opts.show_saved_data)
857 base_dir = dirname(opts.output_prefix + "_test")
858 if not isdir(base_dir):
861 log, log_file = setup_logging(opts)
868 config_file, n_hosts = create_conf_file(opts.output_prefix)
869 save_file = opts.output_prefix + '_save.pickle'
870 log.info('created config file {0}'.format(config_file))
873 test_version(opts.pingcheck_binary)
875 #signal.signal(signal.SIGCHLD, \
876 # lambda signum, curr_frame: handle_sigchild(signum, curr_frame, log, \
877 # (log, pinger, tester, line_iter, save_file) )
879 # pingcheck subprocess
880 pinger = subprocess.Popen([opts.pingcheck_binary, '--log-level', opts.pinger_log_level, \
881 '--log-output', 'TERMINAL', '--config-file', config_file], \
882 stdout=subprocess.PIPE, stderr=subprocess.PIPE, \
883 universal_newlines=True)
884 log.info('started pingcheck with PID {0}'.format(pinger.pid))
886 parser = ProcOutputParser(pinger.stdout, pinger.stderr)
887 line_iter = iter(parser)
888 tester = Tester(line_iter, opts.tell_connd_binary, max(10, opts.max_fails_per_host*n_hosts))
891 except KeyboardInterrupt:
892 log.info('stopped by user')
894 log.exception('Caught an exception in main loop', exc_info=True)
895 log.warning('Re-raise')
898 return_code = cleanup(log, pinger, tester, line_iter, save_file)
900 log.info('Will stop now')
901 if log_file is not None:
902 log.info('might find more log output in {0}'.format(log_file))
904 #end: global clean-up
911 if __name__ == '__main__':