/* The software in this package is distributed under the GNU General Public License version 2 (with a special exception described below). A copy of GNU General Public License (GPL) is included in this distribution, in the file COPYING.GPL. As a special exception, if other files instantiate templates or use macros or inline functions from this file, or you compile this file and link it with other works to produce a work based on this file, this file does not by itself cause the resulting work to be covered by the GNU General Public License. However the source code for this file must still be made available in accordance with section (3) of the GNU General Public License. This exception does not invalidate any other reasons why a work based on this file might be covered by the GNU General Public License. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "boost_assert_handler.h" #include "config/configurationreader.h" #include "config/host.h" #include "link/linkstatus.h" #include "host/loglevel.h" #include "host/pingprotocol.h" #include "host/pingscheduler.h" #include "icmp/icmppinger.h" // contains IcmpPacketDistributor #include "dns/dnsmaster.h" using namespace std; using boost::shared_ptr; using boost::posix_time::time_duration; using I2n::Logger::GlobalLogger; // a map from interval (in seconds) to delay (in seconds) typedef std::pair IntervalCountPair; typedef std::map DelayMap; typedef shared_ptr TimerItem; const boost::posix_time::time_duration SIGNAL_CHECK_INTERVAL = boost::posix_time::seconds(1); //----------------------------------------------------------------------------- // Declarations //----------------------------------------------------------------------------- typedef std::pair GetConfigReturnType; GetConfigReturnType get_configuration(int, const char**); LinkStatusItem get_status_notifier(const ConfigurationItem&); void init_logger(); void set_log_output(const ConfigurationItem &); DelayMap calc_pinger_delays(const HostList &hosts); bool init_pingers(const IoServiceItem, const ConfigurationItem&, const LinkStatusItem&, PingSchedulerList*); void start_pingers(const PingSchedulerList&); void stop_pingers(const PingSchedulerList&); void signal_handler_int(int param); void signal_handler_term(int param); void signal_handler_usr1(int param); void signal_handler_usr2(int param); void signal_checker( const boost::system::error_code &error ); void install_signal_handlers( const IoServiceItem io_service, const int config_log_level ); void reset_signal_handlers(); // data required for signal handling (SIGINT, SIGTERM, ... ) struct signal_data_struct { volatile sig_atomic_t signaled_flag_int; volatile sig_atomic_t signaled_flag_term; volatile sig_atomic_t signaled_flag_usr1; volatile sig_atomic_t signaled_flag_usr2; IoServiceItem io_service; void (*old_handler_int )(int); void (*old_handler_term)(int); void (*old_handler_usr1)(int); void (*old_handler_usr2)(int); bool stopped; TimerItem check_timer; int config_log_level; signal_data_struct(): signaled_flag_int( 0 ), signaled_flag_term( 0 ), signaled_flag_usr1( 0 ), signaled_flag_usr2( 0 ), io_service(), old_handler_int( 0 ), old_handler_term( 0 ), old_handler_usr1( 0 ), old_handler_usr2( 0 ), stopped( false ), check_timer(), config_log_level( I2n::Logger::LogLevel::Notice ) { } }; //----------------------------------------------------------------------------- // Definitions //----------------------------------------------------------------------------- GetConfigReturnType get_configuration( int argc, const char *argv[] ) { ConfigurationReader config_reader; bool parsed_success = config_reader.parse( argc, argv ); Configuration config_obj = config_reader.get_configuration(); ConfigurationItem configuration( new Configuration( config_obj ) ); GetConfigReturnType return_val( parsed_success, configuration ); return return_val; } LinkStatusItem get_status_notifier( const ConfigurationItem &configuration ) { int hosts_down_limit = configuration->get_hosts_down_limit(); int link_up_interval_in_sec = configuration->get_link_up_interval_in_sec(); int link_down_interval_in_sec = configuration->get_link_down_interval_in_sec(); string status_notifier_cmd = configuration->get_status_notifier_cmd(); LinkStatusItem link_analyzer( new LinkStatus( hosts_down_limit, link_up_interval_in_sec, link_down_interval_in_sec, status_notifier_cmd ) ); return link_analyzer; } void init_logger() { // set default: log at level NOTICE to syslog and stderr // to ensure that in case of faulty config, the error is noticed I2n::Logger::enable_syslog( I2n::Logger::Facility::User ); I2n::Logger::enable_stderr_log( true ); I2n::Logger::set_log_level( I2n::Logger::LogLevel::Notice ); } void set_log_output( const ConfigurationItem &configuration ) { LogOutput log_output = configuration->get_log_output(); string log_file_name = configuration->get_log_file(); switch (log_output) { case LogOutput_UNDEFINED: GlobalLogger.warning() << "Unknown output target -- use syslog"; case LogOutput_SYSLOG: GlobalLogger.info() << "Setting log output target to syslog" << endl; I2n::Logger::enable_syslog(true); I2n::Logger::enable_stderr_log(false); I2n::Logger::enable_log_file(false); GlobalLogger.info() << "Set log output target to syslog" << endl; break; case LogOutput_TERMINAL: GlobalLogger.info() << "Setting log output target to terminal" << endl; I2n::Logger::enable_syslog(false); I2n::Logger::enable_stderr_log(true); I2n::Logger::enable_log_file(false); GlobalLogger.info() << "Set log output target to terminal" << endl; GlobalLogger.info() << "(check syslog for earlier messages)" << endl; break; case LogOutput_FILE: GlobalLogger.info() << "Setting log output target to file " << log_file_name << endl; I2n::Logger::enable_syslog(false); I2n::Logger::enable_stderr_log(false); I2n::Logger::enable_log_file(log_file_name); GlobalLogger.info() << "Set log output target to file " << log_file_name << endl; GlobalLogger.info() << "(check syslog for earlier messages)" << endl; break; default: GlobalLogger.error() << "Unknown log output target!" << endl; break; } } /** * @brief calculate delay between pingers to evenly distribute them in time * * If there are many pingers with same interval, will get bursts of pings * and none in-between. This function calculates delays for large numbers * of hosts with same ping intervals, to distribute them as evenly as * possible, right from the start (might diverge over time, anyway). * * If interval is chosen at random, will have many pingers with different * intervals; to those will assign a random delay in [0, interval]. * * Called by init_pingers with * @param hosts list of hosts as obtained from configuration * @returns a map from ping interval to delay between pingers of that interval */ DelayMap calc_pinger_delays(const HostList &hosts) { // first step: count number of hosts with same intervals DelayMap delay_shifts; int curr_interval; BOOST_FOREACH( const HostItem &host, hosts ) { curr_interval = host->get_interval_in_sec(); if (! curr_interval) delay_shifts[curr_interval] = 1.0f; else delay_shifts[curr_interval] += 1.0f; } // create random number generator typedef boost::rand48 rand_gen_type; typedef boost::uniform_real rand_dist_type; typedef boost::variate_generator< rand_gen_type&, rand_dist_type > rand_var_type; rand_gen_type random_number_generator( boost::numeric_cast(time(0)) ); // second step: divide intervals by counts, round to int // --> for 18 pingers with a 30s interval, get 30s/18 = 1.66667 // for random intervals, use random delays within intervals BOOST_FOREACH( IntervalCountPair interval_and_count, delay_shifts ) { if ( abs(interval_and_count.second - 1.0f) < 0.0001 ) // == 1.0f { // there is exactly 1 pinger with exactly that interval // --> assign a random delay within interval rand_dist_type random_distribution(0.0f, interval_and_count.first); rand_var_type random_variate(random_number_generator, random_distribution); delay_shifts[interval_and_count.first] = random_variate(); } else { // there are several pingers with same interval // --> distribute evenly delay_shifts[interval_and_count.first] = boost::numeric_cast(interval_and_count.first) / std::max(1.0f, interval_and_count.second); //max is paranoid } } return delay_shifts; } bool init_pingers( const IoServiceItem io_service, const ConfigurationItem &configuration, const LinkStatusItem &status_notifier, PingSchedulerList *scheduler_list ) { string default_network_interface = configuration->get_source_network_interface(); int ping_fail_limit = configuration->get_ping_fail_limit(); int ping_reply_timeout = configuration->get_ping_reply_timeout(); // remove some hosts at random configuration->randomize_hosts(); // calculate delays between pingers of same interval DelayMap delay_shifts = calc_pinger_delays(configuration->get_hosts()); // setup memory for assigned delays; init with delay > 0 DelayMap delays; BOOST_FOREACH( IntervalCountPair interval_and_delay, delay_shifts ) delays[interval_and_delay.first] = 0.0f; HostList hosts = configuration->get_hosts(); if (hosts.empty()) return false; // more variables for pingcheck, maybe should move to config? int n_parallel_pings = 1; int parallel_ping_delay = 100; // ms int congestion_duration_thresh = 10; // seconds int congestion_percentage_thresh = 75; int congest_caused_by_fail_limit_percentage = 99; int ping_timeout_factor = 5; BOOST_FOREACH( const HostItem &host, hosts ) { string destination_address = host->get_address(); uint16_t destination_port = host->get_port(); string host_network_interface = host->get_source_network_interface(); string network_interface = ( host_network_interface == "default" ) ? default_network_interface : host_network_interface; PingProtocolList protocol_list = host->get_ping_protocol_list(); int ping_interval_in_sec = host->get_interval_in_sec(); // get delay for this scheduler and update assigned delays delays[ping_interval_in_sec] += delay_shifts[ping_interval_in_sec]; int current_delay = boost::math::iround(delays[ping_interval_in_sec]); GlobalLogger.debug() << "assigning delay of " << current_delay << "s to pinger with interval " << ping_interval_in_sec << "s"; PingSchedulerItem scheduler( new PingScheduler( io_service, network_interface, destination_address, destination_port, protocol_list, ping_interval_in_sec, ping_fail_limit, congestion_percentage_thresh, congest_caused_by_fail_limit_percentage, congestion_duration_thresh, ping_reply_timeout, status_notifier, current_delay, n_parallel_pings, parallel_ping_delay, ping_timeout_factor ) ); scheduler_list->push_back( scheduler ); } return true; } void start_pingers( const PingSchedulerList &scheduler_list ) { // start each ping scheduler BOOST_FOREACH( const PingSchedulerItem &scheduler, scheduler_list ) scheduler->start_pinging(); } void stop_pingers( const PingSchedulerList &scheduler_list ) { // Stop each ping scheduler GlobalLogger.info() << "Telling all pingers to stop"; BOOST_FOREACH( const PingSchedulerItem &scheduler, scheduler_list ) { scheduler->stop_pinging(); } IcmpPacketDistributor::clean_up_all(); } // the one instance of signal_data_struct signal_data_struct signal_data; /// registered as signal handler; just sets signal_data.signaled_flag void signal_handler_int(int param) { signal_data.signaled_flag_int = 1; } void signal_handler_term(int param) { signal_data.signaled_flag_term = 1; } void signal_handler_usr1(int param) { signal_data.signaled_flag_usr1 = 1; } void signal_handler_usr2(int param) { signal_data.signaled_flag_usr2 = 1; } /** * @brief called regularly from io_service; checks signal_data.signal_flag * * this does NOT work if there is a change in system time -- boost asio's * deadline timers seem to operate on fixed time points on a non-monotonic * clock; * * We therefore use external programs to kill pingchecker processes if the * system clock changes; We use SIGHUP for this because it is not handled * here and thus the default signal handler (which just kills the process * without calling any destructors) avoids a freeze in here. SIGTERM would * not work any more after system time change! * * --> do not catch SIGHUP in any asio-related signal handling! */ void signal_checker( const boost::system::error_code &error ) { bool want_stop = false; if ( error ) { // there was an error in the timer if ( error == boost::asio::error::operation_aborted ) { GlobalLogger.error() << "Signal check timer was cancelled! Stopping io_service" << endl; want_stop = true; } else { GlobalLogger.error() << "Signal check timer handler received error code " << error << "! Stopping io_service" << endl; want_stop = true; } } else { if ( signal_data.signaled_flag_int ) { signal_data.signaled_flag_int = 0; GlobalLogger.notice() << "Received signal SIGINT --> will stop" << endl; want_stop = true; } else if ( signal_data.signaled_flag_term ) { signal_data.signaled_flag_term = 0; GlobalLogger.notice() << "Received signal SIGTERM --> will stop" << endl; want_stop = true; } else if ( signal_data.signaled_flag_usr1 ) { signal_data.signaled_flag_usr1 = 0; int new_log_level = I2n::Logger::get_log_level()+1; I2n::Logger::set_log_level( new_log_level ); GlobalLogger.info() << "Received SIGUSR1 -- increased log level to " << I2n::Logger::get_log_level_string(); } else if ( signal_data.signaled_flag_usr2 ) { signal_data.signaled_flag_usr2 = 0; I2n::Logger::set_log_level( signal_data.config_log_level ); GlobalLogger.info() << "Received SIGUSR2 -- reset log level to normal (" << I2n::Logger::get_log_level_string() << ")"; } } if ( want_stop ) { // interrupt infinite loop in main and asio event loop signal_data.stopped = true; signal_data.io_service->stop(); } else { // re-schedule timer signal_data.check_timer->expires_from_now( SIGNAL_CHECK_INTERVAL ); signal_data.check_timer->async_wait( signal_checker ); } } /// register own signal handlers; see reset_signal_handlers for undo void install_signal_handlers( const IoServiceItem io_service, const int config_log_level ) { signal_data.signaled_flag_int = 0; signal_data.signaled_flag_term = 0; signal_data.signaled_flag_usr1 = 0; signal_data.signaled_flag_usr2 = 0; signal_data.config_log_level = config_log_level; // install own signal handlers signal_data.old_handler_int = signal(SIGINT, signal_handler_int); signal_data.old_handler_term = signal(SIGTERM, signal_handler_term); signal_data.old_handler_usr1 = signal(SIGUSR1, signal_handler_usr1); signal_data.old_handler_usr2 = signal(SIGUSR2, signal_handler_usr2); if ( signal_data.old_handler_int == SIG_ERR || signal_data.old_handler_term == SIG_ERR || signal_data.old_handler_usr1 == SIG_ERR || signal_data.old_handler_usr2 == SIG_ERR ) throw runtime_error( string("Failed to install signal handler: ") + string(strerror(errno)) ); // create a timer and a shared pointer to it, so it does not get out of scope TimerItem check_timer( new boost::asio::deadline_timer( *io_service ) ); // remember the io_service and the timer signal_data.io_service = io_service; signal_data.check_timer = check_timer; // set the timer check_timer->expires_from_now( SIGNAL_CHECK_INTERVAL ); check_timer->async_wait( signal_checker ); GlobalLogger.debug() << "signal timer set" << endl; } /// reset handlers to the ones saved in install_signal_handlers void reset_signal_handlers() { void (*old_handler_int)(int) = 0; void (*old_handler_term)(int) = 0; void (*old_handler_usr1)(int) = 0; void (*old_handler_usr2)(int) = 0; if (signal_data.old_handler_int != 0 ) old_handler_int = signal(SIGINT , signal_data.old_handler_int); if (signal_data.old_handler_term != 0 ) old_handler_term = signal(SIGTERM, signal_data.old_handler_term); if (signal_data.old_handler_usr1 != 0 ) old_handler_usr1 = signal(SIGUSR1, signal_data.old_handler_usr1); if (signal_data.old_handler_usr2 != 0 ) old_handler_usr2 = signal(SIGUSR2, signal_data.old_handler_usr2); if ( old_handler_int == SIG_ERR || old_handler_term == SIG_ERR || old_handler_usr1 == SIG_ERR || old_handler_usr2 == SIG_ERR ) throw runtime_error( string("Failed to reset signal handler: ") + string(strerror(errno)) ); } int main( int argc, const char *argv[] ) { init_logger(); GlobalLogger.debug() << "logger initiated with default config"; PingSchedulerList scheduler_list; IoServiceItem io_service; int ret_code = 0; try { GetConfigReturnType success_and_config = get_configuration( argc, argv ); ConfigurationItem configuration = success_and_config.second; if ( configuration->get_print_version() ) // do this even if parsing of config failed { GlobalLogger.debug() << "Printing version info (" << VERSION_STRING << "." << VERSION_REVISION_STRING << " build " << __DATE__ << ") and exit" << endl; cout << PROJECT_NAME << " version " << VERSION_STRING << "." << VERSION_REVISION_STRING << " build " << __DATE__ << endl; return 0; } if ( ! success_and_config.first ) { GlobalLogger.error() << "Could not read/parse configuration!"; GlobalLogger.debug() << "Return 1 immediately" << endl; return 1; } GlobalLogger.debug() << "Start setup" << endl; int log_level = configuration->get_log_level(); I2n::Logger::set_log_level( log_level ); GlobalLogger.info() << "Set LogLevel to " << I2n::Logger::get_log_level_string() << endl; set_log_output( configuration ); GlobalLogger.notice() << "started pingcheck version " << VERSION_STRING << "." << VERSION_REVISION_STRING << " build " << __DATE__ << endl; bool daemon_mode = configuration->get_daemon(); if ( daemon_mode ) { I2n::Daemon::daemonize(); } LinkStatusItem status_notifier = get_status_notifier( configuration ); IoServiceItem io_service_temp( new boost::asio::io_service() ); io_service_temp.swap( io_service ); io_service_temp.reset(); // create Dns master boost::asio::ip::address name_server_ip = boost::asio::ip::address::from_string( configuration->get_nameserver() ); int max_recursion_count = 10; // could make a config var some time DnsMaster::create_master( io_service, name_server_ip, configuration->get_resolved_ip_ttl_threshold(), configuration->get_min_time_between_resolves(), configuration->get_max_address_resolution_attempts(), max_recursion_count, configuration->get_dns_cache_file() ); if ( !init_pingers(io_service, configuration, status_notifier, &scheduler_list) ) { GlobalLogger.error() << "Could not initialize pingers or no hosts " << "given to ping --> exit"; return 2; } install_signal_handlers( io_service, log_level ); start_pingers( scheduler_list ); } catch ( const std::exception &ex ) { GlobalLogger.error() << "Uncaught exception. " << ex.what() << endl; ret_code = 3; } catch (...) { GlobalLogger.error() << "Caught unknown exception!" << endl; ret_code = 4; } if ( ret_code == 0 ) { GlobalLogger.info() << "starting io_service main loop" << endl; // call boost::asio main event loop, catching exceptions try { io_service->run(); } catch ( const std::exception &ex ) { GlobalLogger.error() << "Caught exception, will continue. " << ex.what() << endl; } catch (...) { GlobalLogger.error() << "Caught unknown exception, will continue!" << endl; } } // clean up try { GlobalLogger.info() << "Cleaning up" << endl; stop_pingers( scheduler_list ); reset_signal_handlers(); } catch ( const std::exception &ex ) { GlobalLogger.error() << "Uncaught exception while cleaning up: " << ex.what() << endl; ret_code += 16; } catch (...) { GlobalLogger.error() << "Caught unknown exception while cleaning up!" << endl; ret_code += 32; } GlobalLogger.notice() << "Pingcheck done " << endl; return ret_code; }