2 The software in this package is distributed under the GNU General
3 Public License version 2 (with a special exception described below).
5 A copy of GNU General Public License (GPL) is included in this distribution,
6 in the file COPYING.GPL.
8 As a special exception, if other files instantiate templates or use macros
9 or inline functions from this file, or you compile this file and link it
10 with other works to produce a work based on this file, this file
11 does not by itself cause the resulting work to be covered
12 by the GNU General Public License.
14 However the source code for this file must still be made available
15 in accordance with section (3) of the GNU General Public License.
17 This exception does not invalidate any other reasons why a work based
18 on this file might be covered by the GNU General Public License.
26 #include <boost/asio.hpp>
27 #include <boost/foreach.hpp>
28 #include <boost/shared_ptr.hpp>
29 #include <boost/math/special_functions/round.hpp>
30 #include <boost/numeric/conversion/cast.hpp>
31 #include <boost/date_time/posix_time/posix_time_types.hpp>
32 #include <boost/random/linear_congruential.hpp>
33 #include <boost/random/uniform_real.hpp>
34 #include <boost/random/variate_generator.hpp>
36 #include <daemonfunc.hpp>
37 #include <logfunc.hpp>
39 #include "boost_assert_handler.h"
40 #include "config/configurationreader.h"
41 #include "config/host.h"
42 #include "link/linkstatus.h"
43 #include "host/loglevel.h"
44 #include "host/pingprotocol.h"
45 #include "host/pingscheduler.h"
46 #include "icmp/icmppinger.h" // contains IcmpPacketDistributor
47 #include "dns/dnsmaster.h"
51 using boost::shared_ptr;
52 using boost::posix_time::time_duration;
53 using I2n::Logger::GlobalLogger;
// Type aliases and constants shared by the functions in this file.
55 // a map from interval (in seconds) to delay (in seconds)
// NOTE: the description above applies to DelayMap. IntervalCountPair is the
// (interval, value) pair type this file uses when iterating a DelayMap by value;
// it differs from DelayMap::value_type (whose key is const), so each iteration
// step converts/copies the element.
56 typedef std::pair<int, float> IntervalCountPair;
57 typedef std::map<int, float> DelayMap;
// shared-ownership handle for an asio deadline timer (used for the signal check timer)
58 typedef shared_ptr<boost::asio::deadline_timer> TimerItem;
// period after which signal_checker is scheduled again to poll the signal flags
60 const boost::posix_time::time_duration SIGNAL_CHECK_INTERVAL = boost::posix_time::seconds(1);
62 //-----------------------------------------------------------------------------
64 //-----------------------------------------------------------------------------
// Forward declarations of the helpers defined further down in this file.

// --- configuration and logging ---
// first: whether parsing succeeded, second: the configuration object
66 typedef std::pair<bool, ConfigurationItem> GetConfigReturnType;
67 GetConfigReturnType get_configuration(int, const char**);
68 LinkStatusItem get_status_notifier(const ConfigurationItem&);
70 void set_log_output(const ConfigurationItem &);
// --- pinger setup and teardown ---
71 DelayMap calc_pinger_delays(const HostList &hosts);
72 bool init_pingers(const IoServiceItem, const ConfigurationItem&,
73 const LinkStatusItem&, PingSchedulerList*);
74 void start_pingers(const PingSchedulerList&);
75 void stop_pingers(const PingSchedulerList&);
// --- signal handling: the handlers only set flags, signal_checker polls them ---
77 void signal_handler_int(int param);
78 void signal_handler_term(int param);
79 void signal_handler_usr1(int param);
80 void signal_handler_usr2(int param);
81 void signal_checker( const boost::system::error_code &error );
82 void install_signal_handlers( const IoServiceItem io_service, const int config_log_level );
83 void reset_signal_handlers();
85 // data required for signal handling (SIGINT, SIGTERM, ... )
// Holds everything shared between the signal handlers, signal_checker and
// install_signal_handlers / reset_signal_handlers.
86 struct signal_data_struct
// One flag per signal: set to 1 by the matching signal_handler_*, cleared by
// signal_checker. sig_atomic_t is the integer type that may be written from
// within a signal handler.
88 volatile sig_atomic_t signaled_flag_int;
89 volatile sig_atomic_t signaled_flag_term;
90 volatile sig_atomic_t signaled_flag_usr1;
91 volatile sig_atomic_t signaled_flag_usr2;
// io_service that signal_checker stops when a stop is requested
92 IoServiceItem io_service;
// handlers returned by signal() in install_signal_handlers; passed back to
// signal() in reset_signal_handlers
93 void (*old_handler_int )(int);
94 void (*old_handler_term)(int);
95 void (*old_handler_usr1)(int);
96 void (*old_handler_usr2)(int);
// timer that schedules signal_checker; stored here so it stays alive
98 TimerItem check_timer;
// constructor: no signal flagged, no previous handler remembered
101 signal_data_struct():
102 signaled_flag_int( 0 ),
103 signaled_flag_term( 0 ),
104 signaled_flag_usr1( 0 ),
105 signaled_flag_usr2( 0 ),
// 0 is the "nothing saved" marker tested by reset_signal_handlers
107 old_handler_int( 0 ),
108 old_handler_term( 0 ),
109 old_handler_usr1( 0 ),
110 old_handler_usr2( 0 ),
// default until install_signal_handlers stores the configured level
113 config_log_level( I2n::Logger::LogLevel::Notice )
117 //-----------------------------------------------------------------------------
119 //-----------------------------------------------------------------------------
// Parses the command line / configuration through ConfigurationReader and
// pairs the parse result flag with a heap-allocated copy of the configuration.
// The configuration object is produced even when parsing failed; main() still
// queries it (for the version flag) before checking the success flag.
121 GetConfigReturnType get_configuration(
126 ConfigurationReader config_reader;
127 bool parsed_success = config_reader.parse( argc, argv );
// copy of whatever the reader holds after parse()
128 Configuration config_obj = config_reader.get_configuration();
// second copy, owned by the ConfigurationItem handle
130 ConfigurationItem configuration( new Configuration( config_obj ) );
131 GetConfigReturnType return_val( parsed_success, configuration );
// Builds the link status object from the link-related settings of the
// configuration (hosts-down limit, link up/down intervals in seconds and the
// status notifier command) and returns it.
135 LinkStatusItem get_status_notifier(
136 const ConfigurationItem &configuration
139 int hosts_down_limit = configuration->get_hosts_down_limit();
140 int link_up_interval_in_sec = configuration->get_link_up_interval_in_sec();
141 int link_down_interval_in_sec = configuration->get_link_down_interval_in_sec();
142 string status_notifier_cmd = configuration->get_status_notifier_cmd();
143 LinkStatusItem link_analyzer(
146 link_up_interval_in_sec,
147 link_down_interval_in_sec,
152 return link_analyzer;
// Logger defaults used until the configuration has been read; set_log_output
// later switches to the configured target.
157 // set default: log at level NOTICE to syslog and stderr
158 // to ensure that in case of faulty config, the error is noticed
159 I2n::Logger::enable_syslog( I2n::Logger::Facility::User );
160 I2n::Logger::enable_stderr_log( true );
161 I2n::Logger::set_log_level( I2n::Logger::LogLevel::Notice );
// Switches the logger to the output target chosen in the configuration.
// Each target enables exactly one of syslog / stderr / log file and disables
// the other two. The "Setting ..." message is written before the switch (so it
// reaches the previous targets), the "Set ..." message after it.
// @param configuration source of the log output target and the log file name
165 const ConfigurationItem &configuration
168 LogOutput log_output = configuration->get_log_output();
169 string log_file_name = configuration->get_log_file();
172 case LogOutput_UNDEFINED:
173 GlobalLogger.warning() << "Unknown output target -- use syslog";
// no break here: an undefined target continues into the syslog case below
174 case LogOutput_SYSLOG:
175 GlobalLogger.info() << "Setting log output target to syslog" << endl;
176 I2n::Logger::enable_syslog(true);
177 I2n::Logger::enable_stderr_log(false);
178 I2n::Logger::enable_log_file(false);
179 GlobalLogger.info() << "Set log output target to syslog" << endl;
181 case LogOutput_TERMINAL:
182 GlobalLogger.info() << "Setting log output target to terminal" << endl;
183 I2n::Logger::enable_syslog(false);
184 I2n::Logger::enable_stderr_log(true);
185 I2n::Logger::enable_log_file(false);
186 GlobalLogger.info() << "Set log output target to terminal" << endl;
187 GlobalLogger.info() << "(check syslog for earlier messages)" << endl;
// log file target: syslog and stderr are switched off, the file is switched on
190 GlobalLogger.info() << "Setting log output target to file "
191 << log_file_name << endl;
192 I2n::Logger::enable_syslog(false);
193 I2n::Logger::enable_stderr_log(false);
194 I2n::Logger::enable_log_file(log_file_name);
195 GlobalLogger.info() << "Set log output target to file "
196 << log_file_name << endl;
197 GlobalLogger.info() << "(check syslog for earlier messages)" << endl;
// any other value: only an error is logged, the logger targets are not changed here
200 GlobalLogger.error() << "Unknown log output target!" << endl;
206 * @brief calculate delay between pingers to evenly distribute them in time
208 * If there are many pingers with same interval, will get bursts of pings
209 * and none in-between. This function calculates delays for large numbers
210 * of hosts with same ping intervals, to distribute them as evenly as
211 * possible, right from the start (might diverge over time, anyway).
213 * If interval is chosen at random, will have many pingers with different
214 * intervals; to those will assign a random delay in [0, interval].
216 * Called by init_pingers with
217 * @param hosts list of hosts as obtained from configuration
218 * @returns a map from ping interval to delay between pingers of that interval
220 DelayMap calc_pinger_delays(const HostList &hosts)
222 // first step: count number of hosts with same intervals
// The counts are kept as floats in the same map that later holds the delays.
223 DelayMap delay_shifts;
225 BOOST_FOREACH( const HostItem &host, hosts )
227 curr_interval = host->get_interval_in_sec();
// NOTE(review): confirm the condition choosing between "= 1.0f" and "+= 1.0f"
// tests whether the interval is already a key of the map. operator[] creates
// a missing entry as 0.0f, so "+= 1.0f" alone would count correctly as well.
229 delay_shifts[curr_interval] = 1.0f;
231 delay_shifts[curr_interval] += 1.0f;
234 // create random number generator
// seeded with the current time, so the random delays differ between runs
235 typedef boost::rand48 rand_gen_type;
236 typedef boost::uniform_real<float> rand_dist_type;
237 typedef boost::variate_generator< rand_gen_type&,
238 rand_dist_type > rand_var_type;
239 rand_gen_type random_number_generator(
240 boost::numeric_cast<unsigned int>(time(0)) );
242 // second step: divide intervals by counts, round to int
243 // --> for 18 pingers with a 30s interval, get 30s/18 = 1.66667
244 // for random intervals, use random delays within intervals
// The quotient is stored unrounded here; init_pingers rounds the accumulated
// delay (iround) when it assigns it to a scheduler.
// The loop variable is a copy of each map element, so results are written back
// through delay_shifts[key]; only existing keys are assigned, no entry is added.
245 BOOST_FOREACH( IntervalCountPair interval_and_count, delay_shifts )
// NOTE(review): abs is unqualified; make sure a floating-point overload is the
// one selected here.
247 if ( abs(interval_and_count.second - 1.0f) < 0.0001 ) // == 1.0f
248 { // there is exactly 1 pinger with exactly that interval
249 // --> assign a random delay within interval
250 rand_dist_type random_distribution(0.0f, interval_and_count.first);
251 rand_var_type random_variate(random_number_generator,
252 random_distribution);
253 delay_shifts[interval_and_count.first] = random_variate();
256 { // there are several pingers with same interval
257 // --> distribute evenly
258 delay_shifts[interval_and_count.first] =
259 boost::numeric_cast<float>(interval_and_count.first) /
260 std::max(1.0f, interval_and_count.second); //max is paranoid
// Creates one ping scheduler per configured host and appends it to
// *scheduler_list. Hosts sharing a ping interval get increasing start delays
// (see calc_pinger_delays) so that their pings do not all fire together.
// @param io_service       event loop handle handed over by main()
// @param configuration    source of the host list and ping settings
// @param status_notifier  link status object created by get_status_notifier
// @param scheduler_list   output: receives the created schedulers
268 const IoServiceItem io_service,
269 const ConfigurationItem &configuration,
270 const LinkStatusItem &status_notifier,
271 PingSchedulerList *scheduler_list
274 string default_network_interface = configuration->get_source_network_interface();
275 int ping_fail_limit = configuration->get_ping_fail_limit();
276 int ping_reply_timeout = configuration->get_ping_reply_timeout();
278 // remove some hosts at random
// must happen before both get_hosts() calls below so they see the same host set
279 configuration->randomize_hosts();
281 // calculate delays between pingers of same interval
282 DelayMap delay_shifts = calc_pinger_delays(configuration->get_hosts());
284 // setup memory for assigned delays; init with delay > 0
// NOTE(review): the accumulators below actually start at 0.0f. Each one is
// increased by its delay step before it is first used, so the first delay
// assigned per interval equals one step.
286 BOOST_FOREACH( IntervalCountPair interval_and_delay, delay_shifts )
287 delays[interval_and_delay.first] = 0.0f;
289 HostList hosts = configuration->get_hosts();
294 // more variables for pingcheck, maybe should move to config?
295 int n_parallel_pings = 1;
296 int parallel_ping_delay = 100; // ms
297 int congestion_duration_thresh = 10; // seconds
298 int congestion_percentage_thresh = 75;
299 int congest_caused_by_fail_limit_percentage = 99;
300 int ping_timeout_factor = 5;
302 BOOST_FOREACH( const HostItem &host, hosts )
304 string destination_address = host->get_address();
305 uint16_t destination_port = host->get_port();
// a host whose interface is the literal "default" uses the global interface
306 string host_network_interface = host->get_source_network_interface();
307 string network_interface = ( host_network_interface == "default" ) ?
308 default_network_interface :
309 host_network_interface;
310 PingProtocolList protocol_list = host->get_ping_protocol_list();
311 int ping_interval_in_sec = host->get_interval_in_sec();
313 // get delay for this scheduler and update assigned delays
// running sum per interval: the k-th host of an interval gets k * step, rounded
314 delays[ping_interval_in_sec] += delay_shifts[ping_interval_in_sec];
315 int current_delay = boost::math::iround(delays[ping_interval_in_sec]);
316 GlobalLogger.debug() << "assigning delay of " << current_delay
317 << "s to pinger with interval " << ping_interval_in_sec << "s";
319 PingSchedulerItem scheduler(
326 ping_interval_in_sec,
328 congestion_percentage_thresh,
329 congest_caused_by_fail_limit_percentage,
330 congestion_duration_thresh,
339 scheduler_list->push_back( scheduler );
// Calls start_pinging() on every scheduler in the list, in list order.
346 const PingSchedulerList &scheduler_list
349 // start each ping scheduler
350 BOOST_FOREACH( const PingSchedulerItem &scheduler, scheduler_list )
351 scheduler->start_pinging();
// Calls stop_pinging() on every scheduler in the list, then lets
// IcmpPacketDistributor clean up.
355 const PingSchedulerList &scheduler_list
358 // Stop each ping scheduler
359 GlobalLogger.info() << "Telling all pingers to stop";
360 BOOST_FOREACH( const PingSchedulerItem &scheduler, scheduler_list )
362 scheduler->stop_pinging();
// runs once, after all schedulers have been told to stop
365 IcmpPacketDistributor::clean_up_all();
369 // the one instance of signal_data_struct
// File-scope object: written by the signal_handler_* functions and by
// install_signal_handlers, read by signal_checker and reset_signal_handlers.
370 signal_data_struct signal_data;
373 /// registered as signal handler; just sets signal_data.signaled_flag
// SIGINT: only records the signal; signal_checker reacts to the flag later.
// param is not used.
374 void signal_handler_int(int param)
376 signal_data.signaled_flag_int = 1;
// SIGTERM: only records the signal; signal_checker reacts to the flag later.
// param is not used.
378 void signal_handler_term(int param)
380 signal_data.signaled_flag_term = 1;
// SIGUSR1: only records the signal; signal_checker raises the log level when
// it sees the flag. param is not used.
382 void signal_handler_usr1(int param)
384 signal_data.signaled_flag_usr1 = 1;
// SIGUSR2: only records the signal; signal_checker resets the log level when
// it sees the flag. param is not used.
386 void signal_handler_usr2(int param)
388 signal_data.signaled_flag_usr2 = 1;
392 /// called regularly from io_service; checks signal_data.signal_flag
// Timer completion handler. It polls the flags set by the signal_handler_*
// functions, so a signal is acted upon at the next timer expiry
// (SIGNAL_CHECK_INTERVAL), not at delivery time.
// @param error  result passed in by the timer wait
393 void signal_checker( const boost::system::error_code &error )
395 bool want_stop = false;
398 { // there was an error in the timer
// both error cases below announce that the io_service is being stopped
399 if ( error == boost::asio::error::operation_aborted )
401 GlobalLogger.error() << "Signal check timer was cancelled! Stopping io_service" << endl;
406 GlobalLogger.error() << "Signal check timer handler received error code " << error
407 << "! Stopping io_service" << endl;
// The flags are tested in an else-if chain, so at most one signal is handled
// per call. A flag that is not handled stays set until a later call.
412 if ( signal_data.signaled_flag_int )
414 signal_data.signaled_flag_int = 0;
415 GlobalLogger.notice() << "Received signal SIGINT --> will stop" << endl;
418 else if ( signal_data.signaled_flag_term )
420 signal_data.signaled_flag_term = 0;
421 GlobalLogger.notice() << "Received signal SIGTERM --> will stop" << endl;
424 else if ( signal_data.signaled_flag_usr1 )
426 signal_data.signaled_flag_usr1 = 0;
// NOTE(review): the level is raised by one without a visible upper bound;
// confirm set_log_level copes with values above the highest level.
427 int new_log_level = I2n::Logger::get_log_level()+1;
428 I2n::Logger::set_log_level( new_log_level );
429 GlobalLogger.info() << "Received SIGUSR1 -- increased log level to "
430 << I2n::Logger::get_log_level_string();
432 else if ( signal_data.signaled_flag_usr2 )
434 signal_data.signaled_flag_usr2 = 0;
// back to the level remembered by install_signal_handlers
435 I2n::Logger::set_log_level( signal_data.config_log_level );
436 GlobalLogger.info() << "Received SIGUSR2 -- reset log level to normal ("
437 << I2n::Logger::get_log_level_string() << ")";
442 { // interrupt infinite loop in main and asio event loop
443 signal_data.stopped = true;
444 signal_data.io_service->stop();
447 { // re-schedule timer
// re-arming here is what makes this handler run periodically
448 signal_data.check_timer->expires_from_now( SIGNAL_CHECK_INTERVAL );
449 signal_data.check_timer->async_wait( signal_checker );
453 /// register own signal handlers; see reset_signal_handlers for undo
// Installs the flag-setting handlers for SIGINT, SIGTERM, SIGUSR1 and SIGUSR2
// and starts the timer that runs signal_checker.
// @param io_service        event loop the timer runs on; stopped by signal_checker
// @param config_log_level  log level that SIGUSR2 restores
// Throws runtime_error if any signal() call returns SIG_ERR.
454 void install_signal_handlers( const IoServiceItem io_service, const int config_log_level )
// start without pending signals
456 signal_data.signaled_flag_int = 0;
457 signal_data.signaled_flag_term = 0;
458 signal_data.signaled_flag_usr1 = 0;
459 signal_data.signaled_flag_usr2 = 0;
460 signal_data.config_log_level = config_log_level;
462 // install own signal handlers
// signal() returns the previously installed handler, kept for reset_signal_handlers
463 signal_data.old_handler_int = signal(SIGINT, signal_handler_int);
464 signal_data.old_handler_term = signal(SIGTERM, signal_handler_term);
465 signal_data.old_handler_usr1 = signal(SIGUSR1, signal_handler_usr1);
466 signal_data.old_handler_usr2 = signal(SIGUSR2, signal_handler_usr2);
// NOTE(review): all four calls are made before the check, and handlers that
// were installed successfully stay installed when this throws.
467 if ( signal_data.old_handler_int == SIG_ERR ||
468 signal_data.old_handler_term == SIG_ERR ||
469 signal_data.old_handler_usr1 == SIG_ERR ||
470 signal_data.old_handler_usr2 == SIG_ERR )
471 throw runtime_error( string("Failed to install signal handler: ") + string(strerror(errno)) );
473 // create a timer and a shared pointer to it, so it does not get out of scope
474 TimerItem check_timer( new boost::asio::deadline_timer( *io_service ) );
476 // remember the io_service and the timer
477 signal_data.io_service = io_service;
478 signal_data.check_timer = check_timer;
// first expiry after SIGNAL_CHECK_INTERVAL; signal_checker re-arms the timer itself
481 check_timer->expires_from_now( SIGNAL_CHECK_INTERVAL );
482 check_timer->async_wait( signal_checker );
483 GlobalLogger.debug() << "signal timer set" << endl;
486 /// reset handlers to the ones saved in install_signal_handlers
// Passes each saved handler back to signal(); a saved value of 0 is treated as
// "nothing saved" and skipped. Throws runtime_error if a signal() call returns
// SIG_ERR.
// NOTE(review): where SIG_DFL is defined as a null function pointer (common;
// confirm for the target platform), a saved default disposition cannot be told
// apart from "nothing saved" and is therefore not restored.
// NOTE(review): a saved value of SIG_ERR (left behind by a failed install) is
// not filtered out before being passed to signal().
487 void reset_signal_handlers()
// results of the restoring signal() calls; stay 0 for skipped signals
489 void (*old_handler_int)(int) = 0;
490 void (*old_handler_term)(int) = 0;
491 void (*old_handler_usr1)(int) = 0;
492 void (*old_handler_usr2)(int) = 0;
493 if (signal_data.old_handler_int != 0 )
494 old_handler_int = signal(SIGINT , signal_data.old_handler_int);
495 if (signal_data.old_handler_term != 0 )
496 old_handler_term = signal(SIGTERM, signal_data.old_handler_term);
497 if (signal_data.old_handler_usr1 != 0 )
498 old_handler_usr1 = signal(SIGUSR1, signal_data.old_handler_usr1);
499 if (signal_data.old_handler_usr2 != 0 )
500 old_handler_usr2 = signal(SIGUSR2, signal_data.old_handler_usr2);
// all four restores are attempted before any failure is reported
502 if ( old_handler_int == SIG_ERR ||
503 old_handler_term == SIG_ERR ||
504 old_handler_usr1 == SIG_ERR ||
505 old_handler_usr2 == SIG_ERR )
506 throw runtime_error( string("Failed to reset signal handler: ") + string(strerror(errno)) );
// Program entry point. Phases, in order: read the configuration, set up
// logging, optionally daemonize, create the io_service, DNS master and
// pingers, install the signal handlers, run the event loop, clean up.
510 int main( int argc, const char *argv[] )
513 GlobalLogger.debug() << "logger initiated with default config";
// declared before the setup phase so that the event loop and cleanup phases can use them
515 PingSchedulerList scheduler_list;
516 IoServiceItem io_service;
// --- phase 1: configuration ---
521 GetConfigReturnType success_and_config = get_configuration( argc, argv );
522 ConfigurationItem configuration = success_and_config.second;
524 if ( configuration->get_print_version() ) // do this even if parsing of config failed
526 GlobalLogger.debug() << "Printing version info ("
527 << VERSION_STRING << "." << VERSION_REVISION_STRING
528 << " build " << __DATE__
529 << ") and exit" << endl;
530 cout << PROJECT_NAME << " version "
531 << VERSION_STRING << "." << VERSION_REVISION_STRING
532 << " build " << __DATE__
// parse failure is checked only after the version request has been served
537 if ( ! success_and_config.first )
539 GlobalLogger.error() << "Could not read/parse configuration!";
540 GlobalLogger.debug() << "Return 1 immediately" << endl;
// --- phase 2: logging as configured ---
543 GlobalLogger.debug() << "Start setup" << endl;
545 int log_level = configuration->get_log_level();
546 I2n::Logger::set_log_level( log_level );
547 GlobalLogger.info() << "Set LogLevel to " << I2n::Logger::get_log_level_string() << endl;
549 set_log_output( configuration );
550 GlobalLogger.notice() << "started pingcheck version "
551 << VERSION_STRING << "." << VERSION_REVISION_STRING
552 << " build " << __DATE__
// --- phase 3: daemonize (presumably only when daemon_mode is set -- confirm) ---
555 bool daemon_mode = configuration->get_daemon();
558 I2n::Daemon::daemonize();
// --- phase 4: create the objects needed for pinging ---
561 LinkStatusItem status_notifier = get_status_notifier( configuration );
// the new io_service is swapped into the outer io_service variable; the
// temporary handle is then released
563 IoServiceItem io_service_temp( new boost::asio::io_service() );
564 io_service_temp.swap( io_service );
565 io_service_temp.reset();
// DNS master: name server address and resolver limits come from the configuration
568 boost::asio::ip::address name_server_ip =
569 boost::asio::ip::address::from_string(
570 configuration->get_nameserver() );
571 int max_recursion_count = 10; // could make a config var some time
572 DnsMaster::create_master(
575 configuration->get_resolved_ip_ttl_threshold(),
576 configuration->get_min_time_between_resolves(),
577 configuration->get_max_address_resolution_attempts(),
579 configuration->get_dns_cache_file() );
581 if ( !init_pingers(io_service, configuration, status_notifier,
584 GlobalLogger.error() << "Could not initialize pingers or no hosts "
585 << "given to ping --> exit";
// the configured log level is handed over so that SIGUSR2 can restore it
589 install_signal_handlers( io_service, log_level );
591 start_pingers( scheduler_list );
// exceptions escaping the setup phase are logged
593 catch ( const std::exception &ex )
595 GlobalLogger.error() << "Uncaught exception. " << ex.what() << endl;
599 GlobalLogger.error() << "Caught unknown exception!" << endl;
// --- phase 5: event loop ---
605 GlobalLogger.info() << "starting io_service main loop" << endl;
607 // call boost::asio main event loop, catching exceptions
// exceptions from the event loop are logged and the program carries on
612 catch ( const std::exception &ex )
614 GlobalLogger.error() << "Caught exception, will continue. " << ex.what() << endl;
617 GlobalLogger.error() << "Caught unknown exception, will continue!" << endl;
// --- phase 6: cleanup; pingers are stopped before the signal handlers are restored ---
624 GlobalLogger.info() << "Cleaning up" << endl;
625 stop_pingers( scheduler_list );
626 reset_signal_handlers();
628 catch ( const std::exception &ex )
630 GlobalLogger.error() << "Uncaught exception while cleaning up: " << ex.what() << endl;
634 GlobalLogger.error() << "Caught unknown exception while cleaning up!" << endl;
638 GlobalLogger.notice() << "Pingcheck done " << endl;