Update pingcheck to work with cmake 3.28
[pingcheck] / src / main.cpp
... / ...
CommitLineData
1/*
2The software in this package is distributed under the GNU General
3Public License version 2 (with a special exception described below).
4
5A copy of GNU General Public License (GPL) is included in this distribution,
6in the file COPYING.GPL.
7
8As a special exception, if other files instantiate templates or use macros
9or inline functions from this file, or you compile this file and link it
10with other works to produce a work based on this file, this file
11does not by itself cause the resulting work to be covered
12by the GNU General Public License.
13
14However the source code for this file must still be made available
15in accordance with section (3) of the GNU General Public License.
16
17This exception does not invalidate any other reasons why a work based
18on this file might be covered by the GNU General Public License.
19*/
20#include <signal.h>
21#include <stdint.h>
22
23#include <vector>
24#include <iostream>
25
26#include <boost/asio.hpp>
27#include <boost/foreach.hpp>
28#include <boost/shared_ptr.hpp>
29#include <boost/math/special_functions/round.hpp>
30#include <boost/numeric/conversion/cast.hpp>
31#include <boost/date_time/posix_time/posix_time_types.hpp>
32#include <boost/random/linear_congruential.hpp>
33#include <boost/random/uniform_real.hpp>
34#include <boost/random/variate_generator.hpp>
35
36#include <daemonfunc.hpp>
37#include <logfunc.hpp>
38
39#include "boost_assert_handler.h"
40#include "config/configurationreader.h"
41#include "config/host.h"
42#include "link/linkstatus.h"
43#include "host/loglevel.h"
44#include "host/pingprotocol.h"
45#include "host/pingscheduler.h"
46#include "icmp/icmppinger.h" // contains IcmpPacketDistributor
47#include "dns/dnsmaster.h"
48
49
50using namespace std;
51using boost::shared_ptr;
52using boost::posix_time::time_duration;
53using I2n::Logger::GlobalLogger;
54
// element type used when iterating over a DelayMap: (interval in seconds,
// float value stored for that interval)
typedef std::pair<int, float> IntervalCountPair;
// a map from interval (in seconds) to delay (in seconds)
typedef std::map<int, float> DelayMap;
// shared-ownership handle for a boost::asio deadline timer
typedef shared_ptr<boost::asio::deadline_timer> TimerItem;

// period after which signal_checker re-arms its timer to poll the signal flags
const boost::posix_time::time_duration SIGNAL_CHECK_INTERVAL = boost::posix_time::seconds(1);
61
62//-----------------------------------------------------------------------------
63// Declarations
64//-----------------------------------------------------------------------------
65
// result of get_configuration: (parsing succeeded?, configuration object)
typedef std::pair<bool, ConfigurationItem> GetConfigReturnType;

// --- configuration, logging and pinger setup ---
GetConfigReturnType get_configuration(int, const char**);
LinkStatusItem get_status_notifier(const ConfigurationItem&);
void init_logger();
void set_log_output(const ConfigurationItem &);
DelayMap calc_pinger_delays(const HostList &hosts);
bool init_pingers(const IoServiceItem, const ConfigurationItem&,
                  const LinkStatusItem&, PingSchedulerList*);
void start_pingers(const PingSchedulerList&);
void stop_pingers(const PingSchedulerList&);

// --- signal handling: the handlers only set flags, signal_checker polls them ---
void signal_handler_int(int param);
void signal_handler_term(int param);
void signal_handler_usr1(int param);
void signal_handler_usr2(int param);
void signal_checker( const boost::system::error_code &error );
void install_signal_handlers( const IoServiceItem io_service, const int config_log_level );
void reset_signal_handlers();
84
// data required for signal handling (SIGINT, SIGTERM, ... )
//
// The signal handlers only set the signaled_flag_* members; signal_checker
// polls and clears them from the io_service, so all real work happens outside
// of signal-handler context.
struct signal_data_struct
{
    // one flag per handled signal; set to 1 by the matching signal_handler_*
    // and cleared again by signal_checker
    volatile sig_atomic_t signaled_flag_int;
    volatile sig_atomic_t signaled_flag_term;
    volatile sig_atomic_t signaled_flag_usr1;
    volatile sig_atomic_t signaled_flag_usr2;
    // io_service that signal_checker stops when the program should terminate
    IoServiceItem io_service;
    // return values of signal() from install_signal_handlers; 0 until then;
    // used by reset_signal_handlers to restore the previous handlers
    void (*old_handler_int )(int);
    void (*old_handler_term)(int);
    void (*old_handler_usr1)(int);
    void (*old_handler_usr2)(int);
    // set to true by signal_checker when it stops the io_service
    bool stopped;
    // timer that repeatedly triggers signal_checker
    TimerItem check_timer;
    // log level to return to when SIGUSR2 is received
    int config_log_level;

    // members are initialised in declaration order: flags cleared, no
    // io_service/timer yet, no saved handlers, log level Notice
    signal_data_struct():
        signaled_flag_int( 0 ),
        signaled_flag_term( 0 ),
        signaled_flag_usr1( 0 ),
        signaled_flag_usr2( 0 ),
        io_service(),
        old_handler_int( 0 ),
        old_handler_term( 0 ),
        old_handler_usr1( 0 ),
        old_handler_usr2( 0 ),
        stopped( false ),
        check_timer(),
        config_log_level( I2n::Logger::LogLevel::Notice )
    { }

};
117//-----------------------------------------------------------------------------
118// Definitions
119//-----------------------------------------------------------------------------
120
121GetConfigReturnType get_configuration(
122 int argc,
123 const char *argv[]
124)
125{
126 ConfigurationReader config_reader;
127 bool parsed_success = config_reader.parse( argc, argv );
128 Configuration config_obj = config_reader.get_configuration();
129
130 ConfigurationItem configuration( new Configuration( config_obj ) );
131 GetConfigReturnType return_val( parsed_success, configuration );
132 return return_val;
133}
134
135LinkStatusItem get_status_notifier(
136 const ConfigurationItem &configuration
137)
138{
139 int hosts_down_limit = configuration->get_hosts_down_limit();
140 int link_up_interval_in_sec = configuration->get_link_up_interval_in_sec();
141 int link_down_interval_in_sec = configuration->get_link_down_interval_in_sec();
142 string status_notifier_cmd = configuration->get_status_notifier_cmd();
143 LinkStatusItem link_analyzer(
144 new LinkStatus(
145 hosts_down_limit,
146 link_up_interval_in_sec,
147 link_down_interval_in_sec,
148 status_notifier_cmd
149 )
150 );
151
152 return link_analyzer;
153}
154
155void init_logger()
156{
157 // set default: log at level NOTICE to syslog and stderr
158 // to ensure that in case of faulty config, the error is noticed
159 I2n::Logger::enable_syslog( I2n::Logger::Facility::User );
160 I2n::Logger::enable_stderr_log( true );
161 I2n::Logger::set_log_level( I2n::Logger::LogLevel::Notice );
162}
163
164void set_log_output(
165 const ConfigurationItem &configuration
166)
167{
168 LogOutput log_output = configuration->get_log_output();
169 string log_file_name = configuration->get_log_file();
170 switch (log_output)
171 {
172 case LogOutput_UNDEFINED:
173 GlobalLogger.warning() << "Unknown output target -- use syslog";
174 case LogOutput_SYSLOG:
175 GlobalLogger.info() << "Setting log output target to syslog" << endl;
176 I2n::Logger::enable_syslog(true);
177 I2n::Logger::enable_stderr_log(false);
178 I2n::Logger::enable_log_file(false);
179 GlobalLogger.info() << "Set log output target to syslog" << endl;
180 break;
181 case LogOutput_TERMINAL:
182 GlobalLogger.info() << "Setting log output target to terminal" << endl;
183 I2n::Logger::enable_syslog(false);
184 I2n::Logger::enable_stderr_log(true);
185 I2n::Logger::enable_log_file(false);
186 GlobalLogger.info() << "Set log output target to terminal" << endl;
187 GlobalLogger.info() << "(check syslog for earlier messages)" << endl;
188 break;
189 case LogOutput_FILE:
190 GlobalLogger.info() << "Setting log output target to file "
191 << log_file_name << endl;
192 I2n::Logger::enable_syslog(false);
193 I2n::Logger::enable_stderr_log(false);
194 I2n::Logger::enable_log_file(log_file_name);
195 GlobalLogger.info() << "Set log output target to file "
196 << log_file_name << endl;
197 GlobalLogger.info() << "(check syslog for earlier messages)" << endl;
198 break;
199 default:
200 GlobalLogger.error() << "Unknown log output target!" << endl;
201 break;
202 }
203}
204
205/**
206 * @brief calculate delay between pingers to evenly distribute them in time
207 *
208 * If there are many pingers with same interval, will get bursts of pings
209 * and none in-between. This function calculates delays for large numbers
210 * of hosts with same ping intervals, to distribute them as evenly as
211 * possible, right from the start (might diverge over time, anyway).
212 *
213 * If interval is chosen at random, will have many pingers with different
214 * intervals; to those will assign a random delay in [0, interval].
215 *
216 * Called by init_pingers with
217 * @param hosts list of hosts as obtained from configuration
218 * @returns a map from ping interval to delay between pingers of that interval
219 */
220DelayMap calc_pinger_delays(const HostList &hosts)
221{
222 // first step: count number of hosts with same intervals
223 DelayMap delay_shifts;
224 int curr_interval;
225 BOOST_FOREACH( const HostItem &host, hosts )
226 {
227 curr_interval = host->get_interval_in_sec();
228 if (! curr_interval)
229 delay_shifts[curr_interval] = 1.0f;
230 else
231 delay_shifts[curr_interval] += 1.0f;
232 }
233
234 // create random number generator
235 typedef boost::rand48 rand_gen_type;
236 typedef boost::uniform_real<float> rand_dist_type;
237 typedef boost::variate_generator< rand_gen_type&,
238 rand_dist_type > rand_var_type;
239 rand_gen_type random_number_generator(
240 boost::numeric_cast<unsigned int>(time(0)) );
241
242 // second step: divide intervals by counts, round to int
243 // --> for 18 pingers with a 30s interval, get 30s/18 = 1.66667
244 // for random intervals, use random delays within intervals
245 BOOST_FOREACH( IntervalCountPair interval_and_count, delay_shifts )
246 {
247 if ( abs(interval_and_count.second - 1.0f) < 0.0001 ) // == 1.0f
248 { // there is exactly 1 pinger with exactly that interval
249 // --> assign a random delay within interval
250 rand_dist_type random_distribution(0.0f, interval_and_count.first);
251 rand_var_type random_variate(random_number_generator,
252 random_distribution);
253 delay_shifts[interval_and_count.first] = random_variate();
254 }
255 else
256 { // there are several pingers with same interval
257 // --> distribute evenly
258 delay_shifts[interval_and_count.first] =
259 boost::numeric_cast<float>(interval_and_count.first) /
260 std::max(1.0f, interval_and_count.second); //max is paranoid
261 }
262 }
263
264 return delay_shifts;
265}
266
267bool init_pingers(
268 const IoServiceItem io_service,
269 const ConfigurationItem &configuration,
270 const LinkStatusItem &status_notifier,
271 PingSchedulerList *scheduler_list
272)
273{
274 string default_network_interface = configuration->get_source_network_interface();
275 int ping_fail_limit = configuration->get_ping_fail_limit();
276 int ping_reply_timeout = configuration->get_ping_reply_timeout();
277
278 // remove some hosts at random
279 configuration->randomize_hosts();
280
281 // calculate delays between pingers of same interval
282 DelayMap delay_shifts = calc_pinger_delays(configuration->get_hosts());
283
284 // setup memory for assigned delays; init with delay > 0
285 DelayMap delays;
286 BOOST_FOREACH( IntervalCountPair interval_and_delay, delay_shifts )
287 delays[interval_and_delay.first] = 0.0f;
288
289 HostList hosts = configuration->get_hosts();
290
291 if (hosts.empty())
292 return false;
293
294 // more variables for pingcheck, maybe should move to config?
295 int n_parallel_pings = 1;
296 int parallel_ping_delay = 100; // ms
297 int congestion_duration_thresh = 10; // seconds
298 int congestion_percentage_thresh = 75;
299 int congest_caused_by_fail_limit_percentage = 99;
300 int ping_timeout_factor = 5;
301
302 BOOST_FOREACH( const HostItem &host, hosts )
303 {
304 string destination_address = host->get_address();
305 uint16_t destination_port = host->get_port();
306 string host_network_interface = host->get_source_network_interface();
307 string network_interface = ( host_network_interface == "default" ) ?
308 default_network_interface :
309 host_network_interface;
310 PingProtocolList protocol_list = host->get_ping_protocol_list();
311 int ping_interval_in_sec = host->get_interval_in_sec();
312
313 // get delay for this scheduler and update assigned delays
314 delays[ping_interval_in_sec] += delay_shifts[ping_interval_in_sec];
315 int current_delay = boost::math::iround(delays[ping_interval_in_sec]);
316 GlobalLogger.debug() << "assigning delay of " << current_delay
317 << "s to pinger with interval " << ping_interval_in_sec << "s";
318
319 PingSchedulerItem scheduler(
320 new PingScheduler(
321 io_service,
322 network_interface,
323 destination_address,
324 destination_port,
325 protocol_list,
326 ping_interval_in_sec,
327 ping_fail_limit,
328 congestion_percentage_thresh,
329 congest_caused_by_fail_limit_percentage,
330 congestion_duration_thresh,
331 ping_reply_timeout,
332 status_notifier,
333 current_delay,
334 n_parallel_pings,
335 parallel_ping_delay,
336 ping_timeout_factor
337 )
338 );
339 scheduler_list->push_back( scheduler );
340 }
341
342 return true;
343}
344
345void start_pingers(
346 const PingSchedulerList &scheduler_list
347)
348{
349 // start each ping scheduler
350 BOOST_FOREACH( const PingSchedulerItem &scheduler, scheduler_list )
351 scheduler->start_pinging();
352}
353
354void stop_pingers(
355 const PingSchedulerList &scheduler_list
356)
357{
358 // Stop each ping scheduler
359 GlobalLogger.info() << "Telling all pingers to stop";
360 BOOST_FOREACH( const PingSchedulerItem &scheduler, scheduler_list )
361 {
362 scheduler->stop_pinging();
363 }
364
365 IcmpPacketDistributor::clean_up_all();
366}
367
368
// the one instance of signal_data_struct; written by the signal_handler_*
// functions and install_signal_handlers, read by signal_checker and
// reset_signal_handlers
signal_data_struct signal_data;
371
372
373/// registered as signal handler; just sets signal_data.signaled_flag
374void signal_handler_int(int param)
375{
376 signal_data.signaled_flag_int = 1;
377}
378void signal_handler_term(int param)
379{
380 signal_data.signaled_flag_term = 1;
381}
382void signal_handler_usr1(int param)
383{
384 signal_data.signaled_flag_usr1 = 1;
385}
386void signal_handler_usr2(int param)
387{
388 signal_data.signaled_flag_usr2 = 1;
389}
390
391
392/**
393 * @brief called regularly from io_service; checks signal_data.signal_flag
394 *
395 * this does NOT work if there is a change in system time -- boost asio's
396 * deadline timers seem to operate on fixed time points on a non-monotonic
397 * clock;
398 *
399 * We therefore use external programs to kill pingchecker processes if the
400 * system clock changes; We use SIGHUP for this because it is not handled
401 * here and thus the default signal handler (which just kills the process
402 * without calling any destructors) avoids a freeze in here. SIGTERM would
403 * not work any more after system time change!
404 *
405 * --> do not catch SIGHUP in any asio-related signal handling!
406 */
407void signal_checker( const boost::system::error_code &error )
408{
409 bool want_stop = false;
410
411 if ( error )
412 { // there was an error in the timer
413 if ( error == boost::asio::error::operation_aborted )
414 {
415 GlobalLogger.error() << "Signal check timer was cancelled! Stopping io_service" << endl;
416 want_stop = true;
417 }
418 else
419 {
420 GlobalLogger.error() << "Signal check timer handler received error code " << error
421 << "! Stopping io_service" << endl;
422 want_stop = true;
423 }
424 }
425 else {
426 if ( signal_data.signaled_flag_int )
427 {
428 signal_data.signaled_flag_int = 0;
429 GlobalLogger.notice() << "Received signal SIGINT --> will stop" << endl;
430 want_stop = true;
431 }
432 else if ( signal_data.signaled_flag_term )
433 {
434 signal_data.signaled_flag_term = 0;
435 GlobalLogger.notice() << "Received signal SIGTERM --> will stop" << endl;
436 want_stop = true;
437 }
438 else if ( signal_data.signaled_flag_usr1 )
439 {
440 signal_data.signaled_flag_usr1 = 0;
441 int new_log_level = I2n::Logger::get_log_level()+1;
442 I2n::Logger::set_log_level( new_log_level );
443 GlobalLogger.info() << "Received SIGUSR1 -- increased log level to "
444 << I2n::Logger::get_log_level_string();
445 }
446 else if ( signal_data.signaled_flag_usr2 )
447 {
448 signal_data.signaled_flag_usr2 = 0;
449 I2n::Logger::set_log_level( signal_data.config_log_level );
450 GlobalLogger.info() << "Received SIGUSR2 -- reset log level to normal ("
451 << I2n::Logger::get_log_level_string() << ")";
452 }
453 }
454
455 if ( want_stop )
456 { // interrupt infinite loop in main and asio event loop
457 signal_data.stopped = true;
458 signal_data.io_service->stop();
459 }
460 else
461 { // re-schedule timer
462 signal_data.check_timer->expires_from_now( SIGNAL_CHECK_INTERVAL );
463 signal_data.check_timer->async_wait( signal_checker );
464 }
465}
466
467/// register own signal handlers; see reset_signal_handlers for undo
468void install_signal_handlers( const IoServiceItem io_service, const int config_log_level )
469{
470 signal_data.signaled_flag_int = 0;
471 signal_data.signaled_flag_term = 0;
472 signal_data.signaled_flag_usr1 = 0;
473 signal_data.signaled_flag_usr2 = 0;
474 signal_data.config_log_level = config_log_level;
475
476 // install own signal handlers
477 signal_data.old_handler_int = signal(SIGINT, signal_handler_int);
478 signal_data.old_handler_term = signal(SIGTERM, signal_handler_term);
479 signal_data.old_handler_usr1 = signal(SIGUSR1, signal_handler_usr1);
480 signal_data.old_handler_usr2 = signal(SIGUSR2, signal_handler_usr2);
481 if ( signal_data.old_handler_int == SIG_ERR ||
482 signal_data.old_handler_term == SIG_ERR ||
483 signal_data.old_handler_usr1 == SIG_ERR ||
484 signal_data.old_handler_usr2 == SIG_ERR )
485 throw runtime_error( string("Failed to install signal handler: ") + string(strerror(errno)) );
486
487 // create a timer and a shared pointer to it, so it does not get out of scope
488 TimerItem check_timer( new boost::asio::deadline_timer( *io_service ) );
489
490 // remember the io_service and the timer
491 signal_data.io_service = io_service;
492 signal_data.check_timer = check_timer;
493
494 // set the timer
495 check_timer->expires_from_now( SIGNAL_CHECK_INTERVAL );
496 check_timer->async_wait( signal_checker );
497 GlobalLogger.debug() << "signal timer set" << endl;
498}
499
500/// reset handlers to the ones saved in install_signal_handlers
501void reset_signal_handlers()
502{
503 void (*old_handler_int)(int) = 0;
504 void (*old_handler_term)(int) = 0;
505 void (*old_handler_usr1)(int) = 0;
506 void (*old_handler_usr2)(int) = 0;
507 if (signal_data.old_handler_int != 0 )
508 old_handler_int = signal(SIGINT , signal_data.old_handler_int);
509 if (signal_data.old_handler_term != 0 )
510 old_handler_term = signal(SIGTERM, signal_data.old_handler_term);
511 if (signal_data.old_handler_usr1 != 0 )
512 old_handler_usr1 = signal(SIGUSR1, signal_data.old_handler_usr1);
513 if (signal_data.old_handler_usr2 != 0 )
514 old_handler_usr2 = signal(SIGUSR2, signal_data.old_handler_usr2);
515
516 if ( old_handler_int == SIG_ERR ||
517 old_handler_term == SIG_ERR ||
518 old_handler_usr1 == SIG_ERR ||
519 old_handler_usr2 == SIG_ERR )
520 throw runtime_error( string("Failed to reset signal handler: ") + string(strerror(errno)) );
521}
522
523
/**
 * @brief program entry: read configuration, set up pingers, run the asio loop
 *
 * Return codes, as set in this function:
 *   0  normal end, or version information was printed
 *   1  configuration could not be read/parsed
 *   2  init_pingers returned false
 *   3  std::exception during setup
 *   4  unknown exception during setup
 *   +16 / +32 are added if cleaning up threw a std::exception / something else
 *
 * Note that the early returns 0, 1 and 2 leave directly from the setup block
 * and therefore skip the clean-up section at the end.
 */
int main( int argc, const char *argv[] )
{
    // logging must work before the configuration is known
    init_logger();
    GlobalLogger.debug() << "logger initiated with default config";

    // declared outside the try block because the event loop and the
    // clean-up below still need them
    PingSchedulerList scheduler_list;
    IoServiceItem io_service;
    int ret_code = 0;

    // --- setup phase ---
    try
    {
        GetConfigReturnType success_and_config = get_configuration( argc, argv );
        ConfigurationItem configuration = success_and_config.second;

        if ( configuration->get_print_version() ) // do this even if parsing of config failed
        {
            GlobalLogger.debug() << "Printing version info ("
                << VERSION_STRING << "." << VERSION_REVISION_STRING
                << " build " << __DATE__
                << ") and exit" << endl;
            cout << PROJECT_NAME << " version "
                 << VERSION_STRING << "." << VERSION_REVISION_STRING
                 << " build " << __DATE__
                 << endl;
            return 0;
        }

        if ( ! success_and_config.first )
        {
            GlobalLogger.error() << "Could not read/parse configuration!";
            GlobalLogger.debug() << "Return 1 immediately" << endl;
            return 1;
        }
        GlobalLogger.debug() << "Start setup" << endl;

        // switch from the defaults of init_logger to the configured logging
        int log_level = configuration->get_log_level();
        I2n::Logger::set_log_level( log_level );
        GlobalLogger.info() << "Set LogLevel to " << I2n::Logger::get_log_level_string() << endl;

        set_log_output( configuration );
        GlobalLogger.notice() << "started pingcheck version "
                              << VERSION_STRING << "." << VERSION_REVISION_STRING
                              << " build " << __DATE__
                              << endl;

        bool daemon_mode = configuration->get_daemon();
        if ( daemon_mode )
        {
            I2n::Daemon::daemonize();
        }

        LinkStatusItem status_notifier = get_status_notifier( configuration );

        // create the io_service only now (after a possible daemonize) and
        // move it into the outer variable via swap
        IoServiceItem io_service_temp( new boost::asio::io_service() );
        io_service_temp.swap( io_service );
        io_service_temp.reset();

        // create Dns master
        boost::asio::ip::address name_server_ip =
                boost::asio::ip::address::from_string(
                        configuration->get_nameserver() );
        int max_recursion_count = 10; // could make a config var some time
        DnsMaster::create_master(
                io_service,
                name_server_ip,
                configuration->get_resolved_ip_ttl_threshold(),
                configuration->get_min_time_between_resolves(),
                configuration->get_max_address_resolution_attempts(),
                max_recursion_count,
                configuration->get_dns_cache_file() );

        if ( !init_pingers(io_service, configuration, status_notifier,
                           &scheduler_list) )
        {
            GlobalLogger.error() << "Could not initialize pingers or no hosts "
                << "given to ping --> exit";
            return 2;
        }

        // the configured log level is what SIGUSR2 resets to
        install_signal_handlers( io_service, log_level );

        start_pingers( scheduler_list );
    }
    catch ( const std::exception &ex )
    {
        GlobalLogger.error() << "Uncaught exception. " << ex.what() << endl;
        ret_code = 3;
    }
    catch (...) {
        GlobalLogger.error() << "Caught unknown exception!" << endl;
        ret_code = 4;
    }

    // --- event loop; only entered if setup went through without exception ---
    if ( ret_code == 0 )
    {
        GlobalLogger.info() << "starting io_service main loop" << endl;

        // call boost::asio main event loop, catching exceptions
        // (run() is called once; after an exception execution proceeds to
        // the clean-up below and ret_code stays 0)
        try
        {
            io_service->run();
        }
        catch ( const std::exception &ex )
        {
            GlobalLogger.error() << "Caught exception, will continue. " << ex.what() << endl;
        }
        catch (...) {
            GlobalLogger.error() << "Caught unknown exception, will continue!" << endl;
        }
    }

    // clean up
    try
    {
        GlobalLogger.info() << "Cleaning up" << endl;
        stop_pingers( scheduler_list );
        reset_signal_handlers();
    }
    catch ( const std::exception &ex )
    {
        GlobalLogger.error() << "Uncaught exception while cleaning up: " << ex.what() << endl;
        ret_code += 16;
    }
    catch (...) {
        GlobalLogger.error() << "Caught unknown exception while cleaning up!" << endl;
        ret_code += 32;
    }

    GlobalLogger.notice() << "Pingcheck done " << endl;
    return ret_code;
}