2 The software in this package is distributed under the GNU General
3 Public License version 2 (with a special exception described below).
5 A copy of GNU General Public License (GPL) is included in this distribution,
6 in the file COPYING.GPL.
8 As a special exception, if other files instantiate templates or use macros
9 or inline functions from this file, or you compile this file and link it
10 with other works to produce a work based on this file, this file
11 does not by itself cause the resulting work to be covered
12 by the GNU General Public License.
14 However the source code for this file must still be made available
15 in accordance with section (3) of the GNU General Public License.
17 This exception does not invalidate any other reasons why a work based
18 on this file might be covered by the GNU General Public License.
20 #include "host/pingscheduler.h"
25 #include <boost/bind.hpp>
26 #include <boost/foreach.hpp>
28 #include <logfunc.hpp>
30 #include "boost_assert_handler.h"
31 #include "host/pingerfactory.h"
32 #include "dns/dnsmaster.h"
33 #include "icmp/icmppinger.h"
34 #include "link/linkstatus.h"
37 using boost::asio::io_service;
39 using boost::date_time::time_resolution_traits_adapted64_impl;
40 using boost::posix_time::microsec_clock;
41 using boost::posix_time::ptime;
42 using boost::posix_time::seconds;
43 using boost::posix_time::milliseconds;
44 using boost::shared_ptr;
45 using I2n::Logger::GlobalLogger;
47 //-----------------------------------------------------------------------------
49 //-----------------------------------------------------------------------------
52 * @brief Parameterized constructor.
54 * @param io_serv The one @c io_serv object that controls async processing
55 * @param network_interface The name of the network interface sending the pings.
56 * @param destination_address The remote address to ping.
57 * @param destination_port The remote port to ping.
58 * @param ping_protocol_list A list of protocols to use.
59 * @param ping_interval_in_sec Amount of time between each ping.
60 * @param ping_fail_percentage_limit Maximum amount of pings that can fail.
61 * @param ping_congestion_percentage_limit Amount of pings indication congested
63 * @param ping_congestion_duration_thresh Duration in seconds that indicates a
65 * @param ping_reply_timeout Max amount time to wait for ping to finish
66 * @param link_analyzer The object to monitor the link status.
67 * @param first_delay Delay in seconds from start_pinging to first ping attempt
68 * @param n_parallel_pings: Number of pingers to ping the same IP in parallel
70 PingScheduler::PingScheduler(
71 const IoServiceItem io_serv,
72 const string &network_interface,
73 const string &destination_address,
74 const uint16_t destination_port,
75 const PingProtocolList &ping_protocol_list,
76 const long ping_interval_in_sec,
77 const int ping_fail_percentage_limit,
78 const int ping_congestion_percentage_limit,
79 const int ping_congestion_duration_thresh,
80 const int ping_reply_timeout,
81 LinkStatusItem link_analyzer,
82 const int first_delay,
83 const int n_parallel_pings,
84 const int parallel_ping_delay
87 NetworkInterfaceName( network_interface ),
88 DestinationAddress( destination_address ),
89 DestinationPort( destination_port ),
90 Protocols( ping_protocol_list ),
92 PingIntervalInSec( ping_interval_in_sec ),
93 NPingers( n_parallel_pings ),
94 FirstDelay( first_delay ),
95 NextPingTimer( *io_serv ),
96 TimeSentLastPing( microsec_clock::universal_time() ),
97 PingReplyTimeout( ping_reply_timeout ),
98 HostAnalyzer( destination_address, ping_fail_percentage_limit,
99 ping_congestion_percentage_limit,
100 ping_congestion_duration_thresh, n_parallel_pings,
105 ParallelPingDelay( parallel_ping_delay ),
106 DelayedPingTimer( *io_serv ),
109 ContinueOnOutdatedIps( false )
111 BOOST_ASSERT( !network_interface.empty() );
112 BOOST_ASSERT( !destination_address.empty() );
113 BOOST_ASSERT( ( 0 < destination_port ) &&
114 ( destination_port < numeric_limits<uint16_t>::max() ) );
115 BOOST_ASSERT( 0 < ping_interval_in_sec );
116 BOOST_ASSERT( (0 <= ping_fail_percentage_limit) &&
117 ( ping_fail_percentage_limit <= 100) );
121 init_ping_protocol();
123 // start resolving already so we are prepared to ping
124 update_dns_resolver();
131 PingScheduler::~PingScheduler()
135 void PingScheduler::stop_pinging()
137 // stop pinger and resolver
138 GlobalLogger.debug() << LogPrefix << "scheduler: stop pinging";
140 cancel_resolve(true);
142 // now cancel the own timer in case that pinger cancelation called callback
143 GlobalLogger.debug() << LogPrefix << "scheduler: cancel timer";
144 NextPingTimer.cancel();
148 * @brief stop all pingers and remove them from Pingers variable which will
149 * proboably cause their destruction
151 * Pingers is empty afterwards
153 void PingScheduler::clear_pingers()
156 while(!Pingers.empty())
158 pinger = Pingers.back();
159 pinger->stop_pinging();
165 * @brief Start into infinite loop of calls to ping
167 * Does not start yet but set NextPingTimer (possibly to 0), so action starts
168 * when io_service is started
170 void PingScheduler::start_pinging()
172 if ( FirstDelay > 0 )
173 GlobalLogger.info() << LogPrefix << "Delaying first ping by "
174 << FirstDelay << "s";
176 GlobalLogger.info() << LogPrefix << "Schedule ping as soon as possible";
178 (void) NextPingTimer.expires_from_now( seconds( FirstDelay ) );
179 NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
180 boost::asio::placeholders::error ) );
185 * @brief call Ping::ping and schedule a call to ping_done_handler when finished
187 void PingScheduler::ping(const boost::system::error_code &error)
190 { // get here, e.g. by NextPingTimer.cancel in stop_pinging
191 if ( error == boost::asio::error::operation_aborted )
192 GlobalLogger.error() << LogPrefix << "Timer for ping was cancelled!"
195 GlobalLogger.error() << LogPrefix << "Received error " << error
196 << " waiting for ping! Stopping";
200 // ping as soon as dns is ready
206 void PingScheduler::ping_when_ready()
210 GlobalLogger.info() << LogPrefix << "waiting for ping request "
211 << "(should take no more than " << PingIntervalInSec << "s)";
214 else if ( Resolver && Resolver->is_resolving() )
216 GlobalLogger.info() << LogPrefix << "waiting for DNS to finish";
219 else if ( !Resolver )
221 // should not happen, but check anyway
222 GlobalLogger.warning() << LogPrefix << "Have no resolver!";
226 GlobalLogger.info() << LogPrefix << "start ping";
229 // try to get an up-to-date IP (ContinueOnOutdatedIps may only be set
230 // because a CNAME was out of date -- IPs may still be current)
231 HostAddress ip = Resolver->get_next_ip();
233 if ( !ip.is_valid() )
234 { // this can happen in 2 cases: if ContinueOnOutdatedIps==true
235 // or when ip went out of date between resolve and now
236 // --> try to use outdated IP
237 GlobalLogger.info() << LogPrefix << "Checking for outdated IPs";
238 bool check_up_to_date = false;
239 ip = Resolver->get_next_ip(check_up_to_date);
241 if ( !ip.is_valid() )
242 { // Do not even have an outdated IP!
243 // This happens if have no cached IPs and resolve failed
244 GlobalLogger.info() << LogPrefix << "Not even outdated IP to ping "
245 << "-- treat like a failed ping.";
247 // skip the ping and directly call ping_done_handler
248 HostAnalyzer.set_resolved_ip_count(1); // must have been 0 --> failed
249 // ping would create failed assumption (nPings > nIPs)
250 ping_done_handler(PingStatus_FailureNoIP, 0);
251 HostAnalyzer.set_resolved_ip_count(0); // set back
255 // create new pingers
256 for (int count=0; count<NPingers; ++count)
258 PingerFactory::createPinger(*ProtocolIter, IoService,
259 NetworkInterfaceName,
262 // remember when pinging started
263 TimeSentLastPing = microsec_clock::universal_time();
266 boost::asio::ip::address actual_ip = ip.get_ip();
267 GlobalLogger.info() << LogPrefix << "pinging IP " << actual_ip
268 << " with TTL " << ip.get_ttl().get_updated_value() << "s";
270 delayed_ping(boost::system::error_code(), actual_ip, 0);
274 void PingScheduler::delayed_ping( const boost::system::error_code &error,
275 const boost::asio::ip::address ip,
276 const int pinger_index )
280 GlobalLogger.info() << LogPrefix << "delayed ping received an error: "
285 GlobalLogger.debug() << LogPrefix << "starting delayed ping index "
287 Pingers[pinger_index]->ping(ip,
289 boost::bind(&PingScheduler::ping_done_handler,
291 if (pinger_index >= NPingers-1)
292 GlobalLogger.debug() << LogPrefix << "started all delayed pings";
295 DelayedPingTimer.expires_from_now( milliseconds(ParallelPingDelay) );
296 DelayedPingTimer.async_wait( bind( &PingScheduler::delayed_ping,
297 this, boost::asio::placeholders::error, ip, pinger_index+1) );
302 //------------------------------------------------------------------------------
303 // Post Processing of Ping result and Preparation for next ping
304 //------------------------------------------------------------------------------
307 * @brief called when Ping::ping is done; calls functions to update
308 * statistics, ping interval and elapsed time;
309 * schedules a call to ping, thereby closing the loop
311 void PingScheduler::ping_done_handler( const PingStatus &result,
312 const long ping_duration_us )
314 PingStatus edited_result = result;
315 if (result == PingStatus_SuccessReply && ContinueOnOutdatedIps)
317 edited_result = PingStatus_SuccessOutdatedIP;
319 // reset ContinueOnOutdatedIps
320 ContinueOnOutdatedIps = false;
325 GlobalLogger.info() << LogPrefix << "Ping " << NPingersDone << " of "
326 << NPingers << " done with result " << to_string(edited_result);
329 // can call update_ping_interval only after update_ping_statistics!
330 HostAnalyzer.update_ping_statistics( edited_result, ping_duration_us );
332 // prepare next ping only after all pingers are done
333 if (NPingersDone == NPingers)
335 // stop and destruct all pingers
338 GlobalLogger.debug() << LogPrefix
339 << "--------------------------------------------------------------";
341 // update variables for next ping: number of pings, delay, protocol
342 update_ping_protocol();
343 update_ping_interval();
344 update_ping_number();
351 void PingScheduler::prepare_next_ping()
353 // start DNS resolve if necessary
354 update_dns_resolver();
356 // schedule next ping
357 int seconds_since_last_ping = (microsec_clock::universal_time()
358 - TimeSentLastPing).total_seconds();
359 if ( seconds_since_last_ping > PingIntervalInSec )
361 GlobalLogger.info() << "We are late for next ping!";
362 seconds_since_last_ping = PingIntervalInSec;
363 (void) NextPingTimer.expires_from_now( seconds(0) );
366 (void) NextPingTimer.expires_from_now( seconds( PingIntervalInSec
367 - seconds_since_last_ping ) );
368 NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
369 boost::asio::placeholders::error ) );
372 void PingScheduler::update_ping_interval()
374 // have to ping more often?
375 if ( HostAnalyzer.exceeded_ping_failed_limit() )
377 PingIntervalInSec.speed_up();
379 GlobalLogger.debug() << LogPrefix << "- Speeding up ping interval to: "
380 << PingIntervalInSec << "s";
384 PingIntervalInSec.back_to_original();
386 GlobalLogger.debug() << LogPrefix << "- Stick to the original ping "
387 << "interval: " << PingIntervalInSec << "s";
391 /** in case of congested line, increase number of pings
393 * CAUTION! Only call this after clear_pingers !!!
395 void PingScheduler::update_ping_number()
397 // make sure we do not loose track of pingers here
398 if ( NPingersDone != NPingers || !Pingers.empty() )
400 GlobalLogger.warning() << LogPrefix << "Should only change number of "
401 << "pingers when all are finished and deleted! Have " << NPingers
402 << " pingers, " << NPingersDone << " of which are done and "
403 << Pingers.size() << " in listDone! Will not change NPingers.";
407 if ( HostAnalyzer.exceeded_ping_congestion_limit() )
411 GlobalLogger.debug() << LogPrefix << "- Increasing ping number to: "
416 NPingers.back_to_original();
418 GlobalLogger.debug() << LogPrefix << "- Stick to the original ping "
419 << "number: " << NPingers;
422 HostAnalyzer.set_n_parallel_pings(NPingers);
425 //------------------------------------------------------------------------------
426 // Ping Protocol Rotation
427 //------------------------------------------------------------------------------
429 void PingScheduler::init_ping_protocol()
431 ProtocolIter = Protocols.end();
432 get_next_ping_protocol();
435 void PingScheduler::update_ping_protocol()
437 if ( can_change_ping_protocol() )
439 get_next_ping_protocol();
443 void PingScheduler::get_next_ping_protocol()
446 if (ProtocolIter == Protocols.end())
447 ProtocolIter = Protocols.begin();
450 bool PingScheduler::can_change_ping_protocol() const
452 // TODO can_change_ping_protocol() and get_next_ping_protocol() may be
453 // implemented in a Algorithm class that can be exchanged in this class to
454 // provide an algorithm neutral class
458 //------------------------------------------------------------------------------
459 // DNS host name resolution
460 //------------------------------------------------------------------------------
462 // show "!" after host name if running on outdated IPs
463 void PingScheduler::update_log_prefix()
465 std::stringstream temp;
466 temp << "Sched(" << DestinationAddress;
467 if (ContinueOnOutdatedIps)
470 LogPrefix = temp.str();
473 void PingScheduler::update_dns_resolver()
475 if (Resolver && Resolver->is_resolving())
476 cancel_resolve(false);
478 if (ContinueOnOutdatedIps)
480 ContinueOnOutdatedIps = false;
484 // DNS master caches created resolvers and resolved IPs, so this will
485 // probably just return an existing resolver with already resolved IPs for
486 // requested protocol ( ICMP/TCP is ignored, only IPv4/v6 is important)
487 Resolver = DnsMaster::get_instance()->get_resolver_for(DestinationAddress,
490 // get number of up-to-date IPs
491 // TODO should check here, if they will be up to date in PingIntervalInSec
492 bool check_up_to_date = true;
493 int ip_count = Resolver->get_resolved_ip_count(check_up_to_date);
496 GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
497 << ip_count << " (IPs may be outdated=" << !check_up_to_date << ")";
498 HostAnalyzer.set_resolved_ip_count( ip_count );
500 if (Resolver->is_resolving())
501 GlobalLogger.warning() << LogPrefix << "have up to date IPs but "
502 << "resolver seems to be resolving all the same... "
503 << "Start pinging anyway!";
508 GlobalLogger.info() << LogPrefix
509 << "No up-to-date IPs --> start resolve";
510 start_resolving_ping_address();
511 // set resolved_ip_count will be called in resolve callback
515 void PingScheduler::start_resolving_ping_address()
517 Resolver->async_resolve( boost::bind(&PingScheduler::dns_resolve_callback,
521 void PingScheduler::dns_resolve_callback(const bool was_success,
522 const int recursion_count)
524 GlobalLogger.info() << LogPrefix << "dns resolution finished "
525 << "with success = " << was_success << " "
526 << "after " << recursion_count << " recursions";
530 // trust that a successfull DNS resolve means we have an IP with TTL>0
531 int ip_count = Resolver->get_resolved_ip_count(!ContinueOnOutdatedIps);
533 { // this will create trouble in HostAnalyzer
534 GlobalLogger.warning() << LogPrefix
535 << "Should not have reached this case: resolve was "
536 << "successfull but still have no IPs (up-to-date="
537 << !ContinueOnOutdatedIps << ")!";
538 if (DnsMaster::get_instance()->get_resolved_ip_ttl_threshold() > 0)
539 GlobalLogger.warning() << LogPrefix << "This probably happened "
540 << "because you specified a TTL threshold > 0 but resolving"
541 << " had no effect on TTLs since external cache is only "
542 << "updated when TTL=0 is reached.";
546 GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
547 << ip_count << " (IPs may be outdated="
548 << ContinueOnOutdatedIps << ") --> could ping now";
549 HostAnalyzer.set_resolved_ip_count( ip_count );
554 { // host name resolution failed; try again bypassing first outdated CNAME
555 // or using cached IP
556 std::string skip_host = Resolver->get_skip_cname();
558 if (skip_host.empty())
559 { // try to continue with cached IPs
560 int ip_count = Resolver->get_resolved_ip_count(false);
563 GlobalLogger.notice() << LogPrefix << "DNS failed "
564 << "and have no cached IPs either --> cannot ping";
565 // ping_when_ready will deal with this case
568 ContinueOnOutdatedIps = true;
571 GlobalLogger.notice() << LogPrefix << "DNS failed, "
572 << "try anyway with cached data";
575 GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
576 << ip_count << " (IPs may be outdated=" << true << ")";
577 HostAnalyzer.set_resolved_ip_count( ip_count );
582 { // have CNAME to continue
583 ContinueOnOutdatedIps = true;
585 GlobalLogger.notice() << LogPrefix << "DNS failed, "
586 << "try again skipping a CNAME and resolving "
587 << skip_host << " directly";
589 cancel_resolve(false);
591 // now create new resolver
592 Resolver = DnsMaster::get_instance()
593 ->get_resolver_for(skip_host, *ProtocolIter);
594 start_resolving_ping_address();
600 * cancel resolver if force_cancel or if it is not resolving DestinationAddress
602 * Resolvers have a life on their own: they are cached by DnsMaster so never go
603 * out of scope and even after calling callbacks, there might still be a
604 * longterm timer active to re-try resolving.
605 * We want to cancel that long-term timer only if the Resolver is not for our
606 * real, original DestinationAddress but a CNAME, which can happen when trying
607 * to skip cnames and working on out-dated IPs
609 void PingScheduler::cancel_resolve(const bool force_cancel)
613 GlobalLogger.info() << "Cancelling resolver (forced)";
614 Resolver->cancel_resolve();
616 else if ( Resolver->get_hostname() == DestinationAddress )
617 GlobalLogger.info() << LogPrefix
618 << "Leave original resolver active in background";
621 GlobalLogger.info() << LogPrefix << "Cancel resolver for "
622 << Resolver->get_hostname() << " since is not the original "
623 << DestinationAddress;
624 Resolver->cancel_resolve();