2 The software in this package is distributed under the GNU General
3 Public License version 2 (with a special exception described below).
5 A copy of GNU General Public License (GPL) is included in this distribution,
6 in the file COPYING.GPL.
8 As a special exception, if other files instantiate templates or use macros
9 or inline functions from this file, or you compile this file and link it
10 with other works to produce a work based on this file, this file
11 does not by itself cause the resulting work to be covered
12 by the GNU General Public License.
14 However the source code for this file must still be made available
15 in accordance with section (3) of the GNU General Public License.
17 This exception does not invalidate any other reasons why a work based
18 on this file might be covered by the GNU General Public License.
20 #include "host/pingscheduler.h"
25 #include <boost/bind.hpp>
26 #include <boost/foreach.hpp>
28 #include <logfunc.hpp>
30 #include "boost_assert_handler.h"
31 #include "host/pingerfactory.h"
32 #include "dns/dnsmaster.h"
33 #include "icmp/icmppinger.h"
34 #include "link/linkstatus.h"
37 using boost::asio::io_service;
39 using boost::date_time::time_resolution_traits_adapted64_impl;
40 using boost::posix_time::microsec_clock;
41 using boost::posix_time::ptime;
42 using boost::posix_time::seconds;
43 using boost::posix_time::milliseconds;
44 using boost::shared_ptr;
45 using I2n::Logger::GlobalLogger;
48 //-----------------------------------------------------------------------------
50 //-----------------------------------------------------------------------------
53 * @brief Parameterized constructor.
55 * @param io_serv The one @c io_serv object that controls async processing
56 * @param network_interface The name of the network interface sending the pings.
57 * @param destination_address The remote address to ping.
58 * @param destination_port The remote port to ping.
59 * @param ping_protocol_list A list of protocols to use.
60 * @param ping_interval_in_sec Amount of time between each ping.
61 * @param ping_fail_percentage_limit Maximum amount of pings that can fail.
62 * @param ping_congestion_percentage_limit Amount of pings indication congested
64 * @param ping_congestion_duration_thresh Duration in seconds that indicates a
66 * @param ping_reply_timeout Max amount time to wait for ping to finish
67 * @param link_analyzer The object to monitor the link status.
68 * @param first_delay Delay in seconds from start_pinging to first ping attempt
69 * @param n_parallel_pings: Number of pingers to ping the same IP in parallel
71 PingScheduler::PingScheduler(
72 const IoServiceItem io_serv,
73 const string &network_interface,
74 const string &destination_address,
75 const uint16_t destination_port,
76 const PingProtocolList &ping_protocol_list,
77 const long ping_interval_in_sec,
78 const int ping_fail_percentage_limit,
79 const int ping_congestion_percentage_limit,
80 const int ping_congestion_duration_thresh,
81 const int ping_reply_timeout,
82 LinkStatusItem link_analyzer,
83 const int first_delay,
84 const int n_parallel_pings,
85 const int parallel_ping_delay,
86 const int ping_timeout_factor
89 NetworkInterfaceName( network_interface ),
90 DestinationAddress( destination_address ),
91 DestinationPort( destination_port ),
92 Protocols( ping_protocol_list ),
94 PingIntervalInSec( ping_interval_in_sec ),
95 NPingers( n_parallel_pings ),
96 FirstDelay( first_delay ),
97 NextPingTimer( *io_serv ),
98 TimeSentLastPing( microsec_clock::universal_time() ),
99 PingReplyTimeout( ping_reply_timeout ),
100 PingReplyTimeoutOrig( ping_reply_timeout ),
101 HostAnalyzer( destination_address, ping_fail_percentage_limit,
102 ping_congestion_percentage_limit,
103 ping_congestion_duration_thresh, n_parallel_pings,
108 ParallelPingDelay( parallel_ping_delay ),
109 DelayedPingTimer( *io_serv ),
112 ContinueOnOutdatedIps( false ),
113 PingTimeoutFactor( ping_timeout_factor )
115 BOOST_ASSERT( !network_interface.empty() );
116 BOOST_ASSERT( !destination_address.empty() );
117 BOOST_ASSERT( ( 0 < destination_port ) &&
118 ( destination_port < numeric_limits<uint16_t>::max() ) );
119 BOOST_ASSERT( 0 < ping_interval_in_sec );
120 BOOST_ASSERT( (0 <= ping_fail_percentage_limit) &&
121 ( ping_fail_percentage_limit <= 100) );
125 init_ping_protocol();
127 // start resolving already so we are prepared to ping
128 update_dns_resolver();
135 PingScheduler::~PingScheduler()
139 void PingScheduler::stop_pinging()
141 // stop pinger and resolver
142 GlobalLogger.debug() << LogPrefix << "scheduler: stop pinging";
144 cancel_resolve(true);
146 // now cancel the own timer in case that pinger cancelation called callback
147 GlobalLogger.debug() << LogPrefix << "scheduler: cancel timer";
148 NextPingTimer.cancel();
152 * @brief stop all pingers and remove them from Pingers variable which will
153 * proboably cause their destruction
155 * Pingers is empty afterwards
157 void PingScheduler::clear_pingers()
160 while(!Pingers.empty())
162 pinger = Pingers.back();
163 pinger->stop_pinging();
169 * @brief Start into infinite loop of calls to ping
171 * Does not start yet but set NextPingTimer (possibly to 0), so action starts
172 * when io_service is started
174 void PingScheduler::start_pinging()
176 if ( FirstDelay > 0 )
177 GlobalLogger.info() << LogPrefix << "Delaying first ping by "
178 << FirstDelay << "s";
180 GlobalLogger.info() << LogPrefix << "Schedule ping as soon as possible";
182 (void) NextPingTimer.expires_from_now( seconds( FirstDelay ) );
183 NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
184 boost::asio::placeholders::error ) );
189 * @brief call Ping::ping and schedule a call to ping_done_handler when finished
191 void PingScheduler::ping(const boost::system::error_code &error)
194 { // get here, e.g. by NextPingTimer.cancel in stop_pinging
195 if ( error == boost::asio::error::operation_aborted )
196 GlobalLogger.error() << LogPrefix << "Timer for ping was cancelled!"
199 GlobalLogger.error() << LogPrefix << "Received error " << error
200 << " waiting for ping! Stopping";
204 // ping as soon as dns is ready
210 void PingScheduler::ping_when_ready()
214 GlobalLogger.info() << LogPrefix << "waiting for ping request "
215 << "(should take no more than " << PingIntervalInSec << "s)";
218 else if ( Resolver && Resolver->is_resolving() )
220 GlobalLogger.info() << LogPrefix << "waiting for DNS to finish";
223 else if ( !Resolver )
225 // should not happen, but check anyway
226 GlobalLogger.warning() << LogPrefix << "Have no resolver!";
230 GlobalLogger.info() << LogPrefix << "start ping";
233 // try to get an up-to-date IP (ContinueOnOutdatedIps may only be set
234 // because a CNAME was out of date -- IPs may still be current)
235 HostAddress ip = Resolver->get_next_ip();
237 if ( !ip.is_valid() )
238 { // this can happen in 2 cases: if ContinueOnOutdatedIps==true
239 // or when ip went out of date between resolve and now
240 // --> try to use outdated IP
241 GlobalLogger.info() << LogPrefix << "Checking for outdated IPs";
242 bool check_up_to_date = false;
243 ip = Resolver->get_next_ip(check_up_to_date);
245 if ( !ip.is_valid() )
246 { // Do not even have an outdated IP!
247 // This happens if have no cached IPs and resolve failed
248 GlobalLogger.info() << LogPrefix << "Not even outdated IP to ping "
249 << "-- treat like a failed ping.";
251 // skip the ping and directly call ping_done_handler the appropriate
253 for (int count=0; count<NPingers; ++count)
254 ping_done_handler(PingStatus_FailureNoIP, 0);
258 // create new pingers
259 for (int count=0; count<NPingers; ++count)
261 PingerFactory::createPinger(*ProtocolIter, IoService,
262 NetworkInterfaceName,
265 // remember when pinging started
266 TimeSentLastPing = microsec_clock::universal_time();
269 boost::asio::ip::address actual_ip = ip.get_ip();
270 GlobalLogger.info() << LogPrefix << "pinging IP " << actual_ip
271 << " with TTL " << ip.get_ttl().get_updated_value() << "s";
272 delayed_ping(boost::system::error_code(), actual_ip, 0);
276 void PingScheduler::delayed_ping( const boost::system::error_code &error,
277 const boost::asio::ip::address ip,
278 const int pinger_index )
282 GlobalLogger.info() << LogPrefix << "delayed ping received an error: "
287 GlobalLogger.debug() << LogPrefix << "starting delayed ping index "
289 Pingers[pinger_index]->ping(ip,
291 boost::bind(&PingScheduler::ping_done_handler,
293 if (pinger_index >= NPingers-1)
294 GlobalLogger.debug() << LogPrefix << "started all delayed pings";
297 DelayedPingTimer.expires_from_now( milliseconds(ParallelPingDelay) );
298 DelayedPingTimer.async_wait( bind( &PingScheduler::delayed_ping,
299 this, boost::asio::placeholders::error, ip, pinger_index+1) );
304 //------------------------------------------------------------------------------
305 // Post Processing of Ping result and Preparation for next ping
306 //------------------------------------------------------------------------------
309 * @brief called when Ping::ping is done; calls functions to update
310 * statistics, ping interval and elapsed time;
311 * schedules a call to ping, thereby closing the loop
313 void PingScheduler::ping_done_handler( const PingStatus &result,
314 const long ping_duration_us )
316 PingStatus edited_result = result;
317 if (result == PingStatus_SuccessReply && ContinueOnOutdatedIps)
319 edited_result = PingStatus_SuccessOutdatedIP;
321 // reset ContinueOnOutdatedIps
322 ContinueOnOutdatedIps = false;
327 GlobalLogger.info() << LogPrefix << "Ping " << NPingersDone << " of "
328 << NPingers << " done with result " << to_string(edited_result);
331 // can call update_ping_interval only after update_ping_statistics!
332 HostAnalyzer.update_ping_statistics( edited_result, ping_duration_us );
334 // prepare next ping only after all pingers are done
335 if (NPingersDone == NPingers)
337 // stop and destruct all pingers
340 GlobalLogger.debug() << LogPrefix
341 << "--------------------------------------------------------------";
343 // update variables for next ping: number of pings, delay, protocol
344 update_ping_protocol();
345 update_ping_interval();
346 update_ping_number();
353 void PingScheduler::prepare_next_ping()
355 // start DNS resolve if necessary
356 update_dns_resolver();
360 // schedule next ping
361 int seconds_since_last_ping = (microsec_clock::universal_time()
362 - TimeSentLastPing).total_seconds();
363 if ( seconds_since_last_ping > PingIntervalInSec )
365 GlobalLogger.info() << "We are late for next ping!";
366 seconds_since_last_ping = PingIntervalInSec;
367 (void) NextPingTimer.expires_from_now( seconds(0) );
370 (void) NextPingTimer.expires_from_now( seconds( PingIntervalInSec
371 - seconds_since_last_ping ) );
372 NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
373 boost::asio::placeholders::error ) );
376 void PingScheduler::update_ping_interval()
378 // have to ping more often?
379 if ( HostAnalyzer.exceeded_ping_failed_limit() )
381 PingIntervalInSec.speed_up();
383 GlobalLogger.debug() << LogPrefix << "- Speeding up ping interval to: "
384 << PingIntervalInSec << "s";
385 PingReplyTimeout = PingReplyTimeoutOrig * PingTimeoutFactor;
386 GlobalLogger.debug() << LogPrefix << "- Increase ping timeout to "
387 << PingReplyTimeout << "s";
391 PingIntervalInSec.back_to_original();
393 GlobalLogger.debug() << LogPrefix << "- Stick to the original ping "
394 << "interval: " << PingIntervalInSec << "s";
395 PingReplyTimeout = PingReplyTimeoutOrig;
396 GlobalLogger.debug() << LogPrefix << "- Reset ping timeout to "
397 << PingReplyTimeout << "s";
401 /** in case of congested line, increase number of pings
403 * CAUTION! Only call this after clear_pingers !!!
405 void PingScheduler::update_ping_number()
407 // make sure we do not loose track of pingers here
408 if ( NPingersDone != NPingers || !Pingers.empty() )
410 GlobalLogger.warning() << LogPrefix << "Should only change number of "
411 << "pingers when all are finished and deleted! Have " << NPingers
412 << " pingers, " << NPingersDone << " of which are done and "
413 << Pingers.size() << " in listDone! Will not change NPingers.";
417 if ( HostAnalyzer.exceeded_ping_congestion_limit() )
421 GlobalLogger.notice() << LogPrefix << "Line appears congested!";
422 GlobalLogger.debug() << LogPrefix << "- Increasing ping number to: "
427 NPingers.back_to_original();
429 GlobalLogger.debug() << LogPrefix << "- Stick to the original ping "
430 << "number: " << NPingers;
433 HostAnalyzer.set_n_parallel_pings(NPingers);
436 //------------------------------------------------------------------------------
437 // Ping Protocol Rotation
438 //------------------------------------------------------------------------------
440 void PingScheduler::init_ping_protocol()
442 ProtocolIter = Protocols.end();
443 get_next_ping_protocol();
446 void PingScheduler::update_ping_protocol()
448 if ( can_change_ping_protocol() )
450 get_next_ping_protocol();
454 void PingScheduler::get_next_ping_protocol()
457 if (ProtocolIter == Protocols.end())
458 ProtocolIter = Protocols.begin();
461 bool PingScheduler::can_change_ping_protocol() const
463 // TODO can_change_ping_protocol() and get_next_ping_protocol() may be
464 // implemented in a Algorithm class that can be exchanged in this class to
465 // provide an algorithm neutral class
469 //------------------------------------------------------------------------------
470 // DNS host name resolution
471 //------------------------------------------------------------------------------
473 // show "!" after host name if running on outdated IPs
474 void PingScheduler::update_log_prefix()
476 std::stringstream temp;
477 temp << "Sched(" << DestinationAddress;
478 if (ContinueOnOutdatedIps)
481 LogPrefix = temp.str();
484 void PingScheduler::update_dns_resolver()
486 if (Resolver && Resolver->is_resolving())
487 cancel_resolve(false);
489 if (ContinueOnOutdatedIps)
491 ContinueOnOutdatedIps = false;
495 // DNS master caches created resolvers and resolved IPs, so this will
496 // probably just return an existing resolver with already resolved IPs for
497 // requested protocol ( ICMP/TCP is ignored, only IPv4/v6 is important)
498 Resolver = DnsMaster::get_instance()->get_resolver_for(DestinationAddress,
501 // get number of up-to-date IPs
502 // TODO should check here, if they will be up to date in PingIntervalInSec
503 bool check_up_to_date = true;
504 int ip_count = Resolver->get_resolved_ip_count(check_up_to_date);
507 GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
508 << ip_count << " (IPs may be outdated=" << !check_up_to_date << ")";
509 HostAnalyzer.set_resolved_ip_count( ip_count );
511 if (Resolver->is_resolving())
512 GlobalLogger.warning() << LogPrefix << "have up to date IPs but "
513 << "resolver seems to be resolving all the same... "
514 << "Start pinging anyway!";
519 GlobalLogger.info() << LogPrefix
520 << "No up-to-date IPs --> start resolve";
521 start_resolving_ping_address();
522 // set resolved_ip_count will be called in resolve callback
526 void PingScheduler::start_resolving_ping_address()
528 Resolver->async_resolve( boost::bind(&PingScheduler::dns_resolve_callback,
532 void PingScheduler::dns_resolve_callback(const bool was_success,
533 const int recursion_count)
535 GlobalLogger.info() << LogPrefix << "dns resolution finished "
536 << "with success = " << was_success << " "
537 << "after " << recursion_count << " recursions";
541 // trust that a successfull DNS resolve means we have an IP with TTL>0
542 int ip_count = Resolver->get_resolved_ip_count(!ContinueOnOutdatedIps);
545 GlobalLogger.warning() << LogPrefix
546 << "Should not have reached this case: resolve was "
547 << "successfull but still have no IPs (up-to-date="
548 << !ContinueOnOutdatedIps << ")!";
549 if (DnsMaster::get_instance()->get_resolved_ip_ttl_threshold() > 0)
550 GlobalLogger.warning() << LogPrefix << "This probably happened "
551 << "because you specified a TTL threshold > 0 but resolving"
552 << " had no effect on TTLs since external cache is only "
553 << "updated when TTL=0 is reached.";
557 GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
558 << ip_count << " (IPs may be outdated="
559 << ContinueOnOutdatedIps << ") --> could ping now";
561 HostAnalyzer.set_resolved_ip_count( ip_count );
565 { // host name resolution failed; try again bypassing first outdated CNAME
566 // or using cached IP
567 std::string skip_host = Resolver->get_skip_cname();
569 if (skip_host.empty())
570 { // try to continue with cached IPs
571 int ip_count = Resolver->get_resolved_ip_count(false);
574 GlobalLogger.notice() << LogPrefix << "DNS failed "
575 << "and have no cached IPs either --> cannot ping";
576 // ping_when_ready will deal with this case
579 ContinueOnOutdatedIps = true;
582 GlobalLogger.notice() << LogPrefix << "DNS failed, "
583 << "try anyway with cached data";
586 GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
587 << ip_count << " (IPs may be outdated=" << true << ")";
588 HostAnalyzer.set_resolved_ip_count( ip_count );
593 { // have CNAME to continue
594 ContinueOnOutdatedIps = true;
596 GlobalLogger.notice() << LogPrefix << "DNS failed, "
597 << "try again skipping a CNAME and resolving "
598 << skip_host << " directly";
600 cancel_resolve(false);
602 // now create new resolver
603 Resolver = DnsMaster::get_instance()
604 ->get_resolver_for(skip_host, *ProtocolIter);
605 start_resolving_ping_address();
611 * cancel resolver if force_cancel or if it is not resolving DestinationAddress
613 * Resolvers have a life on their own: they are cached by DnsMaster so never go
614 * out of scope and even after calling callbacks, there might still be a
615 * longterm timer active to re-try resolving.
616 * We want to cancel that long-term timer only if the Resolver is not for our
617 * real, original DestinationAddress but a CNAME, which can happen when trying
618 * to skip cnames and working on out-dated IPs
620 void PingScheduler::cancel_resolve(const bool force_cancel)
624 GlobalLogger.info() << "Cancelling resolver (forced)";
625 Resolver->cancel_resolve();
627 else if ( Resolver->get_hostname() == DestinationAddress )
628 GlobalLogger.info() << LogPrefix
629 << "Leave original resolver active in background";
632 GlobalLogger.info() << LogPrefix << "Cancel resolver for "
633 << Resolver->get_hostname() << " since is not the original "
634 << DestinationAddress;
635 Resolver->cancel_resolve();