2 The software in this package is distributed under the GNU General
3 Public License version 2 (with a special exception described below).
5 A copy of GNU General Public License (GPL) is included in this distribution,
6 in the file COPYING.GPL.
8 As a special exception, if other files instantiate templates or use macros
9 or inline functions from this file, or you compile this file and link it
10 with other works to produce a work based on this file, this file
11 does not by itself cause the resulting work to be covered
12 by the GNU General Public License.
14 However the source code for this file must still be made available
15 in accordance with section (3) of the GNU General Public License.
17 This exception does not invalidate any other reasons why a work based
18 on this file might be covered by the GNU General Public License.
20 #include "host/pingscheduler.h"
25 #include <boost/bind.hpp>
26 #include <boost/foreach.hpp>
28 #include <logfunc.hpp>
30 #include "boost_assert_handler.h"
31 #include "host/pingerfactory.h"
32 #include "dns/dnsmaster.h"
33 #include "icmp/icmppinger.h"
34 #include "link/linkstatus.h"
37 using boost::asio::io_service;
39 using boost::date_time::time_resolution_traits_adapted64_impl;
40 using boost::posix_time::microsec_clock;
41 using boost::posix_time::ptime;
42 using boost::posix_time::seconds;
43 using boost::posix_time::milliseconds;
44 using boost::shared_ptr;
45 using I2n::Logger::GlobalLogger;
47 //-----------------------------------------------------------------------------
49 //-----------------------------------------------------------------------------
52 * @brief Parameterized constructor.
54 * @param io_serv The one @c io_serv object that controls async processing
55 * @param network_interface The name of the network interface sending the pings.
56 * @param destination_address The remote address to ping.
57 * @param destination_port The remote port to ping.
58 * @param ping_protocol_list A list of protocols to use.
59 * @param ping_interval_in_sec Amount of time between each ping.
60 * @param ping_fail_percentage_limit Maximum amount of pings that can fail.
61 * @param ping_congestion_percentage_limit Amount of pings indication congested
63 * @param ping_congestion_duration_thresh Duration in seconds that indicates a
65 * @param ping_reply_timeout Max amount time to wait for ping to finish
66 * @param link_analyzer The object to monitor the link status.
67 * @param first_delay Delay in seconds from start_pinging to first ping attempt
68 * @param n_parallel_pings: Number of pingers to ping the same IP in parallel
70 PingScheduler::PingScheduler(
71 const IoServiceItem io_serv,
72 const string &network_interface,
73 const string &destination_address,
74 const uint16_t destination_port,
75 const PingProtocolList &ping_protocol_list,
76 const long ping_interval_in_sec,
77 const int ping_fail_percentage_limit,
78 const int ping_congestion_percentage_limit,
79 const int ping_congestion_duration_thresh,
80 const int ping_reply_timeout,
81 LinkStatusItem link_analyzer,
82 const int first_delay,
83 const int n_parallel_pings,
84 const int parallel_ping_delay
87 NetworkInterfaceName( network_interface ),
88 DestinationAddress( destination_address ),
89 DestinationPort( destination_port ),
90 Protocols( ping_protocol_list ),
92 PingIntervalInSec( ping_interval_in_sec ),
93 FirstDelay( first_delay ),
94 NextPingTimer( *io_serv ),
95 TimeSentLastPing( microsec_clock::universal_time() ),
96 PingReplyTimeout( ping_reply_timeout ),
97 HostAnalyzer( destination_address, ping_fail_percentage_limit,
98 ping_congestion_percentage_limit,
99 ping_congestion_duration_thresh, n_parallel_pings,
103 NPingers( n_parallel_pings ),
105 ParallelPingDelay( parallel_ping_delay ),
106 DelayedPingTimer( *io_serv ),
109 ContinueOnOutdatedIps( false )
111 BOOST_ASSERT( !network_interface.empty() );
112 BOOST_ASSERT( !destination_address.empty() );
113 BOOST_ASSERT( ( 0 < destination_port ) &&
114 ( destination_port < numeric_limits<uint16_t>::max() ) );
115 BOOST_ASSERT( 0 < ping_interval_in_sec );
116 BOOST_ASSERT( (0 <= ping_fail_percentage_limit) &&
117 ( ping_fail_percentage_limit <= 100) );
121 init_ping_protocol();
127 PingScheduler::~PingScheduler()
131 void PingScheduler::stop_pinging()
133 // stop pinger and resolver
134 GlobalLogger.debug() << LogPrefix << "scheduler: stop pinging";
136 cancel_resolve(true);
138 // now cancel the own timer in case that pinger cancelation called callback
139 GlobalLogger.debug() << LogPrefix << "scheduler: cancel timer";
140 NextPingTimer.cancel();
144 * @brief stop all pingers and remove them from Pingers variable which will
145 * proboably cause their destruction
147 * Pingers is empty afterwards
149 void PingScheduler::clear_pingers()
152 while(!Pingers.empty())
154 pinger = Pingers.back();
155 pinger->stop_pinging();
161 * @brief Start into infinite loop of calls to ping
163 * Does not start yet but set NextPingTimer (possibly to 0), so action starts
164 * when io_service is started
166 void PingScheduler::start_pinging()
168 if ( FirstDelay > 0 )
169 GlobalLogger.info() << LogPrefix << "Delaying first ping by "
170 << FirstDelay << "s";
172 GlobalLogger.info() << LogPrefix << "Schedule ping as soon as possible";
174 (void) NextPingTimer.expires_from_now( seconds( FirstDelay ) );
175 NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
176 boost::asio::placeholders::error ) );
181 * @brief call Ping::ping and schedule a call to ping_done_handler when finished
183 void PingScheduler::ping(const boost::system::error_code &error)
186 { // get here, e.g. by NextPingTimer.cancel in stop_pinging
187 if ( error == boost::asio::error::operation_aborted )
188 GlobalLogger.error() << LogPrefix << "Timer for ping was cancelled!"
191 GlobalLogger.error() << LogPrefix << "Received error " << error
192 << " waiting for ping! Stopping";
196 // ping as soon as dns is ready
202 void PingScheduler::ping_when_ready()
206 GlobalLogger.info() << LogPrefix << "waiting for ping request "
207 << "(should take no more than " << PingIntervalInSec << "s)";
210 else if ( Resolver && Resolver->is_resolving() )
212 GlobalLogger.info() << LogPrefix << "waiting for DNS to finish";
215 else if ( !Resolver )
216 // should not happen, but check anyway
217 GlobalLogger.warning() << LogPrefix << "Have no resolver!";
219 GlobalLogger.info() << LogPrefix << "start ping";
222 // try to get an up-to-date IP (ContinueOnOutdatedIps may only be set
223 // because a CNAME was out of date -- IPs may still be current)
224 HostAddress ip = Resolver->get_next_ip();
226 if ( !ip.is_valid() )
227 { // this can happen in 2 cases: if ContinueOnOutdatedIps==true
228 // or when ip went out of date between resolve and now
229 // --> try to use outdated IP
230 GlobalLogger.info() << LogPrefix << "Checking for outdated IPs";
231 bool check_up_to_date = false;
232 ip = Resolver->get_next_ip(check_up_to_date);
234 if ( !ip.is_valid() )
235 { // Do not even have an outdated IP!
236 // This happens if have no cached IPs and resolve failed
237 GlobalLogger.info() << LogPrefix << "Not even outdated IP to ping "
238 << "-- treat like a failed ping.";
240 // skip the ping and directly call ping_done_handler
241 HostAnalyzer.set_resolved_ip_count(1); // must have been 0 --> failed
242 // ping would create failed assumption (nPings > nIPs)
243 ping_done_handler(PingStatus_FailureNoIP, 0);
244 HostAnalyzer.set_resolved_ip_count(0); // set back
248 boost::asio::ip::address actual_ip = ip.get_ip();
249 GlobalLogger.info() << LogPrefix << "pinging IP " << actual_ip
250 << " with TTL " << ip.get_ttl().get_updated_value() << "s";
251 delayed_ping(boost::system::error_code(), actual_ip, 0);
253 TimeSentLastPing = microsec_clock::universal_time();
257 void PingScheduler::delayed_ping( const boost::system::error_code &error,
258 const boost::asio::ip::address &ip,
259 const int pinger_index )
263 GlobalLogger.info() << LogPrefix << "delayed ping received an error: "
267 if (pinger_index == NPingers)
269 GlobalLogger.debug() << LogPrefix << "started all delayed pings";
273 GlobalLogger.debug() << LogPrefix << "starting delayed ping index "
275 Pingers[pinger_index]->ping(ip,
277 boost::bind(&PingScheduler::ping_done_handler,
279 DelayedPingTimer.expires_from_now( milliseconds(ParallelPingDelay) );
280 DelayedPingTimer.async_wait( bind( &PingScheduler::delayed_ping,
281 this, boost::asio::placeholders::error, ip, pinger_index+1) );
285 //------------------------------------------------------------------------------
286 // Post Processing of Ping result and Preparation for next ping
287 //------------------------------------------------------------------------------
290 * @brief called when Ping::ping is done; calls functions to update
291 * statistics, ping interval and elapsed time;
292 * schedules a call to ping, thereby closing the loop
294 void PingScheduler::ping_done_handler( const PingStatus &result,
295 const long ping_duration_us )
297 PingStatus edited_result = result;
298 if (result == PingStatus_SuccessReply && ContinueOnOutdatedIps)
300 edited_result = PingStatus_SuccessOutdatedIP;
302 // reset ContinueOnOutdatedIps
303 ContinueOnOutdatedIps = false;
308 GlobalLogger.info() << LogPrefix << "Ping " << NPingersDone << " of "
309 << NPingers << " done with result " << to_string(edited_result);
312 // can call update_ping_interval only after update_ping_statistics!
313 HostAnalyzer.update_ping_statistics( edited_result, ping_duration_us );
315 // prepare next ping only after all pingers are done
316 if (NPingersDone == NPingers)
321 void PingScheduler::prepare_next_ping()
323 update_ping_interval();
325 // get next protocol, possibly start resolving IPs
326 update_ping_protocol();
328 // schedule next ping
329 int seconds_since_last_ping = (microsec_clock::universal_time()
330 - TimeSentLastPing).total_seconds();
331 if ( seconds_since_last_ping > PingIntervalInSec )
333 GlobalLogger.info() << "We are late for next ping!";
334 seconds_since_last_ping = PingIntervalInSec;
335 (void) NextPingTimer.expires_from_now( seconds(0) );
338 (void) NextPingTimer.expires_from_now( seconds( PingIntervalInSec
339 - seconds_since_last_ping ) );
340 NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
341 boost::asio::placeholders::error ) );
344 void PingScheduler::update_ping_interval()
346 // have to ping more often?
347 if ( HostAnalyzer.exceeded_ping_failed_limit() )
349 PingIntervalInSec.speed_up();
351 GlobalLogger.debug() << LogPrefix << "- Speeding up ping interval to: "
352 << PingIntervalInSec << "s";
356 PingIntervalInSec.back_to_original();
358 GlobalLogger.debug() << LogPrefix << "- Stick to the original ping "
359 << "interval: " << PingIntervalInSec << "s";
363 //------------------------------------------------------------------------------
364 // Ping Protocol Rotation
365 //------------------------------------------------------------------------------
367 void PingScheduler::init_ping_protocol()
369 ProtocolIter = Protocols.end();
370 get_next_ping_protocol();
373 void PingScheduler::update_ping_protocol()
375 if ( can_change_ping_protocol() )
377 get_next_ping_protocol();
381 void PingScheduler::get_next_ping_protocol()
383 // stop and destruct all pingers
385 GlobalLogger.debug() << LogPrefix
386 << "------------------------------------------------------------------";
390 if (ProtocolIter == Protocols.end())
391 ProtocolIter = Protocols.begin();
392 PingProtocol ping_protocol = *ProtocolIter;
393 // --> ProtocolIter still points to currently used protocol which is
394 // required in dns_resolve_callback
396 // create new pingers
397 for (int count=0; count<NPingers; ++count)
398 Pingers.push_back( PingerFactory::createPinger(ping_protocol, IoService,
399 NetworkInterfaceName, PingReplyTimeout) );
401 update_dns_resolver( ping_protocol );
404 bool PingScheduler::can_change_ping_protocol() const
406 // TODO can_change_ping_protocol() and get_next_ping_protocol() may be
407 // implemented in a Algorithm class that can be exchanged in this class to
408 // provide an algorithm neutral class
412 //------------------------------------------------------------------------------
413 // DNS host name resolution
414 //------------------------------------------------------------------------------
416 // show "!" after host name if running on outdated IPs
417 void PingScheduler::update_log_prefix()
419 std::stringstream temp;
420 temp << "Sched(" << DestinationAddress;
421 if (ContinueOnOutdatedIps)
424 LogPrefix = temp.str();
427 void PingScheduler::update_dns_resolver( PingProtocol current_protocol )
429 if (Resolver && Resolver->is_resolving())
430 cancel_resolve(false);
432 if (ContinueOnOutdatedIps)
434 ContinueOnOutdatedIps = false;
438 // DNS master caches created resolvers and resolved IPs, so this will
439 // probably just return an existing resolver with already resolved IPs for
440 // requested protocol ( ICMP/TCP is ignored, only IPv4/v6 is important)
441 Resolver = DnsMaster::get_instance()->get_resolver_for(DestinationAddress,
444 // get number of up-to-date IPs
445 // TODO should check here, if they will be up to date in PingIntervalInSec
446 bool check_up_to_date = true;
447 int ip_count = Resolver->get_resolved_ip_count(check_up_to_date);
450 GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
451 << ip_count << " (IPs may be outdated=" << !check_up_to_date << ")";
452 HostAnalyzer.set_resolved_ip_count( ip_count );
454 if (Resolver->is_resolving())
455 GlobalLogger.warning() << LogPrefix << "have up to date IPs but "
456 << "resolver seems to be resolving all the same... "
457 << "Start pinging anyway!";
462 GlobalLogger.info() << LogPrefix
463 << "No up-to-date IPs --> start resolve";
464 start_resolving_ping_address();
465 // set resolved_ip_count will be called in resolve callback
469 void PingScheduler::start_resolving_ping_address()
471 Resolver->async_resolve( boost::bind(&PingScheduler::dns_resolve_callback,
475 void PingScheduler::dns_resolve_callback(const bool was_success,
476 const int recursion_count)
478 GlobalLogger.info() << LogPrefix << "dns resolution finished "
479 << "with success = " << was_success << " "
480 << "after " << recursion_count << " recursions";
484 // trust that a successfull DNS resolve means we have an IP with TTL>0
485 int ip_count = Resolver->get_resolved_ip_count(!ContinueOnOutdatedIps);
487 { // this will create trouble in HostAnalyzer
488 GlobalLogger.warning() << LogPrefix
489 << "Should not have reached this case: resolve was "
490 << "successfull but still have no IPs (up-to-date="
491 << !ContinueOnOutdatedIps << ")!";
492 if (DnsMaster::get_instance()->get_resolved_ip_ttl_threshold() > 0)
493 GlobalLogger.warning() << LogPrefix << "This probably happened "
494 << "because you specified a TTL threshold > 0 but resolving"
495 << " had no effect on TTLs since external cache is only "
496 << "updated when TTL=0 is reached.";
500 GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
501 << ip_count << " (IPs may be outdated="
502 << ContinueOnOutdatedIps << ") --> could ping now";
503 HostAnalyzer.set_resolved_ip_count( ip_count );
508 { // host name resolution failed; try again bypassing first outdated CNAME
509 // or using cached IP
510 std::string skip_host = Resolver->get_skip_cname();
512 if (skip_host.empty())
513 { // try to continue with cached IPs
514 int ip_count = Resolver->get_resolved_ip_count(false);
517 GlobalLogger.notice() << LogPrefix << "DNS failed "
518 << "and have no cached IPs either --> cannot ping";
519 // ping_when_ready will deal with this case
522 ContinueOnOutdatedIps = true;
525 GlobalLogger.notice() << LogPrefix << "DNS failed, "
526 << "try anyway with cached data";
529 GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
530 << ip_count << " (IPs may be outdated=" << true << ")";
531 HostAnalyzer.set_resolved_ip_count( ip_count );
536 { // have CNAME to continue
537 ContinueOnOutdatedIps = true;
539 GlobalLogger.notice() << LogPrefix << "DNS failed, "
540 << "try again skipping a CNAME and resolving "
541 << skip_host << " directly";
543 cancel_resolve(false);
545 // now create new resolver
546 Resolver = DnsMaster::get_instance()
547 ->get_resolver_for(skip_host, *ProtocolIter);
548 start_resolving_ping_address();
554 * cancel resolver if force_cancel or if it is not resolving DestinationAddress
556 * Resolvers have a life on their own: they are cached by DnsMaster so never go
557 * out of scope and even after calling callbacks, there might still be a
558 * longterm timer active to re-try resolving.
559 * We want to cancel that long-term timer only if the Resolver is not for our
560 * real, original DestinationAddress but a CNAME, which can happen when trying
561 * to skip cnames and working on out-dated IPs
563 void PingScheduler::cancel_resolve(const bool force_cancel)
567 GlobalLogger.info() << "Cancelling resolver (forced)";
568 Resolver->cancel_resolve();
570 else if ( Resolver->get_hostname() == DestinationAddress )
571 GlobalLogger.info() << LogPrefix
572 << "Leave original resolver active in background";
575 GlobalLogger.info() << LogPrefix << "Cancel resolver for "
576 << Resolver->get_hostname() << " since is not the original "
577 << DestinationAddress;
578 Resolver->cancel_resolve();