2 The software in this package is distributed under the GNU General
3 Public License version 2 (with a special exception described below).
5 A copy of GNU General Public License (GPL) is included in this distribution,
6 in the file COPYING.GPL.
8 As a special exception, if other files instantiate templates or use macros
9 or inline functions from this file, or you compile this file and link it
10 with other works to produce a work based on this file, this file
11 does not by itself cause the resulting work to be covered
12 by the GNU General Public License.
14 However the source code for this file must still be made available
15 in accordance with section (3) of the GNU General Public License.
17 This exception does not invalidate any other reasons why a work based
18 on this file might be covered by the GNU General Public License.
20 #include "host/pingscheduler.h"
25 #include <boost/bind.hpp>
26 #include <boost/foreach.hpp>
28 #include <logfunc.hpp>
30 #include "boost_assert_handler.h"
31 #include "host/pingerfactory.h"
32 #include "dns/dnsmaster.h"
33 #include "icmp/icmppinger.h"
34 #include "link/linkstatus.h"
37 using boost::asio::io_service;
39 using boost::date_time::time_resolution_traits_adapted64_impl;
40 using boost::posix_time::microsec_clock;
41 using boost::posix_time::ptime;
42 using boost::posix_time::seconds;
43 using boost::posix_time::milliseconds;
44 using boost::shared_ptr;
45 using I2n::Logger::GlobalLogger;
47 //-----------------------------------------------------------------------------
49 //-----------------------------------------------------------------------------
52 * @brief Parameterized constructor.
54 * @param io_serv The one @c io_serv object that controls async processing
55 * @param network_interface The name of the network interface sending the pings.
56 * @param destination_address The remote address to ping.
57 * @param destination_port The remote port to ping.
58 * @param ping_protocol_list A list of protocols to use.
59 * @param ping_interval_in_sec Amount of time between each ping.
60 * @param ping_fail_percentage_limit Maximum amount of pings that can fail.
61 * @param ping_reply_timeout Max amount time to wait for ping to finish
62 * @param link_analyzer The object to monitor the link status.
63 * @param first_delay Delay in seconds from start_pinging to first ping attempt
64 * @param n_parallel_pings: Number of pingers to ping the same IP in parallel
66 PingScheduler::PingScheduler(
67 const IoServiceItem io_serv,
68 const string &network_interface,
69 const string &destination_address,
70 const uint16_t destination_port,
71 const PingProtocolList &ping_protocol_list,
72 const long ping_interval_in_sec,
73 const int ping_fail_percentage_limit,
74 const int ping_reply_timeout,
75 LinkStatusItem link_analyzer,
76 const int first_delay,
77 const int n_parallel_pings,
78 const int parallel_ping_delay
81 NetworkInterfaceName( network_interface ),
82 DestinationAddress( destination_address ),
83 DestinationPort( destination_port ),
84 Protocols( ping_protocol_list ),
86 PingIntervalInSec( ping_interval_in_sec ),
87 FirstDelay( first_delay ),
88 NextPingTimer( *io_serv ),
89 TimeSentLastPing( microsec_clock::universal_time() ),
90 PingReplyTimeout( ping_reply_timeout ),
91 HostAnalyzer( destination_address, ping_fail_percentage_limit,
92 n_parallel_pings, link_analyzer ),
95 NPingers( n_parallel_pings ),
97 ParallelPingDelay( parallel_ping_delay ),
98 DelayedPingTimer( *io_serv ),
101 ContinueOnOutdatedIps( false )
103 BOOST_ASSERT( !network_interface.empty() );
104 BOOST_ASSERT( !destination_address.empty() );
105 BOOST_ASSERT( ( 0 < destination_port ) &&
106 ( destination_port < numeric_limits<uint16_t>::max() ) );
107 BOOST_ASSERT( 0 < ping_interval_in_sec );
108 BOOST_ASSERT( (0 <= ping_fail_percentage_limit) &&
109 ( ping_fail_percentage_limit <= 100) );
113 init_ping_protocol();
119 PingScheduler::~PingScheduler()
123 void PingScheduler::stop_pinging()
125 // stop pinger and resolver
126 GlobalLogger.debug() << LogPrefix << "scheduler: stop pinging";
128 cancel_resolve(true);
130 // now cancel the own timer in case that pinger cancelation called callback
131 GlobalLogger.debug() << LogPrefix << "scheduler: cancel timer";
132 NextPingTimer.cancel();
136 * @brief stop all pingers and remove them from Pingers variable which will
137 * proboably cause their destruction
139 * Pingers is empty afterwards
141 void PingScheduler::clear_pingers()
144 while(!Pingers.empty())
146 pinger = Pingers.back();
147 pinger->stop_pinging();
153 * @brief Start into infinite loop of calls to ping
155 * Does not start yet but set NextPingTimer (possibly to 0), so action starts
156 * when io_service is started
158 void PingScheduler::start_pinging()
160 if ( FirstDelay > 0 )
161 GlobalLogger.info() << LogPrefix << "Delaying first ping by "
162 << FirstDelay << "s";
164 GlobalLogger.info() << LogPrefix << "Schedule ping as soon as possible";
166 (void) NextPingTimer.expires_from_now( seconds( FirstDelay ) );
167 NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
168 boost::asio::placeholders::error ) );
173 * @brief call Ping::ping and schedule a call to ping_done_handler when finished
175 void PingScheduler::ping(const boost::system::error_code &error)
178 { // get here, e.g. by NextPingTimer.cancel in stop_pinging
179 if ( error == boost::asio::error::operation_aborted )
180 GlobalLogger.error() << LogPrefix << "Timer for ping was cancelled!"
183 GlobalLogger.error() << LogPrefix << "Received error " << error
184 << " waiting for ping! Stopping";
188 // ping as soon as dns is ready
194 void PingScheduler::ping_when_ready()
198 GlobalLogger.info() << LogPrefix << "waiting for ping request "
199 << "(should take no more than " << PingIntervalInSec << "s)";
202 else if ( Resolver && Resolver->is_resolving() )
204 GlobalLogger.info() << LogPrefix << "waiting for DNS to finish";
207 else if ( !Resolver )
208 // should not happen, but check anyway
209 GlobalLogger.warning() << LogPrefix << "Have no resolver!";
211 GlobalLogger.info() << LogPrefix << "start ping";
214 // try to get an up-to-date IP (ContinueOnOutdatedIps may only be set
215 // because a CNAME was out of date -- IPs may still be current)
216 HostAddress ip = Resolver->get_next_ip();
218 if ( !ip.is_valid() )
219 { // this can happen in 2 cases: if ContinueOnOutdatedIps==true
220 // or when ip went out of date between resolve and now
221 // --> try to use outdated IP
222 GlobalLogger.info() << LogPrefix << "Checking for outdated IPs";
223 bool check_up_to_date = false;
224 ip = Resolver->get_next_ip(check_up_to_date);
226 if ( !ip.is_valid() )
227 { // Do not even have an outdated IP!
228 // This happens if have no cached IPs and resolve failed
229 GlobalLogger.info() << LogPrefix << "Not even outdated IP to ping "
230 << "-- treat like a failed ping.";
232 // skip the ping and directly call ping_done_handler
233 HostAnalyzer.set_resolved_ip_count(1); // must have been 0 --> failed
234 // ping would create failed assumption (nPings > nIPs)
235 ping_done_handler(PingStatus_FailureNoIP, 0);
236 HostAnalyzer.set_resolved_ip_count(0); // set back
240 boost::asio::ip::address actual_ip = ip.get_ip();
241 GlobalLogger.info() << LogPrefix << "pinging IP " << actual_ip
242 << " with TTL " << ip.get_ttl().get_updated_value() << "s";
243 delayed_ping(boost::system::error_code(), actual_ip, 0);
245 TimeSentLastPing = microsec_clock::universal_time();
249 void PingScheduler::delayed_ping( const boost::system::error_code &error,
250 const boost::asio::ip::address &ip,
251 const int pinger_index )
255 GlobalLogger.info() << LogPrefix << "delayed ping received an error: "
259 if (pinger_index == NPingers)
261 GlobalLogger.debug() << LogPrefix << "started all delayed pings";
265 GlobalLogger.debug() << LogPrefix << "starting delayed ping index "
267 Pingers[pinger_index]->ping(ip,
269 boost::bind(&PingScheduler::ping_done_handler,
271 DelayedPingTimer.expires_from_now( milliseconds(ParallelPingDelay) );
272 DelayedPingTimer.async_wait( bind( &PingScheduler::delayed_ping,
273 this, boost::asio::placeholders::error, ip, pinger_index+1) );
277 //------------------------------------------------------------------------------
278 // Post Processing of Ping result and Preparation for next ping
279 //------------------------------------------------------------------------------
282 * @brief called when Ping::ping is done; calls functions to update
283 * statistics, ping interval and elapsed time;
284 * schedules a call to ping, thereby closing the loop
286 void PingScheduler::ping_done_handler( const PingStatus &result,
287 const long ping_duration_us )
289 PingStatus edited_result = result;
290 if (result == PingStatus_SuccessReply && ContinueOnOutdatedIps)
292 edited_result = PingStatus_SuccessOutdatedIP;
294 // reset ContinueOnOutdatedIps
295 ContinueOnOutdatedIps = false;
300 GlobalLogger.info() << LogPrefix << "Ping " << NPingersDone << " of "
301 << NPingers << " done with result " << to_string(edited_result);
304 // can call update_ping_interval only after update_ping_statistics!
305 HostAnalyzer.update_ping_statistics( edited_result, ping_duration_us );
307 // prepare next ping only after all pingers are done
308 if (NPingersDone == NPingers)
313 void PingScheduler::prepare_next_ping()
315 update_ping_interval();
317 // get next protocol, possibly start resolving IPs
318 update_ping_protocol();
320 // schedule next ping
321 int seconds_since_last_ping = (microsec_clock::universal_time()
322 - TimeSentLastPing).total_seconds();
323 if ( seconds_since_last_ping > PingIntervalInSec )
325 GlobalLogger.info() << "We are late for next ping!";
326 seconds_since_last_ping = PingIntervalInSec;
327 (void) NextPingTimer.expires_from_now( seconds(0) );
330 (void) NextPingTimer.expires_from_now( seconds( PingIntervalInSec
331 - seconds_since_last_ping ) );
332 NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
333 boost::asio::placeholders::error ) );
336 void PingScheduler::update_ping_interval()
338 // have to ping more often?
339 if ( HostAnalyzer.exceeded_ping_failed_limit() )
341 PingIntervalInSec.speed_up();
343 GlobalLogger.debug() << LogPrefix << "- Speeding up ping interval to: "
344 << PingIntervalInSec << "s";
348 PingIntervalInSec.back_to_original();
350 GlobalLogger.debug() << LogPrefix << "- Stick to the original ping "
351 << "interval: " << PingIntervalInSec << "s";
355 //------------------------------------------------------------------------------
356 // Ping Protocol Rotation
357 //------------------------------------------------------------------------------
359 void PingScheduler::init_ping_protocol()
361 ProtocolIter = Protocols.end();
362 get_next_ping_protocol();
365 void PingScheduler::update_ping_protocol()
367 if ( can_change_ping_protocol() )
369 get_next_ping_protocol();
373 void PingScheduler::get_next_ping_protocol()
375 // stop and destruct all pingers
377 GlobalLogger.debug() << LogPrefix
378 << "------------------------------------------------------------------";
382 if (ProtocolIter == Protocols.end())
383 ProtocolIter = Protocols.begin();
384 PingProtocol ping_protocol = *ProtocolIter;
385 // --> ProtocolIter still points to currently used protocol which is
386 // required in dns_resolve_callback
388 // create new pingers
389 for (int count=0; count<NPingers; ++count)
390 Pingers.push_back( PingerFactory::createPinger(ping_protocol, IoService,
391 NetworkInterfaceName, PingReplyTimeout) );
393 update_dns_resolver( ping_protocol );
396 bool PingScheduler::can_change_ping_protocol() const
398 // TODO can_change_ping_protocol() and get_next_ping_protocol() may be
399 // implemented in a Algorithm class that can be exchanged in this class to
400 // provide an algorithm neutral class
404 //------------------------------------------------------------------------------
405 // DNS host name resolution
406 //------------------------------------------------------------------------------
408 // show "!" after host name if running on outdated IPs
409 void PingScheduler::update_log_prefix()
411 std::stringstream temp;
412 temp << "Sched(" << DestinationAddress;
413 if (ContinueOnOutdatedIps)
416 LogPrefix = temp.str();
419 void PingScheduler::update_dns_resolver( PingProtocol current_protocol )
421 if (Resolver && Resolver->is_resolving())
422 cancel_resolve(false);
424 if (ContinueOnOutdatedIps)
426 ContinueOnOutdatedIps = false;
430 // DNS master caches created resolvers and resolved IPs, so this will
431 // probably just return an existing resolver with already resolved IPs for
432 // requested protocol ( ICMP/TCP is ignored, only IPv4/v6 is important)
433 Resolver = DnsMaster::get_instance()->get_resolver_for(DestinationAddress,
436 // get number of up-to-date IPs
437 // TODO should check here, if they will be up to date in PingIntervalInSec
438 bool check_up_to_date = true;
439 int ip_count = Resolver->get_resolved_ip_count(check_up_to_date);
442 GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
443 << ip_count << " (IPs may be outdated=" << !check_up_to_date << ")";
444 HostAnalyzer.set_resolved_ip_count( ip_count );
446 if (Resolver->is_resolving())
447 GlobalLogger.warning() << LogPrefix << "have up to date IPs but "
448 << "resolver seems to be resolving all the same... "
449 << "Start pinging anyway!";
454 GlobalLogger.info() << LogPrefix
455 << "No up-to-date IPs --> start resolve";
456 start_resolving_ping_address();
457 // set resolved_ip_count will be called in resolve callback
461 void PingScheduler::start_resolving_ping_address()
463 Resolver->async_resolve( boost::bind(&PingScheduler::dns_resolve_callback,
467 void PingScheduler::dns_resolve_callback(const bool was_success,
468 const int recursion_count)
470 GlobalLogger.info() << LogPrefix << "dns resolution finished "
471 << "with success = " << was_success << " "
472 << "after " << recursion_count << " recursions";
476 // trust that a successfull DNS resolve means we have an IP with TTL>0
477 int ip_count = Resolver->get_resolved_ip_count(!ContinueOnOutdatedIps);
479 { // this will create trouble in HostAnalyzer
480 GlobalLogger.warning() << LogPrefix
481 << "Should not have reached this case: resolve was "
482 << "successfull but still have no IPs (up-to-date="
483 << !ContinueOnOutdatedIps << ")!";
484 if (DnsMaster::get_instance()->get_resolved_ip_ttl_threshold() > 0)
485 GlobalLogger.warning() << LogPrefix << "This probably happened "
486 << "because you specified a TTL threshold > 0 but resolving"
487 << " had no effect on TTLs since external cache is only "
488 << "updated when TTL=0 is reached.";
492 GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
493 << ip_count << " (IPs may be outdated="
494 << ContinueOnOutdatedIps << ") --> could ping now";
495 HostAnalyzer.set_resolved_ip_count( ip_count );
500 { // host name resolution failed; try again bypassing first outdated CNAME
501 // or using cached IP
502 std::string skip_host = Resolver->get_skip_cname();
504 if (skip_host.empty())
505 { // try to continue with cached IPs
506 int ip_count = Resolver->get_resolved_ip_count(false);
509 GlobalLogger.notice() << LogPrefix << "DNS failed "
510 << "and have no cached IPs either --> cannot ping";
511 // ping_when_ready will deal with this case
514 ContinueOnOutdatedIps = true;
517 GlobalLogger.notice() << LogPrefix << "DNS failed, "
518 << "try anyway with cached data";
521 GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
522 << ip_count << " (IPs may be outdated=" << true << ")";
523 HostAnalyzer.set_resolved_ip_count( ip_count );
528 { // have CNAME to continue
529 ContinueOnOutdatedIps = true;
531 GlobalLogger.notice() << LogPrefix << "DNS failed, "
532 << "try again skipping a CNAME and resolving "
533 << skip_host << " directly";
535 cancel_resolve(false);
537 // now create new resolver
538 Resolver = DnsMaster::get_instance()
539 ->get_resolver_for(skip_host, *ProtocolIter);
540 start_resolving_ping_address();
546 * cancel resolver if force_cancel or if it is not resolving DestinationAddress
548 * Resolvers have a life on their own: they are cached by DnsMaster so never go
549 * out of scope and even after calling callbacks, there might still be a
550 * longterm timer active to re-try resolving.
551 * We want to cancel that long-term timer only if the Resolver is not for our
552 * real, original DestinationAddress but a CNAME, which can happen when trying
553 * to skip cnames and working on out-dated IPs
555 void PingScheduler::cancel_resolve(const bool force_cancel)
559 GlobalLogger.info() << "Cancelling resolver (forced)";
560 Resolver->cancel_resolve();
562 else if ( Resolver->get_hostname() == DestinationAddress )
563 GlobalLogger.info() << LogPrefix
564 << "Leave original resolver active in background";
567 GlobalLogger.info() << LogPrefix << "Cancel resolver for "
568 << Resolver->get_hostname() << " since is not the original "
569 << DestinationAddress;
570 Resolver->cancel_resolve();