2 The software in this package is distributed under the GNU General
3 Public License version 2 (with a special exception described below).
5 A copy of GNU General Public License (GPL) is included in this distribution,
6 in the file COPYING.GPL.
8 As a special exception, if other files instantiate templates or use macros
9 or inline functions from this file, or you compile this file and link it
10 with other works to produce a work based on this file, this file
11 does not by itself cause the resulting work to be covered
12 by the GNU General Public License.
14 However the source code for this file must still be made available
15 in accordance with section (3) of the GNU General Public License.
17 This exception does not invalidate any other reasons why a work based
18 on this file might be covered by the GNU General Public License.
20 #include "host/pingscheduler.h"
25 #include <boost/bind.hpp>
26 #include <boost/foreach.hpp>
28 #include <logfunc.hpp>
30 #include "boost_assert_handler.h"
31 #include "host/pingerfactory.h"
32 #include "dns/dnsmaster.h"
33 #include "icmp/icmppinger.h"
34 #include "link/linkstatus.h"
37 using boost::asio::io_service;
39 using boost::date_time::time_resolution_traits_adapted64_impl;
40 using boost::posix_time::microsec_clock;
41 using boost::posix_time::ptime;
42 using boost::posix_time::seconds;
43 using boost::shared_ptr;
44 using I2n::Logger::GlobalLogger;
46 //-----------------------------------------------------------------------------
48 //-----------------------------------------------------------------------------
51 * @brief Parameterized constructor.
53 * @param io_serv The one @c io_serv object that controls async processing
54 * @param network_interface The name of the network interface sending the pings.
55 * @param destination_address The remote address to ping.
56 * @param destination_port The remote port to ping.
57 * @param ping_protocol_list A list of protocols to use.
58 * @param ping_interval_in_sec Amount of time between each ping.
59 * @param ping_fail_percentage_limit Maximum amount of pings that can fail.
60 * @param ping_reply_timeout Max amount of time to wait for a ping to finish
61 * @param link_analyzer The object to monitor the link status.
62 * @param first_delay Delay in seconds from start_pinging to first ping attempt
63 * @param n_parallel_pings Number of pingers to ping the same IP in parallel
// Parameterized constructor: copies the configuration into the members,
// validates the arguments with BOOST_ASSERT and selects the first ping
// protocol through init_ping_protocol().
// NOTE(review): this view of the file is lossy -- the embedded original line
// numbers jump (e.g. 77 -> 80, 83 -> 85, 91 -> 94), so the closing
// parenthesis of the parameter list, some member initializers and the braces
// are not visible here. Only the visible lines are reproduced.
65 PingScheduler::PingScheduler(
66 const IoServiceItem io_serv,
67 const string &network_interface,
68 const string &destination_address,
69 const uint16_t destination_port,
70 const PingProtocolList &ping_protocol_list,
71 const long ping_interval_in_sec,
72 const int ping_fail_percentage_limit,
73 const int ping_reply_timeout,
74 LinkStatusItem link_analyzer,
75 const int first_delay,
// fix: the separating comma after n_parallel_pings was missing
76 const int n_parallel_pings,
77 const int parallel_ping_delay
80 NetworkInterfaceName( network_interface ),
81 DestinationAddress( destination_address ),
82 DestinationPort( destination_port ),
83 Protocols( ping_protocol_list ),
85 PingIntervalInSec( ping_interval_in_sec ),
86 FirstDelay( first_delay ),
// the timer is bound to the io_service that io_serv points to
87 NextPingTimer( *io_serv ),
88 TimeSentLastPing( microsec_clock::universal_time() ),
89 PingReplyTimeout( ping_reply_timeout ),
90 HostAnalyzer( destination_address, ping_fail_percentage_limit,
91 n_parallel_pings, link_analyzer ),
94 NPingers( n_parallel_pings ),
// multiplied by a per-pinger count and passed to milliseconds() in
// ping_when_ready
96 ParallelPingDelay( parallel_ping_delay ),
99 ContinueOnOutdatedIps( false )
101 BOOST_ASSERT( !network_interface.empty() );
102 BOOST_ASSERT( !destination_address.empty() );
// NOTE(review): the strict '<' rejects port 65535 -- confirm this is intended
103 BOOST_ASSERT( ( 0 < destination_port ) &&
104 ( destination_port < numeric_limits<uint16_t>::max() ) );
105 BOOST_ASSERT( 0 < ping_interval_in_sec );
106 BOOST_ASSERT( (0 <= ping_fail_percentage_limit) &&
107 ( ping_fail_percentage_limit <= 100) );
// sets ProtocolIter, creates the pingers and sets up the DNS resolver
// (see init_ping_protocol / get_next_ping_protocol)
111 init_ping_protocol();
// Destructor. Its body is not visible in this view of the file.
117 PingScheduler::~PingScheduler()
// Stops this scheduler: force-cancels the DNS resolver and afterwards cancels
// the scheduler's own NextPingTimer.
// NOTE(review): original line 125 is not visible here; judging by the
// comment on line 123 it presumably stops the pingers -- confirm.
121 void PingScheduler::stop_pinging()
123 // stop pinger and resolver
124 GlobalLogger.debug() << LogPrefix << "scheduler: stop pinging";
// true == force_cancel, see cancel_resolve()
126 cancel_resolve(true);
128 // now cancel the own timer in case that pinger cancelation called callback
129 GlobalLogger.debug() << LogPrefix << "scheduler: cancel timer";
130 NextPingTimer.cancel();
134 * @brief stop all pingers and remove them from Pingers variable which will
135 * probably cause their destruction
137 * Pingers is empty afterwards
// Loops while Pingers is non-empty, takes the front pinger and calls
// stop_pinging() on it.
// NOTE(review): the declaration of `pinger` and the statement that removes
// the front element from Pingers are not visible in this view (original
// lines 141 and 146-147 are missing); without a removal the loop would not
// terminate -- confirm against the full file.
139 void PingScheduler::clear_pingers()
142 while ( !Pingers.empty() )
144 pinger = Pingers.front();
145 pinger->stop_pinging();
151 * @brief Start into infinite loop of calls to ping
153 * Does not start yet but set NextPingTimer (possibly to 0), so action starts
154 * when io_service is started
// Arms NextPingTimer to expire FirstDelay seconds from now and registers
// PingScheduler::ping as its wait handler. Nothing is pinged here; the
// handler is only queued on the timer.
156 void PingScheduler::start_pinging()
158 if ( FirstDelay > 0 )
159 GlobalLogger.info() << LogPrefix << "Delaying first ping by "
160 << FirstDelay << "s";
// NOTE(review): original line 161 is not visible; presumably an `else`
// guards the following log statement -- confirm.
162 GlobalLogger.info() << LogPrefix << "Schedule ping as soon as possible";
// return value of expires_from_now is deliberately discarded
164 (void) NextPingTimer.expires_from_now( seconds( FirstDelay ) );
165 NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
166 boost::asio::placeholders::error ) );
171 * @brief call Ping::ping and schedule a call to ping_done_handler when finished
// Wait handler of NextPingTimer (registered in start_pinging and
// prepare_next_ping). The visible part handles the error case: a cancelled
// timer (operation_aborted) and any other error are logged separately.
// @param error  error code delivered by the timer's async_wait
// NOTE(review): the condition opening the error branch, the `else` between
// the two log statements and the non-error path after line 186 are not
// visible in this view of the file.
173 void PingScheduler::ping(const boost::system::error_code &error)
176 { // get here, e.g. by NextPingTimer.cancel in stop_pinging
177 if ( error == boost::asio::error::operation_aborted )
// NOTE(review): no terminating ';' is visible for this statement; original
// line 179 is missing and may hold the rest of it.
178 GlobalLogger.error() << LogPrefix << "Timer for ping was cancelled!"
181 GlobalLogger.error() << LogPrefix << "Received error " << error
182 << " waiting for ping! Stopping";
186 // ping as soon as dns is ready
// Starts a ping round if possible, otherwise logs why it cannot start yet.
// Visible flow:
//  - a first branch (condition not visible) logs that a ping request is
//    awaited;
//  - if the resolver is still resolving, or there is no resolver, that is
//    logged instead;
//  - otherwise an IP is fetched from the resolver, falling back to an
//    outdated IP; if there is none at all the round is reported as
//    PingStatus_FailureNoIP through ping_done_handler;
//  - with an IP, one delayed_ping call per pinger is scheduled and
//    TimeSentLastPing is updated.
// NOTE(review): this view of the file is lossy; several original lines
// (e.g. 193-195, 210-211, 235-237, 248-250) are missing, among them braces,
// `else`/`return` statements and the remaining bind arguments.
192 void PingScheduler::ping_when_ready()
196 GlobalLogger.info() << LogPrefix << "waiting for ping request "
197 << "(should take no more than " << PingIntervalInSec << "s)";
200 else if ( Resolver && Resolver->is_resolving() )
202 GlobalLogger.info() << LogPrefix << "waiting for DNS to finish";
205 else if ( !Resolver )
206 // should not happen, but check anyway
207 GlobalLogger.warning() << LogPrefix << "Have no resolver!";
209 GlobalLogger.info() << LogPrefix << "start ping";
212 // try to get an up-to-date IP (ContinueOnOutdatedIps may only be set
213 // because a CNAME was out of date -- IPs may still be current)
214 HostAddress ip = Resolver->get_next_ip();
216 if ( !ip.is_valid() )
217 { // this can happen in 2 cases: if ContinueOnOutdatedIps==true
218 // or when ip went out of date between resolve and now
219 // --> try to use outdated IP
220 GlobalLogger.info() << LogPrefix << "Checking for outdated IPs";
221 bool check_up_to_date = false;
222 ip = Resolver->get_next_ip(check_up_to_date);
224 if ( !ip.is_valid() )
225 { // Do not even have an outdated IP!
226 // This happens if have no cached IPs and resolve failed
227 GlobalLogger.info() << LogPrefix << "Not even outdated IP to ping "
228 << "-- treat like a failed ping.";
230 // skip the ping and directly call ping_done_handler
231 HostAnalyzer.set_resolved_ip_count(1); // must have been 0 --> failed
232 // ping would create failed assumption (nPings > nIPs)
233 ping_done_handler(PingStatus_FailureNoIP);
234 HostAnalyzer.set_resolved_ip_count(0); // set back
238 boost::asio::ip::address actual_ip = ip.get_ip();
239 GlobalLogger.info() << LogPrefix << "pinging IP " << actual_ip
240 << " with TTL " << ip.get_ttl().get_updated_value() << "s";
// Stagger the pingers: each one gets its own wait of
// delay_count * ParallelPingDelay milliseconds.
// NOTE(review): delay_count is not declared in the visible lines.
242 BOOST_FOREACH( const PingerItem &pinger, Pingers )
// NOTE(review): delayed_ping_timer is a local object. Boost.Asio cancels
// outstanding waits when a deadline_timer is destroyed, so the handler is
// presumably invoked with operation_aborted once this object goes out of
// scope. The timers probably need to outlive this function -- confirm.
244 boost::asio::deadline_timer delayed_ping_timer( IoService );
245 delayed_ping_timer.expires_from_now(
// fix: the closing parenthesis of expires_from_now( ... ) was missing
246 milliseconds(delay_count * ParallelPingDelay) );
247 delayed_ping_timer.async_wait( bind( &PingScheduler::delayed_ping,
// reference time for the duration computed in ping_done_handler and the
// scheduling in prepare_next_ping
251 TimeSentLastPing = microsec_clock::universal_time();
// Starts the ping of a single pinger and passes
// PingScheduler::ping_done_handler as the completion callback.
// fix: defined as a PingScheduler member, because ping_when_ready binds it
// as &PingScheduler::delayed_ping; the definition lacked the class qualifier.
// @param pinger  the pinger to start
// NOTE(review): actual_ip is a local variable of ping_when_ready and is not
// declared in the visible lines of this function; it presumably has to be
// passed in or stored in a member -- confirm. The remaining arguments
// (original lines 259 and 261-263) are not visible in this view.
256 void PingScheduler::delayed_ping( const PingerItem &pinger )
258 pinger->ping( actual_ip,
260 boost::bind(&PingScheduler::ping_done_handler,
265 //------------------------------------------------------------------------------
266 // Post Processing of Ping result and Preparation for next ping
267 //------------------------------------------------------------------------------
270 * @brief called when Ping::ping is done; calls functions to update
271 * statistics, ping interval and elapsed time;
272 * schedules a call to ping, thereby closing the loop
// Callback for a finished ping: adjusts the reported status, logs it, feeds
// the result and the elapsed time into HostAnalyzer and checks whether all
// pingers of this round are done.
// @param result  status reported for the ping
// NOTE(review): original lines 283-286 and 298 onwards are not visible; the
// update of NPingersDone and the action taken once all pingers are done
// (presumably prepare_next_ping) cannot be seen here -- confirm.
274 void PingScheduler::ping_done_handler( const PingStatus &result )
276 PingStatus edited_result = result;
// a successful reply obtained while running on outdated IPs is reported
// under its own status
277 if (result == PingStatus_SuccessReply && ContinueOnOutdatedIps)
279 edited_result = PingStatus_SuccessOutdatedIP;
281 // reset ContinueOnOutdatedIps
282 ContinueOnOutdatedIps = false;
287 GlobalLogger.info() << LogPrefix << "Ping " << NPingersDone << " of "
288 << NPingers << " done with result " << to_string(edited_result);
291 // can call update_ping_interval only after update_ping_statistics!
292 ptime now = microsec_clock::universal_time();
// second argument: microseconds elapsed since TimeSentLastPing
293 HostAnalyzer.update_ping_statistics( edited_result,
294 (now - TimeSentLastPing).total_microseconds());
296 // prepare next ping only after all pingers are done
297 if (NPingersDone == NPingers)
// Prepares the next ping round: updates the ping interval, possibly switches
// the ping protocol and re-arms NextPingTimer with PingScheduler::ping as
// handler. The timer is set to the remainder of PingIntervalInSec counted
// from TimeSentLastPing, or to 0 seconds when that time has already passed.
// NOTE(review): original lines 313 and 317-318 are not visible; presumably
// they hold the braces and the `else` separating the two expires_from_now
// calls -- confirm.
302 void PingScheduler::prepare_next_ping()
304 update_ping_interval();
306 // get next protocol, possibly start resolving IPs
307 update_ping_protocol();
309 // schedule next ping
310 int seconds_since_last_ping = (microsec_clock::universal_time()
311 - TimeSentLastPing).total_seconds();
312 if ( seconds_since_last_ping > PingIntervalInSec )
// fix: log with LogPrefix like the other messages of this class, so the
// line can be attributed to a destination
314 GlobalLogger.info() << LogPrefix << "We are late for next ping!";
315 seconds_since_last_ping = PingIntervalInSec;
316 (void) NextPingTimer.expires_from_now( seconds(0) );
319 (void) NextPingTimer.expires_from_now( seconds( PingIntervalInSec
320 - seconds_since_last_ping ) );
321 NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
322 boost::asio::placeholders::error ) );
// Adapts PingIntervalInSec to the current failure statistics: speeds it up
// when HostAnalyzer reports that the ping-failed limit was exceeded; the
// other visible statements reset it to its original value.
// NOTE(review): original lines 334-336 are not visible; presumably an `else`
// separates the two cases -- confirm.
325 void PingScheduler::update_ping_interval()
327 // have to ping more often?
328 if ( HostAnalyzer.exceeded_ping_failed_limit() )
330 PingIntervalInSec.speed_up();
332 GlobalLogger.debug() << LogPrefix << "- Speeding up ping interval to: "
333 << PingIntervalInSec << "s";
337 PingIntervalInSec.back_to_original();
339 GlobalLogger.debug() << LogPrefix << "- Stick to the original ping "
340 << "interval: " << PingIntervalInSec << "s";
344 //------------------------------------------------------------------------------
345 // Ping Protocol Rotation
346 //------------------------------------------------------------------------------
// Selects the initial ping protocol (called from the constructor).
348 void PingScheduler::init_ping_protocol()
// start at end(); get_next_ping_protocol wraps an end() iterator around to
// begin()
350 ProtocolIter = Protocols.end();
351 get_next_ping_protocol();
// Switches to the next ping protocol, but only if
// can_change_ping_protocol() allows it.
354 void PingScheduler::update_ping_protocol()
356 if ( can_change_ping_protocol() )
358 get_next_ping_protocol();
// Selects the protocol ProtocolIter refers to (wrapping from end() to
// begin()), creates NPingers pingers for it through PingerFactory and then
// updates the DNS resolver for that protocol.
// NOTE(review): original lines 365 and 368-370 are not visible; presumably
// they stop the old pingers (see comment on line 364) and advance
// ProtocolIter -- confirm.
362 void PingScheduler::get_next_ping_protocol()
364 // stop and destruct all pingers
366 GlobalLogger.debug() << LogPrefix
367 << "------------------------------------------------------------------";
371 if (ProtocolIter == Protocols.end())
372 ProtocolIter = Protocols.begin();
373 PingProtocol ping_protocol = *ProtocolIter;
374 // --> ProtocolIter still points to currently used protocol which is
375 // required in dns_resolve_callback
377 // create new pingers
378 for (int count=0; count<NPingers; ++count)
379 Pingers.push_back( PingerFactory::createPinger(ping_protocol, IoService,
380 NetworkInterfaceName, PingReplyTimeout) );
382 update_dns_resolver( ping_protocol );
// Tells update_ping_protocol() whether the ping protocol may be switched.
// NOTE(review): the return statement (original lines 390-391) is not visible
// in this view, so the actual decision cannot be documented here.
385 bool PingScheduler::can_change_ping_protocol() const
387 // TODO can_change_ping_protocol() and get_next_ping_protocol() may be
388 // implemented in a Algorithm class that can be exchanged in this class to
389 // provide an algorithm neutral class
393 //------------------------------------------------------------------------------
394 // DNS host name resolution
395 //------------------------------------------------------------------------------
397 // show "!" after host name if running on outdated IPs
// Rebuilds LogPrefix: the text starts with "Sched(" followed by
// DestinationAddress and is stored in LogPrefix at the end.
// NOTE(review): original lines 403-404 are not visible; presumably they
// append a marker when ContinueOnOutdatedIps is set and close the prefix --
// confirm.
398 void PingScheduler::update_log_prefix()
400 std::stringstream temp;
401 temp << "Sched(" << DestinationAddress;
402 if (ContinueOnOutdatedIps)
405 LogPrefix = temp.str();
// Fetches the resolver for DestinationAddress from DnsMaster and either
// publishes the number of up-to-date IPs to HostAnalyzer or starts a new
// resolve.
// @param current_protocol  protocol the resolver is requested for
// NOTE(review): several original lines (e.g. 414, 416-418, 423-424, 429-430,
// 439-442) are not visible; the condition deciding between "have IPs" and
// "start resolve" and the second argument of get_resolver_for cannot be seen
// here.
408 void PingScheduler::update_dns_resolver( PingProtocol current_protocol )
// non-forced cancel: see cancel_resolve() for which resolvers it cancels
410 if (Resolver && Resolver->is_resolving())
411 cancel_resolve(false);
413 if (ContinueOnOutdatedIps)
415 ContinueOnOutdatedIps = false;
419 // DNS master caches created resolvers and resolved IPs, so this will
420 // probably just return an existing resolver with already resolved IPs for
421 // requested protocol ( ICMP/TCP is ignored, only IPv4/v6 is important)
422 Resolver = DnsMaster::get_instance()->get_resolver_for(DestinationAddress,
425 // get number of up-to-date IPs
426 // TODO should check here, if they will be up to date in PingIntervalInSec
427 bool check_up_to_date = true;
428 int ip_count = Resolver->get_resolved_ip_count(check_up_to_date);
431 GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
432 << ip_count << " (IPs may be outdated=" << !check_up_to_date << ")";
433 HostAnalyzer.set_resolved_ip_count( ip_count );
435 if (Resolver->is_resolving())
436 GlobalLogger.warning() << LogPrefix << "have up to date IPs but "
437 << "resolver seems to be resolving all the same... "
438 << "Start pinging anyway!";
443 GlobalLogger.info() << LogPrefix
444 << "No up-to-date IPs --> start resolve";
445 start_resolving_ping_address();
446 // set resolved_ip_count will be called in resolve callback
// Starts an asynchronous resolve on the current Resolver with
// PingScheduler::dns_resolve_callback as completion callback.
// NOTE(review): the remaining bind arguments (original lines 453-454) are
// not visible in this view of the file.
450 void PingScheduler::start_resolving_ping_address()
452 Resolver->async_resolve( boost::bind(&PingScheduler::dns_resolve_callback,
// Callback for a finished DNS resolve (registered in
// start_resolving_ping_address). Visible handling:
//  - the IP count is read from the resolver and published to HostAnalyzer; a
//    warning is logged for the case "success but no IPs";
//  - on failure without a CNAME to skip, cached (possibly outdated) IPs are
//    used and ContinueOnOutdatedIps is set;
//  - on failure with a CNAME to skip, a resolver for that host is fetched
//    and a new resolve is started.
// @param was_success      whether the resolve succeeded
// @param recursion_count  number of recursions, only used for logging here
// NOTE(review): many original lines (e.g. 462-464, 467, 478-480, 485-488,
// 496-497, 501-502, 504-505, 513-516) are not visible; the conditions on
// was_success and ip_count and what happens after each branch (presumably a
// call that triggers the ping) cannot be seen here.
456 void PingScheduler::dns_resolve_callback(const bool was_success,
457 const int recursion_count)
459 GlobalLogger.info() << LogPrefix << "dns resolution finished "
460 << "with success = " << was_success << " "
461 << "after " << recursion_count << " recursions";
465 // trust that a successful DNS resolve means we have an IP with TTL>0
// argument is check_up_to_date: only count current IPs unless already
// running on outdated ones
466 int ip_count = Resolver->get_resolved_ip_count(!ContinueOnOutdatedIps);
468 { // this will create trouble in HostAnalyzer
469 GlobalLogger.warning() << LogPrefix
470 << "Should not have reached this case: resolve was "
471 << "successfull but still have no IPs (up-to-date="
472 << !ContinueOnOutdatedIps << ")!";
473 if (DnsMaster::get_instance()->get_resolved_ip_ttl_threshold() > 0)
474 GlobalLogger.warning() << LogPrefix << "This probably happened "
475 << "because you specified a TTL threshold > 0 but resolving"
476 << " had no effect on TTLs since external cache is only "
477 << "updated when TTL=0 is reached.";
481 GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
482 << ip_count << " (IPs may be outdated="
483 << ContinueOnOutdatedIps << ") --> could ping now";
484 HostAnalyzer.set_resolved_ip_count( ip_count );
489 { // host name resolution failed; try again bypassing first outdated CNAME
490 // or using cached IP
491 std::string skip_host = Resolver->get_skip_cname();
493 if (skip_host.empty())
494 { // try to continue with cached IPs
// false: count outdated IPs as well
495 int ip_count = Resolver->get_resolved_ip_count(false);
498 GlobalLogger.notice() << LogPrefix << "DNS failed "
499 << "and have no cached IPs either --> cannot ping";
500 // ping_when_ready will deal with this case
503 ContinueOnOutdatedIps = true;
506 GlobalLogger.notice() << LogPrefix << "DNS failed, "
507 << "try anyway with cached data";
510 GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
511 << ip_count << " (IPs may be outdated=" << true << ")";
512 HostAnalyzer.set_resolved_ip_count( ip_count );
517 { // have CNAME to continue
518 ContinueOnOutdatedIps = true;
520 GlobalLogger.notice() << LogPrefix << "DNS failed, "
521 << "try again skipping a CNAME and resolving "
522 << skip_host << " directly";
// non-forced: see cancel_resolve() for which resolvers it cancels
524 cancel_resolve(false);
526 // now create new resolver
527 Resolver = DnsMaster::get_instance()
528 ->get_resolver_for(skip_host, *ProtocolIter);
529 start_resolving_ping_address();
535 * cancel resolver if force_cancel or if it is not resolving DestinationAddress
537 * Resolvers have a life on their own: they are cached by DnsMaster so never go
538 * out of scope and even after calling callbacks, there might still be a
539 * longterm timer active to re-try resolving.
540 * We want to cancel that long-term timer only if the Resolver is not for our
541 * real, original DestinationAddress but a CNAME, which can happen when trying
542 * to skip cnames and working on out-dated IPs
// Cancels the current Resolver if force_cancel is set or if the resolver is
// not working on the original DestinationAddress (e.g. on a CNAME); a
// non-forced call leaves the resolver for DestinationAddress active.
// @param force_cancel  cancel regardless of the host the resolver works on
// NOTE(review): original lines 545-547, 550 and 554-555 are not visible;
// presumably they hold the braces, the `if (force_cancel)` condition and the
// final `else` -- confirm. Resolver is dereferenced without a visible null
// check.
544 void PingScheduler::cancel_resolve(const bool force_cancel)
// fix: log with LogPrefix like the two sibling messages below, so the line
// can be attributed to a destination
548 GlobalLogger.info() << LogPrefix << "Cancelling resolver (forced)";
549 Resolver->cancel_resolve();
551 else if ( Resolver->get_hostname() == DestinationAddress )
552 GlobalLogger.info() << LogPrefix
553 << "Leave original resolver active in background";
556 GlobalLogger.info() << LogPrefix << "Cancel resolver for "
557 << Resolver->get_hostname() << " since is not the original "
558 << DestinationAddress;
559 Resolver->cancel_resolve();