706583bf9a94a06f2ce6f0619bb656ca3aaae831
[pingcheck] / src / host / pingscheduler.cpp
1 /*
2 The software in this package is distributed under the GNU General
3 Public License version 2 (with a special exception described below).
4
5 A copy of GNU General Public License (GPL) is included in this distribution,
6 in the file COPYING.GPL.
7
8 As a special exception, if other files instantiate templates or use macros
9 or inline functions from this file, or you compile this file and link it
10 with other works to produce a work based on this file, this file
11 does not by itself cause the resulting work to be covered
12 by the GNU General Public License.
13
14 However the source code for this file must still be made available
15 in accordance with section (3) of the GNU General Public License.
16
17 This exception does not invalidate any other reasons why a work based
18 on this file might be covered by the GNU General Public License.
19 */
20 #include "host/pingscheduler.h"
21
22 #include <iostream>
23 #include <limits>
24
25 #include <boost/bind.hpp>
26 #include <boost/foreach.hpp>
27
28 #include <logfunc.hpp>
29
30 #include "boost_assert_handler.h"
31 #include "host/pingerfactory.h"
32 #include "dns/dnsmaster.h"
33 #include "icmp/icmppinger.h"
34 #include "link/linkstatus.h"
35
36 using namespace std;
37 using boost::asio::io_service;
38 using boost::bind;
39 using boost::date_time::time_resolution_traits_adapted64_impl;
40 using boost::posix_time::microsec_clock;
41 using boost::posix_time::ptime;
42 using boost::posix_time::seconds;
43 using boost::shared_ptr;
44 using I2n::Logger::GlobalLogger;
45
46 //-----------------------------------------------------------------------------
47 // PingScheduler
48 //-----------------------------------------------------------------------------
49
50 /**
51  * @brief Parameterized constructor.
52  *
53  * @param io_serv The one @c io_serv object that controls async processing
54  * @param network_interface The name of the network interface sending the pings.
55  * @param destination_address The remote address to ping.
56  * @param destination_port The remote port to ping.
57  * @param ping_protocol_list A list of protocols to use.
58  * @param ping_interval_in_sec Amount of time between each ping.
59  * @param ping_fail_percentage_limit Maximum amount of pings that can fail.
60  * @param ping_reply_timeout Max amount time to wait for ping to finish
61  * @param link_analyzer The object to monitor the link status.
62  * @param first_delay Delay in seconds from start_pinging to first ping attempt
63  * @param n_parallel_pings: Number of pingers to ping the same IP in parallel
64  */
65 PingScheduler::PingScheduler(
66         const IoServiceItem io_serv,
67         const string &network_interface,
68         const string &destination_address,
69         const uint16_t destination_port,
70         const PingProtocolList &ping_protocol_list,
71         const long ping_interval_in_sec,
72         const int ping_fail_percentage_limit,
73         const int ping_reply_timeout,
74         LinkStatusItem link_analyzer,
75         const int first_delay,
76         const int n_parallel_pings
77         const int parallel_ping_delay
78 ) :
79     IoService( io_serv ),
80     NetworkInterfaceName( network_interface ),
81     DestinationAddress( destination_address ),
82     DestinationPort( destination_port ),
83     Protocols( ping_protocol_list ),
84     ProtocolIter(),
85     PingIntervalInSec( ping_interval_in_sec ),
86     FirstDelay( first_delay ),
87     NextPingTimer( *io_serv ),
88     TimeSentLastPing( microsec_clock::universal_time() ),
89     PingReplyTimeout( ping_reply_timeout ),
90     HostAnalyzer( destination_address, ping_fail_percentage_limit,
91                   n_parallel_pings, link_analyzer ),
92     Resolver(),
93     Pingers(),
94     NPingers( n_parallel_pings ),
95     NPingersDone( 0 ),
96     ParallelPingDelay( parallel_ping_delay ),
97     WantToPing( false ),
98     LogPrefix(),
99     ContinueOnOutdatedIps( false )
100 {
101     BOOST_ASSERT( !network_interface.empty() );
102     BOOST_ASSERT( !destination_address.empty() );
103     BOOST_ASSERT( ( 0 < destination_port ) &&
104                   ( destination_port < numeric_limits<uint16_t>::max() ) );
105     BOOST_ASSERT( 0 < ping_interval_in_sec );
106     BOOST_ASSERT( (0 <= ping_fail_percentage_limit) &&
107                   ( ping_fail_percentage_limit <= 100) );
108
109     update_log_prefix();
110
111     init_ping_protocol();
112 }
113
114 /**
115  * @brief Destructor.
116  */
117 PingScheduler::~PingScheduler()
118 {
119 }
120
121 void PingScheduler::stop_pinging()
122 {
123     // stop pinger and resolver
124     GlobalLogger.debug() << LogPrefix << "scheduler: stop pinging";
125     clear_pingers();
126     cancel_resolve(true);
127
128     // now cancel the own timer in case that pinger cancelation called callback
129     GlobalLogger.debug() << LogPrefix << "scheduler: cancel timer";
130     NextPingTimer.cancel();
131 }
132
133 /**
134  * @brief stop all pingers and remove them from Pingers variable which will
135  *   proboably cause their destruction
136  *   
137  * Pingers is empty afterwards
138  */
139 void PingScheduler::clear_pingers()
140 {
141     PingerItem pinger;
142     while ( !Pingers.empty() )
143     {
144         pinger = Pingers.front();
145         pinger->stop_pinging();
146         Pingers.pop_front();
147     }
148 }
149
150 /**
151  * @brief Start into infinite loop of calls to ping
152  *
153  * Does not start yet but set NextPingTimer (possibly to 0), so action starts
154  *   when io_service is started
155  */
156 void PingScheduler::start_pinging()
157 {
158     if ( FirstDelay > 0 )
159         GlobalLogger.info() << LogPrefix << "Delaying first ping by "
160                                          << FirstDelay << "s";
161     else
162         GlobalLogger.info() << LogPrefix << "Schedule ping as soon as possible";
163
164     (void) NextPingTimer.expires_from_now( seconds( FirstDelay ) );
165     NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
166                                           boost::asio::placeholders::error ) );
167 }
168
169
170 /**
171  * @brief call Ping::ping and schedule a call to ping_done_handler when finished
172  */
173 void PingScheduler::ping(const boost::system::error_code &error)
174 {
175     if ( error )
176     {   // get here, e.g. by NextPingTimer.cancel in stop_pinging
177         if ( error ==  boost::asio::error::operation_aborted )
178             GlobalLogger.error() << LogPrefix << "Timer for ping was cancelled!"
179                                  << " --> Stopping";
180         else
181             GlobalLogger.error() << LogPrefix << "Received error " << error
182                                  << " waiting for ping! Stopping";
183         return;
184     }
185
186     // ping as soon as dns is ready
187     WantToPing = true;
188     ping_when_ready();
189 }
190
191
192 void PingScheduler::ping_when_ready()
193 {
194     if ( !WantToPing )
195     {
196         GlobalLogger.info() << LogPrefix << "waiting for ping request "
197             << "(should take no more than " << PingIntervalInSec << "s)";
198         return;
199     }
200     else if ( Resolver && Resolver->is_resolving() )
201     {
202         GlobalLogger.info() << LogPrefix << "waiting for DNS to finish";
203         return;
204     }
205     else if ( !Resolver )
206         // should not happen, but check anyway
207         GlobalLogger.warning() << LogPrefix << "Have no resolver!";
208
209     GlobalLogger.info() << LogPrefix << "start ping";
210     WantToPing = false;
211
212     // try to get an up-to-date IP (ContinueOnOutdatedIps may only be set
213     //   because a CNAME was out of date -- IPs may still be current)
214     HostAddress ip = Resolver->get_next_ip();
215
216     if ( !ip.is_valid() )
217     {   // this can happen in 2 cases: if ContinueOnOutdatedIps==true
218         // or when ip went out of date between resolve and now
219         // --> try to use outdated IP
220         GlobalLogger.info() << LogPrefix << "Checking for outdated IPs";
221         bool check_up_to_date = false;
222         ip = Resolver->get_next_ip(check_up_to_date);
223     }
224     if ( !ip.is_valid() )
225     {   // Do not even have an outdated IP!
226         // This happens if have no cached IPs and resolve failed
227         GlobalLogger.info() << LogPrefix << "Not even outdated IP to ping "
228             << "-- treat like a failed ping.";
229
230         // skip the ping and directly call ping_done_handler
231         HostAnalyzer.set_resolved_ip_count(1);   // must have been 0 --> failed
232              // ping would create failed assumption (nPings > nIPs)
233         ping_done_handler(PingStatus_FailureNoIP);
234         HostAnalyzer.set_resolved_ip_count(0);   // set back
235     }
236     else
237     {
238         boost::asio::ip::address actual_ip = ip.get_ip();
239         GlobalLogger.info() << LogPrefix << "pinging IP " << actual_ip
240             << " with TTL " << ip.get_ttl().get_updated_value() << "s";
241         int delay_count = 0;
242         BOOST_FOREACH( const PingerItem &pinger, Pingers )
243         {
244             boost::asio::deadline_timer delayed_ping_timer( IoService );
245             delayed_ping_timer.expires_from_now(
246                                   milliseconds(delay_count * ParallelPingDelay);
247             delayed_ping_timer.async_wait( bind( &PingScheduler::delayed_ping,
248                                                                 this, pinger) );
249             ++delay_count;
250         }
251         TimeSentLastPing = microsec_clock::universal_time();
252         NPingersDone = 0;
253     }
254 }
255
256 void delayed_ping( const PingerItem &pinger )
257 {
258     pinger->ping( actual_ip,
259                   DestinationPort,
260                   boost::bind(&PingScheduler::ping_done_handler,
261                                                         this, _1) );
262 }
263
264
265 //------------------------------------------------------------------------------
266 // Post Processing of Ping result and Preparation for next ping
267 //------------------------------------------------------------------------------
268
269 /**
270  * @brief called when Ping::ping is done; calls functions to update
271  *   statistics, ping interval and elapsed time;
272  *   schedules a call to ping, thereby closing the loop
273  */
274 void PingScheduler::ping_done_handler( const PingStatus &result )
275 {
276     PingStatus edited_result = result;
277     if (result == PingStatus_SuccessReply && ContinueOnOutdatedIps)
278     {
279         edited_result = PingStatus_SuccessOutdatedIP;
280
281         // reset ContinueOnOutdatedIps
282         ContinueOnOutdatedIps = false;
283         update_log_prefix();
284     }
285
286     ++NPingersDone;
287     GlobalLogger.info() << LogPrefix << "Ping " << NPingersDone << " of "
288         << NPingers << " done with result " << to_string(edited_result);
289
290     // post-processing
291     // can call update_ping_interval only after update_ping_statistics!
292     ptime now = microsec_clock::universal_time();
293     HostAnalyzer.update_ping_statistics( edited_result,
294                                  (now - TimeSentLastPing).total_microseconds());
295
296     // prepare next ping only after all pingers are done
297     if (NPingersDone == NPingers)
298         prepare_next_ping();
299 }
300
301
302 void PingScheduler::prepare_next_ping()
303 {
304     update_ping_interval();
305
306     // get next protocol, possibly start resolving IPs
307     update_ping_protocol();
308
309     // schedule next ping
310     int seconds_since_last_ping = (microsec_clock::universal_time()
311                                             - TimeSentLastPing).total_seconds();
312     if ( seconds_since_last_ping > PingIntervalInSec )
313     {
314         GlobalLogger.info() << "We are late for next ping!";
315         seconds_since_last_ping = PingIntervalInSec;
316         (void) NextPingTimer.expires_from_now( seconds(0) );
317     }
318     else
319         (void) NextPingTimer.expires_from_now( seconds( PingIntervalInSec
320                                                   - seconds_since_last_ping ) );
321     NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
322                                          boost::asio::placeholders::error ) );
323 }
324
325 void PingScheduler::update_ping_interval()
326 {
327     // have to ping more often?
328     if ( HostAnalyzer.exceeded_ping_failed_limit() )
329     {
330         PingIntervalInSec.speed_up();
331
332         GlobalLogger.debug() << LogPrefix << "- Speeding up ping interval to: "
333                              << PingIntervalInSec << "s";
334     }
335     else
336     {
337         PingIntervalInSec.back_to_original();
338
339         GlobalLogger.debug() << LogPrefix << "- Stick to the original ping "
340                              << "interval: " << PingIntervalInSec << "s";
341     }
342 }
343
344 //------------------------------------------------------------------------------
345 // Ping Protocol Rotation
346 //------------------------------------------------------------------------------
347
348 void PingScheduler::init_ping_protocol()
349 {
350     ProtocolIter = Protocols.end();
351     get_next_ping_protocol();
352 }
353
354 void PingScheduler::update_ping_protocol()
355 {
356     if ( can_change_ping_protocol() )
357     {
358         get_next_ping_protocol();
359     }
360 }
361
362 void PingScheduler::get_next_ping_protocol()
363 {
364     // stop and destruct all pingers
365     clear_pingers();
366     GlobalLogger.debug() << LogPrefix
367         << "------------------------------------------------------------------";
368
369     // get next protocol
370     ++ProtocolIter;
371     if (ProtocolIter == Protocols.end())
372         ProtocolIter = Protocols.begin();
373     PingProtocol ping_protocol = *ProtocolIter;
374     // --> ProtocolIter still points to currently used protocol which is
375     //     required in dns_resolve_callback
376
377     // create new pingers
378     for (int count=0; count<NPingers; ++count)
379         Pingers.push_back( PingerFactory::createPinger(ping_protocol, IoService,
380                                       NetworkInterfaceName, PingReplyTimeout) );
381
382     update_dns_resolver( ping_protocol );
383 }
384
385 bool PingScheduler::can_change_ping_protocol() const
386 {
387     // TODO can_change_ping_protocol() and get_next_ping_protocol() may be
388     // implemented in a Algorithm class that can be exchanged in this class to
389     // provide an algorithm neutral class
390     return true;
391 }
392
393 //------------------------------------------------------------------------------
394 // DNS host name resolution
395 //------------------------------------------------------------------------------
396
397 // show "!" after host name if running on outdated IPs
398 void PingScheduler::update_log_prefix()
399 {
400     std::stringstream temp;
401     temp << "Sched(" << DestinationAddress;
402     if (ContinueOnOutdatedIps)
403         temp << "!";
404     temp << "): ";
405     LogPrefix = temp.str();
406 }
407
408 void PingScheduler::update_dns_resolver( PingProtocol current_protocol )
409 {
410     if (Resolver && Resolver->is_resolving())
411         cancel_resolve(false);
412
413     if (ContinueOnOutdatedIps)
414     {
415         ContinueOnOutdatedIps = false;
416         update_log_prefix();
417     }
418
419     // DNS master caches created resolvers and resolved IPs, so this will
420     // probably just return an existing resolver with already resolved IPs for
421     // requested protocol ( ICMP/TCP is ignored, only IPv4/v6 is important)
422     Resolver = DnsMaster::get_instance()->get_resolver_for(DestinationAddress,
423                                                            current_protocol);
424
425     // get number of up-to-date IPs
426     // TODO should check here, if they will be up to date in PingIntervalInSec
427     bool check_up_to_date = true;
428     int ip_count = Resolver->get_resolved_ip_count(check_up_to_date);
429     if (ip_count > 0)
430     {
431         GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
432             << ip_count << " (IPs may be outdated=" << !check_up_to_date << ")";
433         HostAnalyzer.set_resolved_ip_count( ip_count );
434
435         if (Resolver->is_resolving())
436             GlobalLogger.warning() << LogPrefix << "have up to date IPs but "
437                 << "resolver seems to be resolving all the same... "
438                 << "Start pinging anyway!";
439         ping_when_ready();
440     }
441     else
442     {
443         GlobalLogger.info() << LogPrefix
444                             << "No up-to-date IPs --> start resolve";
445         start_resolving_ping_address();
446         // set resolved_ip_count will be called in resolve callback
447     }
448 }
449
450 void PingScheduler::start_resolving_ping_address()
451 {
452     Resolver->async_resolve( boost::bind(&PingScheduler::dns_resolve_callback,
453                                           this, _1, _2) );
454 }
455
456 void PingScheduler::dns_resolve_callback(const bool was_success,
457                                          const int recursion_count)
458 {
459     GlobalLogger.info() << LogPrefix << "dns resolution finished "
460                         << "with success = " << was_success << " "
461                         << "after " << recursion_count << " recursions";
462
463     if ( was_success )
464     {
465         // trust that a successfull DNS resolve means we have an IP with TTL>0
466         int ip_count = Resolver->get_resolved_ip_count(!ContinueOnOutdatedIps);
467         if (ip_count == 0)
468         {   // this will create trouble in HostAnalyzer
469             GlobalLogger.warning() << LogPrefix
470                 << "Should not have reached this case: resolve was "
471                 << "successfull but still have no IPs (up-to-date="
472                 << !ContinueOnOutdatedIps << ")!";
473             if (DnsMaster::get_instance()->get_resolved_ip_ttl_threshold() > 0)
474                 GlobalLogger.warning() << LogPrefix << "This probably happened "
475                     << "because you specified a TTL threshold > 0 but resolving"
476                     << " had no effect on TTLs since external cache is only "
477                     << "updated when TTL=0 is reached.";
478         }
479         else
480         {
481             GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
482                 << ip_count << " (IPs may be outdated="
483                 << ContinueOnOutdatedIps << ") --> could ping now";
484             HostAnalyzer.set_resolved_ip_count( ip_count );
485         }
486         ping_when_ready();
487     }
488     else
489     {   // host name resolution failed; try again bypassing first outdated CNAME
490         // or using cached IP
491         std::string skip_host = Resolver->get_skip_cname();
492
493         if (skip_host.empty())
494         {   // try to continue with cached IPs
495             int ip_count = Resolver->get_resolved_ip_count(false);
496
497             if (ip_count == 0)
498                 GlobalLogger.notice() << LogPrefix << "DNS failed "
499                     << "and have no cached IPs either --> cannot ping";
500                 // ping_when_ready will deal with this case
501             else
502             {
503                 ContinueOnOutdatedIps = true;
504                 update_log_prefix();
505
506                 GlobalLogger.notice() << LogPrefix << "DNS failed, "
507                     << "try anyway with cached data";
508             }
509
510             GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
511                 << ip_count << " (IPs may be outdated=" << true << ")";
512             HostAnalyzer.set_resolved_ip_count( ip_count );
513
514             ping_when_ready();
515         }
516         else
517         {   // have CNAME to continue
518             ContinueOnOutdatedIps = true;
519             update_log_prefix();
520             GlobalLogger.notice() << LogPrefix << "DNS failed, "
521                 << "try again skipping a CNAME and resolving "
522                 << skip_host << " directly";
523
524             cancel_resolve(false);
525
526             // now create new resolver
527             Resolver = DnsMaster::get_instance()
528                                    ->get_resolver_for(skip_host, *ProtocolIter);
529             start_resolving_ping_address();
530         }
531     }
532 }
533
534 /**
535  * cancel resolver if force_cancel or if it is not resolving DestinationAddress
536  *
537  * Resolvers have a life on their own: they are cached by DnsMaster so never go
538  *   out of scope and even after calling callbacks, there might still be a
539  *   longterm timer active to re-try resolving.
540  * We want to cancel that long-term timer only if the Resolver is not for our
541  *   real, original DestinationAddress but a CNAME, which can happen when trying
542  *   to skip cnames and working on out-dated IPs
543  */
544 void PingScheduler::cancel_resolve(const bool force_cancel)
545 {
546     if (force_cancel)
547     {
548         GlobalLogger.info() << "Cancelling resolver (forced)";
549         Resolver->cancel_resolve();
550     }
551     else if ( Resolver->get_hostname() == DestinationAddress )
552         GlobalLogger.info() << LogPrefix
553                             << "Leave original resolver active in background";
554     else
555     {
556         GlobalLogger.info() << LogPrefix << "Cancel resolver for "
557             << Resolver->get_hostname() << " since is not the original "
558             << DestinationAddress;
559         Resolver->cancel_resolve();
560     }
561 }
562