010b8d5d239b550da1f18cc94955fe1b5a76f08b
[pingcheck] / src / host / pingscheduler.cpp
1 /*
2 The software in this package is distributed under the GNU General
3 Public License version 2 (with a special exception described below).
4
5 A copy of GNU General Public License (GPL) is included in this distribution,
6 in the file COPYING.GPL.
7
8 As a special exception, if other files instantiate templates or use macros
9 or inline functions from this file, or you compile this file and link it
10 with other works to produce a work based on this file, this file
11 does not by itself cause the resulting work to be covered
12 by the GNU General Public License.
13
14 However the source code for this file must still be made available
15 in accordance with section (3) of the GNU General Public License.
16
17 This exception does not invalidate any other reasons why a work based
18 on this file might be covered by the GNU General Public License.
19 */
20 #include "host/pingscheduler.h"
21
22 #include <iostream>
23 #include <limits>
24
25 #include <boost/bind.hpp>
26 #include <boost/foreach.hpp>
27
28 #include <logfunc.hpp>
29
30 #include "boost_assert_handler.h"
31 #include "host/pingerfactory.h"
32 #include "dns/dnsmaster.h"
33 #include "icmp/icmppinger.h"
34 #include "link/linkstatus.h"
35
36 using namespace std;
37 using boost::asio::io_service;
38 using boost::bind;
39 using boost::date_time::time_resolution_traits_adapted64_impl;
40 using boost::posix_time::microsec_clock;
41 using boost::posix_time::ptime;
42 using boost::posix_time::seconds;
43 using boost::shared_ptr;
44 using I2n::Logger::GlobalLogger;
45
46 //-----------------------------------------------------------------------------
47 // PingScheduler
48 //-----------------------------------------------------------------------------
49
50 /**
51  * @brief Parameterized constructor.
52  *
53  * @param io_serv The one @c io_serv object that controls async processing
54  * @param network_interface The name of the network interface sending the pings.
55  * @param destination_address The remote address to ping.
56  * @param destination_port The remote port to ping.
57  * @param ping_protocol_list A list of protocols to use.
58  * @param ping_interval_in_sec Amount of time between each ping.
59  * @param ping_fail_percentage_limit Maximum amount of pings that can fail.
60  * @param ping_reply_timeout Max amount time to wait for ping to finish
61  * @param link_analyzer The object to monitor the link status.
62  * @param first_delay Delay in seconds from start_pinging to first ping attempt
63  * @param n_parallel_pings: Number of pingers to ping the same IP in parallel
64  */
65 PingScheduler::PingScheduler(
66         const IoServiceItem io_serv,
67         const string &network_interface,
68         const string &destination_address,
69         const uint16_t destination_port,
70         const PingProtocolList &ping_protocol_list,
71         const long ping_interval_in_sec,
72         const int ping_fail_percentage_limit,
73         const int ping_reply_timeout,
74         LinkStatusItem link_analyzer,
75         const int first_delay,
76         const int n_parallel_pings
77         const int parallel_ping_delay
78 ) :
79     IoService( io_serv ),
80     NetworkInterfaceName( network_interface ),
81     DestinationAddress( destination_address ),
82     DestinationPort( destination_port ),
83     Protocols( ping_protocol_list ),
84     ProtocolIter(),
85     PingIntervalInSec( ping_interval_in_sec ),
86     FirstDelay( first_delay ),
87     NextPingTimer( *io_serv ),
88     TimeSentLastPing( microsec_clock::universal_time() ),
89     PingReplyTimeout( ping_reply_timeout ),
90     HostAnalyzer( destination_address, ping_fail_percentage_limit,
91                   n_parallel_pings, link_analyzer ),
92     Resolver(),
93     Pingers(),
94     NPingers( n_parallel_pings ),
95     NPingersDone( 0 ),
96     ParallelPingDelay( parallel_ping_delay ),
97     WantToPing( false ),
98     LogPrefix(),
99     ContinueOnOutdatedIps( false )
100 {
101     BOOST_ASSERT( !network_interface.empty() );
102     BOOST_ASSERT( !destination_address.empty() );
103     BOOST_ASSERT( ( 0 < destination_port ) &&
104                   ( destination_port < numeric_limits<uint16_t>::max() ) );
105     BOOST_ASSERT( 0 < ping_interval_in_sec );
106     BOOST_ASSERT( (0 <= ping_fail_percentage_limit) &&
107                   ( ping_fail_percentage_limit <= 100) );
108
109     update_log_prefix();
110
111     init_ping_protocol();
112 }
113
114 /**
115  * @brief Destructor.
116  */
117 PingScheduler::~PingScheduler()
118 {
119 }
120
121 void PingScheduler::stop_pinging()
122 {
123     // stop pinger and resolver
124     GlobalLogger.debug() << LogPrefix << "scheduler: stop pinging";
125     clear_pingers();
126     cancel_resolve(true);
127
128     // now cancel the own timer in case that pinger cancelation called callback
129     GlobalLogger.debug() << LogPrefix << "scheduler: cancel timer";
130     NextPingTimer.cancel();
131 }
132
133 /**
134  * @brief stop all pingers and remove them from Pingers variable which will
135  *   proboably cause their destruction
136  *   
137  * Pingers is empty afterwards
138  */
139 void PingScheduler::clear_pingers()
140 {
141     PingerItem pinger;
142     while ( !Pingers.empty() )
143     {
144         pinger = Pingers.front();
145         pinger->stop_pinging();
146         Pingers.pop_front();
147     }
148 }
149
150 /**
151  * @brief Start into infinite loop of calls to ping
152  *
153  * Does not start yet but set NextPingTimer (possibly to 0), so action starts
154  *   when io_service is started
155  */
156 void PingScheduler::start_pinging()
157 {
158     if ( FirstDelay > 0 )
159         GlobalLogger.info() << LogPrefix << "Delaying first ping by "
160                                          << FirstDelay << "s";
161     else
162         GlobalLogger.info() << LogPrefix << "Schedule ping as soon as possible";
163
164     (void) NextPingTimer.expires_from_now( seconds( FirstDelay ) );
165     NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
166                                           boost::asio::placeholders::error ) );
167 }
168
169
170 /**
171  * @brief call Ping::ping and schedule a call to ping_done_handler when finished
172  */
173 void PingScheduler::ping(const boost::system::error_code &error)
174 {
175     if ( error )
176     {   // get here, e.g. by NextPingTimer.cancel in stop_pinging
177         if ( error ==  boost::asio::error::operation_aborted )
178             GlobalLogger.error() << LogPrefix << "Timer for ping was cancelled!"
179                                  << " --> Stopping";
180         else
181             GlobalLogger.error() << LogPrefix << "Received error " << error
182                                  << " waiting for ping! Stopping";
183         return;
184     }
185
186     // ping as soon as dns is ready
187     WantToPing = true;
188     ping_when_ready();
189 }
190
191
192 void PingScheduler::ping_when_ready()
193 {
194     if ( !WantToPing )
195     {
196         GlobalLogger.info() << LogPrefix << "waiting for ping request "
197             << "(should take no more than " << PingIntervalInSec << "s)";
198         return;
199     }
200     else if ( Resolver && Resolver->is_resolving() )
201     {
202         GlobalLogger.info() << LogPrefix << "waiting for DNS to finish";
203         return;
204     }
205     else if ( !Resolver )
206         // should not happen, but check anyway
207         GlobalLogger.warning() << LogPrefix << "Have no resolver!";
208
209     GlobalLogger.info() << LogPrefix << "start ping";
210     WantToPing = false;
211
212     // try to get an up-to-date IP (ContinueOnOutdatedIps may only be set
213     //   because a CNAME was out of date -- IPs may still be current)
214     HostAddress ip = Resolver->get_next_ip();
215
216     if ( !ip.is_valid() )
217     {   // this can happen in 2 cases: if ContinueOnOutdatedIps==true
218         // or when ip went out of date between resolve and now
219         // --> try to use outdated IP
220         GlobalLogger.info() << LogPrefix << "Checking for outdated IPs";
221         bool check_up_to_date = false;
222         ip = Resolver->get_next_ip(check_up_to_date);
223     }
224     if ( !ip.is_valid() )
225     {   // Do not even have an outdated IP!
226         // This happens if have no cached IPs and resolve failed
227         GlobalLogger.info() << LogPrefix << "Not even outdated IP to ping "
228             << "-- treat like a failed ping.";
229
230         // skip the ping and directly call ping_done_handler
231         HostAnalyzer.set_resolved_ip_count(1);   // must have been 0 --> failed
232              // ping would create failed assumption (nPings > nIPs)
233         ping_done_handler(PingStatus_FailureNoIP, 0);
234         HostAnalyzer.set_resolved_ip_count(0);   // set back
235     }
236     else
237     {
238         boost::asio::ip::address actual_ip = ip.get_ip();
239         GlobalLogger.info() << LogPrefix << "pinging IP " << actual_ip
240             << " with TTL " << ip.get_ttl().get_updated_value() << "s";
241         int delay_count = 0;
242         BOOST_FOREACH( const PingerItem &pinger, Pingers )
243         {
244             boost::asio::deadline_timer delayed_ping_timer( IoService );
245             delayed_ping_timer.expires_from_now(
246                                   milliseconds(delay_count * ParallelPingDelay);
247             delayed_ping_timer.async_wait( bind( &PingScheduler::delayed_ping,
248                                                                 this, pinger) );
249             ++delay_count;
250         }
251         TimeSentLastPing = microsec_clock::universal_time();
252         NPingersDone = 0;
253     }
254 }
255
256 void delayed_ping( const PingerItem &pinger )
257 {
258     pinger->ping( actual_ip,
259                   DestinationPort,
260                   boost::bind(&PingScheduler::ping_done_handler,
261                                                         this, _1) );
262 }
263
264
265 //------------------------------------------------------------------------------
266 // Post Processing of Ping result and Preparation for next ping
267 //------------------------------------------------------------------------------
268
269 /**
270  * @brief called when Ping::ping is done; calls functions to update
271  *   statistics, ping interval and elapsed time;
272  *   schedules a call to ping, thereby closing the loop
273  */
274 void PingScheduler::ping_done_handler( const PingStatus &result,
275                                        const long ping_duration_us )
276 {
277     PingStatus edited_result = result;
278     if (result == PingStatus_SuccessReply && ContinueOnOutdatedIps)
279     {
280         edited_result = PingStatus_SuccessOutdatedIP;
281
282         // reset ContinueOnOutdatedIps
283         ContinueOnOutdatedIps = false;
284         update_log_prefix();
285     }
286
287     ++NPingersDone;
288     GlobalLogger.info() << LogPrefix << "Ping " << NPingersDone << " of "
289         << NPingers << " done with result " << to_string(edited_result);
290
291     // post-processing
292     // can call update_ping_interval only after update_ping_statistics!
293     HostAnalyzer.update_ping_statistics( edited_result, ping_duration_us );
294
295     // prepare next ping only after all pingers are done
296     if (NPingersDone == NPingers)
297         prepare_next_ping();
298 }
299
300
301 void PingScheduler::prepare_next_ping()
302 {
303     update_ping_interval();
304
305     // get next protocol, possibly start resolving IPs
306     update_ping_protocol();
307
308     // schedule next ping
309     int seconds_since_last_ping = (microsec_clock::universal_time()
310                                             - TimeSentLastPing).total_seconds();
311     if ( seconds_since_last_ping > PingIntervalInSec )
312     {
313         GlobalLogger.info() << "We are late for next ping!";
314         seconds_since_last_ping = PingIntervalInSec;
315         (void) NextPingTimer.expires_from_now( seconds(0) );
316     }
317     else
318         (void) NextPingTimer.expires_from_now( seconds( PingIntervalInSec
319                                                   - seconds_since_last_ping ) );
320     NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
321                                          boost::asio::placeholders::error ) );
322 }
323
324 void PingScheduler::update_ping_interval()
325 {
326     // have to ping more often?
327     if ( HostAnalyzer.exceeded_ping_failed_limit() )
328     {
329         PingIntervalInSec.speed_up();
330
331         GlobalLogger.debug() << LogPrefix << "- Speeding up ping interval to: "
332                              << PingIntervalInSec << "s";
333     }
334     else
335     {
336         PingIntervalInSec.back_to_original();
337
338         GlobalLogger.debug() << LogPrefix << "- Stick to the original ping "
339                              << "interval: " << PingIntervalInSec << "s";
340     }
341 }
342
343 //------------------------------------------------------------------------------
344 // Ping Protocol Rotation
345 //------------------------------------------------------------------------------
346
347 void PingScheduler::init_ping_protocol()
348 {
349     ProtocolIter = Protocols.end();
350     get_next_ping_protocol();
351 }
352
353 void PingScheduler::update_ping_protocol()
354 {
355     if ( can_change_ping_protocol() )
356     {
357         get_next_ping_protocol();
358     }
359 }
360
361 void PingScheduler::get_next_ping_protocol()
362 {
363     // stop and destruct all pingers
364     clear_pingers();
365     GlobalLogger.debug() << LogPrefix
366         << "------------------------------------------------------------------";
367
368     // get next protocol
369     ++ProtocolIter;
370     if (ProtocolIter == Protocols.end())
371         ProtocolIter = Protocols.begin();
372     PingProtocol ping_protocol = *ProtocolIter;
373     // --> ProtocolIter still points to currently used protocol which is
374     //     required in dns_resolve_callback
375
376     // create new pingers
377     for (int count=0; count<NPingers; ++count)
378         Pingers.push_back( PingerFactory::createPinger(ping_protocol, IoService,
379                                       NetworkInterfaceName, PingReplyTimeout) );
380
381     update_dns_resolver( ping_protocol );
382 }
383
384 bool PingScheduler::can_change_ping_protocol() const
385 {
386     // TODO can_change_ping_protocol() and get_next_ping_protocol() may be
387     // implemented in a Algorithm class that can be exchanged in this class to
388     // provide an algorithm neutral class
389     return true;
390 }
391
392 //------------------------------------------------------------------------------
393 // DNS host name resolution
394 //------------------------------------------------------------------------------
395
396 // show "!" after host name if running on outdated IPs
397 void PingScheduler::update_log_prefix()
398 {
399     std::stringstream temp;
400     temp << "Sched(" << DestinationAddress;
401     if (ContinueOnOutdatedIps)
402         temp << "!";
403     temp << "): ";
404     LogPrefix = temp.str();
405 }
406
407 void PingScheduler::update_dns_resolver( PingProtocol current_protocol )
408 {
409     if (Resolver && Resolver->is_resolving())
410         cancel_resolve(false);
411
412     if (ContinueOnOutdatedIps)
413     {
414         ContinueOnOutdatedIps = false;
415         update_log_prefix();
416     }
417
418     // DNS master caches created resolvers and resolved IPs, so this will
419     // probably just return an existing resolver with already resolved IPs for
420     // requested protocol ( ICMP/TCP is ignored, only IPv4/v6 is important)
421     Resolver = DnsMaster::get_instance()->get_resolver_for(DestinationAddress,
422                                                            current_protocol);
423
424     // get number of up-to-date IPs
425     // TODO should check here, if they will be up to date in PingIntervalInSec
426     bool check_up_to_date = true;
427     int ip_count = Resolver->get_resolved_ip_count(check_up_to_date);
428     if (ip_count > 0)
429     {
430         GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
431             << ip_count << " (IPs may be outdated=" << !check_up_to_date << ")";
432         HostAnalyzer.set_resolved_ip_count( ip_count );
433
434         if (Resolver->is_resolving())
435             GlobalLogger.warning() << LogPrefix << "have up to date IPs but "
436                 << "resolver seems to be resolving all the same... "
437                 << "Start pinging anyway!";
438         ping_when_ready();
439     }
440     else
441     {
442         GlobalLogger.info() << LogPrefix
443                             << "No up-to-date IPs --> start resolve";
444         start_resolving_ping_address();
445         // set resolved_ip_count will be called in resolve callback
446     }
447 }
448
449 void PingScheduler::start_resolving_ping_address()
450 {
451     Resolver->async_resolve( boost::bind(&PingScheduler::dns_resolve_callback,
452                                           this, _1, _2) );
453 }
454
455 void PingScheduler::dns_resolve_callback(const bool was_success,
456                                          const int recursion_count)
457 {
458     GlobalLogger.info() << LogPrefix << "dns resolution finished "
459                         << "with success = " << was_success << " "
460                         << "after " << recursion_count << " recursions";
461
462     if ( was_success )
463     {
464         // trust that a successfull DNS resolve means we have an IP with TTL>0
465         int ip_count = Resolver->get_resolved_ip_count(!ContinueOnOutdatedIps);
466         if (ip_count == 0)
467         {   // this will create trouble in HostAnalyzer
468             GlobalLogger.warning() << LogPrefix
469                 << "Should not have reached this case: resolve was "
470                 << "successfull but still have no IPs (up-to-date="
471                 << !ContinueOnOutdatedIps << ")!";
472             if (DnsMaster::get_instance()->get_resolved_ip_ttl_threshold() > 0)
473                 GlobalLogger.warning() << LogPrefix << "This probably happened "
474                     << "because you specified a TTL threshold > 0 but resolving"
475                     << " had no effect on TTLs since external cache is only "
476                     << "updated when TTL=0 is reached.";
477         }
478         else
479         {
480             GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
481                 << ip_count << " (IPs may be outdated="
482                 << ContinueOnOutdatedIps << ") --> could ping now";
483             HostAnalyzer.set_resolved_ip_count( ip_count );
484         }
485         ping_when_ready();
486     }
487     else
488     {   // host name resolution failed; try again bypassing first outdated CNAME
489         // or using cached IP
490         std::string skip_host = Resolver->get_skip_cname();
491
492         if (skip_host.empty())
493         {   // try to continue with cached IPs
494             int ip_count = Resolver->get_resolved_ip_count(false);
495
496             if (ip_count == 0)
497                 GlobalLogger.notice() << LogPrefix << "DNS failed "
498                     << "and have no cached IPs either --> cannot ping";
499                 // ping_when_ready will deal with this case
500             else
501             {
502                 ContinueOnOutdatedIps = true;
503                 update_log_prefix();
504
505                 GlobalLogger.notice() << LogPrefix << "DNS failed, "
506                     << "try anyway with cached data";
507             }
508
509             GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
510                 << ip_count << " (IPs may be outdated=" << true << ")";
511             HostAnalyzer.set_resolved_ip_count( ip_count );
512
513             ping_when_ready();
514         }
515         else
516         {   // have CNAME to continue
517             ContinueOnOutdatedIps = true;
518             update_log_prefix();
519             GlobalLogger.notice() << LogPrefix << "DNS failed, "
520                 << "try again skipping a CNAME and resolving "
521                 << skip_host << " directly";
522
523             cancel_resolve(false);
524
525             // now create new resolver
526             Resolver = DnsMaster::get_instance()
527                                    ->get_resolver_for(skip_host, *ProtocolIter);
528             start_resolving_ping_address();
529         }
530     }
531 }
532
533 /**
534  * cancel resolver if force_cancel or if it is not resolving DestinationAddress
535  *
536  * Resolvers have a life on their own: they are cached by DnsMaster so never go
537  *   out of scope and even after calling callbacks, there might still be a
538  *   longterm timer active to re-try resolving.
539  * We want to cancel that long-term timer only if the Resolver is not for our
540  *   real, original DestinationAddress but a CNAME, which can happen when trying
541  *   to skip cnames and working on out-dated IPs
542  */
543 void PingScheduler::cancel_resolve(const bool force_cancel)
544 {
545     if (force_cancel)
546     {
547         GlobalLogger.info() << "Cancelling resolver (forced)";
548         Resolver->cancel_resolve();
549     }
550     else if ( Resolver->get_hostname() == DestinationAddress )
551         GlobalLogger.info() << LogPrefix
552                             << "Leave original resolver active in background";
553     else
554     {
555         GlobalLogger.info() << LogPrefix << "Cancel resolver for "
556             << Resolver->get_hostname() << " since is not the original "
557             << DestinationAddress;
558         Resolver->cancel_resolve();
559     }
560 }
561