b38271afbee96231f1f018c83821ee51c7606dac
[pingcheck] / src / host / pingscheduler.cpp
1 /*
2 The software in this package is distributed under the GNU General
3 Public License version 2 (with a special exception described below).
4
5 A copy of GNU General Public License (GPL) is included in this distribution,
6 in the file COPYING.GPL.
7
8 As a special exception, if other files instantiate templates or use macros
9 or inline functions from this file, or you compile this file and link it
10 with other works to produce a work based on this file, this file
11 does not by itself cause the resulting work to be covered
12 by the GNU General Public License.
13
14 However the source code for this file must still be made available
15 in accordance with section (3) of the GNU General Public License.
16
17 This exception does not invalidate any other reasons why a work based
18 on this file might be covered by the GNU General Public License.
19 */
20 #include "host/pingscheduler.h"
21
22 #include <iostream>
23 #include <limits>
24
25 #include <boost/bind.hpp>
26 #include <boost/foreach.hpp>
27
28 #include <logfunc.hpp>
29
30 #include "boost_assert_handler.h"
31 #include "host/pingerfactory.h"
32 #include "dns/dnsmaster.h"
33 #include "icmp/icmppinger.h"
34 #include "link/linkstatus.h"
35
36 using namespace std;
37 using boost::asio::io_service;
38 using boost::bind;
39 using boost::date_time::time_resolution_traits_adapted64_impl;
40 using boost::posix_time::microsec_clock;
41 using boost::posix_time::ptime;
42 using boost::posix_time::seconds;
43 using boost::shared_ptr;
44 using I2n::Logger::GlobalLogger;
45
46 //-----------------------------------------------------------------------------
47 // PingScheduler
48 //-----------------------------------------------------------------------------
49
50 /**
51  * @brief Parameterized constructor.
52  *
53  * @param io_serv The one @c io_serv object that controls async processing
54  * @param network_interface The name of the network interface sending the pings.
55  * @param destination_address The remote address to ping.
56  * @param destination_port The remote port to ping.
57  * @param ping_protocol_list A list of protocols to use.
58  * @param ping_interval_in_sec Amount of time between each ping.
59  * @param ping_fail_percentage_limit Maximum amount of pings that can fail.
60  * @param link_analyzer The object to monitor the link status.
61  * @param first_delay Delay in seconds from start_pinging to first ping attempt
62  */
63 PingScheduler::PingScheduler(
64         const IoServiceItem io_serv,
65         const string &network_interface,
66         const string &destination_address,
67         const uint16_t destination_port,
68         const PingProtocolList &ping_protocol_list,
69         const long ping_interval_in_sec,
70         const int ping_fail_percentage_limit,
71         const int ping_reply_timeout,
72         LinkStatusItem link_analyzer,
73         const int first_delay
74
75 ) :
76     IoService( io_serv ),
77     NetworkInterfaceName( network_interface ),
78     DestinationAddress( destination_address ),
79     DestinationPort( destination_port ),
80     Protocols( ping_protocol_list ),
81     ProtocolIter(),
82     PingIntervalInSec( ping_interval_in_sec ),
83     FirstDelay( first_delay ),
84     NextPingTimer( *io_serv ),
85     TimeSentLastPing( microsec_clock::universal_time() ),
86     PingReplyTimeout( ping_reply_timeout ),
87     HostAnalyzer( destination_address, ping_fail_percentage_limit,
88                   link_analyzer ),
89     Resolver(),
90     Ping(),
91     WantToPing( false ),
92     LogPrefix(),
93     ContinueOnOutdatedIps( false )
94 {
95     BOOST_ASSERT( !network_interface.empty() );
96     BOOST_ASSERT( !destination_address.empty() );
97     BOOST_ASSERT( ( 0 < destination_port ) &&
98                   ( destination_port < numeric_limits<uint16_t>::max() ) );
99     BOOST_ASSERT( 0 < ping_interval_in_sec );
100     BOOST_ASSERT( (0 <= ping_fail_percentage_limit) &&
101                   ( ping_fail_percentage_limit <= 100) );
102
103     update_log_prefix();
104
105     init_ping_protocol();
106 }
107
108 /**
109  * @brief Destructor.
110  */
111 PingScheduler::~PingScheduler()
112 {
113 }
114
115 void PingScheduler::stop_pinging()
116 {
117     // stop pinger and resolver
118     GlobalLogger.debug() << LogPrefix << "scheduler: stop pinging";
119     Ping->stop_pinging();
120     cancel_resolve(true);
121
122     // now cancel the own timer in case that pinger cancelation called callback
123     GlobalLogger.debug() << LogPrefix << "scheduler: cancel timer";
124     NextPingTimer.cancel();
125 }
126
127 /**
128  * @brief Start into infinite loop of calls to ping
129  *
130  * Does not start yet but set NextPingTimer (possibly to 0), so action starts
131  *   when io_service is started
132  */
133 void PingScheduler::start_pinging()
134 {
135     if ( FirstDelay > 0 )
136         GlobalLogger.info() << LogPrefix << "Delaying first ping by "
137                                          << FirstDelay << "s";
138     else
139         GlobalLogger.info() << LogPrefix << "Schedule ping as soon as possible";
140
141     (void) NextPingTimer.expires_from_now( seconds( FirstDelay ) );
142     NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
143                                           boost::asio::placeholders::error ) );
144 }
145
146
147 /**
148  * @brief call Ping::ping and schedule a call to ping_done_handler when finished
149  */
150 void PingScheduler::ping(const boost::system::error_code &error)
151 {
152     if ( error )
153     {   // get here, e.g. by NextPingTimer.cancel in stop_pinging
154         if ( error ==  boost::asio::error::operation_aborted )
155             GlobalLogger.error() << LogPrefix << "Timer for ping was cancelled!"
156                                  << " --> Stopping";
157         else
158             GlobalLogger.error() << LogPrefix << "Received error " << error
159                                  << " waiting for ping! Stopping";
160         return;
161     }
162
163     // ping as soon as dns is ready
164     WantToPing = true;
165     ping_when_ready();
166 }
167
168
169 void PingScheduler::ping_when_ready()
170 {
171     if ( !WantToPing )
172     {
173         GlobalLogger.info() << LogPrefix << "not pinging (not requested to)";
174         return;
175     }
176     else if ( Resolver && Resolver->is_resolving() )
177     {
178         GlobalLogger.info() << LogPrefix << "not pinging (DNS not finished)";
179         return;
180     }
181     else if ( !Resolver )
182         // should not happen, but check anyway
183         GlobalLogger.warning() << LogPrefix << "Have no resolver!";
184
185     GlobalLogger.info() << LogPrefix << "start ping";
186     WantToPing = false;
187
188     // try to get an up-to-date IP
189     HostAddress ip = Resolver->get_next_ip();
190
191     if ( !ip.is_valid() && ContinueOnOutdatedIps)
192     {   // we failed to resolve --> try to use outdated IP
193         GlobalLogger.info() << LogPrefix << "Checking for outdated IPs";
194         bool check_up_to_date = false;
195         ip = Resolver->get_next_ip(check_up_to_date);
196     }
197     if ( ip.is_valid() )
198         Ping->ping( ip.get_ip(),
199                     DestinationPort,
200                     boost::bind(&PingScheduler::ping_done_handler, this, _1) );
201     else
202     {   // should not happen
203         GlobalLogger.error() << LogPrefix << "No IP to ping "
204                              << "-- this should not have happened!!";
205         WantToPing = true;
206         if ( !Resolver->is_resolving() )
207             start_resolving_ping_address();
208     }
209 }
210
211
212 //------------------------------------------------------------------------------
213 // Post Processing of Ping result
214 //------------------------------------------------------------------------------
215
216 /**
217  * @brief called when Ping::ping is done; calls functions to update
218  *   statistics, ping interval and elapsed time;
219  *   schedules a call to ping, thereby closing the loop
220  */
221 void PingScheduler::ping_done_handler( const bool ping_success )
222 {
223     // post-processing
224     // You must call these 3 methods exactly in this order
225     // TODO Fix this method, once it has a semantic dependency with the
226     // update_ping_statistics method, because it depends on the PingAnalyzer
227     // statistics to update the exceeded_ping_failed_limit
228     HostAnalyzer.update_ping_statistics( ping_success );
229     update_ping_interval();
230     update_ping_elapsed_time();
231
232     if (ping_success)
233     {   // reset ContinueOnOutdatedIps
234         ContinueOnOutdatedIps = false;
235         update_log_prefix();
236     }
237
238     // get next protocol, possibly start resolving IPs
239     update_ping_protocol();
240
241     // schedule next ping
242     (void) NextPingTimer.expires_from_now( seconds( PingIntervalInSec ) );
243     NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
244                                          boost::asio::placeholders::error ) );
245 }
246
247 void PingScheduler::update_ping_interval()
248 {
249     // have to ping more often?
250     if ( HostAnalyzer.exceeded_ping_failed_limit() )
251     {
252         PingIntervalInSec.speed_up();
253
254         GlobalLogger.debug() << LogPrefix << "- Speeding up ping interval to: "
255                              << PingIntervalInSec << "s";
256     }
257     else
258     {
259         PingIntervalInSec.back_to_original();
260
261         GlobalLogger.debug() << LogPrefix << "- Stick to the original ping "
262                              << "interval: " << PingIntervalInSec << "s";
263     }
264 }
265
266 void PingScheduler::update_ping_elapsed_time()
267 {
268     ptime now = microsec_clock::universal_time();
269     time_resolution_traits_adapted64_impl::int_type elapsed_time_in_sec =
270             (now - TimeSentLastPing).total_seconds();
271     GlobalLogger.debug() << LogPrefix << "- Time elapsed since last ping: "
272                                       << elapsed_time_in_sec << "s";
273
274     TimeSentLastPing = microsec_clock::universal_time();
275 }
276
277
278 //------------------------------------------------------------------------------
279 // Ping Protocol Rotation
280 //------------------------------------------------------------------------------
281
282 void PingScheduler::init_ping_protocol()
283 {
284     ProtocolIter = Protocols.end();
285     get_next_ping_protocol();
286 }
287
288 void PingScheduler::update_ping_protocol()
289 {
290     if ( can_change_ping_protocol() )
291     {
292         get_next_ping_protocol();
293     }
294 }
295
296 void PingScheduler::get_next_ping_protocol()
297 {
298     ++ProtocolIter;
299     if (ProtocolIter == Protocols.end())
300         ProtocolIter = Protocols.begin();
301     PingProtocol ping_protocol = *ProtocolIter;
302     // --> ProtocolIter still points to currently used protocol which is
303     //     required in dns_resolve_callback
304
305     if (Ping)
306         Ping->stop_pinging();
307
308     Ping = PingerFactory::createPinger(ping_protocol, IoService,
309                                        NetworkInterfaceName, PingReplyTimeout);
310
311     update_dns_resolver( ping_protocol );
312
313 }
314
315 bool PingScheduler::can_change_ping_protocol() const
316 {
317     // TODO can_change_ping_protocol() and get_next_ping_protocol() may be
318     // implemented in a Algorithm class that can be exchanged in this class to
319     // provide an algorithm neutral class
320     return true;
321 }
322
323 //------------------------------------------------------------------------------
324 // DNS host name resolution
325 //------------------------------------------------------------------------------
326
327 // show "!" after host name if running on outdated IPs
328 void PingScheduler::update_log_prefix()
329 {
330     std::stringstream temp;
331     temp << "Sched(" << DestinationAddress;
332     if (ContinueOnOutdatedIps)
333         temp << "!";
334     temp << "): ";
335     LogPrefix = temp.str();
336 }
337
338 void PingScheduler::update_dns_resolver( PingProtocol current_protocol )
339 {
340     if (Resolver && Resolver->is_resolving())
341         cancel_resolve(false);
342
343     if (ContinueOnOutdatedIps)
344     {
345         ContinueOnOutdatedIps = false;
346         update_log_prefix();
347     }
348
349     // DNS master caches created resolvers and resolved IPs, so this will
350     // probably just return an existing resolver with already resolved IPs for
351     // requested protocol ( ICMP/TCP is ignored, only IPv4/v6 is important)
352     Resolver = DnsMaster::get_instance()->get_resolver_for(DestinationAddress,
353                                                            current_protocol);
354     // start resolving if no ips available
355     if ( Resolver->have_up_to_date_ip() )
356     {
357         if (Resolver->is_resolving())
358             GlobalLogger.warning() << LogPrefix << "have up to date IPs but "
359                 << "resolver seems to be resolving all the same... "
360                 << "Start pinging anyway!";
361         ping_when_ready();
362     }
363     else
364         start_resolving_ping_address();
365 }
366
367 void PingScheduler::start_resolving_ping_address()
368 {
369     Resolver->async_resolve( boost::bind(&PingScheduler::dns_resolve_callback,
370                                           this, _1, _2) );
371 }
372
373 void PingScheduler::dns_resolve_callback(const bool was_success,
374                                          const int cname_count)
375 {
376     GlobalLogger.info() << LogPrefix << "dns resolution finished "
377                         << "with success = " << was_success << " "
378                         << "and cname_count = " << cname_count;
379
380     if ( was_success )
381     {
382         HostAnalyzer.set_resolved_ip_count( Resolver->get_resolved_ip_count());
383         ping_when_ready();
384     }
385     else
386     {   // host name resolution failed; try again bypassing first outdated CNAME
387         // or using cached IP
388         ContinueOnOutdatedIps = true;
389         update_log_prefix();
390
391         std::string skip_host = Resolver->get_skip_cname();
392
393         if (skip_host.empty())
394         {   // continue with IP
395             GlobalLogger.notice() << LogPrefix << "DNS failed, "
396                 << "try anyway with cached data";
397             HostAnalyzer.set_resolved_ip_count(0);
398             ping_when_ready();
399         }
400         else
401         {   // have CNAME to continue
402             GlobalLogger.notice() << LogPrefix << "DNS failed, "
403                 << "try again skipping a CNAME and resolving "
404                 << skip_host << " directly";
405
406             cancel_resolve(false);
407
408             // now create new resolver
409             Resolver = DnsMaster::get_instance()
410                                    ->get_resolver_for(skip_host, *ProtocolIter);
411             start_resolving_ping_address();
412         }
413     }
414 }
415
416 /**
417  * cancel resolver if force_cancel or if it is not resolving DestinationAddress
418  *
419  * Resolvers have a life on their own: they are cached by DnsMaster so never go
420  *   out of scope and even after calling callbacks, there might still be a
421  *   longterm timer active to re-try resolving.
422  * We want to cancel that long-term timer only if the Resolver is not for our
423  *   real, original DestinationAddress but a CNAME, which can happen when trying
424  *   to skip cnames and working on out-dated IPs
425  */
426 void PingScheduler::cancel_resolve(const bool force_cancel)
427 {
428     if (force_cancel)
429     {
430         GlobalLogger.info() << "Cancelling resolver (forced)";
431         Resolver->cancel_resolve();
432     }
433     else if ( Resolver->get_hostname() == DestinationAddress )
434         GlobalLogger.info() << LogPrefix
435                             << "Leave original resolver active in background";
436     else
437     {
438         GlobalLogger.info() << LogPrefix << "Cancel resolver for "
439             << Resolver->get_hostname() << " since is not the original "
440             << DestinationAddress;
441         Resolver->cancel_resolve();
442     }
443 }
444