created and passed first unit tests for DNS; finished recovery from PingScheduler...
[pingcheck] / src / host / pingscheduler.cpp
1 /*
2 The software in this package is distributed under the GNU General
3 Public License version 2 (with a special exception described below).
4
5 A copy of GNU General Public License (GPL) is included in this distribution,
6 in the file COPYING.GPL.
7
8 As a special exception, if other files instantiate templates or use macros
9 or inline functions from this file, or you compile this file and link it
10 with other works to produce a work based on this file, this file
11 does not by itself cause the resulting work to be covered
12 by the GNU General Public License.
13
14 However the source code for this file must still be made available
15 in accordance with section (3) of the GNU General Public License.
16
17 This exception does not invalidate any other reasons why a work based
18 on this file might be covered by the GNU General Public License.
19 */
20 #include "host/pingscheduler.h"
21
22 #include <iostream>
23 #include <limits>
24
25 #include <boost/bind.hpp>
26 #include <boost/foreach.hpp>
27
28 #include <logfunc.hpp>
29
30 #include "boost_assert_handler.h"
31 #include "host/pingerfactory.h"
32 #include "dns/dnsmaster.h"
33 #include "icmp/icmppinger.h"
34 #include "link/linkstatus.h"
35
36 using namespace std;
37 using boost::asio::io_service;
38 using boost::bind;
39 using boost::date_time::time_resolution_traits_adapted64_impl;
40 using boost::posix_time::microsec_clock;
41 using boost::posix_time::ptime;
42 using boost::posix_time::seconds;
43 using boost::shared_ptr;
44 using I2n::Logger::GlobalLogger;
45
46 //-----------------------------------------------------------------------------
47 // PingScheduler
48 //-----------------------------------------------------------------------------
49
50 /**
51  * @brief Parameterized constructor.
52  *
53  * @param io_serv The one @c io_serv object that controls async processing
54  * @param network_interface The name of the network interface originating the pings.
55  * @param destination_address The remote address to ping.
56  * @param destination_port The remote port to ping.
57  * @param ping_protocol_list A list of protocols to use.
58  * @param ping_interval_in_sec Amount of time between each ping.
59  * @param ping_fail_percentage_limit Maximum amount of pings that can fail.
60  * @param link_analyzer The object to monitor the link status.
61  * @param first_delay Delay in seconds from start_pinging to first ping attempt
62  */
63 PingScheduler::PingScheduler(
64         const IoServiceItem io_serv,
65         const string &network_interface,
66         const string &destination_address,
67         const uint16_t destination_port,
68         const PingProtocolList &ping_protocol_list,
69         const long ping_interval_in_sec,
70         const int ping_fail_percentage_limit,
71         const int ping_reply_timeout,
72         LinkStatusItem link_analyzer,
73         const int first_delay
74
75 ) :
76     IoService( io_serv ),
77     NetworkInterfaceName( network_interface ),
78     DestinationAddress( destination_address ),
79     DestinationPort( destination_port ),
80     Protocols( ping_protocol_list ),
81     ProtocolIter(),
82     PingIntervalInSec( ping_interval_in_sec ),
83     FirstDelay( first_delay ),
84     NextPingTimer( *io_serv ),
85     TimeSentLastPing( microsec_clock::universal_time() ),
86     PingReplyTimeout( ping_reply_timeout ),
87     HostAnalyzer( destination_address, ping_fail_percentage_limit, link_analyzer ),
88     Resolver(),
89     Ping(),
90     WantToPing( false ),
91     LogPrefix(),
92     ContinueOnOutdatedIps( false )
93 {
94     BOOST_ASSERT( !network_interface.empty() );
95     BOOST_ASSERT( !destination_address.empty() );
96     BOOST_ASSERT( ( 0 < destination_port ) &&
97                   ( destination_port < numeric_limits<uint16_t>::max() ) );
98     BOOST_ASSERT( 0 < ping_interval_in_sec );
99     BOOST_ASSERT( (0 <= ping_fail_percentage_limit) &&
100                   ( ping_fail_percentage_limit <= 100) );
101
102     update_log_prefix();
103
104     init_ping_protocol();
105 }
106
107 /**
108  * @brief Destructor.
109  */
110 PingScheduler::~PingScheduler()
111 {
112 }
113
114 void PingScheduler::stop_pinging()
115 {
116     // stop pinger, which will probably call ping_done_handler --> re-new NextPingTimer
117     GlobalLogger.debug() << "scheduler: stop pinging" << endl;
118     Ping->stop_pinging();
119     Resolver->cancel_resolve();
120
121     // now cancel the own timer
122     GlobalLogger.debug() << "scheduler: cancel timer" << endl;
123     NextPingTimer.cancel();
124 }
125
126 /**
127  * @brief Start into infinite loop of calls to ping
128  *
129  * Does not start yet but set NextPingTimer (possibly to 0), so action starts
130  *   when io_service is started
131  */
132 void PingScheduler::start_pinging()
133 {
134     if ( FirstDelay > 0 )
135         GlobalLogger.info() << "Delaying first ping by " << FirstDelay << "s";
136     else
137         GlobalLogger.info() << "Schedule ping as soon as possible";
138
139     (void) NextPingTimer.expires_from_now( seconds( FirstDelay ) );
140     NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
141                                           boost::asio::placeholders::error ) );
142 }
143
144
145 /**
146  * @brief call Ping::ping and schedule a call to ping_done_handler when finished
147  */
148 void PingScheduler::ping(const boost::system::error_code &error)
149 {
150     if ( error )
151     {   // get here, e.g. by NextPingTimer.cancel in stop_pinging
152         if ( error ==  boost::asio::error::operation_aborted )
153             GlobalLogger.error() << "Timer for ping was cancelled! "
154                                  << "Stopping" << endl;
155         else
156             GlobalLogger.error() << "Received error " << error
157                                  << " waiting for ping! Stopping"
158                                  << endl;
159         return;
160     }
161
162     // ping as soon as dns is ready
163     WantToPing = true;
164     ping_when_ready();
165 }
166
167
168 void PingScheduler::ping_when_ready()
169 {
170     if ( !WantToPing )
171     {
172         GlobalLogger.info() << "PingScheduler: not pinging (not requested to)";
173         return;
174     }
175     else if ( Resolver && Resolver->is_resolving() )
176     {
177         GlobalLogger.info() << "PingScheduler: not pinging (DNS not finished)";
178         return;
179     }
180     else if ( !Resolver )
181         // should not happen, but check anyway
182         GlobalLogger.warning() << LogPrefix << "Have no resolver!";
183
184     GlobalLogger.info() << "PingScheduler: start ping";
185     WantToPing = false;
186
187     // try to get an up-to-date IP
188     HostAddress ip = Resolver->get_next_ip();
189
190     if ( !ip.is_valid() && ContinueOnOutdatedIps)
191     {   // we failed to resolve --> try to use outdated IP
192         GlobalLogger.info() << LogPrefix << "Checking for outdated IPs";
193         bool check_up_to_date = false;
194         ip = Resolver->get_next_ip(check_up_to_date);
195     }
196     if ( ip.is_valid() )
197         Ping->ping(
198                 Resolver->get_next_ip().get_ip(),
199                 DestinationPort,
200                 boost::bind(&PingScheduler::ping_done_handler, this, _1)
201         );
202     else
203     {   // should not happen
204         GlobalLogger.error() << LogPrefix << "No IP to ping "
205                              << "-- this should not have happened!!";
206         WantToPing = true;
207         if ( !Resolver->is_resolving() )
208             start_resolving_ping_address();
209     }
210
211     // next time try with up-to-date IP
212     ContinueOnOutdatedIps = false;
213 }
214
215
216 //------------------------------------------------------------------------------
217 // Post Processing of Ping result
218 //------------------------------------------------------------------------------
219
220 /**
221  * @brief called when Ping::ping is done; calls functions to update
222  *   statistics, ping interval and elapsed time;
223  *   schedules a call to ping, thereby closing the loop
224  */
225 void PingScheduler::ping_done_handler( const bool ping_success )
226 {
227     // post-processing
228     // You must call these 3 methods exactly in this order
229     // TODO Fix this method, once it has a semantic dependency with the
230     // update_ping_statistics method, because it depends on the PingAnalyzer
231     // statistics to update the exceeded_ping_failed_limit
232     HostAnalyzer.update_ping_statistics( ping_success );
233     update_ping_interval();
234     update_ping_elapsed_time();
235
236     // get next protocol, possibly start resolving IPs
237     update_ping_protocol();
238
239     // schedule next ping
240     (void) NextPingTimer.expires_from_now( seconds( PingIntervalInSec ) );
241     NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
242                                          boost::asio::placeholders::error ) );
243 }
244
245 void PingScheduler::update_ping_interval()
246 {
247     // have to ping more often?
248     if ( HostAnalyzer.exceeded_ping_failed_limit() )
249     {
250         PingIntervalInSec.speed_up();
251
252         GlobalLogger.debug() << "- Speeding up ping interval to: " << PingIntervalInSec << "s"
253                 << endl;
254     }
255     else
256     {
257         PingIntervalInSec.back_to_original();
258
259         GlobalLogger.debug() << "- Stick to the original ping interval: " << PingIntervalInSec << "s"
260                 << endl;
261     }
262 }
263
264 void PingScheduler::update_ping_elapsed_time()
265 {
266     ptime now = microsec_clock::universal_time();
267     time_resolution_traits_adapted64_impl::int_type elapsed_time_in_sec =
268             (now - TimeSentLastPing).total_seconds();
269     GlobalLogger.debug() << "- Time elapsed since last ping: " << elapsed_time_in_sec << "s" << endl;
270
271     TimeSentLastPing = microsec_clock::universal_time();
272 }
273
274
275 //------------------------------------------------------------------------------
276 // Ping Protocol Rotation
277 //------------------------------------------------------------------------------
278
279 void PingScheduler::init_ping_protocol()
280 {
281     ProtocolIter = Protocols.end();
282     get_next_ping_protocol();
283 }
284
285 void PingScheduler::update_ping_protocol()
286 {
287     if ( can_change_ping_protocol() )
288     {
289         get_next_ping_protocol();
290     }
291 }
292
293 void PingScheduler::get_next_ping_protocol()
294 {
295     ++ProtocolIter;
296     if (ProtocolIter == Protocols.end())
297         ProtocolIter = Protocols.begin();
298     PingProtocol ping_protocol = *ProtocolIter;
299     // --> ProtocolIter still points to currently used protocol which is 
300     //     required in dns_resolve_callback
301
302     if (Ping)
303         Ping->stop_pinging();
304
305     Ping = PingerFactory::createPinger(ping_protocol, IoService,
306                                        NetworkInterfaceName, PingReplyTimeout);
307
308     update_dns_resolver( ping_protocol );
309 }
310
311 bool PingScheduler::can_change_ping_protocol() const
312 {
313     // TODO can_change_ping_protocol() and get_next_ping_protocol() may be implemented in a Algorithm
314     // class that can be exchanged in this class to provide an algorithm neutral class
315     return true;
316 }
317
318 //------------------------------------------------------------------------------
319 // DNS host name resolution
320 //------------------------------------------------------------------------------
321
322 // show "!" after host name if running on outdated IPs
323 void PingScheduler::update_log_prefix()
324 {
325     std::stringstream temp;
326     temp << "PS(" << DestinationAddress;
327     if (ContinueOnOutdatedIps)
328         temp << "!";
329     temp << "): ";
330     LogPrefix = temp.str();
331 }
332
333 void PingScheduler::update_dns_resolver( PingProtocol current_protocol )
334 {
335     if (Resolver && Resolver->is_resolving())
336     {
337         GlobalLogger.warning() << "Resolver still seems to be resolving "
338                                << "--> cancel!";
339         Resolver->cancel_resolve();
340     }
341
342     // DNS master caches created resolvers and resolved IPs, so this will
343     // probably just return an existing resolver with already resolved IPs for
344     // requested protocol ( ICMP/TCP is ignored, only IPv4/v6 is important)
345     Resolver = DnsMaster::get_instance()->get_resolver_for(DestinationAddress,
346                                                            current_protocol);
347     // start resolving if no ips available
348     if ( Resolver->have_up_to_date_ip() )
349     {
350         if (!Resolver->is_resolving())
351             GlobalLogger.warning() << "PingScheduler: have up to date IPs but "
352                 << "resolver seems to be resolving all the same... "
353                 << "Start pinging anyway!";
354         ping_when_ready();
355     }
356     else
357         start_resolving_ping_address();
358 }
359
360 void PingScheduler::start_resolving_ping_address()
361 {
362     Resolver->async_resolve( boost::bind(&PingScheduler::dns_resolve_callback,
363                                           this, _1, _2) );
364 }
365
366 void PingScheduler::dns_resolve_callback(const bool was_success,
367                                          const int cname_count)
368 {
369     GlobalLogger.info() << "PingScheduler: dns resolution finished "
370                         << "with success = " << was_success << " "
371                         << "and cname_count = " << cname_count;
372
373     // TODO this is too simple, but need to think more about how to update here!
374     // (may have to switch back some time to resolver for original host or so
375     ContinueOnOutdatedIps = !was_success;
376     update_log_prefix();
377
378     if ( was_success )
379     {
380         HostAnalyzer.set_resolved_ip_count( Resolver->get_resolved_ip_count());
381         ping_when_ready();
382     }
383     else
384     {   // host name resolution failed; try again bypassing first outdated CNAME
385         // or using cached IP
386
387         std::string skip_host = Resolver->get_skip_cname();
388
389         if (skip_host.empty())
390         {   // continue with IP
391             GlobalLogger.notice() << LogPrefix << "DNS failed, "
392                 << "try anyway with cached data";
393             HostAnalyzer.set_resolved_ip_count(0);
394             ping_when_ready();
395         }
396         else
397         {   // have CNAME to continue
398             GlobalLogger.notice() << LogPrefix << "DNS failed, "
399                 << "try again skipping a CNAME and resolving "
400                 << skip_host << " directly";
401             Resolver = DnsMaster::get_instance()
402                                    ->get_resolver_for(skip_host, *ProtocolIter);
403             start_resolving_ping_address();
404
405             // (the original resolver is still alive and cached by DnsMaster and
406             //  counting down time to re-try on its own until cancel_resolve)
407         }
408     }
409 }