give HostStatus analyzer more info: details on ping success/failure and ping duration
[pingcheck] / src / host / pingscheduler.cpp
CommitLineData
91fcc471
TJ
1/*
2The software in this package is distributed under the GNU General
3Public License version 2 (with a special exception described below).
4
5A copy of GNU General Public License (GPL) is included in this distribution,
6in the file COPYING.GPL.
7
8As a special exception, if other files instantiate templates or use macros
9or inline functions from this file, or you compile this file and link it
10with other works to produce a work based on this file, this file
11does not by itself cause the resulting work to be covered
12by the GNU General Public License.
13
14However the source code for this file must still be made available
15in accordance with section (3) of the GNU General Public License.
16
17This exception does not invalidate any other reasons why a work based
18on this file might be covered by the GNU General Public License.
19*/
8f66f529 20#include "host/pingscheduler.h"
9c55ecd3 21
0d46491b 22#include <iostream>
1309d0e4 23#include <limits>
0d46491b 24
9c55ecd3 25#include <boost/bind.hpp>
26b0f687 26#include <boost/foreach.hpp>
0d46491b 27
301610ca
GMF
28#include <logfunc.hpp>
29
780b0bca 30#include "boost_assert_handler.h"
086e2cc0 31#include "host/pingerfactory.h"
26b0f687 32#include "dns/dnsmaster.h"
51cbc790 33#include "icmp/icmppinger.h"
72e54d1c 34#include "link/linkstatus.h"
ced28dc7 35
a7c2eb51 36using namespace std;
2bf8720f
GMF
37using boost::asio::io_service;
38using boost::bind;
101be5ce 39using boost::date_time::time_resolution_traits_adapted64_impl;
2bf8720f
GMF
40using boost::posix_time::microsec_clock;
41using boost::posix_time::ptime;
42using boost::posix_time::seconds;
e58d7507 43using boost::shared_ptr;
301610ca 44using I2n::Logger::GlobalLogger;
a7c2eb51 45
4c2a5ab5 46//-----------------------------------------------------------------------------
4bb97b45 47// PingScheduler
4c2a5ab5
GMF
48//-----------------------------------------------------------------------------
49
086e2cc0
GMF
50/**
51 * @brief Parameterized constructor.
52 *
ab2cb1ef 53 * @param io_serv The one @c io_serv object that controls async processing
c1abff61 54 * @param network_interface The name of the network interface sending the pings.
086e2cc0
GMF
55 * @param destination_address The remote address to ping.
56 * @param destination_port The remote port to ping.
57 * @param ping_protocol_list A list of protocols to use.
58 * @param ping_interval_in_sec Amount of time between each ping.
59 * @param ping_fail_percentage_limit Maximum amount of pings that can fail.
086e2cc0 60 * @param link_analyzer The object to monitor the link status.
365036be 61 * @param first_delay Delay in seconds from start_pinging to first ping attempt
086e2cc0 62 */
4bb97b45 63PingScheduler::PingScheduler(
ab2cb1ef 64 const IoServiceItem io_serv,
2bf8720f
GMF
65 const string &network_interface,
66 const string &destination_address,
238da857 67 const uint16_t destination_port,
fe6a2f80 68 const PingProtocolList &ping_protocol_list,
c15a722d 69 const long ping_interval_in_sec,
a341119a 70 const int ping_fail_percentage_limit,
079d19ab 71 const int ping_reply_timeout,
59733431
CH
72 LinkStatusItem link_analyzer,
73 const int first_delay
c5e4bfa1 74
e39cc3da 75) :
23f51766
CH
76 IoService( io_serv ),
77 NetworkInterfaceName( network_interface ),
23f51766
CH
78 DestinationAddress( destination_address ),
79 DestinationPort( destination_port ),
26b0f687
CH
80 Protocols( ping_protocol_list ),
81 ProtocolIter(),
82 PingIntervalInSec( ping_interval_in_sec ),
83 FirstDelay( first_delay ),
ab2cb1ef 84 NextPingTimer( *io_serv ),
e39cc3da 85 TimeSentLastPing( microsec_clock::universal_time() ),
26b0f687 86 PingReplyTimeout( ping_reply_timeout ),
c1abff61
CH
87 HostAnalyzer( destination_address, ping_fail_percentage_limit,
88 link_analyzer ),
26b0f687
CH
89 Resolver(),
90 Ping(),
91 WantToPing( false ),
92 LogPrefix(),
8d26221d 93 ContinueOnOutdatedIps( false )
ced28dc7 94{
475ad07c
GMF
95 BOOST_ASSERT( !network_interface.empty() );
96 BOOST_ASSERT( !destination_address.empty() );
23f51766
CH
97 BOOST_ASSERT( ( 0 < destination_port ) &&
98 ( destination_port < numeric_limits<uint16_t>::max() ) );
f71cb7e1 99 BOOST_ASSERT( 0 < ping_interval_in_sec );
23f51766
CH
100 BOOST_ASSERT( (0 <= ping_fail_percentage_limit) &&
101 ( ping_fail_percentage_limit <= 100) );
102
26b0f687 103 update_log_prefix();
23f51766
CH
104
105 init_ping_protocol();
2d591235 106}
ced28dc7 107
086e2cc0
GMF
108/**
109 * @brief Destructor.
110 */
4bb97b45 111PingScheduler::~PingScheduler()
2d591235 112{
ced28dc7
GMF
113}
114
c1d776ba 115void PingScheduler::stop_pinging()
ced28dc7 116{
c1abff61
CH
117 // stop pinger and resolver
118 GlobalLogger.debug() << LogPrefix << "scheduler: stop pinging";
5a9bc2d1 119 Ping->stop_pinging();
72be9e7d 120 cancel_resolve(true);
f076f8d4 121
c1abff61
CH
122 // now cancel the own timer in case that pinger cancelation called callback
123 GlobalLogger.debug() << LogPrefix << "scheduler: cancel timer";
f076f8d4 124 NextPingTimer.cancel();
c1d776ba
CH
125}
126
127/**
23f51766 128 * @brief Start into infinite loop of calls to ping
cad0b08d
CH
129 *
130 * Does not start yet but set NextPingTimer (possibly to 0), so action starts
131 * when io_service is started
c1d776ba
CH
132 */
133void PingScheduler::start_pinging()
134{
c1d776ba 135 if ( FirstDelay > 0 )
c1abff61
CH
136 GlobalLogger.info() << LogPrefix << "Delaying first ping by "
137 << FirstDelay << "s";
59733431 138 else
c1abff61 139 GlobalLogger.info() << LogPrefix << "Schedule ping as soon as possible";
cad0b08d
CH
140
141 (void) NextPingTimer.expires_from_now( seconds( FirstDelay ) );
23f51766 142 NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
cad0b08d 143 boost::asio::placeholders::error ) );
09de3c4b
GMF
144}
145
4e91c69a 146
c1d776ba 147/**
23f51766 148 * @brief call Ping::ping and schedule a call to ping_done_handler when finished
c1d776ba 149 */
23f51766 150void PingScheduler::ping(const boost::system::error_code &error)
823623d9 151{
d26dce11 152 if ( error )
f076f8d4 153 { // get here, e.g. by NextPingTimer.cancel in stop_pinging
d26dce11 154 if ( error == boost::asio::error::operation_aborted )
c1abff61
CH
155 GlobalLogger.error() << LogPrefix << "Timer for ping was cancelled!"
156 << " --> Stopping";
d26dce11 157 else
c1abff61
CH
158 GlobalLogger.error() << LogPrefix << "Received error " << error
159 << " waiting for ping! Stopping";
d26dce11
CH
160 return;
161 }
162
23f51766
CH
163 // ping as soon as dns is ready
164 WantToPing = true;
8d26221d 165 ping_when_ready();
23f51766
CH
166}
167
c1d776ba 168
8d26221d 169void PingScheduler::ping_when_ready()
23f51766
CH
170{
171 if ( !WantToPing )
823623d9 172 {
2a4dde8b 173 GlobalLogger.info() << LogPrefix << "waiting for ping request "
f8918bd5 174 << "(should take no more than than " << PingIntervalInSec << "s)";
23f51766 175 return;
823623d9 176 }
23f51766
CH
177 else if ( Resolver && Resolver->is_resolving() )
178 {
fd62d09f 179 GlobalLogger.info() << LogPrefix << "waiting for DNS to finish";
23f51766
CH
180 return;
181 }
182 else if ( !Resolver )
183 // should not happen, but check anyway
184 GlobalLogger.warning() << LogPrefix << "Have no resolver!";
09de3c4b 185
c1abff61 186 GlobalLogger.info() << LogPrefix << "start ping";
23f51766 187 WantToPing = false;
09de3c4b 188
fd62d09f
CH
189 // try to get an up-to-date IP (ContinueOnOutdatedIps may only be set
190 // because a CNAME was out of date -- IPs may still be current)
26b0f687 191 HostAddress ip = Resolver->get_next_ip();
8d26221d 192
fd62d09f
CH
193 if ( !ip.is_valid() )
194 { // this can happen in 2 cases: if ContinueOnOutdatedIps==true
195 // or when ip went out of date between resolve and now
196 // --> try to use outdated IP
26b0f687
CH
197 GlobalLogger.info() << LogPrefix << "Checking for outdated IPs";
198 bool check_up_to_date = false;
199 ip = Resolver->get_next_ip(check_up_to_date);
200 }
fd62d09f 201 if ( !ip.is_valid() )
b44a5f96
CH
202 { // Do not even have an outdated IP!
203 // This happens if have no cached IPs and resolve failed
204 GlobalLogger.info() << LogPrefix << "Not even outdated IP to ping "
205 << "-- treat like a failed ping.";
fd62d09f
CH
206
207 // skip the ping and directly call ping_done_handler
838e0acf
CH
208 HostAnalyzer.set_resolved_ip_count(1); // must have been 0 --> failed
209 // ping would create failed assumption (nPings > nIPs)
96c4e7a4 210 ping_done_handler(PingStatus_FailureNoIP);
838e0acf 211 HostAnalyzer.set_resolved_ip_count(0); // set back
fd62d09f
CH
212 }
213 else
214 {
215 uint32_t ttl = ip.get_ttl().get_updated_value();
2a4dde8b
CH
216 std::string expiry;
217 if (ttl == 0)
218 expiry = "out of date!";
219 else
220 {
221 boost::posix_time::ptime now =
fd62d09f 222 boost::posix_time::second_clock::local_time();
2a4dde8b
CH
223 expiry = boost::posix_time::to_simple_string(now + seconds(ttl));
224 }
fd62d09f
CH
225
226 GlobalLogger.info() << LogPrefix << "pinging IP " << ip.get_ip()
2a4dde8b 227 << " with TTL " << ttl << "s (" << expiry << ")";
72be9e7d
CH
228 Ping->ping( ip.get_ip(),
229 DestinationPort,
230 boost::bind(&PingScheduler::ping_done_handler, this, _1) );
96c4e7a4 231 TimeSentLastPing = microsec_clock::universal_time();
26b0f687 232 }
3f6ba924
CH
233}
234
166fd9e9 235
23f51766
CH
236//------------------------------------------------------------------------------
237// Post Processing of Ping result
238//------------------------------------------------------------------------------
e58d7507 239
c1d776ba
CH
240/**
241 * @brief called when Ping::ping is done; calls functions to update
242 * statistics, ping interval and elapsed time;
23f51766 243 * schedules a call to ping, thereby closing the loop
c1d776ba 244 */
96c4e7a4 245void PingScheduler::ping_done_handler( const PingStatus &result )
502b6af0 246{
96c4e7a4
CH
247 PingStatus edited_result = result;
248 if (result == PingStatus_SuccessReply && ContinueOnOutdatedIps)
249 {
250 edited_result = PingStatus_SuccessOutdatedIP;
251
252 // reset ContinueOnOutdatedIps
253 ContinueOnOutdatedIps = false;
254 update_log_prefix();
255 }
256
257 GlobalLogger.info() << LogPrefix << "Ping done with result "
258 << to_string(edited_result);
f8918bd5 259
c1d776ba 260 // post-processing
079d19ab
CH
261 // You must call these 3 methods exactly in this order
262 // TODO Fix this method, once it has a semantic dependency with the
c1d776ba 263 // update_ping_statistics method, because it depends on the PingAnalyzer
a341119a 264 // statistics to update the exceeded_ping_failed_limit
96c4e7a4
CH
265 ptime now = microsec_clock::universal_time();
266 HostAnalyzer.update_ping_statistics( edited_result,
267 (now - TimeSentLastPing).total_microseconds());
d8a91bd6 268 update_ping_interval();
72be9e7d 269
23f51766
CH
270 // get next protocol, possibly start resolving IPs
271 update_ping_protocol();
272
c1d776ba
CH
273 // schedule next ping
274 (void) NextPingTimer.expires_from_now( seconds( PingIntervalInSec ) );
23f51766 275 NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
d26dce11 276 boost::asio::placeholders::error ) );
d8a91bd6
GMF
277}
278
279void PingScheduler::update_ping_interval()
280{
c1d776ba 281 // have to ping more often?
fb469ffa 282 if ( HostAnalyzer.exceeded_ping_failed_limit() )
d8a91bd6
GMF
283 {
284 PingIntervalInSec.speed_up();
285
c1abff61
CH
286 GlobalLogger.debug() << LogPrefix << "- Speeding up ping interval to: "
287 << PingIntervalInSec << "s";
d8a91bd6
GMF
288 }
289 else
290 {
291 PingIntervalInSec.back_to_original();
292
c1abff61
CH
293 GlobalLogger.debug() << LogPrefix << "- Stick to the original ping "
294 << "interval: " << PingIntervalInSec << "s";
d8a91bd6 295 }
ced28dc7
GMF
296}
297
23f51766
CH
298//------------------------------------------------------------------------------
299// Ping Protocol Rotation
300//------------------------------------------------------------------------------
301
26b0f687 302void PingScheduler::init_ping_protocol()
23f51766 303{
26b0f687 304 ProtocolIter = Protocols.end();
23f51766
CH
305 get_next_ping_protocol();
306}
307
26b0f687 308void PingScheduler::update_ping_protocol()
23f51766
CH
309{
310 if ( can_change_ping_protocol() )
311 {
312 get_next_ping_protocol();
313 }
314}
315
26b0f687 316void PingScheduler::get_next_ping_protocol()
23f51766 317{
8f00b3df
CH
318 if (Ping)
319 {
320 Ping->stop_pinging();
321 Ping.reset();
322 }
323
fd62d09f
CH
324 GlobalLogger.debug() << LogPrefix
325 << "------------------------------------------------------------------";
26b0f687
CH
326 ++ProtocolIter;
327 if (ProtocolIter == Protocols.end())
328 ProtocolIter = Protocols.begin();
329 PingProtocol ping_protocol = *ProtocolIter;
72be9e7d 330 // --> ProtocolIter still points to currently used protocol which is
26b0f687 331 // required in dns_resolve_callback
23f51766 332
23f51766
CH
333 Ping = PingerFactory::createPinger(ping_protocol, IoService,
334 NetworkInterfaceName, PingReplyTimeout);
335
336 update_dns_resolver( ping_protocol );
72be9e7d 337
23f51766
CH
338}
339
26b0f687 340bool PingScheduler::can_change_ping_protocol() const
23f51766 341{
c1abff61
CH
342 // TODO can_change_ping_protocol() and get_next_ping_protocol() may be
343 // implemented in a Algorithm class that can be exchanged in this class to
344 // provide an algorithm neutral class
23f51766
CH
345 return true;
346}
347
348//------------------------------------------------------------------------------
349// DNS host name resolution
350//------------------------------------------------------------------------------
26b0f687
CH
351
352// show "!" after host name if running on outdated IPs
8d26221d 353void PingScheduler::update_log_prefix()
26b0f687
CH
354{
355 std::stringstream temp;
72be9e7d 356 temp << "Sched(" << DestinationAddress;
26b0f687
CH
357 if (ContinueOnOutdatedIps)
358 temp << "!";
359 temp << "): ";
360 LogPrefix = temp.str();
361}
362
363void PingScheduler::update_dns_resolver( PingProtocol current_protocol )
23f51766
CH
364{
365 if (Resolver && Resolver->is_resolving())
72be9e7d
CH
366 cancel_resolve(false);
367
368 if (ContinueOnOutdatedIps)
23f51766 369 {
72be9e7d
CH
370 ContinueOnOutdatedIps = false;
371 update_log_prefix();
23f51766
CH
372 }
373
374 // DNS master caches created resolvers and resolved IPs, so this will
375 // probably just return an existing resolver with already resolved IPs for
376 // requested protocol ( ICMP/TCP is ignored, only IPv4/v6 is important)
377 Resolver = DnsMaster::get_instance()->get_resolver_for(DestinationAddress,
378 current_protocol);
fd62d09f
CH
379
380 // get number of up-to-date IPs
2a4dde8b 381 // TODO should check here, if they will be up to date in PingIntervalInSec
fd62d09f
CH
382 bool check_up_to_date = true;
383 int ip_count = Resolver->get_resolved_ip_count(check_up_to_date);
384 if (ip_count > 0)
23f51766 385 {
fd62d09f 386 GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
f8918bd5 387 << ip_count << " (IPs may be outdated=" << !check_up_to_date << ")";
fd62d09f
CH
388 HostAnalyzer.set_resolved_ip_count( ip_count );
389
72be9e7d 390 if (Resolver->is_resolving())
c1abff61 391 GlobalLogger.warning() << LogPrefix << "have up to date IPs but "
23f51766
CH
392 << "resolver seems to be resolving all the same... "
393 << "Start pinging anyway!";
8d26221d 394 ping_when_ready();
23f51766
CH
395 }
396 else
2a4dde8b
CH
397 {
398 GlobalLogger.info() << LogPrefix
399 << "No up-to-date IPs --> start resolve";
23f51766 400 start_resolving_ping_address();
2a4dde8b
CH
401 // set resolved_ip_count will be called in resolve callback
402 }
23f51766
CH
403}
404
26b0f687 405void PingScheduler::start_resolving_ping_address()
23f51766 406{
26b0f687 407 Resolver->async_resolve( boost::bind(&PingScheduler::dns_resolve_callback,
23f51766
CH
408 this, _1, _2) );
409}
410
26b0f687 411void PingScheduler::dns_resolve_callback(const bool was_success,
cd71d095 412 const int recursion_count)
23f51766 413{
c1abff61 414 GlobalLogger.info() << LogPrefix << "dns resolution finished "
23f51766 415 << "with success = " << was_success << " "
cd71d095 416 << "after " << recursion_count << " recursions";
23f51766 417
26b0f687
CH
418 if ( was_success )
419 {
fd62d09f
CH
420 // trust that a successfull DNS resolve means we have an IP with TTL>0
421 int ip_count = Resolver->get_resolved_ip_count(!ContinueOnOutdatedIps);
422 if (ip_count == 0)
423 { // this will create trouble in HostAnalyzer
424 GlobalLogger.warning() << LogPrefix
425 << "Should not have reached this case: resolve was "
426 << "successfull but still have no IPs (up-to-date="
427 << !ContinueOnOutdatedIps << ")!";
428 if (DnsMaster::get_instance()->get_resolved_ip_ttl_threshold() > 0)
429 GlobalLogger.warning() << LogPrefix << "This probably happened "
430 << "because you specified a TTL threshold > 0 but resolving"
431 << " had no effect on TTLs since external cache is only "
432 << "updated when TTL=0 is reached.";
433 }
434 else
435 {
436 GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
f8918bd5
CH
437 << ip_count << " (IPs may be outdated="
438 << ContinueOnOutdatedIps << ") --> could ping now";
fd62d09f
CH
439 HostAnalyzer.set_resolved_ip_count( ip_count );
440 }
8d26221d 441 ping_when_ready();
26b0f687
CH
442 }
443 else
444 { // host name resolution failed; try again bypassing first outdated CNAME
8d26221d 445 // or using cached IP
26b0f687 446 std::string skip_host = Resolver->get_skip_cname();
23f51766
CH
447
448 if (skip_host.empty())
838e0acf 449 { // try to continue with cached IPs
fd62d09f 450 int ip_count = Resolver->get_resolved_ip_count(false);
838e0acf
CH
451
452 if (ip_count == 0)
453 GlobalLogger.notice() << LogPrefix << "DNS failed "
454 << "and have no cached IPs either --> cannot ping";
455 // ping_when_ready will deal with this case
456 else
457 {
458 ContinueOnOutdatedIps = true;
459 update_log_prefix();
460
461 GlobalLogger.notice() << LogPrefix << "DNS failed, "
462 << "try anyway with cached data";
463 }
464
fd62d09f 465 GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
f8918bd5 466 << ip_count << " (IPs may be outdated=" << true << ")";
fd62d09f
CH
467 HostAnalyzer.set_resolved_ip_count( ip_count );
468
8d26221d 469 ping_when_ready();
23f51766
CH
470 }
471 else
8d26221d 472 { // have CNAME to continue
838e0acf
CH
473 ContinueOnOutdatedIps = true;
474 update_log_prefix();
23f51766
CH
475 GlobalLogger.notice() << LogPrefix << "DNS failed, "
476 << "try again skipping a CNAME and resolving "
477 << skip_host << " directly";
72be9e7d
CH
478
479 cancel_resolve(false);
480
481 // now create new resolver
23f51766 482 Resolver = DnsMaster::get_instance()
26b0f687 483 ->get_resolver_for(skip_host, *ProtocolIter);
23f51766
CH
484 start_resolving_ping_address();
485 }
486 }
23f51766 487}
72be9e7d
CH
488
489/**
490 * cancel resolver if force_cancel or if it is not resolving DestinationAddress
491 *
492 * Resolvers have a life on their own: they are cached by DnsMaster so never go
493 * out of scope and even after calling callbacks, there might still be a
494 * longterm timer active to re-try resolving.
495 * We want to cancel that long-term timer only if the Resolver is not for our
496 * real, original DestinationAddress but a CNAME, which can happen when trying
497 * to skip cnames and working on out-dated IPs
498 */
499void PingScheduler::cancel_resolve(const bool force_cancel)
500{
501 if (force_cancel)
502 {
503 GlobalLogger.info() << "Cancelling resolver (forced)";
504 Resolver->cancel_resolve();
505 }
506 else if ( Resolver->get_hostname() == DestinationAddress )
507 GlobalLogger.info() << LogPrefix
508 << "Leave original resolver active in background";
509 else
510 {
511 GlobalLogger.info() << LogPrefix << "Cancel resolver for "
512 << Resolver->get_hostname() << " since is not the original "
513 << DestinationAddress;
514 Resolver->cancel_resolve();
515 }
516}
517