moved time duration measurement of ping from scheduler to pingers
[pingcheck] / src / host / pingscheduler.cpp
CommitLineData
91fcc471
TJ
1/*
2The software in this package is distributed under the GNU General
3Public License version 2 (with a special exception described below).
4
5A copy of GNU General Public License (GPL) is included in this distribution,
6in the file COPYING.GPL.
7
8As a special exception, if other files instantiate templates or use macros
9or inline functions from this file, or you compile this file and link it
10with other works to produce a work based on this file, this file
11does not by itself cause the resulting work to be covered
12by the GNU General Public License.
13
14However the source code for this file must still be made available
15in accordance with section (3) of the GNU General Public License.
16
17This exception does not invalidate any other reasons why a work based
18on this file might be covered by the GNU General Public License.
19*/
8f66f529 20#include "host/pingscheduler.h"
9c55ecd3 21
0d46491b 22#include <iostream>
1309d0e4 23#include <limits>
0d46491b 24
9c55ecd3 25#include <boost/bind.hpp>
26b0f687 26#include <boost/foreach.hpp>
0d46491b 27
301610ca
GMF
28#include <logfunc.hpp>
29
780b0bca 30#include "boost_assert_handler.h"
086e2cc0 31#include "host/pingerfactory.h"
26b0f687 32#include "dns/dnsmaster.h"
51cbc790 33#include "icmp/icmppinger.h"
72e54d1c 34#include "link/linkstatus.h"
ced28dc7 35
a7c2eb51 36using namespace std;
2bf8720f
GMF
37using boost::asio::io_service;
38using boost::bind;
101be5ce 39using boost::date_time::time_resolution_traits_adapted64_impl;
2bf8720f
GMF
40using boost::posix_time::microsec_clock;
41using boost::posix_time::ptime;
42using boost::posix_time::seconds;
e58d7507 43using boost::shared_ptr;
301610ca 44using I2n::Logger::GlobalLogger;
a7c2eb51 45
4c2a5ab5 46//-----------------------------------------------------------------------------
4bb97b45 47// PingScheduler
4c2a5ab5
GMF
48//-----------------------------------------------------------------------------
49
086e2cc0
GMF
50/**
51 * @brief Parameterized constructor.
52 *
ab2cb1ef 53 * @param io_serv The one @c io_serv object that controls async processing
c1abff61 54 * @param network_interface The name of the network interface sending the pings.
086e2cc0
GMF
55 * @param destination_address The remote address to ping.
56 * @param destination_port The remote port to ping.
57 * @param ping_protocol_list A list of protocols to use.
58 * @param ping_interval_in_sec Amount of time between each ping.
59 * @param ping_fail_percentage_limit Maximum amount of pings that can fail.
91aa83f9 60 * @param ping_reply_timeout Max amount time to wait for ping to finish
086e2cc0 61 * @param link_analyzer The object to monitor the link status.
365036be 62 * @param first_delay Delay in seconds from start_pinging to first ping attempt
91aa83f9 63 * @param n_parallel_pings: Number of pingers to ping the same IP in parallel
086e2cc0 64 */
4bb97b45 65PingScheduler::PingScheduler(
ab2cb1ef 66 const IoServiceItem io_serv,
2bf8720f
GMF
67 const string &network_interface,
68 const string &destination_address,
238da857 69 const uint16_t destination_port,
fe6a2f80 70 const PingProtocolList &ping_protocol_list,
c15a722d 71 const long ping_interval_in_sec,
a341119a 72 const int ping_fail_percentage_limit,
079d19ab 73 const int ping_reply_timeout,
59733431 74 LinkStatusItem link_analyzer,
91aa83f9
CH
75 const int first_delay,
76 const int n_parallel_pings
77 const int parallel_ping_delay
e39cc3da 78) :
23f51766
CH
79 IoService( io_serv ),
80 NetworkInterfaceName( network_interface ),
23f51766
CH
81 DestinationAddress( destination_address ),
82 DestinationPort( destination_port ),
26b0f687
CH
83 Protocols( ping_protocol_list ),
84 ProtocolIter(),
85 PingIntervalInSec( ping_interval_in_sec ),
86 FirstDelay( first_delay ),
ab2cb1ef 87 NextPingTimer( *io_serv ),
e39cc3da 88 TimeSentLastPing( microsec_clock::universal_time() ),
26b0f687 89 PingReplyTimeout( ping_reply_timeout ),
c1abff61 90 HostAnalyzer( destination_address, ping_fail_percentage_limit,
91aa83f9 91 n_parallel_pings, link_analyzer ),
26b0f687 92 Resolver(),
91aa83f9
CH
93 Pingers(),
94 NPingers( n_parallel_pings ),
95 NPingersDone( 0 ),
96 ParallelPingDelay( parallel_ping_delay ),
26b0f687
CH
97 WantToPing( false ),
98 LogPrefix(),
8d26221d 99 ContinueOnOutdatedIps( false )
ced28dc7 100{
475ad07c
GMF
101 BOOST_ASSERT( !network_interface.empty() );
102 BOOST_ASSERT( !destination_address.empty() );
23f51766
CH
103 BOOST_ASSERT( ( 0 < destination_port ) &&
104 ( destination_port < numeric_limits<uint16_t>::max() ) );
f71cb7e1 105 BOOST_ASSERT( 0 < ping_interval_in_sec );
23f51766
CH
106 BOOST_ASSERT( (0 <= ping_fail_percentage_limit) &&
107 ( ping_fail_percentage_limit <= 100) );
108
26b0f687 109 update_log_prefix();
23f51766
CH
110
111 init_ping_protocol();
2d591235 112}
ced28dc7 113
086e2cc0
GMF
114/**
115 * @brief Destructor.
116 */
4bb97b45 117PingScheduler::~PingScheduler()
2d591235 118{
ced28dc7
GMF
119}
120
c1d776ba 121void PingScheduler::stop_pinging()
ced28dc7 122{
c1abff61
CH
123 // stop pinger and resolver
124 GlobalLogger.debug() << LogPrefix << "scheduler: stop pinging";
91aa83f9 125 clear_pingers();
72be9e7d 126 cancel_resolve(true);
f076f8d4 127
c1abff61
CH
128 // now cancel the own timer in case that pinger cancelation called callback
129 GlobalLogger.debug() << LogPrefix << "scheduler: cancel timer";
f076f8d4 130 NextPingTimer.cancel();
c1d776ba
CH
131}
132
133/**
91aa83f9
CH
134 * @brief stop all pingers and remove them from Pingers variable which will
135 * proboably cause their destruction
136 *
137 * Pingers is empty afterwards
138 */
139void PingScheduler::clear_pingers()
140{
141 PingerItem pinger;
142 while ( !Pingers.empty() )
143 {
144 pinger = Pingers.front();
145 pinger->stop_pinging();
146 Pingers.pop_front();
147 }
148}
149
150/**
23f51766 151 * @brief Start into infinite loop of calls to ping
cad0b08d
CH
152 *
153 * Does not start yet but set NextPingTimer (possibly to 0), so action starts
154 * when io_service is started
c1d776ba
CH
155 */
156void PingScheduler::start_pinging()
157{
c1d776ba 158 if ( FirstDelay > 0 )
c1abff61
CH
159 GlobalLogger.info() << LogPrefix << "Delaying first ping by "
160 << FirstDelay << "s";
59733431 161 else
c1abff61 162 GlobalLogger.info() << LogPrefix << "Schedule ping as soon as possible";
cad0b08d
CH
163
164 (void) NextPingTimer.expires_from_now( seconds( FirstDelay ) );
23f51766 165 NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
cad0b08d 166 boost::asio::placeholders::error ) );
09de3c4b
GMF
167}
168
4e91c69a 169
c1d776ba 170/**
23f51766 171 * @brief call Ping::ping and schedule a call to ping_done_handler when finished
c1d776ba 172 */
23f51766 173void PingScheduler::ping(const boost::system::error_code &error)
823623d9 174{
d26dce11 175 if ( error )
f076f8d4 176 { // get here, e.g. by NextPingTimer.cancel in stop_pinging
d26dce11 177 if ( error == boost::asio::error::operation_aborted )
c1abff61
CH
178 GlobalLogger.error() << LogPrefix << "Timer for ping was cancelled!"
179 << " --> Stopping";
d26dce11 180 else
c1abff61
CH
181 GlobalLogger.error() << LogPrefix << "Received error " << error
182 << " waiting for ping! Stopping";
d26dce11
CH
183 return;
184 }
185
23f51766
CH
186 // ping as soon as dns is ready
187 WantToPing = true;
8d26221d 188 ping_when_ready();
23f51766
CH
189}
190
c1d776ba 191
8d26221d 192void PingScheduler::ping_when_ready()
23f51766
CH
193{
194 if ( !WantToPing )
823623d9 195 {
2a4dde8b 196 GlobalLogger.info() << LogPrefix << "waiting for ping request "
91aa83f9 197 << "(should take no more than " << PingIntervalInSec << "s)";
23f51766 198 return;
823623d9 199 }
23f51766
CH
200 else if ( Resolver && Resolver->is_resolving() )
201 {
fd62d09f 202 GlobalLogger.info() << LogPrefix << "waiting for DNS to finish";
23f51766
CH
203 return;
204 }
205 else if ( !Resolver )
206 // should not happen, but check anyway
207 GlobalLogger.warning() << LogPrefix << "Have no resolver!";
09de3c4b 208
c1abff61 209 GlobalLogger.info() << LogPrefix << "start ping";
23f51766 210 WantToPing = false;
09de3c4b 211
fd62d09f
CH
212 // try to get an up-to-date IP (ContinueOnOutdatedIps may only be set
213 // because a CNAME was out of date -- IPs may still be current)
26b0f687 214 HostAddress ip = Resolver->get_next_ip();
8d26221d 215
fd62d09f
CH
216 if ( !ip.is_valid() )
217 { // this can happen in 2 cases: if ContinueOnOutdatedIps==true
218 // or when ip went out of date between resolve and now
219 // --> try to use outdated IP
26b0f687
CH
220 GlobalLogger.info() << LogPrefix << "Checking for outdated IPs";
221 bool check_up_to_date = false;
222 ip = Resolver->get_next_ip(check_up_to_date);
223 }
fd62d09f 224 if ( !ip.is_valid() )
b44a5f96
CH
225 { // Do not even have an outdated IP!
226 // This happens if have no cached IPs and resolve failed
227 GlobalLogger.info() << LogPrefix << "Not even outdated IP to ping "
228 << "-- treat like a failed ping.";
fd62d09f
CH
229
230 // skip the ping and directly call ping_done_handler
838e0acf
CH
231 HostAnalyzer.set_resolved_ip_count(1); // must have been 0 --> failed
232 // ping would create failed assumption (nPings > nIPs)
9c0dcf33 233 ping_done_handler(PingStatus_FailureNoIP, 0);
838e0acf 234 HostAnalyzer.set_resolved_ip_count(0); // set back
fd62d09f
CH
235 }
236 else
237 {
91aa83f9
CH
238 boost::asio::ip::address actual_ip = ip.get_ip();
239 GlobalLogger.info() << LogPrefix << "pinging IP " << actual_ip
240 << " with TTL " << ip.get_ttl().get_updated_value() << "s";
241 int delay_count = 0;
242 BOOST_FOREACH( const PingerItem &pinger, Pingers )
2a4dde8b 243 {
91aa83f9
CH
244 boost::asio::deadline_timer delayed_ping_timer( IoService );
245 delayed_ping_timer.expires_from_now(
246 milliseconds(delay_count * ParallelPingDelay);
247 delayed_ping_timer.async_wait( bind( &PingScheduler::delayed_ping,
248 this, pinger) );
249 ++delay_count;
2a4dde8b 250 }
96c4e7a4 251 TimeSentLastPing = microsec_clock::universal_time();
91aa83f9 252 NPingersDone = 0;
26b0f687 253 }
3f6ba924
CH
254}
255
91aa83f9
CH
256void delayed_ping( const PingerItem &pinger )
257{
258 pinger->ping( actual_ip,
259 DestinationPort,
260 boost::bind(&PingScheduler::ping_done_handler,
261 this, _1) );
262}
263
166fd9e9 264
23f51766 265//------------------------------------------------------------------------------
91aa83f9 266// Post Processing of Ping result and Preparation for next ping
23f51766 267//------------------------------------------------------------------------------
e58d7507 268
c1d776ba
CH
269/**
270 * @brief called when Ping::ping is done; calls functions to update
271 * statistics, ping interval and elapsed time;
23f51766 272 * schedules a call to ping, thereby closing the loop
c1d776ba 273 */
9c0dcf33
CH
274void PingScheduler::ping_done_handler( const PingStatus &result,
275 const long ping_duration_us )
502b6af0 276{
96c4e7a4
CH
277 PingStatus edited_result = result;
278 if (result == PingStatus_SuccessReply && ContinueOnOutdatedIps)
279 {
280 edited_result = PingStatus_SuccessOutdatedIP;
281
282 // reset ContinueOnOutdatedIps
283 ContinueOnOutdatedIps = false;
284 update_log_prefix();
285 }
286
91aa83f9
CH
287 ++NPingersDone;
288 GlobalLogger.info() << LogPrefix << "Ping " << NPingersDone << " of "
289 << NPingers << " done with result " << to_string(edited_result);
f8918bd5 290
c1d776ba 291 // post-processing
91aa83f9 292 // can call update_ping_interval only after update_ping_statistics!
9c0dcf33 293 HostAnalyzer.update_ping_statistics( edited_result, ping_duration_us );
91aa83f9
CH
294
295 // prepare next ping only after all pingers are done
296 if (NPingersDone == NPingers)
297 prepare_next_ping();
298}
299
300
301void PingScheduler::prepare_next_ping()
302{
d8a91bd6 303 update_ping_interval();
72be9e7d 304
23f51766
CH
305 // get next protocol, possibly start resolving IPs
306 update_ping_protocol();
307
c1d776ba 308 // schedule next ping
91aa83f9
CH
309 int seconds_since_last_ping = (microsec_clock::universal_time()
310 - TimeSentLastPing).total_seconds();
311 if ( seconds_since_last_ping > PingIntervalInSec )
312 {
313 GlobalLogger.info() << "We are late for next ping!";
314 seconds_since_last_ping = PingIntervalInSec;
315 (void) NextPingTimer.expires_from_now( seconds(0) );
316 }
317 else
318 (void) NextPingTimer.expires_from_now( seconds( PingIntervalInSec
319 - seconds_since_last_ping ) );
23f51766 320 NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
d26dce11 321 boost::asio::placeholders::error ) );
d8a91bd6
GMF
322}
323
324void PingScheduler::update_ping_interval()
325{
c1d776ba 326 // have to ping more often?
fb469ffa 327 if ( HostAnalyzer.exceeded_ping_failed_limit() )
d8a91bd6
GMF
328 {
329 PingIntervalInSec.speed_up();
330
c1abff61
CH
331 GlobalLogger.debug() << LogPrefix << "- Speeding up ping interval to: "
332 << PingIntervalInSec << "s";
d8a91bd6
GMF
333 }
334 else
335 {
336 PingIntervalInSec.back_to_original();
337
c1abff61
CH
338 GlobalLogger.debug() << LogPrefix << "- Stick to the original ping "
339 << "interval: " << PingIntervalInSec << "s";
d8a91bd6 340 }
ced28dc7
GMF
341}
342
23f51766
CH
343//------------------------------------------------------------------------------
344// Ping Protocol Rotation
345//------------------------------------------------------------------------------
346
26b0f687 347void PingScheduler::init_ping_protocol()
23f51766 348{
26b0f687 349 ProtocolIter = Protocols.end();
23f51766
CH
350 get_next_ping_protocol();
351}
352
26b0f687 353void PingScheduler::update_ping_protocol()
23f51766
CH
354{
355 if ( can_change_ping_protocol() )
356 {
357 get_next_ping_protocol();
358 }
359}
360
26b0f687 361void PingScheduler::get_next_ping_protocol()
23f51766 362{
91aa83f9
CH
363 // stop and destruct all pingers
364 clear_pingers();
fd62d09f
CH
365 GlobalLogger.debug() << LogPrefix
366 << "------------------------------------------------------------------";
91aa83f9
CH
367
368 // get next protocol
26b0f687
CH
369 ++ProtocolIter;
370 if (ProtocolIter == Protocols.end())
371 ProtocolIter = Protocols.begin();
372 PingProtocol ping_protocol = *ProtocolIter;
72be9e7d 373 // --> ProtocolIter still points to currently used protocol which is
26b0f687 374 // required in dns_resolve_callback
23f51766 375
91aa83f9
CH
376 // create new pingers
377 for (int count=0; count<NPingers; ++count)
378 Pingers.push_back( PingerFactory::createPinger(ping_protocol, IoService,
379 NetworkInterfaceName, PingReplyTimeout) );
23f51766
CH
380
381 update_dns_resolver( ping_protocol );
382}
383
26b0f687 384bool PingScheduler::can_change_ping_protocol() const
23f51766 385{
c1abff61
CH
386 // TODO can_change_ping_protocol() and get_next_ping_protocol() may be
387 // implemented in a Algorithm class that can be exchanged in this class to
388 // provide an algorithm neutral class
23f51766
CH
389 return true;
390}
391
392//------------------------------------------------------------------------------
393// DNS host name resolution
394//------------------------------------------------------------------------------
26b0f687
CH
395
396// show "!" after host name if running on outdated IPs
8d26221d 397void PingScheduler::update_log_prefix()
26b0f687
CH
398{
399 std::stringstream temp;
72be9e7d 400 temp << "Sched(" << DestinationAddress;
26b0f687
CH
401 if (ContinueOnOutdatedIps)
402 temp << "!";
403 temp << "): ";
404 LogPrefix = temp.str();
405}
406
407void PingScheduler::update_dns_resolver( PingProtocol current_protocol )
23f51766
CH
408{
409 if (Resolver && Resolver->is_resolving())
72be9e7d
CH
410 cancel_resolve(false);
411
412 if (ContinueOnOutdatedIps)
23f51766 413 {
72be9e7d
CH
414 ContinueOnOutdatedIps = false;
415 update_log_prefix();
23f51766
CH
416 }
417
418 // DNS master caches created resolvers and resolved IPs, so this will
419 // probably just return an existing resolver with already resolved IPs for
420 // requested protocol ( ICMP/TCP is ignored, only IPv4/v6 is important)
421 Resolver = DnsMaster::get_instance()->get_resolver_for(DestinationAddress,
422 current_protocol);
fd62d09f
CH
423
424 // get number of up-to-date IPs
2a4dde8b 425 // TODO should check here, if they will be up to date in PingIntervalInSec
fd62d09f
CH
426 bool check_up_to_date = true;
427 int ip_count = Resolver->get_resolved_ip_count(check_up_to_date);
428 if (ip_count > 0)
23f51766 429 {
fd62d09f 430 GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
f8918bd5 431 << ip_count << " (IPs may be outdated=" << !check_up_to_date << ")";
fd62d09f
CH
432 HostAnalyzer.set_resolved_ip_count( ip_count );
433
72be9e7d 434 if (Resolver->is_resolving())
c1abff61 435 GlobalLogger.warning() << LogPrefix << "have up to date IPs but "
23f51766
CH
436 << "resolver seems to be resolving all the same... "
437 << "Start pinging anyway!";
8d26221d 438 ping_when_ready();
23f51766
CH
439 }
440 else
2a4dde8b
CH
441 {
442 GlobalLogger.info() << LogPrefix
443 << "No up-to-date IPs --> start resolve";
23f51766 444 start_resolving_ping_address();
2a4dde8b
CH
445 // set resolved_ip_count will be called in resolve callback
446 }
23f51766
CH
447}
448
26b0f687 449void PingScheduler::start_resolving_ping_address()
23f51766 450{
26b0f687 451 Resolver->async_resolve( boost::bind(&PingScheduler::dns_resolve_callback,
23f51766
CH
452 this, _1, _2) );
453}
454
26b0f687 455void PingScheduler::dns_resolve_callback(const bool was_success,
cd71d095 456 const int recursion_count)
23f51766 457{
c1abff61 458 GlobalLogger.info() << LogPrefix << "dns resolution finished "
23f51766 459 << "with success = " << was_success << " "
cd71d095 460 << "after " << recursion_count << " recursions";
23f51766 461
26b0f687
CH
462 if ( was_success )
463 {
fd62d09f
CH
464 // trust that a successfull DNS resolve means we have an IP with TTL>0
465 int ip_count = Resolver->get_resolved_ip_count(!ContinueOnOutdatedIps);
466 if (ip_count == 0)
467 { // this will create trouble in HostAnalyzer
468 GlobalLogger.warning() << LogPrefix
469 << "Should not have reached this case: resolve was "
470 << "successfull but still have no IPs (up-to-date="
471 << !ContinueOnOutdatedIps << ")!";
472 if (DnsMaster::get_instance()->get_resolved_ip_ttl_threshold() > 0)
473 GlobalLogger.warning() << LogPrefix << "This probably happened "
474 << "because you specified a TTL threshold > 0 but resolving"
475 << " had no effect on TTLs since external cache is only "
476 << "updated when TTL=0 is reached.";
477 }
478 else
479 {
480 GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
f8918bd5
CH
481 << ip_count << " (IPs may be outdated="
482 << ContinueOnOutdatedIps << ") --> could ping now";
fd62d09f
CH
483 HostAnalyzer.set_resolved_ip_count( ip_count );
484 }
8d26221d 485 ping_when_ready();
26b0f687
CH
486 }
487 else
488 { // host name resolution failed; try again bypassing first outdated CNAME
8d26221d 489 // or using cached IP
26b0f687 490 std::string skip_host = Resolver->get_skip_cname();
23f51766
CH
491
492 if (skip_host.empty())
838e0acf 493 { // try to continue with cached IPs
fd62d09f 494 int ip_count = Resolver->get_resolved_ip_count(false);
838e0acf
CH
495
496 if (ip_count == 0)
497 GlobalLogger.notice() << LogPrefix << "DNS failed "
498 << "and have no cached IPs either --> cannot ping";
499 // ping_when_ready will deal with this case
500 else
501 {
502 ContinueOnOutdatedIps = true;
503 update_log_prefix();
504
505 GlobalLogger.notice() << LogPrefix << "DNS failed, "
506 << "try anyway with cached data";
507 }
508
fd62d09f 509 GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
f8918bd5 510 << ip_count << " (IPs may be outdated=" << true << ")";
fd62d09f
CH
511 HostAnalyzer.set_resolved_ip_count( ip_count );
512
8d26221d 513 ping_when_ready();
23f51766
CH
514 }
515 else
8d26221d 516 { // have CNAME to continue
838e0acf
CH
517 ContinueOnOutdatedIps = true;
518 update_log_prefix();
23f51766
CH
519 GlobalLogger.notice() << LogPrefix << "DNS failed, "
520 << "try again skipping a CNAME and resolving "
521 << skip_host << " directly";
72be9e7d
CH
522
523 cancel_resolve(false);
524
525 // now create new resolver
23f51766 526 Resolver = DnsMaster::get_instance()
26b0f687 527 ->get_resolver_for(skip_host, *ProtocolIter);
23f51766
CH
528 start_resolving_ping_address();
529 }
530 }
23f51766 531}
72be9e7d
CH
532
533/**
534 * cancel resolver if force_cancel or if it is not resolving DestinationAddress
535 *
536 * Resolvers have a life on their own: they are cached by DnsMaster so never go
537 * out of scope and even after calling callbacks, there might still be a
538 * longterm timer active to re-try resolving.
539 * We want to cancel that long-term timer only if the Resolver is not for our
540 * real, original DestinationAddress but a CNAME, which can happen when trying
541 * to skip cnames and working on out-dated IPs
542 */
543void PingScheduler::cancel_resolve(const bool force_cancel)
544{
545 if (force_cancel)
546 {
547 GlobalLogger.info() << "Cancelling resolver (forced)";
548 Resolver->cancel_resolve();
549 }
550 else if ( Resolver->get_hostname() == DestinationAddress )
551 GlobalLogger.info() << LogPrefix
552 << "Leave original resolver active in background";
553 else
554 {
555 GlobalLogger.info() << LogPrefix << "Cancel resolver for "
556 << Resolver->get_hostname() << " since is not the original "
557 << DestinationAddress;
558 Resolver->cancel_resolve();
559 }
560}
561