/*
 The software in this package is distributed under the GNU General
 Public License version 2 (with a special exception described below).

 A copy of GNU General Public License (GPL) is included in this distribution,
 in the file COPYING.GPL.

 As a special exception, if other files instantiate templates or use macros
 or inline functions from this file, or you compile this file and link it
 with other works to produce a work based on this file, this file
 does not by itself cause the resulting work to be covered
 by the GNU General Public License.

 However the source code for this file must still be made available
 in accordance with section (3) of the GNU General Public License.

 This exception does not invalidate any other reasons why a work based
 on this file might be covered by the GNU General Public License.
 */

#include "host/pingscheduler.h"

// NOTE(review): the five directives below have lost their header names (the
// angle-bracket contents are missing in this copy of the file). They are kept
// verbatim; restore the names from version control before building.
#include
#include
#include
#include
#include

#include "boost_assert_handler.h"
#include "host/pingerfactory.h"
#include "dns/dnsmaster.h"
#include "icmp/icmppinger.h"
#include "link/linkstatus.h"

using namespace std;
using boost::asio::io_service;
using boost::bind;
using boost::date_time::time_resolution_traits_adapted64_impl;
using boost::posix_time::microsec_clock;
using boost::posix_time::ptime;
using boost::posix_time::seconds;
using boost::posix_time::milliseconds;
using boost::shared_ptr;
using I2n::Logger::GlobalLogger;

//-----------------------------------------------------------------------------
// PingScheduler
//-----------------------------------------------------------------------------

/**
 * @brief Parameterized constructor.
 *
 * Stores the configuration, builds the log prefix, selects the first ping
 * protocol and already triggers DNS resolution so that an IP is (hopefully)
 * available by the time the first ping is due.
 *
 * @param io_serv The one @c io_serv object that controls async processing
 * @param network_interface The name of the network interface sending the pings.
 * @param destination_address The remote address to ping.
 * @param destination_port The remote port to ping.
 * @param ping_protocol_list A list of protocols to use.
 * @param ping_interval_in_sec Amount of time between each ping.
 * @param ping_fail_percentage_limit Maximum amount of pings that can fail.
 * @param ping_congestion_percentage_limit Amount of pings indicating a
 *    congested line
 * @param congest_caused_by_fail_percentage_limit Forwarded unchanged to the
 *    host analyzer (NOTE(review): exact semantics are defined there -- confirm)
 * @param ping_congestion_duration_thresh Duration in seconds that indicates a
 *    congested line
 * @param ping_reply_timeout Max amount time to wait for ping to finish
 * @param link_analyzer The object to monitor the link status.
 * @param first_delay Delay in seconds from start_pinging to first ping attempt
 * @param n_parallel_pings Number of pingers to ping the same IP in parallel
 * @param parallel_ping_delay Delay in milliseconds between the starts of two
 *    consecutive parallel pings
 * @param ping_timeout_factor Factor the original ping reply timeout is
 *    multiplied with while the congestion limit is exceeded
 */
PingScheduler::PingScheduler(
        const IoServiceItem io_serv,
        const string &network_interface,
        const string &destination_address,
        const uint16_t destination_port,
        const PingProtocolList &ping_protocol_list,
        const long ping_interval_in_sec,
        const int ping_fail_percentage_limit,
        const int ping_congestion_percentage_limit,
        const int congest_caused_by_fail_percentage_limit,
        const int ping_congestion_duration_thresh,
        const int ping_reply_timeout,
        LinkStatusItem link_analyzer,
        const int first_delay,
        const int n_parallel_pings,
        const int parallel_ping_delay,
        const int ping_timeout_factor
) :
    IoService( io_serv ),
    NetworkInterfaceName( network_interface ),
    DestinationAddress( destination_address ),
    DestinationPort( destination_port ),
    Protocols( ping_protocol_list ),
    ProtocolIter(),
    PingIntervalInSec( ping_interval_in_sec ),
    NPingers( n_parallel_pings ),
    FirstDelay( first_delay ),
    NextPingTimer( *io_serv ),
    TimeSentLastPing( microsec_clock::universal_time() ),
    PingReplyTimeout( ping_reply_timeout ),
    PingReplyTimeoutOrig( ping_reply_timeout ),
    HostAnalyzer( destination_address,
                  ping_fail_percentage_limit,
                  ping_congestion_percentage_limit,
                  congest_caused_by_fail_percentage_limit,
                  ping_congestion_duration_thresh,
                  n_parallel_pings,
                  link_analyzer ),
    Resolver(),
    Pingers(),
    NPingersDone( 0 ),
    ParallelPingDelay( parallel_ping_delay ),
    DelayedPingTimer( *io_serv ),
    WantToPing( false ),
    LogPrefix(),
    ContinueOnOutdatedIps( false ),
    PingTimeoutFactor( ping_timeout_factor )
{
    BOOST_ASSERT( !network_interface.empty() );
    BOOST_ASSERT( !destination_address.empty() );
    // NOTE(review): the template argument was missing in this copy of the
    // file; uint16_t is the declared type of destination_port.
    BOOST_ASSERT( ( 0 < destination_port ) &&
                  ( destination_port < numeric_limits<uint16_t>::max() ) );
    BOOST_ASSERT( 0 < ping_interval_in_sec );
    BOOST_ASSERT( (0 <= ping_fail_percentage_limit) &&
                  ( ping_fail_percentage_limit <= 100) );

    update_log_prefix();

    init_ping_protocol();

    // start resolving already so we are prepared to ping
    update_dns_resolver();
}

/**
 * @brief Destructor.
 */
PingScheduler::~PingScheduler()
{
}

/**
 * @brief Stop all pingers, force-cancel the resolver and cancel the timer
 *   that would trigger the next ping.
 *
 * NOTE(review): DelayedPingTimer is not cancelled here. Whether a still
 * pending delayed_ping copes with the then empty Pingers list cannot be seen
 * from this file (the head of delayed_ping is missing) -- please verify.
 */
void PingScheduler::stop_pinging()
{
    // stop pinger and resolver
    GlobalLogger.debug() << LogPrefix << "scheduler: stop pinging";
    clear_pingers();
    cancel_resolve(true);

    // now cancel the own timer in case that pinger cancelation called callback
    GlobalLogger.debug() << LogPrefix << "scheduler: cancel timer";
    NextPingTimer.cancel();
}

/**
 * @brief stop all pingers and remove them from Pingers variable which will
 *   probably cause their destruction
 *
 * Pingers is empty afterwards
 */
void PingScheduler::clear_pingers()
{
    PingerItem pinger;
    while(!Pingers.empty())
    {
        pinger = Pingers.back();
        pinger->stop_pinging();
        Pingers.pop_back();
    }
}

/**
 * @brief Start into infinite loop of calls to ping
 *
 * Does not start yet but set NextPingTimer (possibly to 0), so action starts
 *   when io_service is started
 */
void PingScheduler::start_pinging()
{
    if ( FirstDelay > 0 )
        GlobalLogger.info() << LogPrefix << "Delaying first ping by "
                            << FirstDelay << "s";
    else
        GlobalLogger.info() << LogPrefix << "Schedule ping as soon as possible";

    (void) NextPingTimer.expires_from_now( seconds( FirstDelay ) );
    NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
                                    boost::asio::placeholders::error ) );
}

/**
 * @brief Handler of NextPingTimer: requests a ping.
 *
 * On a timer error (including cancellation) the error is logged and the ping
 * loop ends here. Otherwise WantToPing is set and ping_when_ready decides
 * whether the ping can start right away or has to wait for DNS.
 *
 * @param error Result of the timer wait.
 */
void PingScheduler::ping(const boost::system::error_code &error)
{
    if ( error )
    {   // get here, e.g. by NextPingTimer.cancel in stop_pinging
        if ( error == boost::asio::error::operation_aborted )
            GlobalLogger.error() << LogPrefix << "Timer for ping was cancelled!"
                                 << " --> Stopping";
        else
            GlobalLogger.error() << LogPrefix << "Received error " << error
                                 << " waiting for ping! Stopping";
        return;
    }

    // ping as soon as dns is ready
    WantToPing = true;
    ping_when_ready();
}

/**
 * @brief Start the actual ping if one was requested and DNS is not busy.
 *
 * Returns without doing anything if no ping was requested (WantToPing is
 * false), if the resolver is still resolving, or if there is no resolver.
 * Otherwise fetches an IP from the resolver, first an up-to-date one and,
 * failing that, an outdated one.
 *
 * NOTE(review): the tail of this function and the head of delayed_ping are
 * missing from this copy of the file, see the marker below.
 */
void PingScheduler::ping_when_ready()
{
    if ( !WantToPing )
    {
        GlobalLogger.info() << LogPrefix << "waiting for ping request "
            << "(should take no more than " << PingIntervalInSec << "s)";
        return;
    }
    else if ( Resolver && Resolver->is_resolving() )
    {
        GlobalLogger.info() << LogPrefix << "waiting for DNS to finish";
        return;
    }
    else if ( !Resolver )
    {
        // should not happen, but check anyway
        GlobalLogger.warning() << LogPrefix << "Have no resolver!";
        return;
    }

    GlobalLogger.info() << LogPrefix << "start ping";
    WantToPing = false;

    // try to get an up-to-date IP (ContinueOnOutdatedIps may only be set
    //   because a CNAME was out of date -- IPs may still be current)
    HostAddress ip = Resolver->get_next_ip();

    if ( !ip.is_valid() )
    {
        // this can happen in 2 cases: if ContinueOnOutdatedIps==true
        // or when ip went out of date between resolve and now
        // --> try to use outdated IP
        GlobalLogger.info() << LogPrefix << "Checking for outdated IPs";
        bool check_up_to_date = false;
        ip = Resolver->get_next_ip(check_up_to_date);
    }
    if ( !ip.is_valid() )
    {   // Do not even have an outdated IP!
        // This happens if have no cached IPs and resolve failed
        GlobalLogger.info() << LogPrefix << "Not even outdated IP to ping "
            << "-- treat like a failed ping.";

        // skip the ping and directly call ping_done_handler the appropriate
        // number of times

        // NOTE(review): SOURCE IS TRUNCATED ON THE NEXT LINE. Everything
        // between the '<' of the loop condition and a later '->' is missing,
        // i.e. the rest of ping_when_ready and the beginning of delayed_ping
        // (the handler bound to DelayedPingTimer further down). The remaining
        // tokens are kept verbatim; restore the lost text from version
        // control -- it cannot be reconstructed from this file.
        for (int count=0; countping(ip, DestinationPort, boost::bind(&PingScheduler::ping_done_handler, this, _1, _2) );

    // (tail of delayed_ping) either all parallel pingers have been started, or
    // the next one is scheduled ParallelPingDelay milliseconds from now
    if (pinger_index >= NPingers-1)
        GlobalLogger.debug() << LogPrefix << "started all delayed pings";
    else
    {
        DelayedPingTimer.expires_from_now( milliseconds(ParallelPingDelay) );
        DelayedPingTimer.async_wait( bind( &PingScheduler::delayed_ping,
                  this, boost::asio::placeholders::error, ip, pinger_index+1) );
    }
}


//------------------------------------------------------------------------------
// Post Processing of Ping result and Preparation for next ping
//------------------------------------------------------------------------------

/**
 * @brief called when Ping::ping is done; calls functions to update
 *   statistics, ping interval and elapsed time;
 *   schedules a call to ping, thereby closing the loop
 *
 * A successful reply obtained while running on outdated IPs is reported as
 * PingStatus_SuccessOutdatedIP and resets ContinueOnOutdatedIps. The next
 * ping is only prepared once all NPingers pingers have reported.
 *
 * @param result Result reported by the pinger.
 * @param ping_duration_us Duration of the ping (microseconds, judging by the
 *   parameter name); forwarded to the host analyzer.
 */
void PingScheduler::ping_done_handler( const PingStatus &result,
                                       const long ping_duration_us )
{
    PingStatus edited_result = result;
    if (result == PingStatus_SuccessReply && ContinueOnOutdatedIps)
    {
        edited_result = PingStatus_SuccessOutdatedIP;

        // reset ContinueOnOutdatedIps
        ContinueOnOutdatedIps = false;
        update_log_prefix();
    }

    ++NPingersDone;
    GlobalLogger.info() << LogPrefix << "Ping " << NPingersDone << " of "
        << NPingers << " done with result " << to_string(edited_result);

    // post-processing
    HostAnalyzer.update_ping_statistics( edited_result, ping_duration_us );

    // prepare next ping only after all pingers are done
    if (NPingersDone == NPingers)
    {
        // stop and destruct all pingers
        clear_pingers();

        GlobalLogger.debug() << LogPrefix
            << "--------------------------------------------------------------";

        // update variables for next ping: number of pings, delay, protocol
        // do this only after call to update_ping_statistics!
        update_ping_protocol();
        update_ping_interval();
        update_ping_number();

        prepare_next_ping();
    }
}

/**
 * @brief Refresh the DNS resolver, reset the done-counter and arm
 *   NextPingTimer for the next ping.
 *
 * The timer is set so that pings start PingIntervalInSec apart, measured
 * from TimeSentLastPing; if that moment has already passed, the next ping is
 * scheduled immediately.
 */
void PingScheduler::prepare_next_ping()
{
    // start DNS resolve if necessary
    update_dns_resolver();

    NPingersDone = 0;

    // schedule next ping
    int seconds_since_last_ping = (microsec_clock::universal_time()
                                    - TimeSentLastPing).total_seconds();
    if ( seconds_since_last_ping > PingIntervalInSec )
    {
        // prefix added for consistency with all other log messages
        GlobalLogger.info() << LogPrefix << "We are late for next ping!";
        seconds_since_last_ping = PingIntervalInSec;
        (void) NextPingTimer.expires_from_now( seconds(0) );
    }
    else
        (void) NextPingTimer.expires_from_now( seconds( PingIntervalInSec
                                                  - seconds_since_last_ping ) );
    NextPingTimer.async_wait( bind( &PingScheduler::ping, this,
                                    boost::asio::placeholders::error ) );
}

/**
 * @brief Speed up the ping interval while the host analyzer reports that the
 *   ping-failed limit is exceeded; otherwise return to the original interval.
 */
void PingScheduler::update_ping_interval()
{
    // have to ping more often?
    if ( HostAnalyzer.exceeded_ping_failed_limit() )
    {
        PingIntervalInSec.speed_up();

        GlobalLogger.debug() << LogPrefix << "- Speeding up ping interval to: "
                             << PingIntervalInSec << "s";
    }
    else
    {
        PingIntervalInSec.back_to_original();

        GlobalLogger.debug() << LogPrefix << "- Stick to the original ping "
                             << "interval: " << PingIntervalInSec << "s";
    }
}

/** in case of congested line, increase number of pings
 *
 * also increases ping timeout if line is congested
 *
 * CAUTION! Only call this after clear_pingers !!!
 * */
void PingScheduler::update_ping_number()
{
    // make sure we do not lose track of pingers here
    if ( NPingersDone != NPingers || !Pingers.empty() )
    {
        GlobalLogger.warning() << LogPrefix << "Should only change number of "
            << "pingers when all are finished and deleted! Have " << NPingers
            << " pingers, " << NPingersDone << " of which are done and "
            << Pingers.size() << " in listDone! Will not change NPingers.";
        return;
    }

    if ( HostAnalyzer.exceeded_ping_congestion_limit() )
    {
        NPingers.increase();

        GlobalLogger.notice() << LogPrefix << "No reply from host, "
            << "switching to burst ping mode with longer timeouts ("
            << DnsMaster::get_cname_chain_str(DestinationAddress) << ")";

        GlobalLogger.debug() << LogPrefix << "- Increasing ping number to: "
                             << NPingers;

        PingReplyTimeout = PingReplyTimeoutOrig * PingTimeoutFactor;
        GlobalLogger.debug() << LogPrefix << "- Increase ping timeout to "
                             << PingReplyTimeout << "s";
    }
    else
    {
        NPingers.back_to_original();

        GlobalLogger.debug() << LogPrefix << "- Stick to the original ping "
                             << "number: " << NPingers;

        PingReplyTimeout = PingReplyTimeoutOrig;
        GlobalLogger.debug() << LogPrefix << "- Reset ping timeout to "
                             << PingReplyTimeout << "s";
    }

    // tell host analyzer so it expects the correct number of ping results
    HostAnalyzer.set_n_parallel_pings(NPingers);
}

//------------------------------------------------------------------------------
// Ping Protocol Rotation
//------------------------------------------------------------------------------

/**
 * @brief Select the first protocol of the list.
 *
 * Starts at end() and advances once, which wraps around to begin().
 */
void PingScheduler::init_ping_protocol()
{
    ProtocolIter = Protocols.end();
    get_next_ping_protocol();
}

/**
 * @brief Advance to the next protocol if the rotation policy allows it.
 */
void PingScheduler::update_ping_protocol()
{
    if ( can_change_ping_protocol() )
    {
        get_next_ping_protocol();
    }
}

/**
 * @brief Advance ProtocolIter cyclically through Protocols.
 *
 * A past-the-end iterator must not be incremented (undefined behaviour), and
 * init_ping_protocol deliberately calls this with ProtocolIter == end().
 * Therefore only increment when not at end(), then wrap around. With an empty
 * list the iterator simply stays at end().
 */
void PingScheduler::get_next_ping_protocol()
{
    if (ProtocolIter != Protocols.end())
        ++ProtocolIter;
    if (ProtocolIter == Protocols.end())
        ProtocolIter = Protocols.begin();
}

/**
 * @brief Policy hook deciding whether the protocol may change before the
 *   next ping; currently always allows it.
 *
 * @return always true
 */
bool PingScheduler::can_change_ping_protocol() const
{
    // TODO can_change_ping_protocol() and get_next_ping_protocol() may be
    // implemented in a Algorithm class that can be exchanged in this class to
    // provide an algorithm neutral class
    return true;
}

//------------------------------------------------------------------------------
// DNS host name resolution
//------------------------------------------------------------------------------

/**
 * @brief Rebuild LogPrefix as "Sched(<destination>): ".
 *
 * show "!" after host name if running on outdated IPs
 */
void PingScheduler::update_log_prefix()
{
    std::stringstream temp;
    temp << "Sched(" << DestinationAddress;
    if (ContinueOnOutdatedIps)
        temp << "!";
    temp << "): ";
    LogPrefix = temp.str();
}

/**
 * @brief Fetch the resolver for DestinationAddress and the current protocol
 *   and make sure IPs are (being) resolved.
 *
 * Resets ContinueOnOutdatedIps. If the resolver already has up-to-date IPs,
 * their count is passed to the host analyzer and ping_when_ready is called;
 * otherwise an asynchronous resolve is started and dns_resolve_callback
 * continues from there.
 */
void PingScheduler::update_dns_resolver()
{
    if (Resolver && Resolver->is_resolving())
        cancel_resolve(false);

    if (ContinueOnOutdatedIps)
    {
        ContinueOnOutdatedIps = false;
        update_log_prefix();
    }

    // DNS master caches created resolvers and resolved IPs, so this will
    // probably just return an existing resolver with already resolved IPs for
    // requested protocol ( ICMP/TCP is ignored, only IPv4/v6 is important)
    Resolver = DnsMaster::get_instance()->get_resolver_for(DestinationAddress,
                                                           *ProtocolIter);

    // get number of up-to-date IPs
    // TODO should check here, if they will be up to date in PingIntervalInSec
    bool check_up_to_date = true;
    int ip_count = Resolver->get_resolved_ip_count(check_up_to_date);
    if (ip_count > 0)
    {
        GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
            << ip_count << " (IPs may be outdated=" << !check_up_to_date << ")";
        HostAnalyzer.set_resolved_ip_count( ip_count );

        if (Resolver->is_resolving())
            GlobalLogger.warning() << LogPrefix << "have up to date IPs but "
                << "resolver seems to be resolving all the same... "
                << "Start pinging anyway!";
        ping_when_ready();
    }
    else
    {
        GlobalLogger.info() << LogPrefix
                            << "No up-to-date IPs --> start resolve";
        start_resolving_ping_address();
        // set resolved_ip_count will be called in resolve callback
    }
}

/**
 * @brief Start an asynchronous resolve on the current resolver;
 *   dns_resolve_callback is called when it finishes.
 */
void PingScheduler::start_resolving_ping_address()
{
    Resolver->async_resolve( boost::bind(&PingScheduler::dns_resolve_callback,
                                          this, _1, _2) );
}

/**
 * @brief Callback for the asynchronous DNS resolve.
 *
 * On success the number of resolved IPs is passed to the host analyzer and
 * ping_when_ready is called. On failure either a new resolve is started for
 * the host name returned by the resolver's get_skip_cname, or -- if there is
 * none -- pinging continues with cached (possibly outdated) IPs.
 *
 * @param was_success Whether the resolve succeeded.
 * @param recursion_count Number of recursions reported by the resolver (only
 *   logged here).
 */
void PingScheduler::dns_resolve_callback(const bool was_success,
                                         const int recursion_count)
{
    GlobalLogger.info() << LogPrefix << "dns resolution finished "
                        << "with success = " << was_success << " "
                        << "after " << recursion_count << " recursions";

    if ( was_success )
    {
        // trust that a successfull DNS resolve means we have an IP with TTL>0
        int ip_count = Resolver->get_resolved_ip_count(!ContinueOnOutdatedIps);
        if (ip_count == 0)
        {
            GlobalLogger.warning() << LogPrefix
                << "Should not have reached this case: resolve was "
                << "successfull but still have no IPs (up-to-date="
                << !ContinueOnOutdatedIps << ")!";
            if (DnsMaster::get_instance()->get_resolved_ip_ttl_threshold() > 0)
                GlobalLogger.warning() << LogPrefix << "This probably happened "
                    << "because you specified a TTL threshold > 0 but resolving"
                    << " had no effect on TTLs since external cache is only "
                    << "updated when TTL=0 is reached.";
        }
        else
        {
            GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
                << ip_count << " (IPs may be outdated="
                << ContinueOnOutdatedIps << ") --> could ping now";
        }
        HostAnalyzer.set_resolved_ip_count( ip_count );
        ping_when_ready();
    }
    else
    {
        // host name resolution failed; try again bypassing first outdated CNAME
        // or using cached IP
        std::string skip_host = Resolver->get_skip_cname();

        if (skip_host.empty())
        {
            // try to continue with cached IPs
            int ip_count = Resolver->get_resolved_ip_count(false);

            if (ip_count == 0)
                GlobalLogger.notice() << LogPrefix << "DNS failed "
                    << "and have no cached IPs either --> cannot ping";
                // ping_when_ready will deal with this case
            else
            {
                ContinueOnOutdatedIps = true;
                update_log_prefix();
                GlobalLogger.notice() << LogPrefix << "DNS failed, "
                    << "try anyway with cached data";
            }

            GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to "
                << ip_count << " (IPs may be outdated=" << true << ")";
            HostAnalyzer.set_resolved_ip_count( ip_count );
            ping_when_ready();
        }
        else
        {
            // have CNAME to continue
            ContinueOnOutdatedIps = true;
            update_log_prefix();
            GlobalLogger.notice() << LogPrefix << "DNS failed, "
                << "try again skipping a CNAME and resolving "
                << skip_host << " directly";

            cancel_resolve(false);

            // now create new resolver
            Resolver = DnsMaster::get_instance()
                                   ->get_resolver_for(skip_host, *ProtocolIter);
            start_resolving_ping_address();
        }
    }
}

/**
 * cancel resolver if force_cancel or if it is not resolving DestinationAddress
 *
 * Resolvers have a life on their own: they are cached by DnsMaster so never go
 *   out of scope and even after calling callbacks, there might still be a
 *   longterm timer active to re-try resolving.
 * We want to cancel that long-term timer only if the Resolver is not for our
 *   real, original DestinationAddress but a CNAME, which can happen when trying
 *   to skip cnames and working on out-dated IPs
 *
 * Does nothing (apart from a debug message) if there is no resolver.
 *
 * @param force_cancel Cancel the resolver even if it is the one for
 *   DestinationAddress.
 */
void PingScheduler::cancel_resolve(const bool force_cancel)
{
    if ( !Resolver )
    {
        // other methods of this class treat a missing resolver as possible,
        // so do not dereference it blindly here
        GlobalLogger.debug() << LogPrefix << "Have no resolver to cancel";
        return;
    }

    if (force_cancel)
    {
        // prefix added for consistency with all other log messages
        GlobalLogger.info() << LogPrefix << "Cancelling resolver (forced)";
        Resolver->cancel_resolve();
    }
    else if ( Resolver->get_hostname() == DestinationAddress )
        GlobalLogger.info() << LogPrefix
                            << "Leave original resolver active in background";
    else
    {
        GlobalLogger.info() << LogPrefix << "Cancel resolver for "
            << Resolver->get_hostname() << " since is not the original "
            << DestinationAddress;
        Resolver->cancel_resolve();
    }
}