From 72be9e7d855c13253a9183281c8d280a817cec2e Mon Sep 17 00:00:00 2001 From: Christian Herdtweck Date: Wed, 29 Apr 2015 17:45:16 +0200 Subject: [PATCH] fixed 2 bugs and made clearer that Long-term timer in DnsResolver is not affected by finalize_resolve bugs: * HostAddress::is_valid returned opposite * use the returned IP to ping and do not request another in PingScheduler::ping * move reset of ContinueOnOutdatedIp within PingScheduler to after ping long-term timer: * added a variable DnsResolver: LongtermTimerIsActive * added a function to ResolverBase: is_waiting_to_resolve * added a function cancel_resolve to PingScheduler that decides whether to cancel or not * added an arg to DnsResolver::stop_trying --- src/dns/dnsresolver.cpp | 69 ++++++++++++++++++++++++++++++++------- src/dns/dnsresolver.h | 6 ++- src/dns/hostaddress.cpp | 4 +- src/dns/ippseudoresolver.h | 9 +++-- src/dns/resolverbase.h | 3 +- src/host/pingscheduler.cpp | 78 +++++++++++++++++++++++++++++++------------- src/host/pingscheduler.h | 1 + 7 files changed, 126 insertions(+), 44 deletions(-) diff --git a/src/dns/dnsresolver.cpp b/src/dns/dnsresolver.cpp index 0ac9828..07eeac3 100644 --- a/src/dns/dnsresolver.cpp +++ b/src/dns/dnsresolver.cpp @@ -73,6 +73,7 @@ DnsResolver::DnsResolver(IoServiceItem &io_serv, , RandomIdGenerator() , RequestId( 0 ) , OperationCancelled( false ) + , LongermTimerIsActive( false ) { std::stringstream temp; temp << "Dns(" << ResolverBase::Hostname << "): "; @@ -125,6 +126,7 @@ void DnsResolver::do_resolve() ResolveTimeoutTimer.cancel(); PauseBeforeRetryTimer.cancel(); StaleDataLongtermTimer.cancel(); + LongermTimerIsActive = false; // create DNS request boost::net::dns::message dns_message( ResolverBase::Hostname, Protocol ); @@ -362,6 +364,7 @@ void DnsResolver::handle_unavailable() this, boost::asio::placeholders::error ) ); + LongermTimerIsActive = true; // for now, admit failure bool was_success = false; @@ -460,7 +463,7 @@ void DnsResolver::handle_cname(const std::vector &result_cnames) // treat a CNAME as a partial result: not enough to run callbacks // from finalize_resolve, but enough to stop timers and reset // RetryCount --> name resolution can take longer - stop_trying(); + stop_trying(true); } } } @@ -476,15 +479,18 @@ void DnsResolver::cname_resolve_callback(const bool was_success, return; } else if (was_success) + { GlobalLogger.debug() << LogPrefix << "CNAME resolution succeeded"; + finalize_resolve(was_success, cname_count+1); + } else + { GlobalLogger.info() << LogPrefix << "CNAME resolution failed"; // no use to schedule retry in this case since cname resolver must have // failed several times and we can only re-start the same procedure with - // the same information - - // cname counts like one more recursion step ... - finalize_resolve(was_success, cname_count+1); + // the same information. But can re-try later + handle_unavailable(); + } } @@ -507,7 +513,7 @@ void DnsResolver::finalize_resolve(const bool was_success, << "waiting for DNS reply!"; // stop timers - stop_trying(); + stop_trying(was_success); // schedule callbacks, clearing callback list ResolverBase::schedule_callbacks(was_success, cname_count); @@ -520,31 +526,61 @@ void DnsResolver::finalize_resolve(const bool was_success, } -void DnsResolver::stop_trying() +/** + * arg was_success determines if stop trying forever or just for the moment + * --> determines if we cancel StaleDataLongtermTimer or not + */ +void DnsResolver::stop_trying(bool was_success) { // cancel timers GlobalLogger.debug() << LogPrefix << "Cancelling timers"; ResolveTimeoutTimer.cancel(); PauseBeforeRetryTimer.cancel(); - StaleDataLongtermTimer.cancel(); + + if (was_success) + { + StaleDataLongtermTimer.cancel(); + LongermTimerIsActive = false; + } // clean up RetryCount = 0; } -bool DnsResolver::is_resolving() +/** + * return true if resolver is currently resolving + * + * Is true from call to async_resolve until callbacks + * --> returns true if waiting for result or (short-term) retry + * + * However, does NOT tell you if the (long-term) stale timeout is active! + * That timer has no effect on result, need to check is_waiting_to_resolve + * for that + */ +bool DnsResolver::is_resolving() const { return IsResolving; } /** + * returns true if either is_resolving or the long-term timer is active + * + * is_resolving returns true if the short-term retry timer is active + */ +bool DnsResolver::is_waiting_to_resolve() const +{ + return IsResolving || LongermTimerIsActive; +} + + +/** * cancel a earlier call to async_resolve * * callbacks will be called with was_success=false; all internal operations * will be cancelled and internal callbacks (timers, dns results) have no - * effect any more + * effect any more; cancels also the long-term stale-data timer */ void DnsResolver::cancel_resolve() { @@ -570,6 +606,9 @@ void DnsResolver::cancel_resolve() int cname_count = 1; finalize_resolve(was_success, cname_count); + // also cancel the long-term timer + StaleDataLongtermTimer.cancel(); + // set after finalize_resolve, so can check in finalize_resolve that // OperationCancelled is never true OperationCancelled = true; @@ -692,7 +731,7 @@ HostAddress DnsResolver::get_next_ip(bool check_up_to_date) GlobalLogger.info() << LogPrefix << "Get next IP from cached result of " << n_ips << " IPs; first index to consider is " << NextIpIndex - << "; TTL thresh = " << ttl_thresh << " is used: " << check_up_to_date; + << "; TTL thresh=" << ttl_thresh << "s is used: " << check_up_to_date; // loop until we have found a cached result (that is up to date) // or until we have tried all cached IPs @@ -700,9 +739,15 @@ HostAddress DnsResolver::get_next_ip(bool check_up_to_date) { // check index since cache size may have changed since last call if (NextIpIndex >= n_ips) + { + GlobalLogger.debug() << LogPrefix << "Reset NextIpIndex"; NextIpIndex = 0; + } else if ( n_iter >= n_ips) + { + GlobalLogger.debug() << LogPrefix << "No IP found"; return HostAddress(); // have checked all candidates + } else { // there are candidates left to consider return_candidate = cached_data[NextIpIndex++]; @@ -710,7 +755,7 @@ HostAddress DnsResolver::get_next_ip(bool check_up_to_date) return return_candidate; else if (cached_data[NextIpIndex].get_ttl().get_updated_value() > ttl_thresh) - return cached_data[++NextIpIndex]; + return return_candidate; else ++n_iter; } diff --git a/src/dns/dnsresolver.h b/src/dns/dnsresolver.h index 895f6b4..5caf9c7 100644 --- a/src/dns/dnsresolver.h +++ b/src/dns/dnsresolver.h @@ -65,7 +65,8 @@ public: bool have_up_to_date_ip(); int get_resolved_ip_count(); void cancel_resolve(); - bool is_resolving(); + bool is_resolving() const; + bool is_waiting_to_resolve() const; // implementation of ResolverBase::async_resolve protected: @@ -83,7 +84,7 @@ private: const int cname_count); void schedule_retry(); void finalize_resolve(const bool success, const int cname_count=0); - void stop_trying(); + void stop_trying(const bool was_success); void wait_timer_timeout_handler(const boost::system::error_code &error); void gather_results( const boost::net::dns::rr_list_t *rr_list, std::vector *result_ips, @@ -108,6 +109,7 @@ private: boost::uuids::random_generator RandomIdGenerator; uint16_t RequestId; bool OperationCancelled; + bool LongermTimerIsActive; }; #endif diff --git a/src/dns/hostaddress.cpp b/src/dns/hostaddress.cpp index 2879090..52f9bd3 100644 --- a/src/dns/hostaddress.cpp +++ b/src/dns/hostaddress.cpp @@ -75,6 +75,6 @@ void HostAddress::set_ttl( const TimeToLive &ttl ) bool HostAddress::is_valid() const { - return Ip == address() || Ip == boost::asio::ip::address_v4() - || Ip == boost::asio::ip::address_v6(); + return Ip != address() && Ip != boost::asio::ip::address_v4() + && Ip != boost::asio::ip::address_v6(); } diff --git a/src/dns/ippseudoresolver.h b/src/dns/ippseudoresolver.h index a3edee7..9a33a38 100644 --- a/src/dns/ippseudoresolver.h +++ b/src/dns/ippseudoresolver.h @@ -64,10 +64,11 @@ private: // only real public function public: HostAddress get_next_ip(const bool check_up_to_date=true) - { return IpAddress; } - bool have_up_to_date_ip() { return true; } - int get_resolved_ip_count(){ return 1; } - bool is_resolving() { return false; } + { return IpAddress; } + bool have_up_to_date_ip() { return true; } + int get_resolved_ip_count() { return 1; } + bool is_resolving() const { return false; } + bool is_waiting_to_resolve() const { return false; } void cancel_resolve() {} // implementation of ResolverBase::async_resolve diff --git a/src/dns/resolverbase.h b/src/dns/resolverbase.h index 7ceea42..ad2a70c 100644 --- a/src/dns/resolverbase.h +++ b/src/dns/resolverbase.h @@ -53,7 +53,8 @@ public: */ void async_resolve(const callback_type &callback); virtual void cancel_resolve() = 0; - virtual bool is_resolving() = 0; + virtual bool is_resolving() const = 0; + virtual bool is_waiting_to_resolve() const = 0; virtual HostAddress get_next_ip(const bool check_up_to_date=true) = 0; virtual bool have_up_to_date_ip() = 0; diff --git a/src/host/pingscheduler.cpp b/src/host/pingscheduler.cpp index 0f08565..b38271a 100644 --- a/src/host/pingscheduler.cpp +++ b/src/host/pingscheduler.cpp @@ -117,7 +117,7 @@ void PingScheduler::stop_pinging() // stop pinger and resolver GlobalLogger.debug() << LogPrefix << "scheduler: stop pinging"; Ping->stop_pinging(); - Resolver->cancel_resolve(); + cancel_resolve(true); // now cancel the own timer in case that pinger cancelation called callback GlobalLogger.debug() << LogPrefix << "scheduler: cancel timer"; @@ -195,11 +195,9 @@ void PingScheduler::ping_when_ready() ip = Resolver->get_next_ip(check_up_to_date); } if ( ip.is_valid() ) - Ping->ping( - Resolver->get_next_ip().get_ip(), - DestinationPort, - boost::bind(&PingScheduler::ping_done_handler, this, _1) - ); + Ping->ping( ip.get_ip(), + DestinationPort, + boost::bind(&PingScheduler::ping_done_handler, this, _1) ); else { // should not happen GlobalLogger.error() << LogPrefix << "No IP to ping " @@ -208,9 +206,6 @@ void PingScheduler::ping_when_ready() if ( !Resolver->is_resolving() ) start_resolving_ping_address(); } - - // next time try with up-to-date IP - ContinueOnOutdatedIps = false; } @@ -234,6 +229,12 @@ void PingScheduler::ping_done_handler( const bool ping_success ) update_ping_interval(); update_ping_elapsed_time(); + if (ping_success) + { // reset ContinueOnOutdatedIps + ContinueOnOutdatedIps = false; + update_log_prefix(); + } + // get next protocol, possibly start resolving IPs update_ping_protocol(); @@ -298,7 +299,7 @@ void PingScheduler::get_next_ping_protocol() if (ProtocolIter == Protocols.end()) ProtocolIter = Protocols.begin(); PingProtocol ping_protocol = *ProtocolIter; - // --> ProtocolIter still points to currently used protocol which is + // --> ProtocolIter still points to currently used protocol which is // required in dns_resolve_callback if (Ping) @@ -308,6 +309,7 @@ void PingScheduler::get_next_ping_protocol() NetworkInterfaceName, PingReplyTimeout); update_dns_resolver( ping_protocol ); + } bool PingScheduler::can_change_ping_protocol() const @@ -326,7 +328,7 @@ bool PingScheduler::can_change_ping_protocol() const void PingScheduler::update_log_prefix() { std::stringstream temp; - temp << "PS(" << DestinationAddress; + temp << "Sched(" << DestinationAddress; if (ContinueOnOutdatedIps) temp << "!"; temp << "): "; @@ -336,10 +338,12 @@ void PingScheduler::update_log_prefix() void PingScheduler::update_dns_resolver( PingProtocol current_protocol ) { if (Resolver && Resolver->is_resolving()) + cancel_resolve(false); + + if (ContinueOnOutdatedIps) { - GlobalLogger.warning() << LogPrefix - << "Resolver still seems to be resolving --> cancel!"; - Resolver->cancel_resolve(); + ContinueOnOutdatedIps = false; + update_log_prefix(); } // DNS master caches created resolvers and resolved IPs, so this will @@ -350,7 +354,7 @@ void PingScheduler::update_dns_resolver( PingProtocol current_protocol ) // start resolving if no ips available if ( Resolver->have_up_to_date_ip() ) { - if (!Resolver->is_resolving()) + if (Resolver->is_resolving()) GlobalLogger.warning() << LogPrefix << "have up to date IPs but " << "resolver seems to be resolving all the same... " << "Start pinging anyway!"; @@ -373,11 +377,6 @@ void PingScheduler::dns_resolve_callback(const bool was_success, << "with success = " << was_success << " " << "and cname_count = " << cname_count; - // TODO this is too simple, but need to think more about how to update here! - // (may have to switch back some time to resolver for original host or so - ContinueOnOutdatedIps = !was_success; - update_log_prefix(); - if ( was_success ) { HostAnalyzer.set_resolved_ip_count( Resolver->get_resolved_ip_count()); @@ -386,6 +385,8 @@ void PingScheduler::dns_resolve_callback(const bool was_success, else { // host name resolution failed; try again bypassing first outdated CNAME // or using cached IP + ContinueOnOutdatedIps = true; + update_log_prefix(); std::string skip_host = Resolver->get_skip_cname(); @@ -401,12 +402,43 @@ void PingScheduler::dns_resolve_callback(const bool was_success, GlobalLogger.notice() << LogPrefix << "DNS failed, " << "try again skipping a CNAME and resolving " << skip_host << " directly"; + + cancel_resolve(false); + + // now create new resolver Resolver = DnsMaster::get_instance() ->get_resolver_for(skip_host, *ProtocolIter); start_resolving_ping_address(); - - // (the original resolver is still alive and cached by DnsMaster and - // counting down time to re-try on its own until cancel_resolve) } } } + +/** + * cancel resolver if force_cancel or if it is not resolving DestinationAddress + * + * Resolvers have a life on their own: they are cached by DnsMaster so never go + * out of scope and even after calling callbacks, there might still be a + * longterm timer active to re-try resolving. + * We want to cancel that long-term timer only if the Resolver is not for our + * real, original DestinationAddress but a CNAME, which can happen when trying + * to skip cnames and working on out-dated IPs + */ +void PingScheduler::cancel_resolve(const bool force_cancel) +{ + if (force_cancel) + { + GlobalLogger.info() << "Cancelling resolver (forced)"; + Resolver->cancel_resolve(); + } + else if ( Resolver->get_hostname() == DestinationAddress ) + GlobalLogger.info() << LogPrefix + << "Leave original resolver active in background"; + else + { + GlobalLogger.info() << LogPrefix << "Cancel resolver for " + << Resolver->get_hostname() << " since is not the original " + << DestinationAddress; + Resolver->cancel_resolve(); + } +} + diff --git a/src/host/pingscheduler.h b/src/host/pingscheduler.h index c37f3c1..0de5711 100644 --- a/src/host/pingscheduler.h +++ b/src/host/pingscheduler.h @@ -86,6 +86,7 @@ private: void start_resolving_ping_address(); void update_log_prefix(); + void cancel_resolve(const bool force_cancel); // // Attributes -- 1.7.1