From fd62d09f010edf4becab310ce8fec37411b0e58c Mon Sep 17 00:00:00 2001 From: Christian Herdtweck Date: Thu, 30 Apr 2015 15:28:51 +0200 Subject: [PATCH] tested new DNS with internal server, make more robust against caching; works nicely now --- src/config/configuration.cpp | 2 +- src/dns/dnscache.cpp | 11 +++-- src/dns/dnsresolver.cpp | 79 +++++++++++++++++++++++--------------- src/dns/dnsresolver.h | 4 +- src/dns/ippseudoresolver.h | 3 +- src/dns/resolverbase.h | 2 +- src/host/pingscheduler.cpp | 87 +++++++++++++++++++++++++++++++++-------- src/icmp/icmppinger.cpp | 5 ++- 8 files changed, 135 insertions(+), 58 deletions(-) diff --git a/src/config/configuration.cpp b/src/config/configuration.cpp index 5843563..4efd663 100644 --- a/src/config/configuration.cpp +++ b/src/config/configuration.cpp @@ -231,7 +231,7 @@ int Configuration::get_resolved_ip_ttl_threshold() const } void Configuration::set_resolved_ip_ttl_threshold( const int resolved_ip_ttl_threshold ) { - BOOST_ASSERT(resolved_ip_ttl_threshold > 0 ); + BOOST_ASSERT(resolved_ip_ttl_threshold >= 0 ); ResolvedIpTtlThreshold = resolved_ip_ttl_threshold; } diff --git a/src/dns/dnscache.cpp b/src/dns/dnscache.cpp index 3a507e6..2796a07 100644 --- a/src/dns/dnscache.cpp +++ b/src/dns/dnscache.cpp @@ -277,7 +277,8 @@ HostAddressVec DnsCache::get_ips(const std::string &hostname, << " --> " << result.size() << "-list"; BOOST_FOREACH( const HostAddress &addr, result ) GlobalLogger.debug() << "DnsCache: " << addr.get_ip().to_string() - << " (TTL " << addr.get_ttl().get_value() << "s)"; + << " (TTL " << addr.get_ttl().get_updated_value() + << "s)"; return result; } @@ -290,7 +291,8 @@ Cname DnsCache::get_cname(const std::string &hostname, std::string key = key_for_hostname(hostname); Cname result_obj = CnameCache[key]; GlobalLogger.debug() << "DnsCache: request CNAME for " << key - << " --> \"" << result_obj.Host << "\""; + << " --> \"" << result_obj.Host << "\" (TTL " + << result_obj.Ttl.get_updated_value() << "s)"; if (result_obj.Host.empty()) return result_obj; @@ -334,7 +336,8 @@ HostAddressVec DnsCache::get_ips_recursive(const std::string &hostname, } else { - min_cname_ttl = min(min_cname_ttl, current_cname.Ttl.get_value()); + min_cname_ttl = min(min_cname_ttl, + current_cname.Ttl.get_updated_value()); result = get_ips(current_host, check_up_to_date); } } @@ -350,7 +353,7 @@ HostAddressVec DnsCache::get_ips_recursive(const std::string &hostname, BOOST_FOREACH( HostAddress &addr, result ) { - if (addr.get_ttl().get_value() > min_cname_ttl) + if (addr.get_ttl().get_updated_value() > min_cname_ttl) { GlobalLogger.debug() << "DnsCache: using shorter CNAME TTL"; addr.set_ttl(cname_ttl); diff --git a/src/dns/dnsresolver.cpp b/src/dns/dnsresolver.cpp index 07eeac3..085eda0 100644 --- a/src/dns/dnsresolver.cpp +++ b/src/dns/dnsresolver.cpp @@ -41,13 +41,12 @@ using I2n::Logger::GlobalLogger; using boost::posix_time::seconds; -using boost::posix_time::minutes; namespace Config { - const int ResolveTimeoutSeconds = 3; - const int PauseBeforeRetrySeconds = 10; - const int StaleDataLongtermMinutes = 15; + const int ResolveTimeoutSeconds = 2; + const int PauseBeforeRetrySeconds = 1; + const int StaleDataLongtermSeconds = 5*60; const int DNS_PORT = 53; } @@ -73,7 +72,7 @@ DnsResolver::DnsResolver(IoServiceItem &io_serv, , RandomIdGenerator() , RequestId( 0 ) , OperationCancelled( false ) - , LongermTimerIsActive( false ) + , LongtermTimerIsActive( false ) { std::stringstream temp; temp << "Dns(" << ResolverBase::Hostname << "): "; @@ -126,7 +125,7 @@ void DnsResolver::do_resolve() ResolveTimeoutTimer.cancel(); PauseBeforeRetryTimer.cancel(); StaleDataLongtermTimer.cancel(); - LongermTimerIsActive = false; + LongtermTimerIsActive = false; // create DNS request boost::net::dns::message dns_message( ResolverBase::Hostname, Protocol ); @@ -190,7 +189,7 @@ void DnsResolver::handle_dns_result(const boost::system::error_code &error, if (error) { GlobalLogger.info() << LogPrefix << "DNS resolve resulted in error " - << error << " --> try again after a little while"; + << error << " --> request retry"; schedule_retry(); return; } @@ -247,7 +246,7 @@ void DnsResolver::handle_dns_result(const boost::system::error_code &error, else { // no answers --> cannot proceed GlobalLogger.warning() << LogPrefix << "No IP nor CNAME received! " - << "--> schedule retry"; + << "--> request retry"; schedule_retry(); } } @@ -358,13 +357,13 @@ void DnsResolver::handle_unavailable() { // schedule new attempt in quite a while StaleDataLongtermTimer.expires_from_now( - minutes(Config::StaleDataLongtermMinutes)); + seconds(Config::StaleDataLongtermSeconds)); StaleDataLongtermTimer.async_wait( boost::bind( &DnsResolver::wait_timer_timeout_handler, this, boost::asio::placeholders::error ) ); - LongermTimerIsActive = true; + LongtermTimerIsActive = true; // for now, admit failure bool was_success = false; @@ -540,7 +539,7 @@ void DnsResolver::stop_trying(bool was_success) if (was_success) { StaleDataLongtermTimer.cancel(); - LongermTimerIsActive = false; + LongtermTimerIsActive = false; } // clean up @@ -571,7 +570,7 @@ bool DnsResolver::is_resolving() const */ bool DnsResolver::is_waiting_to_resolve() const { - return IsResolving || LongermTimerIsActive; + return IsResolving || LongtermTimerIsActive; } @@ -584,10 +583,10 @@ bool DnsResolver::is_waiting_to_resolve() const */ void DnsResolver::cancel_resolve() { - if ( !IsResolving ) + if ( !IsResolving && !LongtermTimerIsActive) { - GlobalLogger.info() << LogPrefix - << "Cancel called on non-resolving resolver -- ignore"; + GlobalLogger.info() << LogPrefix << "Cancel called on non-resolving, " + << "non-waiting resolver -- ignore"; return; } else if (OperationCancelled) @@ -596,18 +595,23 @@ void DnsResolver::cancel_resolve() << "Cancel called on cancelled resolver -- ignore"; return; } + GlobalLogger.info() << LogPrefix << "Cancel resolver"; // set before finalize_resolve so can check in finalize_resolve that ID is // always 0; ID is not used any more since handle_dns_result stops if // OperationCancelled is true RequestId = 0; - bool was_success = false; - int cname_count = 1; - finalize_resolve(was_success, cname_count); + if ( IsResolving ) + { + bool was_success = false; + int cname_count = 1; + finalize_resolve(was_success, cname_count); + } // also cancel the long-term timer StaleDataLongtermTimer.cancel(); + LongtermTimerIsActive = false; // set after finalize_resolve, so can check in finalize_resolve that // OperationCancelled is never true @@ -620,15 +624,15 @@ void DnsResolver::handle_resolve_timeout(const boost::system::error_code &error) { if ( error == boost::asio::error::operation_aborted ) // cancelled { - GlobalLogger.info() << LogPrefix - << "Resolve timeout timer was cancelled!"; + GlobalLogger.debug() << LogPrefix + << "Resolve timeout timer was cancelled!"; return; } else if (error) { GlobalLogger.warning() << LogPrefix << "resolve timeout handler received error " - << error << " --> retry"; + << error << " --> request retry"; schedule_retry(); } else if ( OperationCancelled ) @@ -679,8 +683,8 @@ void DnsResolver::wait_timer_timeout_handler( if ( error == boost::asio::error::operation_aborted ) // cancelled { // assume that our code cancelled this timer, so callbacks will be // taken care of! - GlobalLogger.info() << LogPrefix - << "Resolve wait timer was cancelled! "; + GlobalLogger.debug() << LogPrefix + << "Resolve wait timer was cancelled! "; } else if (error) { // not sure what to do here, but callers waiting forever for a callback @@ -700,6 +704,7 @@ void DnsResolver::wait_timer_timeout_handler( else { GlobalLogger.info() << LogPrefix << "Done waiting --> re-try resolve"; + IsResolving = false; // will be set to true immediately in do_resolve do_resolve(); } } @@ -713,7 +718,7 @@ HostAddress DnsResolver::get_next_ip(bool check_up_to_date) { // get cached data // (do not use arg check_up_to_date here in order to give NextIpIndex - // a chance to stay above number of outdate IPs) + // a chance to stay above number of outdated IPs) HostAddressVec cached_data = ResolverBase::get_cached_ips_recursively(); // if no results cached, return default-constructed HostAddress (0.0.0.0) @@ -724,7 +729,7 @@ HostAddress DnsResolver::get_next_ip(bool check_up_to_date) return return_candidate; } - int n_iter = 0; + std::size_t n_iter = 0; std::size_t n_ips = cached_data.size(); uint32_t ttl_thresh = static_cast( DnsMaster::get_instance() ->get_resolved_ip_ttl_threshold() ); @@ -750,27 +755,39 @@ HostAddress DnsResolver::get_next_ip(bool check_up_to_date) } else { // there are candidates left to consider + GlobalLogger.debug() << LogPrefix << "Check IP candidate at index " + << NextIpIndex; return_candidate = cached_data[NextIpIndex++]; if (!check_up_to_date) + { + GlobalLogger.debug() << LogPrefix << "not checking ttl, accept"; return return_candidate; - else if (cached_data[NextIpIndex].get_ttl().get_updated_value() + } + else if (return_candidate.get_ttl().get_updated_value() > ttl_thresh) + { + GlobalLogger.debug() << LogPrefix << "is up to date, accept"; return return_candidate; + } else + { + GlobalLogger.debug() << LogPrefix << "is out of date (" + << return_candidate.get_ttl().get_updated_value() + << "s <= " << ttl_thresh << "s), continue"; ++n_iter; + } } } } bool DnsResolver::have_up_to_date_ip() { - return get_resolved_ip_count() > 0; + return get_resolved_ip_count(true) > 0; } -int DnsResolver::get_resolved_ip_count() +int DnsResolver::get_resolved_ip_count(const bool check_up_to_date) { - // run with empty hostname (--> uses internal var Hostname) - // and check_up_to_date = true - return ResolverBase::get_cached_ips_recursively("", true).size(); + // run with empty hostname --> uses internal var Hostname + return ResolverBase::get_cached_ips_recursively("",check_up_to_date).size(); } diff --git a/src/dns/dnsresolver.h b/src/dns/dnsresolver.h index 5caf9c7..0918fd0 100644 --- a/src/dns/dnsresolver.h +++ b/src/dns/dnsresolver.h @@ -63,7 +63,7 @@ private: public: HostAddress get_next_ip(const bool check_up_to_date=true); bool have_up_to_date_ip(); - int get_resolved_ip_count(); + int get_resolved_ip_count(const bool check_up_to_date=true); void cancel_resolve(); bool is_resolving() const; bool is_waiting_to_resolve() const; @@ -109,7 +109,7 @@ private: boost::uuids::random_generator RandomIdGenerator; uint16_t RequestId; bool OperationCancelled; - bool LongermTimerIsActive; + bool LongtermTimerIsActive; }; #endif diff --git a/src/dns/ippseudoresolver.h b/src/dns/ippseudoresolver.h index 9a33a38..eeac609 100644 --- a/src/dns/ippseudoresolver.h +++ b/src/dns/ippseudoresolver.h @@ -66,7 +66,8 @@ public: HostAddress get_next_ip(const bool check_up_to_date=true) { return IpAddress; } bool have_up_to_date_ip() { return true; } - int get_resolved_ip_count() { return 1; } + int get_resolved_ip_count(const bool check_up_to_date=true) + { return 1; } bool is_resolving() const { return false; } bool is_waiting_to_resolve() const { return false; } void cancel_resolve() {} diff --git a/src/dns/resolverbase.h b/src/dns/resolverbase.h index ad2a70c..8262124 100644 --- a/src/dns/resolverbase.h +++ b/src/dns/resolverbase.h @@ -58,7 +58,7 @@ public: virtual HostAddress get_next_ip(const bool check_up_to_date=true) = 0; virtual bool have_up_to_date_ip() = 0; - virtual int get_resolved_ip_count() = 0; + virtual int get_resolved_ip_count(const bool check_up_to_date=true) = 0; std::string get_hostname() const; diff --git a/src/host/pingscheduler.cpp b/src/host/pingscheduler.cpp index b38271a..15f10fa 100644 --- a/src/host/pingscheduler.cpp +++ b/src/host/pingscheduler.cpp @@ -170,12 +170,12 @@ void PingScheduler::ping_when_ready() { if ( !WantToPing ) { - GlobalLogger.info() << LogPrefix << "not pinging (not requested to)"; + GlobalLogger.info() << LogPrefix << "waiting for ping request"; return; } else if ( Resolver && Resolver->is_resolving() ) { - GlobalLogger.info() << LogPrefix << "not pinging (DNS not finished)"; + GlobalLogger.info() << LogPrefix << "waiting for DNS to finish"; return; } else if ( !Resolver ) @@ -185,26 +185,44 @@ void PingScheduler::ping_when_ready() GlobalLogger.info() << LogPrefix << "start ping"; WantToPing = false; - // try to get an up-to-date IP + // try to get an up-to-date IP (ContinueOnOutdatedIps may only be set + // because a CNAME was out of date -- IPs may still be current) HostAddress ip = Resolver->get_next_ip(); - if ( !ip.is_valid() && ContinueOnOutdatedIps) - { // we failed to resolve --> try to use outdated IP + if ( !ip.is_valid() ) + { // this can happen in 2 cases: if ContinueOnOutdatedIps==true + // or when ip went out of date between resolve and now + // --> try to use outdated IP GlobalLogger.info() << LogPrefix << "Checking for outdated IPs"; bool check_up_to_date = false; ip = Resolver->get_next_ip(check_up_to_date); } - if ( ip.is_valid() ) + if ( !ip.is_valid() ) + { // still no valid IP --> should not happen + GlobalLogger.error() << LogPrefix << "Not even outdated IP to ping " + << "-- this should not have happened!! Treat like a failed ping."; + if (DnsMaster::get_instance()->get_resolved_ip_ttl_threshold() > 0) + GlobalLogger.warning() << LogPrefix << "This probably happened " + << "because you specified a TTL threshold > 0 but resolving" + << " had no effect on TTLs since external cache is only " + << "updated when TTL=0 is reached."; + + // skip the ping and directly call ping_done_handler + ping_done_handler(false); + } + else + { + uint32_t ttl = ip.get_ttl().get_updated_value(); + boost::posix_time::ptime now = + boost::posix_time::second_clock::local_time(); + std::string expiry = + boost::posix_time::to_simple_string(now + seconds(ttl)); + + GlobalLogger.info() << LogPrefix << "pinging IP " << ip.get_ip() + << " with TTL " << ttl << "s (until " << expiry << ")"; Ping->ping( ip.get_ip(), DestinationPort, boost::bind(&PingScheduler::ping_done_handler, this, _1) ); - else - { // should not happen - GlobalLogger.error() << LogPrefix << "No IP to ping " - << "-- this should not have happened!!"; - WantToPing = true; - if ( !Resolver->is_resolving() ) - start_resolving_ping_address(); } } @@ -295,6 +313,8 @@ void PingScheduler::update_ping_protocol() void PingScheduler::get_next_ping_protocol() { + GlobalLogger.debug() << LogPrefix + << "------------------------------------------------------------------"; ++ProtocolIter; if (ProtocolIter == Protocols.end()) ProtocolIter = Protocols.begin(); @@ -351,9 +371,16 @@ void PingScheduler::update_dns_resolver( PingProtocol current_protocol ) // requested protocol ( ICMP/TCP is ignored, only IPv4/v6 is important) Resolver = DnsMaster::get_instance()->get_resolver_for(DestinationAddress, current_protocol); - // start resolving if no ips available - if ( Resolver->have_up_to_date_ip() ) + + // get number of up-to-date IPs + bool check_up_to_date = true; + int ip_count = Resolver->get_resolved_ip_count(check_up_to_date); + if (ip_count > 0) { + GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to " + << ip_count << " where IPs may be outdated: " << false; + HostAnalyzer.set_resolved_ip_count( ip_count ); + if (Resolver->is_resolving()) GlobalLogger.warning() << LogPrefix << "have up to date IPs but " << "resolver seems to be resolving all the same... " @@ -362,6 +389,7 @@ void PingScheduler::update_dns_resolver( PingProtocol current_protocol ) } else start_resolving_ping_address(); + // set resolved_ip_count in resolve callback } void PingScheduler::start_resolving_ping_address() @@ -379,7 +407,27 @@ void PingScheduler::dns_resolve_callback(const bool was_success, if ( was_success ) { - HostAnalyzer.set_resolved_ip_count( Resolver->get_resolved_ip_count()); + // trust that a successfull DNS resolve means we have an IP with TTL>0 + int ip_count = Resolver->get_resolved_ip_count(!ContinueOnOutdatedIps); + if (ip_count == 0) + { // this will create trouble in HostAnalyzer + GlobalLogger.warning() << LogPrefix + << "Should not have reached this case: resolve was " + << "successfull but still have no IPs (up-to-date=" + << !ContinueOnOutdatedIps << ")!"; + if (DnsMaster::get_instance()->get_resolved_ip_ttl_threshold() > 0) + GlobalLogger.warning() << LogPrefix << "This probably happened " + << "because you specified a TTL threshold > 0 but resolving" + << " had no effect on TTLs since external cache is only " + << "updated when TTL=0 is reached."; + } + else + { + GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to " + << ip_count << " where IPs may be outdated: " + << ContinueOnOutdatedIps << " --> could ping now"; + HostAnalyzer.set_resolved_ip_count( ip_count ); + } ping_when_ready(); } else @@ -394,7 +442,12 @@ void PingScheduler::dns_resolve_callback(const bool was_success, { // continue with IP GlobalLogger.notice() << LogPrefix << "DNS failed, " << "try anyway with cached data"; - HostAnalyzer.set_resolved_ip_count(0); + + int ip_count = Resolver->get_resolved_ip_count(false); + GlobalLogger.info() << LogPrefix << "Set resolved_ip_count to " + << ip_count << " where IPs may be outdated: " << true; + HostAnalyzer.set_resolved_ip_count( ip_count ); + ping_when_ready(); } else diff --git a/src/icmp/icmppinger.cpp b/src/icmp/icmppinger.cpp index e6d51b6..c9ee1b2 100644 --- a/src/icmp/icmppinger.cpp +++ b/src/icmp/icmppinger.cpp @@ -594,7 +594,10 @@ void IcmpPacketDistributor::clean_up_all() void IcmpPacketDistributor::clean_up() { - if (PingerList.size() > 0) + if (PingerList.size() == 0) + GlobalLogger.notice() << "All IcmpPingers have de-registered :-)" + << std::endl; + else GlobalLogger.warning() << "There were still " << PingerList.size() << " pingers registered in IcmpPacketDistributor!" << std::endl; PingerList.clear(); -- 1.7.1