fixed 2 bugs and made clearer that Long-term timer in DnsResolver is not affected...
authorChristian Herdtweck <christian.herdtweck@intra2net.com>
Wed, 29 Apr 2015 15:45:16 +0000 (17:45 +0200)
committerChristian Herdtweck <christian.herdtweck@intra2net.com>
Mon, 4 May 2015 14:57:58 +0000 (16:57 +0200)
bugs:
* HostAddress::is_valid returned opposite
* use the returned IP to ping and do not request another in PingScheduler::ping
* move reset of ContinueOnOutdatedIp within PingScheduler to after ping

long-term timer:
* added a variable DnsResolver: LongtermTimerIsActive
* added a function to ResolverBase: is_waiting_to_resolve
* added a function cancel_resolve to PingScheduler that decides whether to cancel or not
* added an arg to DnsResolver::stop_trying

src/dns/dnsresolver.cpp
src/dns/dnsresolver.h
src/dns/hostaddress.cpp
src/dns/ippseudoresolver.h
src/dns/resolverbase.h
src/host/pingscheduler.cpp
src/host/pingscheduler.h

index 0ac9828..07eeac3 100644 (file)
@@ -73,6 +73,7 @@ DnsResolver::DnsResolver(IoServiceItem &io_serv,
     , RandomIdGenerator()
     , RequestId( 0 )
     , OperationCancelled( false )
+    , LongermTimerIsActive( false )
 {
     std::stringstream temp;
     temp << "Dns(" << ResolverBase::Hostname << "): ";
@@ -125,6 +126,7 @@ void DnsResolver::do_resolve()
     ResolveTimeoutTimer.cancel();
     PauseBeforeRetryTimer.cancel();
     StaleDataLongtermTimer.cancel();
+    LongermTimerIsActive = false;
 
     // create DNS request
     boost::net::dns::message dns_message( ResolverBase::Hostname, Protocol );
@@ -362,6 +364,7 @@ void DnsResolver::handle_unavailable()
                          this, boost::asio::placeholders::error
             )
     );
+    LongermTimerIsActive = true;
 
     // for now, admit failure
     bool was_success = false;
@@ -460,7 +463,7 @@ void DnsResolver::handle_cname(const std::vector<src_cname_pair> &result_cnames)
             // treat a CNAME as a partial result: not enough to run callbacks
             // from finalize_resolve, but enough to stop timers and reset
             // RetryCount --> name resolution can take longer
-            stop_trying();
+            stop_trying(true);
         }
     }
 }
@@ -476,15 +479,18 @@ void DnsResolver::cname_resolve_callback(const bool was_success,
         return;
     }
     else if (was_success)
+    {
         GlobalLogger.debug() << LogPrefix << "CNAME resolution succeeded";
+        finalize_resolve(was_success, cname_count+1);
+    }
     else
+    {
         GlobalLogger.info() << LogPrefix << "CNAME resolution failed";
         // no use to schedule retry in this case since cname resolver must have
         // failed several times and we can only re-start the same procedure with
-        // the same information
-
-    // cname counts like one more recursion step ...
-    finalize_resolve(was_success, cname_count+1);
+        // the same information. But can re-try later
+        handle_unavailable();
+    }
 }
 
 
@@ -507,7 +513,7 @@ void DnsResolver::finalize_resolve(const bool was_success,
                                             << "waiting for DNS reply!";
 
     // stop timers
-    stop_trying();
+    stop_trying(was_success);
 
     // schedule callbacks, clearing callback list
     ResolverBase::schedule_callbacks(was_success, cname_count);
@@ -520,31 +526,61 @@ void DnsResolver::finalize_resolve(const bool was_success,
 }
 
 
-void DnsResolver::stop_trying()
+/**
+ * arg was_success determines if stop trying forever or just for the moment
+ * --> determines if we cancel StaleDataLongtermTimer or not
+ */
+void DnsResolver::stop_trying(bool was_success)
 {
     // cancel timers
     GlobalLogger.debug() << LogPrefix << "Cancelling timers";
     ResolveTimeoutTimer.cancel();
     PauseBeforeRetryTimer.cancel();
-    StaleDataLongtermTimer.cancel();
+
+    if (was_success)
+    {
+        StaleDataLongtermTimer.cancel();
+        LongermTimerIsActive = false;
+    }
 
     // clean up
     RetryCount = 0;
 }
 
 
-bool DnsResolver::is_resolving()
+/**
+ * return true if resolver is currently resolving
+ *
+ * Is true from call to async_resolve until callbacks
+ * --> returns true if waiting for result or (short-term) retry
+ *
+ * However, does NOT tell you if the (long-term) stale timeout is active! 
+ *   That timer has no effect on result, need to check is_waiting_to_resolve
+ *   for that
+ */
+bool DnsResolver::is_resolving() const
 {
     return IsResolving;
 }
 
 
 /**
+ * returns true if either is_resolving or the long-term timer is active
+ * 
+ * is_resolving returns true if the short-term retry timer is active
+ */
+bool DnsResolver::is_waiting_to_resolve() const
+{
+    return IsResolving || LongermTimerIsActive;
+}
+
+
+/**
  * cancel a earlier call to async_resolve
  *
  * callbacks will be called with was_success=false; all internal operations
  * will be cancelled and internal callbacks (timers, dns results) have no
- * effect any more
+ * effect any more; cancels also the long-term stale-data timer
  */
 void DnsResolver::cancel_resolve()
 {
@@ -570,6 +606,9 @@ void DnsResolver::cancel_resolve()
     int cname_count = 1;
     finalize_resolve(was_success, cname_count);
 
+    // also cancel the long-term timer
+    StaleDataLongtermTimer.cancel();
+
     // set after finalize_resolve, so can check in finalize_resolve that 
     // OperationCancelled is never true
     OperationCancelled = true;
@@ -692,7 +731,7 @@ HostAddress DnsResolver::get_next_ip(bool check_up_to_date)
 
     GlobalLogger.info() << LogPrefix << "Get next IP from cached result of "
         << n_ips << " IPs; first index to consider is " << NextIpIndex
-        << "; TTL thresh = " << ttl_thresh << " is used: " << check_up_to_date;
+        << "; TTL thresh=" << ttl_thresh << "s is used: " << check_up_to_date;
 
     // loop until we have found a cached result (that is up to date)
     //   or until we have tried all cached IPs
@@ -700,9 +739,15 @@ HostAddress DnsResolver::get_next_ip(bool check_up_to_date)
     {
         // check index since cache size may have changed since last call
         if (NextIpIndex >= n_ips)
+        {
+            GlobalLogger.debug() << LogPrefix << "Reset NextIpIndex";
             NextIpIndex = 0;
+        }
         else if ( n_iter >= n_ips)
+        {
+            GlobalLogger.debug() << LogPrefix << "No IP found";
             return HostAddress();   // have checked all candidates
+        }
         else
         {   // there are candidates left to consider
             return_candidate = cached_data[NextIpIndex++];
@@ -710,7 +755,7 @@ HostAddress DnsResolver::get_next_ip(bool check_up_to_date)
                 return return_candidate;
             else if (cached_data[NextIpIndex].get_ttl().get_updated_value()
                      > ttl_thresh)
-                return cached_data[++NextIpIndex];
+                return return_candidate;
             else
                 ++n_iter;
         }
index 895f6b4..5caf9c7 100644 (file)
@@ -65,7 +65,8 @@ public:
     bool have_up_to_date_ip();
     int get_resolved_ip_count();
     void cancel_resolve();
-    bool is_resolving();
+    bool is_resolving() const;
+    bool is_waiting_to_resolve() const;
 
 // implementation of ResolverBase::async_resolve
 protected:
@@ -83,7 +84,7 @@ private:
                                 const int cname_count);
     void schedule_retry();
     void finalize_resolve(const bool success, const int cname_count=0);
-    void stop_trying();
+    void stop_trying(const bool was_success);
     void wait_timer_timeout_handler(const boost::system::error_code &error);
     void gather_results( const boost::net::dns::rr_list_t *rr_list,
                          std::vector<host_addr_pair> *result_ips,
@@ -108,6 +109,7 @@ private:
     boost::uuids::random_generator RandomIdGenerator;
     uint16_t RequestId;
     bool OperationCancelled;
+    bool LongermTimerIsActive;
 };
 
 #endif
index 2879090..52f9bd3 100644 (file)
@@ -75,6 +75,6 @@ void HostAddress::set_ttl( const TimeToLive &ttl )
 
 bool HostAddress::is_valid() const
 {
-    return Ip == address() || Ip == boost::asio::ip::address_v4()
-                           || Ip == boost::asio::ip::address_v6();
+    return Ip != address() && Ip != boost::asio::ip::address_v4()
+                           && Ip != boost::asio::ip::address_v6();
 }
index a3edee7..9a33a38 100644 (file)
@@ -64,10 +64,11 @@ private:
 // only real public function
 public:
     HostAddress get_next_ip(const bool check_up_to_date=true)
-                               { return IpAddress;  }
-    bool have_up_to_date_ip()  { return true;       }
-    int get_resolved_ip_count(){ return 1;          }
-    bool is_resolving()        { return false;      }
+                                       { return IpAddress; }
+    bool have_up_to_date_ip()          { return true;      }
+    int get_resolved_ip_count()        { return 1;         }
+    bool is_resolving() const          { return false;     }
+    bool is_waiting_to_resolve() const { return false;     }
     void cancel_resolve() {}
 
 // implementation of ResolverBase::async_resolve
index 7ceea42..ad2a70c 100644 (file)
@@ -53,7 +53,8 @@ public:
      */
     void async_resolve(const callback_type &callback);
     virtual void cancel_resolve() = 0;
-    virtual bool is_resolving() = 0;
+    virtual bool is_resolving() const = 0;
+    virtual bool is_waiting_to_resolve() const = 0;
 
     virtual HostAddress get_next_ip(const bool check_up_to_date=true) = 0;
     virtual bool have_up_to_date_ip() = 0;
index 0f08565..b38271a 100644 (file)
@@ -117,7 +117,7 @@ void PingScheduler::stop_pinging()
     // stop pinger and resolver
     GlobalLogger.debug() << LogPrefix << "scheduler: stop pinging";
     Ping->stop_pinging();
-    Resolver->cancel_resolve();
+    cancel_resolve(true);
 
     // now cancel the own timer in case that pinger cancelation called callback
     GlobalLogger.debug() << LogPrefix << "scheduler: cancel timer";
@@ -195,11 +195,9 @@ void PingScheduler::ping_when_ready()
         ip = Resolver->get_next_ip(check_up_to_date);
     }
     if ( ip.is_valid() )
-        Ping->ping(
-                Resolver->get_next_ip().get_ip(),
-                DestinationPort,
-                boost::bind(&PingScheduler::ping_done_handler, this, _1)
-        );
+        Ping->ping( ip.get_ip(),
+                    DestinationPort,
+                    boost::bind(&PingScheduler::ping_done_handler, this, _1) );
     else
     {   // should not happen
         GlobalLogger.error() << LogPrefix << "No IP to ping "
@@ -208,9 +206,6 @@ void PingScheduler::ping_when_ready()
         if ( !Resolver->is_resolving() )
             start_resolving_ping_address();
     }
-
-    // next time try with up-to-date IP
-    ContinueOnOutdatedIps = false;
 }
 
 
@@ -234,6 +229,12 @@ void PingScheduler::ping_done_handler( const bool ping_success )
     update_ping_interval();
     update_ping_elapsed_time();
 
+    if (ping_success)
+    {   // reset ContinueOnOutdatedIps
+        ContinueOnOutdatedIps = false;
+        update_log_prefix();
+    }
+
     // get next protocol, possibly start resolving IPs
     update_ping_protocol();
 
@@ -298,7 +299,7 @@ void PingScheduler::get_next_ping_protocol()
     if (ProtocolIter == Protocols.end())
         ProtocolIter = Protocols.begin();
     PingProtocol ping_protocol = *ProtocolIter;
-    // --> ProtocolIter still points to currently used protocol which is 
+    // --> ProtocolIter still points to currently used protocol which is
     //     required in dns_resolve_callback
 
     if (Ping)
@@ -308,6 +309,7 @@ void PingScheduler::get_next_ping_protocol()
                                        NetworkInterfaceName, PingReplyTimeout);
 
     update_dns_resolver( ping_protocol );
+
 }
 
 bool PingScheduler::can_change_ping_protocol() const
@@ -326,7 +328,7 @@ bool PingScheduler::can_change_ping_protocol() const
 void PingScheduler::update_log_prefix()
 {
     std::stringstream temp;
-    temp << "PS(" << DestinationAddress;
+    temp << "Sched(" << DestinationAddress;
     if (ContinueOnOutdatedIps)
         temp << "!";
     temp << "): ";
@@ -336,10 +338,12 @@ void PingScheduler::update_log_prefix()
 void PingScheduler::update_dns_resolver( PingProtocol current_protocol )
 {
     if (Resolver && Resolver->is_resolving())
+        cancel_resolve(false);
+
+    if (ContinueOnOutdatedIps)
     {
-        GlobalLogger.warning() << LogPrefix
-                          << "Resolver still seems to be resolving --> cancel!";
-        Resolver->cancel_resolve();
+        ContinueOnOutdatedIps = false;
+        update_log_prefix();
     }
 
     // DNS master caches created resolvers and resolved IPs, so this will
@@ -350,7 +354,7 @@ void PingScheduler::update_dns_resolver( PingProtocol current_protocol )
     // start resolving if no ips available
     if ( Resolver->have_up_to_date_ip() )
     {
-        if (!Resolver->is_resolving())
+        if (Resolver->is_resolving())
             GlobalLogger.warning() << LogPrefix << "have up to date IPs but "
                 << "resolver seems to be resolving all the same... "
                 << "Start pinging anyway!";
@@ -373,11 +377,6 @@ void PingScheduler::dns_resolve_callback(const bool was_success,
                         << "with success = " << was_success << " "
                         << "and cname_count = " << cname_count;
 
-    // TODO this is too simple, but need to think more about how to update here!
-    // (may have to switch back some time to resolver for original host or so
-    ContinueOnOutdatedIps = !was_success;
-    update_log_prefix();
-
     if ( was_success )
     {
         HostAnalyzer.set_resolved_ip_count( Resolver->get_resolved_ip_count());
@@ -386,6 +385,8 @@ void PingScheduler::dns_resolve_callback(const bool was_success,
     else
     {   // host name resolution failed; try again bypassing first outdated CNAME
         // or using cached IP
+        ContinueOnOutdatedIps = true;
+        update_log_prefix();
 
         std::string skip_host = Resolver->get_skip_cname();
 
@@ -401,12 +402,43 @@ void PingScheduler::dns_resolve_callback(const bool was_success,
             GlobalLogger.notice() << LogPrefix << "DNS failed, "
                 << "try again skipping a CNAME and resolving "
                 << skip_host << " directly";
+
+            cancel_resolve(false);
+
+            // now create new resolver
             Resolver = DnsMaster::get_instance()
                                    ->get_resolver_for(skip_host, *ProtocolIter);
             start_resolving_ping_address();
-
-            // (the original resolver is still alive and cached by DnsMaster and
-            //  counting down time to re-try on its own until cancel_resolve)
         }
     }
 }
+
+/**
+ * cancel resolver if force_cancel or if it is not resolving DestinationAddress
+ *
+ * Resolvers have a life on their own: they are cached by DnsMaster so never go
+ *   out of scope and even after calling callbacks, there might still be a
+ *   longterm timer active to re-try resolving.
+ * We want to cancel that long-term timer only if the Resolver is not for our
+ *   real, original DestinationAddress but a CNAME, which can happen when trying
+ *   to skip cnames and working on out-dated IPs
+ */
+void PingScheduler::cancel_resolve(const bool force_cancel)
+{
+    if (force_cancel)
+    {
+        GlobalLogger.info() << "Cancelling resolver (forced)";
+        Resolver->cancel_resolve();
+    }
+    else if ( Resolver->get_hostname() == DestinationAddress )
+        GlobalLogger.info() << LogPrefix
+                            << "Leave original resolver active in background";
+    else
+    {
+        GlobalLogger.info() << LogPrefix << "Cancel resolver for "
+            << Resolver->get_hostname() << " since is not the original "
+            << DestinationAddress;
+        Resolver->cancel_resolve();
+    }
+}
+
index c37f3c1..0de5711 100644 (file)
@@ -86,6 +86,7 @@ private:
     void start_resolving_ping_address();
 
     void update_log_prefix();
+    void cancel_resolve(const bool force_cancel);
 
     //
     // Attributes