Tested new DNS with internal server and made it more robust against caching; works nicely now
[pingcheck] / src / dns / dnsresolver.cpp
index 07eeac3..085eda0 100644 (file)
 
 using I2n::Logger::GlobalLogger;
 using boost::posix_time::seconds;
-using boost::posix_time::minutes;
 
 namespace Config
 {
-    const int ResolveTimeoutSeconds = 3;
-    const int PauseBeforeRetrySeconds = 10;
-    const int StaleDataLongtermMinutes = 15;
+    const int ResolveTimeoutSeconds = 2;
+    const int PauseBeforeRetrySeconds = 1;
+    const int StaleDataLongtermSeconds = 5*60;
     const int DNS_PORT = 53;
 }
 
@@ -73,7 +72,7 @@ DnsResolver::DnsResolver(IoServiceItem &io_serv,
     , RandomIdGenerator()
     , RequestId( 0 )
     , OperationCancelled( false )
-    , LongermTimerIsActive( false )
+    , LongtermTimerIsActive( false )
 {
     std::stringstream temp;
     temp << "Dns(" << ResolverBase::Hostname << "): ";
@@ -126,7 +125,7 @@ void DnsResolver::do_resolve()
     ResolveTimeoutTimer.cancel();
     PauseBeforeRetryTimer.cancel();
     StaleDataLongtermTimer.cancel();
-    LongermTimerIsActive = false;
+    LongtermTimerIsActive = false;
 
     // create DNS request
     boost::net::dns::message dns_message( ResolverBase::Hostname, Protocol );
@@ -190,7 +189,7 @@ void DnsResolver::handle_dns_result(const boost::system::error_code &error,
     if (error)
     {
         GlobalLogger.info() << LogPrefix << "DNS resolve resulted in error "
-                            << error << " --> try again after a little while";
+                            << error << " --> request retry";
         schedule_retry();
         return;
     }
@@ -247,7 +246,7 @@ void DnsResolver::handle_dns_result(const boost::system::error_code &error,
     else
     {   // no answers --> cannot proceed
         GlobalLogger.warning() << LogPrefix << "No IP nor CNAME received! "
-                               << "--> schedule retry";
+                               << "--> request retry";
         schedule_retry();
     }
 }
@@ -358,13 +357,13 @@ void DnsResolver::handle_unavailable()
 {
     // schedule new attempt in quite a while
     StaleDataLongtermTimer.expires_from_now(
-                                     minutes(Config::StaleDataLongtermMinutes));
+                                     seconds(Config::StaleDataLongtermSeconds));
     StaleDataLongtermTimer.async_wait(
             boost::bind( &DnsResolver::wait_timer_timeout_handler,
                          this, boost::asio::placeholders::error
             )
     );
-    LongermTimerIsActive = true;
+    LongtermTimerIsActive = true;
 
     // for now, admit failure
     bool was_success = false;
@@ -540,7 +539,7 @@ void DnsResolver::stop_trying(bool was_success)
     if (was_success)
     {
         StaleDataLongtermTimer.cancel();
-        LongermTimerIsActive = false;
+        LongtermTimerIsActive = false;
     }
 
     // clean up
@@ -571,7 +570,7 @@ bool DnsResolver::is_resolving() const
  */
 bool DnsResolver::is_waiting_to_resolve() const
 {
-    return IsResolving || LongermTimerIsActive;
+    return IsResolving || LongtermTimerIsActive;
 }
 
 
@@ -584,10 +583,10 @@ bool DnsResolver::is_waiting_to_resolve() const
  */
 void DnsResolver::cancel_resolve()
 {
-    if ( !IsResolving )
+    if ( !IsResolving && !LongtermTimerIsActive)
     {
-        GlobalLogger.info() << LogPrefix
-               << "Cancel called on non-resolving resolver -- ignore";
+        GlobalLogger.info() << LogPrefix << "Cancel called on non-resolving, "
+                                         << "non-waiting resolver -- ignore";
         return;
     }
     else if (OperationCancelled)
@@ -596,18 +595,23 @@ void DnsResolver::cancel_resolve()
                << "Cancel called on cancelled resolver -- ignore";
         return;
     }
+    GlobalLogger.info() << LogPrefix << "Cancel resolver";
 
     // set before finalize_resolve so can check in finalize_resolve that ID is
     //   always 0; ID is not used any more since handle_dns_result stops if
     //   OperationCancelled is true
     RequestId = 0;
 
-    bool was_success = false;
-    int cname_count = 1;
-    finalize_resolve(was_success, cname_count);
+    if ( IsResolving )
+    {
+        bool was_success = false;
+        int cname_count = 1;
+        finalize_resolve(was_success, cname_count);
+    }
 
     // also cancel the long-term timer
     StaleDataLongtermTimer.cancel();
+    LongtermTimerIsActive = false;
 
     // set after finalize_resolve, so can check in finalize_resolve that 
     // OperationCancelled is never true
@@ -620,15 +624,15 @@ void DnsResolver::handle_resolve_timeout(const boost::system::error_code &error)
 {
     if ( error ==  boost::asio::error::operation_aborted )   // cancelled
     {
-        GlobalLogger.info() << LogPrefix
-                            << "Resolve timeout timer was cancelled!";
+        GlobalLogger.debug() << LogPrefix
+                             << "Resolve timeout timer was cancelled!";
         return;
     }
     else if (error)
     {
         GlobalLogger.warning() << LogPrefix
                                << "resolve timeout handler received error "
-                               << error << " --> retry";
+                               << error << " --> request retry";
         schedule_retry();
     }
     else if ( OperationCancelled )
@@ -679,8 +683,8 @@ void DnsResolver::wait_timer_timeout_handler(
     if ( error ==  boost::asio::error::operation_aborted )   // cancelled
     {   // assume that our code cancelled this timer, so callbacks will be
         // taken care of!
-        GlobalLogger.info() << LogPrefix
-                            << "Resolve wait timer was cancelled! ";
+        GlobalLogger.debug() << LogPrefix
+                             << "Resolve wait timer was cancelled! ";
     }
     else if (error)
     {   // not sure what to do here, but callers waiting forever for a callback
@@ -700,6 +704,7 @@ void DnsResolver::wait_timer_timeout_handler(
     else
     {
         GlobalLogger.info() << LogPrefix << "Done waiting --> re-try resolve";
+        IsResolving = false;  // will be set to true immediately in do_resolve
         do_resolve();
     }
 }
@@ -713,7 +718,7 @@ HostAddress DnsResolver::get_next_ip(bool check_up_to_date)
 {
     // get cached data
     // (do not use arg check_up_to_date here in order to give NextIpIndex
-    //  a chance to stay above number of outdate IPs)
+    //  a chance to stay above number of outdated IPs)
     HostAddressVec cached_data = ResolverBase::get_cached_ips_recursively();
 
     // if no results cached, return default-constructed HostAddress (0.0.0.0)
@@ -724,7 +729,7 @@ HostAddress DnsResolver::get_next_ip(bool check_up_to_date)
         return return_candidate;
     }
 
-    int n_iter = 0;
+    std::size_t n_iter = 0;
     std::size_t n_ips = cached_data.size();
     uint32_t ttl_thresh = static_cast<uint32_t>( DnsMaster::get_instance()
                                             ->get_resolved_ip_ttl_threshold() );
@@ -750,27 +755,39 @@ HostAddress DnsResolver::get_next_ip(bool check_up_to_date)
         }
         else
         {   // there are candidates left to consider
+            GlobalLogger.debug() << LogPrefix << "Check IP candidate at index "
+                                 << NextIpIndex;
             return_candidate = cached_data[NextIpIndex++];
             if (!check_up_to_date)
+            {
+                GlobalLogger.debug() << LogPrefix << "not checking ttl, accept";
                 return return_candidate;
-            else if (cached_data[NextIpIndex].get_ttl().get_updated_value()
+            }
+            else if (return_candidate.get_ttl().get_updated_value()
                      > ttl_thresh)
+            {
+                GlobalLogger.debug() << LogPrefix << "is up to date, accept";
                 return return_candidate;
+            }
             else
+            {
+                GlobalLogger.debug() << LogPrefix << "is out of date ("
+                    << return_candidate.get_ttl().get_updated_value()
+                    << "s <= " << ttl_thresh << "s), continue";
                 ++n_iter;
+            }
         }
     }
 }
 
 bool DnsResolver::have_up_to_date_ip()
 {
-    return get_resolved_ip_count() > 0;
+    return get_resolved_ip_count(true) > 0;
 }
 
-int DnsResolver::get_resolved_ip_count()
+int DnsResolver::get_resolved_ip_count(const bool check_up_to_date)
 {
-    // run with empty hostname (--> uses internal var Hostname)
-    // and check_up_to_date = true
-    return ResolverBase::get_cached_ips_recursively("", true).size();
+    // run with empty hostname --> uses internal var Hostname
+    return ResolverBase::get_cached_ips_recursively("",check_up_to_date).size();
 }