added limit to DNS resolution attempts before pinging; report host down to hostatus...
authorChristian Herdtweck <christian.herdtweck@intra2net.com>
Wed, 5 Nov 2014 15:27:06 +0000 (16:27 +0100)
committerChristian Herdtweck <christian.herdtweck@intra2net.com>
Wed, 12 Nov 2014 10:11:05 +0000 (11:11 +0100)
src/host/hoststatus.cpp
src/host/hoststatus.h
src/host/pingscheduler.cpp
src/host/pingscheduler.h

index 6749d79..31b88e3 100644 (file)
@@ -106,6 +106,12 @@ void HostStatus::update_ping_statistics( bool ping_success )
     BOOST_ASSERT( PingsFailedCount <= PingsPerformedCount );
 }
 
+void HostStatus::report_dns_resolution_failure()
+{
+    LinkAnalyzer->notify_host_down( HostAddress );
+}
+
+
 bool HostStatus::tried_all_resolved_ip() const
 {
     BOOST_ASSERT( ( 0 < PingsPerformedCount ) && ( PingsPerformedCount <= ResolvedIpCount ) );
index c5f558d..0bb1478 100644 (file)
@@ -46,6 +46,7 @@ public:
     void set_resolved_ip_count( const int resolved_ip_count );
     bool exceeded_ping_failed_limit() const;
     void update_ping_statistics( bool ping_success );
+    void report_dns_resolution_failure();
 
 private:
     bool tried_all_resolved_ip() const;
index ce5d1d1..c1f79a0 100644 (file)
@@ -78,6 +78,9 @@ PingScheduler::PingScheduler(
     PingIntervalInSec( ping_interval_in_sec ),
     AddressResolveIntervalInSec( ping_interval_in_sec ),
     HostAnalyzer( destination_address, ping_fail_percentage_limit, link_analyzer ),
+    FirstDelay( first_delay ),
+    AddressResolutionAttempts( 0 ),
+    EverHadAnyIP( false ),
     Ping(),
     Thread()
 {
@@ -96,8 +99,6 @@ PingScheduler::PingScheduler(
             destination_port,
             nameserver
     );
-    AddressResolvedFirstTime = false;
-    FirstDelay = first_delay;
 }
 
 /**
@@ -136,126 +137,112 @@ void PingScheduler::stop_pinging_thread()
     stop_pinging();
 }
 
-bool PingScheduler::start_pinging()
+void PingScheduler::stop_pinging()
 {
-    bool address_resolved = resolve_ping_address();
-    if ( !address_resolved )
-    {
-        //return false;
-        // new behaviour: try again later
-        schedule_address_resolve();
-    }
-    else
-        AddressResolvedFirstTime = true;
+    // This is thread safe
+    IoService.stop();
+}
+
+/**
+ * @brief Start into infinite loop of calls to resolve_and_ping, using an
+ *   IoService which is started here and can be stopped through stop_pinging[_thread]
+ */
+void PingScheduler::start_pinging()
+{
+    EverHadAnyIP = false;
 
-    if (FirstDelay > 0)
+    if ( FirstDelay > 0 )
     {
         GlobalLogger.info() << "Delaying first ping by " << FirstDelay << "s";
         (void) NextPingTimer.expires_from_now( seconds( FirstDelay ) );
-        NextPingTimer.async_wait( bind( &PingScheduler::setup_next_ping, this ) );
+        NextPingTimer.async_wait( bind( &PingScheduler::resolve_and_ping, this ) );
     }
     else
-        setup_next_ping();
+        resolve_and_ping();
 
     // event processing loop, it is a blocking call!
     IoService.run();                                                                                    //lint !e534
-
-    return true;
+    GlobalLogger.error() << "User interrupted ping thread!";
 }
 
-void PingScheduler::stop_pinging()
-{
-    // This is thread safe
-    IoService.stop();
-}
 
-void PingScheduler::update_ping_address()
+/**
+ * @brief Check whether need to resolve host names via DNS and then
+ *   call ping;
+ *   If name resolution fails too often, reports this to HostAnalyzer
+ */
+void PingScheduler::resolve_and_ping()
 {
-    // TODO this code is similar to the one at start_pinging(), make it simple!
-    if ( Ping->expired_resolved_ip() )
+    bool ips_up_to_date = EverHadAnyIP && !Ping->expired_resolved_ip();
+
+    // determine if address resolution is required
+    if ( !ips_up_to_date )
     {
-        GlobalLogger.info() << "Updating DNS ... ";
-        bool address_resolved = resolve_ping_address();
-        if ( !address_resolved )
+        ips_up_to_date = Ping->resolve_ping_address();    // try to resolve 
+        if ( ips_up_to_date )
         {
-            GlobalLogger.error() << "Error: could not update host IP, may use outdated address"
-                    << endl;
+            EverHadAnyIP = true;
+            GlobalLogger.warning() << "DEBUG: resolved ping address, have "
+                                   << Ping->get_resolved_ip_count() << " IPs" << endl;
+            HostAnalyzer.set_resolved_ip_count( Ping->get_resolved_ip_count() );
+            AddressResolutionAttempts = 0;
         }
     }
-}
-
-bool PingScheduler::resolve_ping_address()
-{
-    bool address_resolved = Ping->resolve_ping_address();
-    if ( !address_resolved )
-    {
-        return false;
-    }
+    // (may still have no or only outdated IPs)
 
-    int resolved_ip_count = Ping->get_resolved_ip_count();
-    HostAnalyzer.set_resolved_ip_count( resolved_ip_count );
-
-    return true;
-}
 
-void PingScheduler::schedule_address_resolve()
-{
-    if (resolve_ping_address())
-        AddressResolvedFirstTime = true;
+    if ( EverHadAnyIP )
+    {   // have IPs --> can ping (even if IPs are out of date)
+        if ( !ips_up_to_date )
+            GlobalLogger.info() << "Using outdated IPs" << endl;
+        ping();
+    }
     else
-    {
+    {   // no IPs --> try again later, possibly report offline before
+        AddressResolutionAttempts++;
+        if (AddressResolutionAttempts >= MAX_ADDRESS_RESOLUTION_ATTEMPTS)
+        {
+            GlobalLogger.notice() << "No IPs after " << AddressResolutionAttempts << " DNS queries!";
+            HostAnalyzer.report_dns_resolution_failure();
+        }
         (void) NextAddressTimer.expires_from_now( seconds( AddressResolveIntervalInSec ) );
-        NextAddressTimer.async_wait( bind( &PingScheduler::schedule_address_resolve, this ) );
+        NextAddressTimer.async_wait( bind( &PingScheduler::resolve_and_ping, this ) );
     }
 }
 
+/**
+ * @brief call Ping::ping and schedule a call to ping_done_handler when finished
+ */
 void PingScheduler::ping()
 {
     Ping->ping( boost::bind(&PingScheduler::ping_done_handler, this, _1) );
 }
 
-void PingScheduler::ping_done_handler( bool ping_success )
-{
-    update_ping_statistics( ping_success );
-    update_ping_elapsed_time();
-
-    schedule_next_ping();
-}
-
-void PingScheduler::setup_next_ping()
-{
-    if (AddressResolvedFirstTime)
-    {
-        BOOST_ASSERT( 1 <= Ping->get_resolved_ip_count() );
-
-        update_ping_address();
 
-        ping();
-    }
-}
-
-void PingScheduler::schedule_next_ping()
-{
-    BOOST_ASSERT( 0 < PingIntervalInSec );
-
-    (void) NextPingTimer.expires_from_now( seconds( PingIntervalInSec ) );
-    NextPingTimer.async_wait( bind( &PingScheduler::setup_next_ping, this ) );
-}
-
-void PingScheduler::update_ping_statistics( const bool ping_success )
+/**
+ * @brief called when Ping::ping is done; calls functions to update
+ *   statistics, ping interval and elapsed time;
+ *   schedules a call to resolve_and_ping, thereby closing the loop
+ */
+void PingScheduler::ping_done_handler( const bool ping_success )
 {
-    HostAnalyzer.update_ping_statistics( ping_success );
-
-    // TODO you must call the method bellow AFTER update_ping_statistics
+    // post-processing
+    // TODO you must call these 3 methods exactly in this order
     // Fix this method, once it has a semantic dependency with the
-    // update_ping_statistics method, because it depends on the PingeAnalyzer
+    // update_ping_statistics method, because it depends on the PingAnalyzer
     // statistics to update the exceeded_ping_failed_limit
+    HostAnalyzer.update_ping_statistics( ping_success );
     update_ping_interval();
+    update_ping_elapsed_time();
+
+    // schedule next ping
+    (void) NextPingTimer.expires_from_now( seconds( PingIntervalInSec ) );
+    NextPingTimer.async_wait( bind( &PingScheduler::resolve_and_ping, this ) );
 }
 
 void PingScheduler::update_ping_interval()
 {
-    // must to ping more often?
+    // have to ping more often?
     if ( HostAnalyzer.exceeded_ping_failed_limit() )
     {
         PingIntervalInSec.speed_up();
@@ -281,3 +268,4 @@ void PingScheduler::update_ping_elapsed_time()
 
     TimeSentLastPing = microsec_clock::universal_time();
 }
+
index 88888dc..cacc599 100644 (file)
@@ -40,6 +40,8 @@ on this file might be covered by the GNU General Public License.
 // PingScheduler
 //-----------------------------------------------------------------------------
 
+const int MAX_ADDRESS_RESOLUTION_ATTEMPTS = 3;
+
 /**
  * @brief This class is responsible to control whether to ping a host. It
  * schedules periodic pings to a host.
@@ -70,20 +72,15 @@ private:
     // Methods
     //
 
-    bool start_pinging();
+    void start_pinging();
     void stop_pinging();
 
-    void update_ping_address();
-    bool resolve_ping_address();
-    void schedule_address_resolve();
+    void resolve_and_ping();
+    bool resolve_address();
+    void force_address_resolution();
 
     void ping();
-    void ping_done_handler(bool ping_success);
-
-    void setup_next_ping();
-    void schedule_next_ping();
-
-    void update_ping_statistics( const bool ping_success );
+    void ping_done_handler(const bool ping_success);
     void update_ping_interval();
     void update_ping_elapsed_time();
 
@@ -109,10 +106,13 @@ private:
     PingRotateItem Ping;
     /// Thread object
     boost::thread Thread;
-    /// flag that address is resolved, so can start pinging
-    bool AddressResolvedFirstTime;
     /// delay for very first ping to avoid lots of simultaneous pings
     int  FirstDelay;
+    /// number of attempts at Address Resolution
+    int AddressResolutionAttempts;
+    /// flat whether any DNS lookup succeeded 
+    //  (comparing get_ip_count to 0 might violate assertion in dnsresolver.cpp)
+    bool EverHadAnyIP;
 
 };