added congestion analysis to HostStatus
author Christian Herdtweck <christian.herdtweck@intra2net.com>
Tue, 26 May 2015 09:35:00 +0000 (11:35 +0200)
committer Christian Herdtweck <christian.herdtweck@intra2net.com>
Tue, 26 May 2015 09:35:00 +0000 (11:35 +0200)
(make cast from time difference to long explicit in pinger callback)

src/host/hoststatus.cpp
src/host/hoststatus.h
src/host/pingscheduler.cpp
src/host/pingscheduler.h
src/icmp/icmppinger.cpp
src/main.cpp
src/tcp/tcppinger.cpp
test/test_hoststatus.cpp

index 74178a6..4887bdc 100644 (file)
@@ -35,31 +35,55 @@ using I2n::Logger::GlobalLogger;
  * @param host_address The address of the host it has to analyze.
  * @param ping_fail_percentage_limit The percentage threshold of pings that can
  * fail.
+ * @param ping_congestion_limit_percentage The percentage threshold of pings
+ * that can fail due to line congestion
+ * @param ping_duration_congestion_thresh Threshold in seconds (stored in micro
+ * seconds) that marks the difference between a "normal" and a congested line
+ * @param n_parallel_pings Number of pings that is sent for each IP
  * @param link_analyzer The object used to notify the status of the host.
  */
 HostStatus::HostStatus(
         const string &host_address,
         const int ping_fail_limit_percentage,
+        const int ping_congestion_limit_percentage,
+        const int ping_duration_congestion_thresh,
         const int n_parallel_pings,
         const LinkStatusItem link_analyzer
 ) :
     HostAddress( host_address ),
     LinkAnalyzer( link_analyzer ),
     PingFailLimitPercentage( ping_fail_limit_percentage ),
+    PingCongestionLimitPercentage( ping_congestion_limit_percentage ),
+    PingDurationCongestionsThresh( ping_duration_congestion_thresh*1000000 ),
     ResolvedIpCount( 0 ),
     PingsPerformedCount( 0 ),
     PingsFailedCount( 0 ),
+    PingCongestionCount( 0 ),
     ExceededPingFailedLimit( false ),
+    ExceededPingCongestionLimit( false ),
     NParallelPingers( n_parallel_pings)
 {
     BOOST_ASSERT( !HostAddress.empty() );
-    BOOST_ASSERT( ( 0 <= PingFailLimitPercentage ) && ( PingFailLimitPercentage <= 100 ) );
+    BOOST_ASSERT( ( 0 <= PingFailLimitPercentage )
+                    && ( PingFailLimitPercentage <= 100 ) );
+    BOOST_ASSERT( ( 0 <= PingCongestionLimitPercentage )
+                    && ( PingCongestionLimitPercentage <= 100 ) );
 }
 
 HostStatus::~HostStatus()
 {
 }
 
+/// Builds the prefix shared by all log lines: host address and ping counters.
+std::string HostStatus::log_prefix()
+{
+    std::stringstream temp;
+    temp << "Stat(" << HostAddress << "): " << PingsFailedCount << " fail,"
+        << PingCongestionCount << " cong/" << PingsPerformedCount << " pings/"
+        << ResolvedIpCount << "*" << NParallelPingers << " IPs: ";
+    return temp.str();
+}
+
 /**
  * @param resolved_ip_count The number of IPs resolved for the host.
  */
@@ -73,9 +97,7 @@ void HostStatus::set_resolved_ip_count( const int resolved_ip_count )
     }
     ResolvedIpCount = resolved_ip_count;
 
-    GlobalLogger.debug() << "Stat(" << HostAddress << "): "
-        << PingsFailedCount << " fail/" << PingsPerformedCount << " pings/"
-        << ResolvedIpCount << "*" << NParallelPingers << " IPs: #IPs set";
+    GlobalLogger.debug() << log_prefix() << "#IPs set";
 }
 
 /**
@@ -88,6 +110,15 @@ bool HostStatus::exceeded_ping_failed_limit() const
 }
 
 /**
+ * @return true if the amount of congested pings given to the host exceeded the
+ * limit.
+ */
+bool HostStatus::exceeded_ping_congestion_limit() const
+{
+    return ExceededPingCongestionLimit; // see analyze_ping_congestion_count()
+}
+
+/**
  * Tells the status analyzer how the last ping went
  *
  * @param result: status of ping specifying success/failure and reason of fail
@@ -98,24 +129,16 @@ void HostStatus::update_ping_statistics( const PingStatus &result,
 {
     float ping_duration_ms = static_cast<float>(ping_duration_us) / 1000.;
 
-    GlobalLogger.debug() << "Stat(" << HostAddress << "): "
-        << PingsFailedCount << " fail/" << PingsPerformedCount << " pings/"
-        << ResolvedIpCount << "*" << NParallelPingers << " IPs: "
-        << "add ping with result " << to_string(result) << " which took "
-        << ping_duration_ms << " ms";
+    GlobalLogger.debug() << log_prefix() << "add ping with result "
+        << to_string(result) << " which took " << ping_duration_ms << " ms";
 
     BOOST_ASSERT( 1 <= ResolvedIpCount );
     BOOST_ASSERT( 0 <= PingsPerformedCount );
     BOOST_ASSERT( PingsFailedCount <= PingsPerformedCount );
+    BOOST_ASSERT( PingCongestionCount <= PingsPerformedCount );
 
-    increase_ping_performed_count();
-
-    if ( result != PingStatus_SuccessReply )
-    {
-        increase_ping_failed_count();
-    }
-
-    analyze_ping_failed_count();
+    update_fail_stats( result );
+    update_congestion_stats( result, ping_duration_us );
 
     // after we tried all IPs resolved for this host, we can analyze how many
     // failed
@@ -127,6 +150,35 @@ void HostStatus::update_ping_statistics( const PingStatus &result,
     }
 
     BOOST_ASSERT( PingsFailedCount <= PingsPerformedCount );
+    BOOST_ASSERT( PingCongestionCount <= PingsPerformedCount );
+}
+
+/// Counts one more ping (as failed unless successful), re-checks fail limit.
+void HostStatus::update_fail_stats( const PingStatus &result)
+{
+    const bool ping_succeeded = ( result == PingStatus_SuccessReply )
+                             || ( result == PingStatus_SuccessOutdatedIP );
+
+    increase_ping_performed_count();
+    if ( !ping_succeeded )
+    {
+        increase_ping_failed_count();
+    }
+    analyze_ping_failed_count();
+}
+
+/// Counts the ping as congested if it timed out or took longer than the
+/// congestion threshold, then re-checks the congestion limit.
+void HostStatus::update_congestion_stats( const PingStatus &result,
+                                          const long ping_duration_us )
+{
+    const bool is_slow = ( ping_duration_us > PingDurationCongestionsThresh );
+    if ( is_slow || ( result == PingStatus_FailureTimeout ) )
+        increase_ping_congestion_count();
+    // PingStatus_FailureNoIP, PingStatus_SuccessOutdatedIP could also be caused
+    // by congestion, but also by other reasons (e.g. firewall blocking port 53)
+
+    analyze_ping_congestion_count();
+}
 
 
@@ -146,26 +198,24 @@ void HostStatus::analyze_ping_statistics()
     // notify if the amount of pings that failed exceed the limit
     if ( exceeded_ping_failed_limit() )
     {
-        GlobalLogger.debug() << "Stat(" << HostAddress << "): "
-            << PingsFailedCount << " fail/" << PingsPerformedCount << " pings/"
-            << ResolvedIpCount << "*" << NParallelPingers << " IPs: "
-            << "notify down";
+        GlobalLogger.debug() << log_prefix() << "notify down";
         LinkAnalyzer->notify_host_down( HostAddress );
     }
     else
     {
-        GlobalLogger.debug() << "Stat(" << HostAddress << "): "
-            << PingsFailedCount << " fail/" << PingsPerformedCount << " pings/"
-            << ResolvedIpCount << "*" << NParallelPingers << " IPs: "
-            << "notify up";
+        GlobalLogger.debug() << log_prefix() << "notify up";
         LinkAnalyzer->notify_host_up( HostAddress );
     }
+
+    // nothing to do about congestion here, congestion is not forwarded to
+    // central LinkAnalyzer
 } //lint !e1762
 
 void HostStatus::reset_ping_counters()
 {
     PingsPerformedCount = 0;
     PingsFailedCount = 0;
+    PingCongestionCount = 0;
 }
 
 void HostStatus::increase_ping_performed_count()
@@ -183,6 +233,14 @@ void HostStatus::increase_ping_failed_count()
     BOOST_ASSERT( ( 0 <= PingsFailedCount ) && ( PingsFailedCount <= PingsPerformedCount ) );
 }
 
+void HostStatus::increase_ping_congestion_count()
+{
+    ++PingCongestionCount;
+    // the congestion count may never exceed the number of performed pings
+    BOOST_ASSERT( ( 0 <= PingCongestionCount )
+                    && ( PingCongestionCount <= PingsPerformedCount ) );
+}
+
 void HostStatus::analyze_ping_failed_count()
 {
     BOOST_ASSERT( ( 0 <= PingFailLimitPercentage ) && ( PingFailLimitPercentage <= 100 ) );
@@ -196,18 +254,41 @@ void HostStatus::analyze_ping_failed_count()
     {
         ExceededPingFailedLimit = true;
 
-        GlobalLogger.debug() << "Stat(" << HostAddress << "): "
-            << PingsFailedCount << " fail/" << PingsPerformedCount << " pings/"
-            << ResolvedIpCount << "*" << NParallelPingers << " IPs: "
-            << "exceed limit=" << ping_fail_limit_count;
+        GlobalLogger.debug() << log_prefix() << "exceed fail limit="
+                             << ping_fail_limit_count;
     }
     else
     {
         ExceededPingFailedLimit = false;
 
-        GlobalLogger.debug() << "Stat(" << HostAddress << "): "
-            << PingsFailedCount << " fail/" << PingsPerformedCount << " pings/"
-            << ResolvedIpCount << "*" << NParallelPingers << " IPs: "
-            << "below limit=" << ping_fail_limit_count;
+        GlobalLogger.debug() << log_prefix() << "below fail limit="
+                             << ping_fail_limit_count;
+    }
+}
+/// Sets ExceededPingCongestionLimit according to the current ping counters.
+void HostStatus::analyze_ping_congestion_count()
+{
+    BOOST_ASSERT( ( 0 <= PingCongestionLimitPercentage )
+                    && ( PingCongestionLimitPercentage <= 100 ) );
+    BOOST_ASSERT( ( 0 <= PingCongestionCount )
+                    && ( PingCongestionCount <= PingsPerformedCount ) );
+
+    // tolerated congested pings: percentage of ( #IPs * parallel pingers )
+    const int pings_total = ResolvedIpCount * NParallelPingers;
+    const int limit_count =
+            ( pings_total * PingCongestionLimitPercentage ) / 100;
+
+    // keep a boolean variable because the PingCongestionCount can be reset
+    ExceededPingCongestionLimit = ( PingCongestionCount > limit_count );
+
+    if ( ExceededPingCongestionLimit )
+    {
+        GlobalLogger.debug() << log_prefix() << "exceed congestion limit="
+                             << limit_count;
+    }
+    else
+    {
+        GlobalLogger.debug() << log_prefix() << "below congestion limit="
+                             << limit_count;
+    }
+}
index 3dba06c..1bb5a08 100644 (file)
@@ -40,6 +40,8 @@ public:
     HostStatus(
             const std::string &host_address,
             const int ping_fail_limit_percentage,
+            const int ping_congestion_limit_percentage,
+            const int ping_duration_congestion_thresh,
             const int n_parallel_pings,
             const LinkStatusItem link_analyzer
     );
@@ -47,16 +49,24 @@ public:
 
     void set_resolved_ip_count( const int resolved_ip_count );
     bool exceeded_ping_failed_limit() const;
+    bool exceeded_ping_congestion_limit() const;
     void update_ping_statistics( const PingStatus &ping_success,
                                  const long ping_duration_us );
 
 private:
+    void update_fail_stats( const PingStatus &ping_success );
+    void update_congestion_stats( const PingStatus &ping_success,
+                                  const long ping_duration_us );
     bool tried_all_resolved_ip() const;
     void analyze_ping_statistics();
     void reset_ping_counters();
     void increase_ping_performed_count();
     void increase_ping_failed_count();
+    void increase_ping_congestion_count();
     void analyze_ping_failed_count();
+    void analyze_ping_congestion_count();
+
+    std::string log_prefix();
 
 private:
     /// the DNS address of the host to analyze
@@ -65,15 +75,24 @@ private:
     const LinkStatusItem LinkAnalyzer;
     /// the maximum amount of pings that can fail without warning
     int PingFailLimitPercentage;
+    /// the maximum amount of pings that can be congested without warning
+    int PingCongestionLimitPercentage;
+    /// ping duration in micro seconds above which a ping counts as congested
+    long PingDurationCongestionsThresh;
     /// the amount of IPs that are aliases to the host DNS
     int ResolvedIpCount;
     /// the amount of pings sent until now
     int PingsPerformedCount;
     /// the amount of pings sent that failed until now
     int PingsFailedCount;
+    /// the amount of pings sent that indicate congestion until now
+    int PingCongestionCount;
     /// boolean flag that indicate if the last set of failed pings exceed the
     /// limit
     bool ExceededPingFailedLimit;
+    /// boolean flag that indicate if the last set of congested pings exceed the
+    /// limit
+    bool ExceededPingCongestionLimit;
     /// number of pingers that ping the same IP in parallel
     int NParallelPingers;
 
index 81149f6..464373a 100644 (file)
@@ -58,6 +58,10 @@ using I2n::Logger::GlobalLogger;
  * @param ping_protocol_list A list of protocols to use.
  * @param ping_interval_in_sec Amount of time between each ping.
  * @param ping_fail_percentage_limit Maximum amount of pings that can fail.
+ * @param ping_congestion_percentage_limit Amount of pings indicating a
+ * congested line
+ * @param ping_congestion_duration_thresh Duration in seconds that indicates a
+ * congested line
  * @param ping_reply_timeout Max amount time to wait for ping to finish
  * @param link_analyzer The object to monitor the link status.
  * @param first_delay Delay in seconds from start_pinging to first ping attempt
@@ -71,6 +75,8 @@ PingScheduler::PingScheduler(
         const PingProtocolList &ping_protocol_list,
         const long ping_interval_in_sec,
         const int ping_fail_percentage_limit,
+        const int ping_congestion_percentage_limit,
+        const int ping_congestion_duration_thresh,
         const int ping_reply_timeout,
         LinkStatusItem link_analyzer,
         const int first_delay,
@@ -89,7 +95,9 @@ PingScheduler::PingScheduler(
     TimeSentLastPing( microsec_clock::universal_time() ),
     PingReplyTimeout( ping_reply_timeout ),
     HostAnalyzer( destination_address, ping_fail_percentage_limit,
-                  n_parallel_pings, link_analyzer ),
+                  ping_congestion_percentage_limit,
+                  ping_congestion_duration_thresh, n_parallel_pings,
+                  link_analyzer ),
     Resolver(),
     Pingers(),
     NPingers( n_parallel_pings ),
index 54392c3..adb4a97 100644 (file)
@@ -58,6 +58,8 @@ public:
             const PingProtocolList &ping_protocol_list,
             const long ping_interval_in_sec,
             const int ping_fail_percentage_limit,
+            const int ping_congestion_percentage_limit,
+            const int ping_congestion_duration_thresh,
             const int ping_reply_timeout,
             LinkStatusItem link_analyzer,
             const int first_delay,
index b14e1e2..c0415bf 100644 (file)
@@ -274,9 +274,9 @@ void IcmpPinger::handle_timeout(const boost::system::error_code& error)
     }
 
     // Call ping-done handler
-    PingDoneCallback( PingerStatus,
+    PingDoneCallback( PingerStatus, static_cast<long>(
                      (microsec_clock::universal_time()
-                                             - TimeSent).total_microseconds() );
+                                            - TimeSent).total_microseconds()) );
 }
 
 
index 22273bc..940f455 100644 (file)
@@ -281,6 +281,8 @@ bool init_pingers(
         delays[ping_interval_in_sec] += delay_shifts[ping_interval_in_sec];
         int n_parallel_pings = 10;
         int parallel_ping_delay = 100;   // ms
+        int congestion_duration_thresh = 10; // seconds
+        int congestion_percentage_thresh = 75;
 
         PingSchedulerItem scheduler(
                 new PingScheduler(
@@ -291,6 +293,8 @@ bool init_pingers(
                         protocol_list,
                         ping_interval_in_sec,
                         ping_fail_limit,
+                        congestion_percentage_thresh,
+                        congestion_duration_thresh,
                         ping_reply_timeout,
                         status_notifier,
                         current_delay,
index d583867..e7a6e95 100644 (file)
@@ -252,8 +252,8 @@ void TcpPinger::handle_ping_done()
     }
 
     // Call ping-done handler
-    PingDoneCallback( PingerStatus,
-           (microsec_clock::universal_time() - TimeSent).total_microseconds() );
+    PingDoneCallback( PingerStatus, static_cast<long>(
+          (microsec_clock::universal_time() - TimeSent).total_microseconds()) );
 }
 
 void TcpPinger::start_receive()
index e6bfd5b..1eb5fc8 100644 (file)
@@ -36,10 +36,17 @@ BOOST_AUTO_TEST_SUITE( TestHostStatus )
 BOOST_AUTO_TEST_CASE( fail_percentage_10 )
 {
     int ping_fail_percentage_limit = 10;
+    int ping_congestion_percentage_limit = 75;
+    int ping_congestion_duration_thresh = 5;
+    int n_parallel_pings = 1;
     int resolved_ip_count = 10;
 
     LinkStatusItem link_status( new LinkStatus );
-    HostStatus host_status( "localhost", ping_fail_percentage_limit, 1,
+    HostStatus host_status( "localhost",
+                            ping_fail_percentage_limit,
+                            ping_congestion_percentage_limit,
+                            ping_congestion_duration_thresh,
+                            n_parallel_pings,
                             link_status );
     host_status.set_resolved_ip_count( resolved_ip_count );
 
@@ -77,10 +84,17 @@ BOOST_AUTO_TEST_CASE( fail_percentage_10 )
 BOOST_AUTO_TEST_CASE( fail_percentage_50 )
 {
     int ping_fail_percentage_limit = 50;
+    int ping_congestion_percentage_limit = 75;
+    int ping_congestion_duration_thresh = 5;
+    int n_parallel_pings = 1;
     int resolved_ip_count = 10;
 
     LinkStatusItem link_status( new LinkStatus );
-    HostStatus host_status( "localhost", ping_fail_percentage_limit, 1,
+    HostStatus host_status( "localhost",
+                            ping_fail_percentage_limit,
+                            ping_congestion_percentage_limit,
+                            ping_congestion_duration_thresh,
+                            n_parallel_pings,
                             link_status );
     host_status.set_resolved_ip_count( resolved_ip_count );
 
@@ -118,10 +132,17 @@ BOOST_AUTO_TEST_CASE( fail_percentage_50 )
 BOOST_AUTO_TEST_CASE( fail_percentage_80 )
 {
     int ping_fail_percentage_limit = 80;
+    int ping_congestion_percentage_limit = 75;
+    int ping_congestion_duration_thresh = 5;
+    int n_parallel_pings = 1;
     int resolved_ip_count = 10;
 
     LinkStatusItem link_status( new LinkStatus );
-    HostStatus host_status( "localhost", ping_fail_percentage_limit, 1,
+    HostStatus host_status( "localhost",
+                            ping_fail_percentage_limit,
+                            ping_congestion_percentage_limit,
+                            ping_congestion_duration_thresh,
+                            n_parallel_pings,
                             link_status );
     host_status.set_resolved_ip_count( resolved_ip_count );