added congestion analysis to HostStatus
[pingcheck] / src / host / hoststatus.cpp
index 74178a6..4887bdc 100644 (file)
@@ -35,31 +35,55 @@ using I2n::Logger::GlobalLogger;
  * @param host_address The address of the host it has to analyze.
  * @param ping_fail_percentage_limit The percentage threshold of pings that can
  * fail.
+ * @param ping_congestion_limit_percentage The percentage threshold of pings
+ * that can fail due to line congestion
+ * @param ping_duration_congestion_thresh Threshold in seconds (converted to
+ * micro seconds when stored) that marks the difference between a "normal"
+ * and a congested line
+ * @param n_parallel_pings Number of pings that is sent for each IP
  * @param link_analyzer The object used to notify the status of the host.
  */
 HostStatus::HostStatus(
         const string &host_address,
         const int ping_fail_limit_percentage,
+        const int ping_congestion_limit_percentage,
+        const int ping_duration_congestion_thresh,
         const int n_parallel_pings,
         const LinkStatusItem link_analyzer
 ) :
     HostAddress( host_address ),
     LinkAnalyzer( link_analyzer ),
     PingFailLimitPercentage( ping_fail_limit_percentage ),
+    PingCongestionLimitPercentage( ping_congestion_limit_percentage ),
+    // the threshold is stored scaled by 1000000 because
+    // update_congestion_stats compares it against ping_duration_us
+    // NOTE(review): the multiplication is carried out in int; with a 32-bit
+    // int it overflows for arguments above 2147 -- confirm the value range
+    PingDurationCongestionsThresh( ping_duration_congestion_thresh*1000000 ),
     ResolvedIpCount( 0 ),
     PingsPerformedCount( 0 ),
     PingsFailedCount( 0 ),
+    PingCongestionCount( 0 ),
     ExceededPingFailedLimit( false ),
+    ExceededPingCongestionLimit( false ),
     NParallelPingers( n_parallel_pings)
 {
+    // sanity checks: a host address must be given and both percentage limits
+    // have to lie within 0..100
     BOOST_ASSERT( !HostAddress.empty() );
-    BOOST_ASSERT( ( 0 <= PingFailLimitPercentage ) && ( PingFailLimitPercentage <= 100 ) );
+    BOOST_ASSERT( ( 0 <= PingFailLimitPercentage )
+                    && ( PingFailLimitPercentage <= 100 ) );
+    BOOST_ASSERT( ( 0 <= PingCongestionLimitPercentage )
+                    && ( PingCongestionLimitPercentage <= 100 ) );
 }
 
+// Destructor with an empty body.
 HostStatus::~HostStatus()
 {
 }
 
+
+/**
+ * Builds the common prefix of the log messages of this object: the host
+ * address followed by the current counters (failed, congested and performed
+ * pings, resolved IPs and parallel pingers).
+ *
+ * @return the prefix text; callers append the actual message to it.
+ */
+std::string HostStatus::log_prefix()
+{
+    std::stringstream temp;
+    temp << "Stat(" << HostAddress << "): "
+        << PingsFailedCount << " fail," << PingCongestionCount << " cong/"
+        << PingsPerformedCount << " pings/" << ResolvedIpCount << "*"
+        << NParallelPingers << " IPs: ";
+
+    // hand the assembled text to the caller; flowing off the end of a
+    // non-void function is undefined behavior
+    return temp.str();
+}
+
 /**
  * @param resolved_ip_count The number of IPs resolved for the host.
  */
@@ -73,9 +97,7 @@ void HostStatus::set_resolved_ip_count( const int resolved_ip_count )
     }
     ResolvedIpCount = resolved_ip_count;
 
-    GlobalLogger.debug() << "Stat(" << HostAddress << "): "
-        << PingsFailedCount << " fail/" << PingsPerformedCount << " pings/"
-        << ResolvedIpCount << "*" << NParallelPingers << " IPs: #IPs set";
+    GlobalLogger.debug() << log_prefix() << "#IPs set";
 }
 
 /**
@@ -88,6 +110,15 @@ bool HostStatus::exceeded_ping_failed_limit() const
 }
 
 /**
+ * @return true if the amount of congested pings given to the host exceeded the
+ * limit.
+ */
+bool HostStatus::exceeded_ping_congestion_limit() const
+{
+    // plain accessor: the flag itself is maintained by
+    // analyze_ping_congestion_count() and starts out as false
+    const bool limit_exceeded = ExceededPingCongestionLimit;
+    return limit_exceeded;
+}
+
+/**
  * Tells the status analyzer how the last ping went
  *
  * @param result: status of ping specifying success/failure and reason of fail
@@ -98,24 +129,16 @@ void HostStatus::update_ping_statistics( const PingStatus &result,
 {
     float ping_duration_ms = static_cast<float>(ping_duration_us) / 1000.;
 
-    GlobalLogger.debug() << "Stat(" << HostAddress << "): "
-        << PingsFailedCount << " fail/" << PingsPerformedCount << " pings/"
-        << ResolvedIpCount << "*" << NParallelPingers << " IPs: "
-        << "add ping with result " << to_string(result) << " which took "
-        << ping_duration_ms << " ms";
+    GlobalLogger.debug() << log_prefix() << "add ping with result "
+        << to_string(result) << " which took " << ping_duration_ms << " ms";
 
     BOOST_ASSERT( 1 <= ResolvedIpCount );
     BOOST_ASSERT( 0 <= PingsPerformedCount );
     BOOST_ASSERT( PingsFailedCount <= PingsPerformedCount );
+    BOOST_ASSERT( PingCongestionCount <= PingsPerformedCount );
 
-    increase_ping_performed_count();
-
-    if ( result != PingStatus_SuccessReply )
-    {
-        increase_ping_failed_count();
-    }
-
-    analyze_ping_failed_count();
+    update_fail_stats( result );
+    update_congestion_stats( result, ping_duration_us );
 
     // after we tried all IPs resolved for this host, we can analyze how many
     // failed
@@ -127,6 +150,35 @@ void HostStatus::update_ping_statistics( const PingStatus &result,
     }
 
     BOOST_ASSERT( PingsFailedCount <= PingsPerformedCount );
+    BOOST_ASSERT( PingCongestionCount <= PingsPerformedCount );
+}
+
+
+/**
+ * Counts one more performed ping, counts it as failed unless its result is
+ * one of the success states, and re-evaluates the fail limit.
+ *
+ * @param result status of the ping that just finished
+ */
+void HostStatus::update_fail_stats( const PingStatus &result)
+{
+    increase_ping_performed_count();
+
+    // a reply for an outdated IP still counts as success here
+    const bool ping_succeeded = ( result == PingStatus_SuccessReply )
+                             || ( result == PingStatus_SuccessOutdatedIP );
+    if ( !ping_succeeded )
+    {
+        increase_ping_failed_count();
+    }
+
+    analyze_ping_failed_count();
+}
+
+
+/**
+ * Counts the ping as congested if it took longer than the congestion
+ * threshold or ended in a timeout, then re-evaluates the congestion limit.
+ *
+ * @param result status of the ping that just finished
+ * @param ping_duration_us duration of that ping in micro seconds
+ */
+void HostStatus::update_congestion_stats( const PingStatus &result,
+                                          const long ping_duration_us )
+{
+    // PingStatus_FailureNoIP, PingStatus_SuccessOutdatedIP could also be caused
+    // by congestion, but also by other reasons (e.g. firewall blocking port 53)
+    // so they are not counted
+    const bool took_too_long =
+                        ( ping_duration_us > PingDurationCongestionsThresh );
+    const bool timed_out = ( result == PingStatus_FailureTimeout );
+
+    if ( took_too_long || timed_out )
+    {
+        increase_ping_congestion_count();
+    }
+
+    analyze_ping_congestion_count();
+}
 
 
@@ -146,26 +198,24 @@ void HostStatus::analyze_ping_statistics()
     // notify if the amount of pings that failed exceed the limit
     if ( exceeded_ping_failed_limit() )
     {
-        GlobalLogger.debug() << "Stat(" << HostAddress << "): "
-            << PingsFailedCount << " fail/" << PingsPerformedCount << " pings/"
-            << ResolvedIpCount << "*" << NParallelPingers << " IPs: "
-            << "notify down";
+        GlobalLogger.debug() << log_prefix() << "notify down";
         LinkAnalyzer->notify_host_down( HostAddress );
     }
     else
     {
-        GlobalLogger.debug() << "Stat(" << HostAddress << "): "
-            << PingsFailedCount << " fail/" << PingsPerformedCount << " pings/"
-            << ResolvedIpCount << "*" << NParallelPingers << " IPs: "
-            << "notify up";
+        GlobalLogger.debug() << log_prefix() << "notify up";
         LinkAnalyzer->notify_host_up( HostAddress );
     }
+
+    // nothing to do about congestion here, congestion is not forwarded to
+    // central LinkAnalyzer
 } //lint !e1762
 
+// Sets the performed, failed and congestion ping counters back to zero; the
+// Exceeded* flags are not modified here.
 void HostStatus::reset_ping_counters()
 {
     PingsPerformedCount = 0;
     PingsFailedCount = 0;
+    PingCongestionCount = 0;
 }
 
 void HostStatus::increase_ping_performed_count()
@@ -183,6 +233,14 @@ void HostStatus::increase_ping_failed_count()
     BOOST_ASSERT( ( 0 <= PingsFailedCount ) && ( PingsFailedCount <= PingsPerformedCount ) );
 }
 
+/**
+ * Adds one to the number of pings that were classified as congested.
+ */
+void HostStatus::increase_ping_congestion_count()
+{
+    PingCongestionCount += 1;
+
+    // the congestion count may never overtake the performed count
+    BOOST_ASSERT( ( 0 <= PingCongestionCount )
+                    && ( PingCongestionCount <= PingsPerformedCount ) );
+}
+
 void HostStatus::analyze_ping_failed_count()
 {
     BOOST_ASSERT( ( 0 <= PingFailLimitPercentage ) && ( PingFailLimitPercentage <= 100 ) );
@@ -196,18 +254,41 @@ void HostStatus::analyze_ping_failed_count()
     {
         ExceededPingFailedLimit = true;
 
-        GlobalLogger.debug() << "Stat(" << HostAddress << "): "
-            << PingsFailedCount << " fail/" << PingsPerformedCount << " pings/"
-            << ResolvedIpCount << "*" << NParallelPingers << " IPs: "
-            << "exceed limit=" << ping_fail_limit_count;
+        GlobalLogger.debug() << log_prefix() << "exceed fail limit="
+                             << ping_fail_limit_count;
     }
     else
     {
         ExceededPingFailedLimit = false;
 
-        GlobalLogger.debug() << "Stat(" << HostAddress << "): "
-            << PingsFailedCount << " fail/" << PingsPerformedCount << " pings/"
-            << ResolvedIpCount << "*" << NParallelPingers << " IPs: "
-            << "below limit=" << ping_fail_limit_count;
+        GlobalLogger.debug() << log_prefix() << "below fail limit="
+                             << ping_fail_limit_count;
+    }
+}
+
+/**
+ * Compares the congestion count with the limit derived from the configured
+ * percentage and stores the outcome in ExceededPingCongestionLimit.
+ */
+void HostStatus::analyze_ping_congestion_count()
+{
+    BOOST_ASSERT( ( 0 <= PingCongestionLimitPercentage )
+                    && ( PingCongestionLimitPercentage <= 100 ) );
+    BOOST_ASSERT( ( 0 <= PingCongestionCount )
+                    && ( PingCongestionCount <= PingsPerformedCount ) );
+
+    // limit = configured percentage of (resolved IPs * parallel pingers),
+    // truncated by the integer division
+    const int congestion_limit = ( ResolvedIpCount * NParallelPingers
+                                   * PingCongestionLimitPercentage ) / 100;
+
+    // remember the outcome in a member because PingCongestionCount can be
+    // reset
+    ExceededPingCongestionLimit = ( PingCongestionCount > congestion_limit );
+
+    const char *verdict = ExceededPingCongestionLimit
+                        ? "exceed congestion limit="
+                        : "below congestion limit=";
+    GlobalLogger.debug() << log_prefix() << verdict << congestion_limit;
+}