congestion detection now working; also add case that if all IPs get timeout despite...
[pingcheck] / src / host / hoststatus.cpp
index 7449a91..2e1db87 100644 (file)
@@ -61,7 +61,8 @@ HostStatus::HostStatus(
     PingCongestionCount( 0 ),
     ExceededPingFailedLimit( false ),
     ExceededPingCongestionLimit( false ),
-    NParallelPingers( n_parallel_pings)
+    NParallelPingers( n_parallel_pings),
+    InBurstMode( false )
 {
     BOOST_ASSERT( !HostAddress.empty() );
     BOOST_ASSERT( ( 0 <= PingFailLimitPercentage )
@@ -77,6 +78,11 @@ HostStatus::~HostStatus()
 
 void HostStatus::set_n_parallel_pings(const int n_parallel_pings)
 {
+    if (ExceededPingCongestionLimit)
+        InBurstMode = true;
+    else
+        InBurstMode = false; // congestion limit not exceeded
+
     if (NParallelPingers != n_parallel_pings)
     {
         NParallelPingers = n_parallel_pings;
@@ -91,8 +97,8 @@ std::string HostStatus::log_prefix()
     std::stringstream temp;
     temp << "Stat(" << HostAddress << "): "
         << PingsFailedCount << " fail," << PingCongestionCount << " cong/"
-        << PingsPerformedCount << " pings/" << ResolvedIpCount << "*"
-        << NParallelPingers << " IPs: ";
+        << PingsPerformedCount << " pings/" << NParallelPingers << "*"
+        << ResolvedIpCount << " IPs: ";
     return temp.str();
 }
 
@@ -149,8 +155,11 @@ void HostStatus::update_ping_statistics( const PingStatus &result,
     BOOST_ASSERT( PingsFailedCount <= PingsPerformedCount );
     BOOST_ASSERT( PingCongestionCount <= PingsPerformedCount );
 
-    update_fail_stats( result );
-    update_congestion_stats( result, ping_duration_us );
+    increase_ping_performed_count();
+
+    bool failed_because_congested = update_congestion_stats( result,
+                                                             ping_duration_us );
+    update_fail_stats( result, failed_because_congested );
 
     // after we tried all IPs resolved for this host, we can analyze how many
     // failed
@@ -166,12 +175,12 @@ void HostStatus::update_ping_statistics( const PingStatus &result,
 }
 
 
-void HostStatus::update_fail_stats( const PingStatus &result)
+void HostStatus::update_fail_stats( const PingStatus &result,
+                                    const bool failed_because_congested )
 {
-    increase_ping_performed_count();
-
     if ( result != PingStatus_SuccessReply
-      && result != PingStatus_SuccessOutdatedIP)
+      && result != PingStatus_SuccessOutdatedIP
+      && !failed_because_congested )
     {
         increase_ping_failed_count();
     }
@@ -180,17 +189,23 @@ void HostStatus::update_fail_stats( const PingStatus &result)
 }
 
 
-void HostStatus::update_congestion_stats( const PingStatus &result,
+bool HostStatus::update_congestion_stats( const PingStatus &result,
                                           const long ping_duration_us )
 {
+    bool is_congested = false;
     if (ping_duration_us > PingDurationCongestionsThresh)
-        increase_ping_congestion_count();
+        is_congested = true;
     else if ( result == PingStatus_FailureTimeout )
-        increase_ping_congestion_count();
+        is_congested = true;
     // PingStatus_FailureNoIP, PingStatus_SuccessOutdatedIP could also be caused
     // by congestion, but also by other reasons (e.g. firewall blocking port 53)
 
+    if (is_congested)
+        increase_ping_congestion_count();
+
     analyze_ping_congestion_count();
+
+    return is_congested;
 }
 
 
@@ -201,11 +216,19 @@ bool HostStatus::tried_all_resolved_ip() const
     return ( PingsPerformedCount >= ResolvedIpCount*NParallelPingers );
 }
 
+
+/** @brief called when tried_all_resolved_ip() */
 void HostStatus::analyze_ping_statistics()
 {
     BOOST_ASSERT( !HostAddress.empty() );
     BOOST_ASSERT( PingsPerformedCount >= ResolvedIpCount*NParallelPingers );
 
+    // timeouts are not counted as failures, only as congestion.
+    // However, if every ping was congested (timed out or too slow) even in
+    // burst mode, then we still declare the line down
+    if (InBurstMode && PingCongestionCount >= PingsPerformedCount)
+        ExceededPingFailedLimit = true;
+
     // notify if the amount of pings that failed exceed the limit
     if ( exceeded_ping_failed_limit() )
     {
@@ -218,7 +241,7 @@ void HostStatus::analyze_ping_statistics()
         LinkAnalyzer->notify_host_up( HostAddress );
     }
 
-    // nothing to do about congestion here, congestion is not forwarded to
+    // nothing else to do about congestion here, congestion is not forwarded to
     // central LinkAnalyzer
 } //lint !e1762
 
@@ -256,23 +279,20 @@ void HostStatus::analyze_ping_failed_count()
     BOOST_ASSERT( ( 0 <= PingFailLimitPercentage ) && ( PingFailLimitPercentage <= 100 ) );
     BOOST_ASSERT( ( 0 <= PingsFailedCount ) && ( PingsFailedCount <= PingsPerformedCount ) );
 
-    int ping_fail_limit_count = ( ResolvedIpCount * PingFailLimitPercentage
-                                                  * NParallelPingers) / 100;
+    int limit = ( PingsPerformedCount * PingFailLimitPercentage) / 100;
 
     // keep a boolean variable because the PingsFailedCount can be reseted
-    if ( PingsFailedCount > ping_fail_limit_count )
+    if ( PingsFailedCount > limit )
     {
         ExceededPingFailedLimit = true;
 
-        GlobalLogger.debug() << log_prefix() << "exceed fail limit="
-                             << ping_fail_limit_count;
+        GlobalLogger.debug() << log_prefix() << "exceed fail limit=" << limit;
     }
     else
     {
         ExceededPingFailedLimit = false;
 
-        GlobalLogger.debug() << log_prefix() << "below fail limit="
-                             << ping_fail_limit_count;
+        GlobalLogger.debug() << log_prefix() << "below fail limit=" << limit;
     }
 }
 
@@ -283,22 +303,21 @@ void HostStatus::analyze_ping_congestion_count()
     BOOST_ASSERT( ( 0 <= PingCongestionCount )
                     && ( PingCongestionCount <= PingsPerformedCount ) );
 
-    int ping_congestion_limit_count = ( ResolvedIpCount * NParallelPingers
-                                        * PingCongestionLimitPercentage ) / 100;
+    int limit = ( PingsPerformedCount * PingCongestionLimitPercentage) / 100;
 
     // keep a boolean variable because the PingCongestionCount can be reseted
-    if ( PingCongestionCount > ping_congestion_limit_count )
+    if ( PingCongestionCount > limit )
     {
         ExceededPingCongestionLimit = true;
 
         GlobalLogger.debug() << log_prefix() << "exceed congestion limit="
-                             << ping_congestion_limit_count;
+                             << limit;
     }
     else
     {
         ExceededPingCongestionLimit = false;
 
         GlobalLogger.debug() << log_prefix() << "below congestion limit="
-                             << ping_congestion_limit_count;
+                             << limit;
     }
 }